Попробуйте следующее:
library(xml2)
Пример данных:
# Sample data: parse an inline XML string into `doc`. The string holds a single
# <report> element (attribute id="322231") with two <update> children; each
# <update> has a <when> child followed by a <what> child. Both <what> texts are
# identical, only the <when> values differ.
doc <- read_xml( '<report id="322231">
<update>
<when>1136281841</when>
<what>When uploading a objective-c++ file (.mm) bugzilla sets the MIME type as application/octet-stream</what>
</update>
<update>
<when>1136420901</when>
<what>When uploading a objective-c++ file (.mm) bugzilla sets the MIME type as application/octet-stream</what>
</update>
</report>')
Код:
# Build one data.frame row per <what> node of `doc`, then print only the last
# occurrence of each distinct <what> text.

# Node set with every <what> element found below the document root.
what.nodes <- xml_find_all(doc, ".//what")

# Now make a data.frame. The relative XPath expressions below are evaluated
# once per <what> node, so every column gets one entry per node.
# The axes are applied directly to the <what> node: prefixing them with ".//"
# would expand to "descendant-or-self::node()/" and additionally search from
# every node nested inside <what>, which is slower and could pick up an
# unrelated <when> that is a sibling of such a nested node.
df <- data.frame(
  # "id" attribute of the enclosing <report>, reached via the ancestor axis.
  report_id = xml_attr(xml_find_first(what.nodes, "ancestor::report"), "id"),
  # Text of the first <when> sibling that precedes the <what> node.
  when = xml_text(xml_find_first(what.nodes, "preceding-sibling::when")),
  # Text content of the <what> node itself.
  what = xml_text(what.nodes),
  # Keep the text columns as character vectors; this is only the default
  # from R 4.0 on, so it is set explicitly.
  stringsAsFactors = FALSE
)

# Keep the rows whose `what` value does not occur again further down, i.e.
# scan for duplicates from the bottom up. The result is printed, not assigned.
df[!duplicated(df$what, fromLast = TRUE), ]
Вывод:
# report_id when what
# 2 322231 1136420901 When uploading a objective-c++ file (.mm) bugzilla sets the MIME type as application/octet-stream