Вы можете использовать ElementTree
для разбора XML и извлечения координат:
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element
# Sample annotation document used by the demo below. The bare `...` lines appear
# to stand for content omitted from the listing (presumably further fields and
# complete <object> entries) -- they are literal text inside the string.
xml_raw = '''
<annotation>
...
<object>
<name>text</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>192</xmin>
<ymin>157</ymin>
<xmax>530</xmax>
<ymax>223</ymax>
</bndbox>
</object>
<object>
...
</object>
...
</annotation>
'''
def object_corners(root: Element) -> list:
    """Collect the bounding-box corners of every complete ``<object>`` in *root*.

    Args:
        root: Parsed annotation element. Each direct ``<object>`` child is
            expected to carry a ``<name>`` and a ``<bndbox>`` containing
            ``<xmin>``, ``<xmax>``, ``<ymin>`` and ``<ymax>``.

    Returns:
        A list of ``(name, corners)`` pairs in document order, where
        ``corners`` is
        ``[(xmin, ymin), (xmin, ymax), (xmax, ymin), (xmax, ymax)]``.
        ``<object>`` entries missing the name, the box, or any coordinate
        are skipped.

    Raises:
        ValueError: If a coordinate is present but is not an integer.
    """
    results = []
    for obj in root.findall('object'):
        name_el = obj.find('name')
        bndbox = obj.find('bndbox')
        # find() returns None for a missing child; an incomplete entry
        # (e.g. an elided placeholder) is skipped instead of raising
        # AttributeError on None.
        if name_el is None or bndbox is None:
            continue
        raw = [bndbox.findtext(tag) for tag in ('xmin', 'xmax', 'ymin', 'ymax')]
        if None in raw:
            continue
        xmin, xmax, ymin, ymax = (int(value) for value in raw)
        # Cartesian product of the x and y extremes gives the four corners.
        corners = [(x, y) for x in (xmin, xmax) for y in (ymin, ymax)]
        results.append((name_el.text, corners))
    return results


if __name__ == '__main__':
    for name, coords in object_corners(ET.fromstring(xml_raw)):
        print(name, coords)
Этот код выводит:
text [(192, 157), (192, 223), (530, 157), (530, 223)]
text [(561, 159), (561, 219), (645, 159), (645, 219)]
text [(74, 247), (74, 311), (465, 247), (465, 311)]
text [(493, 255), (493, 305), (625, 255), (625, 305)]
text [(85, 339), (85, 400), (496, 339), (496, 400)]