Для всех, кому это интересно: вот как мне удалось разобрать и очистить файл в формате TCX.
import csv
import re

import pandas as pd
from bs4 import BeautifulSoup as Soup
# Matches any opening or closing XML tag, e.g. "<ns3:Speed>" or "</Time>".
_TAG_PATTERN = re.compile(r'</?[^<>]+>')

# Number of zero readings padded onto the front of the heart-rate column.
# Hardcoded: per the original author's note, no heart-rate measurements are
# taken until the 6th cycle.
HEART_RATE_PADDING = 6


def strip_xml_tags(cell):
    """Return *cell* as text with every XML tag and outer whitespace removed.

    The tags are removed as whole tokens.  ``str.lstrip``/``str.rstrip``
    with a tag string must not be used for this: they strip a *set of
    characters*, so e.g. ``'3.53'.lstrip(' <ns3:Speed>')`` eats the leading
    ``3`` because ``3`` occurs in the tag name.
    """
    return _TAG_PATTERN.sub('', str(cell)).strip()


def format_timestamp(text):
    """Drop trailing ``Z`` characters and replace every ``T`` with a space."""
    return text.rstrip('Z').replace('T', ' ')


def clean_columns(frame):
    """Return a copy of *frame* with XML tags stripped from every cell.

    Columns are addressed by position because the concatenated frame built
    by this script carries the label ``0`` twice (time and heart rate).
    The first column is additionally passed through ``format_timestamp``.
    Column labels are kept as they are.
    """
    cleaned = [
        frame.iloc[:, pos].map(strip_xml_tags)
        for pos in range(frame.shape[1])
    ]
    if not cleaned:
        return frame.copy()
    cleaned[0] = cleaned[0].map(format_timestamp)
    return pd.concat(cleaned, axis=1)


if __name__ == "__main__":
    # Parse the TCX file; the handle is closed once the soup is built.
    with open('jc180512.tcx', 'r') as data:
        soup = Soup(data, 'lxml-xml')

    # Collect every matching element, in document order, one list per field.
    time = list(soup.find_all('Time'))
    dist = list(soup.find_all('DistanceMeters'))
    caden = list(soup.find_all('Cadence'))
    speed = list(soup.find_all('Speed'))
    watts = list(soup.find_all('Watts'))
    hrbpm = list(soup.find_all('Value'))

    # Zero-pad the start of the heart-rate column (see HEART_RATE_PADDING).
    hrbp = [0] * HEART_RATE_PADDING
    heartrate = hrbp + hrbpm

    # Rows are driven by the time column; a shorter sibling column would
    # otherwise be silently truncated by zip(), so fail loudly instead.
    if any(len(col) < len(time) for col in (dist, caden, speed, watts)):
        raise IndexError(
            "a data column has fewer entries than the Time column"
        )

    # Intermediate file 1: one row per time entry, raw elements as text.
    with open("test.csv", "w") as f:
        for row in zip(time, dist, caden, speed, watts):
            f.write("{}, {}, {}, {}, {}\n".format(*row))

    # Intermediate file 2: the padded heart-rate column.  The ``with`` block
    # guarantees the file is flushed and closed before pandas reads it back.
    with open('test0.csv', 'w') as g:
        for value in heartrate:
            g.write('{}\n'.format(value))

    df1 = pd.read_csv('test.csv', header=None)
    df2 = pd.read_csv('test0.csv', header=None)

    # Side-by-side join on the row index; rows missing on either side
    # become 0.
    all_data = pd.concat([df1, df2], axis=1)
    all_data = all_data.fillna(0)
    all_data = all_data.astype(str)
    all_data = clean_columns(all_data)
    all_data.to_csv('final.csv', index=False)

    # Replace every space with a comma, which splits the "date time" value
    # of the first column into two separate CSV fields.
    with open('final.csv') as inf:
        with open('output_1.csv', 'w') as outf:
            for line in inf:
                outf.write(line.replace(' ', ','))