Мое окончательное решение, хотя я уверен, что это не самый элегантный способ получить конечный результат:
import fnmatch
import os
import warnings

import pandas as pd
start_dirctory = 'DIR'  # change this

# Labels applied, in order, to the data columns read from the files.
BAND_COLUMNS = [
    '10hz', '12.5hz', '16hz', '20hz', '25hz', '31.5hz', '40hz', '50hz',
    '63hz', '80hz', '100hz', '125hz', '160hz', '200hz', '250hz', '315hz',
    '400hz', '500hz', '630hz', '800hz', '1000hz', '1250hz', '1600hz',
    '2000hz', '2500hz', '3150hz', '4000hz', '5000hz', '6300hz', '8000hz',
    '10000hz',
]

# Bookkeeping columns added while reading; they are not measurement data.
_HELPER_COLUMNS = ('date', 'file', 'line_number')


def _read_measurement_files(start_directory):
    """Read every ``*.txt`` file below *start_directory* into a DataFrame.

    Each file is parsed with ``pd.read_csv(..., header=None)`` and tagged with
    three helper columns: ``date`` (name of the containing folder), ``file``
    (the file name) and ``line_number`` (0-based row position in the file).

    Returns a list with one DataFrame per file; directories and file names
    are visited in sorted order so the row order is reproducible.
    """
    frames = []
    for path, dirs, files in os.walk(start_directory):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for file_name in sorted(fnmatch.filter(files, '*.txt')):
            frame = pd.read_csv(os.path.join(path, file_name), header=None)
            frame['date'] = os.path.basename(path)
            frame['file'] = file_name
            frame['line_number'] = range(len(frame))
            # Extension point: add any further per-file columns here.
            frames.append(frame)
    return frames


def load_measurements(start_directory):
    """Combine all ``*.txt`` files below *start_directory* into one DataFrame.

    The result is indexed by a ``date`` timestamp built from the folder name
    and the first six characters of the file name, rounded to the nearest
    5 minutes, plus one minute per line within the file.  The data columns
    are labelled with ``BAND_COLUMNS`` and a ``location`` column holds the
    seventh character of the file name.

    Returns ``None`` (after emitting a warning) when no file is found.

    Raises:
        ValueError: if the files contain more data columns than there are
            names in ``BAND_COLUMNS``, or if a timestamp cannot be parsed.
    """
    frames = _read_measurement_files(start_directory)
    if not frames:
        warnings.warn(
            f"no '*.txt' files found under {start_directory!r}",
            stacklevel=2,
        )
        return None

    # Concatenate once; concatenating inside the read loop re-copies the
    # accumulated frame for every file.
    result = pd.concat(frames, axis='index', ignore_index=True)

    data_columns = [c for c in result.columns if c not in _HELPER_COLUMNS]
    if len(data_columns) > len(BAND_COLUMNS):
        raise ValueError(
            f"found {len(data_columns)} data columns but only "
            f"{len(BAND_COLUMNS)} band names are defined"
        )

    # File name layout: characters 0-5 are the time, character 6 the location.
    # NOTE(review): the time part is presumably HHMMSS - confirm with the data.
    result['location'] = result['file'].str.slice(6, 7)
    time_part = result['file'].str.slice(0, 6)

    # Folder name + time -> timestamp of the first line, snapped to 5 minutes.
    start = pd.to_datetime(
        result['date'] + ' ' + time_part, errors='raise', dayfirst=False
    ).dt.round('5min')
    # Each subsequent line in a file is one minute later than the previous.
    result['date'] = start + pd.to_timedelta(result['line_number'], unit='min')

    result = result.drop(columns=['file', 'line_number']).set_index('date')

    # Rename only the data columns so that 'location' keeps its own label
    # (assigning the band names to *all* columns would mislabel it or fail
    # with a length mismatch).
    return result.rename(columns=dict(zip(data_columns, BAND_COLUMNS)))


df_result = load_measurements(start_dirctory)