Я пытаюсь просмотреть некоторые таблицы Excel и извлечь информацию из определенных вкладок с помощью регулярных выражений. Я написал следующий код:
import pandas as pd
import os
import re
root = r"my_dir"
agg_df = pd.DataFrame()
for directory, subdirectory, files in os.walk(root):
for file in files:
if file.endswith('.xlsm'):
filepath = os.path.join(directory, file)
xls = pd.ExcelFile(filepath)
for i in xls.sheet_names:
if re.search(r'Apples', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Oranges', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search('Grapes', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Tomatoes', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Peaches', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Pears', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Bananas', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
elif re.search(r'Mangos', i):
df_temp = pd.read_excel(filepath, sheet_names=i)
df_temp['Filepath'] = filepath
df_temp['Sheet_Name'] = i
agg_df = agg_df.append(df_temp)
Однако это та первая вкладка, а не та, которую я пытаюсь указать.