Следующая функция возвращает список путей ко всем файлам в каталоге (включая подкаталоги), по которому затем можно выполнить итерацию:
from os import walk
from os.path import join
def retrieve_file_paths(dirName):
    """Return the paths of all files found under ``dirName``, recursively.

    Args:
        dirName: Directory to scan. It is passed unchanged to ``os.walk``.

    Returns:
        A list of paths, each formed by joining the directory in which a file
        was found with that file's name. The list is empty when no files are
        found (``os.walk`` ignores listing errors by default, so a missing
        directory also yields an empty list).
    """
    filepaths = []
    # walk() yields one (root, subdirectories, files) triple per directory;
    # only the file names are needed here.
    for root, _directories, files in walk(dirName):
        filepaths.extend(join(root, filename) for filename in files)
    return filepaths
В итоге весь код должен выглядеть следующим образом:
import pandas as pd
from os import walk
from os.path import join
# Directory that main() passes to retrieve_file_paths() to find its input files.
# NOTE(review): "/your/dir" looks like a placeholder — set the real path before running.
dirName = "/your/dir"
def sample_per(df2):
    """Return a random sample of ``df2`` sized according to its row count.

    The sampled fraction depends on ``len(df2)``:

    * 10,000 rows or fewer   -> 5 %
    * 15,000 rows or more    -> 3 %
    * 10,001 to 14,999 rows  -> 1 %

    Args:
        df2: Object supporting ``len()`` and ``.sample(frac=...)``; in this
            file it is a pandas DataFrame.

    Returns:
        The result of ``df2.sample(frac=...)`` with the fraction chosen above.
    """
    row_count = len(df2)
    # NOTE(review): the middle band gets the smallest fraction, which looks
    # unusual — confirm the thresholds with the author before changing them.
    if row_count <= 10000:
        frac = 0.05
    elif row_count >= 15000:
        frac = 0.03
    else:
        frac = 0.01
    return df2.sample(frac=frac)
def retrieve_file_paths(dirName):
    """Return the paths of all files found under ``dirName``, recursively.

    Args:
        dirName: Directory to scan. It is passed unchanged to ``os.walk``.

    Returns:
        A list of paths, each formed by joining the directory in which a file
        was found with that file's name. The list is empty when no files are
        found (``os.walk`` ignores listing errors by default, so a missing
        directory also yields an empty list).
    """
    filepaths = []
    # walk() yields one (root, subdirectories, files) triple per directory;
    # only the file names are needed here.
    for root, _directories, files in walk(dirName):
        filepaths.extend(join(root, filename) for filename in files)
    return filepaths
def main():
    """Flag a random sample of tracked items as 'Audit' in each input workbook.

    For every file returned by ``retrieve_file_paths(dirName)``:

    1. read it with ``pandas.read_excel``;
    2. select the rows whose 'Track Item' column equals 'Y';
    3. draw a sample of those rows with ``sample_per``;
    4. set 'Track Item' to 'Audit' on every row whose 'Retailer Item ID'
       appears in the sample;
    5. write the whole frame to 'Test.csv' without the index.
    """
    # dirName is only read here, so no ``global`` declaration is required.
    for filepath in retrieve_file_paths(dirName):
        # The path is passed as-is: a raw-string prefix (r"...") applies only
        # to string literals, and ``r + filepath`` raised NameError.
        df = pd.read_excel(filepath)
        tracked = df.loc[df['Track Item'] == 'Y']
        audit_sample = sample_per(tracked)
        # Match on the ID value rather than the row index, so every row that
        # shares a sampled 'Retailer Item ID' is flagged.
        audit_mask = df['Retailer Item ID'].isin(audit_sample['Retailer Item ID'])
        df.loc[audit_mask, 'Track Item'] = 'Audit'
        # NOTE(review): every iteration writes to the same 'Test.csv', so only
        # the last processed file survives — confirm whether one output per
        # input file (or a combined file) is intended.
        df.to_csv('Test.csv', index=False)
# Run the processing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()