I'm new to Python and I'm sure the script below could be optimised, but I've hit a problem with its final step.
The goal is to not download a file if it has been downloaded previously. At the moment I record each download in a file called download_history.log, so I need to add a check against that log: if the key already exists in the log, do nothing and move on to the next file; if it does not, download the file and add it to the log.
Any help would be appreciated.
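What I have in mind is roughly the following (an untested sketch; reading the log into a set is just my own guess at a workable approach):

# Untested sketch: load the history log into a set once at startup
import os

downloaded = set()
if os.path.exists('download_history.log'):
    with open('download_history.log') as log:
        downloaded = set(line.strip() for line in log)

# then, inside the bucket loop, before downloading:
#     if key_string in downloaded:
#         continue  # already logged, skip to the next file

Is something like that the right idea? The full script follows.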
#!/usr/bin/env python3
import boto
import sys, os
import zipfile
import shutil
import glob
import re
import errno
from boto.s3.key import Key
from boto.exception import S3ResponseError
# Make the download directory
DOWNLOAD_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
if not os.path.exists(DOWNLOAD_LOCATION_PATH):
    print("Making download directory")
    os.mkdir(DOWNLOAD_LOCATION_PATH)
# Delete the output folder if it exists
OUTPUT_FOLDER = os.path.expanduser("~") + "/AWSSplunk/Output/"
if os.path.exists(OUTPUT_FOLDER):
    shutil.rmtree(OUTPUT_FOLDER)
# Back up the AWS bucket
def backup_s3_folder():
    BUCKET_NAME = "my-bucket-name"
    AWS_ACCESS_KEY_ID = os.getenv("##################")
    AWS_ACCESS_SECRET_KEY = os.getenv("#########################")
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_ACCESS_SECRET_KEY)
    bucket = conn.get_bucket(BUCKET_NAME)
    # Go through the list of keys in the bucket
    bucket_list = bucket.list()
    for l in bucket_list:
        key_string = str(l.key)
        s3_path = DOWNLOAD_LOCATION_PATH + key_string
        try:
            # Record the key in the log file, then download it
            print("Downloading file", key_string)
            file_object = open('download_history.log', 'a')
            file_object.write(key_string)
            file_object.write("\n")
            file_object.close()
            l.get_contents_to_filename(s3_path)
        except (OSError, S3ResponseError) as e:
            # The key is most likely a folder: create it locally if it is missing
            if not os.path.exists(s3_path):
                try:
                    os.makedirs(s3_path)
                except OSError as exc:
                    # guard against race conditions
                    if exc.errno != errno.EEXIST:
                        raise
if __name__ == '__main__':
    backup_s3_folder()
# Start the unzipping process
print("Unzipping Starting")
dir_path = os.path.expanduser("~") + "/AWSSplunk/Downloads/"
for path, dir_list, file_list in os.walk(dir_path):
    for file_name in file_list:
        if file_name.endswith(".zip"):
            abs_file_path = os.path.join(path, file_name)
            parent_path = os.path.split(abs_file_path)[0]
            output_folder_name = os.path.splitext(abs_file_path)[0]
            output_path = os.path.join(parent_path, output_folder_name)
            with zipfile.ZipFile(abs_file_path, 'r') as zip_obj:
                zip_obj.extractall(output_path)
print("Unzipping Completed")
# Start moving files to output
print("Moving Files")
FILE_LOCATION_PATH = os.path.expanduser("~") + "/AWSSplunk/Output/"
if not os.path.exists(FILE_LOCATION_PATH):
    print("Making output directory")
    os.mkdir(FILE_LOCATION_PATH)
# Move .log, .txt and .json files to the output folder
for root, dirs, files in os.walk(dir_path):
    for file in files:
        if file.endswith(('.log', '.txt', '.json')):
            count = 1
            destination_file = os.path.join(FILE_LOCATION_PATH, file)
            # If a file with that name is already in the output folder,
            # append a counter so nothing gets overwritten
            while os.path.exists(destination_file):
                destination_file = os.path.join(FILE_LOCATION_PATH, f"{file}_{count}")
                count += 1
            shutil.move(os.path.join(root, file), destination_file)
print("Files Move Complete")
# Delete Directory
print("Cleaning up Downloads Directory")
shutil.rmtree(DOWNLOAD_LOCATION_PATH)
# Remove the encrypted EFR audit logs (file names starting with 2020, 2019 or EFR)
print("Remove the encrypted Audit Logs")
pattern = "^(2020|2019|EFR)"
for root, dirs, files in os.walk(FILE_LOCATION_PATH):
    for file in filter(lambda x: re.match(pattern, x), files):
        os.remove(os.path.join(root, file))
# Script clean up
print("Script Complete")
#with open("download_history.log", "a") as myfile:
# myfile.write('New Line\n')