Я собрал воедино скрипт, который проходит по каталогу и загружает содержимое в базу данных postgres.
В основном это работает, за исключением того, что я получаю сообщение об ошибке с некоторыми файлами, а не с другими. Я успешно загрузил 43 файла, прежде чем нажать эту ошибку.
Это ошибка, с которой я сталкиваюсь - похоже, она удваивает "\" в имени пути к файлу, для которого выдает ошибку:
Traceback (most recent call last):
File "uploader_script.py", line 136, in <module>
main()
File "uploader_script.py", line 69, in main
filestat = os.stat(os.path.join(root, name))
WindowsError: [Error 3] The system cannot find the path specified: 'C:\\Data\\04-09-2018-upload\\Data - Educational - Statistics - Standardized Exam Scores\\United States\\Data - Minnesota\\The Act - Profile Report - State Graduating Class 2016 Minnesota [MN_ACT_Profile_Report]\\._The Act - Profile Report - State Graduating Class 2016 Minnesota [MN_ACT_Profile_Report].pdf'
Это мой код:
"""A tool for saving files to and from a postgresql db.
*** THIS IS WRITTEN FOR PYTHON 2.7 ***
"""
# To use it for the current directory, run this in the command line:
# python uploader_script.py .\ --store
import os
import sys
import argparse
import psycopg2
import time
import datetime
import msvcrt as m
db_conn_str = "postgresql://xxx:xxx@111.11.111.1:5432/DBname"
# Define your table schema
create_table_stm = """
CREATE TABLE IF NOT EXISTS data_warehouse (
id serial primary key,
orig_filename text not null,
file_extension text not null,
created_date date not null,
last_modified_date date not null,
upload_timestamp_UTC timestamp not null,
uploaded_by text not null,
file_size_in_bytes integer not null,
original_containing_folder text not null,
file_data bytea not null
)
"""
# raw_input will be different in python3
uploaded_by = raw_input("Please, enter your [Firstname] [Lastname]: ")
if not uploaded_by:
print "You did not enter your name. Press ENTER to exit this script, then attempt to run the script again."
m.getch()
exit()
else:
print "Thank you, " + uploaded_by + "! Please, press ENTER to upload the files."
m.getch()
# Walk through the directory
def main():
parser = argparse.ArgumentParser()
parser_action = parser.add_mutually_exclusive_group(required=True)
parser_action.add_argument("--store", action='store_const', const=True, help="Load an image from the named file and save it in the DB")
parser_action.add_argument("--fetch", type=int, help="Fetch an image from the DB and store it in the named file, overwriting it if it exists. Takes the database file identifier as an argument.", metavar='42')
parser.add_argument("parentdir", help="Name of folder to write to / fetch from")
args = parser.parse_args()
conn = psycopg2.connect(db_conn_str)
curs = conn.cursor()
# Run the create_table_stm code at the top of this file to generate the table if it does not already exist
curs.execute(create_table_stm)
for root, dirs, files in os.walk(args.parentdir):
for name in files:
# Store the original file path from the computer the file was uploaded from.
joined_var = os.path.join(root)
original_path = os.path.abspath(joined_var)
# Set the file the script is looking at to a variable for later use to pull filesize
filestat = os.stat(os.path.join(root, name))
# Split the file extension from the filename
file_extension_holder = os.path.splitext(name)[1]
# Time module: https://docs.python.org/3.7/library/time.html#module-time
# The return value is a number giving the number of seconds since the epoch (see the time module).
# The epoch is the point where the time starts, and is platform dependent. For Windows and Unix, the epoch is January 1, 1970, 00:00:00 (UTC).
# To find out what the epoch is on a given platform, look at time.gmtime(0). The code below is written for Windows.
# Datetime module: https://docs.python.org/3/library/datetime.html
# More info: /206111/kak-uznat-datu-i-vremya-sozdaniya-i-izmeneniya-faila-v-python
# Generate the created_date -- I suspect there is a more straightforward way to do this with the time or datetime module. But this works.
c_time_in_seconds = os.path.getctime(os.path.join(root, name))
c_time_array = str(time.gmtime(c_time_in_seconds)[:3])
c_date_str = ''.join(c_time_array)
c_format_str = '(%Y, %m, %d)'
c_datetime_obj = datetime.datetime.strptime(c_date_str, c_format_str)
created_date = c_datetime_obj.date()
# Generate the last_modified_date
m_time_in_seconds = os.path.getmtime(os.path.join(root, name))
m_time_array = str(time.gmtime(m_time_in_seconds)[:3])
m_date_str = ''.join(m_time_array)
m_format_str = '(%Y, %m, %d)'
m_datetime_obj = datetime.datetime.strptime(m_date_str, m_format_str)
last_modified_date = m_datetime_obj.date()
# Generate the timestamp of the upload (in UTC timezone)
py_uploaded_timestamp = datetime.datetime.now()
if args.store:
with open(os.path.join(root, name),'rb') as f:
# read the binary
filedata = psycopg2.Binary(f.read())
# Call the st_size command from os.stat to read the filesize in bytes
filesize = filestat.st_size
# This has to agree with the table schema you set at the top of this file
curs.execute("INSERT INTO data_warehouse(id, orig_filename, file_extension, created_date, last_modified_date, upload_timestamp_UTC, uploaded_by, file_size_in_bytes, original_containing_folder, file_data) VALUES (DEFAULT,%s,%s,%s,%s,%s,%s,%s,%s,%s) RETURNING id", (name, file_extension_holder, created_date, last_modified_date, py_uploaded_timestamp, uploaded_by, filesize, original_path, filedata))
print curs
returned_id = curs.fetchone()[0]
print("Stored {0} into DB record {1}".format(args.parentdir, returned_id))
conn.commit()
elif args.fetch is not None:
with open(args.parentdir,'wb') as f:
curs.execute("SELECT file_data, orig_filename FROM files WHERE id = %s", (int(args.fetch),))
(file_data, orig_parentdir) = curs.fetchone()
f.write(file_data)
print("Fetched {0} into file {1}; original parentdir was {2}".format(args.fetch, args.parentdir, orig_filename))
for name in dirs:
print(os.path.join(root, name))
conn.close()
if __name__ == '__main__':
main()