Я написал скрипт Python для загрузки файлов с SFTP-сервера с использованием Python с использованием многопоточности, чтобы он мог подключаться к нескольким серверам одновременно и загружать файлы с них параллельно.
Работает до 10 подключений, но если имеется 25 подключений, появляется эта ошибка
предположим, что на каждом сервере необходимо загрузить 5000 файлов размером 130 Мб (почти)
Код часто успешно работает при последующих попытках или будет успешно выполняться для первых нескольких файлов в диапазоне дат, а затем из-за ошибки в середине загрузки всех файлов, которые мне нужно получить (см. Ошибку ниже)
В чем причина ошибки, и, пожалуйста, помогите мне ее решить. Заранее спасибо
Мой код:
import sys, os, string, threading
import paramiko
import os
import pysftp
import csv
import socket
from stat import S_ISDIR, S_ISREG
import time
import threading
from threading import Thread
from time import sleep
import os.path
import shutil
import lock
threading.Lock()
privatekeyfile = os.path.expanduser("C:\\Users\\Rohan\\.ssh\\cool.prv") # public key authentication
mykey = paramiko.RSAKey.from_private_key_file(privatekeyfile)
cnopts = pysftp.CnOpts()
cnopts.hostkeys = None
def workon(serverad,user, textfile, serverpath, local_path_temp):
with pysftp.Connection(host=serverad, username=user,private_key=mykey, cnopts=cnopts) as sftp:
r = (socket.gethostbyaddr(serverad))
print("connection successful with ",r) # connect to SFTP server
def get_file2(sftp ,remotedir): # make the list of all the files on server path of last 2 days
result = []
for entry in sftp.listdir_attr(remotedir):
remotepath = remotedir + "/" + entry.filename
mode = entry.st_mode
if S_ISDIR(mode):
result += get_file2(sftp, remotepath)
elif S_ISREG(mode):
if (time.time() - entry.st_mtime) // (24 * 3600) > 0:
result.append(entry.filename)
return result
remote_path = serverpath
d = get_file2(sftp, remote_path)
def process():
myname = [] # make list of files already in log file
for name in j.readlines():
while '\n' in name:
name = name.replace('\n', '')
myname.append(name)
import os
filtered_list = [string for string in d if string not in myname] # make filtered list of files preent on server but not in local path
print("filtered list:", filtered_list)
print(len(filtered_list))
local_path = local_path_temp
def compare_files(sftp, remotedir, remotefile, localdir, preserve_mtime=True):
remotepath = remotedir + "/" + remotefile # download the files inside filtered list
localpath = os.path.join(localdir, remotefile)
mode = sftp.stat(remotepath).st_mode
if S_ISDIR(mode):
try:
os.mkdir(localpath, mode=777)
except OSError:
pass
compare_files(sftp, remotepath, localpath, preserve_mtime)
elif S_ISREG(mode):
sftp.get(remotepath, localpath, preserve_mtime=True)
for files in filtered_list:
compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
with open(filename, 'a') as f: # append the downloaded file name in the log file
for item in filtered_list:
f.write("%s\n" % item)
with open(filename, 'r') as f:
unique_lines = set(f.readlines())
h = sorted(unique_lines)
with open(filename, 'w') as f:
f.writelines(h)
filename=textfile # if the log file does not exist it can create a new log file
try:
with open(filename, 'r+') as j:
process()
except IOError:
with open(filename, 'w+') as j:
process()
def main():
threads=[]
config_file_path = "config15.txt" # read config file and assign IP,username,logfile,serverpath,localpath
file = open(config_file_path, 'r')
reader = csv.reader(file)
all_rows = [row for row in reader]
for line in all_rows:
if len(line) != 5:
continue
server_ip = line[0]
username = line[1]
txt_file = line[2]
server_path = line[3]
local_path = line[4]
t = threading.Thread(target=workon, args=(server_ip, username, txt_file, server_path, local_path)) # use of multithreading
t.start()
threads.append(t)
with open(config_file_path) as f: # adjust sleep time according to connections in config file
if(sum(1 for _ in f)) >= 5:
sleep(5)
else:
sleep(1)
for t in threads:
t.join()
if __name__ == "__main__": # execute main function
main()
Ошибка:
Traceback (most recent call last):
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\Users\Rohan\AppData\Local\Programs\Python\Python37-32\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 98, in workon
process()
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 80, in process
compare_files(sftp, remote_path, files, local_path, preserve_mtime=False)
File "C:/Users/Rohan/PycharmProjects/untitled1/th8.py", line 77, in compare_files
sftp.get(remotepath, localpath, preserve_mtime=True)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\pysftp\__init__.py", line 249, in get
self._sftp.get(remotepath, localpath, callback=callback)
File "C:\Users\Rohan\PycharmProjects\untitled1\venv\lib\site-packages\paramiko\sftp_client.py", line 806, in get
"size mismatch in get! {} != {}".format(s.st_size, size)
OSError: size mismatch in get! 0 != 275856