Не используйте логическое значение. Используйте while True:
, а затем используйте break
для выхода из цикла.
def start_requests(self):
# get the file path of the csv file that contains the pairs from the settings.py
with open(self.settings["PROXY_CSV_FILE"], mode="r") as csv_file:
# requests is a list of dictionaries like this -> {url: str, ua: str, ip: str}
requests = process_csv(csv_file)
j = 1
for i, req in enumerate(requests):
import pdb; pdb.set_trace()
while True :
x = len(requests) - i
# Return needed url with set delay of 3 seconds
yield SplashRequest(url=req["url"], callback=self.parse, args={"wait": 3},
# Pair with user agent specified in csv file
headers={"User-Agent": req["ua"]},
# Sets splash_url to whatever the current proxy that goes with current URL is instead of actual splash url
splash_url = req["ip"],
priority = x,
meta={'priority': x} # <- check here!!
)
j = j + 1
if j == len(requests):
j = 1
break
Но, похоже, вам вообще не нужно while
или j
, используйте for _ in range(len(requests)):
Кроме того, вы должны установить x
вне внутреннего цикла, поскольку он не изменяется в этом цикле.
def start_requests(self):
# get the file path of the csv file that contains the pairs from the settings.py
with open(self.settings["PROXY_CSV_FILE"], mode="r") as csv_file:
# requests is a list of dictionaries like this -> {url: str, ua: str, ip: str}
requests = process_csv(csv_file)
for i, req in enumerate(requests):
import pdb; pdb.set_trace()
x = len(requests) - i
for _ in range(len(requests)):
# Return needed url with set delay of 3 seconds
yield SplashRequest(url=req["url"], callback=self.parse, args={"wait": 3},
# Pair with user agent specified in csv file
headers={"User-Agent": req["ua"]},
# Sets splash_url to whatever the current proxy that goes with current URL is instead of actual splash url
splash_url = req["ip"],
priority = x,
meta={'priority': x} # <- check here!!
)