So, I'm new to Selenium. I have code that works for automating the extraction of information from a URL. But now, when I try to do the same process in a loop over several pages, I run into problems.
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException


class Sublink_File(object):

    def __init__(self, location, category):
        # build 25 search-result page URLs (craigslist paginates in steps of 120 posts)
        self.vals = []
        self.location = location
        for i in range(25):
            self.vals.append(f"https://{location}.craigslist.org/search/{category}?s={i*120}")
        # Firefox profile with image loading disabled to speed things up
        firefox_profile = webdriver.FirefoxProfile()
        firefox_profile.set_preference('permissions.default.image', 2)
        self.driver = webdriver.Firefox(firefox_profile=firefox_profile)
        self.delay = 3

    def extract_post_urls(self, page):
        # collect the post links from one search-result page
        url_list = []
        html_page = urllib.request.urlopen(page)
        soup = BeautifulSoup(html_page, "lxml")
        for link in soup.find_all('a', href=True):
            if (location and 'cto') in link['href']:
                print(link['href'])
                url_list.append(link['href'])
        return url_list

    def load_craiglist_url(self):
        for values in range(len(self.vals)):
            self.driver.get(self.vals[values])
            self.extract_post_urls(self.driver.get(self.vals[values]))
            try:
                wait = WebDriverWait(self.driver, self.delay)
                wait.until(EC.presence_of_all_elements_located((By.ID, "searchform")))
                print("Page is Ready")
            except TimeoutException:
                print("Loading took too much time")


location = "austin"
Austin_File = Sublink_File(location, "cta")
Austin_File.load_craiglist_url()
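To show what the loop iterates over: the URLs built in __init__ are just the paginated search pages. For my inputs the first few values of self.vals look like this:

location, category = "austin", "cta"
vals = [f"https://{location}.craigslist.org/search/{category}?s={i * 120}" for i in range(25)]
print(vals[0])   # https://austin.craigslist.org/search/cta?s=0
print(vals[1])   # https://austin.craigslist.org/search/cta?s=120
print(vals[24])  # https://austin.craigslist.org/search/cta?s=2880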
The error itself:
Traceback (most recent call last):
  File "SubLink_File.py", line 50, in <module>
    Austin_File.load_craiglist_url()
  File "SubLink_File.py", line 38, in load_craiglist_url
    self.extract_post_urls(self.driver.get(self.vals[values]))
  File "SubLink_File.py", line 27, in extract_post_urls
    html_page = urllib.request.urlopen(page)
  File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "C:\ProgramData\Anaconda3\lib\urllib\request.py", line 516, in open
    req.timeout = timeout
AttributeError: 'NoneType' object has no attribute 'timeout'