Я пытаюсь загрузить все сообщения Instagram определенного человека, выполнив поиск по имени пользователя в окне поиска. После того, как я нашел целевой профиль с помощью селена webdriver, я использую
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
, чтобы прокрутить вниз все сообщения, присутствующие в этом профиле. Позже я пытаюсь найти все URL этих изображений. К сожалению, я могу получить 30 URL-адресов только из 37 изображений. Но я могу просмотреть оставшиеся 7 изображений в браузере, проверив браузер.
Мой код
from selenium import webdriver
from bs4 import BeautifulSoup
from time import sleep
import requests
import shutil
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
class App:
def __init__(self,username='*******',password='*****',target_username='******',
path="C:\\Users\\ranga\\Music\\Selenium\\photos"):
self.username=username
self.password=password
self.target_username=target_username
self.path=path
self.driver=webdriver.Firefox(executable_path="C:\\Users\\ranga\\Music\\Selenium\\geckodriver.exe")
self.main_url="https://www.instagram.com"
self.driver.get(self.main_url)
self.error=False
sleep(3)
# login function
self.log_in()
sleep(2)
if self.error is False:
self.close_dialogbox_if_there()
self.open_target_profile()
if self.error is False:
self.scroll_down()
if self.error is False:
if not os.path.exists(path):
os.mkdir(path)
self.downloading_images()
self.driver.close()
def downloading_images(self):
#sleep(5)
#print(self.driver.page_source)
soup=BeautifulSoup(self.driver.page_source,'html.parser')
self.driver.set_page_load_timeout(3)
#print(soup.prettify())
all_images=soup.find_all('img')
print('length of all images:',len(all_images))
for index,img in enumerate(all_images):
#print('img:{0}'.format(img))
#input('Stop for now')
file_name='image_'+str(index)+'.jpg'
image_path=os.path.join(self.path,file_name)
link=img['src']
print('Downloading Image..',index,':',link)
response=requests.get(link,stream=True)
try:
with open(image_path,'wb') as file:
shutil.copyfileobj(response.raw,file)
except Exception as e:
print(e)
print('Could not download image number',index)
def close_dialogbox_if_there(self):
try:
sleep(2)
close_button=self.driver.find_element_by_xpath("/html/body/div[3]/div/div/div[3]/button[2]")
close_button.click()
except Exception:
pass
def open_target_profile(self):
try:
search_bar=self.driver.find_element_by_xpath('//input[@placeholder="Search"]')
search_bar.send_keys(self.target_username)
target_profile=self.main_url + '/' + self.target_username +'/'
self.driver.get(target_profile)
sleep(2)
except Exception:
self.error=True
print('Could not find Search bar')
def scroll_down(self):
try:
no_of_posts=self.driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/header/section/ul/li[1]/span/span')
no_of_posts=str(no_of_posts.text).replace(',','')
self.no_of_posts=int(no_of_posts)
if self.no_of_posts>12:
no_of_scrolls= int(self.no_of_posts/12) + 1
for value in range(no_of_scrolls):
self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
sleep(2)
except Exception:
self.error=True
print('Could not find number of posts while scroll down')
def log_in(self):
try:
login_button=self.driver.find_element_by_xpath('//*[@id="react-root"]/section/main/article/div[2]/div[2]/p/a')
login_button.click()
sleep(2)
try:
user_name_input=self.driver.find_element_by_xpath('//input[@name="username"]')
user_name_input.send_keys(self.username)
password_input=self.driver.find_element_by_xpath('//input[@name="password"]')
password_input.send_keys(self.password)
password_input.submit()
except Exception:
print('Some exception occured while trying to find username or password')
self.error=True
except Exception:
self.error=True
print('Unable to find login button')
if __name__ == '__main__':
app=App()
Ниже приведены мои журналы
DevTools listening on ws://127.0.0.1:59120/devtools/browser/8310d943-619d-4278-9d52-4ae4aa68047f
length of all images: 30
Downloading Image.. 0
Downloading Image.. 1
Downloading Image.. 2
Downloading Image.. 3
Downloading Image.. 4
Downloading Image.. 5
Downloading Image.. 6
Downloading Image.. 7
Downloading Image.. 8
Downloading Image.. 9
Downloading Image.. 10
Downloading Image.. 11
Downloading Image.. 12
Downloading Image.. 13
Downloading Image.. 14
Downloading Image.. 15
Downloading Image.. 16
Downloading Image.. 17
Downloading Image.. 18
Downloading Image.. 19
Downloading Image.. 20
Downloading Image.. 21
Downloading Image.. 22
Downloading Image.. 23
Downloading Image.. 24
Downloading Image.. 25
Downloading Image.. 26
Downloading Image.. 27
Downloading Image.. 28
Downloading Image.. 29
Traceback (most recent call last):
File "part_1_login.py", line 119, in <module>
app=App()
File "part_1_login.py", line 32, in __init__
self.downloading_images()
File "part_1_login.py", line 50, in downloading_images
response=requests.get(link,stream=True)
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 519, in request
prep = self.prepare_request(req)
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\sessions.py", line 462, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\models.py", line 313, in prepare
self.prepare_url(url, params)
File "C:\Users\ranga\AppData\Local\Programs\Python\Python37-32\lib\site-packages\requests\models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/static/images/web/mobile_nav_type_logo.png/735145cfe0a4.png': No schema supplied. Perhaps you meant http:///static/images/web/mobile_nav_type_logo.png/735145cfe0a4.png?