Я пробую этот тестовый код для веб-скрейпинга, но не могу получить все названия фильмов с сайта. Вот код:
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
# IMDB search: titles released in 2017, sorted by vote count (descending),
# page 1. The URL is built from adjacent literals because a quoted string
# cannot continue across a physical line break.
url = ('http://www.imdb.com/search/title?'
       'release_date=2017&sort=num_votes,desc&page=1')
# Without a timeout the request can block indefinitely on a stalled server.
response = get(url, timeout=30)
# Stop on an HTTP error status rather than parsing an error page as results.
response.raise_for_status()
# Show the beginning of the raw HTML as a quick sanity check.
print(response.text[:500])
html_soup = BeautifulSoup(response.text, 'html.parser')
# Collect every <div> whose class is 'lister-item mode-advanced'; the rest of
# the script treats each one as a single movie entry.
movie_containers = html_soup.find_all(
    'div', class_='lister-item mode-advanced')
print(type(movie_containers))
print(len(movie_containers))
# Explore the first result to see where each field sits in the markup.
first_movie = movie_containers[0]
# Title: text of the link inside the entry's <h3> heading.
first_name = first_movie.h3.a.text
# Year tag: looked up by its full class string (the stray trailing space from
# the original literal is removed, as it may defeat the class match).
first_year = first_movie.h3.find(
    'span', class_='lister-item-year text-muted unbold')
print(first_movie.strong)
# The text of the first <strong> tag is parsed as the IMDB rating.
first_imdb = float(first_movie.strong.text)
print("IMDB= ", first_imdb)
# Match on 'metascore' alone, as the collection loop does: requiring
# 'metascore favorable' returns None for any other score category.
first_mscore = first_movie.find('span', class_='metascore')
# Leave the value as None when the entry has no Metascore at all.
first_mscore = int(first_mscore.text) if first_mscore is not None else None
print("First MetaScore", first_mscore)
# The vote count is read from the 'data-value' attribute of the
# <span name="nv"> tag rather than from its text.
first_votes = first_movie.find('span', attrs={'name': 'nv'})
first_votes = int(first_votes['data-value'])
print("First_Votes=", first_votes)
# Metascore wrapper <div> of the eighth entry; find() yields None when absent.
eighth_movie_mscore = movie_containers[7].find(
    'div', class_='ratings-metascore')
# Parallel lists to store the scraped data in: index i of each list describes
# the same movie.
names = []
years = []
imdb_ratings = []
metascores = []
votes = []

# Extract data from each individual movie container.
for container in movie_containers:
    # Entries without a Metascore block are skipped entirely, so these lists
    # (and any table built from them) can be shorter than movie_containers.
    if container.find('div', class_='ratings-metascore') is None:
        continue

    # The name: text of the link inside the <h3> heading.
    names.append(container.h3.a.text)

    # The year: kept as the raw text of the year <span>.
    years.append(container.h3.find('span', class_='lister-item-year').text)

    # The IMDB rating: text of the first <strong> tag, as a float.
    imdb_ratings.append(float(container.strong.text))

    # The Metascore: int() tolerates surrounding whitespace in the tag text.
    m_score = container.find('span', class_='metascore').text
    metascores.append(int(m_score))

    # The number of votes: taken from the tag's 'data-value' attribute.
    vote = container.find('span', attrs={'name': 'nv'})['data-value']
    votes.append(int(vote))
# Assemble the scraped columns into a single table, one row per kept movie.
test_df = pd.DataFrame({
    'movie': names,
    'year': years,
    'imdb': imdb_ratings,
    'metascore': metascores,
    'votes': votes,
})
# info() writes its summary to stdout itself and returns None, so wrapping it
# in print() would add a stray "None" line to the output.
test_df.info()
print(test_df)
В выводе не отображаются только названия фильмов, всё остальное отображается правильно, без каких-либо проблем.
RangeIndex: 46 entries, 0 to 45
Data columns (total 5 columns):
imdb 46 non-null float64
metascore 46 non-null int64
movie 46 non-null object
votes 46 non-null int64
year 46 non-null object
dtypes: float64(1), int64(2), object(2)
memory usage: 1.9+ KB