Вы получаете ошибку, потому что не выполняете запрос по нужной ссылке. Я создал функцию get_soup(url), которая вызывается в цикле.
from bs4 import BeautifulSoup
import ssl
import json
import ast
import json
import os
from urllib.request import Request, urlopen
# SSL context configured to skip hostname checking and certificate
# verification.
# NOTE(review): nothing in the visible code passes `ctx` to urlopen(), so it
# appears to have no effect on the requests made below -- confirm intent.
ctx = ssl.create_default_context()
# Tuple targets are assigned left to right, so check_hostname is switched off
# before verify_mode is relaxed (the ssl module requires that order).
ctx.check_hostname, ctx.verify_mode = False, ssl.CERT_NONE
def get_soup(url):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    The request carries a browser-like ``User-Agent`` header and the response
    body is parsed with the built-in ``html.parser`` backend.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``BeautifulSoup`` object built from the response body.

    Raises:
        Any exception raised by ``urlopen`` (for example
        ``urllib.error.URLError``) propagates unchanged; nothing is caught
        here.
    """
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if read() raises.
    with urlopen(req) as response:
        webpage = response.read()
    return BeautifulSoup(webpage, 'html.parser')
# Fetch the trending feed and keep a prettified UTF-8 copy of its HTML so it
# can be dumped to disk at the end of the script.
url = 'https://www.youtube.com/feed/trending'
soup = get_soup(url)
html = soup.prettify('utf-8')

video_details = {}
other_details = {}

# All the trending youtube links.
# The [:1] slice keeps only the first matching anchor, so a single video page
# is fetched below.
youtubelinks = [
    "https://www.youtube.com" + a['href']
    for a in soup.select('a[href^="/watch?v="]')[:1]
]

for link in youtubelinks:
    # Keep the URL in `link` and give the parsed page its own name instead of
    # rebinding the loop variable.
    page = get_soup(link)

    for span in page.find_all('span', attrs={'class': 'watch-title'}):
        video_details['TITLE'] = span.text.strip()

    for script in page.find_all('script', attrs={'type': 'application/ld+json'}):
        channel_description = json.loads(script.text.strip())
        video_details['CHANNEL_NAME'] = channel_description['itemListElement'][0]['item']['name']

    for div in page.find_all('div', attrs={'class': 'watch-view-count'}):
        video_details['NUMBER_OF_VIEWS'] = div.text.strip()

    for button in page.find_all('button', attrs={'title': 'I like this'}):
        video_details['LIKES'] = button.text.strip()

    for button in page.find_all('button', attrs={'title': 'I dislike this'}):
        video_details['DISLIKES'] = button.text.strip()

    for span in page.find_all('span', attrs={'class': 'yt-subscription-button-subscriber-count-branded-horizontal yt-subscriber-count'}):
        video_details['NUMBER_OF_SUBSCRIPTIONS'] = span.text.strip()

    hashtags = []
    for span in page.find_all('span', attrs={'class': 'standalone-collection-badge-renderer-text'}):
        # Search inside the badge span only. Searching the whole page here
        # would append every 'yt-uix-sessionlink' anchor of the document
        # (sign-in links, related videos, ...) once per badge span.
        hashtags.extend(
            a.text.strip()
            for a in span.find_all('a', attrs={'class': 'yt-uix-sessionlink'})
        )
    video_details['HASH_TAGS'] = hashtags

    print(video_details)

# Dump the prettified feed page; `html` is bytes, hence binary mode.
with open('output_file.html', 'wb') as html_file:
    html_file.write(html)

# `video_details` is one dict reused across loop iterations, so the JSON file
# holds the values left in it after the last processed page.
with open('data.json', 'w', encoding='utf8') as outfile:
    json.dump(video_details, outfile, ensure_ascii=False, indent=4)

print('----------Extraction of data is complete. Check json file.----------')
Вывод:
{'LIKES': '11,114', 'CHANNEL_NAME': 'World Rugby', 'DISLIKES': '293', 'NUMBER_OF_SUBSCRIPTIONS': '614K', 'NUMBER_OF_VIEWS': '634,395 views', 'TITLE': 'HIGHLIGHTS: Japan v Ireland - Rugby World Cup 2019', 'HASH_TAGS': ['GB', '', 'Review', '#1 on Trending', '', 'World Rugby', 'Sign in', 'Sign in', 'Sign in', 'Sign in', 'https://youtube.com/user/worldrugby', 'https://youtube.com/user/worldrugby', 'http://www.rugbyworldcup.com', 'https://twitter.com/rugbyworldcup', 'https://www.facebook.com/rugbyworldcup', 'http://www.instagram.com/rugbyworldcup', 'http://giphy.com/worldrugby', 'https://www.tiktok.com/@rugbyworldcup...', 'https://www.snapchat.com/add/rugbywor...', 'Sports', 'Extended Highlights: New Zealand v South Africa\n \n\n - Duration: 8:51.\n \nWorld Rugby\n869,064 viewsNew', '8:51', "Schmidt and Best's post match press conference| Japan v Ireland\n \n\n - Duration: 12:00.\n \nWorld Rugby\n48,365 viewsNew", '12:00', 'Liverpool players react to their FIFA 20 ratings | Van Dijk with Salah, Mane, Firmino and more\n \n\n - Duration: 5:52.\n \nLiverpool FC\n2,178,177 viewsNew', '5:52', "35th America's Cup Race 7 NZL vs. USA | AMERICA'S CUP\n \n\n - Duration: 23:23.\n \nAmerica's Cup\n152,003 views", '23:23', "Guy's maiden voyage on his hydrofoil boat | Guy Martin Proper\n \n\n - Duration: 7:09.\n \nGuy Martin Proper\n66,941 viewsNew", '7:09', "Furious Boris Johnson humiliates Jeremy Corbyn, rages at Labour's Brexit LIES and gets long APPLAUSE\n \n\n - Duration: 7:32.\n \nProductiehuisEU\n394,890 viewsNew", '7:32', "KOREA vs. 
BRAZIL - Highlights | Women's Volleyball World Cup 2019\n \n\n - Duration: 8:49.\n \nVolleyball World\n145,837 viewsNew", '8:49', "Jonah Lomu's 15 unforgettable Rugby World Cup tries\n \n\n - Duration: 6:00.\n \nWorld Rugby\n995,979 views", '6:00', 'Extended Highlights: France v Argentina\n \n\n - Duration: 8:35.\n \nWorld Rugby\n347,394 viewsNew', '8:35', 'What Martin Johnson did just before the 2003 World Cup final || Rugby World Cup Memories - Neil Back\n \n\n - Duration: 8:58.\n \nRugbyPass Official\n95,379 views', '8:58', "Ireland's Shock reaction to Japan Loss\n \n\n - Duration: 12:04.\n \nRugbyPass Official\n6,045 viewsNew", '12:04', 'Bodybuilder Tries Rugby, Gets SMASHED\n \n\n - Duration: 15:17.\n \nJuji & Tom\n2,138,650 views', '15:17', 'EXTENDED HIGHLIGHTS | Matchday One: Japan vs Russia\n \n\n - Duration: 23:38.\n \nWorld Rugby\n338,672 viewsNew', '23:38', 'My Story: Ruaridh McConnochie\n \n\n - Duration: 7:24.\n \nEngland Rugby\n20,312 viewsNew', '7:24', 'Japan head coach speaks after historic victory over Ireland\n \n\n - Duration: 1:21.\n \nWorld Rugby\n52,472 viewsNew', '1:21', 'HIGHLIGHTS: Argentina v Tonga - Rugby World Cup 2019\n \n\n - Duration: 2:56.\n \nWorld Rugby\n195,221 viewsNew', '2:56', 'Extended Highlights: Russia v Samoa - Rugby World Cup 2019\n \n\n - Duration: 23:11.\n \nWorld Rugby\n222,043 viewsNew', '23:11', 'Argentina vs Tonga (28-12) | Rugby World Cup 2019 Highlights\n \n\n - Duration: 3:16.\n \nITV\n16,274 viewsNew', '3:16', "Guy competes with the British America's Cup team | Guy Martin Proper\n \n\n - Duration: 9:29.\n \nGuy Martin Proper\n40,810 viewsNew", '9:29', 'Irish Rugby TV: Ireland v New Zealand 2018 GUINNESS Series Highlights\n \n\n - Duration: 7:13.\n \nIrish Rugby TV\n777,015 views', '7:13', '', 'History']}
----------Extraction of data is complete. Check json file.----------