Привет и добро пожаловать в stackoverflow.
Ваша ошибка заключалась в том, что вы не пытались получить определенные элементы с помощью
details.find_all
Поэтому, пожалуйста, посмотрите мой код для справки и изучите документацию BeautifulSoup для вашего дальнейшего прогресса.
import requests
from bs4 import BeautifulSoup
# Scrape the LegCo members index page, follow every link whose href starts
# with "lky", and print what is found on that member's biography page: the
# name (<h2>), the section headers (<strong>), the list items (<li>) and the
# rows of the table inside <div id="container">.

# The index is fetched from the Chinese section of the site, while the detail
# pages are requested from the English section.
INDEX_URL = "https://www.legco.gov.hk/general/chinese/members/yr16-20/biographies.htm"
DETAIL_BASE_URL = "https://www.legco.gov.hk/general/english/members/yr16-20/"

# requests has no default timeout; without one a stalled server would block
# the script forever.
REQUEST_TIMEOUT = 10  # seconds

r = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
r.raise_for_status()  # fail loudly instead of parsing an HTTP error page
soup = BeautifulSoup(r.text, 'html.parser')

for anchor in soup.find_all('a'):
    # Anchors without an href fall back to '/', which never matches below.
    href = anchor.get('href', '/')
    if not href.startswith("lky"):
        continue

    href = DETAIL_BASE_URL + href
    print(href)
    r2 = requests.get(href, timeout=REQUEST_TIMEOUT)
    r2.raise_for_status()
    detail_soup = BeautifulSoup(r2.text, 'html.parser')

    # get name
    # find() returns None when the tag is absent; use an empty name in that
    # case rather than raising AttributeError on `.text`.
    name_tag = detail_soup.find("h2")
    name = name_tag.text.strip() if name_tag is not None else ""
    print("Name:", name, "\n")

    # find headers
    strong = detail_soup.find_all('strong')
    heads = [tag.text.strip() for tag in strong]
    print('Headers:\n',heads)

    # find header children
    details = detail_soup.find_all('li')
    data_details = [item.text.strip() for item in details]
    print('\nDetails:\n', data_details)

    # table parsing
    # Either the container div or the table inside it may be missing; treat
    # both cases as "no rows" so the script still prints an empty table.
    div = detail_soup.find('div', {'id': 'container'})
    table = div.find('table') if div is not None else None
    rows = table.find_all('tr') if table is not None else []
    data = []
    for row in rows:
        cols = [ele.text.strip() for ele in row.find_all('td')]
        # Drop cells that are empty after stripping whitespace.
        data.append([ele for ele in cols if ele])
    print("\nTable data:\n",data)
Вывод:
https://www.legco.gov.hk/general/english/members/yr16-20/lky.htm
Name: Hon Andrew LEUNG Kwan-yuen, GBS, JP
Headers:
['Constituency :',
'Education and professional qualifications :',
'Occupation :', 'Political affiliation :', 'Office address', ':', 'Office telephone', ':', 'Office fax', ':', 'E-mail', ':', 'Website', ':']
Details:
['Functional Constituency - Industrial (First)',
'BSc (Hon), Leeds University', 'Fellow, Textiles Institute',
'Fellow, Clothing and Footwear Institute',
'Honorary Doctor of Business Administration,
Coventry University, UK', 'Merchant', 'Business and Professionals Alliance for Hong Kong']
Table data:
[['Office address', ':', 'Room 710,
Legislative Council Complex,\r\n1 Legislative Council Road,
Central, Hong Kong'], ['Office telephone', ':', '2537 1339'],
['Office fax', ':', '2697 8482'],
['E-mail', ':', 'andrewleunglegco@outlook.com'],
['Website', ':', '-']]