Я попытался 3 разными способами:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as soup
from selenium import webdriver
driver = webdriver.Chrome(executable_path='C:/Users/lemonade/Documents/work/chromedriver')
my_url = "https://www.carehome.co.uk/"
def make_soup(url):
driver.get(url)
m_soup = soup(driver.page_source, features='html.parser')
return m_soup
main_page = make_soup(my_url)
boroughs = [borough.text.strip() for borough in main_page.select('.seo_links.seo_links_country [href]')]
indexs = list(range(16,19))
london_list = [boroughs[i] for i in indexs]
boroughs1 = [bo.replace("Borough","") for bo in london_list]
boroughs2 = [b1.replace("&","and") for b1 in boroughs1]
boroughs3 = ['-'.join(b2.split()) for b2 in boroughs2]
borough_links = ["https://www.carehome.co.uk/care_search_results.cfm/searchunitary/" + b3 for b3 in boroughs3]
borough_soup = [make_soup(b_link) for b_link in borough_links]
for soups in borough_soup:
titles = [title.text.strip() for title in soups.select('.home-name [href]')]
return(titles)
for soups in borough_soup:
addresses = [address.text.strip() for address in soups.select('.home-name>p.grey')]
return(addresses)
df = pd.DataFrame(zip(titles, addresses), columns = ['title','address'])
print(df)
Вместо этого я попробовал приведенный ниже код: Это дало | AttributeError: у объекта 'list' нет атрибута 'text' |
title_html = [soups.select('.home-name [href]') for soups in borough_soup]
titles = [title.text.strip() for title in title_html ]
addresses_html =[soups.select('.home-name>p.grey') for soups in borough_soup]
addresses = [address.text.strip() for address in addresses_html]
Я пытался создать и добавить список и вернуть этот список. [См. Ниже] Это просто выводит один элемент из списка.
def func(borough_soup):
for soups in borough_soup:
titles = [title_html.text.strip() for title_html in soups.select('.home-name [href]')]
for title in titles:
titles1 = []
titles1.append(title)
return(titles1)
Любая помощь будет высоко ценится!