Содержимое загружается динамически из тега `script`. Вы можете извлечь нужную строку регулярным выражением и разобрать её с помощью `jsonlite`,
а затем собрать интересующие элементы данных в таблицу, например, с помощью `purrr`.
R:
library(rvest)
library(stringr)
library(jsonlite)
library(purrr)
# Download the search-results page and flatten it to plain text. The listing
# data is expected to appear in that text as `resultListModel: {...}`.
page_url <- "https://www.immobilienscout24.de/Suche/S-2/Wohnung-Miete/Rheinland-Pfalz/Koblenz"
p <- read_html(page_url) %>% html_text()

# Capture the JSON object that follows `resultListModel: `. str_match() returns
# only the first match (NA when there is none), so parse_json() always
# receives exactly one string.
raw_json <- str_match(p, "resultListModel: (.*\\})")[, 2]
if (is.na(raw_json)) {
  stop("'resultListModel' was not found in the page source", call. = FALSE)
}
data <- jsonlite::parse_json(raw_json)

# Drill down to the list of individual listing entries.
results <- data$searchResponseModel$resultlist.resultlist$resultlistEntries[[1]]$resultlistEntry

# Build one row per listing: its address description and publication date.
df <- map_df(results, function(item) {
  data.frame(
    property = item$resultlist.realEstate$address$description$text,
    datePublished = item$`@publishDate`,
    stringsAsFactors = FALSE
  )
})

# View() opens a data viewer, which is only useful in an interactive session;
# fall back to printing when the script runs non-interactively.
if (interactive()) {
  View(df)
} else {
  print(df)
}
![enter image description here](https://i.stack.imgur.com/ZIZEK.png)
Py:
from bs4 import BeautifulSoup as bs
import requests, re, json
import pandas as pd
# Fetch the search-results page. The timeout keeps the script from hanging
# forever on a stalled connection, and raise_for_status() turns an HTTP error
# page into an explicit exception instead of a confusing parse failure later.
r = requests.get(
    'https://www.immobilienscout24.de/Suche/S-2/Wohnung-Miete/Rheinland-Pfalz/Koblenz',
    timeout=30,
)
r.raise_for_status()

# The listing data is expected in the page source as `resultListModel: {...}`;
# capture the JSON object from the first occurrence.
p = re.compile(r'resultListModel: (.*})')
match = p.search(r.text)
if match is None:
    raise ValueError('resultListModel was not found in the page source')
data = json.loads(match.group(1))

# Collect (address description, publication date) for every listing entry.
entries = data['searchResponseModel']['resultlist.resultlist']['resultlistEntries'][0]['resultlistEntry']
info = [
    (entry['resultlist.realEstate']['address']['description']['text'],
     entry['@publishDate'])
    for entry in entries
]
print(pd.DataFrame(info, columns=['Property', 'PublishedDate']))