Поскольку вы упоминаете все страницы в комментарии выше, я буду использовать класс для хранения объекта XMLHTTP и снабдить его методами для извлечения данных, в то же время добавив метод для поиска количества страниц результатов и их циклического выполнения.Тестирование дало мне 251 ряд результатов.
Примечание: Обнаружено в результате отладки, что сохранение SetRequestHeader вызывало для вас запросы на проверку человеком.Удаление этого означало, что метод XMLHTTP работал.Он работал с и без меня.
Класс clsHTTP
Option Explicit
Private http As Object
Private Sub Class_Initialize()
Set http = CreateObject("MSXML2.XMLHTTP")
End Sub
Public Function GetString(ByVal url As String) As String
Dim sResponse As String
With http
.Open "GET", url, False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
GetString = sResponse
End With
End Function
Public Function GetInfo(ByVal html As HTMLDocument) As Variant
Dim names As Object, telNums As Object, i As Long, namesArray(), telsArray()
Set names = html.querySelectorAll("[class*='ldb-contact-name']")
Set telNums = html.querySelectorAll(".ldb-phone-number")
ReDim namesArray(0 To names.Length - 1)
ReDim telsArray(0 To telNums.Length - 1)
For i = 0 To names.Length - 1
namesArray(i) = names.item(i).innerText
telsArray(i) = telNums.item(i).innerText
Next
GetInfo = Array(namesArray, telsArray)
End Function
Стандартный модуль 1
Option Explicit
Public Sub GetReviewData()
Dim sResponse As String, html As HTMLDocument, http As clsHTTP
Dim numPages As Long, pageNum As Long, url As String
Dim results As Collection, item As Variant, ws As Worksheet
url = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/"
Set http = New clsHTTP
Set html = New HTMLDocument
Set results = New Collection
Set ws = ThisWorkbook.Worksheets("Sheet1")
With html
.body.innerHTML = http.GetString(url)
numPages = .querySelectorAll("[data-idx]").item(html.querySelectorAll("[data-idx]").Length - 2).innerText
results.Add http.GetInfo(html)
If numPages > 1 Then
For pageNum = 2 To numPages
url = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/?page=" & pageNum & "&showAdvancedItems=false®ionID=17762&locationText=Detroit%20MI"
.body.innerHTML = http.GetString(url)
results.Add http.GetInfo(html)
Next
End If
Dim numResults As Long
If results.Count > 0 Then
Application.ScreenUpdating = False
For Each item In results
numResults = UBound(item(0)) + 1
With ws
.Cells(GetLastRow(ws, 1) + 1, 1).Resize(numResults, 1) = Application.Transpose(item(0))
.Cells(GetLastRow(ws, 2) + 1, 2).Resize(numResults, 1) = Application.Transpose(item(1))
End With
Next
Application.ScreenUpdating = True
End If
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Селен:
Option Explicit
Public Sub GetReviewData()
Dim html As HTMLDocument
Dim numPages As Long, pageNum As Long, url As String
Dim results As Collection, item As Variant, ws As Worksheet
Dim d As WebDriver, elements As WebElements
url = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/?page=1&showAdvancedItems=false®ionID=17762&locationText=Detroit%20MI"
Set html = New HTMLDocument
Set results = New Collection
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set d = New ChromeDriver
With d
.Start "Chrome"
.get url
Set elements = .FindElementsByCss("[data-idx]")
numPages = elements(elements.Count - 1).Text
html.body.innerHTML = .PageSource
results.Add GetInfo(html)
If numPages > 1 Then
For pageNum = 2 To numPages
Application.Wait Now + TimeSerial(0, 0, 2)
url = "https://www.zillow.com/detroit-mi/real-estate-agent-reviews/?page=" & pageNum & "&showAdvancedItems=false®ionID=17762&locationText=Detroit%20MI"
.get url
html.body.innerHTML = .PageSource
results.Add GetInfo(html)
Next
End If
Dim numResults As Long
If results.Count > 0 Then
Application.ScreenUpdating = False
For Each item In results
numResults = UBound(item(0)) + 1
With ws
.Cells(GetLastRow(ws, 1) + 1, 1).Resize(numResults, 1) = Application.Transpose(item(0))
.Cells(GetLastRow(ws, 2) + 1, 2).Resize(numResults, 1) = Application.Transpose(item(1))
End With
Next
Application.ScreenUpdating = True
End If
End With
End Sub
Public Function GetLastRow(ByVal ws As Worksheet, Optional ByVal columnNumber As Long = 1) As Long
With ws
GetLastRow = .Cells(.Rows.Count, columnNumber).End(xlUp).Row
End With
End Function
Public Function GetInfo(ByVal html As HTMLDocument) As Variant
Dim names As Object, telNums As Object, i As Long, namesArray(), telsArray()
Set names = html.querySelectorAll("[class*='ldb-contact-name']")
Set telNums = html.querySelectorAll(".ldb-phone-number")
ReDim namesArray(0 To names.Length - 1)
ReDim telsArray(0 To telNums.Length - 1)
For i = 0 To names.Length - 1
namesArray(i) = names.item(i).innerText
telsArray(i) = telNums.item(i).innerText
Next
GetInfo = Array(namesArray, telsArray)
End Function