Я изменил ваш существующий скрипт для обхода нескольких страниц, который теперь может извлекать оттуда name
, Date of Incorporation
, email
и address
каждого контейнера. Обязательно создайте лист с именем DataContainer
перед выполнением приведенного ниже сценария.
Sub GetInfo()
Const prefix$ = "https://www.zaubacorp.com/company-list/nic-300/p-"
Const suffix$ = "-company.html"
Dim Html As New HTMLDocument, Htmldoc As New HTMLDocument
Dim newHtml As New HTMLDocument, newUrl$, elem As Object, oDate As Object, R&, I&
Dim Wb As Workbook, ws As Worksheet, adr As Object, P&, pageNum&
Set Wb = ThisWorkbook
Set ws = Wb.Worksheets("DataContainer") '----------->create a sheet and name it `DataContainer` in order for the script to write the results in there
For pageNum = 1 To 2 '---------------------------------> this is where you put the highest number the script will traverse
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", prefix & pageNum & suffix, False
.send
Html.body.innerHTML = .responseText
End With
With Html.querySelectorAll("#table tbody tr")
For I = 0 To .Length - 1
Htmldoc.body.innerHTML = .item(I).outerHTML
newUrl = Htmldoc.querySelector("a[href]").getAttribute("href")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", newUrl, False
.send
newHtml.body.innerHTML = .responseText
End With
R = R + 1: ws.Cells(R, 1) = newHtml.querySelector(".container > h1").innerText
For Each oDate In newHtml.getElementsByTagName("p")
If InStr(oDate.innerText, "Date of Incorporation") > 0 Then
ws.Cells(R, 2) = oDate.ParentNode.NextSibling.innerText
Exit For
End If
Next oDate
For Each elem In newHtml.getElementsByTagName("b")
If InStr(elem.innerText, "Email ID:") > 0 Then
ws.Cells(R, 3) = elem.ParentNode.innerText
Exit For
End If
Next elem
For Each adr In newHtml.getElementsByTagName("b")
If InStr(adr.innerText, "Address:") > 0 Then
ws.Cells(R, 4) = adr.ParentNode.NextSibling.innerText
Exit For
End If
Next adr
Next I
End With
Next pageNum
End Sub