Почему мой код VBA не извлекает информацию из HTMLDoc сайта? - PullRequest
0 голосов
/ 03 мая 2018

Я не уверен, почему мой код не работает (возвращая названия компаний, номера телефонов и контактные номера из HTMLDoc веб-сайта, с которого я пытаюсь получить информацию. Можете ли вы помочь определить, что я делаю неправильно ( скорее всего, с типами данных IHTMLElement и IHTMLElementCollection и / или доступом к HTML через getElementsByTagName, getElementsByClassName и т. д.). Спасибо !!

Option Explicit

Sub FinalMantaSub()
Dim IE As New SHDocVw.InternetExplorer
Dim HTMLDoc As MSHTML.HTMLDocument

IE.Visible = False
IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"

Do While IE.READYSTATE <> READYSTATE_COMPLETE
DoEvents
Loop

Set HTMLDoc = IE.document

Range("A3").Value = "Name"
Range("B3").Value = "Address"
Range("C3").Value = "Phone"

'variables to output on excel sheet
Dim BusinessNameFinal As String
Dim BusinessAddressFinal As String
Dim BusinessPhoneFinal As String

'variables used to create final BusinessAddress variable
Dim streetAddress As IHTMLElement
Dim addressLocality As IHTMLElement
Dim addressRegion As IHTMLElement
Dim postalCode As IHTMLElement

Dim itemprop As String
Dim itemprop2 As String

Dim BusinessNameCollection As IHTMLElementCollection
Dim BusinessName As IHTMLElement
Dim BusinessAddressCollection As IHTMLElementCollection
Dim BusinessAddress As IHTMLElement
Dim BusinessPhoneCollection As IHTMLElementCollection
Dim BusinessPhone As IHTMLElement

Dim RowNumber As Long

'get ready for business name looping
RowNumber = 4
Set BusinessName = HTMLDoc.getElementsByClassName("media-heading text-primary h4")(0).getElementsByTagName("strong").innerText
Set BusinessNameCollection = BusinessName.all

    'loop for business names
    For Each BusinessName In BusinessNameCollection
        Cells(RowNumber, 1).Value = BusinessName
        RowNumber = RowNumber + 1
    Next BusinessName

'get ready for business address looping
RowNumber = 4
itemprop = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").getAttribute("itemprop")
    If itemprop = "streetAddress" Then
        Set streetAddress = HTMLDoc.getElementsByClassName("mvm mhn").getElementsByTagName("span").innerText
    ElseIf itemprop = "addressLocality" Then
        Set addressLocality = HTMLDoc.getElementsByTagName("span").innerText
    ElseIf itemprop = "addressRegion" Then
        Set addressRegion = HTMLDoc.getElementsByTagName("span").innerText
    ElseIf itemprop = "postalCode" Then
        Set postalCode = HTMLDoc.getElementsByTagName("span").innerText
    End If
Set BusinessAddress = streetAddress & addressLocality & addressRegion & postalCode
Set BusinessAddressCollection = BusinessAddress.all

    'loop for business addresses
    For Each BusinessAddress In BusinessAddressCollection
        BusinessAddress = streetAddress & vbNewLine & addressLocality & ", " & addressRegion & " " & postalCode
        Cells(RowNumber, 2).Value = BusinessAddress
        RowNumber = RowNumber + 1
    Next BusinessAddress

'get ready for business phone looping
RowNumber = 4
itemprop2 = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getAttribute("itemprop")
    If itemprop2 = "telephone" Then
        BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
    End If
Set BusinessPhone = HTMLDoc.getElementsByClassName("hidden-device-xs")(0).getElementsByTagName("strong").innerText
Set BusinessPhoneCollection = BusinessPhone.all

    'loop for business phones
    For Each BusinessPhone In BusinessPhoneCollection
        Cells(RowNumber, 3).Value = BusinessPhone
        RowNumber = RowNumber + 1
    Next BusinessPhone

Range("A1").Activate
Set HTMLDoc = Nothing

 'do some final formatting
 Range("A3").CurrentRegion.WrapText = False
 Range("A3").CurrentRegion.EntireColumn.AutoFit
 Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
 Range("A1:D1").Merge
 Range("A1").Value = "Manta.com Business Contacts"
 Range("A1").Font.Bold = True
 Application.StatusBar = ""
 MsgBox "Done!"

 End Sub

1 Ответ

0 голосов
/ 03 мая 2018

Это извлекает информацию. Вы не зациклили все страницы результатов в своем коде или упомянули об этом, поэтому я настроил это, чтобы показать вам, как сделать первую страницу результатов. Дайте мне знать, как это происходит.

Код:

Option Explicit

Public Sub FinalMantaSub()     '<== Can't have ad blocker enabled for this site

    Dim IE As New SHDocVw.InternetExplorer
    Dim HTMLDoc As MSHTML.HTMLDocument

    IE.Visible = True
    IE.navigate "https://www.manta.com/search?search_source=business&search=general+hospitals&search_location=Dallas+TX&pt=32.7825%2C-96.8207"

    Do While IE.readyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set HTMLDoc = IE.document

    Dim c As Object, i As Long

    Set c = HTMLDoc.querySelectorAll("div.media-body")

    Do While Not c(i) Is Nothing
        Debug.Print "Result #" & i + 1
        Debug.Print vbNewLine
        Debug.Print "Name: " & c(i).querySelector("[itemprop=""name""]").innerText
        Debug.Print "Address: " & c(i).querySelector("[itemprop=""address""]").innerText
        Debug.Print "Phone: " & c(i).querySelector("[itemprop=""telephone""]").innerText
        Debug.Print String$(20, Chr$(61))
        i = i + 1
    Loop
    IE.Quit
End Sub

Снимок вывода:

Snapshot

Обновление:

Существует огромное количество результатов, но вы можете создать внешний цикл следующим образом. Вы могли бы затем превратить вышеупомянутое в подпрограмму, которая называется.

    Dim arr() As String, pageNo As Long
    arr = Split(HTMLDoc.querySelector(".pagination.pagination-md.mll a").href, "&pt")
    pageNo = 1

    Do While Err.Number = 0

        On Error GoTo Errhand:

        Dim url As String
        url = Split(arr(0), "&")(0) & "&pg=" & pageNo & "&pt" & arr(1)
        Debug.Print url
        IE.navigate url
        Do While IE.readyState <> READYSTATE_COMPLETE
            DoEvents
        Loop
        pageNo = pageNo + 1
    Loop

Errhand:
    Debug.Print "Stopped after " & pageNo & " pages."
...