Веб-утилизация с использованием JSON - PullRequest
0 голосов
/ 20 июня 2019

Я разработал код для удаления имени, адреса, веб-сайта, контактов со страниц в таблицу Excel.Так как я новичок в JSON & Web scrapping, поэтому не смог уладить эту проблему;ссылка на первую страницу - https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA;, но этот код не работает должным образом.Более того, я использовал тот же код для других сетей, и тогда он работал нормально.Вот мой код

Option Explicit

Public Sub FetchTabularInfo()
    Dim Http As XMLHTTP60, Html As HTMLDocument, col As Variant, csrf As Variant, i&, page As Long
    Dim headers(), ws As Worksheet, iCol As Collection

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("SrNo", "Name", "Address", "Mobile", "Email")
    Set Http = New XMLHTTP60
    Set Html = New HTMLDocument

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    For page = 1 To 73 'To cover all pages

        With Http
            .Open "GET", "https://www.yelp.com/search?cflt=hvac&find_loc=San+Francisco%2C+CA" & CStr(page), Falsev 'Last letter of URL is page number whose range will be given in outerloop
            .send
            Html.body.innerHTML = .responseText
        End With
        Set iCol = New Collection
        With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
            For i = 0 To .Length - 1
                iCol.Add Split(Split(.Item(i).getAttribute("onclick"), "(""")(1), """)")(0)
            Next i
        End With

        Dim r As Long, results()
        ReDim results(1 To iCol.Count, 1 To UBound(headers) + 1)
        r = 0
        For Each col In iCol
            r = r + 1
            With Http
                .Open "GET", "https://www.yelp.com/index.php/ajaxcontroller/get_csrf", False
                .send
                csrf = .responseText
            End With

            csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)

            Dim json As Object
            With Http
                .Open "POST", "https://www.yelp.com/index.php/ajaxcontroller/show_ngo_info", False
                .setRequestHeader "X-Requested-With", "XMLHttpRequest"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
                .send "id=" & col & "&csrf_test_name=" & csrf
                Set json = JsonConverter.ParseJson(.responseText)

                Dim orgName As String, address As String, srNo As Long, city As String
                Dim state As String, tel As String, mobile As String, website As String, email As String

                On Error Resume Next
                orgName = json("registeration_info")(1)("nr_orgName")
                address = json("registeration_info")(1)("nr_add")

                srNo = r                         '<unsure where this is coming from.

                mobile = json("infor")("0")("Mobile")

                email = json("infor")("0")("Email")
                On Error GoTo 0

                Dim arr()
                arr = Array(srNo, orgName, address, tel, email)
                For i = LBound(headers) To UBound(headers)
                    results(r, i + 1) = arr(i)
                Next
            End With
        Next col
        Set iCol = Nothing: Set json = Nothing
        ws.Cells(GetLastRow(ws) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    Next
End Sub

Public Function GetLastRow(ByVal sh As Worksheet) As Long
    On Error Resume Next
    GetLastRow = sh.Cells.Find(What:="*", _
                               After:=sh.Range("A1"), _
                               Lookat:=xlPart, _
                               LookIn:=xlFormulas, _
                               SearchOrder:=xlByRows, _
                               SearchDirection:=xlPrevious, _
                               MatchCase:=False).Row
    On Error GoTo 0
End Function
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...