VBA-JSON для импорта данных со всех страниц в одной таблице - PullRequest
0 голосов
/ 14 июня 2019

У меня есть данные из интернета, но моя программа работала для одной страницы, теперь, когда я создал внешний цикл, чтобы программа получала одинаковый набор данных со всех страниц ссылки, что выдает ошибку, т.е.с без с.Поскольку я новичок в vba, любая экспертная помощь требовала решения этой проблемы.Вот мой код:

Option Explicit

Public Sub FetchTabularInfo()
    Dim Http As New XMLHTTP60, Html As New HTMLDocument
    Dim col As Variant, icol As New Collection
    Dim csrf As Variant, i&
    Dim s As String, re As Object, p As String, page As Long, rx As String 'Variable Definations
    Const START_PAGE As Long = 1
    Const END_PAGE As Long = 4
    Const RESULTS_PER_PAGE As Long = 40

p = "\[{""@context"".*?\]"
    Set re = CreateObject("VBScript.RegExp")
    Application.ScreenUpdating = False
    With CreateObject("MSXML2.XMLHTTP")
        For page = START_PAGE To END_PAGE

    With Http
        .Open "GET", "https://ngodarpan.gov.in/index.php/home/statewise_ngo/2620/10/1", False
        .send
        Html.body.innerHTML = .responseText
    End With

    With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
        For i = 0 To .Length - 1
            icol.Add Split(Split(.Item(i).getAttribute("onclick"), "(""")(1), """)")(0)
        Next i
    End With

    Dim r As Long, headers(), results(), ws As Worksheet

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("SrNo", "Name of VGO/NGO", "Address", "City", "State", "Tel", "Mobile", "Web", "Email")
    ReDim results(1 To icol.Count, 1 To UBound(headers) + 1)

    For Each col In icol
        r = r + 1
        With Http
            .Open "GET", "https://ngodarpan.gov.in/index.php/ajaxcontroller/get_csrf", False
            .send
            csrf = .responseText
        End With

        csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)

        Dim json As Object
        With Http
            .Open "POST", "https://ngodarpan.gov.in/index.php/ajaxcontroller/show_ngo_info", False
            .setRequestHeader "X-Requested-With", "XMLHttpRequest"
            .setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
            .send "id=" & col & "&csrf_test_name=" & csrf
            Set json = JsonConverter.ParseJson(.responseText)

            Dim orgName As String, address As String, so As Long, city As String
            Dim state As String, tel As String, mobile As String, website As String, email As String

            On Error Resume Next
            orgName = json("registeration_info")(1)("nr_orgName")
            address = json("registeration_info")(1)("nr_add")
            city = json("registeration_info")(1)("nr_city")
            srNo = r '<unsure where this is coming from.
            state = Replace$(json("registeration_info")(1)("StateName"), "amp;", vbNullString)
            tel = IIf(IsNull(json("infor")("0")("Off_phone1")), vbNullString, json("infor")("0")("Off_phone1")) '<unsure where this is coming from. Need a csrf to test with
            mobile = json("infor")("0")("Mobile")
            website = json("infor")("0")("ngo_url")
            email = json("infor")("0")("Email")
            On Error GoTo 0

            Dim arr()
            arr = Array(srNo, orgName, address, city, state, tel, mobile, website, email)
            For i = LBound(headers) To UBound(headers)
               results(r, i + 1) = arr(i)
            Next
        End With
    Next col
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With

End With

End Sub

1 Ответ

2 голосов
/ 15 июня 2019

Вам нужен внешний цикл над страницами и объединить номер страницы в URL.

r необходимо сбросить на 0 в начале каждой новой страницы.

Вам нужно находить последнюю использованную строку независимо от столбца каждый раз, когда вы записываете массив текущей страницы (или измеряете один огромный массив в начале и заполняете его, а затем записываете только один раз на лист).

Удалить автоинстанцию.

Я вижу, как выглядит дублированная информация в выводе, так что стоит поискать источник данных по этому поводу.

Option Explicit

Public Sub FetchTabularInfo()
    Dim Http As XMLHTTP60, Html As HTMLDocument, col As Variant, csrf As Variant, i&, page As Long
    Dim headers(), ws As Worksheet, iCol As Collection

    Set ws = ThisWorkbook.Worksheets("Sheet1")
    headers = Array("SrNo", "Name of VGO/NGO", "Address", "City", "State", "Tel", "Mobile", "Web", "Email")
    Set Http = New XMLHTTP60
    Set Html = New HTMLDocument

    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

    For page = 1 To 4

        With Http
            .Open "GET", "https://ngodarpan.gov.in/index.php/home/statewise_ngo/76/35/" & CStr(page), False
            .send
            Html.body.innerHTML = .responseText
        End With
        Set iCol = New Collection
        With Html.querySelectorAll(".table tr a[onclick^='show_ngo_info']")
            For i = 0 To .Length - 1
                iCol.Add Split(Split(.item(i).getAttribute("onclick"), "(""")(1), """)")(0)
            Next i
        End With

        Dim r As Long, results()
        ReDim results(1 To iCol.Count, 1 To UBound(headers) + 1)
        r = 0
        For Each col In iCol
            r = r + 1
            With Http
                .Open "GET", "https://ngodarpan.gov.in/index.php/ajaxcontroller/get_csrf", False
                .send
                csrf = .responseText
            End With

            csrf = Split(Replace(Split(csrf, ":")(1), """", ""), "}")(0)

            Dim json As Object
            With Http
                .Open "POST", "https://ngodarpan.gov.in/index.php/ajaxcontroller/show_ngo_info", False
                .setRequestHeader "X-Requested-With", "XMLHttpRequest"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
                .send "id=" & col & "&csrf_test_name=" & csrf
                Set json = JsonConverter.ParseJson(.responseText)

                Dim orgName As String, address As String, srNo As Long, city As String
                Dim state As String, tel As String, mobile As String, website As String, email As String

                On Error Resume Next
                orgName = json("registeration_info")(1)("nr_orgName")
                address = json("registeration_info")(1)("nr_add")
                city = json("registeration_info")(1)("nr_city")
                srNo = r                         '<unsure where this is coming from.
                state = Replace$(json("registeration_info")(1)("StateName"), "amp;", vbNullString)
                tel = IIf(IsNull(json("infor")("0")("Off_phone1")), vbNullString, json("infor")("0")("Off_phone1")) '<unsure where this is coming from. Need a csrf to test with
                mobile = json("infor")("0")("Mobile")
                website = json("infor")("0")("ngo_url")
                email = json("infor")("0")("Email")
                On Error GoTo 0

                Dim arr()
                arr = Array(srNo, orgName, address, city, state, tel, mobile, website, email)
                For i = LBound(headers) To UBound(headers)
                    results(r, i + 1) = arr(i)
                Next
            End With
        Next col
        Set iCol = Nothing: Set json = Nothing
        ws.Cells(GetLastRow(ws) + 1, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    Next
End Sub

Public Function GetLastRow(ByVal sh As Worksheet) As Long
    On Error Resume Next
    GetLastRow = sh.Cells.Find(What:="*", _
                               After:=sh.Range("A1"), _
                               Lookat:=xlPart, _
                               LookIn:=xlFormulas, _
                               SearchOrder:=xlByRows, _
                               SearchDirection:=xlPrevious, _
                               MatchCase:=False).Row
    On Error GoTo 0
End Function
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...