Веб-парсинг VBA становится все медленнее и медленнее, нужно любое решение - PullRequest
0 голосов
/ 18 июня 2020

У меня неизвестная проблема ... вот мой код VBA ... код работает отлично, как я хочу ... проблема в том, что каждый следующий день код VBA становится медленнее. в первый день извлечение данных из URL было намного быстрее ... примерно, я запускаю этот код для 60000 URL ... теперь он становится медленнее почти на 1/3 скорости вниз по градиенту. пожалуйста, если возможна какая-либо помощь

        Public Sub GetTelNumber()

        On Error Resume Next

        Dim sResponse As String, html As HTMLDocument
        Dim url As String
        Dim N As Long
        Dim XY As Long
        Dim re As Object
        Dim Str, SG As String



        For Rept = 1 To 1000
        Set re = CreateObject("vbscript.regexp")
        XY = 50
        G = ActiveCell.Row
        If IsEmpty(ActiveCell) Then End





        For N = ActiveCell.Row To ActiveCell.Row + XY


                url = Sheets("ExtData").Range("A" & N).Value
                With CreateObject("MSXML2.XMLHTTP")
                    .Open "GET", url, False
                    .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
                    .Send
                    sResponse = StrConv(.responseBody, vbUnicode)
                    S = .responseText
                End With

                Dim cipherKey As String, cipherDict As Object
                Set cipherDict = CreateObject("Scripting.Dictionary")
                cipherKey = Split(Split(sResponse, "smoothing:grayscale}.icon-")(1), ".mobilesv")(0)
                cipherKey = Replace$(cipherKey, ":before{content:""\9d", Chr$(32))

                Dim arr() As String, tempArr() As String, i As Long, j As Long
                arr = Split(cipherKey, """}.icon-")
                For i = LBound(arr) To UBound(arr)
                    tempArr = Split(arr(i), Chr$(32))
                    cipherDict(tempArr(0)) = i 'needs adjustment
                Next

                Set re = CreateObject("vbscript.regexp")
                Set html = New HTMLDocument
                Dim storesTextToDecipher As Object

                With html
                    .body.innerHTML = sResponse

                        Set storesTextToDecipher = .querySelectorAll(".telnowpr")

                        ActiveCell.Offset(0, 1) = UCase(Mid(html.querySelector(".e_prop").href, InStrRev(html.querySelector(".e_prop").href, "-") + 1, 100))
                        ActiveCell.Offset(0, 2) = html.querySelector(".fn").innerText
                        ActiveCell.Offset(0, 3) = Split(html.querySelectorAll("#brd_cm_srch")(0).innerText, " in")(0)

                        ActiveCell.Offset(0, 4) = html.querySelectorAll(".lng_add")(2).innerText
                        If ActiveCell.Offset(0, 4) = "" Then
                        ActiveCell.Offset(0, 4) = html.querySelectorAll(".lng_add")(1).innerText
                        ElseIf ActiveCell.Offset(0, 4) = "" Then
                        ActiveCell.Offset(0, 4) = html.querySelectorAll(".lng_add")(0).innerText
                        End If

                        ActiveCell.Offset(0, 6) = WorksheetFunction.Proper(Trim$(Replace$(GetString(re, S, "addressRegion"":(.*"")"), Chr$(34), vbNullString)))
                        ActiveCell.Offset(0, 7) = Trim$(Replace$(GetString(re, S, "postalCode"":(.*"")"), Chr$(34), vbNullString))

                        SG = Split(ActiveCell.Offset(0, 4), ActiveCell.Offset(0, 7))(0)
                        SG = Mid(SG, 1, InStrRev(SG, ",") - 1)
                        ActiveCell.Offset(0, 5) = Trim(Mid(SG, InStrRev(SG, ",") + 1, 100))


                        ActiveCell.Offset(0, 8) = Application.WorksheetFunction.VLookup(ActiveCell.Offset(0, 7), Sheets("India PinCode").Columns("A:F"), 3, 0)
                        ActiveCell.Offset(0, 9) = Application.WorksheetFunction.VLookup(ActiveCell.Offset(0, 7), Sheets("India PinCode").Columns("A:F"), 4, 0)
                        'ActiveCell.Offset(0, 10) = GetWANO(ActiveCell, ActiveCell.Offset(0, 1)) 'if wa no need together
                        ActiveCell.Offset(0, 10) = GetWALink(ActiveCell, ActiveCell.Offset(0, 1))
                        ActiveCell.Offset(0, 11) = Trim$(Replace$(GetString(re, S, "url"":(.*)BZDET"), Chr$(34), vbNullString)) & "BZDET"
                        ActiveCell.Offset(0, 13) = GetStoreNumber(storesTextToDecipher.Item(1), cipherDict)

                        If ActiveCell.Offset(0, 13) = "" Then
                           ActiveCell.Offset(0, 13) = "No"
                        ElseIf StringCount(ActiveCell.Offset(0, 13), "X") * 14 <> Len(ActiveCell.Offset(0, 13)) Then
                           ActiveCell.Offset(0, 13) = "Missing:"
                        End If

                        ActiveCell.Offset(0, 14) = html.querySelectorAll(".whatsapptxt")(0).innerText


                Range("A" & N + 1).Select
                Range("P1") = N - G
                End With



        Next N

                Dim LastRow As Long
                Dim sht As Worksheet

                Set sht = Sheets("FDATA")
                LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row

                Range("A2:O" & XY + 2).Copy
                sht.Range("A" & LastRow + 1).PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
                    :=False, Transpose:=False
                Application.CutCopyMode = False
                Range("A2:O" & XY + 2).Delete Shift:=xlUp



                Range("A2").Select
                ActiveWorkbook.Save



        Next Rept
    End Sub

Обновите код согласно предложению

    Public Sub GetTelNumber()
On Error Resume Next

    Dim html As HTMLDocument
    Dim N As Long
    Dim re, Cr, cipherDict As Object
    Dim sResponse, cipherKey, Str, SG As String
    Dim myArr, RsltArr(14) As Variant

    Set re = CreateObject("vbscript.regexp")
    Set Cr = CreateObject("MSXML2.XMLHTTP")
    Set cipherDict = CreateObject("Scripting.Dictionary")
    Set html = New HTMLDocument

  For Rept = 1 To 50

    myArr = Application.Transpose(Range("A2:A52"))

    Call Clear_Browser
    Application.Wait (Now + TimeValue("0:00:05"))

    'Call UpdateStop

    For N = 1 To 50

            With Cr
                .Open "GET", myArr(N), False
                .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
                .Send
                sResponse = StrConv(.responseBody, vbUnicode)
                S = .responseText
            End With

            cipherKey = Split(Split(sResponse, "smoothing:grayscale}.icon-")(1), ".mobilesv")(0)
            cipherKey = Replace$(cipherKey, ":before{content:""\9d", Chr$(32))

            Dim arr() As String, tempArr() As String, i As Long, j As Long
            arr = Split(cipherKey, """}.icon-")

            For i = LBound(arr) To UBound(arr)
                tempArr = Split(arr(i), Chr$(32))
                cipherDict(tempArr(0)) = i 'needs adjustment
            Next

            Dim storesTextToDecipher As Object

            With html
                .body.innerHTML = sResponse
                    Set storesTextToDecipher = .querySelectorAll(".telnowpr")

                    RsltArr(0) = UCase(Mid(html.querySelector(".e_prop").href, InStrRev(html.querySelector(".e_prop").href, "-") + 1, 100))
                    RsltArr(1) = html.querySelector(".fn").innerText
                    RsltArr(2) = Split(html.querySelectorAll("#brd_cm_srch")(0).innerText, " in")(0)

                    RsltArr(3) = html.querySelectorAll(".lng_add")(2).innerText

                    If RsltArr(3) = vbNullString Then
                    RsltArr(3) = html.querySelectorAll(".lng_add")(1).innerText
                    ElseIf RsltArr(3) = vbNullString Then
                    RsltArr(3) = html.querySelectorAll(".lng_add")(0).innerText
                    End If

                    RsltArr(5) = WorksheetFunction.Proper(Trim$(Replace$(GetString(re, S, "addressRegion"":(.*"")"), Chr$(34), vbNullString)))
                    RsltArr(6) = Trim$(Replace$(GetString(re, S, "postalCode"":(.*"")"), Chr$(34), vbNullString))

                    SG = Split(RsltArr(3), RsltArr(6))(0)
                    SG = Mid(SG, 1, InStrRev(SG, ",") - 1)
                    RsltArr(4) = Trim(Mid(SG, InStrRev(SG, ",") + 1, 100))
                    RsltArr(7) = Application.VLookup(Val(RsltArr(6)), Sheets("India PinCode").Columns("A:F"), 3, 0)
                    RsltArr(8) = Application.VLookup(Val(RsltArr(6)), Sheets("India PinCode").Columns("A:F"), 4, 0)
                    'ActiveCell.Offset(0, 10) = GetWANO(ActiveCell, ActiveCell.Offset(0, 1)) 'if wa no need together
                    RsltArr(9) = GetWALink(myArr(N), RsltArr(0))
                    RsltArr(10) = Trim$(Replace$(GetString(re, S, "url"":(.*)BZDET"), Chr$(34), vbNullString)) & "BZDET"
                    RsltArr(12) = GetStoreNumber(storesTextToDecipher.Item(1), cipherDict)


                    RsltArr(13) = html.querySelectorAll(".whatsapptxt")(0).innerText
            End With

            Range("B" & N + 1 & ":O" & N + 1) = RsltArr

            Erase RsltArr

    Next N
            'Call UpdateStart
            Dim LastRow As Long
            Dim sht As Worksheet

            Set sht = Sheets("FDATA")
            LastRow = sht.Cells(sht.Rows.Count, "A").End(xlUp).Row

            Range("A2:O52").Copy
            sht.Range("A" & LastRow + 1).PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
                :=False, Transpose:=False
            Application.CutCopyMode = False
            Range("A2:O52").Delete Shift:=xlUp
            ActiveWorkbook.Save

            Erase myArr

    Next Rept
End Sub
...