Я использую объект MSXML2.XMLHTTP60
для подключения к веб-сайту, захвата HTML-кода и анализа его на предмет данных. По сути, это веб-сканер.
После 10-й записи я получаю ошибку 91. Это означает, что данные отсутствуют в объекте. Я завершаю программу и затем, после перезапуска, получаю эту ошибку в другом месте, которое ранее работало. Я не могу пройти через это. Позже я узнал, что эту вторую ошибку 91 можно «решить», перезапустив Excel.
Теперь, для первой ошибки 91, я обнаружил, что когда я прохожу программу, все работает. Затем я добавил задержку после стольких циклов. Задержка составляет около 3 секунд около места первой ошибки 91. Кажется, это решает проблему.
Я не совсем понимаю, почему эта задержка была столь критичной? Я хотел бы убрать эту задержку и заставить программу работать на максимальной скорости. Если только это не является абсолютно необходимым из-за какого-то сбоя. Я не думаю, что мне нужна задержка, чтобы заставить функцию работать должным образом.
Вот полный код; места, где происходит ошибка, помечены.
Option Explicit
Public baseLink As String
Public baseRow As String
Public basePageNav As String
Public rowNumber As Integer
Public someCounter As Integer
Public masterBrowser As New MSXML2.XMLHTTP60
Sub Initialization()
baseLink = "https://www.fbo.gov/index.php"
baseRow = "row_"
basePageNav = "?s=opportunity&mode=list&tab=list&pageID="
rowNumber = 2
someCounter = 1
End Sub
Sub extractData(URLLink As String)
' Dim httpAddress As New MSXML2.XMLHTTP60
Dim cResponse As String
' Set HTMLdoc = CreateObject("htmlfile")
Dim HTMLdoc As New HTMLDocument
Dim testString As String
Dim closePos As Integer
Dim titleString As String
Dim solicitationNumber As String
Dim agencyName As String
Dim officeName As String
Dim locationName As String
Dim postedDate As String
Dim responseDate As String
Dim noticeString As String
Dim classificationCode As String
Dim NAICScode As String
Dim temp As String
With masterBrowser
.Open "GET", URLLink, False
.send
End With
With masterBrowser
While Not .readyState = 4
Application.Wait Now + TimeValue("0:00:01")
Wend
While Not .Status = 200
Application.Wait Now + TimeValue("0:00:01")
Wend
cResponse = StrConv(.responseBody, vbUnicode)
End With
' For some reason, there is a bug in the library. If the applciation runs
' too fast, then the program will eventually recieve a 91 error.
' This if statement will cause the program to delay by 3 secs every 9th
' entry in order to prevent the 91 error from appearing
' If someCounter = 9 Then
' Application.Wait (Now + TimeValue("0:00:03"))
' someCounter = 1
' Else
' someCounter = someCounter + 1
'End If
' cResponse = StrConv(httpAddress.responseBody, vbUnicode)
cResponse = Mid$(cResponse, InStr(1, cResponse, "<!DOCTYPE "))
With HTMLdoc
.body.innerHTML = cResponse
If .getElementsByClassName("agency-header")(0) Is Nothing Then 'If this is empty, then this will cause the first 91 error
MsgBox ("It is empty")
' So, lets try another reconnect?
Dim anotherAttemptB As String
With masterBrowser
.Open "GET", URLLink, False
.send
anotherAttemptB = StrConv(.responseBody, vbUnicode)
End With
anotherAttemptB = Mid$(anotherAttemptB, InStr(1, anotherAttemptB, "<!DOCTYPE "))
.body.innerHTML = anotherAttemptB
closePos = InStr(.getElementsByClassName("agency-header")(0).innerHTML, "/") ' 91 error caused here
Else
closePos = InStr(.getElementsByClassName("agency-header")(0).innerHTML, "/")
End If
titleString = Mid(.getElementsByClassName("agency-header")(0).innerHTML, 1, closePos)
titleString = Replace(titleString, "<H2>", "")
titleString = Replace(titleString, "</", "")
testString = .getElementsByClassName("sol-num")(0).innerHTML
solicitationNumber = Replace(.getElementsByClassName("sol-num")(0).innerHTML, "Solicitation Number: ", "")
testString = .getElementsByClassName("agency-name")(0).innerHTML
agencyName = Split(.getElementsByClassName("agency-name")(0).innerHTML, "<BR>")(0)
agencyName = Replace(agencyName, "Agency: ", "")
officeName = Split(.getElementsByClassName("agency-name")(0).innerHTML, "<BR>")(1)
officeName = Replace(officeName, "Office: ", "")
locationName = Split(.getElementsByClassName("agency-name")(0).innerHTML, "<BR>")(2)
locationName = Replace(locationName, "Location: ", "")
postedDate = .getElementById("dnf_class_values_procurement_notice__posted_date__widget").innerHTML
responseDate = .getElementById("dnf_class_values_procurement_notice__response_deadline__widget").innerHTML
responseDate = Replace(responseDate, " ", " ")
noticeString = .getElementById("dnf_class_values_procurement_notice__procurement_type__widget").innerHTML
classificationCode = .getElementById("dnf_class_values_procurement_notice__classification_code__widget").innerHTML
NAICScode = .getElementById("dnf_class_values_procurement_notice__naics_code__widget").innerHTML
closePos = InStr(NAICScode, " ")
NAICScode = Mid(NAICScode, 1, closePos)
End With
Worksheets("Sheet1").Range("A" & rowNumber).Value = titleString
Worksheets("Sheet1").Range("B" & rowNumber).Value = solicitationNumber
Worksheets("Sheet1").Range("C" & rowNumber).Value = agencyName
Worksheets("Sheet1").Range("D" & rowNumber).Value = officeName
Worksheets("Sheet1").Range("E" & rowNumber).Value = locationName
Worksheets("Sheet1").Range("F" & rowNumber).Value = postedDate
Worksheets("Sheet1").Range("G" & rowNumber).Value = responseDate
Worksheets("Sheet1").Range("H" & rowNumber).Value = noticeString
Worksheets("Sheet1").Range("I" & rowNumber).Value = classificationCode
Worksheets("Sheet1").Range("J" & rowNumber).Value = NAICScode
Worksheets("Sheet1").Range("K" & rowNumber).Value = URLLink
rowNumber = rowNumber + 1
Set HTMLdoc = Nothing
End Sub
Sub test()
Initialization
Dim linkArray(19) As String
Dim xmlhttp As New MSXML2.XMLHTTP60
Dim myUrl As String
Dim i As Integer
Dim sResponse As String
'Set HTML = CreateObject("htmlfile")
Dim HTML As New HTMLDocument
Dim testStrin As String
Worksheets("Sheet1").Range("A1").Value = "Title:"
Worksheets("Sheet1").Range("B1").Value = "Solicitation #:"
Worksheets("Sheet1").Range("C1").Value = "Agency:"
Worksheets("Sheet1").Range("D1").Value = "Office:"
Worksheets("Sheet1").Range("E1").Value = "Location:"
Worksheets("Sheet1").Range("F1").Value = "Posted On:"
Worksheets("Sheet1").Range("G1").Value = "Response Date:"
Worksheets("Sheet1").Range("H1").Value = "Notice Type:"
Worksheets("Sheet1").Range("I1").Value = "Classification Code:"
Worksheets("Sheet1").Range("J1").Value = "NAICS Code:"
Worksheets("Sheet1").Range("K1").Value = "Link:"
myUrl = baseLink & basePageNav & "1"
With masterBrowser
.Open "GET", myUrl, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
' sResponse = StrConv(xmlhttp.responseBody, vbUnicode)
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With HTML
.body.innerHTML = sResponse
For i = 0 To 19
Dim linkString As String
Dim openPos As Integer
Dim closePos As Integer
If HTML.getElementById(baseRow & CStr(i)) Is Nothing Then ' To make matters even more confusing, after the first 91 error, here is the second until you restart excel
MsgBox ("It is also empty")
Dim anotherAttempt As String
With masterBrowser
.Open "GET", myUrl, False
.send
anotherAttempt = StrConv(.responseBody, vbUnicode)
End With
anotherAttempt = Mid$(anotherAttempt, InStr(1, anotherAttempt, "<!DOCTYPE "))
.body.innerHTML = anotherAttempt
linkString = .getElementById(baseRow & CStr(i)).Children(0).innerHTML
Else
linkString = .getElementById(baseRow & CStr(i)).Children(0).innerHTML
End If
openPos = InStr(linkString, "?")
closePos = InStr(linkString, "w") + 3
linkString = Mid(linkString, openPos, closePos - openPos)
linkString = Replace(linkString, "amp;", "")
linkString = baseLink & linkString
linkArray(i) = linkString
Next i
End With
For i = 0 To 19
extractData (linkArray(i))
Next i
Set HTML = Nothing
MsgBox ("Finished")
End Sub
В соответствии с запросом QHarr, текст ответа от первого появления ошибки 91 выглядит следующим образом:
"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=8" />
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1
И текст ответа, когда возникает вторая ошибка 91:
"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=8" />
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1
И текст ответа до ошибки 91:
"<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=8" />
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1