при поиске определенного события. например, «дата оскар 2018», Google показывает виджет с датой события, прежде чем любые результаты поиска. Мне нужно получить эту дату в Excel, но она кажется сложной с точки зрения фактического кодирования. Я возился с этими функциями, но не получил никаких результатов. div
Меня интересует:
<div class="Z0LcW">5 March 2018, 1:00 am GMT</div>
Вот полный код, который я пытаюсь использовать:
Option Explicit
Public Sub Example()
Call GoogleSearchDescription("oscars 2018 date")
End Sub
Public Function GoogleSearchDescription(ByVal SearchTerm As String) As String
Dim Query As String: Query = "https://www.google.com/search?q=" & URLEncode(SearchTerm)
Dim HTML As String: HTML = GetHTML(Query)
Dim Description() As String: Description = RegExer(HTML, "(<div class=""Z0LcW"">[\w\s.<>/]+<\/div>)")
Description(0) = FilterHTML(Description(0))
Debug.Print Description(0)
Debug.Print "ok"
End Function
Public Function GetHTML(ByVal URL As String) As String
On Error Resume Next
Dim HTML As Object
With CreateObject("InternetExplorer.Application")
.navigate URL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set HTML = .Document.Body
GetHTML = HTML.innerHTML
.Quit
End With
Set HTML = Nothing
End Function
Private Function URLEncode(ByVal UnformattedString As String) As String
'CAUTION: This function URLEncodes strings to match Google Maps API URL specifications, see note below for details
'Note: We convert spaces to + signs, and skip converting plus signs to anything because they replace spaces
'We also skip ampersands [&] as they should not be parsed out of a valid query
Dim Index As Long, ReservedChars As String: ReservedChars = "!#$'()*/:;=?@[]""-.<>\^_`{|}~"
'Convert all % symbols to encoding, as the unformatted string should not already contain URL Encoded characters
UnformattedString = Replace(UnformattedString, "%", "%" & Asc("%"))
'Convert spaces to plus signs to match Google URI query specifications
UnformattedString = Replace(UnformattedString, " ", "+")
'Iterate through the reserved characters for encoding
For Index = 1 To (Len(ReservedChars) - 1)
UnformattedString = Replace(UnformattedString, Mid(ReservedChars, Index, 1), "%" & Asc(Mid(ReservedChars, Index, 1)))
Next Index
'Return URL encoded string
URLEncode = UnformattedString
End Function
Private Function FilterHTML(ByVal RawHTML As String) As String
If Len(RawHTML) = 0 Then Exit Function
Dim HTMLEntities As Variant, HTMLReplacements As Variant, Counter As Long
Const REG_HTMLTAGS = "(<[\w\s""':.=-]*>|<\/[\w\s""':.=-]*>)" 'Used to remove HTML formating from each step in the queried directions
HTMLEntities = Array(" ", "<", ">", "&", """, "'")
HTMLReplacements = Array(" ", "<", ">", "&", """", "'")
'Parse HTML Entities into plaintext
For Counter = 0 To UBound(HTMLEntities)
RawHTML = Replace(RawHTML, HTMLEntities(Counter), HTMLReplacements(Counter))
Next Counter
'Remove any stray HTML tags
Dim TargetTags() As String: TargetTags = RegExer(RawHTML, REG_HTMLTAGS)
'Preemptively remove new line characters with actual new lines to separate any conjoined lines.
RawHTML = Replace(RawHTML, "<b>", " ")
For Counter = 0 To UBound(TargetTags)
RawHTML = Replace(RawHTML, TargetTags(Counter), "")
Next Counter
FilterHTML = RawHTML
End Function
Public Function RegExer(ByVal RawData As String, ByVal RegExPattern As String) As String()
'Outputs an array of strings for each matching expression
Dim RegEx As Object: Set RegEx = CreateObject("VBScript.RegExp")
Dim Matches As Object
Dim Match As Variant
Dim Output() As String
Dim OutputUBound As Integer
Dim Counter As Long
With RegEx
.Global = True
.MultiLine = True
.IgnoreCase = True
.Pattern = RegExPattern
End With
If RegEx.test(RawData) Then
Set Matches = RegEx.Execute(RawData)
For Each Match In Matches
OutputUBound = OutputUBound + 1
Next Match
ReDim Output(OutputUBound - 1) As String
For Each Match In Matches
Output(Counter) = Matches(Counter)
Counter = Counter + 1
Next Match
RegExer = Output
Else
ReDim Output(0) As String
RegExer = Output
End If
End Function