VBA - извлекать данные из html, чтобы преуспеть - PullRequest
0 голосов
/ 22 февраля 2020

У меня проблема с кодированием. Я использовал Excel VBA для извлечения данных с веб-страницы, чтобы преуспеть. Веб-страница https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en Нажмите «Все транзакции», и она отобразит вторую таблицу внизу первой таблицы. Я хотел бы извлечь данные из нижней таблицы (не из верхней).

Вот код:

Sub PropertyTransactions()
Dim ieObj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer

i = 1

Set ieObj = New InternetExplorer
ieObj.Visible = True
ieObj.navigate "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

Application.Wait Now + TimeValue("00:00:05")

For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")
Set htmlEle = ActiveDocument.all.tags("head").Item(0)
    With ActiveSheet
        .Range("A" & i).Value = htmlEle.Children(0).textContent
        .Range("B" & i).Value = htmlEle.Children(1).textContent
        .Range("C" & i).Value = htmlEle.Children(2).textContent
        .Range("D" & i).Value = htmlEle.Children(3).textContent
        .Range("E" & i).Value = htmlEle.Children(4).textContent
        .Range("F" & i).Value = htmlEle.Children(5).textContent
        .Range("G" & i).Value = htmlEle.Children(6).textContent
        .Range("H" & i).Value = htmlEle.Children(7).textContent
        .Range("I" & i).Value = htmlEle.Children(8).textContent
        .Range("J" & i).Value = htmlEle.Children(9).textContent
    End With

    i = i + 1
Next htmlEle

End Sub

Однако в этой строке есть ошибка:
For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")

Отображает ошибку времени выполнения 91: переменная объекта или переменная блока не установлены. Как я могу это исправить? Большое спасибо!

1 Ответ

2 голосов
/ 22 февраля 2020

Я использую позднюю привязку, но это не имеет значения. Пожалуйста, прочитайте комментарии в макросе:

Sub PropertyTransactions()

  Dim url As String
  Dim browser As Object
  Dim nodeTransactionTab As Object
  Dim nodeTransactionTable As Object
  Dim nodesHeaderAll As Object
  Dim nodeHeaderOne As Object
  Dim nodesTrAll As Object
  Dim nodeTrOne As Object
  Dim nodesTdAll As Object
  Dim nodeTdOne As Object
  Dim currentRow As Long
  Dim currentColumn As Long

  'Initialize variables
  currentRow = 1
  currentColumn = 1
  url = "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set browser = CreateObject("internetexplorer.application")
  browser.Visible = True 'You can set this to False to make the IE invisible
  browser.navigate url
  Do Until browser.ReadyState = 4: DoEvents: Loop

  'First we must click the 'All Transaction' tab to load the table you want
  'The tab works like a button and has the id 'tx_record_3'
  'The nature of an id is that it is unique
  'With the following lines you can try to get elements by them ids
  'The method getElementByID() doesn't build a node collection
  'We switch off all runtime errors when we use it because we get
  'a runtime error if there is nor element with that id
  On Error Resume Next
  Set nodeTransactionTab = browser.document.getElementByID("tx_record_3")
  On Error GoTo 0

  'Now we check if we have an html element 'nodeTransactionTab'
  If Not nodeTransactionTab Is Nothing Then
    'Click the tab and give a short break to load the transaction table
    nodeTransactionTab.Click
    Application.Wait Now + TimeValue("00:00:02")

    'Now we can get the transaction table
    'You try that with the css class 'tablesorter' but the table has also an id
    '(The CSS class 'tablesorter' can also only be found after the transaction
    'table has been loaded)
    On Error Resume Next
    Set nodeTransactionTable = browser.document.getElementByID("Tx_hist_table")
    On Error GoTo 0
  Else
    'The object nodeTransactionTab couldn't be build
    MsgBox "No transaction tab found"
  End If

  'To avoid deep if nesting, we can check at this point whether the transaction
  'table was found, because an object is nothing by default
  If Not nodeTransactionTable Is Nothing Then
    'We trust the header is present
    Set nodesHeaderAll = nodeTransactionTable.getElementsByTagName("th")

    'Get header of transaction table
    For Each nodeHeaderOne In nodesHeaderAll
      ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeHeaderOne.innertext)
      currentColumn = currentColumn + 1
    Next nodeHeaderOne

    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1

    'We trust the values are present too
    'Get all value table rows
    Set nodesTrAll = nodeTransactionTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")

    'Get one row of values after the other from transaction table
    For Each nodeTrOne In nodesTrAll
      'Get all cells of next row
      Set nodesTdAll = nodeTrOne.getElementsByTagName("td")

      'Get values cell by cell
      For Each nodeTdOne In nodesTdAll
        ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeTdOne.innertext)
        currentColumn = currentColumn + 1
      Next nodeTdOne
    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1
    Next nodeTrOne
  Else
    'The object nodeTransactionTable couldn't be build
    MsgBox "No transaction table found"
  End If

  'Clean up
  browser.Quit
  Set browser = Nothing
  Set nodeTransactionTab = Nothing
  Set nodesHeaderAll = Nothing
  Set nodeHeaderOne = Nothing
  Set nodesTrAll = Nothing
  Set nodeTrOne = Nothing
  Set nodesTdAll = Nothing
  Set nodeTdOne = Nothing
End Sub
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...