Захват веб-таблицы с помощью VBA без разрыва страницы - PullRequest
0 голосов
/ 12 февраля 2020

Я хотел бы получить полный набор данных в таблице в пределах https://mis.twse.com.tw/stock/sblInquiryCap.jsp?lang=en_us#

Я использовал коды из другого поста, но я мог получить только первые 10-е данные из-за разрыва страницы. В любом случае я могу изменить код, чтобы захватить полный набор данных, пожалуйста?

Option Explicit
Public Sub MakeSelectionGetData()
    Sheets("Sheet1").Cells.Clear
    Dim ie As New InternetExplorer
    Const url = "https://mis.twse.com.tw/stock/sblInquiryCap.jsp?lang=en_us#"
    Application.ScreenUpdating = False
    With ie
        .Visible = True
        .navigate url

        While .Busy Or .readyState < 4: DoEvents: Wend

        Application.Wait Now + TimeSerial(0, 0, 6)

        Dim nTable As HTMLTable
        Set nTable = .document.getElementById("sblCapTable")
        Dim Headers()
        Headers = Array("Number", "Stock Code", "Real Time Available Volume for SBL Short Sales", "Last Modify")
        Dim TR As Object, TD As Object, r As Long, c As Long

        With ActiveSheet
            r = 2
            c = 1
            Dim TR_col As Object, TD_col As Object
            Set TR_col = nTable.getElementsByTagName("TR")
            .Range("A1").Resize(1, UBound(Headers) + 1) = Headers
            For Each TR In TR_col
                Set TD_col = TR.getElementsByTagName("TD")
                For Each TD In TD_col
                    .Cells(r, c) = TD.innerText
                    c = c + 1
                Next
                c = 1
                r = r + 1
            Next
        End With
        .Quit
    End With
    Application.ScreenUpdating = True
End Sub

enter image description here

1 Ответ

0 голосов
/ 12 февраля 2020

Желательно, прежде всего, установить 100 просмотров на страницу. Затем go через нумерацию страниц, что немного сложно на странице. Я прокомментировал макрос:

Public Sub MakeSelectionGetData()

  Const url = "https://mis.twse.com.tw/stock/sblInquiryCap.jsp?lang=en_us#"
  Dim ie As Object
  Dim nodeDropdown As Object
  Dim nTable As Object
  Dim TR As Object
  Dim TD As Object
  Dim TR_col As Object
  Dim TD_col As Object
  Dim nodePagination As Object
  Dim nodesCssCurrentNext As Object
  Dim Headers() As Variant
  Dim r As Long
  Dim c As Long
  Dim endOfPagination As Boolean

  Sheets("Sheet1").Cells.Clear
  Headers = Array("Number", "Stock Code", "Real Time Available Volume for SBL Short Sales", "Last Modify")
  ActiveSheet.Range("A1").Resize(1, UBound(Headers) + 1) = Headers
  r = 2
  c = 1

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set ie = CreateObject("internetexplorer.application")
  ie.Visible = True
  ie.navigate url
  Do Until ie.readyState = 4: DoEvents: Loop
  Application.Wait Now + TimeSerial(0, 0, 6)

  'Change hits per page to 100
  'Get dropbox from html
  On Error Resume Next
  Set nodeDropdown = ie.document.getElementById("prePage")
  On Error GoTo 0
  'Select the entry with the value 100
  nodeDropdown.selectedIndex = 3
  'Trigger the change event to update the page
  Call TriggerEvent(ie.document, nodeDropdown, "change")
  'Short break to run the update
  Application.Wait Now + TimeSerial(0, 0, 2)

  'Loop through the pagination
  Do
    'Your code
    Set nTable = ie.document.getElementById("sblCapTable").getElementsByTagName("tbody")(0)
    Set TR_col = nTable.getElementsByTagName("TR")
    For Each TR In TR_col
      Set TD_col = TR.getElementsByTagName("TD")
      For Each TD In TD_col
        ActiveSheet.Cells(r, c) = TD.innerText
        c = c + 1
      Next TD
      c = 1
      r = r + 1
    Next TR

    'Click next button in pagination if it's a link
    On Error Resume Next
    Set nodePagination = ie.document.getElementById("Pagination")
    On Error GoTo 0

    'Check for no 'Next' button
    Set nodesCssCurrentNext = nodePagination.getElementsByClassName("current next")
    'While there is no element in the node collection we click the 'Next' button
    If nodesCssCurrentNext.Length = 0 Then
      'Click the 'Next' button
      nodePagination.getElementsByClassName("next")(0).Click
      'Short break to update the next 100 hits
      'All data is in memmory, so there is nothing to load from the server
      Application.Wait Now + TimeSerial(0, 0, 1)
    Else
      'If the node collection is not empty, we reached the end of pagination
      endOfPagination = True
    End If
  Loop Until endOfPagination

  ie.Quit
End Sub

Эта процедура для запуска события html для изменения выпадающего списка до 100 обращений на страницу:

Private Sub TriggerEvent(htmlDocument As Object, htmlElementWithEvent As Object, eventType As String)

  Dim theEvent As Object

  htmlElementWithEvent.Focus
  Set theEvent = htmlDocument.createEvent("HTMLEvents")
  theEvent.initEvent eventType, True, False
  htmlElementWithEvent.dispatchEvent theEvent
End Sub
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...