Excel VBA Web Scrape. Итоговая строка оказывается не на своём месте - PullRequest
0 голосов
/ 19 декабря 2018

[Правка: HTML-код теперь вставлен правильно] [Правка 2: исправлена проблема с пробелами]

Проблема с пробелами исправлена. Я скопировал приведённый ниже фрагмент, найденный выше в коде.

Единственное, что я хотел бы сделать сейчас, — чтобы числа в итоговой строке были выделены жирным шрифтом, но я не уверен, можно ли сделать это в коде. Если нельзя, меня устроит и текущий вариант. Мне просто хотелось бы, чтобы итоги выделялись на фоне данных по отдельным сотрудникам.

If TD.getAttribute("colspan") Then
                Column = Column + TD.getAttribute("colspan")

и вставил его в этот раздел ниже в коде:

            Set TDs = TR.getElementsByTagName("td")
            For Each TD In TDs
                Worksheet.Cells(Row, Column).Value = TD.innerText
                If TD.getAttribute("colspan") Then
                    Column = Column + TD.getAttribute("colspan")

                Else

                Column = Column + 1
                End If

Обновлённый код теперь выглядит так:

' Opens the PPA "inspect process" page in Internet Explorer and copies every
' productivity table found under the element with id "secondaryProductivityList"
' onto sheet "0700" of this workbook.
'
' For each section <div> that has an <h3> (and is not a "floatHeader" div) the
' heading is written in bold, followed by one worksheet row per <tr>. Header
' cells (<th>) are written in bold. A cell's colspan attribute advances the
' output column by that many columns, so the "Total:" footer row lines up with
' the numeric columns above it. Rows that live inside <tfoot> are written in
' bold so the totals stand out from the per-employee rows.
'
' The date and the hour/minute values used to build the URL are read from
' sheet "Variables" (A2:D2).
'
' Any runtime error is re-raised to the caller after the browser has been
' closed, so a failed run does not leave a hidden IE process behind.
Sub DownloadPPAProcessData0700()
    Dim Browser As InternetExplorer
    Dim Document As HTMLDocument
    Dim Container As IHTMLElement
    Dim Table As IHTMLElement
    Dim Div As IHTMLElement
    Dim H3 As IHTMLElement
    Dim TR As IHTMLElement
    Dim TD As IHTMLElement
    Dim TargetSheet As Worksheet
    Dim VariablesSheet As Worksheet
    Dim Url As String
    Dim Row As Long            ' Long: Integer overflows past row 32,767
    Dim Column As Long
    Dim Span As Long
    Dim IsTotalRow As Boolean
    Dim SD As Date
    Dim ED As Date
    Dim STS As Integer
    Dim ETS As Integer
    Dim STE As Integer
    Dim ETE As Integer
    Dim ErrNumber As Long
    Dim ErrSource As String
    Dim ErrDescription As String

    On Error GoTo Fail

    ' Qualify both sheets with ThisWorkbook so the macro does not depend on
    ' which workbook happens to be active.
    Set TargetSheet = ThisWorkbook.Worksheets("0700")
    Set VariablesSheet = ThisWorkbook.Worksheets("Variables")

    Application.ScreenUpdating = True

    ' NOTE(review): the end date reads the same cell as the start date (A2) and
    ' the end minute reads the same cell as the start minute (D2) - confirm
    ' this is intended and not a copy/paste slip.
    SD = VariablesSheet.Range("A2").Value
    ED = VariablesSheet.Range("A2").Value
    STS = VariablesSheet.Range("B2").Value
    ETS = VariablesSheet.Range("C2").Value
    STE = VariablesSheet.Range("D2").Value
    ETE = VariablesSheet.Range("D2").Value

    Url = "https://fclm-portal.amazon.com/ppa/inspect/process?&processId=100114&warehouseId=BFI4&primaryAttribute=PICKING_PROCESS_PATH&secondaryAttribute=GL_CODE&maxIntradayDays=1&spanType=Intraday&startDateIntraday=" & _
          Year(SD) & "%2F" & Month(SD) & "%2F" & Day(SD) & _
          "&startHourIntraday=" & STS & "&startMinuteIntraday=" & STE & _
          "&endDateIntraday=" & Year(ED) & "%2F" & Month(ED) & "%2F" & Day(ED) & _
          "&endHourIntraday=" & ETS & "&endMinuteIntraday=" & ETE

    TargetSheet.Activate

    Set Browser = New InternetExplorerMedium
    Browser.Navigate Url

    'Wait for page to load
    Do While Browser.Busy Or Browser.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    'Scan the document
    Set Document = Browser.Document
    Set Container = Document.getElementById("secondaryProductivityList")

    Row = 1
    If Not Container Is Nothing Then
        For Each Div In Container.getElementsByTagName("div")
            Set H3 = Div.getElementsByTagName("h3")(0)

            If Not Div.className = "floatHeader" And Not H3 Is Nothing Then
                TargetSheet.Cells(Row, 1).Value = H3.innerText
                TargetSheet.Cells(Row, 1).Font.Bold = True
                Row = Row + 1

                Set Table = Div.getElementsByTagName("table")(0)
                If Not Table Is Nothing Then
                    For Each TR In Table.getElementsByTagName("tr")
                        Column = 1

                        ' The totals row is the one that sits inside <tfoot>.
                        IsTotalRow = False
                        If Not TR.parentElement Is Nothing Then
                            IsTotalRow = (UCase$(TR.parentElement.tagName) = "TFOOT")
                        End If

                        ' Header cells first, then data cells; the column
                        ' counter carries over from one loop to the next.
                        For Each TD In TR.getElementsByTagName("th")
                            TargetSheet.Cells(Row, Column).Value = TD.innerText
                            TargetSheet.Cells(Row, Column).Font.Bold = True
                            ' A missing (Null), empty or non-numeric colspan
                            ' counts as a span of one column.
                            Span = CLng(Val(TD.getAttribute("colspan") & ""))
                            If Span < 1 Then Span = 1
                            Column = Column + Span
                        Next TD

                        For Each TD In TR.getElementsByTagName("td")
                            TargetSheet.Cells(Row, Column).Value = TD.innerText
                            If IsTotalRow Then TargetSheet.Cells(Row, Column).Font.Bold = True
                            Span = CLng(Val(TD.getAttribute("colspan") & ""))
                            If Span < 1 Then Span = 1
                            Column = Column + Span
                        Next TD

                        Row = Row + 1
                    Next TR
                End If
            End If

            ' Spacer row after every div (the hidden floatHeader divs included),
            ' which keeps the original layout of the output.
            Row = Row + 1
        Next Div
    End If

    TargetSheet.Range("A1:Z50").Columns.AutoFit

Done:
    ' Always close the browser, even after a failure.
    On Error Resume Next
    If Not Browser Is Nothing Then Browser.Quit
    On Error GoTo 0
    Application.ScreenUpdating = True
    If ErrNumber <> 0 Then Err.Raise ErrNumber, ErrSource, ErrDescription
    Exit Sub

Fail:
    ' Remember the error, clean up at Done, then re-raise it there.
    ErrNumber = Err.Number
    ErrSource = Err.Source
    ErrDescription = Err.Description
    Resume Done
End Sub

Я работаю над файлом для веб-скрейпинга: веб-страница меняется в зависимости от времени суток, за которое нужно получить информацию, но пока я просто покажу, как код настроен на 1 час.

Код копирует нужные таблицы с веб-страницы, но проблема в том, что в конце каждой таблицы есть итоговая строка, числа в которой должны стоять в тех же столбцах, что и остальные числа. Слово «Total» («итого») на веб-странице занимает 3 столбца, а после него идут числа, первое из которых относится к столбцу «Units».

Когда макрос извлекает данные, он помещает слово «Total» в самую левую ячейку, а числа — сразу после неё, из-за чего вся строка смещается на две ячейки влево.

Прилагаю 2 ссылки на изображения: как выглядит веб-страница и что получается в файле Excel.

https://imgur.com/a/uVwUC8j

https://imgur.com/a/Jjjnyd1

вот код

' Opens the PPA "inspect process" page in Internet Explorer and copies every
' productivity table found under the element with id "secondaryProductivityList"
' onto sheet "0700" of this workbook.
'
' For each section <div> that has an <h3> (and is not a "floatHeader" div) the
' heading is written, followed by one worksheet row per <tr>. Header cells
' (<th>) are written in bold. Both <th> and <td> cells honour their colspan
' attribute, so a footer such as <td colspan="3">Total:</td> occupies three
' columns and the numbers after it land under the matching column headers.
'
' The date and the hour/minute values used to build the URL are read from
' sheet "Variables" (A2:D2).
'
' Any runtime error is re-raised to the caller after the browser has been
' closed and screen updating has been switched back on.
Sub DownloadPPAProcessData0700()
    Dim Browser As InternetExplorer
    Dim Document As HTMLDocument
    Dim Container As IHTMLElement
    Dim Table As IHTMLElement
    Dim Div As IHTMLElement
    Dim H3 As IHTMLElement
    Dim TR As IHTMLElement
    Dim TD As IHTMLElement
    Dim TargetSheet As Worksheet
    Dim VariablesSheet As Worksheet
    Dim Url As String
    Dim Row As Long            ' Long: Integer overflows past row 32,767
    Dim Column As Long
    Dim Span As Long
    Dim SD As Date
    Dim ED As Date
    Dim STS As Integer
    Dim ETS As Integer
    Dim STE As Integer
    Dim ETE As Integer
    Dim ErrNumber As Long
    Dim ErrSource As String
    Dim ErrDescription As String

    On Error GoTo Fail

    ' Qualify both sheets with ThisWorkbook so the macro does not depend on
    ' which workbook happens to be active.
    Set TargetSheet = ThisWorkbook.Worksheets("0700")
    Set VariablesSheet = ThisWorkbook.Worksheets("Variables")

    Application.ScreenUpdating = False

    ' NOTE(review): the end date reads the same cell as the start date (A2) and
    ' the end minute reads the same cell as the start minute (D2) - confirm
    ' this is intended and not a copy/paste slip.
    SD = VariablesSheet.Range("A2").Value
    ED = VariablesSheet.Range("A2").Value
    STS = VariablesSheet.Range("B2").Value
    ETS = VariablesSheet.Range("C2").Value
    STE = VariablesSheet.Range("D2").Value
    ETE = VariablesSheet.Range("D2").Value

    Url = "https://fclm.com/ppa/inspect/process?&processId=100114&warehouseId=...&primaryAttribute=PICKING_PROCESS_PATH&secondaryAttribute=GL_CODE&maxIntradayDays=1&spanType=Intraday&startDateIntraday=" & _
          Year(SD) & "%2F" & Month(SD) & "%2F" & Day(SD) & _
          "&startHourIntraday=" & STS & "&startMinuteIntraday=" & STE & _
          "&endDateIntraday=" & Year(ED) & "%2F" & Month(ED) & "%2F" & Day(ED) & _
          "&endHourIntraday=" & ETS & "&endMinuteIntraday=" & ETE

    TargetSheet.Activate

    Set Browser = New InternetExplorerMedium
    Browser.Navigate Url

    'Wait for page to load
    Do While Browser.Busy Or Browser.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    'Scan the document
    Set Document = Browser.Document
    Set Container = Document.getElementById("secondaryProductivityList")

    Row = 1
    If Not Container Is Nothing Then
        For Each Div In Container.getElementsByTagName("div")
            Set H3 = Div.getElementsByTagName("h3")(0)

            If Not Div.className = "floatHeader" And Not H3 Is Nothing Then
                TargetSheet.Cells(Row, 1).Value = H3.innerText
                Row = Row + 1

                Set Table = Div.getElementsByTagName("table")(0)
                If Not Table Is Nothing Then
                    For Each TR In Table.getElementsByTagName("tr")
                        Column = 1

                        ' Header cells first, then data cells; the column
                        ' counter carries over from one loop to the next.
                        For Each TD In TR.getElementsByTagName("th")
                            TargetSheet.Cells(Row, Column).Value = TD.innerText
                            TargetSheet.Cells(Row, Column).Font.Bold = True
                            ' A missing (Null), empty or non-numeric colspan
                            ' counts as a span of one column.
                            Span = CLng(Val(TD.getAttribute("colspan") & ""))
                            If Span < 1 Then Span = 1
                            Column = Column + Span
                        Next TD

                        ' Data cells must honour colspan as well: without it
                        ' the "Total:" row ends up two columns too far left.
                        For Each TD In TR.getElementsByTagName("td")
                            TargetSheet.Cells(Row, Column).Value = TD.innerText
                            Span = CLng(Val(TD.getAttribute("colspan") & ""))
                            If Span < 1 Then Span = 1
                            Column = Column + Span
                        Next TD

                        Row = Row + 1
                    Next TR
                End If
            End If

            ' Spacer row after every div (the hidden floatHeader divs included),
            ' which keeps the original layout of the output.
            Row = Row + 1
        Next Div
    End If

    TargetSheet.Range("A1:Z50").Columns.AutoFit

Done:
    ' Always close the browser and restore screen updating, even on failure.
    On Error Resume Next
    If Not Browser Is Nothing Then Browser.Quit
    On Error GoTo 0
    Application.ScreenUpdating = True
    If ErrNumber <> 0 Then Err.Raise ErrNumber, ErrSource, ErrDescription
    Exit Sub

Fail:
    ' Remember the error, clean up at Done, then re-raise it there.
    ErrNumber = Err.Number
    ErrSource = Err.Source
    ErrDescription = Err.Description
    Resume Done
End Sub

Это HTML-код (из инспектора браузера) таблиц, которые я пытаюсь скопировать.

<div id="secondaryProductivityList">
  <div>
    <h3>PICKING_PROCESS_PATH: PPFRACSDESTROY</h3>
    <table class="table table-striped table-bordered table-hover table-condensed">
      <thead>
        <tr>
          <th colspan="3" class="header"></th>
          <th colspan="7" class="header">UNKNOWN</th>
        </tr>
        <tr>
          <th class="header">Employee&nbsp;Id</th>
          <th class="header">Employee&nbsp;Name</th>
          <th class="header">Manager&nbsp;Name</th>
          <th class="{sorter: 'thousands'} header">Units</th>
          <th class="{sorter: 'thousands'} header">Quantity</th>
          <th class="{sorter: 'thousands'} header">Hours (Direct)</th>
          <th class="{sorter: 'thousands'} header">Hours (Inferred)</th>
          <th class="{sorter: 'thousands'} header">Hours (Total)</th>
          <th class="{sorter: 'thousands'} header">UPH</th>
          <th class="{sorter: 'thousands'} header">QPH</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td class="employeeInspect" id="100244269">100244269</td>
          <td class="employeeInspect" id="100244269">Llarenas, Brandon</td>
          <td class="employeeInspect" id="101151204">Jackson, Michelle M</td>
          <td>503</td>
          <td>503</td>
          <td>0.92</td>
          <td>0</td>
          <td>0.92</td>
          <td>545.75</td>
          <td>545.75</td>
        </tr>
        <tr>
          <td class="employeeInspect" id="101157611">101157611</td>
          <td class="employeeInspect" id="101157611">Swearingen, Caitlin</td>
          <td class="employeeInspect" id="101151204">Jackson, Michelle M</td>
          <td>539</td>
          <td>539</td>
          <td>0.99</td>
          <td>0</td>
          <td>0.99</td>
          <td>543.38</td>
          <td>543.38</td>
        </tr>
      </tbody>
      <tfoot>
        <tr>
          <td colspan="3" class="totalLineItem">Total:</td>
          <td>1,042</td>
          <td>1,042</td>
          <td>1.91</td>
          <td>0</td>
          <td>1.91</td>
          <td>544.52</td>
          <td>544.52</td>
        </tr>
      </tfoot>
    </table>
    <div class="floatHeader" style="display:none">
      <table class="table table-striped table-bordered table-hover table-condensed" style="width: 1170px;">
        <thead>
          <tr>
            <th colspan="3" class="header" style="width: 459.233px;"></th>
            <th colspan="7" class="header" style="width: 688.233px;">UNKNOWN</th>
          </tr>
          <tr>
            <th class="header" style="width: 110.233px;">Employee&nbsp;Id</th>
            <th class="header" style="width: 160.233px;">Employee&nbsp;Name</th>
            <th class="header" style="width: 167.233px;">Manager&nbsp;Name</th>
            <th class="{sorter: 'thousands'} header" style="width: 49.2333px;">Units</th>
            <th class="{sorter: 'thousands'} header" style="width: 77.2333px;">Quantity</th>
            <th class="{sorter: 'thousands'} header" style="width: 124.233px;">Hours (Direct)</th>
            <th class="{sorter: 'thousands'} header" style="width: 141.233px;">Hours (Inferred)</th>
            <th class="{sorter: 'thousands'} header" style="width: 115.233px;">Hours (Total)</th>
            <th class="{sorter: 'thousands'} header" style="width: 59.2333px;">UPH</th>
            <th class="{sorter: 'thousands'} header" style="width: 59.2333px;">QPH</th>
          </tr>
        </thead>
      </table>
    </div>
  </div>
  <div>
    <h3>PICKING_PROCESS_PATH: PPFRACSDESTROYHZMT</h3>
    <table class="table table-striped table-bordered table-hover table-condensed">
      <thead>
        <tr>
          <th colspan="3" class="header"></th>
          <th colspan="7" class="header">UNKNOWN</th>
        </tr>
        <tr>
          <th class="header">Employee&nbsp;Id</th>
          <th class="header">Employee&nbsp;Name</th>
          <th class="header">Manager&nbsp;Name</th>
          <th class="{sorter: 'thousands'} header">Units</th>
          <th class="{sorter: 'thousands'} header">Quantity</th>
          <th class="{sorter: 'thousands'} header">Hours (Direct)</th>
          <th class="{sorter: 'thousands'} header">Hours (Inferred)</th>
          <th class="{sorter: 'thousands'} header">Hours (Total)</th>
          <th class="{sorter: 'thousands'} header">UPH</th>
          <th class="{sorter: 'thousands'} header">QPH</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td class="employeeInspect" id="100244269">100244269</td>
          <td class="employeeInspect" id="100244269">Llarenas, Brandon</td>
          <td class="employeeInspect" id="101151204">Jackson, Michelle M</td>
          <td>22</td>
          <td>22</td>
          <td>0.08</td>
          <td>0</td>
          <td>0.08</td>
          <td>280.85</td>
          <td>280.85</td>
        </tr>
        <tr>
          <td class="employeeInspect" id="101157611">101157611</td>
          <td class="employeeInspect" id="101157611">Swearingen, Caitlin</td>
          <td class="employeeInspect" id="101151204">Jackson, Michelle M</td>
          <td>5</td>
          <td>5</td>
          <td>0.01</td>
          <td>0</td>
          <td>0.01</td>
          <td>620.69</td>
          <td>620.69</td>
        </tr>
      </tbody>
      <tfoot>
        <tr>
          <td colspan="3" class="totalLineItem">Total:</td>
          <td>27</td>
          <td>27</td>
          <td>0.09</td>
          <td>0</td>
          <td>0.09</td>
          <td>312.54</td>
          <td>312.54</td>
        </tr>
      </tfoot>
    </table>
    <div class="floatHeader" style="display:none">
      <table class="table table-striped table-bordered table-hover table-condensed" style="width: 1170px;">
        <thead>
          <tr>
            <th colspan="3" class="header" style="width: 459.233px;"></th>
            <th colspan="7" class="header" style="width: 688.233px;">UNKNOWN</th>
          </tr>
          <tr>
            <th class="header" style="width: 110.233px;">Employee&nbsp;Id</th>
            <th class="header" style="width: 160.233px;">Employee&nbsp;Name</th>
            <th class="header" style="width: 167.233px;">Manager&nbsp;Name</th>
            <th class="{sorter: 'thousands'} header" style="width: 49.2333px;">Units</th>
            <th class="{sorter: 'thousands'} header" style="width: 77.2333px;">Quantity</th>
            <th class="{sorter: 'thousands'} header" style="width: 124.233px;">Hours (Direct)</th>
            <th class="{sorter: 'thousands'} header" style="width: 141.233px;">Hours (Inferred)</th>
            <th class="{sorter: 'thousands'} header" style="width: 115.233px;">Hours (Total)</th>
            <th class="{sorter: 'thousands'} header" style="width: 59.2333px;">UPH</th>
            <th class="{sorter: 'thousands'} header" style="width: 59.2333px;">QPH</th>
          </tr>
        </thead>
      </table>
    </div>
  </div>
  <div>
    <h3>PICKING_PROCESS_PATH: PPFRACSLTL</h3>
    <table class="table table-striped table-bordered table-hover table-condensed">
      <thead>
        <tr>
          <th colspan="3" class="header"></th>
          <th colspan="7" class="header">UNKNOWN</th>
        </tr>
        <tr>
          <th class="header">Employee&nbsp;Id</th>
          <th class="header">Employee&nbsp;Name</th>
          <th class="header">Manager&nbsp;Name</th>
          <th class="{sorter: 'thousands'} header">Units</th>
          <th class="{sorter: 'thousands'} header">Quantity</th>
          <th class="{sorter: 'thousands'} header">Hours (Direct)</th>
          <th class="{sorter: 'thousands'} header">Hours (Inferred)</th>
          <th class="{sorter: 'thousands'} header">Hours (Total)</th>
          <th class="{sorter: 'thousands'} header">UPH</th>
          <th class="{sorter: 'thousands'} header">QPH</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td class="employeeInspect" id="101158899">101158899</td>
          <td class="employeeInspect" id="101158899">Cimafranca, Lorna</td>
          <td class="employeeInspect" id="101151204">Jackson, Michelle M</td>
          <td>1</td>
          <td>57</td>
          <td>0.80</td>
          <td>0.20</td>
          <td>1</td>
          <td>1</td>
          <td>57</td>
        </tr>
      </tbody>
      <tfoot>
        <tr>
          <td colspan="3" class="totalLineItem">Total:</td>
          <td>1</td>
          <td>57</td>
          <td>0.80</td>
          <td>0.20</td>
          <td>1</td>
          <td>1</td>
          <td>57</td>
        </tr>
      </tfoot>
    </table>
    <div class="floatHeader" style="display:none">
      <table class="table table-striped table-bordered table-hover table-condensed" style="width: 1170px;">
        <thead>
          <tr>
            <th colspan="3" class="header" style="width: 468.233px;"></th>
            <th colspan="7" class="header" style="width: 679.233px;">UNKNOWN</th>
          </tr>
          <tr>
            <th class="header" style="width: 115.233px;">Employee&nbsp;Id</th>
            <th class="header" style="width: 159.233px;">Employee&nbsp;Name</th>
            <th class="header" style="width: 173.233px;">Manager&nbsp;Name</th>
            <th class="{sorter: 'thousands'} header" style="width: 51.2333px;">Units</th>
            <th class="{sorter: 'thousands'} header" style="width: 80.2333px;">Quantity</th>
            <th class="{sorter: 'thousands'} header" style="width: 129.233px;">Hours (Direct)</th>
            <th class="{sorter: 'thousands'} header" style="width: 146.233px;">Hours (Inferred)</th>
            <th class="{sorter: 'thousands'} header" style="width: 119.233px;">Hours (Total)</th>
            <th class="{sorter: 'thousands'} header" style="width: 43.2333px;">UPH</th>
            <th class="{sorter: 'thousands'} header" style="width: 44.2333px;">QPH</th>
          </tr>
        </thead>
      </table>
    </div>
  </div>
</div>

1 Ответ

0 голосов
/ 19 декабря 2018

Я бы использовал буфер обмена, чтобы скопировать таблицы вместе с форматированием. Изначально я читал ваш HTML из файла (сейчас эту часть отредактировал), а вы будете получать данные из ie.document — схема показана ниже.

Option Explicit
' Loads the page in Internet Explorer and pastes each table found under
' #secondaryProductivityList into "Sheet1" through the clipboard, writing the
' matching <h3> text above it.
'
' The loop visits every second entry of the table list (0, 2, 4, ...) and pairs
' entry i with heading i \ 2.
Public Sub test()
    Dim pageUrl As String
    Dim ie As InternetExplorer
    Dim target As Worksheet
    Dim doc As HTMLDocument
    Dim tableNodes As Object
    Dim headingNodes As Object
    Dim dataObj As Object
    Dim idx As Long

    pageUrl = "yourURL"
    Set ie = New InternetExplorer 'InternetExplorerMedium
    Set target = ThisWorkbook.Worksheets("Sheet1")

    ie.Visible = True
    ie.Navigate2 pageUrl

    ' Spin until the browser reports the document as fully loaded.
    Do While ie.Busy Or ie.readyState < 4
        DoEvents
    Loop

    Set doc = ie.document
    Set tableNodes = doc.querySelectorAll("#secondaryProductivityList table")
    ' Late-bound clipboard data object created from its class id.
    Set dataObj = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
    Set headingNodes = doc.querySelectorAll("#secondaryProductivityList h3")
    target.Cells.UnMerge

    For idx = 0 To tableNodes.Length - 1 Step 2
        ' Put the table's HTML on the clipboard so the paste keeps its layout.
        dataObj.SetText tableNodes.item(idx).outerHTML
        dataObj.PutInClipboard

        ' Heading goes two rows below the current last used row; the table is
        ' pasted one row below whatever the last used row is after that write.
        target.Cells(LastRow(target) + 2, 1).Value = headingNodes.item(idx \ 2).innerText
        target.Cells(LastRow(target) + 1, 1).PasteSpecial
    Next idx

    ie.Quit
End Sub

' Returns the row number of the last cell on the sheet that contains anything
' (searching formulas, by rows, backwards from A1).
' Returns 0 when nothing is found or when the search itself fails.
Public Function LastRow(ByVal sh As Worksheet) As Long
    Dim lastHit As Range

    ' Errors are deliberately swallowed: a failed search simply yields 0.
    On Error Resume Next
    Set lastHit = sh.Cells.Find(What:="*", _
                                After:=sh.Range("A1"), _
                                Lookat:=xlPart, _
                                LookIn:=xlFormulas, _
                                SearchOrder:=xlByRows, _
                                SearchDirection:=xlPrevious, _
                                MatchCase:=False)
    On Error GoTo 0

    If Not lastHit Is Nothing Then
        LastRow = lastHit.Row
    End If
End Function

Вывод:

image

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...