Я новичок в stackoverflow и новичок ie в кодировке VBA. На моей работе нам предоставляют данные об отправке в форме Ms Word, что не очень полезно. Я нашел способ передачи данных с помощью VBA и получил полностью функциональный код. Однако набор данных содержит сотни тысяч записей. Я попытался запустить данные за месяц с 200 тыс. Записей, и это заняло 5 дней. Просто интересно, есть ли в моем коде что-нибудь, что я мог бы улучшить, чтобы ускорить процесс. Я пытался отключить обновления экрана, события, расчеты, но это мало что дало. Заранее благодарим за помощь.
Sub Word_to_Excel()
Dim FName As String, FD As FileDialog
Dim wdApp As Object
Dim wdDoc As Object
Dim WDR, WDCheck, ShipmentID As Object
Dim ExR As Range
Dim file
Dim Path As String
Dim ImportDate As Object
Dim ImportValue As String
Dim ShipmentIDcheck As String
Dim objResult
Set objShell = CreateObject("WScript.Shell")
Set ExR = Selection ' current location in Excel Sheet
' Select Folder containing word documents
Set FD = Application.FileDialog(msoFileDialogFolderPicker)
FD.Show
FName = FD.SelectedItems(1)
file = Dir(FName & "\*.docx")
Set wdApp = CreateObject("Word.Application")
' Open word document in the folder, run macro, close it and open the next word document until there are none left
Do While file <> ""
wdApp.Documents.Open Filename:=FName & "\" & file
wdApp.ActiveWindow.ActivePane.View.Type = 1
wdApp.Visible = True
' Once the word doc is open, go to beginning of document and search for CTY/SITE/SORT:
wdApp.Selection.HomeKey Unit:=6
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
' If "CTY/SITE/SORT:" is found, then look for Shipment ID
Do While WDCheck = "CTY/SITE/SORT:"
' Find first shipment
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=11
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
ShipmentIDcheck = Replace(ShipmentID, " ", "")
' Transfer information from Word to Excel for a Shipment ID and go to the next one.
' Shipment ID should be a string that is 11 characters long
' If Shipment ID no longer exist, go to next page by searching for the next CTY/SITE/SORT:
Do While Len(Trim(ShipmentIDcheck)) = 11
i = i + 1
ExR(i, 1) = file
ExR(i, 2) = ShipmentIDcheck
' Consignee Name
wdApp.Selection.MoveUp Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=12
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 3) = Trim(WDR)
' Importer Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 8) = Trim(WDR)
' Shipper Name
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 13) = Trim(WDR)
' Quantity
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=10, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 19) = Trim(WDR)
' Weight
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 20) = Trim(WDR)
' Value
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=12, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 21) = Trim(WDR)
' Broker
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 23) = Trim(WDR)
' Consignee Street
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 4) = Trim(WDR)
' Importer Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 9) = Trim(WDR)
' Shipper Street
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=23, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 14) = Trim(WDR)
' Description
wdApp.Selection.MoveRight Unit:=1, Count:=8
wdApp.Selection.MoveRight Unit:=1, Count:=40, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 18) = Trim(WDR)
' Consignee City
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=13
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 5) = Trim(WDR)
' Consignee Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 6) = Trim(WDR)
' Consignee Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 7) = Trim(WDR)
' Importer City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 10) = Trim(WDR)
' Importer Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 11) = Trim(WDR)
' Importer Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 12) = Trim(WDR)
' Shipper City
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=13, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 15) = Trim(WDR)
' Shipper Province
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=2, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 16) = Trim(WDR)
' Shipper Postal
wdApp.Selection.MoveRight Unit:=1, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=6, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 17) = Trim(WDR)
' Country of Origin
wdApp.Selection.MoveRight Unit:=1, Count:=29
wdApp.Selection.MoveRight Unit:=1, Count:=21, Extend:=1
Set WDR = wdApp.Selection
ExR(i, 22) = Trim(WDR)
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.MoveDown Unit:=5, Count:=2
wdApp.Selection.MoveRight Unit:=1, Count:=1
wdApp.Selection.MoveRight Unit:=1, Count:=11, Extend:=1
Set ShipmentID = wdApp.Selection
' Remove spaces from selection. Selection is then used to check if it is a shipment ID.
' If it is, then data for that shipment ID is transferred. If not, macro will go to the next page in the Word Doc.
ShipmentIDcheck = Replace(ShipmentID, " ", "")
ActiveCell.Offset(1).Select
Loop
'Simulate keyboard press "NUMLOCK" to prevent screen from locking
objResult = objShell.SendKeys("{NUMLOCK}")
wdApp.Selection.HomeKey Unit:=5
wdApp.Selection.Find.ClearFormatting
wdApp.Selection.Find.Execute "CTY/SITE/SORT:"
Set WDCheck = wdApp.Selection
Loop
wdApp.ActiveDocument.Close SaveChanges:=wdDoNotSaveChanges
ActiveWorkbook.Save
file = Dir()
Loop
wdApp.Quit
MsgBox "Data extraction completed at:" & vbNewLine & Format(Now, "mmmm d, yyyy hh:mm AM/PM")
End Sub
Вот как форматируется набор данных. Есть несколько документов Word, содержащих страницы и страницы этого набора данных в день. Количество отправлений на странице варьируется. Но формат одинаков во всем. В документах word нет таблиц, только текст, разделенный пробелами. CTY / SITE / SORT: уникален для каждой страницы, и я использовал его как опорную точку. если макрос находит его, он идет вниз на 11 строк и получает первый идентификатор отгрузки и другую информацию. Затем он проверяет следующий идентификатор доставки. Если его там нет, то он переходит на следующую страницу и повторяет процесс.
REPORT NUM : ABC1234 OPERATIONS SYSTEM PAGE NUM: 2
CTY/SITE/SORT: CA 00123 SUMMARY CARGO RUN TIME: 07:33:43
SORT DATE : INBOUND - SCAN RUN DATE: 01AUG19
OPER ID : ABC123
MVMT: 12345678 MVMT DT: 01AUG19 MAWB: PROD TYP: DTY TYP: IMP CTY: EXP CTY: BL TYP:
COURIER REMISSION MANIFEST EXPORT SITE: US 12345
GCCN ID: EXPECTED SHPTS: EXPECTED PKGS: EXPECTED WEIGHT:
CUSTOMS NUM CONSIGNEE NAME IMPORTER NAME SHIPPER NAME CSA QTY WGT(LBS) VALUE BROKER
SHIPMENT ID DESCRIPTION (CAD) CTRY OF ORIGIN
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
JOHN SMITH ABC COMPANY XYZ COMPANY 1 1.1 1000.00 UNCONSIGNED
ABC12345678 123 MAIN STREET 345 RANDOM ROAD UNIVERSITY OF WASHINGTO BICYCLE PARTS
VANCOUVER BC V1A1A1 VANCOUVER BC V2B1B2 SEATTLE WA 981234 US
TOTAL FOR DUTY TYPE COURIER REMISSION
TOTAL SHIPMENTS: 4
TOTAL PACKAGES : 4
TOTAL WEIGHT : 70.9 LBS
TOTAL VALUES : 4000.00
* * *
Я использовал следующий код для очистки набора данных и упорядочения их по одной записи в строке, и каждая строка разделяется абзац (спасибо, макропод). Поскольку данные организованы в столбцы, разделенные пробелами, я могу сохранить их в виде файла .txt и импортировать в Excel. Теперь задача состоит в том, чтобы применить код ко всем документам в папке и создать файл .txt для каждого. Или было бы еще лучше, если бы код мог объединить все данные из очищенных файлов .docx в один файл .txt.
Sub CleanWordDoc()
Application.ScreenUpdating = False
Dim p As Long, StrOut As String
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Wrap = wdFindContinue
.Forward = True
.Format = False
.MatchWildcards = True
.Text = "REPORT NUM : * CTRY OF ORIGIN^13" 'Clean header on each page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}ACTUAL SHP TOTAL*[ ]{20,}^13^m" 'Clean footer on some pages
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "[ ]{2,}TOTAL FOR DUTY*[ ]{20,}^13^m" 'Clean more footers
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "REPORT NUM :*SUMMARY*[\*] [\*][ ]{20,}^13" 'Clean last page
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^m^13" ' Clean all page breaks
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "^13^13" ' Clean empty paragraphs
.Replacement.Text = "^13"
.Execute Replace:=wdReplaceAll
.Text = "<[ ]{1,}^13" ' Clean spaces and paragraphs at the beginning of doc
.Replacement.Text = ""
.Execute Replace:=wdReplaceAll
.Text = "(*)^13(*)^13(*^13)" ' Combine 3 paragraphs into one
.Replacement.Text = "\1 \2 \3"
.Execute Replace:=wdReplaceAll
End With
End With
Application.ScreenUpdating = True
End Sub
И вот так выглядит очищенный файл .docx (с сотнями записей ):
12345678900 ABC COMPANY DEF COMPANY XYZ COMPANY 1 1.1 123.45 AAABROKER A0B12345LFD ABC ADDRESS DEF ADDRESS XYZ ADDRESS BICYCLE PARTS VANCOUVER BC V1A1A1 MARKHAM ON L1L1L1 SHENZHEN 512323 CN
98765432100 ABC COMPANY DEF COMPANY XYZ COMPANY 1 1.1 123.45 AAABROKER A0B12345LFD ABC ADDRESS DEF ADDRESS XYZ ADDRESS BICYCLE PARTS VANCOUVER BC V1A1A1 MARKHAM ON L1L1L1 SHENZHEN 512323 CN