Преобразование нескольких документов Word в файлы HTML с использованием VBA - PullRequest
0 голосов
/ 14 июня 2019

Как я могу преобразовать несколько документов MS Word в определенной папке в HTML, используя VBA?Я использовал Powershell для этого, но, к сожалению, доступ к сценариям заблокирован.

1 Ответ

0 голосов
/ 15 июня 2019

Может быть, это может помочь ?? !!

Option Explicit

Sub ChangeDocsToTxtOrRTFOrHTML()
'with export to PDF in Word 2007
    Dim fs As Object
    Dim oFolder As Object
    Dim tFolder As Object
    Dim oFile As Object
    Dim strDocName As String
    Dim intPos As Integer
    Dim locFolder As String
    Dim fileType As String
    On Error Resume Next
    locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")
    Select Case Application.Version
        Case Is < 12
            Do
                fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML", "File Conversion", "TXT"))
            Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML")
        Case Is >= 12
            Do
                fileType = UCase(InputBox("Change DOC to TXT, RTF, HTML or PDF(2007+ only)", "File Conversion", "TXT"))
            Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF")
    End Select
    Application.ScreenUpdating = False
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set oFolder = fs.GetFolder(locFolder)
    Set tFolder = fs.CreateFolder(locFolder & "Converted")
    Set tFolder = fs.GetFolder(locFolder & "Converted")
    For Each oFile In oFolder.Files
        Dim d As Document
        Set d = Application.Documents.Open(oFile.Path)
        strDocName = ActiveDocument.Name
        intPos = InStrRev(strDocName, ".")
        strDocName = Left(strDocName, intPos - 1)
        ChangeFileOpenDirectory tFolder
        Select Case fileType
        Case Is = "TXT"
            strDocName = strDocName & ".txt"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatText
        Case Is = "RTF"
            strDocName = strDocName & ".rtf"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatRTF
        Case Is = "HTML"
            strDocName = strDocName & ".html"
            ActiveDocument.SaveAs FileName:=strDocName, FileFormat:=wdFormatFilteredHTML
        Case Is = "PDF"
            strDocName = strDocName & ".pdf"

            ' *** Word 2007 users - remove the apostrophe at the start of the next line ***
            'ActiveDocument.ExportAsFixedFormat OutputFileName:=strDocName, ExportFormat:=wdExportFormatPDF

        End Select
        d.Close
        ChangeFileOpenDirectory oFolder
    Next oFile
    Application.ScreenUpdating = True
End Sub
############################# ##################### ##############
Option Explicit

Private Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)

Sub ConvertDocs()
    Dim fs As Object
    Dim oFolder As Object
    Dim tFolder As Object
    Dim oFile As Object
    Dim strDocName As String
    Dim intPos As Integer
    Dim locFolder As String
    Dim fileType As String
    Dim office2007 As Boolean
    Dim lf As LinkFormat
    Dim oField As Field
    Dim oIShape As InlineShape
    Dim oShape As Shape
    On Error Resume Next
    locFolder = InputBox("Enter the path to the folder with the documents to be converted", "File Conversion", "C:\myDocs")
    If Application.Version >= 12 Then
        office2007 = True
        Do
            fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML, DOC, DOCX or PDF", "File Conversion", "TXT"))
        Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "PDF" Or fileType = "DOC" Or fileType = "DOCX")
    Else
        office2007 = False
        Do
            fileType = UCase(InputBox("Enter one of the following formats (to convert to): TXT, RTF, HTML or DOC", "File Conversion", "TXT"))
        Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" Or fileType = "DOC")
    End Select
    Application.ScreenUpdating = False
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set oFolder = fs.GetFolder(locFolder)
    Set tFolder = fs.CreateFolder(locFolder & "Converted")
    Set tFolder = fs.GetFolder(locFolder & "Converted")
    For Each oFile In oFolder.Files
        Dim d As Document
        Set d = Application.Documents.Open(oFile.Path)
        ' put the document into print view
        If fileType = "RTF" Or fileType = "DOC" Or fileType = "DOCX" Then
            With ActiveWindow.View
                .ReadingLayout = False
                .Type = wdPrintView
            End With
        End If
        ' try to embed linked images from fields, shapes and inline shapes into the document
        ' (for some reason this does not work for all images in all HTML files I've tested)
        If Not fileType = "HTML" Then
            For Each oField In d.Fields
                Set lf = oField.LinkFormat
                If oField.Type = wdFieldIncludePicture And Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
            For Each oShape In d.Shapes
                Set lf = oShape.LinkFormat
                If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
            For Each oIShape In d.InlineShapes
                Set lf = oIShape.LinkFormat
                If Not lf Is Nothing And Not lf.SavePictureWithDocument Then
                    lf.SavePictureWithDocument = True
                    Sleep (2000)
                    lf.BreakLink()
                    d.UndoClear()
                End If
            Next
        End If
        strDocName = d.Name
        intPos = InStrRev(strDocName, ".")
        strDocName = Left(strDocName, intPos - 1)
        ChangeFileOpenDirectory(tFolder)
        ' Check out these links for a comprehensive list of supported file formats and format constants:
        ' http://msdn.microsoft.com/en-us/library/microsoft.office.interop.word.wdsaveformat.aspx
        ' http://msdn.microsoft.com/en-us/library/office/bb238158.aspx
        ' (In the latter list you can see the values that the constants are associated with.
        '  Office 2003 only supported values up to wdFormatXML(=11). Values from wdFormatXMLDocument(=12)
        '  til wdFormatDocumentDefault(=16) were added in Office 2007, and wdFormatPDF(=17) and wdFormatXPS(=18)
        '  were added in Office 2007 SP2. Office 2010 added the various wdFormatFlatXML* formats and wdFormatOpenDocumentText.)
        If Not office2007 And fileType = "DOCX" Then
            fileType = "DOC"
        End If
        Select Case fileType
            Case Is = "TXT"
                strDocName = strDocName & ".txt"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatText)
            Case Is = "RTF"
                strDocName = strDocName & ".rtf"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatRTF)
            Case Is = "HTML"
                strDocName = strDocName & ".html"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatFilteredHTML)
            Case Is = "DOC"
                strDocName = strDocName & ".doc"
                d.SaveAs(FileName := strDocName, FileFormat := wdFormatDocument)
            Case Is = "DOCX"
                strDocName = strDocName & ".docx"
                ' *** Word 2007+ users - remove the apostrophe at the start of the next line ***
                'd.SaveAs(FileName := strDocName, FileFormat := wdFormatDocumentDefault)
            Case Is = "PDF"
                strDocName = strDocName & ".pdf"
                ' *** Word 2007 SP2+ users - remove the apostrophe at the start of the next line ***
                'd.ExportAsFixedFormat(OutputFileName := strDocName, ExportFormat := wdExportFormatPDF)
        End Select
        d.Close
        ChangeFileOpenDirectory(oFolder)
    Next oFile
    Application.ScreenUpdating = True
End Sub
############################# ##################### ##############

Также, посмотрите это ...

https://www.youtube.com/watch?v=4vFQV6RtYMM

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...