Как заставить XMLDOMDocument включать декларацию XML? - PullRequest
8 голосов
/ 17 июля 2009

Когда XMLDOMDocument сохраняет себя, как я могу заставить его включить XML-декларацию, например:

  • <?xml version="1.0" encoding="UTF-8" ?>
  • <?xml version="1.0" encoding="UTF-16" ?>
  • <?xml version="1.0" encoding="UCS-2" ?>
  • <?xml version="1.0" encoding="UCS-4" ?>
  • <?xml version="1.0" encoding="ISO-10646-UCS-2" ?>
  • <?xml version="1.0" encoding="UNICODE-1-1-UTF-8" ?>
  • <?xml version="1.0" encoding="UNICODE-2-0-UTF-16" ?>
  • <?xml version="1.0" encoding="UNICODE-2-0-UTF-8" ?>
  • <?xml version="1.0" encoding="US-ASCII" ?>
  • <?xml version="1.0" encoding="ISO-8859-1" ?>
  • <?xml version="1.0" encoding="WINDOWS-1250" ?>

Объект XMLDOMDocument создается в памяти (т. е. XML не загружается из какого-либо внешнего источника):

{
   // Create an empty document object in memory.
   IXMLDOMDocument2 doc = new DOMDocument60();

   // Add nodes to the doc.
   ...

   // Save the document to saveTarget.
   doc.Save(saveTarget);
}

Без объявления xml вы получите только тело xml, например:

<Customer>
   ...
</Customer>

вместо полного XML-документа:

<?xml version="1.0" encoding="US-ASCII" ?>
<Customer>
   ...
</Customer>

Вопрос 2

Как я могу контролировать кодировку, которую XMLDOMDocument будет использовать при сохранении в поток?

Ответы [ 5 ]

8 голосов
/ 17 июля 2009

Вам нужно использовать MXXMLWriter60, а не сохранять документ напрямую. Извините, у меня нет примера на C#, но вот эквивалент на VB.Net. Подробнее см. IMXWriter.

' Create a DOMDocument object and load a small sample document into it.

Dim xmlDoc As New DOMDocument60
xmlDoc.loadXML("<doc><one>test1</one><two>test2</two></doc>")

' Configure the XML writer: request a BOM, keep the XML declaration,
' choose the encoding and turn on indentation.

Dim wrt As New MXXMLWriter60
wrt.byteOrderMark = True
wrt.omitXMLDeclaration = False
wrt.encoding = "US-ASCII"
wrt.indent = True

' Register the writer as the content, DTD and error handler of a SAX reader,
' and also as its lexical and declaration handler.

Dim rdr As New SAXXMLReader60
Set rdr.contentHandler = wrt
Set rdr.dtdHandler = wrt
Set rdr.errorHandler = wrt
rdr.putProperty "http://xml.org/sax/properties/lexical-handler", wrt
rdr.putProperty "http://xml.org/sax/properties/declaration-handler", wrt

' Pass the DOM through the SAX reader; the reader calls the writer
' through the handlers registered above.

rdr.parse xmlDoc

' Write the writer's output to saved.xml in the application directory.

Dim iFileNo As Integer
iFileNo = FreeFile
Open App.Path + "\saved.xml" For Output As #iFileNo
Print #iFileNo, wrt.output
Close #iFileNo
3 голосов
/ 27 января 2013

Вы должны иметь возможность получить то же самое с помощью метода CreateProcessingInstruction.

Пример:

' Create an empty DOMDocument object and a variable for the root element.

Dim xmlDoc As New DOMDocument
Dim xRecords As IXMLDOMElement

' Create the root element, then append the "xml" processing instruction
' (the XML declaration) before appending the root, so the declaration
' is the first node in the document.
Set xRecords = xmlDoc.createElement("HeuristicFiler")
xmlDoc.appendChild xmlDoc.createProcessingInstruction("xml", "version=""1.0"" encoding=""UTF-8"" standalone=""yes""")
xmlDoc.appendChild xRecords

' Add the various records here.

' Save the XML file to strFilePath.
xmlDoc.Save strFilePath
2 голосов
/ 27 ноября 2010

Вот то же самое на C++:

// Serializes a DOM document to a stream, including the XML declaration.
//
// The document is replayed through a SAX reader into an MXXMLWriter that is
// configured to keep the XML declaration, mark the document standalone,
// indent the output and encode it as UTF-8.
//
// pDoc    - document to serialize; must not be NULL.
// pStream - destination stream; must not be NULL.
//
// Returns true when the whole document was written and flushed, false on
// any failure.
//
// NOTE(review): CHK_FAILED is defined outside this snippet; it is assumed to
// return false from this function when the HRESULT indicates failure — confirm.
bool PPrintDOMDocument (IXMLDOMDocument* pDoc, IStream* pStream)
{
    if (pDoc == NULL || pStream == NULL)
        return false;

    // Create the writer
    CComPtr <IMXWriter> pMXWriter;
    CHK_FAILED (pMXWriter.CoCreateInstance (__uuidof (MXXMLWriter), NULL, CLSCTX_ALL));

    // The same writer object is used through each SAX handler interface.
    CComPtr <ISAXContentHandler> pISAXContentHandler;
    CHK_FAILED (pMXWriter.QueryInterface (&pISAXContentHandler));

    CComPtr <ISAXErrorHandler> pISAXErrorHandler;
    CHK_FAILED (pMXWriter.QueryInterface (&pISAXErrorHandler));

    CComPtr <ISAXDTDHandler> pISAXDTDHandler;
    CHK_FAILED (pMXWriter.QueryInterface (&pISAXDTDHandler));

    CHK_FAILED (pMXWriter->put_omitXMLDeclaration (VARIANT_FALSE));
    CHK_FAILED (pMXWriter->put_standalone (VARIANT_TRUE));
    CHK_FAILED (pMXWriter->put_indent (VARIANT_TRUE));
    // put_encoding takes a BSTR; a plain wide string literal has no BSTR
    // length prefix, so wrap it in CComBSTR.
    CHK_FAILED (pMXWriter->put_encoding (CComBSTR (L"UTF-8")));

    // Create the reader and attach the writer to it
    CComPtr <ISAXXMLReader> pSAXReader;
    CHK_FAILED (pSAXReader.CoCreateInstance (__uuidof (SAXXMLReader), NULL, CLSCTX_ALL));

    CHK_FAILED (pSAXReader->putContentHandler (pISAXContentHandler));
    CHK_FAILED (pSAXReader->putDTDHandler (pISAXDTDHandler));
    CHK_FAILED (pSAXReader->putErrorHandler (pISAXErrorHandler));
    CHK_FAILED (pSAXReader->putProperty (
      L"http://xml.org/sax/properties/lexical-handler", CComVariant (pMXWriter)));
    CHK_FAILED (pSAXReader->putProperty (
      L"http://xml.org/sax/properties/declaration-handler", CComVariant (pMXWriter)));

    // Direct the writer's output to the stream, then replay the DOM through
    // the reader so that the writer receives the SAX events.
    if (FAILED (pMXWriter->put_output (CComVariant (pStream)))) return false;
    if (FAILED (pSAXReader->parse (CComVariant (pDoc)))) return false;
    if (FAILED (pMXWriter->flush ())) return false;

    // The original fell off the end of a bool function here (undefined behaviour).
    return true;
}

... и вам понадобится подходящий IStream ...

// Minimal IStream implementation on top of a Win32 file handle.
//
// Instances are created only through FileStream::OpenFile and are destroyed
// by Release() when the reference count reaches zero; the constructor and
// destructor are therefore private. Only Read, Write, Seek and Stat are
// implemented; every other IStream method returns E_NOTIMPL.
class FileStream : public IStream
{
    // Takes ownership of hFile; the object starts with one reference.
    explicit FileStream(HANDLE hFile)
        : _hFile(hFile), _refcount(1)
    {
    }

    // Closes the owned file handle.
    ~FileStream()
    {
        if (_hFile != INVALID_HANDLE_VALUE)
        {
            ::CloseHandle(_hFile);
        }
    }

    // Not copyable: a copy would close the same handle twice.
    // Declared but intentionally not defined.
    FileStream(const FileStream&);
    FileStream& operator=(const FileStream&);

public:
    // Opens pName and returns a new stream over it in *ppStream.
    //
    // fWrite == true  : open for writing, always creating/truncating the file.
    // fWrite == false : open an existing file for reading.
    //
    // Returns S_OK on success, E_POINTER if ppStream is NULL, E_OUTOFMEMORY
    // if the stream object cannot be allocated, or the HRESULT form of the
    // Win32 error from CreateFileW. On failure *ppStream is set to NULL.
    HRESULT static OpenFile(LPCWSTR pName, IStream ** ppStream, bool fWrite)
    {
        if (ppStream == NULL)
            return E_POINTER;
        *ppStream = NULL;

        HANDLE hFile = ::CreateFileW(pName,
            fWrite ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
            NULL,
            fWrite ? CREATE_ALWAYS : OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL,
            NULL);

        if (hFile == INVALID_HANDLE_VALUE)
            return HRESULT_FROM_WIN32(GetLastError());

        // Handle both a throwing and a NULL-returning operator new so that
        // the file handle is never leaked and the failure is reported.
        FileStream* pFileStream = NULL;
        try
        {
            pFileStream = new FileStream(hFile);
        }
        catch (...)
        {
            pFileStream = NULL;
        }

        if (pFileStream == NULL)
        {
            ::CloseHandle(hFile);
            return E_OUTOFMEMORY;
        }

        *ppStream = pFileStream;
        return S_OK;
    }

    // IUnknown Interface
    virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void ** ppvObject)
    {
        if (ppvObject == NULL)
            return E_POINTER;

        if (iid == __uuidof(IUnknown)
            || iid == __uuidof(IStream)
            || iid == __uuidof(ISequentialStream))
        {
            *ppvObject = static_cast<IStream*>(this);
            AddRef();
            return S_OK;
        }

        // Clear the out-pointer when the interface is not supported.
        *ppvObject = NULL;
        return E_NOINTERFACE;
    }

    virtual ULONG STDMETHODCALLTYPE AddRef(void)
    {
        return (ULONG)InterlockedIncrement(&_refcount);
    }

    // Destroys the object when the last reference is released.
    virtual ULONG STDMETHODCALLTYPE Release(void)
    {
        ULONG res = (ULONG) InterlockedDecrement(&_refcount);
        if (res == 0)
            delete this;
        return res;
    }

    // ISequentialStream Interface
public:
    // Reads up to cb bytes from the file into pv. pcbRead may be NULL.
    virtual HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead)
    {
        // ReadFile is called without an OVERLAPPED structure, so it is always
        // given a valid byte-count pointer, even when the caller passed NULL.
        ULONG cbDone = 0;
        BOOL rc = ReadFile(_hFile, pv, cb, &cbDone, NULL);
        if (!rc)
        {
            HRESULT hr = HRESULT_FROM_WIN32(GetLastError());
            if (pcbRead != NULL)
                *pcbRead = cbDone;
            return hr;
        }
        if (pcbRead != NULL)
            *pcbRead = cbDone;
        return S_OK;
    }

    // Writes cb bytes from pv to the file. pcbWritten may be NULL.
    virtual HRESULT STDMETHODCALLTYPE Write(void const* pv, ULONG cb, ULONG* pcbWritten)
    {
        // See Read: WriteFile always gets a valid byte-count pointer.
        ULONG cbDone = 0;
        BOOL rc = WriteFile(_hFile, pv, cb, &cbDone, NULL);
        if (!rc)
        {
            HRESULT hr = HRESULT_FROM_WIN32(GetLastError());
            if (pcbWritten != NULL)
                *pcbWritten = cbDone;
            return hr;
        }
        if (pcbWritten != NULL)
            *pcbWritten = cbDone;
        return S_OK;
    }

    // IStream Interface
public:
    virtual HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE CopyTo(IStream*, ULARGE_INTEGER, ULARGE_INTEGER*,
        ULARGE_INTEGER*)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Commit(DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Revert(void)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Clone(IStream **)
    {
        return E_NOTIMPL;
    }

    // Moves the file pointer. dwOrigin is one of the STREAM_SEEK_* values;
    // any other value yields STG_E_INVALIDFUNCTION. lpNewFilePointer is
    // passed through to SetFilePointerEx to receive the new position.
    virtual HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove, DWORD dwOrigin,
        ULARGE_INTEGER* lpNewFilePointer)
    {
        DWORD dwMoveMethod;

        switch(dwOrigin)
        {
        case STREAM_SEEK_SET:
            dwMoveMethod = FILE_BEGIN;
            break;
        case STREAM_SEEK_CUR:
            dwMoveMethod = FILE_CURRENT;
            break;
        case STREAM_SEEK_END:
            dwMoveMethod = FILE_END;
            break;
        default:
            return STG_E_INVALIDFUNCTION;
        }

        if (SetFilePointerEx(_hFile, liDistanceToMove,
                             reinterpret_cast<PLARGE_INTEGER>(lpNewFilePointer),
                             dwMoveMethod) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }

    // Fills *pStatstg with the stream type and the current file size; all
    // other fields are zeroed. grfStatFlag is not consulted: no name is
    // ever returned (pwcsName stays NULL).
    virtual HRESULT STDMETHODCALLTYPE Stat(STATSTG* pStatstg, DWORD grfStatFlag)
    {
        if (pStatstg == NULL)
            return STG_E_INVALIDPOINTER;

        // Zero the structure first so the caller never sees uninitialized
        // fields, in particular a garbage pwcsName pointer.
        *pStatstg = STATSTG();
        pStatstg->type = STGTY_STREAM;

        if (GetFileSizeEx(_hFile, reinterpret_cast<PLARGE_INTEGER>(&pStatstg->cbSize)) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }

private:
    HANDLE _hFile;   // owned file handle, closed in the destructor
    LONG _refcount;  // COM reference count
};

... и объяснение того, что происходит.

1 голос
/ 03 сентября 2011

Вот то же самое в Delphi:

//Serializes the document into a WideString and returns it.
//The encoding is left at the writer's default (UTF-16), which suits a WideString.
//Raises Exception if Document60 is nil.
class function TXMLHelper.WriteDocumentToString(
       const Document60: DOMDocument60): WideString; 
var
    writer: IMXWriter;
    reader: IVBSAXXMLReader;
    bstr: OleVariant;
begin
{
    From http://support.microsoft.com/kb/275883
    INFO: XML Encoding and DOM Interface Methods

    MSXML has native support for the following encodings:
        UTF-8
        UTF-16
        UCS-2
        UCS-4
        ISO-10646-UCS-2
        UNICODE-1-1-UTF-8
        UNICODE-2-0-UTF-16
        UNICODE-2-0-UTF-8

    It also recognizes (internally using the WideCharToMultibyte 
    API function for mappings) the following encodings:
        US-ASCII
        ISO-8859-1
        ISO-8859-2
        ISO-8859-3
        ISO-8859-4
        ISO-8859-5
        ISO-8859-6
        ISO-8859-7
        ISO-8859-8
        ISO-8859-9
        WINDOWS-1250
        WINDOWS-1251
        WINDOWS-1252
        WINDOWS-1253
        WINDOWS-1254
        WINDOWS-1255
        WINDOWS-1256
        WINDOWS-1257
        WINDOWS-1258
}

    if Document60 = nil then
       raise Exception.Create('TXMLHelper.WriteDocument: Document60 cannot be nil');

    // Set properties on the XML writer
    //    - including BOM, XML declaration and encoding
    writer := CoMXXMLWriter60.Create;
    writer.byteOrderMark := False; //Do not write a Byte Order Mark (BOM). The property has no effect for BSTR or DOM output (default True), but we turn it off anyway.
    writer.omitXMLDeclaration := False; //Keep the XML declaration. Setting this to True makes IMXWriter skip it, which is useful for document fragments. (Default False)
//  writer.encoding := 'UTF-8'; //Sets and gets encoding for the output. (Default "UTF-16")
    writer.indent := True; //Indent the output. (Default False)
    writer.standalone := True; //Sets the value of the standalone attribute in the XML declaration to "yes" or "no".


    // Register the writer as the SAX reader's content, DTD and error handler,
    // and also as its lexical and declaration handler.
    reader := CoSAXXMLReader60.Create;
    reader.contentHandler := writer as IVBSAXContentHandler;
    reader.dtdHandler := writer as IVBSAXDTDHandler;
    reader.errorHandler := writer as IVBSAXErrorHandler;
    reader.putProperty('http://xml.org/sax/properties/lexical-handler', writer);
    reader.putProperty('http://xml.org/sax/properties/declaration-handler', writer);

    // Now pass the DOM through the SAX handler, and it will call the writer
    reader.parse(Document60);

    // Fetch the writer's output and return it as the function result
    bstr := writer.output;
    Result := bstr;
end;

И версия записи в поток:

//Serializes the document into the given IStream.
//Encoding selects the output encoding; when it is an empty string the
//writer's default is kept. Raises Exception if Document60 or stream is nil.
class procedure TXMLHelper.WriteDocumentToStream(const Document60: DOMDocument60; const stream: IStream; Encoding: string);
var
    writer: IMXWriter;
    reader: IVBSAXXMLReader;
begin
{
    From http://support.microsoft.com/kb/275883
    INFO: XML Encoding and DOM Interface Methods

    MSXML has native support for the following encodings:
        UTF-8
        UTF-16
        UCS-2
        UCS-4
        ISO-10646-UCS-2
        UNICODE-1-1-UTF-8
        UNICODE-2-0-UTF-16
        UNICODE-2-0-UTF-8

    It also recognizes (internally using the WideCharToMultibyte API function for mappings) the following encodings:
        US-ASCII
        ISO-8859-1
        ISO-8859-2
        ISO-8859-3
        ISO-8859-4
        ISO-8859-5
        ISO-8859-6
        ISO-8859-7
        ISO-8859-8
        ISO-8859-9
        WINDOWS-1250
        WINDOWS-1251
        WINDOWS-1252
        WINDOWS-1253
        WINDOWS-1254
        WINDOWS-1255
        WINDOWS-1256
        WINDOWS-1257
        WINDOWS-1258
}

    if Document60 = nil then
        raise Exception.Create('TXMLHelper.WriteDocument: Document60 cannot be nil');
    if stream = nil then
        raise Exception.Create('TXMLHelper.WriteDocument: stream cannot be nil');

    // Set properties on the XML writer - including BOM, XML declaration and encoding
    writer := CoMXXMLWriter60.Create;
    writer.byteOrderMark := True; //Write the Byte Order Mark (BOM). The byteOrderMark property has no effect for BSTR or DOM output. (Default True)
    writer.omitXMLDeclaration := False; //Keep the XML declaration. Setting this to True makes IMXWriter skip it, which is useful for document fragments. (Default False)
    if Encoding <> '' then
        writer.encoding := Encoding; //Sets the encoding for the output. (Default "UTF-16")
    writer.indent := True; //Indent the output. (Default False)
    writer.standalone := True; //Sets the standalone attribute in the XML declaration

    // Register the writer as the SAX reader's content, DTD and error handler,
    // and also as its lexical and declaration handler.
    reader := CoSAXXMLReader60.Create;
    reader.contentHandler := writer as IVBSAXContentHandler;
    reader.dtdHandler := writer as IVBSAXDTDHandler;
    reader.errorHandler := writer as IVBSAXErrorHandler;
    reader.putProperty('http://xml.org/sax/properties/lexical-handler', writer);
    reader.putProperty('http://xml.org/sax/properties/declaration-handler', writer);


    writer.output := stream; //The resulting document will be written into the provided IStream

    // Now pass the DOM through the SAX handler, and it will call the writer
    reader.parse(Document60);

    // Flush the writer once parsing has finished
    writer.flush;
end;

Примечание: весь код передан в общественное достояние. Указание авторства не требуется.

0 голосов
/ 03 февраля 2016

C ++:

// Prepend a hard-coded XML declaration to XML text that is already held in a string.
// NOTE(review): nothing here checks that the text is actually UTF-8 encoded or that it
// does not already start with a declaration — confirm before relying on this.
std::string xmlString = ...
xmlString = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" + xmlString

Потому что, если серьезно, правильное решение - это шутка ...

...