Работа с большими XML файлами в наборе данных (нехватка памяти) - PullRequest
0 голосов
/ 20 февраля 2020

Я пытаюсь прочитать 1 ГБ XML файл с почти 1 миллионом записей через URL-адрес, сохранить <file> в наборе данных и сохранить их в базе данных, но через 10 минут приложение обработает sh и бросить Out of Memory Exception.

XML выглядит следующим образом:

<ICECAT-interface xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://data.icecat.biz/xsd/files.index.xsd">
  <files.index Generated="20200220011050">
    <file path="export/freexml.int/INT/1399.xml" Product_ID="1399" Updated="20200122151223" Quality="ICECAT" Supplier_id="1" Prod_ID="C4872A" Catid="377" On_Market="1" Model_Name="80" Product_View="333140" HighPic="http://images.icecat.biz/img/gallery/1399_4084669833.jpg" HighPicSize="2734733" HighPicWidth="4200" HighPicHeight="2772" Date_Added="20051027000000" Limited="No"></file>
    <file path="export/freexml.int/INT/1400.xml" Product_ID="1400" Updated="20200122151313" Quality="ICECAT" Supplier_id="1" Prod_ID="C4874A" Catid="377" On_Market="1" Model_Name="80" Product_View="283127" HighPic="http://images.icecat.biz/img/gallery/1400_6603855084.jpg" HighPicSize="2734913" HighPicWidth="4200" HighPicHeight="2952" Date_Added="20051027000000" Limited="No"></file>
  </files.index>
</ICECAT-interface>

Sub Main ():

Class Module1
    Public Shared Sub Main()
        Dim url As String = "http://data.Icecat.biz/export/freexml/EN/daily.index.xml"
        ProcessXMLFeedURL(url)
    End Sub
End Class

Функция:

Public Shared Function ProcessXMLFeedURL(MyURL As String) As Boolean
    Dim OK As Boolean = False
    Try
        Dim rssReq As WebRequest = WebRequest.Create(MyURL)
        Dim username As String = ""
        Dim password As String = ""
        Dim encoded As String = System.Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(username + ":" + password))
        rssReq.Headers.Add("Authorization", "Basic " + encoded)
        '//Get the WebResponse
        Dim rep As WebResponse = rssReq.GetResponse()
        '//Read the Response in a XMLTextReader
        Dim xtr As XmlTextReader = New XmlTextReader(rep.GetResponseStream())
        '// Set up the connection to the SQL server
        Dim MyConnectionString As String = "Data Source=...."
        Dim Connection As SqlConnection = New SqlConnection(MyConnectionString)
        Dim MyServer As Server = New Server(New ServerConnection(Connection))
        Dim db As Database = New Database(MyServer, "xxxxxx")
        db.Create()
        '//Create a new DataSet
        Dim ds As DataSet = New DataSet()
        ds.ReadXml(xtr)
        '// Parse tables
        Dim Mytable As Table
        Dim MyTableName As String = ds.Tables(1).TableName
        If Not HaveTable(MyConnectionString, MyTableName) Then
                '// Create the table
                Try
                    Mytable = New Table(db, MyTableName)
                Catch ex As Exception
                    Dim ii As Integer = 0
                End Try
                '// create the columns
                Dim Mycolumn As Column = New Column()
            For Each dc As DataColumn In ds.Tables(1).Columns
                Mycolumn = New Column(Mytable, dc.ColumnName)
                Mycolumn.DataType = getdatatype(dc.DataType.ToString)
                Mytable.Columns.Add(Mycolumn)
            Next
            Mytable.Create()
            Dim PrimaryKeys() As DataColumn = ds.Tables(1).PrimaryKey
            Dim PrimaryKey As DataColumn
                For Each PrimaryKey In PrimaryKeys
                    Dim Myindex As Index = New Index(Mytable, PrimaryKey.ColumnName)
                    Myindex.IndexKeyType = IndexKeyType.DriPrimaryKey
                    Myindex.IndexedColumns.Add(New IndexedColumn(Myindex, PrimaryKey.ColumnName))
                    Mytable.Indexes.Add(Myindex)
                Next
            End If
            Using MyConnection As SqlConnection = New SqlConnection(MyConnectionString)
                MyConnection.Open()
                Using bulkcopy As SqlBulkCopy = New SqlBulkCopy(MyConnection)
                bulkcopy.DestinationTableName = "[databasename].[dbo].[" + MyTableName + "]"
                Try
                    bulkcopy.WriteToServer(ds.Tables(1))
                Catch ex As Exception
                        Dim iw As Integer = 0
                    End Try
                End Using
                MyConnection.Close()
            End Using
    Catch ex As Exception
        Throw ex '// Do errorhanddling here
    End Try
    Return OK
End Function
...