Я новичок в R и впервые работаю с XML-файлом. Я просмотрел много постов на сайте о том, как загрузить XML с родительским и несколькими дочерними элементами в один фрейм данных в R, но либо строки повторяются, либо происходит ошибка.
Формат xml-файла, который я пытаюсь загрузить
<?xml version="1.0" encoding="utf-8"?>
<!--File SUMMARY: Total Number of Accounts in file: 2, Total Amount
Outstanding in file :$30-->
<OCADocument xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema" CreationDateTime="2019-03-
13T04:04:40.6537471-04:00" Jurisdiction="PUNE"
xmlns="www.oesc.ca/XMLSchema">
<Consumer ConsumerOID="11">
<ConsumerInformation>
<ConsumerLocationType>Residential</ConsumerLocationType>
</ConsumerInformation>
<BillingContact>
<NameInformation>
<LastName>ALIAS</LastName>
<FirstName>Jibson</FirstName>
</NameInformation>
<AddressInformation AddressType="BillingAddress">
<Address1>4608 32 abc</Address1>
<City>pune</City>
<StateProvince>MH</StateProvince>
<PostalZip>411017</PostalZip>
<CountryCode>CA</CountryCode>
</AddressInformation>
<PhoneInformation Type="Main">
<PhoneNumber>985623147</PhoneNumber>
</PhoneInformation>
<PhoneInformation Type="Home">
<PhoneNumber>785412369</PhoneNumber>
</PhoneInformation>
<CreditInformation>
<CreditStatus>ACC</CreditStatus>
<CreditScore>00001</CreditScore>
</CreditInformation>
</BillingContact>
<SigningContact>
<NameInformation>
<FullName>Jibson ALIAS</FullName>
</NameInformation>
<AddressInformation AddressType="BillingAddress">
<Address1>4608 32 abc</Address1>
<City>pune</City>
<StateProvince>MH</StateProvince>
<PostalZip>411017</PostalZip>
<CountryCode>CA</CountryCode>
</AddressInformation>
</SigningContact>
<Site SiteOID="400002" MarketParticipant="ANORTH">
<AccountID>
<LDCAccountNumber>000167</LDCAccountNumber>
</AccountID>
<SiteAddress>
<Address1>4608 32 abc</Address1>
<City>pune</City>
<StateProvince>MH</StateProvince>
<PostalZip>411017</PostalZip>
<CountryCode>CA</CountryCode>
</SiteAddress>
<PaymentPlan>
<ESGPaymentPlan>None</ESGPaymentPlan>
</PaymentPlan>
<Contract ContractOID="10000002">
<RtlrContractIdentifier>80000153</RtlrContractIdentifier>
<ContractState>FLW</ContractState>
<ContractPrice>7.1900000000</ContractPrice>
<ContractSigningDate>2018-09-23</ContractSigningDate>
<ContractFlowStartDate>2018-09-29</ContractFlowStartDate>
<ContractFlowEndDate>2019-09-29</ContractFlowEndDate>
<LanguagePreference>eng</LanguagePreference>
</Contract>
<BillingEntityOID>200214</BillingEntityOID>
</Site>
<Site SiteOID="45647" MarketParticipant="EPCOR">
<AccountID>
<LDCAccountNumber>0014587</LDCAccountNumber>
</AccountID>
<SiteAddress>
<Address1>4608 32 abc</Address1>
<City>pune</City>
<StateProvince>MH</StateProvince>
<PostalZip>411017</PostalZip>
<CountryCode>CA</CountryCode>
</SiteAddress>
<PaymentPlan>
<ESGPaymentPlan>None</ESGPaymentPlan>
</PaymentPlan>
<Contract ContractOID="100003">
<RtlrContractIdentifier>805553</RtlrContractIdentifier>
<ContractState>FLW</ContractState>
<ContractPrice>0.45000000</ContractPrice>
<ContractSigningDate>2018-09-23</ContractSigningDate>
<ContractFlowStartDate>2018-09-29</ContractFlowStartDate>
<ContractFlowEndDate>2019-09-29</ContractFlowEndDate>
<LanguagePreference>eng</LanguagePreference>
</Contract>
<BillingEntityOID>200001</BillingEntityOID>
</Site>
<ConsumerBillingEntity BillingEntityOID="200001" BusinessSegmentIdentifier="JEA">
<ExitFeesOutstanding>0</ExitFeesOutstanding>
<EstimateExitFees>0</EstimateExitFees>
<OldestOutstandingReceivMHlesDueDate>2019-01-11</OldestOutstandingReceivMHlesDueDate>
<Outstanding_Current>264.24</Outstanding_Current>
<Outstanding_1to30>221.19</Outstanding_1to30>
<Outstanding_31to60>0</Outstanding_31to60>
<Outstanding_61to90>143.66</Outstanding_61to90>
<Outstanding_91to120>0</Outstanding_91to120>
<Outstanding_121to150>0</Outstanding_121to150>
<Outstanding_151to180>0</Outstanding_151to180>
<Outstanding_181plus>0</Outstanding_181plus>
<LastPaymentAmount>200.00</LastPaymentAmount>
<LastPaymentDate>2018-12-24T00:00:00</LastPaymentDate>
<CustomerAccountNo>904511110</CustomerAccountNo>
<PaymentScoreInformation>
<PaymentScore>0</PaymentScore>
</PaymentScoreInformation>
</ConsumerBillingEntity>
</Consumer>
<Consumer ConsumerOID="421">
<ConsumerInformation>
<ConsumerLocationType>Residential</ConsumerLocationType>
</ConsumerInformation>
<BillingContact>
<NameInformation>
<LastName>BORUDE</LastName>
<FirstName>GANESH</FirstName>
</NameInformation>
<AddressInformation AddressType="BillingAddress">
<Address1>1420 Pimpri</Address1>
<City>PUNE</City>
<StateProvince>MH</StateProvince>
<PostalZip>41018</PostalZip>
<CountryCode>CA</CountryCode>
</AddressInformation>
<PhoneInformation Type="Main">
<PhoneNumber>4034789652</PhoneNumber>
</PhoneInformation>
<EMailInformation>
<EMailAddress>abc@gmail.com</EMailAddress>
</EMailInformation>
<CreditInformation>
<CreditStatus>ACC</CreditStatus>
<CreditScore>00111</CreditScore>
</CreditInformation>
</BillingContact>
<SigningContact>
<NameInformation>
<FullName>GANESH BORUDE</FullName>
</NameInformation>
<AddressInformation AddressType="BillingAddress">
<Address1>1420 Pimpri</Address1>
<City>PUNE</City>
<StateProvince>MH</StateProvince>
<PostalZip>41018</PostalZip>
<CountryCode>CA</CountryCode>
</AddressInformation>
</SigningContact>
<Site SiteOID="58796" MarketParticipant="ATCOSouth">
<AccountID>
<LDCAccountNumber>0000000416</LDCAccountNumber>
</AccountID>
<SiteAddress>
<Address1>1420 Pimpri</Address1>
<City>PUNE</City>
<StateProvince>MH</StateProvince>
<PostalZip>41018</PostalZip>
<CountryCode>CA</CountryCode>
</SiteAddress>
<PaymentPlan>
<ESGPaymentPlan>None</ESGPaymentPlan>
</PaymentPlan>
<Contract ContractOID="5400006">
<RtlrContractIdentifier>1000016700</RtlrContractIdentifier>
<ContractState>XXX</ContractState>
<ContractPrice>8.8900000000</ContractPrice>
<ContractSigningDate>2009-09-23</ContractSigningDate>
<LanguagePreference>eng</LanguagePreference>
</Contract>
<BillingEntityOID>3000415</BillingEntityOID>
</Site>
<Site SiteOID="44789653" MarketParticipant="ENMAX">
<AccountID>
<LDCAccountNumber>0020003657650</LDCAccountNumber>
</AccountID>
<SiteAddress>
<Address1>1420 Pimpri</Address1>
<City>PUNE</City>
<StateProvince>MH</StateProvince>
<PostalZip>41018</PostalZip>
<CountryCode>CA</CountryCode>
</SiteAddress>
<PaymentPlan>
<ESGPaymentPlan>None</ESGPaymentPlan>
</PaymentPlan>
<Contract ContractOID="140605">
<RtlrContractIdentifier>80010822</RtlrContractIdentifier>
<ContractState>FLW</ContractState>
<ContractPrice>0.0949000000</ContractPrice>
<ContractSigningDate>2018-10-02</ContractSigningDate>
<ContractFlowStartDate>2018-10-08</ContractFlowStartDate>
<ContractFlowEndDate>2019-10-08</ContractFlowEndDate>
<LanguagePreference>eng</LanguagePreference>
</Contract>
<BillingEntityOID>300045</BillingEntityOID>
</Site>
<ConsumerBillingEntity BillingEntityOID="300045" BusinessSegmentIdentifier="JEALP">
<ExitFeesOutstanding>0</ExitFeesOutstanding>
<EstimateExitFees>0</EstimateExitFees>
<OldestOutstandingReceivMHlesDueDate>2019-01-24</OldestOutstandingReceivMHlesDueDate>
<Outstanding_Current>121.47</Outstanding_Current>
<Outstanding_1to30>121.94</Outstanding_1to30>
<Outstanding_31to60>98.29</Outstanding_31to60>
<Outstanding_61to90>0</Outstanding_61to90>
<Outstanding_91to120>0</Outstanding_91to120>
<Outstanding_121to150>0</Outstanding_121to150>
<Outstanding_151to180>0</Outstanding_151to180>
<Outstanding_181plus>0</Outstanding_181plus>
<LastPaymentAmount>100.00</LastPaymentAmount>
<LastPaymentDate>2019-03-01T00:00:00</LastPaymentDate>
<CustomerAccountNo>982</CustomerAccountNo>
<PaymentScoreInformation>
<PaymentScore>15</PaymentScore>
</PaymentScoreInformation>
</ConsumerBillingEntity>
</Consumer>
</OCADocument>
Метод 1
# Method 1: parse the file and convert it to a data frame in a single call.
library(XML)
library(methods)
# Parse test.xml, keeping the result as internal nodes (useInternalNodes = TRUE).
xml_raw<-xmlParse("test.xml",useInternalNodes = TRUE)
# Convert the parsed document directly to a data frame; this is the call
# reported to fail with the "duplicate subscripts for columns" error.
xml_data<-xmlToDataFrame(xml_raw)
Сообщение об ошибке, которое я получаю, показано ниже:
Error in `[<-.data.frame`(`*tmp*`, i, names(nodes[[i]]), value = c("Residential", :
  duplicate subscripts for columns
Метод 2
# Method 2: convert the parsed document (xml_raw) to a nested list, build a
# data frame from each top-level element, and combine the pieces into one
# data frame.
# NOTE(review): ldply() comes from the plyr package, which is not loaded in
# the code shown — confirm library(plyr) is attached before running this.
df<-ldply(xmlToList(xml_raw), function(x) { data.frame(x) } )
Это загружает файл во фрейм данных, но одна и та же запись повторяется более двух раз. Снимок экрана вывода приведён ниже.
Я хотел бы получить по одной строке на каждый ConsumerOID со всеми деталями, без повторений. Все родительские и дочерние узлы должны стать столбцами фрейма данных.
Снимок экрана вывода