У меня очень большой файл в формате JSON Lines (4 000 000 строк), и мне нужно извлечь несколько событий из каждой строки. Итоговый CSV-файл содержит 15 000 000 строк. Как я могу оптимизировать этот скрипт?
Я использую PowerShell Core 7, и преобразование занимает около 50 часов.
Мой скрипт PowerShell:
# Converts a JSON Lines file into a semicolon-delimited CSV file, emitting one
# CSV row per element of `events.items` in every input record.
#
# Input : $Importfile - one JSON document per line, read as UTF-8.
# Output: $Exportfile - columns Key;CompanyID;Eventtype;Eventdate;Eventdescription.
#         An existing export file is deleted first.
# Progress is printed after every 30000 input lines; the estimate is based on
# the hard-coded $totalrows value, so it is only as accurate as that number.
$stopwatch = [system.diagnostics.stopwatch]::StartNew()
$totalrows = 4000000
$encoding = [System.Text.Encoding]::UTF8
$i = 0     # input lines read since the last flush to disk
$ig = 0    # input lines read in total

# Use a growable list for the batch buffer. Appending to a PowerShell array
# with `+=` allocates a new array and copies every existing element on each
# append, which makes filling a batch quadratic in the batch size. List.Add
# is amortized constant time.
$output = [System.Collections.Generic.List[object]]::new()

$Importfile = "C:\file.jsonl"
$Exportfile = "C:\file.csv"

# Export-Csv is called with -Append below, so a stale file must be removed.
if (Test-Path $Exportfile) {
    Remove-Item -Path $Exportfile
}

# ReadLines enumerates the file lazily, so the whole input is never in memory.
foreach ($line in [System.IO.File]::ReadLines($Importfile, $encoding)) {
    $json = $line | ConvertFrom-Json

    # These two values are the same for every event of the record, so resolve
    # them once per line instead of once per event.
    # NOTE(review): the sample record shown with this script has no "Register"
    # property; if it is absent in the real data, Key is written as an empty
    # value - confirm the intended source of this column.
    $registerKey = $json.Register.Key
    $companyId = $json.id

    foreach ($item in $json.events.items) {
        $CSVLine = [pscustomobject]@{
            Key              = $registerKey
            CompanyID        = $companyId
            Eventtype        = $item.type
            Eventdate        = $item.date
            Eventdescription = $item.description
        }
        $output.Add($CSVLine)
    }

    $i++
    $ig++

    # Flush the buffered rows to disk every 30000 input lines to bound memory.
    if ($i -ge 30000) {
        $output | Export-Csv -Path $Exportfile -NoTypeInformation -Delimiter ";" -Encoding UTF8 -Append
        $i = 0
        # Export-Csv has already consumed the rows, so the buffer can be reused.
        $output.Clear()

        $minutes = $stopwatch.elapsed.TotalMinutes
        $percentage = $ig / $totalrows * 100
        $totalestimatedtime = $minutes * (100/$percentage)
        $timeremaining = $totalestimatedtime - $minutes
        Write-Host "Events: Total minutes passed: $minutes. Total minutes remaining: $timeremaining. Percentage: $percentage"
    }
}

# Write whatever is left over from the last, incomplete batch.
$output | Export-Csv -Path $Exportfile -NoTypeInformation -Delimiter ";" -Encoding UTF8 -Append
Write-Output $ig
$stopwatch.Stop()
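Вот структура JSON (одна запись; в исходном файле каждая запись занимает одну строку, здесь она отформатирована для удобства чтения):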
{
"id": "111111111",
"name": {
"name": "Test Company GmbH",
"legalForm": "GmbH"
},
"address": {
"street": "Berlinstr.",
"postalCode": "11111",
"city": "Berlin"
},
"status": "liquidation",
"events": {
"items": [{
"type": "Liquidation",
"date": "2001-01-01",
"description": "Liquidation"
}, {
"type": "NewCompany",
"date": "2000-01-01",
"description": "Neueintragung"
}, {
"type": "ControlChange",
"date": "2002-01-01",
"description": "Tested Company GmbH"
}]
},
"relatedCompanies": {
"items": [{
"company": {
"id": "2222222",
"name": {
"name": "Test GmbH",
"legalForm": "GmbH"
},
"address": {
"city": "Berlin",
"country": "DE",
"formattedValue": "Berlin, Deutschland"
},
"status": "active"
},
"roles": [{
"date": "2002-01-01",
"name": "Komplementär",
"type": "Komplementaer",
"demotion": true,
"group": "Control",
"dir": "Source"
}, {
"date": "2001-01-01",
"name": "Komplementär",
"type": "Komplementaer",
"group": "Control",
"dir": "Source"
}]
}, {
"company": {
"id": "33333",
"name": {
"name": "Test2 GmbH",
"legalForm": "GmbH"
},
"address": {
"city": "Berlin",
"country": "DE",
"formattedValue": "Berlin, Deutschland"
},
"status": "active"
},
"roles": [{
"date": "2002-01-01",
"name": "Komplementär",
"type": "Komplementaer",
"demotion": true,
"group": "Control",
"dir": "Source"
}, {
"date": "2001-01-01",
"name": "Komplementär",
"type": "Komplementaer",
"group": "Control",
"dir": "Source"
}]
}]
}
}