Я пытаюсь использовать HTreeMap из MapDB, чтобы заполнить список миллионами записей из CSV-файла, но каждый раз, когда я добавляю в список новую запись, содержимое ранее добавленных HTreeMap перезаписывается.
Чтобы избежать перезаписи HTreeMap в итоговом списке значений, приходится для каждой строки создавать новое соединение с БД для хеш-карты, но такое решение приводит к исключению нехватки памяти кучи Java (Java heap space).
Существует ли изящный способ использовать HTreeMap при чтении более миллиона записей и добавлении их в список без дублирования данных?
/**
 * Reads a comma-separated file and returns one map per data line, keyed by the
 * trimmed column names taken from the first line of the file.
 *
 * <p>Every data line gets its own in-memory MapDB store and {@code HTreeMap}, so
 * the maps collected in the result never share state.
 *
 * <p>Any exception raised while reading (including an empty file, where the header
 * line is {@code null}, or a data line with fewer cells than the header) is logged
 * at debug level with its stack trace and then reported through
 * {@code assertTrue(false, ...)}.
 *
 * <p>NOTE(review): creating one store per line appears to be what the reported
 * {@code DBException$OutOfMemory} comes from (the trace shows it raised while a
 * store is being allocated). This cannot be avoided while the return type demands
 * a separate {@code HTreeMap} per line; for files with millions of lines consider
 * a single map keyed by line number, which requires a different return type.
 *
 * @param fileCSV the CSV file to read; its first line is treated as the header
 * @return the per-line maps (header name to trimmed cell value), in file order
 */
public GapList<HTreeMap<String, Object>> fn_ReadCSV_GapListHTMap(File fileCSV) {
    // Collects one map per data line of the CSV.
    GapList<HTreeMap<String, Object>> rows = new GapList<>();
    // try-with-resources closes the reader even when parsing fails part-way.
    try (BufferedReader reader = new BufferedReader(new FileReader(fileCSV))) {
        // The first line of the file is the header row.
        String headerLine = reader.readLine();
        String[] headers = headerLine.split(",");
        String line;
        while ((line = reader.readLine()) != null) {
            // Limit -1 keeps trailing empty cells so cell positions line up with the headers.
            String[] tokens = line.split(",", -1);
            // A fresh store and map per line: declared here (and only here) so each list
            // element is an independent map and no outer variable is shadowed.
            DB rowDb = DBMaker.memoryDB().closeOnJvmShutdown().make();
            // NOTE(review): expireMaxSize(25) presumably caps the map at 25 entries, so
            // rows with more columns may lose values — confirm against the MapDB docs.
            HTreeMap<String, Object> rowMap = (HTreeMap<String, Object>) rowDb
                    .hashMap("htmapLineData")
                    .keySerializer(Serializer.STRING)
                    .expireMaxSize(25)
                    .createOrOpen();
            for (int col = 0; col < headers.length; col++) {
                // Pair each header with the cell at the same position.
                rowMap.put(headers[col].trim(), tokens[col].trim());
            }
            rows.add(rowMap);
            // rowDb is deliberately left open: the returned map is backed by it, and
            // closing it here was observed to corrupt the collected list.
        }
    }
    catch (Exception exceptionCSVReader) {
        // Render the full stack trace into the log message before failing.
        StringWriter stack = new StringWriter();
        exceptionCSVReader.printStackTrace(new PrintWriter(stack));
        log.debug("DEBUG: The exception while reading CSV file is: "+stack);
        assertTrue(false, "ERROR: CSV file can't be read; hence exiting with exception !");
    }
    return rows;
}
Исключение, сообщаемое при выполнении вышеуказанного кода, выглядит следующим образом.
P.S.: utility.CSVHandler — это пользовательский класс, разработанный мной, а fn_ReadCSV_GapListHTMap — его метод.
The exception while reading CSV file is: org.mapdb.DBException$OutOfMemory: Java heap space
at org.mapdb.volume.ByteArrayVol.ensureAvailable(ByteArrayVol.java:98)
at org.mapdb.StoreDirect.<init>(StoreDirect.kt:94)
at org.mapdb.StoreDirect$Companion.make(StoreDirect.kt:57)
at org.mapdb.StoreDirect$Companion.make$default(StoreDirect.kt:56)
at org.mapdb.StoreDirect.compact(StoreDirect.kt:756)
at utility.CSVHandler.fn_ReadCSV_GapListHTMap(CSVHandler.java:292)
at appModules.Src1_Src2_Comparision_Functions.fn_CompareQlikSources(Src1_Src2_Comparision_Functions.java:695)
at testCasesQlik.testCSVHandler.fn_testCSV(testCSVHandler.java:81)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:124)
at org.testng.internal.Invoker.invokeMethod(Invoker.java:583)
at org.testng.internal.Invoker.invokeTestMethod(Invoker.java:719)
at org.testng.internal.Invoker.invokeTestMethods(Invoker.java:989)
at org.testng.internal.TestMethodWorker.invokeTestMethods(TestMethodWorker.java:125)
at org.testng.internal.TestMethodWorker.run(TestMethodWorker.java:109)
at org.testng.TestRunner.privateRun(TestRunner.java:648)
at org.testng.TestRunner.run(TestRunner.java:505)
at org.testng.SuiteRunner.runTest(SuiteRunner.java:455)
at org.testng.SuiteRunner.runSequentially(SuiteRunner.java:450)
at org.testng.SuiteRunner.privateRun(SuiteRunner.java:415)
at org.testng.SuiteRunner.run(SuiteRunner.java:364)
at org.testng.SuiteRunnerWorker.runSuite(SuiteRunnerWorker.java:52)
at org.testng.SuiteRunnerWorker.run(SuiteRunnerWorker.java:84)
at org.testng.TestNG.runSuitesSequentially(TestNG.java:1208)
at org.testng.TestNG.runSuitesLocally(TestNG.java:1137)
at org.testng.TestNG.runSuites(TestNG.java:1049)
at org.testng.TestNG.run(TestNG.java:1017)
at org.testng.remote.AbstractRemoteTestNG.run(AbstractRemoteTestNG.java:114)
at org.testng.remote.RemoteTestNG.initAndRun(RemoteTestNG.java:251)
at org.testng.remote.RemoteTestNG.main(RemoteTestNG.java:77)
Caused by: java.lang.OutOfMemoryError: Java heap space