Мне нужно преобразовать 5.000.000 записей из БД в JSON, но у меня заканчивается память после 4.000 записей.
Я использую Задачу, думая, когда задача завершена, GC очищает все в потоке из памяти.
public class Program()
{
public static void Main(string[] args)
{
Program p = new Program();
p.ExportUsingTask();
}
public void ExportUsingTask()
{
List<int> ids = Program.LoadID(); // trying dont keep DBContext references, so GC can free memory
GC.Collect(); // GC can clear 130MB of memory, DBContext have no references anymore
GC.WaitForPendingFinalizers();
foreach (int item in ids)
{
Task job = new Task(() => new Program().Process(item));
job.RunSynchronously();
Task.WaitAll(job);
job.Dispose();
job = null;
GC.Collect(); // GC don't clear memory, uses more and more memory at each iteration, until OutOfMemoryException
GC.WaitForPendingFinalizers();
}
}
public static List<int> LoadID()
{
List<int> ids = new List<int>();
using (Context db = new Context())
{
ids = db.Alfa.Where(a => a.date.Year == 2019).Select(a => a.id).ToList<int>(); // load 500.000 id from DB, use 130MB of memory
// have some business logic here, but isn't the problem, memory is free after execution anyway
db.Dispose();
}
return ids;
}
public void Process(int id)
{
Beta b = GetBetaFromAlfa(id); // Beta is JSON model that I need save to file
string json = Newtonsoft.Json.JsonConvert.SerializeObject(b);
b = null;
using (StreamWriter sw = System.IO.File.AppendText(@"c:\MyFile.json"))
{
sw.Write(json);
sw.Close();
sw.Dispose();
}
GC.Collect(); // GC don't clear memory
GC.WaitForPendingFinalizers();
}
public static Beta GetBetaFromAlfa(int idAlfa)
{
Alfa a = null; // Alfa is my model in DB
Beta b = null; // Beta is JSON model that I need save to file
using (Context db = new Context())
{
Alfa a = db.Alfa.Single(a => a.id == idAlfa);
b = ConvertAlfaToBeta(a);
db.Dispose();
}
GC.Collect(); // GC don't clear memory
GC.WaitForPendingFinalizers();
return b;
}
public static Beta ConvertAlfaToBeta(Alfa alfa)
{
// business logic, something like:
// beta.id = alfa.id;
// beta.name = alfa.name;
// only simple type association (int, string, decimal, datetime, etc)
}
}
public class Alfa(){ ... }
public class Beta(){ ... }
В первой попытке я делал один цикл, читая записи одну за другой, когда я получил 100 записей, я сохранил весь JSON в файл. Но мне все равно не хватило памяти, когда я получил 4000 записей, используя цикл:
public void ExportUsingLoop()
{
List<int> ids = Program.LoadID(); // trying dont keep DBContext references, so GC can free memory
GC.Collect(); // GC can clear 130MB of memory, DBContext have no references anymore
GC.WaitForPendingFinalizers();
int count = 0;
StringBuilder content = new StringBuilder();
foreach (int item in ids)
{
count++;
Beta b = GetBetaFromAlfa(id); // Beta is JSON model that I need save to file
string json = Newtonsoft.Json.JsonConvert.SerializeObject(b);
content.AppendLine(json);
b = null;
json = null;
if(count % 100 == 0)
{
using (StreamWriter sw = System.IO.File.AppendText(@"c:\MyFile.json"))
{
sw.Write(content.ToString());
content.Clear(); // just for clarification
sw.Close();
sw.Dispose();
}
GC.Collect(); // GC don't clear memory, uses more and more memory at each iteration, until OutOfMemoryException
GC.WaitForPendingFinalizers();
}
}
}