Анализирую очень большой файл CSV, и у меня где-то есть утечка памяти, но я не могу ее найти - PullRequest
0 голосов
/ 21 марта 2020

Когда я запускаю программу, используя меньший файл данных, она работает просто отлично, но когда я запускаю ее, используя больший (~ 450 000 строк данных) файл, она вылетает, говоря, что сборке мусора не хватает памяти. Я знаю, что кода много, и я благодарен за любые ваши предложения. Спасибо!

Я почти уверен, что ошибка в классе FileReader22, но я также включил класс DataAnalysis. Есть два других класса, один из которых создает объект Flight (строки, представляющие различные столбцы данных) и класс Runner, который выводит данные в файл .txt, но я на 99% уверен, что это не проблема.

Не уверен, нужно ли мне использовать поток вместо сканера (даже не уверен, как использовать поток, тбч ...)

РЕДАКТИРОВАТЬ: думал, что было бы разумно включить код ошибки.

Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded

    at java.util.Arrays.copyOfRange(Unknown Source)
    at java.lang.String.<init>(Unknown Source)
    at java.nio.HeapCharBuffer.toString(Unknown Source)
    at java.nio.CharBuffer.toString(Unknown Source)
    at java.util.regex.Matcher.toMatchResult(Unknown Source)
    at java.util.Scanner.match(Unknown Source)
    at java.util.Scanner.hasNextLine(Unknown Source)
    at FileReader22.makeList(FileReader22.java:24)
    at FlightRunner22.main(FlightRunner22.java:13)

Класс, который читает файл и разбирает данные на объекты:

public class FileReader22 {
    private ArrayList<Flights22> rawFlights = new ArrayList<Flights22>();

    private ArrayList<Flights22> filteredFlights = new ArrayList<Flights22>();

    private ArrayList<Flights22> cancelledFlights = new ArrayList<Flights22>();

    public FileReader22() throws IOException {
        filteredFlights = filterFlights(rawFlights);
        rawFlights = makeList();
        cancelledFlights = makeCancelledFlights(rawFlights);
    }
    public ArrayList<Flights22> makeList() throws IOException{

        ArrayList<Flights22> rawFlights = new ArrayList<Flights22>();//Flights that weren't cancelled
        File file = new File("INSERT FILENAME HERE");
        try {
            Scanner fileFinder = new Scanner(file);//Reads csv file

            while(fileFinder.hasNextLine()){
                String line = fileFinder.nextLine();


                String[] columnData = line.split(",");
                if(columnData[19].equals("0")) {
                    String dayOfMonth = (columnData[0]);
                    //String dayOfWeek = (columnData[1]);
                    //String flightDate = columnData[2];
                    String carrier = columnData[3];
                    String tailNum = columnData[4];
                    String originCode = (columnData[5]);
                    //String origin3L = columnData[6];
                    //String originState = columnData[7];
                    String destCode = (columnData[8]);
                    //String dest3L = columnData[9];
                    //String destState = columnData[10];
                    String depTime = (columnData[11]);
                    String depDelay = (columnData[12]);
                    //String wheelsOff = (columnData[13]);
                    //String wheelsOn = (columnData[14]);
                    String arrTime = (columnData[15]);
                    String arrDelay = (columnData[16]);
                    String cancel = (columnData[17]);
                    String cancelCode = columnData[18];
                    String diverted = (columnData[19]);
                    String airTime = (columnData[20]);
                    String distance = (columnData[21]);

                    Flights22 flight = new Flights22(dayOfMonth, carrier, 
                            tailNum, originCode, destCode,
                            depTime, depDelay, arrTime, 
                            arrDelay, cancel, cancelCode, diverted, airTime, 
                            distance);
                    rawFlights.add(flight);
                }
            }
            fileFinder.close();
        }
        catch(FileNotFoundException e) {
            System.out.println(e);
        }
       filterFlights(rawFlights);
       makeCancelledFlights(rawFlights);
       return this.rawFlights;
        }

    public boolean isFullFilled(Flights22 flight) {

        if(flight.getAirTime().equals(null) ||
                flight.getArrDelay().equals("") ||
                //flight.getArrTime().equals("") || 
                flight.getCancel().equals("") ||
                flight.getCarrier().equals("") ||
                flight.getDayOfMonth().equals("") ||
                //flight.getDayOfWeek().equals("") ||
                flight.getDepDelay().equals("") ||
                //flight.getDepTime().equals("") ||
                //flight.getDest3L().equals("") ||
                flight.getDestCode().equals("") ||
                //flight.getDestState().equals("")||
                flight.getDistance().equals("") ||
                flight.getDiverted().equals("") ||
                //flight.getFlightDate().equals("") ||
                //flight.getOrigin3L().equals("") ||
                flight.getOriginCode().equals("") ||
                //flight.getOriginState().contentEquals("") ||
                flight.getTailNum().equals("")){
                //flight.getWheelsOff().equals("") ||
                //flight.getWheelsOn().equals("")) {
            return false;
        }
        return true; 
    }
    public boolean isFinishedFlights(Flights22 flight){
        if(flight.getCancel().equals("0")) {
            return true;
        }
        return false; 
    }
    public ArrayList<Flights22> filterFlights(ArrayList<Flights22> flights){
        for(Flights22 flight : flights) {
            if(isFinishedFlights(flight) && isFullFilled(flight)) {
                filteredFlights.add(flight);
            }
        }
        return filteredFlights;
    }
    public ArrayList<Flights22> makeCancelledFlights(ArrayList<Flights22> flightList){

        for(Flights22 flight : flightList) {
            if(flight.getCancel().equals("1")) {
                cancelledFlights.add(flight);
            }
        }
        return this.cancelledFlights;
    }
}

Класс, который выполняет весь анализ данных:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
public class DataAnalysis22 {
   public HashMap<String, Integer> makeCancellationCountHash(ArrayList<String> airports, ArrayList<Flights22> forgottenFlights){
    HashMap<String, Integer> cancellationCountHash = new HashMap<String, Integer>();

       for(String carrier : airports) {
           int tally = 0;
           for(Flights22 flight : forgottenFlights) {
               if(flight.getCarrier().equals(carrier)) {
                   tally++;
               }
           cancellationCountHash.put(carrier, tally);
           }
       }
       return cancellationCountHash;
   }
   //Returns the total number of cancelled flights for all airlines
   public HashMap<String, Integer> makeCompletedCountHash(ArrayList<String> airports, ArrayList<Flights22> fulfilledFlights){
    HashMap<String, Integer> completedCountHash = new HashMap<String, Integer>();

       for(String carrier : airports) {
           int tally = 0;
           for(Flights22 flight : fulfilledFlights) {
               if(flight.getCarrier().equals(carrier)) {
                   tally++;
               }
           completedCountHash.put(carrier, tally);
           }
       }
       return completedCountHash;
   }  
   /*
    * Take the value from cancount, divide it by the sum of cancount and comcount, put it in a new hashmap
    */
   public String makeCancellationPercentage(ArrayList<Flights22> finishedFlights, ArrayList<Flights22> forgottenFlights) {
       HashMap<String, Integer> completedHash = makeCompletedCountHash(castOfCarriers(finishedFlights), finishedFlights);
       HashMap<String, Integer> cancelledHash = makeCancellationCountHash(castOfCarriers(finishedFlights), forgottenFlights);
       HashMap<String, Double> percentageHash = new HashMap<String, Double>();

       for(String comCarrierCode : completedHash.keySet()) {
           for(String canCarrierCode : cancelledHash.keySet()) {
               if(comCarrierCode.equals(canCarrierCode)) {
                   Double percent = 0.0;
                   percentageHash.put(comCarrierCode, ((Double.valueOf(cancelledHash.get(canCarrierCode)) / 
                           (Double.valueOf((cancelledHash.get(canCarrierCode)) + completedHash.get(comCarrierCode))))));
               }
           }
       }
       String maxCancelCarrier = Collections.max(percentageHash.entrySet(), Map.Entry.comparingByValue()).getKey();

       Double maxCancelPercent = percentageHash.get(maxCancelCarrier) * 100;

       String answer = maxCancelCarrier + "," + Double.toString(maxCancelPercent);
       completedHash = null;
       cancelledHash = null;
       percentageHash = null;
       return answer;
   }
   //Returns a list of all of the carriers
   public ArrayList<String> castOfCarriers(ArrayList<Flights22> finishedFlights){
       ArrayList<String> carriers = new ArrayList<String>();

       for(Flights22 flight : finishedFlights) {

           if(!(carriers.contains(flight.getCarrier()))){

               carriers.add(flight.getCarrier());
       }
       }
       return carriers;
   }
   //Takes all the tailNums from the flight objects and inputs them into an ArrayList w/o duplicates
   public ArrayList<String> allTailNums(ArrayList<Flights22> finishedFlights){

       ArrayList<String> tailNums = new ArrayList<String>();

       for(Flights22 flight : finishedFlights) {
           if(!(flight.getTailNum().equals("")) && !(tailNums.contains(flight.getTailNum()))) {
               tailNums.add(flight.getTailNum());
           }
       }
       return tailNums;

       /*
        * Make a hashmap that uses an arraylist of each individual tailnum as the keys. Then, a for loop goes through all
        * of the flight objects and searches for each tailnum. When it finds a tailnum, it gets the miles flown and adds
        * them to the value in the hashmap associated with the proper tailnum key.
        */  
   }
   //Answers Question (3)
   public String tailTally(ArrayList<Flights22> finishedFlights) {
       HashMap<String, Integer> tailNumTallyHash = new HashMap<String, Integer>();
       ArrayList<Flights22> flight = finishedFlights;
       ArrayList<String> tailArray = allTailNums(finishedFlights);
       String maxTailNum= "";
       int tailNumSum = 0;

       for(String tail : tailArray) {
           tailNumSum = 0;
           for(Flights22 flyer : flight) {
               int miles = 0;
               if(tail.equals(flyer.getTailNum())) {
                   miles = Integer.parseInt(flyer.getDistance());
                   tailNumSum += miles;  
               }
               tailNumTallyHash.put(tail, tailNumSum);
           }
       }
       maxTailNum = Collections.max(tailNumTallyHash.entrySet(), Map.Entry.comparingByValue()).getKey();
       flight = null;
       tailArray = null;
       tailNumTallyHash = null;
       return maxTailNum;
   }
   //Question (2)
   //Returns the cancel code with the most entries.
   public String cancelCodeCount(ArrayList<Flights22> forgottenFlights) {
       int cancelCountA = 0;
       int cancelCountB = 0;
       int cancelCountC = 0;
       int cancelCountD = 0;

       String cancelCountKing = "";
    for(Flights22 flight : forgottenFlights) {
           if(flight.getCancelCode().contains("A")) {
               cancelCountA++;
           }
           else {
               if(flight.getCancelCode().contains("B")) {
                   cancelCountB++;
               }
               if(flight.getCancelCode().contains("C")) {
                   cancelCountC++;
               }
               if(flight.getCancelCode().contains("D")) {
                   cancelCountD++;
               }
           }
           if(cancelCountA > cancelCountB && cancelCountA > cancelCountC && cancelCountA > cancelCountD) {
               cancelCountKing = "A";
           }
           if(cancelCountB > cancelCountA && cancelCountB > cancelCountC && cancelCountB > cancelCountD) {
               cancelCountKing = "B";
           }
           if(cancelCountC > cancelCountA && cancelCountC > cancelCountB && cancelCountC > cancelCountD) {
               cancelCountKing = "C";
           }
           if(cancelCountD > cancelCountA && cancelCountD > cancelCountB && cancelCountD > cancelCountC) {
               cancelCountKing = "D";
           }
    }
    return cancelCountKing;

   }
      // Makes an array of all unique airport codes 
   public ArrayList<String> airportArray(ArrayList<Flights22> finishedFlights){
       ArrayList<String> allAirports = new ArrayList<String>();
       for(Flights22 flight : finishedFlights) {
           if(!(allAirports.contains(flight.getOriginCode()))) {
               allAirports.add(flight.getOriginCode());
           }
       }
       return allAirports;
   }   
   //Answers Question (4)
   public String airportAccruer(ArrayList<Flights22> finishedFlights) {
       ArrayList<String> allAirports = airportArray(finishedFlights);
       HashMap<String, Integer> airportArrivalHash = makeAirportArrivalHash(allAirports, finishedFlights);
       HashMap<String, Integer> airportDepartureHash = makeAirportDepartureHash(allAirports, finishedFlights);

       HashMap<String, Integer> totalMap = new HashMap<String, Integer>(airportArrivalHash);
       for(String airportCode : airportDepartureHash.keySet()) {
           if(totalMap.containsKey(airportCode)) {
               if(totalMap.containsKey(airportCode)) {
                   totalMap.put(airportCode, airportDepartureHash.get(airportCode) + totalMap.get(airportCode));
           }else {
               totalMap.put(airportCode,  airportDepartureHash.get(airportCode));
           }
           }
       }
       String maxNum = Collections.max(totalMap.entrySet(), Map.Entry.comparingByValue()).getKey();
       airportArrivalHash = null;
       airportDepartureHash = null;
       return maxNum;
   }
   //Method to make a hashmap of arrivals from each airport with airport code as key and tally of arrivals as value

   public HashMap<String, Integer> makeAirportArrivalHash(ArrayList<String> airports, ArrayList<Flights22> finishedFlights){
       HashMap<String, Integer> airportArrivalHash = new HashMap<String, Integer>();

       for(String airportCode : airports) {
           int tally = 0;
           for(Flights22 flight : finishedFlights) {
               if(flight.getDestCode().equals(airportCode)) {
                   tally++;
               }
           airportArrivalHash.put(airportCode, tally);
           }
       }
       return airportArrivalHash;
   }
   //Method to make a hashmap of departures from each airport with airport code as key and tally of departures as value
   public HashMap<String, Integer> makeAirportDepartureHash(ArrayList<String> airports, ArrayList<Flights22> finishedFlights){
       HashMap<String, Integer> airportDepartureHash = new HashMap<String, Integer>();
       for(String airportCode : airports) {
           int tally = 0;
           for(Flights22 flight : finishedFlights) {
               if(flight.getOriginCode().equals(airportCode)) {
                tally++;   
               }
           airportDepartureHash.put(airportCode, tally);
           }
   }
       return airportDepartureHash;
   }
   public HashMap<String, Integer> maxMover(ArrayList<Flights22> finishedFlights) {
       ArrayList<String> airportList = airportArray(finishedFlights);
       HashMap<String, Integer> airportArrs = makeAirportArrivalHash(airportList, finishedFlights);
       HashMap<String, Integer> airportDeps = makeAirportDepartureHash(airportList, finishedFlights);
       HashMap<String, Integer> maxMoverAirports = new HashMap<>(airportArrs);

       airportDeps.forEach((key, value) -> maxMoverAirports.merge(key, value, (v1, v2) -> (v1 + v2)));
       airportArrs = null;
       airportDeps = null;

       return maxMoverAirports;
   }
   //Answers Question (4)
   public String busiestAirport(ArrayList<Flights22> finishedFlights) {
       HashMap<String, Integer> busiestAirport = maxMover(finishedFlights);
       String maxNum = Collections.max(busiestAirport.entrySet(), Map.Entry.comparingByValue()).getKey();
       busiestAirport = null;
       return maxNum;
   }
   //Answers Question(5)
   //Departures - arrivals
   public String biggestSource(ArrayList<Flights22> finishedFlights) {
       ArrayList<String> airportList = airportArray(finishedFlights);
       HashMap<String, Integer> airportArrs = makeAirportArrivalHash(airportList, finishedFlights);
       HashMap<String, Integer> airportDeps = makeAirportDepartureHash(airportList, finishedFlights);
       HashMap<String, Integer> busiestHash = new HashMap<>();

       for(String airportCode : airportList) {
           int sourceDiff = airportDeps.get(airportCode) - airportArrs.get(airportCode);
           busiestHash.put(airportCode, sourceDiff);
       }
           airportList = null;
       String maxSource = Collections.max(busiestHash.entrySet(), Map.Entry.comparingByValue()).getKey();
       return maxSource;
   }
   //Answers Question(6)
   public String biggestSink(ArrayList<Flights22> finishedFlights) {
       ArrayList<String> airportList = airportArray(finishedFlights);
       HashMap<String, Integer> airportArrs = makeAirportArrivalHash(airportList, finishedFlights);
       HashMap<String, Integer> airportDeps = makeAirportDepartureHash(airportList, finishedFlights);
       HashMap<String, Integer> sinkHash = new HashMap<>();

       for(String airportCode : airportList) {
           int sinkDiff = airportArrs.get(airportCode) - airportDeps.get(airportCode);
           sinkHash.put(airportCode, sinkDiff);
       }
       airportList = null;

       String maxSource = Collections.max(sinkHash.entrySet(), Map.Entry.comparingByValue()).getKey();
       return maxSource;
   }
   /*
    * If flight depdelay >= 60 tally++, if flight arrdelay >= 60 tally++, if flight
    * depdelay and flight arrdelay >= 60, tally -1
    */

   //Answers Question (7)
   public int AADelay(ArrayList<Flights22> finishedFlights) {
       int AADelayTally = 0;
       for(Flights22 flight : finishedFlights) {
           if(flight.getCarrier().equals("AA")) {
               if(Integer.parseInt(flight.getDepDelay()) >= 60 || Integer.parseInt(flight.getArrDelay()) >= 60) {
                   AADelayTally++;{
                   if(Integer.parseInt(flight.getArrDelay()) >= 60 && Integer.parseInt(flight.getDepDelay()) >= 60) {
                       AADelayTally--;
                   }
               }
           }
       } 
   }
       return AADelayTally;  
   }
   /*
    * Take in the arraylist of fulfilled flights. Iterate through flights, finding biggest difference between depdelay
    * and arrdelay. If it is the biggest so far, store the day of month, the tailnum, and the depdelay in 3 different
    * variables. Return all three as a String. 
    */
   //Answers Question (8)
   public String savesTheDelay(ArrayList<Flights22> finishedFlights) {
       String saveReturn = "";
       int maxSave = 0;
       for(Flights22 flight : finishedFlights) {

           int save = Integer.parseInt(flight.getDepDelay());
           if(Integer.parseInt(flight.getArrDelay()) <= 0 && save > maxSave) {
               maxSave = save;
               int moDay = Integer.parseInt(flight.getDayOfMonth());
               int saveDelay = Integer.parseInt(flight.getDepDelay());
               String saveTail = flight.getTailNum();
               saveReturn = Integer.toString(moDay) + "," + Integer.toString(saveDelay) + "," + saveTail;
           }
       }
       return saveReturn;
   } 
   //Answers Question (9)
   //Which airport had the fewest total flights?
   public String fewestFlights(ArrayList<Flights22> finishedFlights) {
       HashMap<String, Integer> slowestAirport = maxMover(finishedFlights);
       String minNum = Collections.min(slowestAirport.entrySet(), Map.Entry.comparingByValue()).getKey();
       slowestAirport = null;
       return minNum;
   }
   public HashMap<String, Integer> mminMover(ArrayList<Flights22> finishedFlights) {
       ArrayList<String> airportList = airportArray(finishedFlights);
       HashMap<String, Integer> airportArrs = makeAirportArrivalHash(airportList, finishedFlights);
       HashMap<String, Integer> airportDeps = makeAirportDepartureHash(airportList, finishedFlights);
       HashMap<String, Integer> minMoverAirports = new HashMap<>(airportArrs);

       airportDeps.forEach((key, value) -> minMoverAirports.merge(key, value, (v1, v2) -> (v1 + v2)));
       airportArrs = null;
       airportDeps = null;
       return minMoverAirports;
   } 
}
...