Утечка памяти в C Программа, которая читает два списка и создает список похожих слов между двумя - PullRequest
1 голос
/ 14 января 2020

Я нахожусь в процессе создания программы C, назначенной в моем классе, которая читает два файла TXT и генерирует три файла TXT, причем первый и второй выходные файлы содержат отсортированные списки каждого слова в каждом соответствующем входном текстовом файле, а также количество вхождений слова, в то время как третий выходной файл содержит отсортированный список того, какие слова содержатся в обоих файлах, а также общее количество вхождений, в которых слово встречается в двух файлах. Это требует реализации двусвязных списков, что у меня есть опыт реализации в C ++, но не в C.

С другой стороны, моя программа работает, как ожидалось, и генерирует отсортированный выходной файл как желательно. Тем не менее, другое требование моего назначения - устранить любые утечки памяти, которые должны произойти в моей программе, используя Valgrind в качестве инструмента для обнаружения и обнаружения утечек. Мой вывод из Valgrind предупреждает о двух случаях «точно потерянных» блоков, и у меня возникают проблемы с их поиском, что, вероятно, связано с моей неопытностью кодирования структур DLL в C и использованием Valgrind для отслеживания утечек. Ребята, вы видите утечки памяти / проблемы, которые могут быть причиной этих проблем?

Мой код ниже. Для этого требуется пять аргументов CL, два входных TXT-файла, три выходных TXT-файла.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

typedef struct wordStruct
{
    char* str; //Stored word
    int count; //Stored count of words             
    struct wordStruct* next; //Pointer to next word
    struct wordStruct* prev; //Pointer to previous word
} word;

word* createWord(char* inputWord)
{
    //First, allocate space for the word.
    printf("\n Input word has a strlen of %lu \n", strlen(inputWord)); //Prints the length (size) of the word being created.
    word* newWord = malloc(sizeof(word));
    printf("\n newWord has a size of %lu. \n", sizeof(word));
    if (NULL != newWord)
    {
        //Allocate new space for storing the new word in 'str'
        //If str was an array of fixed size, storage would be wasted!
        newWord->str = (char*)malloc((strlen(inputWord)) + 1);
        strcpy(newWord->str, inputWord); //Copy the "word" into newWord->str
        newWord->str[strlen(inputWord)] = '\0'; //POSIX end of word.
        printf("\nCalled from line 28... Created this: %s\n", newWord->str);
        //Set up the total word 'count' to 1.
        newWord->count = 1;
        //Initialize the linked list pointers (implement the prev next)
        newWord->next = NULL;
        newWord->prev = NULL;
        //newWord->prev = what?
    }
    //free(inputWord);
    return newWord;
}

word* addWord(word** wordList, char* inputWord) //wordList allows for multiple lists to be compared. Agnostic to the main function.
{
    if (!*wordList) //This handles an empty list.
    {
        printf("\n Creating a new list... (called from line 42)... \n");
        return *wordList = createWord(inputWord);
    }

    word* temp = *wordList; //Now, the program will search for word duplicates, defined as words that already exist in the list. Duplicates will increase the count.
    //If the word exists in the list, increment the count. Otherwise a new word will be created.
    while (temp->next != NULL)
    {
        if (strcmp(temp->str, inputWord) == 0)
        {
            temp->count = temp->count + 1; //Increment the count if a duplicate is found.
            return *wordList;
        }
        else
        {
            //temp->prev = temp;
            temp = temp->next; //Keep iterating through the list to look for dupes.
        }
    }
    word* newWord = createWord(inputWord); //No duplicate is found, so a new word node will be created.
    if (NULL != newWord)
    {
        temp->next = newWord; //Insert at the tail of the list.
        printf("\nCalled from Line 64... New Word Added: %s\n", newWord->str);
    }
    //free(temp); //DO NOT FREE TEMP.
    return newWord;
}

word* readFileMakeList(word* inputList, char* inputFile)
{
    FILE* fileToOpen;
    char* delim = ". ,:;\t\n";
    //char* fileRead = inputFile;
    fileToOpen = fopen(inputFile, "r");
    if (fileToOpen == 0)
    {
        printf("\nCalled from Line 76... File is not opened!\n");
    }
    else
    {
        printf("\n%s has been opened!\n", inputFile);
    }
    int ch = 0, readingWord = 1, charIndex = 0;
    char currentWord[100];
    while ((ch = fgetc(fileToOpen)) != EOF)
    {
        if (strchr(delim, ch)) //If a delimiter is read, terminate the current word being read, readingWord
        {
            if (readingWord == 1)
            {
                readingWord = 0; //Terminates the current word.
                currentWord[charIndex] = '\0'; //POSIX ending

                printf("\nCalled from line 119... addWord(listOne, %s)\n", currentWord);
                if (addWord(&inputList, currentWord))
                {
                    printf(" Added: %s\n", inputList->str);
                }
                else
                {
                    fprintf(stderr, "\nError: addWord failed.\n");
                }
                charIndex = 0;

            }
        }
        else
        {
            readingWord = 1;
            currentWord[charIndex++] = tolower(ch); //Makes ch lowercase
        }
    }
    if (readingWord == 1)
    {
        currentWord[charIndex] = '\0';
        //Add to the list
        printf("\naddWord(listOne, %s)\n", currentWord);
        if (addWord(&inputList, currentWord))
        {
            printf(" added %s\n", inputList->str);
        }
        else
        {
            fprintf(stderr, "Error: addWord failed!");
        }
    }

    return inputList;
}

word* createLinks(word* inputList) //Investigate if the problem is the linked lists?
{
    int firstItem = 1;
    word* temp = inputList; //Temp pointer that will be used.
    word* firstIter = inputList;
    while (firstIter != NULL)
    {
        if (firstItem == 1)
        {
            firstItem = 0;
            firstIter->prev = NULL;
            firstIter = firstIter->next;
        }
        else
        {
            firstIter->prev = temp;
            temp = temp->next;
            firstIter = firstIter->next;
        }
    }
    return inputList;
}

word* sortList(word* inputList) //Return head of sorted list. IT WORKS!
{
    //word *inputListStart = inputList; //Used to reset list.
    word* firstIter = inputList;
    word* secondIter = inputList;

    while (firstIter != NULL)
    {
        secondIter = firstIter;
        while (secondIter != NULL)
        {
            if (strcmp(firstIter->str, secondIter->str) > 0) //This means that the secondIter value is less than firstIter, and they must be swapped.
            {
                char* temp = firstIter->str; //Use a temp word struct to perform a swap.
                firstIter->str = secondIter->str;
                secondIter->str = temp;
            }
            secondIter = secondIter->next;
        }
        firstIter = firstIter->next;
    }
    //free(firstIter);
    //free(secondIter);
    return inputList; //Returns the head of the list
}

void printToOutputFile(word* inputList, char* outputFile)
{
    char* fileRead = outputFile;
    FILE* fileOutput = fopen(fileRead, "w+");
    if (fileOutput == 0)
    {
        printf("\nFrom line 135... Output file not opened!\n");
    }
    else
    {
        printf("\nFrom Line 139... Output file opened!\n");
    }

    while (inputList != NULL)
    {
        //Add the word name, then its corresponding count, then move onto next.
        fprintf(fileOutput, "%s %d \n", inputList->str, inputList->count);
        printf("%s ", inputList->str);
        //inputList->prev = inputList;
        inputList = inputList->next;
    }
}

void createCombinedOutputFile(word* firstList, word* secondList, char* finalOutput)
{
    char* fileRead = finalOutput;
    FILE* fileOutput = fopen(fileRead, "w+");
    if (fileOutput == 0)
    {
        printf("\nFrom line 158... Output file not opened!\n");
    }
    else
    {
        printf("\nFrom Line 139... Output file opened!\n");
    }
    word* secondListStart = secondList;

    while (firstList != NULL)
    {
        secondList = secondListStart;
        while (secondList != NULL)
        {
            printf("\nComparing %s to %s\n", firstList->str, secondList->str);
            if (strcmp(firstList->str, secondList->str) == 0)
            {
                int combinedCount = firstList->count + secondList->count;
                fprintf(fileOutput, "%s,%d \n", firstList->str, combinedCount);
                printf("%s ", firstList->str);
            }
            //secondList->prev = secondList;
            secondList = secondList->next;
        }
        //firstList->prev = firstList;
        firstList = firstList->next;
    }
}

void destroyList(word* inputList) //This isnt working...
{
    while (inputList != NULL)
    {
        if (inputList->prev == NULL)
        {
            inputList = inputList->next;
        }
        else if (inputList->next == NULL)
        {
            free(inputList);
            break;
        }
        else
        {
            free(inputList->prev);
            inputList = inputList->next;
        }
    }
}

int main(int argc, char* argv[])
{
    word* listOne; //Creation of the two linked lists.
    listOne = NULL; //Head of linked list for each.
    word* listTwo;
    listTwo = NULL;

    char* firstFile = argv[1];
    char* secondFile = argv[2];
    char* firstOutputFile = argv[3];
    char* secondOutputFile = argv[4];
    char* finalOutputFile = argv[5];

    if (argc != 6) //THIS WILL CHANGE. THIS IS FOR THE TEMP TWO OUTPUT FILES.
    {
        fprintf(stderr, "\nError! Insufficient input. Proper usage is %s inputFile1 inputFile2 outputFile1 outputFile2 outputFile3\n", argv[0]);
        return 1;
    }

    //Open the first file! --START FUNCTION HERE., INPUT VARIABLES SHOULD BE firstFileOpen, firstFile, listOne

    listOne = readFileMakeList(listOne, firstFile);
    listTwo = readFileMakeList(listTwo, secondFile); //The two linked lists are created using this command.

    listOne = createLinks(listOne);
    listTwo = createLinks(listTwo);

    //Sort the two created lists.
    listOne = sortList(listOne);
    listTwo = sortList(listTwo);

    //The first linked list is now complete. Now, lets print the output.

    printToOutputFile(listOne, firstOutputFile);
    printToOutputFile(listTwo, secondOutputFile);

    //Now, compare the two created linked lists to find similar words.
    createCombinedOutputFile(listOne, listTwo, finalOutputFile);

    printf("\nProgram operation complete!\n");

    putchar('\n');
    return 0;

}

Два входных файла, которые я использовал для тестирования этой программы:


Call me Ishmael. Some years ago - never mind how long precisely - having little or no money in my purse, 
and nothing particular to interest me on shore, I thought I would sail about a little and see the watery 
part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever 
I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever 
I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral 
I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral 
principle to prevent me from deliberately stepping into the street, and methodically knocking people's 
hats off - then, I account it high time to get to sea as soon as I can. This is my substitute for pistol 
and ball. With a philosophical flourish Cato throws himself upon his sword; I quietly take to the ship. 
There is nothing surprising in this. If they but knew it, almost all men in their degree, some time or 
other, cherish very nearly the same feelings towards the ocean with me. 

Hey, I just met you and this is crazy
But here's my number, so call me maybe
It's hard to look right at you baby
But here's my number, so call me maybe
Hey I just met you and this is crazy
But here's my number, so call me maybe
And all the other boys try to chase me
But here's my number, so call me maybe

третий выходной файл (тот, который имеет значение) должен выглядеть следующим образом:

all,3 
and,3 
but,5 
call,4 
i,5 
is,5 
me,3 
my,2 
other,2 
the,2 
this,2 
to,2 

Мой вывод Valgrind выглядит следующим образом:

==24== Memcheck, a memory error detector
==24== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==24== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==24== Command: ./commonMod test.txt test2.txt testOut.txt testOut2.txt finalOutput.txt
==24== Parent PID: 6
==24== 
==24== error calling PR_SET_PTRACER, vgdb might block
==24== 
==24== HEAP SUMMARY:
==24==     in use at exit: 9,091 bytes in 339 blocks
==24==   total heap usage: 345 allocs, 6 frees, 33,667 bytes allocated
==24== 
==24== 1,173 (32 direct, 1,141 indirect) bytes in 1 blocks are definitely lost in loss record 13 of 15
==24==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24==    by 0x108AD0: createWord (commonMod.c:20)
==24==    by 0x108BC3: addWord (commonMod.c:46)
==24==    by 0x108D86: readFileMakeList (commonMod.c:100)
==24==    by 0x1092D6: main (commonMod.c:285)
==24== 
==24== 5,158 (32 direct, 5,126 indirect) bytes in 1 blocks are definitely lost in loss record 15 of 15
==24==    at 0x4C2FB0F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==24==    by 0x108AD0: createWord (commonMod.c:20)
==24==    by 0x108BC3: addWord (commonMod.c:46)
==24==    by 0x108D86: readFileMakeList (commonMod.c:100)
==24==    by 0x1092BF: main (commonMod.c:284)
==24== 
==24== LEAK SUMMARY:
==24==    definitely lost: 64 bytes in 2 blocks
==24==    indirectly lost: 6,267 bytes in 332 blocks
==24==      possibly lost: 0 bytes in 0 blocks
==24==    still reachable: 2,760 bytes in 5 blocks
==24==         suppressed: 0 bytes in 0 blocks
==24== Reachable blocks (those to which a pointer was found) are not shown.
==24== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==24== 
==24== For counts of detected and suppressed errors, rerun with: -v
==24== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)

Большое спасибо. Хорошего дня.

...