получить строку в тексте для каждого слова - PullRequest
0 голосов
/ 20 апреля 2020

Следующая программа печатает частоту каждого слова в файле. Я пытаюсь для каждого слова сохранить, в каких строках текстового файла оно встречается и сколько раз оно встречается во всём файле. Программа считает, сколько раз слово встретилось, но мне не удаётся определить, в каких строках текстового файла оно находится.

Проблема с массивом int lines[]. При запуске программа выдаёт:

ошибка сегментации

    /* capacity of the word table: insert_word exits with "too many words!"
     * once this many distinct words have been stored */
    #define MAXWORDS    10000
    /* size of the per-word text buffer, terminator included: insert_word
     * exits with "word too long!" for words of this length or longer */
    #define MAXSTRING   100

    /* structure holding word frequency information */

    typedef struct _word {
        char    s[MAXSTRING];   /* the word */
        int count;      /* number of times word occurs */
        int lines[1000];    /* line number of each occurrence; insert_word stores occurrence k in lines[k], so lines[0] is never written */

    } word;


    /*
     * Record one occurrence of the word s, seen on line number no.
     *
     * words  table of entries; the first *n are in use
     * n      in/out: number of entries in use, incremented when s is new
     * s      NUL-terminated word to record
     * no     line number on which this occurrence was found
     *
     * Line numbers are stored 1-based: the k-th occurrence of a word goes
     * into lines[k] and lines[0] is never written.  Once the lines array is
     * full the occurrence is still counted but its line number is dropped.
     *
     * Exits with status 1 if s does not fit in MAXSTRING bytes or the table
     * already holds MAXWORDS distinct words.
     */
    void insert_word (word *words, int *n, char *s, int no) {
        int i;
        int capacity;
        word *entry;

        /* linear search for the word */
        for (i = 0; i < *n; i++) {
            if (strcmp (s, words[i].s) != 0)
                continue;

            /* found it?  increment, remember the line and return. */
            entry = &words[i];
            entry->count++;

            /* lines[count] is only a valid slot while count is below the
             * array length; writing beyond it would corrupt the next entry */
            capacity = (int) (sizeof entry->lines / sizeof entry->lines[0]);
            if (entry->count < capacity)
                entry->lines[entry->count] = no;

            return;
        }

        /* error conditions... */

        if (strlen (s) >= MAXSTRING) {
            fprintf (stderr, "word too long!\n");
            exit (1);
        }
        if (*n >= MAXWORDS) {
            fprintf (stderr, "too many words!\n");
            exit (1);
        }

        /* copy the word into the structure at the first available slot,
         * i.e., *n
         */
        entry = &words[*n];
        strcpy (entry->s, s);

        /* this word has occurred once up to now, so count = 1 and the
         * first occurrence lands in lines[1] */
        entry->count = 1;
        entry->lines[entry->count] = no;

        /* one more word */
        (*n)++;
    }


...........
int main () {
    word    words[MAXWORDS];
    char    s[1000];
    int i, n, m;

    n = 0;

FILE* file = fopen("test.txt", "r"); 


    /* read all the words in the file... */
    int no=1;
    while (fgets(s, sizeof(s), file)) {
        scanf ("%s", s);
            insert_word (words, &n, s,no);
            no=no+1;
        }
    }
        fclose(file);




    qsort((void *) words, n, sizeof (word),
        (int (*) (const void *, const void *)) wordcmp);



    if (n < 20) 
        m = n;
    else
        m = 20;



    for (i=0; i<m; i++)
        printf ("%s\t[%d] {%d} \n", words[i].s, words[i].count, words[i].lines);
}

1 Ответ

0 голосов
/ 20 апреля 2020

Вот решение с динамическим выделением памяти:

/* One distinct word together with the line numbers recorded for it. */
typedef struct _word {
    char *s;                /* the word; insert_word stores it in a malloc'd buffer */
    int count;              /* number of times word occurs */
    int *line_numbers;      // Array of line numbers; insert_word allocates it and grows it with realloc,
                            // appending a line number only when it differs from the last one stored
    int num_line_numbers;   // Size of the array of line numbers
} word;
// Growable list of word entries. Bundling the array with its length lets
// insert_word and remove_word take a single word_list pointer.
typedef struct {
    word *words;      // The array of word structs
    int num_words;    // The size of the array
} word_list;

/*
 * Record one occurrence of the word s found on line line_number.
 *
 * If s is already in the list its count is incremented, and line_number is
 * appended to its line_numbers array unless it equals the last number
 * stored there.  Otherwise the list grows by one entry holding a private
 * copy of s, a count of 1 and line_number as its only line.
 *
 * words        list to update
 * s            NUL-terminated word; it is copied, the caller keeps ownership
 * line_number  line on which this occurrence was found
 *
 * On allocation failure a message is written to stderr and the process
 * exits with EXIT_FAILURE.
 */
void insert_word (word_list *words, char *s, int line_number)
{
    /* linear search for the word */
    for (int i = 0; i < words->num_words; i++) {
        word *entry = &words->words[i];

        if (strcmp(s, entry->s) != 0) {
            continue;
        }

        /* found it?  increment and return. */
        entry->count++;

        // See if it already appeared in this line
        // (num_line_numbers is at least 1 for every stored entry)
        if (entry->line_numbers[entry->num_line_numbers - 1] == line_number) {
            return;
        }

        // New line number. Increase the line number array by one
        int *more_lines = realloc(entry->line_numbers,
                                  sizeof *more_lines * ((size_t)entry->num_line_numbers + 1));
        if (NULL == more_lines) {
            fprintf(stderr, "out of memory\n");
            exit(EXIT_FAILURE);
        }
        entry->line_numbers = more_lines;

        // Add the line number to the array
        entry->line_numbers[entry->num_line_numbers] = line_number;
        entry->num_line_numbers += 1;
        return;
    }

    /* error conditions... */
    /* (left out in the original answer; the MAXSTRING / MAXWORDS limits of
     * the fixed-size version do not apply because storage grows on demand) */

    // Increase the size of the word array by one.
    word *grown = realloc(words->words, sizeof *grown * ((size_t)words->num_words + 1));
    if (grown == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    words->words = grown;

    /* fill in the first available slot, i.e. index num_words */
    word *fresh = &words->words[words->num_words];

    fresh->s = malloc(strlen(s) + 1);
    fresh->line_numbers = malloc(sizeof *fresh->line_numbers);
    if (fresh->s == NULL || fresh->line_numbers == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    strcpy(fresh->s, s);

    /* this word has occurred once up to now, so count = 1 */
    fresh->count = 1;
    fresh->line_numbers[0] = line_number;
    fresh->num_line_numbers = 1;

    words->num_words += 1;
}

/*
 * Remove the entry whose text equals word_to_delete.
 *
 * Frees the entry's string and line-number array, moves the entries that
 * follow it one slot to the left and shrinks the array.  Removing the only
 * entry frees the array and leaves words->words NULL with num_words 0.
 *
 * Returns true if an entry was removed, false if the word is not in the list.
 */
bool remove_word(word_list *words, const char *word_to_delete)
{
    for (int i = 0; i < words->num_words; i++) {
        if (0 != strcmp(words->words[i].s, word_to_delete)) {
            continue;
        }

        // Free mem owned by the entry being removed
        free(words->words[i].s);
        free(words->words[i].line_numbers);

        // Calc number of words after found word
        int number_of_words_to_right = words->num_words - i - 1;

        // Close the gap. Source and destination overlap, so this has to be
        // memmove; memcpy on overlapping regions is undefined behavior.
        memmove(&words->words[i], &words->words[i + 1],
                sizeof(word) * (size_t)number_of_words_to_right);
        words->num_words -= 1;

        if (words->num_words == 0) {
            // Last word removed: release the array instead of calling
            // realloc with size 0, whose result is not portable.
            free(words->words);
            words->words = NULL;
            return true;
        }

        // Resize the array (technically not required). If shrinking fails
        // the old block is still valid and large enough, so keep using it.
        word *tmp = realloc(words->words, sizeof(word) * (size_t)words->num_words);
        if (NULL != tmp) {
            words->words = tmp;
        }
        return true;
    }
    return false;
}

И в main()

    word_list *words = malloc(sizeof(word_list));
    if (NULL == words) exit(0);
    memset(words, 0, sizeof(word_list));

    ....

    /* read all the words in the file... */
    char s[1000];
    int line_number = 1;
    while (fgets(s, sizeof(s), file)) {
        char *word = strtok(s, " ");
        while (word != NULL) {
            size_t len = strlen(word);
            if (len > 0 && word[len - 1] == '\n') word[--len] = 0;
            insert_word(words, word, line_number);
            word = strtok(NULL, " ");
        }
        line_number += 1;
    }
    fclose(file);

    for (int i = 0; i < words->num_words; i++) {
        printf("%s\t\t[%d] {", words->words[i].s, words->words[i].count);
        for (int j = 0; j < words->words[i].num_line_numbers; j++) {
            if (j != 0) printf(",");
            printf("%d", words->words[i].line_numbers[j]);
        }
        printf("}\n");
    }

    // It's good practice to always free mem. It's super not important
    // in this app since the OS will do it when you exit
    for (int i = 0; i < words->num_words; i++) {
        free(words->words[i].s);
        free(words->words[i].line_numbers);
    }
    free(words->words);
    free(words);

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...