C программа для поиска частоты слов - PullRequest
0 голосов
/ 27 октября 2018

У меня есть программа на C, которая подсчитывает количество слов в каждом файле, заданном в командной строке. Теперь мне нужно подсчитать, сколько раз встречается каждое слово. Вот мой код:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Capacity of the word table: main sizes its `words` array with this and
 * insert_word refuses to add a new entry once this many are stored.
 */
#define MAXWORDS    10000
/* Size in bytes of the `s` buffer inside each `word`, terminator included;
 * insert_word rejects any string whose length is >= this value.
 */
#define MAXSTRING   100

/* One entry of the word-frequency table: a word together with the number
 * of times it has been seen. Entries are created and updated by
 * insert_word and ordered by wordcmp.
 */

typedef struct _word {
    char    s[MAXSTRING];   /* the word, NUL-terminated (at most MAXSTRING-1 characters) */
    int count;      /* number of times the word occurs; set to 1 on first insertion */
} word;

/* Running total of the per-file word counts. main points this at a
 * MAP_SHARED | MAP_ANONYMOUS mapping before forking, each child process
 * adds its file's count to it, and the parent reads it after the children
 * have been waited for.
 */
static int *total_amount_of_words;


/* Count the whitespace-delimited words in `file`.
 *
 * Reads from the stream's current position until end-of-file (the caller
 * must rewind the stream if it wants to read the contents again). A word is
 * a maximal run of non-whitespace characters as classified by isspace(),
 * which is the same tokenisation fscanf("%s") applies, so repeated
 * separators are not over-counted and a final word without a trailing
 * newline is still counted.
 *
 * Returns the number of words, or 0 if `file` is NULL.
 */
int countWords(FILE *file){
    int count = 0;
    int in_word = 0;    /* non-zero while the previous character belonged to a word */
    int character;      /* int, not char: EOF must stay distinct from every byte value */

    if (file == NULL)
        return 0;

    while ((character = fgetc(file)) != EOF) {
        if (isspace(character)) {
            in_word = 0;
        } else if (!in_word) {
            /* first character of a new word */
            in_word = 1;
            count++;
        }
    }
    return count;
}

/* Record one occurrence of the string `s` in the table `words`, which
 * currently holds `*n` entries.
 *
 * If `s` is already present its count is incremented. Otherwise a new entry
 * with count 1 is appended and `*n` is incremented. The process is
 * terminated with exit status 1 when a new word would not fit in an entry
 * (length >= MAXSTRING) or when the table already holds MAXWORDS entries.
 */
void insert_word (word *words, int *n, char *s) {
    int slot = 0;
    word *fresh;

    /* scan the existing entries; a hit just bumps the counter */
    while (slot < *n) {
        if (strcmp (s, words[slot].s) == 0) {
            words[slot].count++;
            return;
        }
        slot++;
    }

    /* not found: make sure a new entry can actually be stored */
    if (strlen (s) >= MAXSTRING) {
        fprintf (stderr, "word too long!\n");
        exit (1);
    }
    if (*n >= MAXWORDS) {
        fprintf (stderr, "too many words!\n");
        exit (1);
    }

    /* append at the first free slot, seen once so far */
    fresh = &words[*n];
    strcpy (fresh->s, s);
    fresh->count = 1;

    *n += 1;
}

/* Ordering function for sorting words by descending count: returns +1 when
 * `a` occurs less often than `b`, -1 when it occurs more often, and 0 when
 * both counts are equal.
 */
int wordcmp (word *a, word *b) {
    int a_is_rarer = b->count > a->count;
    int a_is_commoner = b->count < a->count;

    return a_is_rarer - a_is_commoner;
}

/* Letter test that does not depend on the locale: yields 1 when c lies in
 * 'a'..'z' or in 'A'..'Z', and 0 for every other character.
 */
int is_alpha (char c) {
    int lower = (c >= 'a') && (c <= 'z');
    int upper = (c >= 'A') && (c <= 'Z');

    return (lower || upper) ? 1 : 0;
}

/* Delete the character at index `i` of the NUL-terminated string `s` by
 * shifting everything after it (terminator included) one position to the
 * left. If `i` is the index of the terminator the string is left unchanged.
 */
void remove_char (char *s, int i) {
    char *cursor = s + i;

    /* pull each following character down one slot; the last copy moves the
     * terminator, which also ends the loop on the next check */
    while (*cursor != '\0') {
        cursor[0] = cursor[1];
        cursor++;
    }
}

/* Strip every character that is_alpha() rejects from the NUL-terminated
 * string `s`, in place, keeping the remaining letters in their original
 * order.
 *
 * The string is compacted in a single pass with separate read and write
 * indices. Deleting at index i and then advancing i would skip the
 * character that slides into the vacated slot, so runs of consecutive
 * non-letters (e.g. "a12b") would only be partly removed.
 */
void remove_non_alpha (char *s) {
    int src;        /* next character to examine */
    int dst = 0;    /* where the next kept letter is written; always <= src */

    for (src = 0; s[src]; src++) {
        if (is_alpha (s[src])) {
            s[dst] = s[src];
            dst++;
        }
    }
    s[dst] = 0;
}

/* Convert every character of the NUL-terminated string `s` to lower case,
 * in place, using tolower().
 */
void make_lowercase (char *s) {
    int i;

    for (i=0; s[i]; i++) {
        /* tolower() is only defined for EOF and values representable as
         * unsigned char; a plain char may be negative, so convert first */
        s[i] = (char) tolower ((unsigned char) s[i]);
    }
}


int main(int argc, char *argv[])
{
    word    words[MAXWORDS];
    char    s[1000];
    int i, j, n, m;
    int pid;
    FILE *current_file;

    n = 0;

    total_amount_of_words = mmap(NULL, sizeof *total_amount_of_words, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);

    //Create processes for each file on the command line
    for(i = 1; i < argc; i++)
    {
        pid = fork();
        if(pid == -1) //Error
        {
            exit(-1);
        }
        else if(pid == 0) //Children
        {
            current_file = fopen(argv[i], "r");
            int current_word_amount = countWords(current_file);
            *total_amount_of_words += current_word_amount;
            printf("Child Process for File %s: number of words is: %i\n", argv[i], current_word_amount);

        rewind(current_file);

        /* read all the words in the file... */

        while (!feof (current_file)) {
            fscanf (current_file, "%s", s);

            /* only insert the word if it's not punctuation */

            if (is_alpha (s[0])) {

                /* get rid of non-letters */

                remove_non_alpha (s);

                /* make all letters lowercase */

                make_lowercase (s);

                /* put this word in the list */

                insert_word (words, &n, s);
            }
        }

            fclose(current_file);
            exit(0);
        }
        else //Parent
        {
            wait(NULL);
        }

    }

    /* sort the list of words by descending frequency */

    qsort((void *) words, n, sizeof (word),
        (int (*) (const void *, const void *)) wordcmp);



    /* print the words with their frequencies */

    for (j=0; j<*total_amount_of_words; j++){
    printf ("Word: %s\t", words[j].s);
    printf ("Frequency: %%d\n", words[j].count);
    }

    printf("All %i files have been counted!\n Total Amount of Words: %d\n", (argc-1), *total_amount_of_words);

    munmap(total_amount_of_words, sizeof *total_amount_of_words);


}

В цикле for внизу я не могу распечатать каждое слово и количество его вхождений. Я не получаю никакого вывода: структура в родительском процессе остаётся неинициализированной. Как мне сделать эту структуру общей для всех процессов?

...