Question

Мне нужно рекурсивно перечислить все каталоги и файлы в C-программировании. Я посмотрел на FTW, но это не входит в две операционные системы, которые я использую (Fedora и Minix). Я начинаю испытывать сильную головную боль от разных вещей, которые я прочитал за последние несколько часов.

Если кто-то знает фрагмент кода, который я мог бы посмотреть, это было бы замечательно, или если кто-то может дать мне хорошее руководство по этому вопросу, я был бы очень благодарен.

Nominal Animal · Answer 1 · 02 апреля 2015

Почему все настаивают на том, чтобы изобретать велосипед снова и снова?

POSIX.1-2008 стандартизировал функцию nftw(), также определенную в спецификации Single Unix v4 (SuSv4) и доступную в Linux (glibc, man 3 nftw) , OS X и большинство современных вариантов BSD. Это совсем не ново.

Наивные opendir() / readdir() / closedir() реализации почти никогда не обрабатывают случаи, когда каталоги или файлы перемещаются, переименовываются или удаляются во время обхода дерева, тогда как nftw() должен обрабатывать их изящно.

В качестве примера рассмотрим следующую программу на C, в которой перечисляется дерево каталогов, начиная с текущего рабочего каталога или каждого из каталогов, указанных в командной строке, или только файлов, названных в командной строке:

/* We want POSIX.1-2008 + XSI, i.e. SuSv4, features */
#define _XOPEN_SOURCE 700

/* Added on 2017-06-25:
   If the C library can support 64-bit file sizes
   and offsets, using the standard names,
   these defines tell the C library to do so. */
#define _LARGEFILE64_SOURCE
#define _FILE_OFFSET_BITS 64 

#include <stdlib.h>
#include <unistd.h>
#include <ftw.h>
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* POSIX.1 says each process has at least 20 file descriptors.
 * Three of those belong to the standard streams.
 * Here, we use a conservative estimate of 15 available;
 * assuming we use at most two for other uses in this program,
 * we should never run into any problems.
 * Most trees are shallower than that, so it is efficient.
 * Deeper trees are traversed fine, just a bit slower.
 * (Linux allows typically hundreds to thousands of open files,
 *  so you'll probably never see any issues even if you used
 *  a much higher value, say a couple of hundred, but
 *  15 is a safe, reasonable value.)
*/
#ifndef USE_FDS
#define USE_FDS 15
#endif

/* Print the size column. The unit is the largest binary prefix
 * (TiB, GiB, MiB, KiB) that the value reaches; smaller values are
 * shown as whole bytes. */
static void print_entry_size(const double bytes)
{
    if (bytes >= 1099511627776.0) {
        printf(" %9.3f TiB", bytes / 1099511627776.0);
        return;
    }
    if (bytes >= 1073741824.0) {
        printf(" %9.3f GiB", bytes / 1073741824.0);
        return;
    }
    if (bytes >= 1048576.0) {
        printf(" %9.3f MiB", bytes / 1048576.0);
        return;
    }
    if (bytes >= 1024.0) {
        printf(" %9.3f KiB", bytes / 1024.0);
        return;
    }
    printf(" %9.0f B  ", bytes);
}

/* Read the target of the symbolic link at `path` into a freshly
 * allocated, NUL-terminated string.
 *
 * Returns the string (caller frees it), or NULL with *error set to
 * ENOMEM or to the errno left by readlink().
 *
 * readlink() does not report truncation, so a result that fills the
 * whole buffer is treated as "possibly truncated" and retried with a
 * buffer 1024 bytes larger. */
static char *read_symlink_target(const char *path, int *error)
{
    size_t capacity = 1023;

    for (;;) {
        char *const buffer = malloc(capacity + 1);
        ssize_t used;

        if (buffer == NULL) {
            *error = ENOMEM;
            return NULL;
        }

        used = readlink(path, buffer, capacity);
        if (used == (ssize_t)-1) {
            /* Capture errno before free() has a chance to change it. */
            *error = errno;
            free(buffer);
            return NULL;
        }

        if (used < (ssize_t)capacity) {
            buffer[used] = '\0';
            return buffer;
        }

        free(buffer);
        capacity += 1024;
    }
}

/* nftw() callback: print one line describing a visited entry.
 *
 * The line holds the modification time (local time), the size from
 * info->st_size (converted to double, so not exact for huge files),
 * the path, and a suffix that depends on typeflag (link target,
 * trailing slash for directories, and so on).
 *
 * filepath  path of the entry as handed over by nftw()
 * info      stat data for the entry
 * typeflag  one of the FTW_* type constants
 * pathinfo  unused here
 *
 * Returns 0 so that the walk continues, or a non-zero errno value
 * (ENOMEM, or the readlink() error) when a symlink target cannot be
 * read; in that case the timestamp and size have already been printed.
 */
int print_entry(const char *filepath, const struct stat *info,
                const int typeflag, struct FTW *pathinfo)
{
    struct tm when;

    (void)pathinfo;

    localtime_r(&info->st_mtime, &when);

    printf("%04d-%02d-%02d %02d:%02d:%02d",
           when.tm_year + 1900, when.tm_mon + 1, when.tm_mday,
           when.tm_hour, when.tm_min, when.tm_sec);

    print_entry_size((double)info->st_size);

    switch (typeflag) {
    case FTW_SL: {
        int failure = 0;
        char *const target = read_symlink_target(filepath, &failure);

        if (target == NULL)
            return failure;

        printf(" %s -> %s\n", filepath, target);
        free(target);
        break;
    }
    case FTW_SLN:
        printf(" %s (dangling symlink)\n", filepath);
        break;
    case FTW_F:
        printf(" %s\n", filepath);
        break;
    case FTW_D:
    case FTW_DP:
        printf(" %s/\n", filepath);
        break;
    case FTW_DNR:
        printf(" %s/ (unreadable)\n", filepath);
        break;
    default:
        printf(" %s (unknown)\n", filepath);
        break;
    }

    return 0;
}


/* Walk the tree rooted at dirpath with nftw(), printing every entry
 * through print_entry(). Symbolic links are not followed (FTW_PHYS)
 * and at most USE_FDS descriptors are handed to nftw().
 *
 * Returns 0 on success. Otherwise returns a non-zero errno value and
 * leaves the same value in errno: EINVAL for a NULL or empty path,
 * the non-zero value returned by print_entry(), or whatever errno
 * nftw() left behind when it returned a negative result.
 */
int print_directory_tree(const char *const dirpath)
{
    int status;

    /* Reject a missing or empty path up front. */
    if (dirpath == NULL || dirpath[0] == '\0') {
        errno = EINVAL;
        return errno;
    }

    status = nftw(dirpath, print_entry, USE_FDS, FTW_PHYS);

    /* A non-negative status is 0 or the callback's return value;
     * mirror it into errno so callers can use strerror(errno).
     * A negative status means nftw() itself failed and errno is
     * already set. */
    if (status >= 0)
        errno = status;

    return errno;
}

/* Print the directory tree for every path named on the command line,
 * or for the current directory when no path is given. Stops at the
 * first failure, reports it on stderr and exits with EXIT_FAILURE. */
int main(int argc, char *argv[])
{
    int index = 1;

    /* Runs exactly once with "." when there are no arguments,
     * otherwise once per command-line argument. */
    do {
        const char *const root = (argc < 2) ? "." : argv[index];

        if (print_directory_tree(root) != 0) {
            fprintf(stderr, "%s.\n", strerror(errno));
            return EXIT_FAILURE;
        }
    } while (++index < argc);

    return EXIT_SUCCESS;
}

Большая часть кода выше находится в print_entry(). Его задача - распечатать каждую запись каталога. В print_directory_tree() мы говорим nftw() вызывать его для каждой записи каталога, которую он видит.

Единственная неочевидная деталь выше - выбор того, сколько файловых дескрипторов разрешить использовать nftw(). Если ваша программа использует не более двух дополнительных файловых дескрипторов (в дополнение к стандартным потокам) во время обхода дерева файлов, значение 15 считается безопасным (во всех системах, имеющих nftw() и в основном совместимых с POSIX).

В Linux вы можете использовать sysconf(_SC_OPEN_MAX), чтобы найти максимальное количество открытых файлов, и вычесть число, которое вы используете одновременно с вызовом nftw(), но я бы не стал беспокоиться (если бы не знал, что утилита будет использоваться в основном с патологически глубокими структурами каталогов). Пятнадцать дескрипторов не ограничивают глубину дерева; nftw() только становится медленнее (и может не обнаружить изменений в каталоге, если пройти каталог глубже, чем 13 каталогов из этого, хотя компромиссы и общая способность обнаруживать изменения различаются в разных системах и реализациях библиотеки C). Простое использование постоянной времени компиляции делает код переносимым - он должен работать не только в Linux, но и в Mac OS X и во всех текущих вариантах BSD, а также в большинстве других не слишком старых вариантов Unix.

В комментарии Руслан упомянул, что им пришлось переключиться на nftw64(), потому что у них были записи в файловой системе, которые требовали 64-битных размеров / смещений, и «обычная» версия nftw() завершалась с ошибкой errno == EOVERFLOW. Правильным решением является не переключение на 64-разрядные функции, специфичные для GLIBC, а определение _LARGEFILE64_SOURCE и _FILE_OFFSET_BITS 64. Они говорят библиотеке C переключаться на 64-битные размеры файлов и смещения, если это возможно, при использовании стандартных функций (nftw(), fstat() и т. д.) и имен типов (off_t и т. д.).

Lloyd Macrohon · Answer 2 · 09 декабря 2011

Вот рекурсивная версия:

#include <unistd.h>
#include <sys/types.h>
#include <dirent.h>
#include <stdio.h>
#include <string.h>

/*
 * Recursively print the contents of directory `name` to stdout.
 *
 * Sub-directories are printed as "[name]" and descended into with the
 * indentation increased by two columns; every other entry is printed
 * as "- name". The "." and ".." entries are skipped. A directory that
 * cannot be opened is silently ignored.
 *
 * name    path of the directory to list
 * indent  number of spaces to print before each entry
 */
void listdir(const char *name, int indent)
{
    DIR *dir;
    struct dirent *entry;

    if (!(dir = opendir(name)))
        return;

    while ((entry = readdir(dir)) != NULL) {
        if (entry->d_type == DT_DIR) {
            char path[1024];
            int written;

            if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
                continue;

            written = snprintf(path, sizeof(path), "%s/%s", name, entry->d_name);
            printf("%*s[%s]\n", indent, "", entry->d_name);

            /* A truncated path would name a different (or nonexistent)
             * directory, so report it instead of recursing into it. */
            if (written < 0 || (size_t)written >= sizeof(path)) {
                fprintf(stderr, "path too long, not descending: %s/%s\n",
                        name, entry->d_name);
                continue;
            }

            listdir(path, indent + 2);
        } else {
            printf("%*s- %s\n", indent, "", entry->d_name);
        }
    }
    closedir(dir);
}

/* Entry point: list the tree below the current working directory,
 * starting with no indentation. */
int main(void)
{
    listdir(".", 0);

    return 0;
}

Jan · Answer 3 · 09 декабря 2011

/*
 * Tell whether entry `name` inside directory `parent` is a directory
 * that should be descended into.
 *
 * Returns 0 for "." and "..", for anything stat() cannot examine, for
 * non-directories, and when the temporary path cannot be allocated.
 * Returns non-zero when "parent/name" is a directory (stat() follows
 * symbolic links).
 */
int is_directory_we_want_to_list(const char *parent, char *name) {
  struct stat st_buf;
  size_t path_size;
  char *path;
  int is_dir;

  if (!strcmp(".", name) || !strcmp("..", name))
    return 0;

  /* Heap allocation: the combined length is not bounded, so it must
   * not be placed on the stack. +2 covers the '/' and the NUL. */
  path_size = strlen(parent) + strlen(name) + 2;
  path = malloc(path_size);
  if (path == NULL)
    return 0;
  snprintf(path, path_size, "%s/%s", parent, name);

  /* st_buf is only meaningful when stat() succeeded. */
  is_dir = (stat(path, &st_buf) == 0) && S_ISDIR(st_buf.st_mode);

  free(path);
  return is_dir;
}

/*
 * Print the name of every entry in directory `name`, one per line, and
 * recurse into each entry that is_directory_we_want_to_list() accepts.
 *
 * Returns 0 when the whole subtree was listed, -1 when this directory
 * could not be opened, a path could not be allocated, or a recursive
 * call failed. Listing continues past a sub-directory that fails.
 */
int list(const char *name) {
  DIR *dir = opendir(name);
  struct dirent *ent;
  int status = 0;

  if (dir == NULL)
    return -1;

  while ((ent = readdir(dir)) != NULL) {
    char *entry_name = ent->d_name;
    printf("%s\n", entry_name);
    if (is_directory_we_want_to_list(name, entry_name)) {
      /* +2 covers the '/' separator and the terminating NUL. */
      size_t next_size = strlen(name) + strlen(entry_name) + 2;
      char *next = malloc(next_size);
      if (next == NULL) {
        status = -1;
        break;
      }
      snprintf(next, next_size, "%s/%s", name, entry_name);
      if (list(next) != 0)
        status = -1;
      free(next);
    }
  }
  closedir(dir);
  return status;
}

Заголовочные файлы, которые стоит просмотреть в этом контексте: stat.h, dirent.h. Имейте в виду, что приведенный выше код не проверяет наличие ошибок, которые могут возникнуть.

Совершенно другой подход предлагается ftw, определенным в ftw.h.

Myst · Answer 4 · 27 июня 2017

Как я уже упоминал в своем комментарии, я считаю, что рекурсивный подход имеет два недостатка, присущих этой задаче.

Первый недостаток - ограничение на число открытых файлов. Этот предел накладывает ограничение на глубокий обход. Если вложенных подпапок достаточно много, рекурсивный подход сломается. (См. правку относительно переполнения стека.)

Второй недостаток немного более тонкий. При рекурсивном подходе очень сложно проверять жесткие ссылки. Если дерево папок является циклическим (из-за жестких ссылок), рекурсивный подход сломается (возможно, без переполнения стека). (См. правку в отношении жестких ссылок.)

Однако избежать этих проблем довольно просто, заменив рекурсию дескриптором одного файла и связанными списками.

Я предполагаю, что это не школьный проект и что рекурсия не является обязательной.

Вот пример приложения.

Используйте a.out ./ для просмотра дерева папок.

Я извиняюсь за макросы и прочее... Я обычно использую встроенные функции, но подумал, что за кодом будет легче следить, если он весь находится в одной функции.

#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Non-recursive directory tree listing with protection against
 * revisiting a directory.
 *
 * Usage: a.out <directory>
 *
 * Directories waiting to be read are kept on an intrusive linked list
 * (`queue`), so only one directory stream is open at a time. Every
 * directory taken from the queue is also linked into `folders`; a newly
 * found directory whose inode number matches one already in `folders`
 * is not queued again. Every entry seen (including "." and "..") is
 * linked into `list`, which is printed to stderr and freed at the end.
 *
 * Returns 0 on success (and after printing the usage text),
 * EXIT_FAILURE when memory cannot be allocated.
 */
int main(int argc, char const *argv[]) {
  /* print use instruction unless a non-empty folder name was given
   * (an empty name would make the trailing-slash test below read
   * name[-1]) */
  if (argc < 2 || argv[1][0] == '\0') {
    fprintf(stderr,
            "\nuse:\n"
            "    %s <directory>\n"
            "for example:\n"
            "    %s ./\n\n",
            argv[0], argv[0]);
    exit(0);
  }

  /*************** a small linked list macro implementation ***************/

  /* circular, doubly linked, intrusive list node; an empty list is a
   * node whose next and prev point at itself */
  typedef struct list_s {
    struct list_s *next;
    struct list_s *prev;
  } list_s;

#define LIST_INIT(name)                                                        \
  { .next = &name, .prev = &name }

/* insert `node` directly after `dest` */
#define LIST_PUSH(dest, node)                                                  \
  do {                                                                         \
    (node)->next = (dest)->next;                                               \
    (node)->prev = (dest);                                                     \
    (node)->next->prev = (node);                                               \
    (dest)->next = (node);                                                     \
  } while (0)

/* unlink the node after `list` into `var`; `var` is NULL if list is empty */
#define LIST_POP(list, var)                                                    \
  do {                                                                         \
    if ((list)->next == (list)) {                                              \
      var = NULL;                                                              \
    } else {                                                                   \
      var = (list)->next;                                                      \
      (list)->next = var->next;                                                \
      var->next->prev = var->prev;                                             \
    }                                                                          \
  } while (0)

  /*************** a record (file / folder) item type ***************/

  typedef struct record_s {
    /* this is a flat processing queue. */
    list_s queue;
    /* this will list all queued and processed folders (cyclic protection) */
    list_s folders;
    /* this will list all the completed items (siblings and such) */
    list_s list;
    /* unique ID */
    ino_t ino;
    /* name length */
    size_t len;
    /* name string */
    char name[];
  } record_s;

/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name)                                           \
  ((record_s *)(((uintptr_t)(node)) -                                          \
                ((uintptr_t) & ((record_s *)0)->list_name)))

/* initializes a new record */
#define RECORD_INIT(name)                                                      \
  (record_s){.queue = LIST_INIT((name).queue),                                 \
             .folders = LIST_INIT((name).folders),                             \
             .list = LIST_INIT((name).list)}

  /*************** the actual code ***************/

  record_s records = RECORD_INIT(records);
  record_s *pos, *item;
  list_s *tmp;
  DIR *dir;
  struct dirent *entry;

  /* initialize the root folder record and add it to the queue
   * (+2: room for an appended '/' and the terminating NUL) */
  const size_t root_len = strlen(argv[1]);
  pos = malloc(sizeof(*pos) + root_len + 2);
  if (!pos) {
    perror("malloc");
    return EXIT_FAILURE;
  }
  *pos = RECORD_INIT(*pos);
  pos->len = root_len;
  memcpy(pos->name, argv[1], pos->len);
  if (pos->name[pos->len - 1] != '/')
    pos->name[pos->len++] = '/';
  pos->name[pos->len] = 0;
  /* push to queue, but also push to list (first item processed) */
  LIST_PUSH(&records.queue, &pos->queue);
  LIST_PUSH(&records.list, &pos->list);

  /* as long as the queue has items to be processed, do so */
  while (records.queue.next != &records.queue) {
    /* pop queued item */
    LIST_POP(&records.queue, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, queue);
    /* add record to the processed folder list */
    LIST_PUSH(&records.folders, &pos->folders);

    /* process the folder and add all folder data to current list */
    dir = opendir(pos->name);
    if (!dir)
      continue;

    while ((entry = readdir(dir)) != NULL) {
      /* d_namlen is not a POSIX member of struct dirent (glibc does not
       * provide it), so measure the name instead */
      const size_t name_len = strlen(entry->d_name);

      /* create new item, copying its path data and unique ID
       * (+2: room for a folder marker '/' and the terminating NUL) */
      item = malloc(sizeof(*item) + pos->len + name_len + 2);
      if (!item) {
        perror("malloc");
        closedir(dir);
        return EXIT_FAILURE;
      }
      *item = RECORD_INIT(*item);
      item->len = pos->len + name_len;
      memcpy(item->name, pos->name, pos->len);
      memcpy(item->name + pos->len, entry->d_name, name_len);
      item->name[item->len] = 0;
      item->ino = entry->d_ino;
      /* add item to the list, right after the `pos` item */
      LIST_PUSH(&pos->list, &item->list);

      /* unless it's a folder, we're done. */
      if (entry->d_type != DT_DIR)
        continue;

      /* test for '.' and '..' */
      if (entry->d_name[0] == '.' &&
          (entry->d_name[1] == 0 ||
           (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
        continue;

      /* add folder marker */
      item->name[item->len++] = '/';
      item->name[item->len] = 0;

      /* test for cyclic processing */
      list_s *t = records.folders.next;
      while (t != &records.folders) {
        if (NODE2RECORD(t, folders)->ino == item->ino) {
          /* we already processed this folder! */
          break; /* this breaks from the small loop... */
        }
        t = t->next;
      }
      if (t != &records.folders)
        continue; /* if we broke from the small loop, entry is done */

      /* item is a new folder, add to queue */
      LIST_PUSH(&records.queue, &item->queue);
    }
    closedir(dir);
  }

  /*************** Printing the results and cleaning up ***************/
  while (records.list.next != &records.list) {
    /* pop exactly one list item per iteration; a second pop here would
     * drop every other record without printing or freeing it */
    LIST_POP(&records.list, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, list);
    fwrite(pos->name, pos->len, 1, stderr);
    fwrite("\n", 1, 1, stderr);
    free(pos);
  }
  return 0;
}

EDIT

@ Stargateur упомянул в комментариях, что рекурсивный код, вероятно, переполнит стек до достижения предела открытого файла.

Хотя я не вижу, чем переполнение стека лучше, эта оценка, вероятно, верна до тех пор, пока процесс не приближается к пределу открытых файлов при запуске.

Еще один момент, упомянутый @Stargateur в комментариях, заключался в том, что глубина рекурсивного кода ограничена максимальным количеством подкаталогов (64000 в файловой системе ext4), а жесткие ссылки крайне маловероятны (поскольку жесткие ссылки на папки не разрешены в Linux / Unix).

Это хорошая новость, если код работает на Linux (а судя по вопросу, так и есть), так что эта проблема не является реальной (если только не запускать код на macOS или, может быть, Windows)... хотя 64K подпапок в рекурсии могут переполнить стек.

Сказав это, нерекурсивный вариант все еще имеет преимущества, такие как возможность легко добавлять ограничение на количество обрабатываемых элементов, а также возможность кэшировать результат.

PS

Согласно комментариям, это нерекурсивная версия кода, которая не проверяет циклические иерархии. Она быстрее и должна быть достаточно безопасной для использования на компьютере с Linux, где жесткие ссылки на папки не разрешены.

#include <dirent.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/*
 * Non-recursive directory tree listing without a cycle check.
 *
 * Usage: a.out <directory>
 *
 * Directories waiting to be read are kept on an intrusive linked list
 * (`queue`), so only one directory stream is open at a time. Every
 * entry seen (including "." and "..") is linked into `list`, which is
 * printed to stderr and freed at the end.
 *
 * Returns 0 on success (and after printing the usage text),
 * EXIT_FAILURE when memory cannot be allocated.
 */
int main(int argc, char const *argv[]) {
  /* print use instruction unless a non-empty folder name was given
   * (an empty name would make the trailing-slash test below read
   * name[-1]) */
  if (argc < 2 || argv[1][0] == '\0') {
    fprintf(stderr,
            "\nuse:\n"
            "    %s <directory>\n"
            "for example:\n"
            "    %s ./\n\n",
            argv[0], argv[0]);
    exit(0);
  }

  /*************** a small linked list macro implementation ***************/

  /* circular, doubly linked, intrusive list node; an empty list is a
   * node whose next and prev point at itself */
  typedef struct list_s {
    struct list_s *next;
    struct list_s *prev;
  } list_s;

#define LIST_INIT(name)                                                        \
  { .next = &name, .prev = &name }

/* insert `node` directly after `dest` */
#define LIST_PUSH(dest, node)                                                  \
  do {                                                                         \
    (node)->next = (dest)->next;                                               \
    (node)->prev = (dest);                                                     \
    (node)->next->prev = (node);                                               \
    (dest)->next = (node);                                                     \
  } while (0)

/* unlink the node after `list` into `var`; `var` is NULL if list is empty */
#define LIST_POP(list, var)                                                    \
  do {                                                                         \
    if ((list)->next == (list)) {                                              \
      var = NULL;                                                              \
    } else {                                                                   \
      var = (list)->next;                                                      \
      (list)->next = var->next;                                                \
      var->next->prev = var->prev;                                             \
    }                                                                          \
  } while (0)

  /*************** a record (file / folder) item type ***************/

  typedef struct record_s {
    /* this is a flat processing queue. */
    list_s queue;
    /* this will list all the completed items (siblings and such) */
    list_s list;
    /* unique ID */
    ino_t ino;
    /* name length */
    size_t len;
    /* name string */
    char name[];
  } record_s;

/* take a list_s pointer and convert it to the record_s pointer */
#define NODE2RECORD(node, list_name)                                           \
  ((record_s *)(((uintptr_t)(node)) -                                          \
                ((uintptr_t) & ((record_s *)0)->list_name)))

/* initializes a new record */
#define RECORD_INIT(name)                                                      \
  (record_s){.queue = LIST_INIT((name).queue), .list = LIST_INIT((name).list)}

  /*************** the actual code ***************/

  record_s records = RECORD_INIT(records);
  record_s *pos, *item;
  list_s *tmp;
  DIR *dir;
  struct dirent *entry;

  /* initialize the root folder record and add it to the queue
   * (+2: room for an appended '/' and the terminating NUL) */
  const size_t root_len = strlen(argv[1]);
  pos = malloc(sizeof(*pos) + root_len + 2);
  if (!pos) {
    perror("malloc");
    return EXIT_FAILURE;
  }
  *pos = RECORD_INIT(*pos);
  pos->len = root_len;
  memcpy(pos->name, argv[1], pos->len);
  if (pos->name[pos->len - 1] != '/')
    pos->name[pos->len++] = '/';
  pos->name[pos->len] = 0;
  /* push to queue, but also push to list (first item processed) */
  LIST_PUSH(&records.queue, &pos->queue);
  LIST_PUSH(&records.list, &pos->list);

  /* as long as the queue has items to be processed, do so */
  while (records.queue.next != &records.queue) {
    /* pop queued item */
    LIST_POP(&records.queue, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, queue);

    /* process the folder and add all folder data to current list */
    dir = opendir(pos->name);
    if (!dir)
      continue;

    while ((entry = readdir(dir)) != NULL) {
      /* d_namlen is not a POSIX member of struct dirent (glibc does not
       * provide it), so measure the name instead */
      const size_t name_len = strlen(entry->d_name);

      /* create new item, copying its path data and unique ID
       * (+2: room for a folder marker '/' and the terminating NUL) */
      item = malloc(sizeof(*item) + pos->len + name_len + 2);
      if (!item) {
        perror("malloc");
        closedir(dir);
        return EXIT_FAILURE;
      }
      *item = RECORD_INIT(*item);
      item->len = pos->len + name_len;
      memcpy(item->name, pos->name, pos->len);
      memcpy(item->name + pos->len, entry->d_name, name_len);
      item->name[item->len] = 0;
      item->ino = entry->d_ino;
      /* add item to the list, right after the `pos` item */
      LIST_PUSH(&pos->list, &item->list);

      /* unless it's a folder, we're done. */
      if (entry->d_type != DT_DIR)
        continue;

      /* test for '.' and '..' */
      if (entry->d_name[0] == '.' &&
          (entry->d_name[1] == 0 ||
           (entry->d_name[1] == '.' && entry->d_name[2] == 0)))
        continue;

      /* add folder marker */
      item->name[item->len++] = '/';
      item->name[item->len] = 0;

      /* item is a new folder, add to queue */
      LIST_PUSH(&records.queue, &item->queue);
    }
    closedir(dir);
  }

  /*************** Printing the results and cleaning up ***************/
  while (records.list.next != &records.list) {
    /* pop exactly one list item per iteration; a second pop here would
     * drop every other record without printing or freeing it */
    LIST_POP(&records.list, tmp);
    /* collect record to process */
    pos = NODE2RECORD(tmp, list);
    fwrite(pos->name, pos->len, 1, stderr);
    fwrite("\n", 1, 1, stderr);
    free(pos);
  }
  return 0;
}

chqrlie · Answer 5 · 24 июня 2017

Вот упрощенная версия, которая является рекурсивной, но использует намного меньше стекового пространства:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>

/*
 * Recursively print the tree rooted at `path`, one full path per line.
 *
 * `path` is a caller-owned, writable buffer of `size` bytes holding the
 * directory name. It is extended in place with "/child" while a
 * sub-directory is being visited and cut back to its original length
 * afterwards, so no per-level path buffer is needed.
 *
 * A directory that cannot be opened, or a child whose full path would
 * not fit in `size` bytes, is reported on stderr and skipped.
 */
void listdir(char *path, size_t size) {
    const size_t base_len = strlen(path);
    DIR *stream = opendir(path);
    struct dirent *item;

    if (stream == NULL) {
        fprintf(stderr, "path not found: %s: %s\n",
                path, strerror(errno));
        return;
    }

    puts(path);

    for (item = readdir(stream); item != NULL; item = readdir(stream)) {
        const char *child = item->d_name;

        /* Anything that is not reported as a directory is just printed. */
        if (item->d_type != DT_DIR) {
            printf("%s/%s\n", path, child);
            continue;
        }

        if (strcmp(child, ".") == 0 || strcmp(child, "..") == 0)
            continue;

        /* +2 accounts for the '/' separator and the terminating NUL. */
        if (base_len + strlen(child) + 2 > size) {
            fprintf(stderr, "path too long: %s/%s\n", path, child);
            continue;
        }

        /* Append "/child", descend, then restore the original path. */
        path[base_len] = '/';
        strcpy(path + base_len + 1, child);
        listdir(path, size);
        path[base_len] = '\0';
    }

    closedir(stream);
}

/* Entry point: list the tree below the current directory. The 1024-byte
 * buffer is the shared path workspace handed to listdir(). */
int main(void)
{
    char buffer[1024] = ".";

    listdir(buffer, sizeof(buffer));
    return 0;
}

В моей системе его вывод точно такой же, как у find .

Как рекурсивно перечислить каталоги в C на Linux?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 5 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Как рекурсивно перечислить каталоги в C на Linux?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 5 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Нет похожих вопросов