Мне пришлось сделать что-то подобное в последнее время. Этот скрипт потребует модификации, но имеет все необходимое:
- рекурсивно просматривает файлы и
справочники (субрекурс).
- Он имеет функцию, чтобы действовать на
каталоги (processDir) и
отдельный, чтобы действовать на файлы
(ProcessFile).
- Он обрабатывает пробелы в именах файлов
используя альтернативную версию
функция glob из File :: Glob.
- Он не выполняет никаких действий, но вместо этого
записывает выходной файл (CSV, TAB или
Perl скрипт), так что пользователь может
прежде чем совершить большую ошибку, просмотрите предложенные изменения.
- Выводит частичные результаты
периодически, что полезно, если
ваша система выходит из строя частично.
- Это происходит в глубину первого порядка.
Это важно, потому что если вы
есть скрипт, который изменяет (переименовывает
или перемещает) родительский каталог перед
обработка подкаталогов и
файлы, плохие вещи могут случиться.
- Он читает из файла списка пропуска,
что позволяет избежать огромных каталогов
и смонтированные тома, которые у вас нет
хочу посетить.
- Не следует по символическим ссылкам,
которые часто вызывают округлости.
Небольшая модификация processFile - это большая часть того, что вам нужно сделать, плюс избавление от ненужных вам функций. (Этот сценарий был разработан для поиска файлов с символами в именах, которые не поддерживаются в Windows.)
ПРИМЕЧАНИЕ. В конце он вызывает «open», который на MAC откроет полученный файл в приложении по умолчанию. В Windows используйте «Пуск». В других системах Unix есть похожие команды.
#!/usr/bin/perl -w
# 06/04/2009. PAC. Fixed bug in processDir. Was using $path instead of $dir when forming newpath.
use strict;
use File::Glob ':glob'; # This glob allows spaces in filenames. The default one does not.
sub recurse(&$);
sub processFile($);
sub stem($);
sub processXMLFile($);
sub readFile($);
sub writeFile($$);
sub writeResults($);
sub openFileInApplication($);
if (scalar @ARGV < 4) {
print <<HELP_TEXT;
Purpose: Report on files and directories whose names violate policy by:
o containing illegal characters
o being too long
o beginning or ending with certain characters
Usage: perl EnforceFileNamePolicy.pl root-path skip-list format output-file
root-path .... Recursively process all files and subdirectories starting with this directory.
skip-list .... Name of file with directories to skip, one to a line.
format ....... Output format:
tab = tab delimited list of current and proposed file names
csv = comma separated list of current and proposed file names
perl = perl script to do the renaming
output-file .. Name of file to hold results.
Output: A script or delimited file that will rename the offending files and directories is printed to output-file.
As directories are processed or problems found, diagnostic messages will be printed to STDOUT.
Note: Symbolic links are not followed, otherwise infinite recursion would result.
Note: Directories are processed in depth-first, case-insensitive alphabetical order.
Note: If \$CHECKPOINT_FREQUENCY > 0, partial results will be written to intermediate files periodically.
This is useful if you need to kill the process before it completes and do not want to lose all your work.
HELP_TEXT
exit;
}
########################################################
# #
# CONFIGURABLE OPTIONS #
# #
########################################################
my $BAD_CHARACTERS_CLASS = "[/\\?<>:*|\"]";
my $BAD_SUFFIX_CLASS = "[. ]\$";
my $BAD_PREFIX_CLASS = "^[ ]";
my $REPLACEMENT_CHAR = "_";
my $MAX_PATH_LENGTH = 256;
my $WARN_PATH_LENGTH = 250;
my $LOG_PATH_DEPTH = 4; # How many directories down we go when logging the current directory being processed.
my $CHECKPOINT_FREQUENCY = 20000; # After an integral multiple of this number of directories are processed, write a partial results file in case we later kill the process.
########################################################
# #
# COMMAND LINE ARGUMENTS #
# #
########################################################
my $rootDir = $ARGV[0];
my $skiplistFile = $ARGV[1];
my $outputFormat = $ARGV[2];
my $outputFile = $ARGV[3];
########################################################
# #
# BEGIN PROCESSING #
# #
########################################################
my %pathChanges = (); # Old name to new name, woth path attached.
my %reasons = ();
my %skip = (); # Directories to skip, as read from the skip file.
my $dirsProcessed = 0;
# Load the skiplist
my $skiplist = readFile($skiplistFile);
foreach my $skipentry (split(/\n/, $skiplist)) {
$skip{$skipentry} = 1;
}
# Find all improper path names under directory and store in %pathChanges.
recurse(\&processFile, $rootDir);
# Write the output file.
writeResults(0);
print "DONE!\n";
# Open results in an editor for review.
#WARNING: If your default application for opening perl files is the perl exe itself, this will run the otput perl script!
# Thus, you may want to comment this out.
# Better yet: associate a text editor with the perl script.
openFileInApplication($outputFile);
exit;
sub recurse(&$) {
my($func, $path) = @_;
if ($path eq '') {
$path = ".";
}
## append a trailing / if it's not there
$path .= '/' if($path !~ /\/$/);
## loop through the files contained in the directory
for my $eachFile (sort { lc($a) cmp lc($b) } glob($path.'*')) {
# If eachFile has a shorter name and is a prefix of $path, then stop recursing. We must have traversed "..".
if (length($eachFile) > length($path) || substr($path, 0, length($eachFile)) ne $eachFile) {
## if the file is a directory
my $skipFile = defined $skip{$eachFile};
if( -d $eachFile && ! -l $eachFile && ! $skipFile) { # Do not process symbolic links like directories! Otherwise, this will never complete - many circularities.
my $depth = depthFromRoot($eachFile);
if ($depth <= $LOG_PATH_DEPTH) {
# Printing every directory as we process it slows the program and does not give the user an intelligible measure of progress.
# So we only go so deep in printing directory names.
print "Processing: $eachFile\n";
}
## pass the directory to the routine ( recursion )
recurse(\&$func, $eachFile);
# Process the directory AFTER its children to force strict depth-first order.
processDir($eachFile);
} else {
if ($skipFile) {
print "Skipping: $eachFile\n";
}
# Process file.
&$func($eachFile);
}
}
}
}
sub processDir($) {
my ($path) = @_;
my $newpath = $path;
my $dir;
my $file;
if ($path eq "/") {
return;
}
elsif ($path =~ m|^(.*/)([^/]+)$|) {
($dir, $file) = ($1, $2);
}
else {
# This path has no slashes, hence must be the root directory.
$file = $path;
$dir = '';
}
if ($file =~ /$BAD_CHARACTERS_CLASS/) {
$file =~ s/($BAD_CHARACTERS_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectDir($path, $newpath, "Illegal character in directory.");
}
elsif ($file =~ /$BAD_SUFFIX_CLASS/) {
$file =~ s/($BAD_SUFFIX_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectDir($path, $newpath, "Illegal character at end of directory.");
}
elsif ($file =~ /$BAD_PREFIX_CLASS/) {
$file =~ s/($BAD_PREFIX_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectDir($path, $newpath, "Illegal character at start of directory.");
}
elsif (length($path) >= $MAX_PATH_LENGTH) {
rejectDir($path, $newpath, "Directory name length > $MAX_PATH_LENGTH.");
}
elsif (length($path) >= $WARN_PATH_LENGTH) {
rejectDir($path, $newpath, "Warning: Directory name length > $WARN_PATH_LENGTH.");
}
$dirsProcessed++;
if ($CHECKPOINT_FREQUENCY > 0 && $dirsProcessed % $CHECKPOINT_FREQUENCY == 0) {
writeResults(1);
}
}
sub processFile($) {
my ($path) = @_;
my $newpath = $path;
$path =~ m|^(.*/)([^/]+)$|;
my ($dir, $file) = ($1, $2);
if (! defined ($file) || $file eq '') {
$file = $path;
}
if ($file =~ /$BAD_CHARACTERS_CLASS/) {
$file =~ s/($BAD_CHARACTERS_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectFile($path, $newpath, "Illegal character in filename.");
}
elsif ($file =~ /$BAD_SUFFIX_CLASS/) {
$file =~ s/($BAD_SUFFIX_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectFile($path, $newpath, "Illegal character at end of filename.");
}
elsif ($file =~ /$BAD_PREFIX_CLASS/) {
$file =~ s/($BAD_PREFIX_CLASS)/$REPLACEMENT_CHAR/g;
$newpath = $dir . $file;
rejectFile($path, $newpath, "Illegal character at start of filename.");
}
elsif (length($path) >= $MAX_PATH_LENGTH) {
rejectFile($path, $newpath, "File name length > $MAX_PATH_LENGTH.");
}
elsif (length($path) >= $WARN_PATH_LENGTH) {
rejectFile($path, $newpath, "Warning: File name length > $WARN_PATH_LENGTH.");
}
}
sub rejectDir($$$) {
my ($oldName, $newName, $reason) = @_;
$pathChanges{$oldName} = $newName;
$reasons{$oldName} = $reason;
print "Reason: $reason Dir: $oldName\n";
}
sub rejectFile($$$) {
my ($oldName, $newName, $reason) = @_;
$pathChanges{$oldName} = $newName;
$reasons{$oldName} = $reason;
print "Reason: $reason File: $oldName\n";
}
sub readFile($) {
my ($filename) = @_;
my $contents;
if (-e $filename) {
# This is magic: it opens and reads a file into a scalar in one line of code.
# See http://www.perl.com/pub/a/2003/11/21/slurp.html
$contents = do { local( @ARGV, $/ ) = $filename ; <> } ;
}
else {
$contents = '';
}
return $contents;
}
sub writeFile($$) {
my( $file_name, $text ) = @_;
open( my $fh, ">$file_name" ) || die "Can't create $file_name $!" ;
print $fh $text ;
}
# writeResults() - Compose results in the appropriate format: perl script, tab delimited, or comma delimited, then write to output file.
sub writeResults($) {
my ($checkpoint) = @_;
my $outputText = '';
my $outputFileToUse;
my $checkpointMessage;
if ($checkpoint) {
$checkpointMessage = "$dirsProcessed directories processed so far.";
}
else {
$checkpointMessage = "$dirsProcessed TOTAL directories processed.";
}
if ($outputFormat eq 'tab') {
$outputText .= "Reason\tOld name\tNew name\n";
$outputText .= "$checkpointMessage\t\t\n";
}
elsif ($outputFormat eq 'csv') {
$outputText .= "Reason,Old name,New name\n";
$outputText .= "$checkpointMessage,,\n";
}
elsif ($outputFormat eq 'perl') {
$outputText = <<END_PERL;
#/usr/bin/perl
# $checkpointMessage
#
# Rename files and directories with bad names.
# If the reason is that the filename is too long, you must hand edit this script and choose a suitable, shorter new name.
END_PERL
}
foreach my $file (sort {
my $shortLength = length($a) > length($b) ? length($b) : length($a);
my $prefixA = substr($a, 0, $shortLength);
my $prefixB = substr($b, 0, $shortLength);
if ($prefixA eq $prefixB) {
return $prefixA eq $a ? 1 : -1; # If one path is a prefix of the other, the longer path must sort first. We must process subdirectories before their parent directories.
}
else {
return $a cmp $b;
}
} keys %pathChanges) {
my $changedName = $pathChanges{$file};
my $reason = $reasons{$file};
if ($outputFormat eq 'tab') {
$outputText .= "$reason\t$file\t$changedName\n";
}
elsif ($outputFormat eq 'csv') {
$outputText .= "$reason,$file,$changedName\n";
}
else {
# Escape the spaces so the mv command works.
$file =~ s/ /\\ /g;
$changedName =~ s/ /\\ /g;
$outputText .= "#$reason\nrename \"$file\", \"$changedName\"\n";
}
}
$outputFileToUse = $outputFile;
if ($checkpoint) {
$outputFileToUse =~ s/(^.*)([.][^.]+$)/$1-$dirsProcessed$2/;
}
writeFile($outputFileToUse, $outputText);
}
# Compute how many directories deep the given path is below the root for this script.
sub depthFromRoot($) {
my ($dir) = @_;
$dir =~ s/\Q$rootDir\E//;
my $count = 1;
for (my $i = 0; $i < length($dir); $i++) {
if (substr($dir, $i, 1) eq "/") { $count ++; }
}
return $count;
}
#openFileInApplication($filename) - Open the file in its default application.
#
# TODO: Must be changed for WINDOWS. Use 'start' instead of 'open'???
sub openFileInApplication($) {
my ($filename) = @_;
`open $filename`;
}