Для этого вопроса уже предусмотрено несколько решений, но я прочитал и протестировал большую часть кода здесь, и я получил это решение, представляющее собой смесь того, что я узнал здесь:
Функция
Эта функция включена в пакет Symfony2 , но ее можно извлечь для использования в качестве обычного PHP , она зависит только от функции iconv
, которая должна быть включено:
Filesystem.php
<?php
namespace COil\Bundle\COilCoreBundle\Component\HttpKernel\Util;
use Symfony\Component\HttpKernel\Util\Filesystem as BaseFilesystem;
/**
* Extends the Symfony filesystem object.
*/
class Filesystem extends BaseFilesystem
{
/**
* Make a filename safe to use in any function. (Accents, spaces, special chars...)
* The iconv function must be activated.
*
* @param string $fileName The filename to sanitize (with or without extension)
* @param string $defaultIfEmpty The default string returned for a non valid filename (only special chars or separators)
* @param string $separator The default separator
* @param boolean $lowerCase Tells if the string must converted to lower case
*
* @author COil <https://github.com/COil>
* @see /2033916/ochistit-stroki-chtoby-sdelat-ih-bezopasnymi-dlya-url-i-imeni-faila
*
* @return string
*/
public function sanitizeFilename($fileName, $defaultIfEmpty = 'default', $separator = '_', $lowerCase = true)
{
// Gather file informations and store its extension
$fileInfos = pathinfo($fileName);
$fileExt = array_key_exists('extension', $fileInfos) ? '.'. strtolower($fileInfos['extension']) : '';
// Removes accents
$fileName = @iconv('UTF-8', 'us-ascii//TRANSLIT', $fileInfos['filename']);
// Removes all characters that are not separators, letters, numbers, dots or whitespaces
$fileName = preg_replace("/[^ a-zA-Z". preg_quote($separator). "\d\.\s]/", '', $lowerCase ? strtolower($fileName) : $fileName);
// Replaces all successive separators into a single one
$fileName = preg_replace('!['. preg_quote($separator).'\s]+!u', $separator, $fileName);
// Trim beginning and ending seperators
$fileName = trim($fileName, $separator);
// If empty use the default string
if (empty($fileName)) {
$fileName = $defaultIfEmpty;
}
return $fileName. $fileExt;
}
}
Юнит-тесты
Что интересно, я создал тесты PHPUnit, сначала для тестирования крайних случаев, и поэтому вы можете проверить, соответствует ли он вашим потребностям:
(Если вы обнаружите ошибку, не стесняйтесь добавлять тестовый пример)
FilesystemTest.php
<?php
namespace COil\Bundle\COilCoreBundle\Tests\Unit\Helper;
use COil\Bundle\COilCoreBundle\Component\HttpKernel\Util\Filesystem;
/**
* Test the Filesystem custom class.
*/
class FilesystemTest extends \PHPUnit_Framework_TestCase
{
/**
* test sanitizeFilename()
*/
public function testFilesystem()
{
$fs = new Filesystem();
$this->assertEquals('logo_orange.gif', $fs->sanitizeFilename('--logö _ __ ___ ora@@ñ--~gé--.gif'), '::sanitizeFilename() handles complex filename with specials chars');
$this->assertEquals('coilstack', $fs->sanitizeFilename('cOiLsTaCk'), '::sanitizeFilename() converts all characters to lower case');
$this->assertEquals('cOiLsTaCk', $fs->sanitizeFilename('cOiLsTaCk', 'default', '_', false), '::sanitizeFilename() lower case can be desactivated, passing false as the 4th argument');
$this->assertEquals('coil_stack', $fs->sanitizeFilename('coil stack'), '::sanitizeFilename() convert a white space to a separator');
$this->assertEquals('coil-stack', $fs->sanitizeFilename('coil stack', 'default', '-'), '::sanitizeFilename() can use a different separator as the 3rd argument');
$this->assertEquals('coil_stack', $fs->sanitizeFilename('coil stack'), '::sanitizeFilename() removes successive white spaces to a single separator');
$this->assertEquals('coil_stack', $fs->sanitizeFilename(' coil stack'), '::sanitizeFilename() removes spaces at the beginning of the string');
$this->assertEquals('coil_stack', $fs->sanitizeFilename('coil stack '), '::sanitizeFilename() removes spaces at the end of the string');
$this->assertEquals('coilstack', $fs->sanitizeFilename('coil,,,,,,stack'), '::sanitizeFilename() removes non-ASCII characters');
$this->assertEquals('coil_stack', $fs->sanitizeFilename('coil_stack '), '::sanitizeFilename() keeps separators');
$this->assertEquals('coil_stack', $fs->sanitizeFilename(' coil________stack'), '::sanitizeFilename() converts successive separators into a single one');
$this->assertEquals('coil_stack.gif', $fs->sanitizeFilename('cOil Stack.GiF'), '::sanitizeFilename() lower case filename and extension');
$this->assertEquals('copy_of_coil.stack.exe', $fs->sanitizeFilename('Copy of coil.stack.exe'), '::sanitizeFilename() keeps dots before the extension');
$this->assertEquals('default.doc', $fs->sanitizeFilename('____________.doc'), '::sanitizeFilename() returns a default file name if filename only contains special chars');
$this->assertEquals('default.docx', $fs->sanitizeFilename(' ___ - --_ __%%%%__¨¨¨***____ .docx'), '::sanitizeFilename() returns a default file name if filename only contains special chars');
$this->assertEquals('logo_edition_1314352521.jpg', $fs->sanitizeFilename('logo_edition_1314352521.jpg'), '::sanitizeFilename() returns the filename untouched if it does not need to be modified');
$userId = rand(1, 10);
$this->assertEquals('user_doc_'. $userId. '.doc', $fs->sanitizeFilename('亐亐亐亐亐.doc', 'user_doc_'. $userId), '::sanitizeFilename() returns the default string (the 2nd argument) if it can\'t be sanitized');
}
}
Результаты теста: (проверено на Ubuntu с PHP 5.3.2 и MacOsX с PHP 5.3.17:
All tests pass:
phpunit -c app/ src/COil/Bundle/COilCoreBundle/Tests/Unit/Helper/FilesystemTest.php
PHPUnit 3.6.10 by Sebastian Bergmann.
Configuration read from /var/www/strangebuzz.com/app/phpunit.xml.dist
.
Time: 0 seconds, Memory: 5.75Mb
OK (1 test, 17 assertions)