Я был в положении, в котором мне нужно было отфильтровать определенные символы, не влияя на html, потому что я использовал редактор wysiwig, но люди, копирующие вставку из слова, добавили бы к содержанию несколько хороших нередуцируемых символов.
Мое решение сводится к простым спискам замены.
class ReplaceIllegal {
public static $find = array ( 0 => '\x0', 1 => '\x1', 2 => '\x2', 3 => '\x3', 4 => '\x4', 5 => '\x5', 6 => '\x6', 7 => '\x7', 8 => '\x8', 9 => '\x9', 10 => '\xA', 11 => '\xB', 12 => '\xC', 13 => '\xD', 14 => '\xE', 15 => '\xF', 16 => '\x10', 17 => '\x11', 18 => '\x12', 19 => '\x13', 20 => '\x14', 21 => '\x15', 22 => '\x16', 23 => '\x17', 24 => '\x18', 25 => '\x19', 26 => '\x1A', 27 => '\x1B', 28 => '\x1C', 29 => '\x1D', 30 => '\x1E', 31 => '\x80', 32 => '\x81', 33 => '\x82', 34 => '\x83', 35 => '\x84', 36 => '\x85', 37 => '\x86', 38 => '\x87', 39 => '\x88', 40 => '\x89', 41 => '\x8A', 42 => '\x8B', 43 => '\x8C', 44 => '\x8D', 45 => '\x8E', 46 => '\x8F', 47 => '\x90', 48 => '\x91', 49 => '\x92', 50 => '\x93', 51 => '\x94', 52 => '\x95', 53 => '\x96', 54 => '\x97', 55 => '\x98', 56 => '\x99', 57 => '\x9A', 58 => '\x9B', 59 => '\x9C', 60 => '\x9D', 61 => '\x9E', 62 => '\x9F', 63 => '\xA0', 64 => '\xA1', 65 => '\xA2', 66 => '\xA3', 67 => '\xA4', 68 => '\xA5', 69 => '\xA6', 70 => '\xA7', 71 => '\xA8', 72 => '\xA9', 73 => '\xAA', 74 => '\xAB', 75 => '\xAC', 76 => '\xAD', 77 => '\xAE', 78 => '\xAF', 79 => '\xB0', 80 => '\xB1', 81 => '\xB2', 82 => '\xB3', 83 => '\xB4', 84 => '\xB5', 85 => '\xB6', 86 => '\xB7', 87 => '\xB8', 88 => '\xB9', 89 => '\xBA', 90 => '\xBB', 91 => '\xBC', 92 => '\xBD', 93 => '\xBE', 94 => '\xBF', 95 => '\xC0', 96 => '\xC1', 97 => '\xC2', 98 => '\xC3', 99 => '\xC4', 100 => '\xC5', 101 => '\xC6', 102 => '\xC7', 103 => '\xC8', 104 => '\xC9', 105 => '\xCA', 106 => '\xCB', 107 => '\xCC', 108 => '\xCD', 109 => '\xCE', 110 => '\xCF', 111 => '\xD0', 112 => '\xD1', 113 => '\xD2', 114 => '\xD3', 115 => '\xD4', 116 => '\xD5', 117 => '\xD6', 118 => '\xD7', 119 => '\xD8', 120 => '\xD9', 121 => '\xDA', 122 => '\xDB', 123 => '\xDC', 124 => '\xDD', 125 => '\xDE', 126 => '\xDF', 127 => '\xE0', 128 => '\xE1', 129 => '\xE2', 130 => '\xE3', 131 => '\xE4', 132 => '\xE5', 133 => '\xE6', 134 => '\xE7', 135 => '\xE8', 136 => '\xE9', 137 => '\xEA', 138 => '\xEB', 139 => '\xEC', 140 => '\xED', 141 => '\xEE', 142 => '\xEF', 143 => '\xF0', 144 => '\xF1', 145 => '\xF2', 146 => '\xF3', 147 => '\xF4', 148 => '\xF5', 149 => '\xF6', 150 => '\xF7', 151 => '\xF8', 152 => '\xF9', 153 => '\xFA', 154 => '\xFB', 155 => '\xFC', 156 => '\xFD', 157 => '\xFE', );
private static $replace = array ( 0 => '�', 1 => '', 2 => '', 3 => '', 4 => '', 5 => '', 6 => '', 7 => '', 8 => '', 9 => '	', 10 => ' ', 11 => '', 12 => '', 13 => ' ', 14 => '', 15 => '', 16 => '', 17 => '', 18 => '', 19 => '', 20 => '', 21 => '', 22 => '', 23 => '', 24 => '', 25 => '', 26 => '', 27 => '', 28 => '', 29 => '', 30 => '', 31 => '€', 32 => '', 33 => '‚', 34 => 'ƒ', 35 => '„', 36 => '…', 37 => '†', 38 => '‡', 39 => 'ˆ', 40 => '‰', 41 => 'Š', 42 => '‹', 43 => 'Œ', 44 => '', 45 => 'Ž', 46 => '', 47 => '', 48 => '‘', 49 => '’', 50 => '“', 51 => '”', 52 => '•', 53 => '–', 54 => '—', 55 => '˜', 56 => '™', 57 => 'š', 58 => '›', 59 => 'œ', 60 => '', 61 => 'ž', 62 => 'Ÿ', 63 => ' ', 64 => '¡', 65 => '¢', 66 => '£', 67 => '¤', 68 => '¥', 69 => '¦', 70 => '§', 71 => '¨', 72 => '©', 73 => 'ª', 74 => '«', 75 => '¬', 76 => '­', 77 => '®', 78 => '¯', 79 => '°', 80 => '±', 81 => '²', 82 => '³', 83 => '´', 84 => 'µ', 85 => '¶', 86 => '·', 87 => '¸', 88 => '¹', 89 => 'º', 90 => '»', 91 => '¼', 92 => '½', 93 => '¾', 94 => '¿', 95 => 'À', 96 => 'Á', 97 => 'Â', 98 => 'Ã', 99 => 'Ä', 100 => 'Å', 101 => 'Æ', 102 => 'Ç', 103 => 'È', 104 => 'É', 105 => 'Ê', 106 => 'Ë', 107 => 'Ì', 108 => 'Í', 109 => 'Î', 110 => 'Ï', 111 => 'Ð', 112 => 'Ñ', 113 => 'Ò', 114 => 'Ó', 115 => 'Ô', 116 => 'Õ', 117 => 'Ö', 118 => '×', 119 => 'Ø', 120 => 'Ù', 121 => 'Ú', 122 => 'Û', 123 => 'Ü', 124 => 'Ý', 125 => 'Þ', 126 => 'ß', 127 => 'à', 128 => 'á', 129 => 'â', 130 => 'ã', 131 => 'ä', 132 => 'å', 133 => 'æ', 134 => 'ç', 135 => 'è', 136 => 'é', 137 => 'ê', 138 => 'ë', 139 => 'ì', 140 => 'í', 141 => 'î', 142 => 'ï', 143 => 'ð', 144 => 'ñ', 145 => 'ò', 146 => 'ó', 147 => 'ô', 148 => 'õ', 149 => 'ö', 150 => '÷', 151 => 'ø', 152 => 'ù', 153 => 'ú', 154 => 'û', 155 => 'ü', 156 => 'ý', 157 => 'þ', );
/*
* replace illegal characters for escaped html character but don't touch anything else.
*/
public static function getSaveValue($value) {
return str_replace(self::$find, self::$replace, $value);
}
public static function makeIllegal($find,$replace) {
self::$find[] = $find;
self::$replace[] = $replace;
}
}