Примечание. Решение Don работает со строками, заданными прямо в коде, но не работает со строками, прочитанными из файла в кодировке UTF-8.
Вот лучшее решение, которое у меня есть: кодирование строки через URLEncoder с последующей заменой шестнадцатеричных (percent-encoded) последовательностей на обычные буквы:
String s = "Cerepedia, una apliación web";
String ENCODING= "uft-8";
String encoded_s = URLEncoder.encode(s,ENCODING); // Cerepedia+una+aplicaci%C3%83%C2%B3n+web
String s_hexa_free = EncodingTableUtils.replaceHexa(,ENCODING)); // Cerepedia+una+aplicacion+web
EncodingTableUtils
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Lookup tables and a helper that strip Spanish accented letters from
 * URL-encoded text by replacing their percent-encoded byte sequences with
 * plain ASCII letters.
 */
public class EncodingTableUtils {

  /**
   * Sequences used when the encoding name is {@code "iso-8859-1"}.
   * NOTE(review): the keys are the two-byte UTF-8 percent-encodings of the
   * letters (a true ISO-8859-1 encoding of á would be {@code %E1}); they are
   * kept unchanged so existing callers get the same results.
   */
  public final static HashMap<String, String> iso88591 = new HashMap<String, String>();
  static {
    iso88591.put("%C3%A1", "a"); // á
    iso88591.put("%C3%81", "A"); // Á
    iso88591.put("%C3%A9", "e"); // é
    iso88591.put("%C3%89", "E"); // É
    iso88591.put("%C3%AD", "i"); // í
    iso88591.put("%C3%8D", "I"); // Í
    iso88591.put("%C3%93", "O"); // Ó
    iso88591.put("%C3%B3", "o"); // ó
    iso88591.put("%C3%BA", "u"); // ú
    iso88591.put("%C3%9A", "U"); // Ú
    iso88591.put("%C3%91", "N"); // Ñ
    iso88591.put("%C3%B1", "n"); // ñ
  }

  /**
   * Sequences used when the encoding name is {@code "utf-8"}.
   * Each key is the doubly-encoded form of a letter: its UTF-8 bytes
   * mis-decoded as windows-1252 and then URL-encoded as UTF-8 again
   * (e.g. ó becomes {@code %C3%83%C2%B3}).
   */
  public final static HashMap<String, String> utf8 = new HashMap<String, String>();
  static {
    utf8.put("%C3%83%C2%A1", "a"); // á
    // Á and Í both lose their second byte (0x81 / 0x8D are unassigned in
    // windows-1252) and collapse to the replacement character, so they
    // cannot be told apart; the sequence is mapped to "A".
    utf8.put("%C3%83%EF%BF%BD", "A"); // Á (also Í)
    utf8.put("%C3%83%C2%A9", "e"); // é
    utf8.put("%C3%83%E2%80%B0", "E"); // É
    utf8.put("%C3%83%C2%AD", "i"); // í
    utf8.put("%C3%83%E2%80%9C", "O"); // Ó
    utf8.put("%C3%83%C2%B3", "o"); // ó
    utf8.put("%C3%83%C2%BA", "u"); // ú
    utf8.put("%C3%83%C5%A1", "U"); // Ú
    utf8.put("%C3%83%E2%80%98", "N"); // Ñ
    utf8.put("%C3%83%C2%B1", "n"); // ñ
  }

  /** Maps a lower-case encoding name to its replacement table. */
  public final static HashMap<String, HashMap<String, String>> enc_table =
      new HashMap<String, HashMap<String, String>>();
  static {
    enc_table.put("iso-8859-1", iso88591);
    enc_table.put("utf-8", utf8);
  }

  /**
   * Replaces percent-encoded accented letters with their plain ASCII
   * equivalents.
   * <p>Example: á, encoded as %C3%A1, is replaced with a.</p>
   *
   * @param s usually a string coming from URLEncoder.encode; must not be null
   * @param enc encoding name, UTF-8 or ISO-8859-1 (case-insensitive); must not be null
   * @return {@code s} with every sequence of the selected table replaced, or
   *         an empty string when {@code enc} is not a known encoding name
   */
  public static String convertHexaDecimal(String s, String enc) {
    // Locale.ROOT keeps the lookup independent of the default locale
    // (a Turkish locale would lower-case 'I' to a dotless 'ı').
    final HashMap<String, String> characters = enc_table.get(enc.toLowerCase(Locale.ROOT));
    if (characters == null) {
      return "";
    }
    String result = s;
    // The keys are literal text, so plain String.replace is enough; no key is
    // a substring of another, so the iteration order does not matter.
    for (Map.Entry<String, String> entry : characters.entrySet()) {
      result = result.replace(entry.getKey(), entry.getValue());
    }
    return result;
  }
}
Класс EscapeChars
/**
 * Helper for escaping text so that it can be embedded in a regular
 * expression as a literal.
 */
public final class EscapeChars {

  /** Characters that {@link #forRegex(String)} prefixes with a backslash. */
  private static final String REGEX_SPECIAL_CHARS = ".\\?*+&:{}[]()^$|";

  /**
   * Replace characters having special meaning in regular expressions
   * with their escaped equivalents, preceded by a '\' character.
   *
   * <P>The escaped characters include :
   *<ul>
   *<li>.
   *<li>\
   *<li>?, * , and +
   *<li>&amp;
   *<li>:
   *<li>{ and }
   *<li>[ and ]
   *<li>( and )
   *<li>^ and $
   *<li>|
   *</ul>
   *
   * @param aRegexFragment text to be matched literally; must not be null
   * @return the text with each listed character preceded by a backslash
   */
  public static String forRegex(String aRegexFragment){
    final int length = aRegexFragment.length();
    final StringBuilder result = new StringBuilder(length + 8);
    // Walk by index rather than with a CharacterIterator: its DONE sentinel
    // is U+FFFF, so an input containing that character would be cut short.
    for (int index = 0; index < length; index++) {
      final char character = aRegexFragment.charAt(index);
      if (REGEX_SPECIAL_CHARS.indexOf(character) >= 0) {
        result.append('\\');
      }
      result.append(character);
    }
    return result.toString();
  }
}