Вот самописная версия на Objective-C, которая работает в большинстве случаев. Она определяет
метод unescaped для NSString.
#import <Cocoa/Cocoa.h>
/// Parses the `length` UTF-16 units of `text` starting at `location` as an
/// integer in the given `base`. Parsing is delegated to strtol, so it stops at
/// the first character that is not a valid digit: a malformed code yields a
/// partial value (or 0) instead of an error.
static long BackslashUnescapingParseCode(NSString *text, NSInteger location, NSInteger length, int base)
{
    NSString *code = [text substringWithRange:NSMakeRange(location, length)];
    const char *digits = code.UTF8String;
    // UTF8String is declared nullable; never hand a NULL pointer to strtol.
    return (digits != NULL) ? strtol(digits, NULL, base) : 0;
}

/// Decoding of C-style backslash escape sequences for NSString.
@interface NSString (BackslashUnescaping)

/// Returns the receiver with its backslash escape sequences decoded.
///
/// Supported sequences:
///  - `\n`, `\r`, `\t`, `\a`, `\b`, `\f`, `\v`  control characters
///  - `\xHH`    exactly two hex digits, decoded as one 8-bit value
///  - `\uHHHH`  exactly four hex digits, decoded as one UTF-16 unit
///  - `\0`      NUL, when not followed by another digit
///  - `\DDD`    exactly three octal digits, decoded as one 8-bit value
///  - any other `\c` decodes to the character `c` itself (e.g. `\\`, `\"`)
///
/// Limitations: digits of numeric codes are not validated (see the parsing
/// helper), and a numeric escape that is cut off by the end of the string
/// stops decoding; that escape and everything after it is returned verbatim.
/// A lone trailing backslash is kept as is.
///
/// @return The receiver itself when nothing was decoded, otherwise a new
///         string holding the decoded text.
- (NSString*) unescaped;

@end

@implementation NSString (BackslashUnescaping)

- (NSString*) unescaped
{
    NSString *text = self;
    NSInteger charIndex = 0;

    while (true) {
        NSInteger textLen = text.length;
        if (charIndex >= textLen) {
            break;
        }

        NSRange remainingRange = NSMakeRange(charIndex, textLen - charIndex);
        NSRange charRange = [text rangeOfString:@"\\" options:0 range:remainingRange];
        if (charRange.length == 0) {
            // no more backslashes -> done
            break;
        }

        charIndex = charRange.location + 1;
        if (charIndex >= textLen) {
            // the backslash is the last character -> nothing to decode
            break;
        }

        // Inspect the character following the backslash. By default an escape
        // decodes to that character itself, which covers \\ and \" as well as
        // any escape letter this method does not know.
        unichar nextChar = [text characterAtIndex:charIndex];
        unichar replacementChar = nextChar;
        NSInteger skipLen = 1; // characters consumed after the backslash

        if (nextChar >= '0' && nextChar <= '9') {
            // Peek at the following character to tell a short NUL (\0) from a
            // three-digit octal code that merely starts with 0.
            unichar nextChar2 = 0;
            if (charIndex + 1 < textLen) {
                nextChar2 = [text characterAtIndex:charIndex + 1];
            }
            if (nextChar == '0' && (nextChar2 < '0' || nextChar2 > '9')) {
                replacementChar = 0;
            } else {
                const NSInteger xtraLen = 2;
                if (charIndex + xtraLen >= textLen) {
                    return text; // incomplete octal code -> stop decoding
                }
                // Mask to 8 bits so that values >= 0200 are not sign-extended.
                replacementChar = (unichar)(BackslashUnescapingParseCode(text, charIndex, 3, 8) & 0xFF);
                skipLen += xtraLen;
            }
        } else {
            switch (nextChar) {
                case 'n': replacementChar = '\n'; break; // LF
                case 'r': replacementChar = '\r'; break; // CR
                case 't': replacementChar = '\t'; break; // TAB
                case 'a': replacementChar = '\a'; break; // BEL
                case 'b': replacementChar = '\b'; break; // BS
                case 'f': replacementChar = '\f'; break; // FF
                case 'v': replacementChar = '\v'; break; // VT
                case 'x': {
                    // A two-digit hex char code
                    const NSInteger xtraLen = 2;
                    if (charIndex + xtraLen >= textLen) {
                        return text; // incomplete hex code -> stop decoding
                    }
                    // Mask to 8 bits so that values >= 0x80 are not sign-extended.
                    replacementChar = (unichar)(BackslashUnescapingParseCode(text, charIndex + 1, xtraLen, 16) & 0xFF);
                    skipLen += xtraLen;
                    break;
                }
                case 'u': {
                    // A four-digit unicode char code
                    const NSInteger xtraLen = 4;
                    if (charIndex + xtraLen >= textLen) {
                        return text; // incomplete unicode code -> stop decoding
                    }
                    replacementChar = (unichar)BackslashUnescapingParseCode(text, charIndex + 1, xtraLen, 16);
                    skipLen += xtraLen;
                    break;
                }
                default:
                    // generic escape: keep the character as is
                    break;
            }
        }

        // Switch to a mutable copy on the first replacement only, so that a
        // string without escapes is returned untouched.
        if (text == self) {
            text = [self mutableCopy];
        }
        NSRange replacedRange = NSMakeRange(charRange.location, skipLen + 1);
        NSString *replacement = [NSString stringWithCharacters:&replacementChar length:1];
        [(NSMutableString*)text replaceCharactersInRange:replacedRange withString:replacement];

        // The decoded character now sits where the backslash was. Resume right
        // behind it, so that the decoded character is not scanned again and an
        // escape that follows immediately is not skipped.
        charIndex = charRange.location + 1;
    }

    return text;
}

@end
/// Demo driver: prints every sample string next to the result of sending it
/// the `unescaped` message.
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        // The doubled backslashes reach the method as escape sequences; the
        // single-backslash escapes in parentheses are resolved by the compiler
        // and show the expected character.
        NSArray *samples = @[
            @"CR:\\rLF:\\n",
            @"\\\"quoted\\\"",
            @"Backslash: \\\\",
            @"Octal x (\170):\\170",
            @"Hex x (\x78):\\x78",
            @"Unicode Ф (\u0424):\\u0424",
            @"NUL char:\\0.",
            @"Bad Hex:\\x7x", // invalid second digit, decoded without complaint
            @"Bad Hex:\\x7",
            @"Incomplete :\\13"
        ];

        const NSUInteger sampleCount = [samples count];
        for (NSUInteger i = 0; i < sampleCount; i++) {
            NSString *escaped = samples[i];
            NSString *decoded = [escaped unescaped];
            NSString *report = [NSString stringWithFormat:@"Escaped: %@\nUnescaped: %@", escaped, decoded];
            printf("\n%s\n", [report UTF8String]);
        }
    }
    return 0;
}