попробуй использовать ascii encode.
я использую этот код для определения ru \ en языков в моем проекте социального бота
function language($string) {
$ru = array("208","209","208176","208177","208178","208179","208180","208181","209145","208182","208183","208184","208185","208186","208187","208188","208189","208190","208191","209128","209129","209130","209131","209132","209133","209134","209135","209136","209137","209138","209139","209140","209141","209142","209143");
$en = array("97","98","99","100","101","102","103","104","105","106","107","108","109","110","111","112","113","114","115","116","117","118","119","120","121","122");
$htmlcharacters = array("<", ">", "&", "<", ">", "&");
$string = str_replace($htmlcharacters, "", $string);
//Strip out the slashes
$string = stripslashes($string);
$badthings = array("=", "#", "~", "!", "?", ".", ",", "<", ">", "/", ";", ":", '"', "'", "[", "]", "{", "}", "@", "$", "%", "^", "&", "*", "(", ")", "-", "_", "+", "|", "`");
$string = str_replace($badthings, "", $string);
$string = mb_strtolower($string);
$msgarray = explode(" ", $string);
$words = count($msgarray);
$letters = str_split($msgarray[0]);
$letters = ToAscii($letters[0]);
$brackets = array("[",",","]");
$letters = str_replace($brackets, "", $letters);
if (in_array($letters, $ru)) {
$result = 'Русский' ; //russian
} elseif (in_array($letters, $en)) {
$result = 'Английский'; //english
} else {
$result = 'ошибка' . $letters; //error
}} return $result;