Azure Computer Vision (OCR): объединение только текстовых значений из вложенного JSON-вывода - PullRequest
0 голосов
/ 29 апреля 2020

В настоящее время я использую API Vision, предоставляемый Azure, и выходные данные, которые я получаю от OCR Api, выглядят так ...

enter code here

{"language": "es", "textAngle": 0.0, "direction": "Up", "regions": [{"boundingBox": "11,14,437,515", "lines": [
{"boundingBox": "12,14,394,17", "words": [{"boundingBox": "12,15,27,12", "text": "Con"}, {"boundingBox": "44,18,26,9", "text": "una"}, {"boundingBox": "75,16,43,11", "text": "careta"}, {"boundingBox": "122,16,91,14", "text": "transparente"}, {"boundingBox": "217,18,8,13", "text": "y"}, {"boundingBox": "229,15,57,15", "text": "cargado"}, {"boundingBox": "291,18,16,9", "text": "en"}, {"boundingBox": "313,14,20,13", "text": "los"}, {"boundingBox": "338,14,47,13", "text": "brazos"}, {"boundingBox": "389,14,17,13", "text": "de"}]},
{"boundingBox": "12,37,431,15", "words": [{"boundingBox": "12,37,44,14", "text": "Pedro,"}, {"boundingBox": "61,40,16,9", "text": "su"}, {"boundingBox": "82,37,43,15", "text": "padre,"}, {"boundingBox": "129,37,35,12", "text": "José"}, {"boundingBox": "169,37,31,14", "text": "Luis,"}, {"boundingBox": "204,37,12,12", "text": "el"}, {"boundingBox": "221,37,34,12", "text": "bebé"}, {"boundingBox": "260,38,73,14", "text": "prematuro"}, {"boundingBox": "337,40,26,9", "text": "con"}, {"boundingBox": "368,40,25,9", "text": "una"}, {"boundingBox": "398,37,45,12", "text": "madre"}]},
{"boundingBox": "12,59,382,16", "words": [{"boundingBox": "12,59,78,16", "text": "contagiada"}, {"boundingBox": "95,59,17,12", "text": "de"}, {"boundingBox": "117,59,64,12", "text": "Covid-19"}, {"boundingBox": "186,62,7,13", "text": "y"}, {"boundingBox": "198,59,38,16", "text": "quien"}, {"boundingBox": "240,59,52,12", "text": "falleció"}, {"boundingBox": "297,59,59,16", "text": "después"}, {"boundingBox": "361,59,17,12", "text": "de"}, {"boundingBox": "383,59,11,12", "text": "la"}]},
{"boundingBox": "12,81,227,16", "words": [{"boundingBox": "12,81,59,14", "text": "cesárea,"}, {"boundingBox": "75,81,33,12", "text": "salió"}, {"boundingBox": "113,81,20,12", "text": "del"}, {"boundingBox": "138,81,50,16", "text": "Seguro"}, {"boundingBox": "192,81,47,12", "text": "Social."}]},
{"boundingBox": "12,122,425,17", "words": [{"boundingBox": "12,123,49,12", "text": "Debido"}, {"boundingBox": "66,126,8,9", "text": "a"}, {"boundingBox": "78,126,26,13", "text": "que"}, {"boundingBox": "109,123,47,12", "text": "Estaba"}, {"boundingBox": "161,126,30,13", "text": "muy"}, {"boundingBox": "196,126,38,13", "text": "grave"}, {"boundingBox": "239,126,49,13", "text": "porque"}, {"boundingBox": "292,122,40,13", "text": "sufrió"}, {"boundingBox": "337,123,87,16", "text": "preclampsia"}, {"boundingBox": "429,128,8,2", "text": "-"}]},
{"boundingBox": "12,145,413,16", "words": [{"boundingBox": "12,145,111,15", "text": "complicaciones"}, {"boundingBox": "128,148,46,13", "text": "graves"}, {"boundingBox": "179,148,16,9", "text": "en"}, {"boundingBox": "200,145,12,12", "text": "el"}, {"boundingBox": "217,145,70,12", "text": "embarazo"}, {"boundingBox": "292,148,23,12", "text": "por"}, {"boundingBox": "319,145,52,16", "text": "presión"}, {"boundingBox": "376,145,49,12", "text": "arterial"}]},
{"boundingBox": "12,166,425,16", "words": [{"boundingBox": "12,167,68,14", "text": "elevada—,"}, {"boundingBox": "85,167,11,12", "text": "la"}, {"boundingBox": "101,167,45,12", "text": "madre"}, {"boundingBox": "150,166,23,13", "text": "fue"}, {"boundingBox": "178,167,48,12", "text": "llevada"}, {"boundingBox": "231,167,17,12", "text": "de"}, {"boundingBox": "253,167,60,15", "text": "urgencia"}, {"boundingBox": "318,167,43,12", "text": "desde"}, {"boundingBox": "366,167,71,14", "text": "Monclova,"}]},
{"boundingBox": "12,189,369,16", "words": [{"boundingBox": "12,189,44,12", "text": "donde"}, {"boundingBox": "61,189,49,14", "text": "deposía,"}, {"boundingBox": "114,192,8,13", "text": "y"}, {"boundingBox": "127,192,25,12", "text": "que"}, {"boundingBox": "157,192,16,9", "text": "es"}, {"boundingBox": "178,189,66,15", "text": "epicentro"}, {"boundingBox": "248,189,17,12", "text": "de"}, {"boundingBox": "270,189,11,12", "text": "la"}, {"boundingBox": "286,189,70,16", "text": "pandemia"}, {"boundingBox": "361,189,20,12", "text": "del"}]},
{"boundingBox": "12,211,176,12", "words": [{"boundingBox": "12,211,84,12", "text": "coronavirus"}, {"boundingBox": "101,214,16,9", "text": "en"}, {"boundingBox": "122,211,66,12", "text": "Coahuila."}]},
{"boundingBox": "12,252,430,17", "words": [{"boundingBox": "12,252,17,13", "text": "\"El"}, {"boundingBox": "35,252,51,15", "text": "infante,"}, {"boundingBox": "91,253,17,12", "text": "de"}, {"boundingBox": "113,256,58,9", "text": "escasos"}, {"boundingBox": "176,253,17,12", "text": "18"}, {"boundingBox": "198,252,29,13", "text": "días"}, {"boundingBox": "232,252,17,13", "text": "de"}, {"boundingBox": "253,252,51,15", "text": "nacido,"}, {"boundingBox": "308,256,16,9", "text": "es"}, {"boundingBox": "329,253,11,12", "text": "el"}, {"boundingBox": "346,253,45,15", "text": "primer"}, {"boundingBox": "396,252,24,17", "text": "hijo"}, {"boundingBox": "425,253,17,12", "text": "de"}]},
{"boundingBox": "12,274,432,17", "words": [{"boundingBox": "12,278,25,9", "text": "una"}, {"boundingBox": "42,275,45,12", "text": "madre"}, {"boundingBox": "92,274,65,13", "text": "infectada"}, {"boundingBox": "162,278,25,9", "text": "con"}, {"boundingBox": "192,275,83,12", "text": "coronavirus"}, {"boundingBox": "280,278,16,9", "text": "en"}, {"boundingBox": "301,275,11,12", "text": "el"}, {"boundingBox": "318,275,29,16", "text": "país"}, {"boundingBox": "351,278,8,13", "text": "y"}, {"boundingBox": "363,275,11,12", "text": "él"}, {"boundingBox": "379,275,30,12", "text": "está"}, {"boundingBox": "414,275,30,12", "text": "libre"}]},
{"boundingBox": "12,297,413,16", "words": [{"boundingBox": "12,297,20,12", "text": "del"}, {"boundingBox": "37,297,43,14", "text": "virus\","}, {"boundingBox": "84,297,57,12", "text": "destacó"}, {"boundingBox": "146,297,11,12", "text": "la"}, {"boundingBox": "162,297,77,16", "text": "Delegación"}, {"boundingBox": "244,297,20,12", "text": "del"}, {"boundingBox": "270,297,57,12", "text": "Instituto"}, {"boundingBox": "332,297,68,12", "text": "Mexicano"}, {"boundingBox": "405,297,20,12", "text": "del"}]},
{"boundingBox": "12,318,366,17", "words": [{"boundingBox": "12,319,49,16", "text": "Seguro"}, {"boundingBox": "66,319,42,12", "text": "Social"}, {"boundingBox": "114,318,51,17", "text": "(IMSS),"}, {"boundingBox": "170,322,16,9", "text": "en"}, {"boundingBox": "191,322,17,9", "text": "un"}, {"boundingBox": "213,319,87,12", "text": "comunicado"}, {"boundingBox": "305,319,17,12", "text": "de"}, {"boundingBox": "327,322,51,13", "text": "prensa."}]},
{"boundingBox": "11,360,406,17", "words": [{"boundingBox": "11,361,55,15", "text": "Aunque"}, {"boundingBox": "70,360,11,13", "text": "el"}, {"boundingBox": "87,364,45,9", "text": "menor"}, {"boundingBox": "136,362,47,11", "text": "estuvo"}, {"boundingBox": "188,364,30,13", "text": "muy"}, {"boundingBox": "222,364,42,13", "text": "grave,"}, {"boundingBox": "268,360,23,13", "text": "fue"}, {"boundingBox": "295,361,35,12", "text": "dado"}, {"boundingBox": "335,361,17,12", "text": "de"}, {"boundingBox": "356,360,26,13", "text": "alta"}, {"boundingBox": "387,362,30,11", "text": "este"}]},
{"boundingBox": "12,382,422,17", "words": [{"boundingBox": "12,384,50,11", "text": "martes"}, {"boundingBox": "66,383,15,12", "text": "21"}, {"boundingBox": "88,383,17,12", "text": "de"}, {"boundingBox": "110,383,29,12", "text": "abril"}, {"boundingBox": "145,383,20,12", "text": "del"}, {"boundingBox": "170,383,58,16", "text": "Hospital"}, {"boundingBox": "234,383,53,12", "text": "General"}, {"boundingBox": "292,383,17,12", "text": "de"}, {"boundingBox": "313,383,35,12", "text": "Zona"}, {"boundingBox": "353,382,42,17", "text": "(HGZ)"}, {"boundingBox": "400,383,24,12", "text": "No."}, {"boundingBox": "429,383,5,12", "text": "1"}]},
{"boundingBox": "12,404,135,13", "words": [{"boundingBox": "12,404,17,13", "text": "de"}, {"boundingBox": "34,405,36,12", "text": "IMSS"}, {"boundingBox": "75,408,16,9", "text": "en"}, {"boundingBox": "96,405,51,12", "text": "Saltillo."}]},
{"boundingBox": "12,447,415,16", "words": [{"boundingBox": "12,447,24,12", "text": "Por"}, {"boundingBox": "40,450,16,9", "text": "su"}, {"boundingBox": "62,448,39,14", "text": "parte,"}, {"boundingBox": "106,450,16,9", "text": "su"}, {"boundingBox": "127,447,43,15", "text": "padre,"}, {"boundingBox": "174,450,26,12", "text": "que"}, {"boundingBox": "205,447,44,15", "text": "quedó"}, {"boundingBox": "253,447,41,14", "text": "viudo,"}, {"boundingBox": "299,450,25,9", "text": "con"}, {"boundingBox": "329,450,26,9", "text": "una"}, {"boundingBox": "360,447,24,16", "text": "hija"}, {"boundingBox": "389,447,17,12", "text": "de"}, {"boundingBox": "411,447,16,12", "text": "IO"}]},
{"boundingBox": "12,468,403,17", "words": [{"boundingBox": "12,469,37,14", "text": "años,"}, {"boundingBox": "54,469,30,12", "text": "está"}, {"boundingBox": "89,469,98,15", "text": "desempleado,"}, {"boundingBox": "192,472,31,12", "text": "pero"}, {"boundingBox": "228,469,70,16", "text": "agradeció"}, {"boundingBox": "303,469,19,12", "text": "las"}, {"boundingBox": "327,470,66,11", "text": "muestras"}, {"boundingBox": "398,468,17,13", "text": "de"}]},
{"boundingBox": "12,491,436,16", "words": [{"boundingBox": "12,491,44,12", "text": "cariño"}, {"boundingBox": "60,491,20,12", "text": "del"}, {"boundingBox": "86,491,60,15", "text": "personal"}, {"boundingBox": "151,491,17,12", "text": "de"}, {"boundingBox": "173,491,11,12", "text": "la"}, {"boundingBox": "190,491,72,12", "text": "institución"}, {"boundingBox": "267,494,26,13", "text": "que"}, {"boundingBox": "298,491,38,12", "text": "cuidó"}, {"boundingBox": "341,494,8,9", "text": "a"}, {"boundingBox": "354,494,16,9", "text": "su"}, {"boundingBox": "375,491,61,16", "text": "pequeño"}, {"boundingBox": "440,494,8,13", "text": "y"}]},
{"boundingBox": "12,513,179,16", "words": [{"boundingBox": "12,513,12,12", "text": "lo"}, {"boundingBox": "29,513,60,16", "text": "despidió"}, {"boundingBox": "93,516,26,9", "text": "con"}, {"boundingBox": "124,513,67,16", "text": "aplausos."}]}
]}]}

Я перепробовал с дюжину разных способов извлечь только значения свойства "text" и объединить их в одну строковую переменную.

Получать данные вот таким образом просто нереально:

var text = data.regions[0].lines[0].words[0].text;
        console.log('text: '+text);

Это почти то же самое, что разбирать текст по одной строке за раз. В этом нет смысла.

Раньше у меня был скрипт по принципу «иголка в стоге сена» (needle in a haystack), который справлялся с этой задачей, но их PHP SDK заброшен на GitHub. Не подскажете, как получить только нужные мне значения с помощью JavaScript или jQuery?

Большое спасибо, берегите себя.

ПРИМЕЧАНИЕ: Кстати, этот API отлично работает с текстами на испанском языке, в то время как другие сервисы не распознают латинские символы с диакритическими знаками. Спасибо.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...