Для разработчиков Java:
Я рекомендую для этого вам использовать Tesseract и Tess4j .
Вы можете найти пример того, как найти слова на изображении в одном из тестов Tess4j.
https://github.com/nguyenq/tess4j/blob/master/src/test/java/net/sourceforge/tess4j/TessAPITest.java#L449-L517
public void testResultIterator() throws Exception {
logger.info("TessBaseAPIGetIterator");
File tiff = new File(this.testResourcesDataPath, "eurotext.tif");
BufferedImage image = ImageIO.read(new FileInputStream(tiff)); // require jai-imageio lib to read TIFF
ByteBuffer buf = ImageIOHelper.convertImageData(image);
int bpp = image.getColorModel().getPixelSize();
int bytespp = bpp / 8;
int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);
api.TessBaseAPIInit3(handle, datapath, language);
api.TessBaseAPISetPageSegMode(handle, TessPageSegMode.PSM_AUTO);
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
ETEXT_DESC monitor = new ETEXT_DESC();
TimeVal timeout = new TimeVal();
timeout.tv_sec = new NativeLong(0L); // time > 0 causes blank ouput
monitor.end_time = timeout;
ProgressMonitor pmo = new ProgressMonitor(monitor);
pmo.start();
api.TessBaseAPIRecognize(handle, monitor);
logger.info("Message: " + pmo.getMessage());
TessResultIterator ri = api.TessBaseAPIGetIterator(handle);
TessPageIterator pi = api.TessResultIteratorGetPageIterator(ri);
api.TessPageIteratorBegin(pi);
logger.info("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");
int level = TessPageIteratorLevel.RIL_WORD;
// int height = image.getHeight();
do {
Pointer ptr = api.TessResultIteratorGetUTF8Text(ri, level);
String word = ptr.getString(0);
api.TessDeleteText(ptr);
float confidence = api.TessResultIteratorConfidence(ri, level);
IntBuffer leftB = IntBuffer.allocate(1);
IntBuffer topB = IntBuffer.allocate(1);
IntBuffer rightB = IntBuffer.allocate(1);
IntBuffer bottomB = IntBuffer.allocate(1);
api.TessPageIteratorBoundingBox(pi, level, leftB, topB, rightB, bottomB);
int left = leftB.get();
int top = topB.get();
int right = rightB.get();
int bottom = bottomB.get();
/******************************************/
/* COORDINATES AND WORDS ARE PRINTED HERE */
/******************************************/
System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));
// logger.info(String.format("%s %d %d %d %d", str, left, height - bottom, right, height - top)); //
// training box coordinates
IntBuffer boldB = IntBuffer.allocate(1);
IntBuffer italicB = IntBuffer.allocate(1);
IntBuffer underlinedB = IntBuffer.allocate(1);
IntBuffer monospaceB = IntBuffer.allocate(1);
IntBuffer serifB = IntBuffer.allocate(1);
IntBuffer smallcapsB = IntBuffer.allocate(1);
IntBuffer pointSizeB = IntBuffer.allocate(1);
IntBuffer fontIdB = IntBuffer.allocate(1);
String fontName = api.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB, monospaceB,
serifB, smallcapsB, pointSizeB, fontIdB);
boolean bold = boldB.get() == TRUE;
boolean italic = italicB.get() == TRUE;
boolean underlined = underlinedB.get() == TRUE;
boolean monospace = monospaceB.get() == TRUE;
boolean serif = serifB.get() == TRUE;
boolean smallcaps = smallcapsB.get() == TRUE;
int pointSize = pointSizeB.get();
int fontId = fontIdB.get();
logger.info(String.format(" font: %s, size: %d, font id: %d, bold: %b,"
+ " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b", fontName, pointSize,
fontId, bold, italic, underlined, monospace, serif, smallcaps));
} while (api.TessPageIteratorNext(pi, level) == TRUE);
assertTrue(true);
}