Я пытаюсь выполнить оптическое распознавание текста (OCR) на изображениях с текстом на региональных языках, которые поддерживает Google Vision API. Однако мне не удаётся указать сразу несколько языков для распознавания, например en (английский) и hi (хинди). Ниже приведён мой код:
import os
import sys
import json
import time
import base64
import urllib
import urllib2
def main():
    """Watch the ``inputs`` folder and OCR every new image that appears in it.

    Expects exactly one command-line argument (the API key). Makes sure the
    ``inputs``, ``processed`` and ``outputs`` folders exist (offering to
    create them), then polls ``inputs`` every 5 seconds. For each new file
    the recognised text goes to ``outputs/<name>.txt`` and the image is moved
    to ``processed``. The loop never terminates on its own.
    """
    if len(sys.argv) != 2:
        print('Usage: python {} [api-key]'.format(sys.argv[0]))
        return

    to_check = ('inputs', 'processed', 'outputs')
    if not all(os.path.exists(folder) for folder in to_check):
        print('The script expects the following folders to exist: {}'.format(
            ', '.join(to_check)))
        answer = None
        # Keep asking until the user gives an unambiguous y/n.
        while answer not in ('y', 'n'):
            answer = raw_input(
                'Do you want the script to create them? [yn] ').lower()
        if answer == 'n':
            return
        for folder in to_check:
            if not os.path.exists(folder):
                os.makedirs(folder)

    print('Using API key: {}\n'.format(sys.argv[1]))

    before = []
    while True:
        files = os.listdir('inputs')
        # Sets make the membership checks O(1); the lists keep listing order.
        known = set(before)
        current = set(files)
        added = [name for name in files if name not in known]
        removed = [name for name in before if name not in current]
        print('Added: {}'.format(added))
        print('Removed: {}'.format(removed))
        print('Processing new files..')

        for name in added:
            source_path = os.path.join('inputs', name)
            if not os.path.isfile(source_path):
                # A sub-directory (or a vanished entry) cannot be read as an
                # image; skip it instead of crashing the watcher.
                print('Skipping {} (not a regular file)'.format(source_path))
                continue

            text_path = os.path.join(
                'outputs', os.path.splitext(name)[0] + '.txt')
            processed_path = os.path.join('processed', name)

            if os.path.exists(text_path):
                print('Output file already exists, just moving image.')
            else:
                process(source_path, text_path)
            # The image leaves ``inputs`` whether or not it was just processed.
            os.rename(source_path, processed_path)
            # NOTE(review): the original indentation was lost; the separator
            # is assumed to be printed once per handled file - confirm.
            print('---')

        before = files
        time.sleep(5)
def process(fname, output, language_hints=None):
    """Send one image to the Vision API and write the detected text to a file.

    Args:
        fname: Path of the image file to recognise.
        output: Path of the text file to write (UTF-8 encoded).
        language_hints: Optional iterable of language codes, e.g.
            ``['en', 'hi']``, sent as ``imageContext.languageHints`` so that
            several languages can be hinted for the same image. ``None`` or
            an empty value sends no hints.

    The API key is read from ``sys.argv[1]``. The process exits with status 1
    when the HTTP request fails or the API reports an error for the image.
    An image with no detected text produces an empty output file.
    """
    print('Processing {}'.format(fname))
    # Parenthesised so the expression can legally span two lines.
    url = ('https://vision.googleapis.com/v1/images:annotate?' +
           urllib.urlencode({'key': sys.argv[1]}))

    body = get_payload([fname], 5)
    if language_hints:
        # Attach the hints here so they are sent regardless of how
        # get_payload builds the individual requests.
        for item in body['requests']:
            item['imageContext'] = {'languageHints': list(language_hints)}
    payload = json.dumps(body).encode('utf-8')

    request = urllib2.Request(url)
    request.add_header('Content-Type', 'application/json')
    request.add_header('Content-Length', str(len(payload)))

    try:
        response = json.loads(urllib2.urlopen(request, payload).read())
    except urllib2.HTTPError as e:
        # Not every HTTP failure is a bad key (quota, malformed request...),
        # so report the status code and show the server's explanation.
        print('Request failed (HTTP {}). Please check your API key.'.format(
            e.code))
        print(e.read())
        sys.exit(1)

    responses = response.get('responses') or [{}]
    text_response = responses[0]
    if 'error' in text_response:
        # Per-image errors arrive inside a successful HTTP response.
        print('Vision API error: {}'.format(
            json.dumps(text_response['error'])))
        sys.exit(1)

    # The annotation is absent when no text was found in the image.
    text = text_response.get('fullTextAnnotation', {}).get('text', '')
    if not text:
        print('No text detected in {}'.format(fname))

    with open(output, 'wb') as out_file:
        out_file.write(text.encode('utf-8'))
    print('Done! Text written to {}'.format(output))
def get_payload(paths, max_results, language_hints=None):
    """Build the JSON-serialisable body for an ``images:annotate`` request.

    Args:
        paths: Iterable of image file paths; each becomes one request entry.
        max_results: Value sent as ``maxResults`` of the ``TEXT_DETECTION``
            feature.
        language_hints: Optional iterable of language codes, e.g.
            ``['en', 'hi']``. When given, every request carries them as
            ``imageContext.languageHints``; when ``None`` or empty the key is
            omitted and the payload is identical to the hint-less form.

    Returns:
        A dict ``{'requests': [...]}`` with the base64-encoded content of
        each image.
    """
    requests = []
    for path in paths:
        with open(path, 'rb') as image_file:
            # JSON cannot carry raw bytes, so the image is sent as base64 text.
            encoded = base64.b64encode(image_file.read()).decode('utf-8')
        request = {
            'image': {'content': encoded},
            'features': [
                {'type': 'TEXT_DETECTION', 'maxResults': max_results},
            ],
        }
        if language_hints:
            request['imageContext'] = {'languageHints': list(language_hints)}
        requests.append(request)
    return {'requests': requests}
# Start the watcher only when the file is run as a script, not on import.
if __name__ == '__main__':
    main()