Ошибка OSError: cannot identify image file <_io.BytesIO object at 0x00000146C3A786A8> - PullRequest
0 голосов
/ 27 апреля 2020

Я пытаюсь запустить Amazon Textract для PDF-файлов, которые хранятся в корзине S3. Ниже приведён пример кода, который я использую, но при запуске я получаю следующую ошибку:

C:\Users\Lenovo\Desktop> python s3demo.py

Traceback (most recent call last):

File "s3demo.py", line 130, in <module>

main()

File "s3demo.py", line 126, in main

block_count=process_text_analysis(bucket,document)

File "s3demo.py", line 68, in process_text_analysis

image=Image.open(stream)

File "C:\Users\Lenovo\Anaconda3\lib\site-packages\PIL\Image.py", line 2818, in open

raise IOError("cannot identify image file %r" % (filename if filename else fp))

OSError: cannot identify image file <_io.BytesIO object at 0x00000146C3A786A8>

Ниже приведен фрагмент кода:

import boto3
import io
from io import BytesIO
import sys

import math
from PIL import Image, ImageDraw, ImageFont

def ShowBoundingBox(draw,box,width,height,boxColor):
    """Draw the outline of a bounding box.

    Args:
        draw: Object exposing ``rectangle(xy, outline=...)``.
        box: Mapping with 'Left', 'Top', 'Width' and 'Height' entries; the
            horizontal values are scaled by ``width`` and the vertical ones
            by ``height``.
        width: Horizontal scale factor applied to 'Left' and 'Width'.
        height: Vertical scale factor applied to 'Top' and 'Height'.
        boxColor: Value forwarded as the rectangle's ``outline``.
    """
    x0 = width * box['Left']
    y0 = height * box['Top']
    x1 = x0 + (width * box['Width'])
    y1 = y0 + (height * box['Height'])
    draw.rectangle([x0, y0, x1, y1], outline=boxColor)

def ShowSelectedElement(draw,box,width,height,boxColor):
    """Draw a filled rectangle over a bounding box.

    Args:
        draw: Object exposing ``rectangle(xy, fill=...)``.
        box: Mapping with 'Left', 'Top', 'Width' and 'Height' entries; the
            horizontal values are scaled by ``width`` and the vertical ones
            by ``height``.
        width: Horizontal scale factor applied to 'Left' and 'Width'.
        height: Vertical scale factor applied to 'Top' and 'Height'.
        boxColor: Value forwarded as the rectangle's ``fill``.
    """
    x0 = width * box['Left']
    y0 = height * box['Top']
    x1 = x0 + (width * box['Width'])
    y1 = y0 + (height * box['Height'])
    draw.rectangle([x0, y0, x1, y1], fill=boxColor)

# Displays information about a block returned by text detection and text analysis
def DisplayBlockInformation(block):
    """Print a human-readable summary of one block to stdout.

    Args:
        block: Dict describing a block. 'Id', 'BlockType' and 'Geometry'
            (with 'BoundingBox' and 'Polygon') are always read. 'Text',
            'Confidence', 'Relationships' and 'Page' are printed only when
            present. CELL blocks must carry 'ColumnIndex', 'RowIndex',
            'ColumnSpan' and 'RowSpan'; KEY_VALUE_SET blocks must carry
            'EntityTypes'; SELECTION_ELEMENT blocks must carry
            'SelectionStatus'.

    Raises:
        KeyError: If a key required for the block's type is missing.
    """
    print('Id: {}'.format(block['Id']))
    if 'Text' in block:
        print('    Detected: ' + block['Text'])
    print('    Type: ' + block['BlockType'])

    if 'Confidence' in block:
        print('    Confidence: ' + "{:.2f}".format(block['Confidence']) + "%")

    if block['BlockType'] == 'CELL':
        print("    Cell information")
        print("        Column:" + str(block['ColumnIndex']))
        print("        Row:" + str(block['RowIndex']))
        print("        Column Span:" + str(block['ColumnSpan']))
        # Report the row span itself, not the column span a second time.
        print("        RowSpan:" + str(block['RowSpan']))

    if 'Relationships' in block:
        print('    Relationships: {}'.format(block['Relationships']))
    print('    Geometry: ')
    print('        Bounding Box: {}'.format(block['Geometry']['BoundingBox']))
    print('        Polygon: {}'.format(block['Geometry']['Polygon']))

    if block['BlockType'] == "KEY_VALUE_SET":
        print('    Entity Type: ' + block['EntityTypes'][0])

    if block['BlockType'] == 'SELECTION_ELEMENT':
        print('    Selection element detected: ', end='')
        # 'SelectionStatus' is only looked up for selection elements, so
        # other block types no longer fail with KeyError here.
        if block['SelectionStatus'] == 'SELECTED':
            print('Selected')
        else:
            print('Not selected')

    if 'Page' in block:
        # str() keeps this working when the page number is an int.
        print('Page: ' + str(block['Page']))
    print()

def process_text_analysis(bucket, document):
    """Analyze an image stored in S3 with Textract and show the annotated result.

    Downloads the object, calls ``analyze_document`` with the TABLES and
    FORMS features, prints every returned block, draws coloured bounding
    boxes on the image and opens it with ``image.show()``.

    Args:
        bucket: Name of the S3 bucket that holds the document.
        document: Key of the object inside the bucket. The object must be a
            raster image that Pillow can decode (for example PNG or JPEG).
            Pillow cannot open PDF files, so a PDF has to be converted to an
            image before it is passed to this function.

    Returns:
        The number of blocks in the Textract response.

    Raises:
        OSError: If the downloaded bytes are not an image Pillow can identify.
    """
    # Get the document from S3
    s3_connection = boto3.resource('s3')
    s3_object = s3_connection.Object(bucket, document)
    s3_response = s3_object.get()

    stream = io.BytesIO(s3_response['Body'].read())
    try:
        image = Image.open(stream)
    except OSError as err:
        # Pillow only reports "cannot identify image file <BytesIO ...>".
        # Re-raise the same exception type with the object name and the
        # likely cause, before any Textract call is made.
        raise OSError(
            "Cannot open '{}' from bucket '{}' as an image. Pillow cannot "
            "read PDF files; convert the document to PNG or JPEG "
            "first.".format(document, bucket)
        ) from err

    # Analyze the document
    client = boto3.client('textract')

    image_binary = stream.getvalue()
    response = client.analyze_document(
        Document={'Bytes': image_binary},
        FeatureTypes=["TABLES", "FORMS"])

    # Alternatively, process using S3 object
    #response = client.analyze_document(
    #    Document={'S3Object': {'Bucket': bucket, 'Name': document}},
    #    FeatureTypes=["TABLES", "FORMS"])

    # Get the text blocks
    blocks = response['Blocks']
    width, height = image.size
    # One drawing context is enough; it is reused for every block.
    draw = ImageDraw.Draw(image)
    print('Detected Document Text')

    # Create image showing bounding box/polygon the detected lines/text
    for block in blocks:

        DisplayBlockInformation(block)

        block_type = block['BlockType']
        bounding_box = block['Geometry']['BoundingBox']

        if block_type == "KEY_VALUE_SET":
            # Keys are outlined in red, the remaining entities (values) in
            # green. The else belongs to the entity-type test, not to the
            # block-type test.
            if block['EntityTypes'][0] == "KEY":
                ShowBoundingBox(draw, bounding_box, width, height, 'red')
            else:
                ShowBoundingBox(draw, bounding_box, width, height, 'green')

        if block_type == 'TABLE':
            ShowBoundingBox(draw, bounding_box, width, height, 'blue')

        if block_type == 'CELL':
            ShowBoundingBox(draw, bounding_box, width, height, 'yellow')

        if block_type == 'SELECTION_ELEMENT':
            if block['SelectionStatus'] == 'SELECTED':
                ShowSelectedElement(draw, bounding_box, width, height, 'blue')

        #uncomment to draw polygon for all Blocks
        #points=[]
        #for polygon in block['Geometry']['Polygon']:
        #    points.append((width * polygon['X'], height * polygon['Y']))
        #draw.polygon((points), outline='blue')

    # Display the image
    image.show()
    return len(blocks)


def main():
    """Run the text analysis on the hard-coded S3 object and print the block count."""
    bucket = 'textract-console-ap-southeast-1-e404965c-91e4-4f9e-89fb-242b7c5'
    document = 'Invoice_Sample2.pdf'
    detected = process_text_analysis(bucket, document)
    print("Blocks detected: " + str(detected))

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

Буду очень признателен за любую помощь.

...