Отключение распознавания при загрузке на Google Диск через API - PullRequest
0 голосов
/ 29 мая 2018

У меня возникла проблема с unoconv: он периодически зависал на неопределённое время при попытке конвертировать случайные документы в PDF, поэтому я написал небольшой скрипт на Python, который загружает документы в Google Drive и скачивает их обратно в виде PDF-файлов, чтобы обойти эту проблему.

Проблема, с которой я столкнулся, заключается в том, что Google Drive автоматически пытается распознавать текст (OCR) на загруженных изображениях, а я не хочу, чтобы это происходило, но пока не могу найти документацию о том, как отключить OCR.

Я заметил одну вещь: я использую функцию create из API v3, а в API v2 есть функция insert, которая принимает флаг ocr. Возможно ли это с API v3?

Вот мой код:

    from __future__ import print_function
    import httplib2
    import magic
    import io
    import sys
    import argparse
    import subprocess as sp

    from apiclient import discovery
    from oauth2client.service_account import ServiceAccountCredentials
    from httplib2 import Http

    from googleapiclient.http import MediaFileUpload
    from googleapiclient.http import MediaIoBaseDownload

    from settings import *

    """
    This script exists to mask unoconv for JUST pdf conversion. If it gets flags for anything else, it will fall back on unoconv.

    Otherwise, it uploads the document to google drive, downloads it as a pdf, and then deletes the file from the drive.
    """

    # Maps the MIME type detected on the local file (by libmagic) to the Google
    # Workspace MIME type the upload should be converted to. The values must be
    # the exact type names Drive defines: "document", "spreadsheet" (singular)
    # and "presentation".
    MIMETYPE_MAPPING = {
        # Text documents -> Google Docs
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "application/vnd.google-apps.document",
        "application/rtf": "application/vnd.google-apps.document",
        "text/richtext": "application/vnd.google-apps.document",
        "text/plain": "application/vnd.google-apps.document",
        "text/html": "application/vnd.google-apps.document",
        "application/vnd.oasis.opendocument.text": "application/vnd.google-apps.document",
        "application/x-iwork-pages-sffpages": "application/vnd.google-apps.document",
        "application/msword": "application/vnd.google-apps.document",

        # Spreadsheets -> Google Sheets
        "application/vnd.ms-excel": "application/vnd.google-apps.spreadsheet",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "application/vnd.google-apps.spreadsheet",
        "text/csv": "application/vnd.google-apps.spreadsheet",
        "text/tab-separated-values": "application/vnd.google-apps.spreadsheet",
        # Non-standard plural spelling kept as an extra lookup key; harmless.
        "application/vnd.oasis.opendocument.spreadsheets": "application/vnd.google-apps.spreadsheet",
        "application/vnd.oasis.opendocument.spreadsheet": "application/vnd.google-apps.spreadsheet",

        # Presentations -> Google Slides
        "application/vnd.ms-powerpoint": "application/vnd.google-apps.presentation",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation": "application/vnd.google-apps.presentation",
        "application/vnd.oasis.opendocument.presentation": "application/vnd.google-apps.presentation",

        # Images -> Google Docs
        "image/png": "application/vnd.google-apps.document",
        "image/x-citrix-png": "application/vnd.google-apps.document",
        "image/x-png": "application/vnd.google-apps.document",
        "image/jpeg": "application/vnd.google-apps.document",
        "image/x-citrix-jpeg": "application/vnd.google-apps.document",
        "image/gif": "application/vnd.google-apps.document",
        "image/bmp": "application/vnd.google-apps.document",

        # PDF -> Google Docs
        "application/pdf": "application/vnd.google-apps.document",
    }

    # Module-level cache for the Drive client so it is built at most once.
    SERVICE = None

    def get_service():
        """
        Return the Google Drive v3 API client, building it on first use.

        The client is stored in the module-level SERVICE variable and reused by
        every later call. Credentials are created from JSON_KEY, which is not
        defined in this file (presumably supplied by the settings star-import;
        verify).
        """
        global SERVICE
        if SERVICE is not None:
            return SERVICE

        credentials = ServiceAccountCredentials.from_json(JSON_KEY)
        authorized_http = credentials.authorize(Http())
        SERVICE = discovery.build('drive', 'v3', http=authorized_http)
        return SERVICE

    def drive_upload(fp, fn):
        """
        Upload the file at path fp to Google Drive under the name fn.

        The source MIME type is detected from the file contents with libmagic
        and looked up in MIMETYPE_MAPPING to choose the Google type the upload
        is converted to; unmapped types are converted to a Google document.
        No parent folder is specified in the request metadata.

        Returns the id of the newly created Drive file.
        """
        source_mimetype = magic.from_file(fp, mime=True)
        drive_service = get_service()
        file_metadata = {
            'name': fn,
            'mimeType': MIMETYPE_MAPPING.get(source_mimetype, 'application/vnd.google-apps.document'),
        }
        media = MediaFileUpload(fp,
                                mimetype=source_mimetype,
                                resumable=True)

        # Only the id is requested back; that is all the caller needs.
        created = drive_service.files().create(body=file_metadata,
                                               media_body=media,
                                               fields='id').execute()
        return created.get('id')

    def download_pdf(file_id,dlp):
        """
        Export the Drive file identified by file_id as a PDF and save it at dlp.

        The export response is written to dlp as-is, replacing any existing
        file at that path.
        """
        drive_service = get_service()
        request = drive_service.files().export_media(fileId=file_id,
                                                     mimeType='application/pdf')
        pdf_bytes = request.execute()
        # PDF content is binary: open in 'wb' so the bytes are written untouched,
        # and use a context manager so the handle is closed even if write fails.
        with open(dlp, 'wb') as out:
            out.write(pdf_bytes)

    def convert_to_pdf(inputf, outputf):
        """
        Convert inputf to a PDF at outputf by round-tripping it through Google Drive.

        The file is uploaded under its base name (the part after the last '/'),
        exported as a PDF to outputf, and then deleted from Drive. The delete
        runs even when the download fails, so failed conversions do not leave
        files behind in the Drive account; the download error still propagates
        to the caller.
        """
        fid = drive_upload(inputf,inputf.split('/')[-1])
        try:
            download_pdf(fid,outputf)
        finally:
            # Always remove the temporary upload from Drive.
            get_service().files().delete(fileId=fid).execute()

    def pass_through():
        """
        Run unoconv with the same arguments this script received, then exit.

        The child's stdout and stderr are captured and relayed unchanged to this
        process's stdout and stderr, and the process exits with the child's
        return code. This function never returns.
        """
        print("PASSING THROUGH",file=sys.stderr)
        # Build the argv list directly: PATH_TO_UNOCONV is still split on
        # whitespace (as before), but the user's arguments are forwarded
        # verbatim so paths containing spaces are not broken apart.
        cmd = PATH_TO_UNOCONV.split() + sys.argv[1:]
        child = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE)
        stdout, stderr = child.communicate()

        # The captured output is bytes. Write it to the underlying binary streams
        # where they exist (Python 3) so it is relayed as-is rather than printed
        # as a b'...' repr; on Python 2 the text streams accept it directly.
        sys.stderr.flush()
        out_stream = getattr(sys.stdout, 'buffer', sys.stdout)
        err_stream = getattr(sys.stderr, 'buffer', sys.stderr)
        out_stream.write(stdout)
        out_stream.flush()
        err_stream.write(stderr)
        err_stream.flush()
        sys.exit(child.returncode)

    class ArgParse(argparse.ArgumentParser):
        """
        ArgumentParser variant whose failure path delegates to unoconv.

        A zero exit status is handled by the stock ArgumentParser.exit. Any
        non-zero status (usually caused by flags this wrapper does not support)
        calls pass_through so that unoconv handles the invocation instead.
        """

        def exit(self, status=0,message=None):
            # Successful exits keep the standard argparse behaviour.
            if status == 0:
                return super(ArgParse,self).exit(status=status,message=message)
            # Anything else: hand the original command line to unoconv.
            pass_through()

    if __name__ == '__main__':
        # Entry point: convert via Google Drive when a PDF is requested with an
        # explicit output path; defer to unoconv in every other case.
        parser = ArgParse(description="Wrapper for unoconv that farms pdf conversions to google drive, using any args other than the supplied will cause it to fallback on unoconv")
        parser.add_argument('-f', metavar='format', help='Desired ouput format')
        parser.add_argument('-o', metavar='output_file',  help='Path to output file')
        parser.add_argument('fname', metavar='inputf', type=str, nargs=1, help='Path to file to convert')

        args = parser.parse_args()

        fmt = args.f
        output_file = args.o
        input_file = args.fname[0]

        # -f and -o are optional, so either may be None. Without both there is
        # nothing this wrapper can do, and calling fmt.upper() on None would crash.
        if fmt and output_file and fmt.upper() == "PDF":
            try:
                convert_to_pdf(input_file, output_file)
            except Exception:
                # Any conversion failure falls back to unoconv. Exception (not a
                # bare except) lets Ctrl-C and SystemExit propagate normally.
                pass_through()
        else:
            #if we aren't converting the file to a PDF, let unoconv handle it
            pass_through()
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...