Python: UnicodeEncodeError: кодек «ascii» не может кодировать символы - PullRequest
0 голосов
/ 19 февраля 2019

Я пытаюсь записать DataFrame (pandas) в базу данных Oracle.

Мой входной файл имеет формат xlsx, поэтому сначала я загружаю его в DataFrame с помощью функции read_excel:

# Load worksheet "data1" into a DataFrame, skipping the first 20 rows and
# naming the three columns explicitly.
# `encoding`, `charset` and `skipinitialspace` are not documented read_excel
# parameters (recent pandas versions reject unknown keywords), so they are
# dropped here; the stray trailing backtick after the call is removed as well.
df = pd.read_excel(
    'myfile.xlsx',
    sheet_name='data1',
    skiprows=20,
    names=["v1", "v2", "v3"],
    dtype={
        "v1": str,
        "v2": np.int64,
        "v3": np.float64,
    },
)

Вот скрипт, который я использую для записи в таблицу sql: insertDfToDB(myconnexion, df, 'table')

При выполнении этого вызова возникает ошибка: UnicodeEncodeError: 'ascii' codec can't encode characters in position 8-13: ordinal not in range(128)

Я использую python 3 в среде anaconda.

Вот мои функции для подключения и записи DataFrame в базу данных Oracle:

import sys
import cx_Oracle
import pandas as pd
from sqlalchemy import types, create_engine, text

%load_ext autoreload
%autoreload 2
%matplotlib inline
def printf(format, *args):
    """Write ``format % args`` to standard output, C ``printf`` style.

    Args:
        format: A %-style format string.
        *args: Values substituted into *format*.

    Nothing is appended to the formatted text, so *format* must contain its
    own line terminator when one is wanted.
    """
    # sys.stdout is looked up at call time, so redirection of stdout works.
    sys.stdout.write(format % args)


def printException(exception):
    """Print the code and message carried by *exception*.

    ``exception.args`` must hold exactly one object; its ``code`` and
    ``message`` attributes are written, in that order, through ``printf``.
    """
    (details,) = exception.args
    report = (
        ("Error code = %s\n", "code"),
        ("Error message = %s\n", "message"),
    )
    # Each attribute is read right before it is printed, one line at a time.
    for line_format, attribute in report:
        printf(line_format, getattr(details, attribute))

def sqlFromFile(fn):
    """Execute every ``;``-separated command found in the file *fn*.

    Commands are executed one at a time on ``c``, a module-level object that
    is not defined in this function (presumably an open cx_Oracle cursor --
    confirm against the caller).  A command that raises
    ``cx_Oracle.DatabaseError`` is reported on standard output and skipped;
    the remaining commands are still executed.

    Args:
        fn: Path of the SQL script to read.

    Note:
        The split on ``;`` is purely textual, so a semicolon inside a string
        literal also ends a command, and the text after the last ``;`` is
        executed as a command too.
    """
    # The context manager closes the file even when read() raises.
    with open(fn) as sql_file:
        script = sql_file.read()

    for cmd in script.split(';'):
        try:
            c.execute(cmd)
        except cx_Oracle.DatabaseError as e:
            print("Command skipped: ", e)


def connectOraCx(host=None, sname=None, user=None, pw=None, port=1521):
    """Open a cx_Oracle connection to an Oracle service.

    Args:
        host: Database host name or address.
        sname: Oracle service name.
        user: Database user name.
        pw: Password for *user*.
        port: Listener port (default 1521).

    Returns:
        The connection object returned by ``cx_Oracle.connect``.

    On ``cx_Oracle.DatabaseError`` the failure is printed and the process is
    terminated with ``sys.exit(1)``.
    """
    con_str = """(DESCRIPTION=
    (ADDRESS=(PROTOCOL=tcp)(HOST={0})(PORT={2}))
    (CONNECT_DATA=(SERVICE_NAME ={1})))""".format(host, sname, port)

    try:
        # Request UTF-8 explicitly for both character sets.  Without it,
        # older cx_Oracle releases derive the client encoding from NLS_LANG
        # and fall back to ASCII, which fails on non-ASCII data with
        # "UnicodeEncodeError: 'ascii' codec can't encode characters".
        return cx_Oracle.connect(user, pw, con_str,
                                 encoding="UTF-8", nencoding="UTF-8")
    except cx_Oracle.DatabaseError as e:
        printf('Failed to connect to %s\n', sname)
        printException(e)
        sys.exit(1)

def connectOraAlq(host=None, sname=None, user=None, pw=None, port=1521):
    """Open a SQLAlchemy connection to an Oracle service through cx_Oracle.

    Args:
        host: Database host name or address.
        sname: Oracle service name.
        user: Database user name.
        pw: Password for *user*.
        port: Listener port (default 1521).

    Returns:
        The connection returned by ``Engine.connect()``.

    On ``cx_Oracle.DatabaseError`` the failure is printed and the process is
    terminated with ``sys.exit(1)``.
    """
    from urllib.parse import quote_plus

    # Credentials are URL-escaped so characters such as '@', '/' or ':' in a
    # password cannot corrupt the connection URL.  str() keeps the previous
    # rendering of non-string values (e.g. the None defaults).
    con_str = "oracle+cx_oracle://{0}:{1}@{2}:{4}/?service_name={3}".format(
        quote_plus(str(user)), quote_plus(str(pw)), host, sname, port)

    try:
        # connect_args is forwarded to cx_Oracle.connect().  Requesting UTF-8
        # explicitly avoids the ASCII fallback of older cx_Oracle releases,
        # which makes executemany() fail on non-ASCII strings with
        # "UnicodeEncodeError: 'ascii' codec can't encode characters".
        oracle_db = create_engine(
            con_str,
            connect_args={"encoding": "UTF-8", "nencoding": "UTF-8"})
        return oracle_db.connect()
    except cx_Oracle.DatabaseError as e:
        # NOTE(review): SQLAlchemy usually wraps driver errors in its own
        # sqlalchemy.exc exception classes, so this handler may not fire for
        # connection failures -- confirm and widen it if that is intended.
        printf('Failed to connect to %s\n', sname)
        printException(e)
        sys.exit(1)


def insertDfToDB(con, df, tn):
    """Append the rows of *df* to the database table *tn*.

    Every ``object``-dtype column is mapped to a ``VARCHAR`` whose length is
    the longest string found in that column; all other columns use the
    default pandas type mapping.  Rows are written in chunks of 5000.

    Args:
        con: Connection or engine accepted by ``DataFrame.to_sql``.
        df: DataFrame to write.
        tn: Name of the target table.
    """
    dtyp = {}
    for col in df.columns[df.dtypes == 'object']:
        longest = df[col].str.len().max()
        # max() is NaN for an empty or all-null column and a float as soon as
        # the column contains a null, so coerce to a plain int.  The length
        # is kept at 1 or more because a zero-length VARCHAR is not a valid
        # column definition.
        length = int(longest) if pd.notnull(longest) else 1
        dtyp[col] = types.VARCHAR(max(length, 1))

    df.to_sql(tn, con, index=False, if_exists='append', dtype=dtyp,
              chunksize=5000)

Вот полная трассировка ошибки:

---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-49-6a85449f92e4> in <module>()
----> 1 insertDfToDB(con_AA_alq, open_supplier_order,'open_supplier_order')

<ipython-input-10-e974cf09feca> in insertDfToDB(con, df, tn)
     66     dtyp = {c:types.VARCHAR(df[c].str.len().max())
     67         for c in df.columns[(df.dtypes == 'object')].tolist()}
---> 68     df.to_sql(tn, con, index = False, if_exists='append', dtype=dtyp, chunksize = 5000)

/ds_data/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype)
   2128         sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,
   2129                    index=index, index_label=index_label, chunksize=chunksize,
-> 2130                    dtype=dtype)
   2131 
   2132     def to_pickle(self, path, compression='infer',

/ds_data/anaconda3/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype)
    448     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
    449                       index_label=index_label, schema=schema,
--> 450                       chunksize=chunksize, dtype=dtype)
    451 
    452 

/ds_data/anaconda3/lib/python3.6/site-packages/pandas/io/sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1125                          schema=schema, dtype=dtype)
   1126         table.create()
-> 1127         table.insert(chunksize)
   1128         if (not name.isdigit() and not name.islower()):
   1129             # check for potentially case sensitivity issues (GH7815)

/ds_data/anaconda3/lib/python3.6/site-packages/pandas/io/sql.py in insert(self, chunksize)
    639 
    640                 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 641                 self._execute_insert(conn, keys, chunk_iter)
    642 
    643     def _query_iterator(self, result, chunksize, columns, coerce_float=True,

/ds_data/anaconda3/lib/python3.6/site-packages/pandas/io/sql.py in _execute_insert(self, conn, keys, data_iter)
    614     def _execute_insert(self, conn, keys, data_iter):
    615         data = [{k: v for k, v in zip(keys, row)} for row in data_iter]
--> 616         conn.execute(self.insert_statement(), data)
    617 
    618     def insert(self, chunksize=None):

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params)
    946             raise exc.ObjectNotExecutableError(object)
    947         else:
--> 948             return meth(self, multiparams, params)
    949 
    950     def _execute_function(self, func, multiparams, params):

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params)
    267     def _execute_on_connection(self, connection, multiparams, params):
    268         if self.supports_execution:
--> 269             return connection._execute_clauseelement(self, multiparams, params)
    270         else:
    271             raise exc.ObjectNotExecutableError(self)

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params)
   1058             compiled_sql,
   1059             distilled_params,
-> 1060             compiled_sql, distilled_params
   1061         )
   1062         if self._has_events or self.engine._has_events:

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1198                 parameters,
   1199                 cursor,
-> 1200                 context)
   1201 
   1202         if self._has_events or self.engine._has_events:

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1414                 )
   1415             else:
-> 1416                 util.reraise(*exc_info)
   1417 
   1418         finally:

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause)
    185         if value.__traceback__ is not tb:
    186             raise value.with_traceback(tb)
--> 187         raise value
    188 
    189 else:

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1168                         statement,
   1169                         parameters,
-> 1170                         context)
   1171             elif not parameters and context.no_parameters:
   1172                 if self.dialect._has_events:

/ds_data/anaconda3/lib/python3.6/site-packages/sqlalchemy/dialects/oracle/cx_oracle.py in do_executemany(self, cursor, statement, parameters, context)
    850         if isinstance(parameters, tuple):
    851             parameters = list(parameters)
--> 852         cursor.executemany(statement, parameters)
    853 
    854     def do_begin_twophase(self, connection, xid):

UnicodeEncodeError: 'ascii' codec can't encode characters in position 8-13: ordinal not in range(128)
...