Я работаю над ETL-заданием. У меня есть два датафрейма: один получен из CSV-файла, другой — из ответа API, сохранённого в формате JSON. Сейчас я пытаюсь загрузить эти датафреймы в свою базу данных PostgreSQL, но постоянно получаю ошибки, которые не могу понять, сколько бы я их ни гуглил.
Вот код и ошибка:
## Connecting to my local database [changed items (<>) to cover my info]
rds_connection_string = "<username>:<password>@localhost:5432/<database name>"
engine = create_engine(f'postgresql://{rds_connection_string}')
## Attempt to load CSV converted Dataframe into database
inspect_transformed.to_sql(name='Ny_Inspection', con=engine, if_exists='append', index=False)
##Error message (same error for JSON attempt)
---------------------------------------------------------------------------
InvalidTextRepresentation Traceback (most recent call last)
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self,
dialect, constructor, statement, parameters, *args)
1223 self.dialect.do_executemany(
-> 1224 cursor, statement, parameters, context
1225 )
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\dialects\postgresql\psycopg2.py in
do_executemany(self, cursor, statement, parameters, context)
772 else:
--> 773 cursor.executemany(statement, parameters)
774
InvalidTextRepresentation: invalid input syntax for integer: "NKA"
LINE 1: ... "Zip_Code", "Boro", "Grade") VALUES ('THE PALM', 'NKA', 'JF...
^
The above exception was the direct cause of the following exception:
DataError Traceback (most recent call last)
<ipython-input-15-0683923e3eec> in <module>
----> 1 inspect_transformed.to_sql(name='Ny_Inspection', con=engine, if_exists='append',
index=False)
~\Anaconda3\envs\MyPyEnv\lib\site-packages\pandas\core\generic.py in to_sql(self, name, con, schema,
if_exists, index, index_label, chunksize, dtype, method)
2711 chunksize=chunksize,
2712 dtype=dtype,
-> 2713 method=method,
2714 )
2715
~\Anaconda3\envs\MyPyEnv\lib\site-packages\pandas\io\sql.py in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
516 chunksize=chunksize,
517 dtype=dtype,
--> 518 method=method,
519 )
520
~\Anaconda3\envs\MyPyEnv\lib\site-packages\pandas\io\sql.py in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method)
1318 )
1319 table.create()
-> 1320 table.insert(chunksize, method=method)
1321 if not name.isdigit() and not name.islower():
1322 # check for potentially case sensitivity issues (GH7815)
~\Anaconda3\envs\MyPyEnv\lib\site-packages\pandas\io\sql.py in insert(self, chunksize, method)
754
755 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 756 exec_insert(conn, keys, chunk_iter)
757
758 def _query_iterator(
~\Anaconda3\envs\MyPyEnv\lib\site-packages\pandas\io\sql.py in _execute_insert(self, conn, keys, data_iter)
668 """
669 data = [dict(zip(keys, row)) for row in data_iter]
--> 670 conn.execute(self.table.insert(), data)
671
672 def _execute_insert_multi(self, conn, keys, data_iter):
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in execute(self, object_, *multiparams, **params)
986 raise exc.ObjectNotExecutableError(object_)
987 else:
--> 988 return meth(self, multiparams, params)
989
990 def _execute_function(self, func, multiparams, params):
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\sql\elements.py in _execute_on_connection(self, connection, multiparams, params)
285 def _execute_on_connection(self, connection, multiparams, params):
286 if self.supports_execution:
--> 287 return connection._execute_clauseelement(self, multiparams, params)
288 else:
289 raise exc.ObjectNotExecutableError(self)
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in _execute_clauseelement(self, elem, multiparams, params)
1105 distilled_params,
1106 compiled_sql,
-> 1107 distilled_params,
1108 )
1109 if self._has_events or self.engine._has_events:
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1246 except BaseException as e:
1247 self._handle_dbapi_exception(
-> 1248 e, statement, parameters, cursor, context
1249 )
1250
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1464 util.raise_from_cause(newraise, exc_info)
1465 elif should_wrap:
-> 1466 util.raise_from_cause(sqlalchemy_exception, exc_info)
1467 else:
1468 util.reraise(*exc_info)
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\util\compat.py in raise_from_cause(exception, exc_info)
397 exc_type, exc_value, exc_tb = exc_info
398 cause = exc_value if exc_value is not exception else None
--> 399 reraise(type(exception), exception, tb=exc_tb, cause=cause)
400
401
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
151 value.__cause__ = cause
152 if value.__traceback__ is not tb:
--> 153 raise value.with_traceback(tb)
154 raise value
155
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\engine\base.py in _execute_context(self, dialect, constructor, statement, parameters, *args)
1222 if not evt_handled:
1223 self.dialect.do_executemany(
-> 1224 cursor, statement, parameters, context
1225 )
1226 elif not parameters and context.no_parameters:
~\Anaconda3\envs\MyPyEnv\lib\site-packages\sqlalchemy\dialects\postgresql\psycopg2.py in do_executemany(self, cursor, statement, parameters, context)
771 extras.execute_batch(cursor, statement, parameters)
772 else:
--> 773 cursor.executemany(statement, parameters)
774
775 @util.memoized_instancemethod
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for integer: "NKA"
LINE 1: ... "Zip_Code", "Boro", "Grade") VALUES ('THE PALM', 'NKA', 'JF...
^
[SQL: INSERT INTO "Ny_Inspection" ("Restaurant_Name", "Building", "Street", "Zip_Code", "Boro", "Grade") VALUES (%(Restaurant_Name)s, %(Building)s, %(Street)s, %(Zip_Code)s, %(Boro)s, %(Grade)s)]
[parameters: ({'Restaurant_Name': 'A&H DELI', 'Building': '431', 'Street': '7TH AVE', 'Zip_Code': 10001.0, 'Boro': 'Manhattan', 'Grade': 'A'}, {'Restaurant_Name': 'ANTOJITOS ECUATORIANOS', 'Building': '3398', 'Street': 'FULTON ST', 'Zip_Code': 11208.0, 'Boro': 'Brooklyn', 'Grade': 'A'}, {'Restaurant_Name': 'I LAND FISH & GRILL', 'Building': '7911', 'Street': 'FLATLANDS AVE', 'Zip_Code': 11236.0, 'Boro': 'Brooklyn', 'Grade': 'B'}, {'Restaurant_Name': 'CAFE LAFAYETTE', 'Building': '80', 'Street': 'LAFAYETTE STREET', 'Zip_Code': 10013.0, 'Boro': 'Manhattan', 'Grade': 'A'}, {'Restaurant_Name': 'FLY BAR', 'Building': '4224', 'Street': 'COLLEGE POINT BLVD', 'Zip_Code': 11355.0, 'Boro': 'Queens', 'Grade': 'A'}, {'Restaurant_Name': 'ACE HOT BAGEL & DELI', 'Building': '25305', 'Street': 'NORTHERN BOULEVARD', 'Zip_Code': 11362.0, 'Boro': 'Queens', 'Grade': 'A'}, {'Restaurant_Name': 'PIZZA CHEF', 'Building': '564', 'Street': 'WEST 235 STREET', 'Zip_Code': 10463.0, 'Boro': 'Bronx', 'Grade': 'A'}, {'Restaurant_Name': 'MANCORA BAR & APNA MASALA INDIAN CUISINE', 'Building': '344', 'Street': 'E 6TH ST', 'Zip_Code': 10003.0, 'Boro': 'Manhattan', 'Grade': 'A'} ... displaying 10 of 196370 total bound parameter sets ... {'Restaurant_Name': 'DINOSAUR BAR-B-QUE', 'Building': '2276', 'Street': '12 AVENUE', 'Zip_Code': 10027.0, 'Boro': 'Manhattan', 'Grade': 'A'}, {'Restaurant_Name': "MCDONALD'S", 'Building': '5713', 'Street': 'CHURCH AVENUE', 'Zip_Code': 11203.0, 'Boro': 'Brooklyn', 'Grade': 'A'})]
(Background on this error at: http://sqlalche.me/e/9h9h)
Я прочитал документацию, но так и не понял, в чём проблема и как её исправить. Помогите, пожалуйста!