Я работаю над сценарием, в котором отправляю фрейм данных в BigQuery:
load_job = bq_client.load_table_from_dataframe(
df, '.'.join([PROJECT, DATASET, PROGRAMS_TABLE])
)
# Wait for the load job to complete
return load_job.result()
Это работает нормально, но только если схема уже определена в BigQuery или если я определяю схема моей работы в моем сценарии. Если схема не была определена, у меня появляется следующая ошибка:
Traceback (most recent call last): File "/env/local/lib/python3.7/site-packages/google/cloud/bigquery/client.py", line 1661, in load_table_from_dataframe dataframe.to_parquet(tmppath, compression=parquet_compression) File "/env/local/lib/python3.7/site-packages/pandas/core/frame.py", line 2237, in to_parquet **kwargs File "/env/local/lib/python3.7/site-packages/pandas/io/parquet.py", line 254, in to_parquet **kwargs File "/env/local/lib/python3.7/site-packages/pandas/io/parquet.py", line 117, in write **kwargs File "/env/local/lib/python3.7/site-packages/pyarrow/parquet.py", line 1270, in write_table writer.write_table(table, row_group_size=row_group_size) File "/env/local/lib/python3.7/site-packages/pyarrow/parquet.py", line 426, in write_table self.writer.write_table(table, row_group_size=row_group_size) File "pyarrow/_parquet.pyx", line 1311, in pyarrow._parquet.ParquetWriter.write_table File "pyarrow/error.pxi", line 85, in pyarrow.lib.check_status pyarrow.lib.ArrowInvalid: Casting from timestamp[ns] to timestamp[ms] would lose data: 1578661876547574000 During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 383, in run_background_function _function_handler.invoke_user_function(event_object) File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function return call_user_function(request_or_event) File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 214, in call_user_function event_context.Context(**request_or_event.context)) File "/user_code/main.py", line 151, in main df = df(param1, param2) File "/user_code/main.py", line 141, in get_df df, '.'.join([PROJECT, DATASET, PROGRAMS_TABLE]) File "/env/local/lib/python3.7/site-packages/google/cloud/bigquery/client.py", line 1677, in load_table_from_dataframe os.remove(tmppath) FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmp_ps5xji9_job_634ff274.parquet'
Почему pyarrow
генерирует эту ошибку? Как я могу решить эту проблему, кроме предопределенной схемы?