Мой код
json_fi = glob.glob("*.json")
table_list=[]
for filename in json_fi:
with open(filename, encoding='utf-8', mode='r') as i:
data = pd.read_json(i, lines=True)
table_list.append(data)
for table in table_list:
writer = append_to_parquet_table(table, filepath, writer)
if writer:
writer.close()
Метод append_to_parquet
def append_to_parquet_table(dataframe, filepath=None, writer=None):
table = pa.Table.from_pandas(dataframe, preserve_index=False)
if writer is None:
writer = pq.ParquetWriter(filepath, table.schema)
writer.write_table(table=table)
return writer
Получена ошибка
File "p0.py", line 21, in append_to_parquet_table
writer.write_table(table=table)
File "/Users/milenko/djikii/venv/lib/python3.8/site-packages/pyarrow/parquet.py", line 590, in write_table
raise ValueError(msg)
Я смотрел parquet.py
def write_table(self, table, row_group_size=None):
if self.schema_changed:
table = _sanitize_table(table, self.schema, self.flavor)
assert self.is_open
if not table.schema.equals(self.schema, check_metadata=False):
msg = ('Table schema does not match schema used to create file: '
'\ntable:\n{!s} vs. \nfile:\n{!s}'
.format(table.schema, self.schema))
raise ValueError(msg)
Что такое self.schema? Я написал свой паркетный файл, и .metadata и .schema выглядят нормально. Почему у меня ошибка значения? Как это исправить?