При преобразовании фрейма данных в формат HDF выдается ошибка. Я не смог понять ее причину. Я попытался заполнить значения NaN, но по-прежнему получаю ту же ошибку.
Примечание: в моем фрейме данных есть только текстовые данные. Числовых столбцов нет — только столбцы типа object.
df.to_hdf('df.h5', 'df', format='table')
Я получаю две ошибки в одной трассировке.
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3932 errors=self.errors,
-> 3933 info=self.info,
3934 )
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom(self, block, block_items, existing_col, min_itemsize, nan_rep, info, encoding, errors)
2179 encoding,
-> 2180 errors,
2181 )
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding, errors)
2218 # itemsize is the maximum length of a string (along any dimension)
-> 2219 data_converted = _convert_string_array(data, encoding, errors)
2220 itemsize = data_converted.itemsize
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _convert_string_array(data, encoding, errors, itemsize)
4881
-> 4882 data = np.asarray(data, dtype="S{size}".format(size=itemsize))
4883 return data
~/miniconda3/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
84
---> 85 return array(a, dtype, copy=False, order=order)
86
MemoryError: Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-12-618772a3e197> in <module>
----> 1 df.to_hdf('df.h5', 'df', format='table')
~/miniconda3/lib/python3.7/site-packages/pandas/core/generic.py in to_hdf(self, path_or_buf, key, **kwargs)
2528 from pandas.io import pytables
2529
-> 2530 pytables.to_hdf(path_or_buf, key, self, **kwargs)
2531
2532 def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
276 path_or_buf, mode=mode, complevel=complevel, complib=complib
277 ) as store:
--> 278 f(store)
279 else:
280 f(path_or_buf)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in <lambda>(store)
269 f = lambda store: store.append(key, value, **kwargs)
270 else:
--> 271 f = lambda store: store.put(key, value, **kwargs)
272
273 path_or_buf = _stringify_path(path_or_buf)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in put(self, key, value, format, append, **kwargs)
957 format = get_option("io.hdf.default_format") or "fixed"
958 kwargs = self._validate_format(format, kwargs)
--> 959 self._write_to_group(key, value, append=append, **kwargs)
960
961 def remove(self, key, where=None, start=None, stop=None):
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1523
1524 # write the object
-> 1525 s.write(obj=value, append=append, complib=complib, **kwargs)
1526
1527 if s.is_table and index:
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
4192 # create the axes
4193 self.create_axes(
-> 4194 axes=axes, obj=obj, validate=append, min_itemsize=min_itemsize, **kwargs
4195 )
4196
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3942 "cannot find the correct atom type -> "
3943 "[dtype->{name},items->{items}] {detail!s}".format(
-> 3944 name=b.dtype.name, items=b_items, detail=detail
3945 )
3946 )
Exception: cannot find the correct atom type -> [dtype->object,items->Index(['a', 'b', 'c', 'd', 'e'], dtype='object')] Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414