Я пытаюсь создать датафрейм из списков и получаю следующее исключение:
Exception: Data must be 1-dimensional (данные должны быть одномерными)
project_transformed_data = pd.DataFrame(data = {'school_state':school_state,
'grade_one_hot':grade_one_hot,
'teacher_prefix':teacher_prefix,
'categories_one_hot':categories_one_hot,
'sub_categories_one_hot':sub_categories_one_hot,
'price_standardized':price_standardized,
'quantity_standardized':quantity_standardized,
'no_project_standardized':no_project_standardized,
'preprocessed_essays':preprocessed_essays,
'preprocessed_title':preprocessed_title,
'preprocessed_resource_description':preprocessed_resource_description
})
Полная трассировка стека (traceback) исключения:
Exception Traceback (most recent call last)
<ipython-input-42-534fb60e58d6> in <module>()
9 'preprocessed_essays':preprocessed_essays,
10 'preprocessed_title':preprocessed_title,
---> 11 'preprocessed_resource_description':preprocessed_resource_description
12 })
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
328 dtype=dtype, copy=copy)
329 elif isinstance(data, dict):
--> 330 mgr = self._init_dict(data, index, columns, dtype=dtype)
331 elif isinstance(data, ma.MaskedArray):
332 import numpy.ma.mrecords as mrecords
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
459 arrays = [data[k] for k in keys]
460
--> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
462
463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
6166
6167 # don't force copy because getting jammed in an ndarray anyway
-> 6168 arrays = _homogenize(arrays, index, dtype)
6169
6170 # from BlockManager perspective
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
6475 v = lib.fast_multiget(v, oindex.values, default=np.nan)
6476 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 6477 raise_cast_failure=False)
6478
6479 homogenized.append(v)
/usr/local/lib/python3.6/dist-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
3273 elif subarr.ndim > 1:
3274 if isinstance(data, np.ndarray):
-> 3275 raise Exception('Data must be 1-dimensional')
3276 else:
3277 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
preprocessed_resource_description — это обычный список, и тем не менее я получаю это исключение.
Есть идеи, почему возникает это исключение?
Пример данных:
print(preprocessed_resource_description[0:2])
print(type(preprocessed_resource_description))
print(len(preprocessed_resource_description))
Вывод:
['kids kore wobble chair 14 blackreading tree classroom rug shape rectangle rug dimensions 7 8 w x 10 9 lseat foam pad blackjack chair purple cotton', 'robot mouse stem activity set']
<class 'list'>
20000