Вот мой фрейм данных:
cars_num_df.head(10)
mpg cylinders displacement horsepower weight acceleration age
0 18.0 8 307.0 130.0 3504.0 12.0 13
1 15.0 8 350.0 165.0 3693.0 11.5 13
2 18.0 8 318.0 150.0 3436.0 11.0 13
3 16.0 8 304.0 150.0 3433.0 12.0 13
4 17.0 8 302.0 140.0 3449.0 10.5 13
5 15.0 8 429.0 198.0 4341.0 10.0 13
6 14.0 8 454.0 220.0 4354.0 9.0 13
7 14.0 8 440.0 215.0 4312.0 8.5 13
8 14.0 8 455.0 225.0 4425.0 10.0 13
9 15.0 8 390.0 190.0 3850.0 8.5 13
Затем я стандартизировал данные с помощью Z-оценки (z-score), и теперь хочу ЗАМЕНИТЬ выбросы (а не удалить их) медианным значением соответствующего столбца.
Я попробовал сделать так:
# Median and standard deviation of the standardized frame (one value per column).
median = cars_numz_df.median()
std = cars_numz_df.std()
# Plain assignment: `value` is another name for the same object, not a copy.
value = cars_numz_df
# Boolean mask, True where a cell is more than 2 standard deviations
# away from its column's median.
outliers = (value - median).abs() > 2*std
# NOTE: this overwrites the flagged cells with their absolute values;
# it does not put the median in their place.
cars_numz_df[outliers] = cars_numz_df[outliers].abs()
# Display the flagged cells only; cells where the mask is False show as NaN.
cars_numz_df[outliers]
mpg cylinders displacement horsepower weight acceleration age
0 NaN 1.498191 NaN NaN NaN NaN NaN
1 NaN 1.498191 NaN NaN NaN NaN NaN
2 NaN 1.498191 NaN NaN NaN NaN NaN
3 NaN 1.498191 NaN NaN NaN NaN NaN
4 NaN 1.498191 NaN NaN NaN NaN NaN
5 NaN 1.498191 2.262118 2.454408 NaN NaN NaN
6 NaN 1.498191 2.502182 3.030708 NaN 2.384735 NaN
7 NaN 1.498191 2.367746 2.899730 NaN 2.566274 NaN
8 NaN 1.498191 2.511784 3.161685 NaN NaN NaN
9 NaN 1.498191 1.887617 2.244844 NaN 2.566274 NaN
Теперь я пытаюсь заменить выбросы медианой следующим образом:
# This assignment raises "ValueError: Must specify axis=0 or 1" (traceback below):
# the frame is indexed with a boolean DataFrame while the right-hand side is the
# `median` object computed earlier, and the setter has no axis to align it on.
# NOTE(review): cars_numz_df.mask(outliers, median, axis=1) looks like the form
# that supplies the axis explicitly — confirm against the pandas docs.
cars_numz_df[outliers] = median
но я получаю эту ошибку:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-394-d48a51500f28> in <module>
9 cars_numz_df[outliers] = cars_numz_df[outliers].abs()
10
---> 11 cars_numz_df[outliers] = median
12
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in __setitem__(self, key, value)
3112
3113 if isinstance(key, DataFrame) or getattr(key, 'ndim', None) == 2:
-> 3114 self._setitem_frame(key, value)
3115 elif isinstance(key, (Series, np.ndarray, list, Index)):
3116 self._setitem_array(key, value)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py
in _setitem_frame(self, key, value)
3161 self._check_inplace_setting(value)
3162 self._check_setitem_copy()
-> 3163 self._where(-key, value, inplace=True)
3164
3165 def _ensure_valid_index(self, value):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py
in _where(self, cond, other, inplace, axis, level, errors, try_cast)
7543
7544 _, other = self.align(other, join='left', axis=axis,
-> 7545 level=level, fill_value=np.nan)
7546
7547 # if we are NOT aligned, raise as we cannot where index
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py
in align(self, other, join, axis, level, copy, fill_value, method, limit,
fill_axis, broadcast_axis)
3548 method=method,
limit=limit,
3549 fill_axis=fill_axis,
-> 3550
broadcast_axis=broadcast_axis)
3551
3552 @Appender(_shared_docs['reindex'] % _shared_doc_kwargs)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py
in align(self, other, join, axis, level, copy, fill_value, method, limit,
fill_axis, broadcast_axis)
7370 copy=copy,
fill_value=fill_value,
7371 method=method, limit=limit,
-> 7372 fill_axis=fill_axis)
7373 else: # pragma: no cover
7374 raise TypeError('unsupported type: %s' % type(other))
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\generic.py
in _align_series(self, other, join, axis, level, copy, fill_value, method,
limit, fill_axis)
7469 fdata = fdata.reindex_indexer(join_index, lidx,
axis=0)
7470 else:
-> 7471 raise ValueError('Must specify axis=0 or 1')
7472
7473 if copy and fdata is self._data:
ValueError: Must specify axis=0 or 1
Посоветуйте, пожалуйста, как я могу заменить выбросы на медиану столбца.