pandas: сетка точечных диаграмм (scatter subplots) по результату df.pivot() вызывает KeyError - PullRequest
0 голосов
/ 25 ноября 2018

Я создал фрейм данных с помощью df.pivot(), который выглядит следующим образом:

cluster    0     1    2    3     4    5     6      7    8     9
value                                                          
5          0     0    1    1     2    1     1      3    0     0
20         0     0    0    0     0    0     0      1    0     0
22         0     0    0    0     0    0     1      0    0     0
50         0     0    0    0     0    0     0      1    0     0
100      211   493  133  180   262   19   782   6295  137   517
200      667  1685  444  588   877  242  2630  21077  494  1751
250        0     1    0    0     0    0     0      3    1     0
300      180   480  133  177   234   20   744   5985  236   474
350        0     0    0    0     0    0     0      1    1     0

Я пытаюсь создать сетку из нескольких диаграмм рассеяния, по одной диаграмме рассеяния для каждого кластера. Нечто похожее на это:

Sample scatter subplot

Вот что я попробовал:

chart = df.plot(
 kind = 'scatter', 
 x = 'value', 
 y = 'cluster', 
 subplots = True, 
 sharex = True, 
 title = "Question value distribution across clusters"
)

Это приводит к ошибке KeyError, которая указывает, что функция построения графика не может получить доступ к столбцам сводной таблицы. Вот журнал:

KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'cluster'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-90-175952b92cec> in <module>()
     10 rk = gk.pivot(index = 'value', columns = 'cluster', values = 'count').fillna(0)
     11 rk = rk.astype('int')
---> 12 chart = xk.plot(kind = 'scatter', x = 'value', y = 'cluster', subplots = True, sharex = True, title = "Question value distribution for cluster "+str(cluster_no))
     13 # chart.set_xlabel("Value of question ($)")
     14 # chart.set_ylabel("Questions in cluster")

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2675                           fontsize=fontsize, colormap=colormap, table=table,
   2676                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2677                           sort_columns=sort_columns, **kwds)
   2678     __call__.__doc__ = plot_frame.__doc__
   2679 

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   1900                  yerr=yerr, xerr=xerr,
   1901                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 1902                  **kwds)
   1903 
   1904 

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in _plot(data, x, y, subplots, ax, kind, **kwds)
   1685         if isinstance(data, DataFrame):
   1686             plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax,
-> 1687                              kind=kind, **kwds)
   1688         else:
   1689             raise ValueError("plot kind %r can only be used for data frames"

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __init__(self, data, x, y, s, c, **kwargs)
    835             # the handling of this argument later
    836             s = 20
--> 837         super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs)
    838         if is_integer(c) and not self.data.columns.holds_integer():
    839             c = self.data.columns[c]

/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py in __init__(self, data, x, y, **kwargs)
    811         if len(self.data[x]._get_numeric_data()) == 0:
    812             raise ValueError(self._kind + ' requires x column to be numeric')
--> 813         if len(self.data[y]._get_numeric_data()) == 0:
    814             raise ValueError(self._kind + ' requires y column to be numeric')
    815 

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __getitem__(self, key)
   2137             return self._getitem_multilevel(key)
   2138         else:
-> 2139             return self._getitem_column(key)
   2140 
   2141     def _getitem_column(self, key):

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _getitem_column(self, key)
   2144         # get column
   2145         if self.columns.is_unique:
-> 2146             return self._get_item_cache(key)
   2147 
   2148         # duplicate columns & possible reduce dimensionality

/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1840         res = cache.get(item)
   1841         if res is None:
-> 1842             values = self._data.get(item)
   1843             res = self._box_item_values(item, values)
   1844             cache[item] = res

/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in get(self, item, fastpath)
   3841 
   3842             if not isna(item):
-> 3843                 loc = self.items.get_loc(item)
   3844             else:
   3845                 indexer = np.arange(len(self.items))[isna(self.items)]

/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2525                 return self._engine.get_loc(key)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 
   2529         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'cluster'

Как мне обойти эту ошибку?

...