# Revenue = Active Customer Count * Order Count * Average Revenue per Order
#converting the type of Invoice Date Field from string to datetime.
tx_data['InvoiceDate'] = pd.to_datetime(tx_data['InvoiceDate'])
#creating YearMonth field for the ease of reporting and visualization
tx_data['InvoiceYearMonth'] = tx_data['InvoiceDate'].map(lambda date: 100*date.year + date.month)
#calculate Revenue for each row and create a new dataframe with YearMonth - Revenue columns
tx_data['Revenue'] = tx_data['UnitPrice'] * tx_data['Quantity']
tx_revenue = tx_data.groupby(['InvoiceYearMonth'])['Revenue'].sum().reset_index()
tx_revenue
#creating a new dataframe with UK customers only
tx_uk = tx_data.query("Country=='United Kingdom'").reset_index(drop=True)
#creating monthly active customers dataframe by counting unique Customer IDs
tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()
#print the dataframe
tx_monthly_active
#plotting the output
plot_data = [
go.Bar(
x=tx_monthly_active.query['InvoiceYearMonth'],
y=tx_monthly_active.query['CustomerID'],
)
]
plot_layout = go.Layout(
xaxis={"type": "category"},
title='Monthly Active Customers'
)
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)
Он работал в коде, который я написал ранее. Но здесь отображается ошибка. Я был бы очень признателен, если бы было предложено решение. Я использую Jupyter Notebook в Pycharm. Я действительно не могу понять, в чем проблема. Я все еще новичок в программировании, поэтому мне немного сложно разобраться в этой проблеме.
KeyError Traceback (most recent call last)
<ipython-input-26-82f7e61120b9> in <module>
3
4 #creating monthly active customers dataframe by counting unique Customer IDs
----> 5 tx_monthly_active = tx_uk.groupby('InvoiceYearMonth')['CustomerID'].nunique().reset_index()
6
7 #print the dataframe
c:\users\aayus\pycharmprojects\helloworld\venv\lib\site-packages\pandas\core\frame.py in groupby(self, by, axis, level, as_index, sort, group_keys, squeeze, observed)
5799 axis = self._get_axis_number(axis)
5800
-> 5801 return groupby_generic.DataFrameGroupBy(
5802 obj=self,
5803 keys=by,
c:\users\aayus\pycharmprojects\helloworld\venv\lib\site-packages\pandas\core\groupby\groupby.py in __init__(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated)
400 from pandas.core.groupby.grouper import get_grouper
401
--> 402 grouper, exclusions, obj = get_grouper(
403 obj,
404 keys,
c:\users\aayus\pycharmprojects\helloworld\venv\lib\site-packages\pandas\core\groupby\grouper.py in get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
596 in_axis, name, level, gpr = False, None, gpr, None
597 else:
--> 598 raise KeyError(gpr)
599 elif isinstance(gpr, Grouper) and gpr.key is not None:
600 # Add key to exclusions
KeyError: 'InvoiceYearMonth'