Для приведённого ниже DataFrame pandas я хочу написать функцию / код на Python, который:
- помечает обновления продаж, сделанные продавцом, отличным от того, кто начал работу со счётом;
- ранжирует продавцов по оценке риска «кражи» чужих клиентов.
Любая помощь приветствуется. Спасибо!
Создадим образец данных о продажах со столбцами: account_id — счёт (клиент), last_updated_date — дата последнего обновления продажи, sales_id — идентификатор продавца.
```python
import pandas as pd

# Sample sales-update log: one row per update of an account by a sales person.
#   account_id        - the sales account that was updated
#   last_updated_date - when the update happened (small integers are used
#                       here in place of real dates)
#   sales_id          - the sales person who made the update
data = pd.DataFrame(
    [
        ['a', 1, 'A'],
        ['a', 2, 'B'],
        ['a', 3, 'C'],
        ['b', 5, 'D'],
        ['b', 6, 'E'],
        ['b', 7, 'F'],
        ['b', 8, 'G'],
        ['c', 10, 'H'],
        ['c', 11, 'I'],
        ['c', 12, 'J'],
        ['c', 13, 'K'],
        ['d', 14, 'J'],
        ['d', 15, 'J'],
    ],
    columns=['account_id', 'last_updated_date', 'sales_id'],
)
data  # display the sales data (notebook-style echo)
```
## Earliest sales update of the sales persons per account group, in one chained expression
```python
# Sort newest-first once, then take both ends of every account's history.
# Rows keep the sorted (descending) order inside each account group, so
# first() yields the most recent update and last() yields the oldest one.
# NOTE: first()/last() pick the first/last non-null value per column; the
# sample data has no nulls, so they return whole rows here.
updates_by_account = data \
    .sort_values(['last_updated_date'], ascending=False) \
    .groupby('account_id')

# df_f: latest update per account (needed by the later merge, which
# suffixes its columns with '_latest').
df_f = updates_by_account.first().reset_index()

# df_l: earliest update per account (suffixed '_earliest' in the merge).
df_l = updates_by_account.last().reset_index()
df_l
```
Merge the latest and the earliest sales-update data sets:
```python
# Join the two per-account frames on account_id. Overlapping column names
# coming from df_f receive the '_latest' suffix, those from df_l '_earliest'.
data_merge = pd.merge(
    df_f,
    df_l,
    on='account_id',
    how='outer',
    suffixes=('_latest', '_earliest'),
)
data_merge
```
Flag sales processes where the latest update was not performed by the sales person who put in the initial hard effort of attracting the new customer.
Cold calls and the initial effort of converting leads into sales are hard and important work, so the sales person who does it is credited more than one who merely processes sales started by others.
```python
import numpy as np

# An account is flagged 'yes' when the sales person behind its latest update
# differs from the one behind its earliest update. The comparison is
# column-wise over the whole frame, so one assignment covers every account;
# a per-account loop would only recompute the same column again and again
# (and would leave the column missing on an empty frame).
data_merge['rank_flag'] = np.where(
    data_merge['sales_id_latest'] != data_merge['sales_id_earliest'],
    'yes',
    'no',
)
data_merge

# Flag sales persons who process sales started by others: keep the flagged
# accounts and count them per sales person who made the latest update.
df = data_merge[data_merge['rank_flag'] == 'yes']
df_flag = df.groupby('sales_id_latest').count().reset_index()
# After count() every column holds a per-group count; only the key and the
# count taken from rank_flag are kept.
df_flag = df_flag[['sales_id_latest', 'rank_flag']]
print(df_flag)

# Count the rows of the original data per sales person. reset_index() turns
# the old index (sales_id) into a column and installs a new sequential index.
df_total = data.groupby('sales_id').count().reset_index()
df_total
```
Merge the original sales transaction data with the sales flag data to calculate the sales-steal risk per sales person:
```python
# Outer join of the per-sales-person totals with the per-sales-person flag
# counts; every missing value left by the join is then replaced with 0.
data_ratio = (
    pd.merge(
        df_total,
        df_flag,
        how='outer',
        left_on='sales_id',
        right_on='sales_id_latest',
        suffixes=('_total', '_flag'),
    )
    .fillna(0)
)
data_ratio
# Sales-stealing risk ratio per sales person: the rank_flag column divided
# element-wise by the last_updated_date column.
# NOTE(review): this presumes rank_flag holds the flagged-account count and
# last_updated_date the total update count from the preceding steps - confirm.
flagged_count = data_ratio['rank_flag']
total_count = data_ratio['last_updated_date']
data_ratio['risk_ratio'] = flagged_count.div(total_count)
data_ratio