У меня два вопроса:
- Мои метки данных отображаются как 0,00 и не соответствуют моей кросс-таблице.Я не знаю, почему ...
Обновление с полным кодом:
df = pd.read_csv('2018_ms_data_impact_only.csv', low_memory=False)
df.head()
StartDate EndDate Status IPAddress Progress duration Finished RecordedDate ResponseId RecipientLastName ... Gender LGBTQ Mobile organizing_interest Parent Policy policy_interest reg_to_vote unique_id Veteran
0 4/6/18 10:32 4/6/18 10:39 1 NaN 100 391 1 4/6/18 10:39 R_1liSDxRmTKDLFfT Mays ... Woman 0.0 4752122624 Currently in this field 1.0 NaN NaN 0.0 0034000001VAbTAAA1 0.0
1 4/9/18 6:31 4/9/18 6:33 1 NaN 100 160 1 4/9/18 6:33 R_0ezRf2zyaLwFDa1 Mays ... Woman 0.0 4752122684 2020 0.0 A 2020 0.0 0034000001W3tGOAAZ 0.0
2 4/9/18 9:14 4/9/18 9:15 1 NaN 100 70 1 4/9/18 9:15 R_DeHh3DQ23uQZwLD Mays ... Woman 0.0 4752122684 2020 0.0 A 2020 0.0 0034000001W3tGOAAZ 0.0
3 4/9/18 9:21 4/9/18 9:22 1 NaN 100 69 1 4/9/18 9:22 R_1CC0ckmyS7E1qs3 Mays ... Woman 0.0 4752122684 2020 0.0 A 2020 0.0 0034000001W3tGOAAZ 0.0
4 4/9/18 9:28 4/9/18 9:29 1 NaN 100 54 1 4/9/18 9:29 R_01GuM5KqtHIgvEl Mays ... Woman 0.0 4752122684 2020 0.0 A 2020 0.0 0034000001W3tGOAAZ 0.0
def impact_action_yn_new(series):
if series == 3:
return 'No'
elif series == 1:
return 'Yes'
df['impact_action_yn_new'] = df['impact_action_yn'].apply(impact_action_yn_new)
df['impact_action_yn_new'].value_counts(sort=False)
# clean up engagement - collapse nan and 0, 2s
def engagement_new(series):
if series == '0':
return 'Null'
elif series == 'NaN':
return 'Null'
elif series == '1':
return '1'
elif series == '2':
return '2a'
elif series == '2a':
return '2a'
elif series == '2b':
return '2b'
elif series == '3':
return '3'
elif series == '4':
return '4'
elif series == '5':
return '5'
df['engagement_new'] = df['Engagement'].apply(engagement_new)
impact_action_table_eng = pd.crosstab(df.impact_action_yn_new,df.engagement_new)
print(impact_action_table_eng)
engagement_new 1 2a 2b 3 4 5 Null
impact_action_yn_new
No 676 508 587 683 172 31 1
Yes 410 405 303 671 357 237 1
# Crosstab: Impact YN x Engagement - Row percentages
impact_action_table_eng_rowperc = pd.crosstab(df.impact_action_yn_new,df.engagement_new).apply(lambda r: r/r.sum()*100, axis=1)
print(impact_action_table_eng_rowperc)
engagement_new 1 2a 2b 3 4 \
impact_action_yn_new
No 25.432656 19.112114 22.084274 25.696012 6.471031
Yes 17.197987 16.988255 12.709732 28.145973 14.974832
engagement_new 5 Null
impact_action_yn_new
No 1.166290 0.037622
Yes 9.941275 0.041946
#plot data
stacked_imp_eng_rowperc = impact_action_table_eng_rowperc.stack().reset_index().rename(columns={0:'value'})
total = float(len(df))
#set fig size
fig, ax = plt.subplots(figsize=(15,10))
#set style
sns.set_style('whitegrid')
#plot
ax = sns.barplot(x=stacked_imp_eng_rowperc.engagement_new,
y=stacked_imp_eng_rowperc.value,
hue=stacked_imp_eng_rowperc.impact_action_yn_new)
#plot legend
ax.legend(loc='center right',bbox_to_anchor=(.95,.9),ncol=1, fancybox=True, shadow=True)
#plot axis labels
for p in ax.patches:
height = p.get_height()
ax.text(p.get_x()+p.get_width()/2.,
height,
'{:1.2f}'.format(height/total),
ha="center")
ax.set(xlabel='Engagement Level', ylabel='% Reporting an Action within Last 12 Months');
Я не уверен, почему метки данных на гистограмме отображаются как 0,00,Это вызывает кросс-таблицу.Какие-нибудь мысли?
Можно ли преобразовать расчеты кросс-таблицы в процентах?Я хотел бы построить эти проценты вместо десятичных дробей.
Спасибо за вашу помощь!