Вы можете создать логические маски `m1` и `m2`, сравнив соответствующие столбцы с `1`, а маску `m3` получить, объединив обе маски оператором `&` (логическое AND).
Затем для подсчёта примените `sum` к этим маскам: значения `True` обрабатываются как `1`.
Наконец, для фильтрации используйте boolean indexing с маской `m3`:
# Sample data: five rows, three integer columns.
a = np.array(
    [
        [100, 1, 1],
        [101, 2, 2],
        [102, 1, 2],
        [103, 1, 2],
        [1251, 1, 1],
    ]
)
df = pd.DataFrame(data=a, columns=['slno', 'disease', 'plastic_used'])
# With real data, load the frame from the CSV file instead:
# df = pd.read_csv('PUBLIC.csv')
print(df)
slno disease plastic_used
0 100 1 1
1 101 2 2
2 102 1 2
3 103 1 2
4 1251 1 1
# Boolean masks: True wherever the column value equals 1.
m1 = df['plastic_used'].eq(1)
m2 = df['disease'].eq(1)
# Element-wise AND: True only where both masks are True.
m3 = m1 & m2

# Summing a boolean mask counts its True entries (True is treated as 1).
np_patients = m2.sum()
np_plastic_usage = m1.sum()
patient_who_used_plastic = m3.sum()

# Boolean indexing keeps only the rows selected by m3.
data_of_ppl_whose_disease_is_one_and_plastic_used_is_one = df.loc[m3]

# Report the three counts in the same order as before.
for template, count in (
    ("No:of ppl got disease = {}", np_patients),
    ("No:of ppl used plastic = {}", np_plastic_usage),
    ("No:of ppl become patients because of platic usage = {}", patient_who_used_plastic),
):
    print(template.format(count))
No:of ppl got disease = 4
No:of ppl used plastic = 2
No:of ppl become patients because of platic usage = 2
# Show the rows selected by the combined mask m3.
print(data_of_ppl_whose_disease_is_one_and_plastic_used_is_one)
slno disease plastic_used
0 100 1 1
4 1251 1 1