Код R:
library(dplyr)

# Toy applicant table. Dependents is stored as text because of the
# open-ended "3+" level; Married contains a missing value on purpose.
start.DF <- data.frame(
  Dependents = c("2", "3+", "1"),
  Married = c("yes", NA, "no"),
  CoapplicantIncome = c(45, 0, 75),
  TotalIncome = c(100, 67, 80)
)

# Map "3+" to "3" so the whole column can be converted to numbers.
Dependents.Numeric <- recode(start.DF$Dependents, "3+" = "3") %>%
  as.character() %>%
  as.numeric()

# HouseholdSize:
#   NA              when the dependents count or Married is missing;
#   dependents + 2  when there is co-applicant income or Married is "yes";
#   dependents + 1  otherwise.
# The Married labels in the data are lowercase, so the comparison must use
# "yes" (comparing against "Yes" never matches these rows).
start.DF <- start.DF %>%
  mutate(
    HouseholdSize = ifelse(
      is.na(Dependents.Numeric) | is.na(Married),
      NA,
      ifelse(
        CoapplicantIncome > 0 | Married == "yes",
        Dependents.Numeric + 2,
        Dependents.Numeric + 1
      )
    )
  )

# Per-capita income; stays NA wherever HouseholdSize is NA.
start.DF <- start.DF %>%
  mutate(IncomePC = TotalIncome / HouseholdSize)

start.DF
Dependents Married CoapplicantIncome TotalIncome HouseholdSize IncomePC
1 2 yes 45 100 4 25.00000
2 3+ <NA> 0 67 NA NA
3 1 no 75 80 3 26.66667
Код Python:
import pandas as pd
import numpy as np
# Toy applicant table. 'Dependents' is stored as text because of the
# open-ended '3+' level; 'Married' contains a missing value on purpose.
start_df = pd.DataFrame.from_dict({
    'Dependents': ('2', '3+', '1'),
    'Married': ('yes', np.nan, 'no'),
    'CoapplicantIncome': (45, 0, 75),
    'TotalIncome': (100, 67, 80)
})

# Map '3+' to '3' so the column can be cast to float. The replacement is a
# literal one (regex=False): '+' is a regex metacharacter, and the default
# value of `regex` differs between pandas versions, so it is spelled out.
Dependents_Numeric = (
    start_df['Dependents'].str.replace('3+', '3', regex=False).astype(float)
)

# HouseholdSize starts as NaN and is filled only for rows with complete data.
start_df['HouseholdSize'] = np.nan

# cond1: the row lacks a dependents count or a Married value.
cond1 = pd.isna(Dependents_Numeric) | pd.isna(start_df['Married'])
# cond2: there is co-applicant income or Married is 'yes'.
cond2 = (start_df['CoapplicantIncome'] > 0) | (start_df['Married'] == 'yes')

# Complete rows get dependents + 2 when cond2 holds, dependents + 1 otherwise.
start_df.loc[~cond1 & cond2, 'HouseholdSize'] = Dependents_Numeric[~cond1 & cond2] + 2
start_df.loc[~cond1 & ~cond2, 'HouseholdSize'] = Dependents_Numeric[~cond1 & ~cond2] + 1

# Per-capita income; stays NaN wherever HouseholdSize is NaN.
start_df['IncomePC'] = start_df['TotalIncome'] / start_df['HouseholdSize']
print(start_df)
CoapplicantIncome Dependents Married TotalIncome HouseholdSize IncomePC
0 45 2 yes 100 4.0 25.000000
1 0 3+ NaN 67 NaN NaN
2 75 1 no 80 3.0 26.666667