Можно использовать substring
вместе с ifelse
как:
# Get the 3rd and 4th character from "birth_number". If it is > 12
# that row is for Female, otherwise Male
df$Gender <- ifelse(as.numeric(substring(df$birth_number,3,4)) > 12, "Female", "Male")
# Now correct the "birth_number". Subtract 50 form middle 2 digits.
# Updated based on feedback from @RuiBarradas to use df$Gender == "Female"
# to subtract 50 from month number
df$birth_number <- ifelse(df$Gender == "Female",
as.character(as.numeric(df$birth_number)-5000), df$birth_number)
df
# client_id birth_number district_id Gender
# 1 1 701213 18 Female
# 2 2 450204 1 Male
# 3 3 401009 1 Female
# 4 4 561201 5 Male
# 5 5 600703 5 Female
# 6 6 190922 12 Male
# so on
#
Данные:
df <- read.table(text =
'"client_id";"birth_number";"district_id"
1;"706213";18
2;"450204";1
3;"406009";1
4;"561201";5
5;"605703";5
6;"190922";12
7;"290125";15
8;"385221";51
9;"351016";60
10;"430501";57
11;"505822";57
12;"810220";40
13;"745529";54
14;"425622";76
15;"185828";21
16;"190225";21
17;"341013";76
18;"315405";76
19;"421228";47
20;"790104";46
21;"526029";12
22;"696011";1
23;"730529";1
24;"395729";43
25;"395423";21
26;"695420";74
27;"665326";54
28;"450929";1
29;"515911";30
30;"576009";74
31;"620209";68
32;"800728";52
33;"486204";73',
header = TRUE, stringsAsFactors = FALSE, sep = ";")