Ищем R-скрипт, который может делать следующее: если значение x столбца A содержит определенную строку, тогда напишите y в новый столбец B - PullRequest
0 голосов
/ 20 февраля 2020

Я пытаюсь создать новую переменную в R на основе другой переменной. По сути, я хочу сделать следующее: если значение x в столбце A содержит указанную строку символов, то записать y в новый столбец B.

Так, например, в моем наборе данных у меня есть переменная с именем "condition", которая имеет следующие уникальные значения:

[1] "05_CL_dom" "16_CF_sub" "02_CL_sub" "01_CL_dom" "19_CF_dom" "14_CL_sub" "17_CL_dom" "10_CL_sub" "09_CL_dom" "15_CF_dom"
[11] "18_CL_sub" "06_CL_sub" "20_CF_sub" "12_CF_sub" "04_CF_sub" "08_CF_sub" "24_CF_sub" "11_CF_dom" "21_CL_dom" "22_CL_sub"
[21] "23_CF_dom" "07_CF_dom" "03_CF_dom" "13_CL_dom" "06_CL_dom" "07_CF_sub" "16_CF_dom" "03_CF_sub" "12_CF_dom" "11_CF_sub"
[31] "17_CL_sub" "14_CL_dom" "21_CL_sub" "20_CF_dom" "22_CL_dom" "19_CF_sub" "01_CL_sub" "08_CF_dom" "15_CF_sub" "13_CL_sub"
[41] "18_CL_dom" "09_CL_sub" "05_CL_sub" "23_CF_sub" "10_CL_dom" "24_CF_dom" "04_CF_dom" "02_CL_dom" "18_CF_sub" "05_CF_dom"
[51] "02_CF_sub" "03_CL_dom" "21_CF_dom" "01_CF_dom" "12_CL_sub" "19_CL_dom" "20_CL_sub" "23_CL_dom" "16_CL_sub" "22_CF_sub"
[61] "04_CL_sub" "11_CL_dom" "10_CF_sub" "06_CF_sub" "14_CF_sub" "07_CL_dom" "15_CL_dom" "08_CL_sub" "24_CL_sub" "13_CF_dom"
[71] "09_CF_dom" "17_CF_dom" "19_CL_sub" "13_CF_sub" "10_CF_dom" "16_CL_dom" "05_CF_sub" "09_CF_sub" "06_CF_dom" "03_CL_sub"
[81] "08_CL_dom" "14_CF_dom" "17_CF_sub" "21_CF_sub" "01_CF_sub" "24_CL_dom" "23_CL_sub" "22_CF_dom" "07_CL_sub" "18_CF_dom"
[91] "20_CL_dom" "04_CL_dom" "11_CL_sub" "02_CF_dom" "12_CL_dom" "15_CL_sub"

На основе этих значений я хочу создать новую переменную с именем «conditionNEW»:
- если значение condition содержит CL_sub -> CLsub
- если значение condition содержит CL_dom -> CLdom
- если значение condition содержит CF_sub -> CFsub
- если значение condition содержит CF_dom -> CFdom

Вот что я сделал на данный момент:

# Set conditionNEW to "CFdom" for every row whose condition is exactly one of
# "01_CF_dom" ... "24_CF_dom" (one equality test per numeric prefix).
rawdata$conditionNEW[rawdata$condition == "01_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "02_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "03_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "04_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "05_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "06_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "07_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "08_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "09_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "10_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "11_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "12_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "13_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "14_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "15_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "16_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "17_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "18_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "19_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "20_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "21_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "22_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "23_CF_dom"] <- "CFdom"
rawdata$conditionNEW[rawdata$condition == "24_CF_dom"] <- "CFdom"

# Set conditionNEW to "CFsub" for every row whose condition is exactly one of
# "01_CF_sub" ... "24_CF_sub" (one equality test per numeric prefix).
rawdata$conditionNEW[rawdata$condition == "01_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "02_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "03_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "04_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "05_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "06_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "07_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "08_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "09_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "10_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "11_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "12_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "13_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "14_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "15_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "16_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "17_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "18_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "19_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "20_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "21_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "22_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "23_CF_sub"] <- "CFsub"
rawdata$conditionNEW[rawdata$condition == "24_CF_sub"] <- "CFsub"

# Set conditionNEW to "CLdom" for every row whose condition is exactly one of
# "01_CL_dom" ... "24_CL_dom" (one equality test per numeric prefix).
rawdata$conditionNEW[rawdata$condition == "01_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "02_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "03_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "04_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "05_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "06_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "07_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "08_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "09_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "10_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "11_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "12_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "13_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "14_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "15_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "16_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "17_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "18_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "19_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "20_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "21_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "22_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "23_CL_dom"] <- "CLdom"
rawdata$conditionNEW[rawdata$condition == "24_CL_dom"] <- "CLdom"

# Set conditionNEW to "CLsub" for every row whose condition is exactly one of
# "01_CL_sub" ... "24_CL_sub" (one equality test per numeric prefix).
rawdata$conditionNEW[rawdata$condition == "01_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "02_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "03_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "04_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "05_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "06_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "07_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "08_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "09_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "10_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "11_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "12_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "13_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "14_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "15_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "16_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "17_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "18_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "19_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "20_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "21_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "22_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "23_CL_sub"] <- "CLsub"
rawdata$conditionNEW[rawdata$condition == "24_CL_sub"] <- "CLsub"

Это работает, но получается слишком много кода для задачи, которая, похоже, должна решаться довольно просто. Кто-нибудь знает, как сделать это с меньшим количеством кода?

1 Ответ

1 голос
/ 20 февраля 2020

вы можете использовать операторы ifelse

# Derive conditionNEW from the substring found in condition:
#   contains "CL_sub" -> "CLsub",  contains "CL_dom" -> "CLdom",
#   contains "CF_sub" -> "CFsub",  contains "CF_dom" -> "CFdom".
# Rows matching none of the four patterns (including NA, for which grepl()
# returns FALSE) get NA_character_, so the column is always character.
# fixed = TRUE because the patterns are literal text, not regular expressions.
rawdata$conditionNEW <- ifelse(
  grepl("CL_sub", rawdata$condition, fixed = TRUE), "CLsub",
  ifelse(
    grepl("CL_dom", rawdata$condition, fixed = TRUE), "CLdom",
    ifelse(
      grepl("CF_sub", rawdata$condition, fixed = TRUE), "CFsub",
      ifelse(
        grepl("CF_dom", rawdata$condition, fixed = TRUE), "CFdom",
        NA_character_
      )
    )
  )
)

Здесь вы также можете использовать регулярные выражения:

# Remove every digit and every underscore from condition, leaving only the
# letters (e.g. "05_CL_dom" becomes "CLdom").
rawdata$conditionNEW <- gsub(
  pattern = "[0-9]|_",
  replacement = "",
  x = rawdata$condition
)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...