Можно воспользоваться методами пакета data.table:
library(data.table)
# Replace every "venture" funding round with a randomly drawn series label.
# setDT() converts df to a data.table by reference, and := updates the column
# in place for the rows selected by the filter in i.
# .N is the number of rows matched by the filter, so each matching row gets
# its own independent draw; a single draw (size 1) would be recycled and give
# every "venture" row the same label. replace = TRUE is required because the
# number of matching rows may exceed the three available labels.
# The trailing [] prints the updated table.
setDT(df)[funding_round_type == "venture", funding_round_type :=
  sample(c("series-a", "series-b", "series-c+"), .N, replace = TRUE,
         prob = c(.4, .4, .2))][]
# investor_name funding_round_type count
#1: .406 Ventures angel 1
#2: .406 Ventures other 2
#3: .406 Ventures private-equity 1
#4: .406 Ventures series-a 5
#5: .406 Ventures series-b 2
#6: .406 Ventures series-c+ 7
#7: .406 Ventures series-b 1
#8: 500 Startups angel 40
Или с помощью case_when из tidyverse:
# Attach the tidyverse, which supplies %>%, mutate(), case_when() and n().
library(tidyverse)

# Replace every "venture" funding round with a randomly drawn series label and
# leave all other values untouched. The result is printed, not assigned.
# n() draws one candidate label per row so that each "venture" row receives an
# independent draw; a single draw (size 1) would be recycled and give every
# "venture" row the same label. replace = TRUE is required because the number
# of rows may exceed the three available labels.
df %>%
  mutate(funding_round_type = case_when(
    funding_round_type == "venture" ~
      sample(c("series-a", "series-b", "series-c+"), n(), replace = TRUE,
             prob = c(.4, .4, .2)),
    TRUE ~ funding_round_type
  ))
# investor_name funding_round_type count
#1 .406 Ventures angel 1
#2 .406 Ventures other 2
#3 .406 Ventures private-equity 1
#4 .406 Ventures series-a 5
#5 .406 Ventures series-b 2
#6 .406 Ventures series-c+ 7
#7 .406 Ventures series-a 1
#8 500 Startups angel 40
Данные
# Example input: one row per (investor, funding round type) pair with the
# number of rounds of that type. Row names are the character labels "1".."8".
df <- data.frame(
  investor_name = c(rep(".406 Ventures", 7L), "500 Startups"),
  funding_round_type = c(
    "angel", "other", "private-equity", "series-a",
    "series-b", "series-c+", "venture", "angel"
  ),
  count = c(1L, 2L, 1L, 5L, 2L, 7L, 1L, 40L),
  row.names = as.character(1:8),
  stringsAsFactors = FALSE
)