# Small data set of chess games with a player id (p_id), the number of days
# since some starting date (dayo), an Elo rating (elo), and the game result
# (fin1_0: 1 = win, 0 = loss).
# Raw per-game observations; position i in every vector describes the same game.
p_id <- c(
  2232, 1877, 2210, 2073, 2232, 2232, 1877, 2210, 2073, 2232,
  1877, 2210, 2073, 1877, 2232, 2210, 2073, 2210, 1877
)
elo <- c(
  1741, 1841, 1651, 1561, 1751, 1741, 1841, 1651, 1561, 1751,
  1851, 1551, 1951, 1820, 1660, 2230, 1762, 1762, 1762
)
dayo <- c(
  750, 750, 750, 750, 751, 750, 750, 750, 750, 751,
  751, 751, 751, 751, 751, 751, 762, 762, 762
)
fin1_0 <- c(
  0, 1, 0, 0, 1, 0, 0, 0, 1, 1,
  0, 0, 0, 1, 0, 0, 1, 0, 0
)

# Assemble one row per game, with columns in the order p_id, elo, dayo, fin1_0.
my.df <- data.frame(
  p_id = p_id,
  elo = elo,
  dayo = dayo,
  fin1_0 = fin1_0
)
my.df
# For each player I want to determine: the number of games played, the number
# of wins achieved, the day of their last win, how many days since they last
# won a game, how many days since they last played a game, and how many games
# they have played since their last win.
# What I tried:
library(dplyr)
# Build per-player running statistics from my.df.
# Rows are first ordered by player and day so that the cumulative counters and
# lag() below follow each player's games in chronological order.
tbl.df <- my.df %>%
  arrange(p_id, dayo) %>%
  group_by(p_id) %>%
  mutate(
    # running game counter and running win counter within each player
    games = row_number(),
    wins = cumsum(fin1_0 == 1)
  ) %>%
  # Within a player, `wins` only changes on a winning row, so each
  # (p_id, wins) group starts at a win and runs until the next one. The
  # wins == 0 group holds the games before the first win, so its "first row"
  # is not a win. NOTE(review): confirm that is acceptable for last_win_day.
  group_by(p_id, wins) %>%
  mutate(
    last_win_day = min(dayo),
    days_since_win = dayo - first(dayo)
  ) %>%
  group_by(p_id) %>%
  mutate(
    # A player's first row has no earlier game to measure from; both gaps are
    # filled with a random value between 1 and 10 (drawn once per player).
    days_since_win = replace(
      days_since_win, row_number() == 1, sample(1:10, 1)
    ),
    days_since_game = dayo - lag(dayo),
    # Base replace() is used rather than replace_na(): replace_na() is
    # exported by tidyr, which this script does not attach.
    days_since_game = replace(
      days_since_game, is.na(days_since_game), sample(1:10, 1)
    )
  ) %>%
  group_by(p_id, wins) %>%
  # games played since the first row of the current (p_id, wins) group
  mutate(games_since_win = games - first(games)) %>%
  # drop the grouping so later verbs do not silently operate per group
  ungroup()
# Turn the tibble produced by the pipeline back into a base data.frame.
my.df <- as.data.frame(tbl.df)
# Print the rows ordered by player and day, then print the stored data frame.
my.df[with(my.df, order(p_id, dayo)), ]
my.df
# truncated output produced by the code above
# p_id elo dayo fin1_0 games wins last_win_day days_since_win days_since_game games_since_win
# 1 1877 1841 750 1 1 1 750 10 10 0
# 2 1877 1841 750 0 2 1 750 0 0 1
# 3 1877 1851 751 0 3 1 750 1 1 2
# 4 1877 1820 751 1 4 2 751 0 0 0
# 5 1877 1762 762 0 5 2 751 11 11 1
# 6 2073 1561 750 0 1 0 750 3 7 0
# 7 2073 1561 750 1 2 1 750 0 0 0
# 8 2073 1951 751 0 3 1 750 1 1 1
# 9 2073 1762 762 1 4 2 762 0 11 0
# wanted output
# p_id elo dayo fin1_0 games wins last_win_day days_since_win days_since_game games_since_win
# 1 1877 1841 750 1 1 1 750 10 10 0
# 2 1877 1841 750 0 2 1 750 0 0 0
# 3 1877 1851 751 0 3 1 750 1 1 1
# 4 1877 1820 751 1 4 2 751 0 1 0
# 5 1877 1762 762 0 5 2 751 11 11 11
# 6 2073 1561 750 0 1 0 750 3 7 0
# 7 2073 1561 750 1 2 1 750 0 0 0
# 8 2073 1951 751 0 3 1 750 1 1 1
# 9 2073 1762 762 1 4 2 762 0 11 0