Как я могу изменить свой фрейм данных с дубликатами строк? - PullRequest
0 голосов
/ 06 февраля 2020

Я собрал данные (vector.sum) от участников (id) в разные моменты времени (time) в течение нескольких дней (day).

Вот мой упрощенный фрейм данных:

# Reproducible example data (looks like dput() output): a data.frame with
# 84 rows and four columns.
#   vector.sum - numeric measurement
#   id         - factor; 11 labels are declared, only codes 1 and 2 occur here
#   time       - factor; levels run from "00:00:00" to "23:55:00" in 5-minute
#                steps (already in chronological order), only codes 1-3 occur
#   day        - factor with 8 levels, "16. Januar" .. "23. Januar"
df <- structure(list(vector.sum = c(0, 51.122, 0, 133.034, 133.034, 
29.268, 29.268, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32.312, 32.312, 
0, 0, 250.858, 250.858, 407.326, 407.326, 816.76, 816.76, 0, 
0, 118.018, 118.018, 189.916, 189.916, 0, 0, 0, 0, 240.86, 240.86, 
53.758, 0, 0, 0, 570.664, 0, 179.614, 179.614, 489.112, 489.112, 
1038.536, 1038.536, 1290.056, 1290.056, 103.99, 103.99, 0, 0, 
3866.208, 3866.208, 575.126, 575.126, 0, 0, 109.066, 109.066, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 199.142, 0
), id = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable", 
"AnIn1996_CPW1B51180336_2020-01-16_12_00_00_AM_1-260secDataTable", 
"BaGa1993_CPW1B17190050_2020-01-16_12_00_00_AM_160secDataTable", 
"HeHe1964_CPW1B17190061_2020-01-16_12_00_00_AM_160secDataTable", 
"ReMa1976_CPW1B17190131_2020-01-16_12_00_00_AM_160secDataTable", 
"SiBu1964_CPW1B51180350_2020-01-16_12_00_00_AM_160secDataTable", 
"SiRe1960_CPW1B17190141_2020-01-16_12_00_00_AM_160secDataTable", 
"UrBr1963_CPW1B17190115_2020-01-16_12_00_00_AM_160secDataTable", 
"UrEi1988_CPW1B51180357_2020-01-16_12_00_00_AM_160secDataTable", 
"VeZi1971_CPW1B51180306_2020-01-16_12_00_00_AM_160secDataTable", 
"ViGr1987_CPW1B17190044_2020-01-16_12_00_00_AM_160secDataTable"
), class = "factor"), time = structure(c(1L, 2L, 3L, 1L, 1L, 
2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 
1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 
3L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 
2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 
1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 2L, 2L, 3L, 3L, 1L, 2L, 3L), .Label = c("00:00:00", 
"00:05:00", "00:10:00", "00:15:00", "00:20:00", "00:25:00", "00:30:00", 
"00:35:00", "00:40:00", "00:45:00", "00:50:00", "00:55:00", "01:00:00", 
"01:05:00", "01:10:00", "01:15:00", "01:20:00", "01:25:00", "01:30:00", 
"01:35:00", "01:40:00", "01:45:00", "01:50:00", "01:55:00", "02:00:00", 
"02:05:00", "02:10:00", "02:15:00", "02:20:00", "02:25:00", "02:30:00", 
"02:35:00", "02:40:00", "02:45:00", "02:50:00", "02:55:00", "03:00:00", 
"03:05:00", "03:10:00", "03:15:00", "03:20:00", "03:25:00", "03:30:00", 
"03:35:00", "03:40:00", "03:45:00", "03:50:00", "03:55:00", "04:00:00", 
"04:05:00", "04:10:00", "04:15:00", "04:20:00", "04:25:00", "04:30:00", 
"04:35:00", "04:40:00", "04:45:00", "04:50:00", "04:55:00", "05:00:00", 
"05:05:00", "05:10:00", "05:15:00", "05:20:00", "05:25:00", "05:30:00", 
"05:35:00", "05:40:00", "05:45:00", "05:50:00", "05:55:00", "06:00:00", 
"06:05:00", "06:10:00", "06:15:00", "06:20:00", "06:25:00", "06:30:00", 
"06:35:00", "06:40:00", "06:45:00", "06:50:00", "06:55:00", "07:00:00", 
"07:05:00", "07:10:00", "07:15:00", "07:20:00", "07:25:00", "07:30:00", 
"07:35:00", "07:40:00", "07:45:00", "07:50:00", "07:55:00", "08:00:00", 
"08:05:00", "08:10:00", "08:15:00", "08:20:00", "08:25:00", "08:30:00", 
"08:35:00", "08:40:00", "08:45:00", "08:50:00", "08:55:00", "09:00:00", 
"09:05:00", "09:10:00", "09:15:00", "09:20:00", "09:25:00", "09:30:00", 
"09:35:00", "09:40:00", "09:45:00", "09:50:00", "09:55:00", "10:00:00", 
"10:05:00", "10:10:00", "10:15:00", "10:20:00", "10:25:00", "10:30:00", 
"10:35:00", "10:40:00", "10:45:00", "10:50:00", "10:55:00", "11:00:00", 
"11:05:00", "11:10:00", "11:15:00", "11:20:00", "11:25:00", "11:30:00", 
"11:35:00", "11:40:00", "11:45:00", "11:50:00", "11:55:00", "12:00:00", 
"12:05:00", "12:10:00", "12:15:00", "12:20:00", "12:25:00", "12:30:00", 
"12:35:00", "12:40:00", "12:45:00", "12:50:00", "12:55:00", "13:00:00", 
"13:05:00", "13:10:00", "13:15:00", "13:20:00", "13:25:00", "13:30:00", 
"13:35:00", "13:40:00", "13:45:00", "13:50:00", "13:55:00", "14:00:00", 
"14:05:00", "14:10:00", "14:15:00", "14:20:00", "14:25:00", "14:30:00", 
"14:35:00", "14:40:00", "14:45:00", "14:50:00", "14:55:00", "15:00:00", 
"15:05:00", "15:10:00", "15:15:00", "15:20:00", "15:25:00", "15:30:00", 
"15:35:00", "15:40:00", "15:45:00", "15:50:00", "15:55:00", "16:00:00", 
"16:05:00", "16:10:00", "16:15:00", "16:20:00", "16:25:00", "16:30:00", 
"16:35:00", "16:40:00", "16:45:00", "16:50:00", "16:55:00", "17:00:00", 
"17:05:00", "17:10:00", "17:15:00", "17:20:00", "17:25:00", "17:30:00", 
"17:35:00", "17:40:00", "17:45:00", "17:50:00", "17:55:00", "18:00:00", 
"18:05:00", "18:10:00", "18:15:00", "18:20:00", "18:25:00", "18:30:00", 
"18:35:00", "18:40:00", "18:45:00", "18:50:00", "18:55:00", "19:00:00", 
"19:05:00", "19:10:00", "19:15:00", "19:20:00", "19:25:00", "19:30:00", 
"19:35:00", "19:40:00", "19:45:00", "19:50:00", "19:55:00", "20:00:00", 
"20:05:00", "20:10:00", "20:15:00", "20:20:00", "20:25:00", "20:30:00", 
"20:35:00", "20:40:00", "20:45:00", "20:50:00", "20:55:00", "21:00:00", 
"21:05:00", "21:10:00", "21:15:00", "21:20:00", "21:25:00", "21:30:00", 
"21:35:00", "21:40:00", "21:45:00", "21:50:00", "21:55:00", "22:00:00", 
"22:05:00", "22:10:00", "22:15:00", "22:20:00", "22:25:00", "22:30:00", 
"22:35:00", "22:40:00", "22:45:00", "22:50:00", "22:55:00", "23:00:00", 
"23:05:00", "23:10:00", "23:15:00", "23:20:00", "23:25:00", "23:30:00", 
"23:35:00", "23:40:00", "23:45:00", "23:50:00", "23:55:00"), class = "factor"), 
    day = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
    3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 
    5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 
    8L, 8L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
    3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 
    6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L), .Label = c("16. Januar", 
    "17. Januar", "18. Januar", "19. Januar", "20. Januar", "21. Januar", 
    "22. Januar", "23. Januar"), class = "factor")), row.names = c(NA, 
-84L), class = "data.frame")

Теперь этот набор данных содержит повторяющиеся строки для каждого day (кроме первого и последнего day) для каждого id.

> head(df, n = 10)
   vector.sum                                                            id     time        day
1       0.000 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:00:00 16. Januar
2      51.122 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:05:00 16. Januar
3       0.000 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:10:00 16. Januar
4     133.034 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:00:00 17. Januar
5     133.034 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:00:00 17. Januar
6      29.268 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:05:00 17. Januar
7      29.268 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:05:00 17. Januar
8       0.000 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:10:00 17. Januar
9       0.000 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:10:00 17. Januar
10      0.000 AnBe1990_CPW1B51180302_2020-01-16_12_00_00_AM_160secDataTable 00:00:00 18. Januar

Я хотел бы переупорядочить данные для каждого id так, чтобы строки-дубликаты шли в том же порядке time, что и в первый и последний day (т. е. 00:00:00, затем 00:05:00, затем 00:10:00, после чего последовательность повторяется).

Полагаю, здесь может помочь arrange(), но я не уверен, как вызвать эту функцию, чтобы получить нужный результат.

Есть идеи?

1 Ответ

0 голосов
/ 06 февраля 2020

Для общего набора данных вы можете использовать order напрямую

# Sort within each id and day so that the time sequence runs once per
# duplicate copy (00:00:00, 00:05:00, 00:10:00, then the same sequence again).
# A global sort on time alone would interleave rows from every id and day.
# occurrence is 1 for the first copy of an (id, day, time) row, 2 for the
# second copy, and so on.
occurrence <- ave(seq_len(nrow(df)), df$id, df$day, df$time, FUN = seq_along)
# time is a factor whose levels are already listed chronologically, so
# ordering on it directly yields chronological order.
df[order(df$id, df$day, occurrence, df$time), ]

Однако безопаснее и надёжнее сначала преобразовать time в POSIXct, а затем отсортировать с помощью order

# Parse the clock times explicitly instead of relying on factor level order.
# tz = "UTC" matters: with format "%T" the current date is attached, and in a
# local time zone with DST a clock time that does not exist on the change-over
# day can parse to NA. as.character() makes the factor-to-string step explicit.
time_parsed <- as.POSIXct(as.character(df$time), format = "%T", tz = "UTC")
# Index of each duplicate copy within its (id, day, time) group: 1, 2, ...
occurrence <- ave(seq_len(nrow(df)), df$id, df$day, df$time, FUN = seq_along)
# Order per id and day, one full time sequence per duplicate copy, rather than
# a global sort on time that would mix rows from different ids and days.
df[order(df$id, df$day, occurrence, time_parsed), ]
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...