Нужна таблица с агрегированными данными: агрегаты и кросс-таблицы - PullRequest
2 голосов
/ 09 июля 2020
• 1000 Мне нужна таблица, в которой каждому животному соответствует одна строка, а годам — столбцы: 1, если особь наблюдалась в этом году, и 0, если не наблюдалась.

введите описание изображения здесь

Это должно быть легко, но мои навыки не так хороши. Любая помощь приветствуется.

Пример данных:

> dput(df)
structure(list(no = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 
45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 
77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 
107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 
120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 
133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 
159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 
172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183), 
who = c("KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", 
"KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-1", "KA-2", 
"KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", 
"KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", 
"KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2", "KA-2"
), time = c(2411870000, 2501880000, 2501880000, 2501880000, 
2501880000, 2901880000, 2901880000, 2901880000, 2901880000, 
2901880000, 3101900000, 3101900000, 3101900000, 202900000, 
202900000, 202900000, 202900000, 202900000, 702900000, 702900000, 
702900000, 702900000, 702900000, 802900000, 802900000, 802900000, 
802900000, 802900000, 802900000, 802900000, 802900000, 802900000, 
802900000, 802900000, 802900000, 802900000, 802900000, 1002900000, 
1002900000, 1002900000, 1002900000, 1002900000, 1002900000, 
1002900000, 1002900000, 1502900000, 1502900000, 1502900000, 
1502900000, 1502900000, 1502900000, 1502900000, 1502900000, 
1502900000, 1502900000, 1602900000, 1602900000, 1602900000, 
1602900000, 1602900000, 1602900000, 1602900000, 1602900000, 
1602900000, 1602900000, 1602900000, 1602900000, 1602900000, 
1602900000, 1602900000, 1602900000, 2711900000, 2711900000, 
2810920950, 2810920953, 2810920953, 2810921015, 2810921148, 
2810921158, 2810921158, 2810921159, 2810921216, 2810921400, 
3010921449, 411920922, 411921008, 411921052, 411920931, 411920931, 
411920937, 411920959, 411921009, 411921039, 411921051, 411921054, 
411921054, 611921128, 611921205, 611921335, 2011921332, 2010930959, 
2010931149, 2010931205, 2010930000, 2010930000, 2610931219, 
2610931230, 2610931244, 2610931249, 2610931304, 2610930000, 
2610931341, 2610931423, 2610931435, 2610930000, 211931409, 
211931409, 211931432, 211931439, 211931442, 211931459, 211931504, 
211931522, 711931050, 711931050, 711931100, 711931100, 711931117, 
711931118, 711931335, 711931425, 711931425, 711931429, 711931440, 
1111930000, 1111930000, 1111931343, 1111931343, 1111930000, 
1111930000, 1211930000, 1810941232, 1810941233, 1810941240, 
1810941244, 1810941248, 1810941249, 1810941419, 1810941426, 
1810941434, 1810941435, 1810931442, 1810931523, 1810931530, 
1810931530, 1810941546, 2910940959, 1203910000, 2501880000, 
2501880000, 2501880000, 2501880000, 3101900000, 202900000, 
702900000, 702900000, 802900000, 802900000, 802900000, 802900000, 
802900000, 1002900000, 1002900000, 1102900000, 1102900000, 
1502900000, 1602900000, 1602900000, 1602900000, 1602900000, 
1602900000, 1602900000, 1203910000), film = c("999", "10", 
"10", "14", "14", "18", "18", "18", "18", "18", "22", "23", 
"23", "26", "26", "26", "26", "26", "27", "27", "28", "28", 
"28", "29", "30", "30", "30", "30", "31", "31", "31", "31", 
"32", "32", "32", "33", "33", "36", "36", "36", "37", "37", 
"37", "37", "37", "40", "41", "41", "41", "41", "41", "41", 
"42", "43", "43", "44", "44", "44", "44", "44", "44", "44", 
"46", "46", "46", "46", "47", "47", "48", "49", "49", "66", 
"66", "231", "231", "231", "231", "231", "231", "231", "231", 
"232", "232", "233", "237", "237", "238", "369", "369", "369", 
"370", "370", "371", "371", "371", "371", "243", "243", "243", 
"286", "593", "594", "594", "595", "595", "606", "606", "606", 
"606", "607", "607", "607", "608", "608", "608", "618", "618", 
"618", "618", "619", "619", "619", "620", "628", "628", "628", 
"628", "628", "628", "628", "629", "629", "629", "629", "636", 
"636", "636", "636", "636", "636", "637", "656", "656", "656", 
"656", "656", "656", "658", "658", "658", "658", "659", "659", 
"659", "659", "664", "681", "999", "14", "14", "14", "17", 
"23", "26", "27", "27", "30", "30", "32", "33", "33", "36", 
"37", "38", "38", "42", "46", "46", "47", "47", "47", "47", 
"108"), frame = c(0, 5, 19, 12, 14, 11, 17, 21, 22, 23, 23, 
7, 8, 1, 27, 29, 30, 31, 8, 32, 7, 9, 13, 5, 3, 18, 21, 29, 
6, 30, 33, 35, 29, 30, 34, 21, 24, 6, 7, 10, 2, 3, 16, 19, 
20, 31, 1, 2, 8, 13, 14, 16, 5, 5, 22, 0, 1, 2, 3, 20, 25, 
31, 8, 9, 17, 24, 5, 25, 1, 6, 27, 9, 10, 3, 4, 6, 17, 27, 
28, 31, 34, 1, 3, 1, 17, 29, 12, 21, 24, 32, 22, 29, 6, 15, 
23, 25, 4, 7, 34, 17, 5, 9, 21, 20, 21, 0, 7, 13, 31, 17, 
20, 35, 3, 9, 13, 5, 6, 22, 26, 0, 19, 26, 18, 1, 3, 5, 7, 
11, 12, 29, 23, 25, 27, 30, 1, 17, 18, 19, 20, 21, 14, 10, 
14, 19, 21, 24, 26, 20, 23, 26, 28, 3, 16, 22, 23, 9, 20, 
3, 15, 25, 30, 11, 2, 15, 2, 36, 5, 29, 31, 27, 28, 27, 6, 
1, 3, 5, 12, 24, 13, 23, 24, 28, 11), year = c(1987, 1988, 
1988, 1988, 1988, 1988, 1988, 1988, 1988, 1988, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 
1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 
1992, 1992, 1992, 1992, 1992, 1992, 1992, 1992, 1993, 1993, 
1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 
1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 
1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 
1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1993, 1994, 
1994, 1994, 1994, 1994, 1994, 1994, 1994, 1994, 1994, 1993, 
1993, 1993, 1993, 1994, 1994, 1991, 1988, 1988, 1988, 1988, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 
1991), tid = structure(c(564710400, 570067200, 570067200, 
570067200, 570067200, 570412800, 570412800, 570412800, 570412800, 
570412800, 633744000, 633744000, 633744000, 633916800, 633916800, 
633916800, 633916800, 633916800, 634348800, 634348800, 634348800, 
634348800, 634348800, 634435200, 634435200, 634435200, 634435200, 
634435200, 634435200, 634435200, 634435200, 634435200, 634435200, 
634435200, 634435200, 634435200, 634435200, 634608000, 634608000, 
634608000, 634608000, 634608000, 634608000, 634608000, 634608000, 
635040000, 635040000, 635040000, 635040000, 635040000, 635040000, 
635040000, 635040000, 635040000, 635040000, 635126400, 635126400, 
635126400, 635126400, 635126400, 635126400, 635126400, 635126400, 
635126400, 635126400, 635126400, 635126400, 635126400, 635126400, 
635126400, 635126400, 659664000, 659664000, 720265800, 720265980, 
720265980, 720267300, 720272880, 720273480, 720273480, 720273540, 
720274560, 720280800, 720456540, 720868920, 720871680, 720874320, 
720869460, 720869460, 720869820, 720871140, 720871740, 720873540, 
720874260, 720874440, 720874440, 721049280, 721051500, 721056900, 
722266320, 751111140, 751117740, 751118700, 751075200, 751075200, 
751637940, 751638600, 751639440, 751639740, 751640640, 751593600, 
751642860, 751645380, 751646100, 751593600, 752249340, 752249340, 
752250720, 752251140, 752251320, 752252340, 752252640, 752253720, 
752669400, 752669400, 752670000, 752670000, 752671020, 752671080, 
752679300, 752682300, 752682300, 752682540, 752683200, 752976000, 
752976000, 753025380, 753025380, 752976000, 752976000, 753062400, 
782483520, 782483580, 782484000, 782484240, 782484480, 782484540, 
782489940, 782490360, 782490840, 782490900, 750955320, 750957780, 
750958200, 750958200, 782495160, 783424740, 668736000, 570067200, 
570067200, 570067200, 570067200, 633744000, 633916800, 634348800, 
634348800, 634435200, 634435200, 634435200, 634435200, 634435200, 
634608000, 634608000, 634694400, 634694400, 635040000, 635126400, 
635126400, 635126400, 635126400, 635126400, 635126400, 668736000
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), month = c(11L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 11L, 11L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 3L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), Year = c(87L, 88L, 88L, 
88L, 88L, 88L, 88L, 88L, 88L, 88L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 92L, 92L, 
92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 
92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 92L, 
92L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 
93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 
93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 93L, 
93L, 93L, 93L, 93L, 93L, 93L, 94L, 94L, 94L, 94L, 94L, 94L, 
94L, 94L, 94L, 94L, 93L, 93L, 93L, 93L, 94L, 94L, 91L, 88L, 
88L, 88L, 88L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 
90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 90L, 91L
)), row.names = c(NA, -183L), class = "data.frame")

Ответы [ 2 ]

2 голосов
/ 09 июля 2020

Можно подсчитать строки по 'who' и 'year' с помощью count, а затем преобразовать данные из «длинного» формата в «широкий» с помощью pivot_wider

library(dplyr)
library(tidyr)
# Presence/absence table: one row per animal (`who`), one column per year.
# count() collapses df to one row per (who, year) pair; the tally itself is
# irrelevant, so mutate() overwrites it with a constant 1 before reshaping.
# Pairs that never occur in df are filled with 0 by pivot_wider().
pivot_wider(
  mutate(count(df, who, year), n = 1),
  names_from = year,
  values_from = n,
  values_fill = list(n = 0)
)

Или получите строки distinct, а затем выполните pivot_wider

# Presence/absence table: one row per animal (`who`), one column per year.
# distinct() leaves exactly one row per observed (who, year) pair, so length()
# yields 1 for every observed cell; unobserved cells are filled with 0.
# names_sort = TRUE orders the year columns by value. Without it the columns
# follow order of first appearance in the data (1991 would land after 1994
# for this df), which disagrees with the count()- and table()-based variants.
df %>%
  distinct(who, year) %>%
  pivot_wider(
    names_from = year,
    values_from = year,
    values_fn = length,
    values_fill = list(year = 0),
    names_sort = TRUE
  )
1 голос
/ 09 июля 2020

В базовом R вы можете использовать table:

# Cross-tabulate animal against year, flag every cell that has at least one
# record, and let the leading unary plus turn the TRUE/FALSE flags into 1/0.
+(0 < table(df[["who"]], df[["year"]]))

#       1987 1988 1990 1991 1992 1993 1994
#  KA-1    1    1    1    1    1    1    1
#  KA-2    0    1    1    1    0    0    0

+ в начале меняет TRUE / FALSE значения на 1/0 соответственно.

...