Судя по обновлённому примеру данных автора вопроса (OP), группировать нужно только по столбцу 'STNAME':
library(dplyr)
# Collapse set2c to one row per STNAME, returning the sum of totalAll and the
# mean of avgpop for each group.
# NOTE(review): the output column is spelled `avppop` (possibly a typo for
# `avgpop`); the name is kept so the result matches the printed output.
group_by(set2c, STNAME) %>%
  summarise(
    totalAll = sum(totalAll),
    avppop = mean(avgpop)
  )
# A tibble: 11 x 3
# STNAME totalAll avppop
# <chr> <int> <dbl>
# 1 Colorado 504428 504428
# 2 Idaho 930983 310328.
# 3 Illinois 66094 66094
# 4 Indiana 35422 35422
# 5 Iowa 14116 7058
# 6 Kentucky 38509 19254.
# 7 Louisiana 124704 62352
# 8 Missouri 50645 25322.
# 9 Oklahoma 44063 22032.
#10 South Carolina 49101 24550.
#11 Virginia 64978 32489
Если нужно сохранить все исходные строки и при этом оставить только выбранные столбцы
вместе с новыми, используйте transmute вместо summarise:
# Keep every row of set2c, retaining only STNAME (the grouping column),
# totalAll, and two per-group values repeated on each row of the group:
# the sum of totalAll and the mean of avgpop. The result stays grouped by
# STNAME.
# NOTE(review): the output column is spelled `avppop` (possibly a typo for
# `avgpop`); the name is kept so the result matches the printed output.
group_by(set2c, STNAME) %>%
  transmute(
    totalAll,
    totalAllSum = sum(totalAll),
    avppop = mean(avgpop)
  )
# A tibble: 20 x 4
# Groups: STNAME [11]
# STNAME totalAll totalAllSum avppop
# <chr> <int> <int> <dbl>
# 1 South Carolina 24560 49101 24550.
# 2 South Carolina 24541 49101 24550.
# 3 Louisiana 62514 124704 62352
# 4 Louisiana 62190 124704 62352
# 5 Virginia 32566 64978 32489
# 6 Virginia 32412 64978 32489
# 7 Idaho 456885 930983 310328.
# 8 Idaho 469966 930983 310328.
# 9 Iowa 7053 14116 7058
#10 Kentucky 19294 38509 19254.
#11 Missouri 25306 50645 25322.
#12 Oklahoma 21981 44063 22032.
#13 Iowa 7063 14116 7058
#14 Kentucky 19215 38509 19254.
#15 Missouri 25339 50645 25322.
#16 Oklahoma 22082 44063 22032.
#17 Colorado 504428 504428 504428
#18 Idaho 4132 930983 310328.
#19 Illinois 66094 66094 66094
#20 Indiana 35422 35422 35422
Данные:
# Sample input for the examples: 20 rows, one per (CTYNAME, YEAR, STNAME)
# combination, with integer count columns. In this sample avgpop holds the
# same values as totalAll.
set2c <- structure(
  list(
    CTYNAME = rep(
      c(
        "Abbeville County", "Acadia Parish", "Accomack County",
        "Ada County", "Adair County", "Adams County"
      ),
      times = c(2L, 2L, 2L, 2L, 8L, 4L)
    ),
    YEAR = c(
      10L, 11L, 10L, 11L, 10L, 11L, 10L, 11L, 10L, 10L,
      10L, 10L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 10L
    ),
    STNAME = c(
      "South Carolina", "South Carolina", "Louisiana", "Louisiana",
      "Virginia", "Virginia", "Idaho", "Idaho",
      "Iowa", "Kentucky", "Missouri", "Oklahoma",
      "Iowa", "Kentucky", "Missouri", "Oklahoma",
      "Colorado", "Idaho", "Illinois", "Indiana"
    ),
    totalAll = c(
      24560L, 24541L, 62514L, 62190L, 32566L,
      32412L, 456885L, 469966L, 7053L, 19294L,
      25306L, 21981L, 7063L, 19215L, 25339L,
      22082L, 504428L, 4132L, 66094L, 35422L
    ),
    totalMale = c(
      11895L, 11868L, 30405L, 30342L, 15871L,
      15817L, 228715L, 235266L, 3503L, 9578L,
      12183L, 10981L, 3509L, 9508L, 12194L,
      11015L, 254651L, 2129L, 32521L, 17683L
    ),
    totalFemale = c(
      12665L, 12673L, 32109L, 31848L, 16695L,
      16595L, 228170L, 234700L, 3550L, 9716L,
      13123L, 11000L, 3554L, 9707L, 13145L,
      11067L, 249777L, 2003L, 33573L, 17739L
    ),
    avgpop = c(
      24560L, 24541L, 62514L, 62190L, 32566L,
      32412L, 456885L, 469966L, 7053L, 19294L,
      25306L, 21981L, 7063L, 19215L, 25339L,
      22082L, 504428L, 4132L, 66094L, 35422L
    )
  ),
  class = "data.frame",
  # Character row names "1".."20", matching the original dput output.
  row.names = as.character(1:20)
)