Для подобных задач по агрегированию данных мне нравится использовать пакет dplyr,
входящий в набор tidyverse. Подробнее здесь:
https://dplyr.tidyverse.org/
library(tidyverse)
# Reshape into long (aka "tidy") format: one row per original row x species.
# Assumes `df` has the species columns sp1..sp4 laid out contiguously, since
# they are selected as a range -- TODO confirm against the input data.
df_tidy <- df %>%
  # Tag every original row so it can still be identified after reshaping
  mutate(obs_num = row_number()) %>%
  # pivot_longer() is the current replacement for the superseded gather().
  # Rows come out ordered by original row rather than by species column;
  # the aggregations that follow do not depend on row order.
  pivot_longer(sp1:sp4, names_to = "sp", values_to = "count")
# First question: how many species were recorded at each site?
df_tidy %>%
  # Total count for every recorded combination of site and species
  count(site, sp, wt = count) %>%
  group_by(site) %>%
  # Number of species with a positive total at the site. Summing the logical
  # (instead of filtering rows and counting what is left) keeps a site whose
  # totals are all zero in the result with a value of 0, and names the
  # output column explicitly rather than relying on the automatic `nn` rename.
  summarise(nn = sum(n > 0))
## A tibble: 2 x 2
# site nn
# <chr> <int>
#1 SiteA 2
#2 SiteB 4
# Second question: average number of taxa per observation, by site
df_tidy %>%
  # Flag the species that were actually seen (count above zero)
  mutate(present = count > 0) %>%
  # For each original row (obs_num) within a site, add up the flags to get
  # the number of species present in that observation
  count(site, obs_num, wt = present) %>%
  # Average those per-observation species counts within each site
  group_by(site) %>%
  summarise(avg_taxa = mean(n))
## A tibble: 2 x 2
# site avg_taxa
# <chr> <dbl>
#1 SiteA 1
#2 SiteB 2