Подведение итогов Dataframe, содержащего числа и строки по широте - PullRequest
1 голос
/ 07 мая 2020

Я хотел бы иметь возможность рассчитать среднее значение «IntakeEnergy» по местоположению (широте) для следующего набора данных. Проблема, с которой я столкнулся, заключается в том, что набор данных содержит как число, так и символы, в результате чего функция не знает, как вычислить среднее значение строки. Я попытался сгруппировать данные по широте, а затем вычислить средние значения других столбцов следующим образом:

blah<-profit.fall.all %>% group_by(Lat) %>% summarise_all(funs(mean))

Однако это будет успешным, только если я отброшу все столбцы строк. Я гораздо лучше знаком с Excel, и я считаю, что потенциальным решением для этого было бы создание сводной таблицы, но я не уверен, что это лучший способ решить эту проблему.

Data<-dput(head(profit.fall.all,15))
structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L, 
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1", 
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3", 
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)", 
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3", 
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E", 
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1", 
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1", 
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5", 
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2", 
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2", 
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud", 
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh", 
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1", 
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4", 
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4", 
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC", 
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", 
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L, 
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("", 
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater", 
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L, 
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("", 
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109, 
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109, 
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354, 
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058, 
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511, 
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058), 
    Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia", 
    "Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079, 
    29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049, 
    454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192, 
    38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962, 
    57.7990761383585, 11.4733747438)), row.names = c(1L, 5L, 
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")
> View(Data)

Ответы [ 2 ]

1 голос
/ 07 мая 2020

Мы также можем использовать summarise_at

library(dplyr)
profit.fall.all %>%
    group_by(Lat) %>%
    summarise_at(vars(Long,IntakeEnergy), mean, na.rm = TRUE)
1 голос
/ 07 мая 2020

Итоговые два варианта. Надеюсь, они достаточно близки, чтобы вы смогли изменить их на свой вкус. Мне кажется, я дал вам четыре правильных ответа, и вы не можете полностью описать то, что хотите.


Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group) 
#> # A tibble: 15 x 8
#> # Groups:   Lat [12]
#>    Id         Season Refuge    HType  Long   Lat IntakeEnergy avgIntakeEnergyby…
#>    <fct>      <fct>  <fct>     <fct> <dbl> <dbl>        <dbl>              <dbl>
#>  1 BH Low 2   Fall   Bishop's… Low   -76.0  38.2         64.3              259. 
#>  2 BH Low 2   Fall   Bishop's… Low   -76.0  38.2        455.               259. 
#>  3 BI High 1  Fall   Barren I… High  -76.3  38.3       1183.              1183. 
#>  4 BI Mud     Fall   Barren I… Mud   -76.3  38.3         22.8               17.1
#>  5 BI Mud     Fall   Barren I… Mud   -76.3  38.3         11.5               17.1
#>  6 BW Money … Fall   Blackwat… Low   -76.2  38.4         30.5               30.5
#>  7 BW Mud 1   Fall   Blackwat… Mud   -76.1  38.4        483.               483. 
#>  8 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        113.               503. 
#>  9 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        893.               503. 
#> 10 BW SAV 2   Fall   Blackwat… SAV   -76.1  38.4      16854.             16854. 
#> 11 EN High 2  Fall   Eastern … High  -76.2  39.0         38.6               38.6
#> 12 EN Low 1   Fall   Eastern … Low   -76.2  39.0        558.               558. 
#> 13 MWP 30 Mu… Fall   Martin    Mud   -76.0  38.0         57.8               57.8
#> 14 MWP 33 Lo… Fall   Martin    Low   -76.0  38.0       6326.              6326. 
#> 15 MWP 34 Lo… Fall   Martin    Low   -76.0  38.0         29.7               29.7

Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group, -IntakeEnergy) %>%
  distinct(Lat, .keep_all = TRUE)
#> # A tibble: 12 x 7
#> # Groups:   Lat [12]
#>    Id             Season Refuge        HType  Long   Lat avgIntakeEnergybyLat
#>    <fct>          <fct>  <fct>         <fct> <dbl> <dbl>                <dbl>
#>  1 BH Low 2       Fall   Bishop's Head Low   -76.0  38.2                259. 
#>  2 BI High 1      Fall   Barren Island High  -76.3  38.3               1183. 
#>  3 BI Mud         Fall   Barren Island Mud   -76.3  38.3                 17.1
#>  4 BW Money Stump Fall   Blackwater    Low   -76.2  38.4                 30.5
#>  5 BW Mud 1       Fall   Blackwater    Mud   -76.1  38.4                483. 
#>  6 BW SAV 1       Fall   Blackwater    SAV   -76.1  38.4                503. 
#>  7 BW SAV 2       Fall   Blackwater    SAV   -76.1  38.4              16854. 
#>  8 EN High 2      Fall   Eastern Neck  High  -76.2  39.0                 38.6
#>  9 EN Low 1       Fall   Eastern Neck  Low   -76.2  39.0                558. 
#> 10 MWP 30 Mud 1   Fall   Martin        Mud   -76.0  38.0                 57.8
#> 11 MWP 33 Low 3   Fall   Martin        Low   -76.0  38.0               6326. 
#> 12 MWP 34 Low 4   Fall   Martin        Low   -76.0  38.0                 29.7

Оказывается, OP хочет добавить столбец и сохранить другие столбцы и строки ...

library(dplyr)

Data %>% 
  group_by(Lat) %>% 
  mutate(avgbyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id)
#> # A tibble: 15 x 9
#> # Groups:   Lat [12]
#>    Id        Season Refuge    HType  Long   Lat Prey.Group IntakeEnergy avgbyLat
#>    <fct>     <fct>  <fct>     <fct> <dbl> <dbl> <fct>             <dbl>    <dbl>
#>  1 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus           64.3    259. 
#>  2 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus          455.     259. 
#>  3 BI High 1 Fall   Barren I… High  -76.3  38.3 Melampus         1183.    1183. 
#>  4 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Melampus           22.8     17.1
#>  5 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Scirpus            11.5     17.1
#>  6 BW Money… Fall   Blackwat… Low   -76.2  38.4 Ruppia             30.5     30.5
#>  7 BW Mud 1  Fall   Blackwat… Mud   -76.1  38.4 Ruppia            483.     483. 
#>  8 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Melampus          113.     503. 
#>  9 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Ruppia            893.     503. 
#> 10 BW SAV 2  Fall   Blackwat… SAV   -76.1  38.4 Ruppia          16854.   16854. 
#> 11 EN High 2 Fall   Eastern … High  -76.2  39.0 Ruppia             38.6     38.6
#> 12 EN Low 1  Fall   Eastern … Low   -76.2  39.0 Ruppia            558.     558. 
#> 13 MWP 30 M… Fall   Martin    Mud   -76.0  38.0 Ruppia             57.8     57.8
#> 14 MWP 33 L… Fall   Martin    Low   -76.0  38.0 Ruppia           6326.    6326. 
#> 15 MWP 34 L… Fall   Martin    Low   -76.0  38.0 Melampus           29.7     29.7

в отличие от предыдущего ответа.

Попробуйте summarise_if(is.numeric, mean, na.rm = TRUE) [конечно, вы хотите только Lat]

library(dplyr)

Data %>% group_by(Lat) %>% summarise_if(is.numeric, mean, na.rm = TRUE)
#> # A tibble: 12 x 3
#>      Lat  Long IntakeEnergy
#>    <dbl> <dbl>        <dbl>
#>  1  38.0 -76.0       6326. 
#>  2  38.0 -76.0         29.7
#>  3  38.0 -76.0         57.8
#>  4  38.2 -76.0        259. 
#>  5  38.3 -76.3       1183. 
#>  6  38.3 -76.3         17.1
#>  7  38.4 -76.1      16854. 
#>  8  38.4 -76.1        483. 
#>  9  38.4 -76.2         30.5
#> 10  38.4 -76.1        503. 
#> 11  39.0 -76.2        558. 
#> 12  39.0 -76.2         38.6

где данные ...

Data <- structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L,
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)",
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3",
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E",
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1",
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1",
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5",
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh",
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1",
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4",
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4",
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC",
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L,
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L,
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("",
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109,
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109,
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354,
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058,
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511,
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058),
Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia",
"Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079,
29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049,
454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192,
38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962,
57.7990761383585, 11.4733747438)), row.names = c(1L, 5L,
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")

glimpse(Data)
#> Rows: 15
#> Columns: 8
#> $ Id           <fct> BW SAV 1, MWP 34 Low 4, BI Mud, BH Low 2, BI High 1, BH …
#> $ Season       <fct> Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fa…
#> $ Refuge       <fct> Blackwater, Martin, Barren Island, Bishop's Head, Barren…
#> $ HType        <fct> SAV, Low, Mud, Low, High, Low, SAV, Mud, SAV, High, Low,…
#> $ Long         <dbl> -76.10911, -75.99733, -76.26163, -76.03896, -76.25662, -…
#> $ Lat          <dbl> 38.44109, 37.99369, 38.33606, 38.22447, 38.32623, 38.224…
#> $ Prey.Group   <fct> Melampus, Melampus, Melampus, Melampus, Melampus, Melamp…
#> $ IntakeEnergy <dbl> 112.57799, 29.69577, 22.82585, 64.26696, 1182.80971, 454…

Создано в 2020 г. -05-07 с помощью пакета REPEX (v0.3.0)

...