Функция R / l oop вычисляет, но возвращает нежелательные результаты, подозрительные проблемы синтаксиса / класса / поднабора - PullRequest
1 голос
/ 25 марта 2020

Я знаю, что есть более простые способы выполнения sh длительного вычисления среднего по коэффициенту (например, tapply / table), но мне не терпится изучить циклы и синтаксис, а также проблемы, связанные с классом / подмножествами / синтаксисом


# b) use a loop and conditionals to sum and then divide (long way)
       # test calc of total length by species
       sum(iris$Petal.Length[iris$Species == "setosa"])
     sum(iris$Petal.Length[iris$Species == "versicolor"])

       # test calc of count rows of a species
       nrow(subset(iris, Species == "setosa"))
     nrow(subset(iris, Species == "versicolor" ))

       # test calc of mean (long way)
       test1 <- sum(iris$Petal.Length[iris$Species == "setosa"]) / nrow(subset(iris, Species ==      "setosa"))
       test1

       test2 <- sum(iris$Petal.Length[iris$Species == "versicolor"]) / nrow(subset(iris, Species == "versicolor" ))
       test2


           # attempt at function, ideally should return the mean by factor, when you enter the Species name
               calc_mean_factor <- function() {
               # spec_levels <- c(levels(iris$Species)) # levels as a vector, commented to exclude from test calcs
                   spec_levels <- levels(iris$Species) # obtains the levels of the factors - should this be vector/factor?
                   x <- length(spec_levels) # creates numerical range cap for the loop
                       for(i in 1:x){
                           tot_spec <- sum(iris$Petal.Length[iris$Species] == spec_levels[i]) # is this correct syntax for loop?
                           count_spec <- nrow(subset(iris, Species == spec_levels[i])) # is this correct syntax for loop?
                           mean_spec <- tot_spec / count_spec # is this correct syntax for loop?
                       }

                   # print tests to check if calculating as expected
                   print(spec_levels[1:x]) # this shows the correct names
                   print(spec_levels) # same as above
                   print(spec_levels[2]) # passes subset test
                   print(class(spec_levels)) # should this class be 'factor', 'vector', or this is ok?
                   print(x) # as expected, it is the length of the species of 3
                   print(class(x)) # returns integer, assume this is ok
                   print(1:x) # as expected, the range from 1 to 3
                   print(sum(iris$Petal.Length[iris$Species])) # this function is calculating, but returning total of 205
                   print(sum(iris$Petal.Length[iris$Species] == spec_levels[1])) # why is the function not accepting the subset? is it due to class?
                   print(sum(iris$Petal.Length[iris$Species] == "setosa")) # why is this returning 0
                   print(sum(iris$Petal.Length[iris$Species] == spec_levels[1:x])) # why is this returning 0
                   print(tot_spec) # expected "0" due to above tests returning 0
                   print(count_spec[1:x]) # 50 is expected, but why not printing for all three species?
                   }

               calc_mean_factor()

1 Ответ

1 голос
/ 25 марта 2020

Внутри вашего для l oop вы назначаете векторы длины 1, tot_spec и count_spec, другое значение 3 раза. Сначала назначьте пустой вектор, а затем назначьте значение путем поднабора.

tot_spec <- count_spec <- vector()
for(i in 1:x){
  tot_spec[i] <- sum(iris$Petal.Length[iris$Species] == spec_levels[i]) 
  count_spec[i] <- nrow(subset(iris, Species == spec_levels[i])) 
  mean_spec <- tot_spec / count_spec 
              }
...