Ошибка узла фрагмента Knime R: нижний индекс выходит за пределы - PullRequest
0 голосов
/ 15 января 2020

При попытке выполнить узел фрагмента R в KNIME, R уже установлен с пакетом dplyr.

library(dplyr)

atrinktos <- knime.in

# Firs we filter out the months
# Here it is done using Lithuanian month names, but it needn't be so and could be
# used with any other language

sausis <- atrinktos %>% filter(grepl("Sausis", month))

vasaris <- atrinktos %>% filter(grepl("Vasaris", month))

kovas <- atrinktos %>% filter(grepl("Kovas", month))

balandis <- atrinktos %>% filter(grepl("Balandis", month))

geguze <- atrinktos %>% filter(grepl("Gegužė", month))

birzelis <- atrinktos %>% filter(grepl("Birželis", month))

liepa <- atrinktos %>% filter(grepl("Liepa", month))

rugpjutis <- atrinktos %>% filter(grepl("Rugpjūtis", month))

rugsejis <- atrinktos %>% filter(grepl("Rugsėjis", month))

spalis <- atrinktos %>% filter(grepl("Spalis", month))

lapkritis <- atrinktos %>% filter(grepl("Lapkritis", month))

gruodis <- atrinktos %>% filter(grepl("Gruodis", month))

# Then we create a monthly list variable
men <- list(sausis, vasaris, kovas, balandis, geguze, birzelis, liepa, rugpjutis,
            rugsejis, spalis, lapkritis, gruodis)

#############################
### First period ###
#############################

# we have to create objects that will be populated by our loop

regre1 <- matrix(nrow = 12, ncol = (dim(atrinktos)[2] - 3))
pard1 <- matrix(nrow = 1, ncol = (dim(atrinktos)[2] - 3))
vard1 <- vector()
pav1 <- vector()
p1 <- vector()

# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1) / beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
  varlist <- names(sausis)[4:dim(atrinktos)[2]]
  models <- lapply(varlist,
                   function(x) {
                     lm(substitute(i ~ month, list(i = as.name(x))),
                        data = men[[j]])
                   }
            )
  for (i in 1:(dim(atrinktos)[2] - 3)) {
    a <- models[[i]]   
    p1[i] <- summary(a)$coefficients[2, 4]
    if (p1[i] < 0.05) {
      p1[i] <- p1[i]
      pard1[,i] <- (a$coefficients[1] + a$coefficients[2]) / a$coefficients[1] 
    } else {
      #p[i] <- 1
      pard1[, i] <- 1
    }
    pav1[i] <- cbind(substring(a$terms[[2]], 1, 4))
  }
  rownames(pard1) <- names(a$coefficients[2])
  vard1[j] <- cbind(names(a$coefficients[2]))
  regre1[j, ] <- cbind(pard1)
}

# We name the columns and rows of our matrix objects

vard1 <- substring(vard1, 6, ) # We strip the long name of the store and leave
                               # the code only
rownames(regre1) <- vard1
colnames(regre1) <- pav1

##############################
### ANTRAS ###
##############################

# we have to create objects that will be populated by our loop

regre2 <- matrix(nrow=12, ncol=(dim(atrinktos)[2] - 3))
pard2 <- matrix(nrow=1, ncol=(dim(atrinktos)[2] - 3))
vard2 <- vector()
pav2 <- vector()
p2 <- vector()
# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1)/beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
  varlist <- names(sausis)[4:dim(atrinktos)[2]]
  models <- lapply(varlist,
                   function(x) {
                     lm(substitute(i ~ month, list(i = as.name(x))),
                        data = men[[j]])
                   }
            )
  for (i in 1:(dim(atrinktos)[2] - 3)) {
    a <- models[[i]]
    p2[i] <- summary(a)$coefficients[3, 4]
    if (p2[i] < 0.05) {
      p2[i] <- p2[i]
      pard2[,i] <- (a$coefficients[1] + a$coefficients[3]) / a$coefficients[1] 
    } else {
      #p[i] <- 1
      pard2[,i] <- 1
    }
    pav2[i] <- cbind(substring(a$terms[[2]], 1, 4))
  }
  rownames(pard2) <- names(a$coefficients[3])
  vard2[j] <- cbind(names(a$coefficients[3]))
  regre2[j,] <- cbind(pard2)
}

# We name the columns and rows of our matrix objects

vard2 <- substring(vard2,6,) # We strip the long name of the store and leave
                             # the code only
rownames(regre2) <- vard2
colnames(regre2) <- pav2

##############################
### TRECIAS ###
##############################

# we have to create objects that will be populated by our loop

regre3 <- matrix(nrow=12, ncol=(dim(atrinktos)[2] - 3))
pard3 <- matrix(nrow=1, ncol=(dim(atrinktos)[2] - 3))
vard3 <- vector()
pav3 <- vector()
p3 <- vector()
# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1)/beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
  varlist <- names(sausis)[4:dim(atrinktos)[2]]
  models <- lapply(varlist,
              function(x) {
                lm(substitute(i ~ month, list(i = as.name(x))),
                   data = men[[j]])
              }
            )
  for (i in 1:(dim(atrinktos)[2] - 3)) {
    a <- models[[i]]    
    p3[i] <- summary(a)$coefficients[4, 4]
    if (p3[i] < 0.05) {
      p3[i] <- p3[i]
      pard3[, i] <- (a$coefficients[1] + a$coefficients[4]) / a$coefficients[1] 
    } else {
      #p[i] <- 1
      pard3[, i] <- 1
    }
    pav3[i] <- cbind(substring(a$terms[[2]], 1, 4))
  }
  rownames(pard3) <- names(a$coefficients[4])
  vard3[j] <- cbind(names(a$coefficients[4]))
  regre3[j,] <- cbind(pard3)
}

# We name the columns and rows of our matrix objects

vard3 <- substring(vard3, 6, ) #  We strip the long name of the store and leave the code only
rownames(regre3) <- vard3
colnames(regre3) <- pav3

#############################
### Combining the matrices###
#############################

regre <- rbind(regre1, regre2, regre3)

# Output
knime.out <- regre

Я получаю сообщение об ошибке:

Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Error: subscript out of bounds

Я понимаю, что первые строки просто предупреждение? и это не должно быть причиной этой ошибки, возможно я ошибаюсь. То, что предшествует узлу фрагмента R, - это базовое c соединение с базой данных с несколькими базами данных SQL исполнителями и узлами обработки данных. Возможно, ошибка в коде, который я предоставил, или, возможно, мне стоит взглянуть на более ранние узлы?

Так как я очень плохо знаком с языком R, я не могу найти решение в Интернете где-либо.

РЕДАКТИРОВАТЬ: Обнаружено, откуда возникает ошибка «вне границ».

p3[i] <- summary(a)$coefficients[4,4]
if (p3[i] < 0.05) {
  p3[i] <- p3[i] pard3[,i] <- (a$coefficients[1] + a$coefficients[4]) / a$coefficients[1]
} else {
  #p[i] <- 1
  pard3[,i] <- 1
}

№ 4 вне привязок, если я изменю его на 3, потому что он работает, но это не так, как работает программирование, просто случайная замена число.

Есть идеи, что это за # 4 и как исправить код?

...