При попытке выполнить узел фрагмента R в KNIME, R уже установлен с пакетом dplyr.
library(dplyr)
atrinktos <- knime.in
# Firs we filter out the months
# Here it is done using Lithuanian month names, but it needn't be so and could be
# used with any other language
sausis <- atrinktos %>% filter(grepl("Sausis", month))
vasaris <- atrinktos %>% filter(grepl("Vasaris", month))
kovas <- atrinktos %>% filter(grepl("Kovas", month))
balandis <- atrinktos %>% filter(grepl("Balandis", month))
geguze <- atrinktos %>% filter(grepl("Gegužė", month))
birzelis <- atrinktos %>% filter(grepl("Birželis", month))
liepa <- atrinktos %>% filter(grepl("Liepa", month))
rugpjutis <- atrinktos %>% filter(grepl("Rugpjūtis", month))
rugsejis <- atrinktos %>% filter(grepl("Rugsėjis", month))
spalis <- atrinktos %>% filter(grepl("Spalis", month))
lapkritis <- atrinktos %>% filter(grepl("Lapkritis", month))
gruodis <- atrinktos %>% filter(grepl("Gruodis", month))
# Then we create a monthly list variable
men <- list(sausis, vasaris, kovas, balandis, geguze, birzelis, liepa, rugpjutis,
rugsejis, spalis, lapkritis, gruodis)
#############################
### First period ###
#############################
# we have to create objects that will be populated by our loop
regre1 <- matrix(nrow = 12, ncol = (dim(atrinktos)[2] - 3))
pard1 <- matrix(nrow = 1, ncol = (dim(atrinktos)[2] - 3))
vard1 <- vector()
pav1 <- vector()
p1 <- vector()
# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1) / beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
varlist <- names(sausis)[4:dim(atrinktos)[2]]
models <- lapply(varlist,
function(x) {
lm(substitute(i ~ month, list(i = as.name(x))),
data = men[[j]])
}
)
for (i in 1:(dim(atrinktos)[2] - 3)) {
a <- models[[i]]
p1[i] <- summary(a)$coefficients[2, 4]
if (p1[i] < 0.05) {
p1[i] <- p1[i]
pard1[,i] <- (a$coefficients[1] + a$coefficients[2]) / a$coefficients[1]
} else {
#p[i] <- 1
pard1[, i] <- 1
}
pav1[i] <- cbind(substring(a$terms[[2]], 1, 4))
}
rownames(pard1) <- names(a$coefficients[2])
vard1[j] <- cbind(names(a$coefficients[2]))
regre1[j, ] <- cbind(pard1)
}
# We name the columns and rows of our matrix objects
vard1 <- substring(vard1, 6, ) # We strip the long name of the store and leave
# the code only
rownames(regre1) <- vard1
colnames(regre1) <- pav1
##############################
### ANTRAS ###
##############################
# we have to create objects that will be populated by our loop
regre2 <- matrix(nrow=12, ncol=(dim(atrinktos)[2] - 3))
pard2 <- matrix(nrow=1, ncol=(dim(atrinktos)[2] - 3))
vard2 <- vector()
pav2 <- vector()
p2 <- vector()
# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1)/beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
varlist <- names(sausis)[4:dim(atrinktos)[2]]
models <- lapply(varlist,
function(x) {
lm(substitute(i ~ month, list(i = as.name(x))),
data = men[[j]])
}
)
for (i in 1:(dim(atrinktos)[2] - 3)) {
a <- models[[i]]
p2[i] <- summary(a)$coefficients[3, 4]
if (p2[i] < 0.05) {
p2[i] <- p2[i]
pard2[,i] <- (a$coefficients[1] + a$coefficients[3]) / a$coefficients[1]
} else {
#p[i] <- 1
pard2[,i] <- 1
}
pav2[i] <- cbind(substring(a$terms[[2]], 1, 4))
}
rownames(pard2) <- names(a$coefficients[3])
vard2[j] <- cbind(names(a$coefficients[3]))
regre2[j,] <- cbind(pard2)
}
# We name the columns and rows of our matrix objects
vard2 <- substring(vard2,6,) # We strip the long name of the store and leave
# the code only
rownames(regre2) <- vard2
colnames(regre2) <- pav2
##############################
### TRECIAS ###
##############################
# we have to create objects that will be populated by our loop
regre3 <- matrix(nrow=12, ncol=(dim(atrinktos)[2] - 3))
pard3 <- matrix(nrow=1, ncol=(dim(atrinktos)[2] - 3))
vard3 <- vector()
pav3 <- vector()
p3 <- vector()
# we put the data in the loop and create models in every loop step. From those
# models we get the regressor effects if p < 0.05
# Effects coefficients are calculated according to the following formula: if
# p < 0.05 then coefficient = (beta0 + beta1)/beta0
# If p > 0.05, then regressor coefficient is 1, i.e. the month in question does
# not differ from other months
for (j in 1:12) {
varlist <- names(sausis)[4:dim(atrinktos)[2]]
models <- lapply(varlist,
function(x) {
lm(substitute(i ~ month, list(i = as.name(x))),
data = men[[j]])
}
)
for (i in 1:(dim(atrinktos)[2] - 3)) {
a <- models[[i]]
p3[i] <- summary(a)$coefficients[4, 4]
if (p3[i] < 0.05) {
p3[i] <- p3[i]
pard3[, i] <- (a$coefficients[1] + a$coefficients[4]) / a$coefficients[1]
} else {
#p[i] <- 1
pard3[, i] <- 1
}
pav3[i] <- cbind(substring(a$terms[[2]], 1, 4))
}
rownames(pard3) <- names(a$coefficients[4])
vard3[j] <- cbind(names(a$coefficients[4]))
regre3[j,] <- cbind(pard3)
}
# We name the columns and rows of our matrix objects
vard3 <- substring(vard3, 6, ) # We strip the long name of the store and leave the code only
rownames(regre3) <- vard3
colnames(regre3) <- pav3
#############################
### Combining the matrices###
#############################
regre <- rbind(regre1, regre2, regre3)
# Output
knime.out <- regre
Я получаю сообщение об ошибке:
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Error: subscript out of bounds
Я понимаю, что первые строки просто предупреждение? и это не должно быть причиной этой ошибки, возможно я ошибаюсь. То, что предшествует узлу фрагмента R, - это базовое c соединение с базой данных с несколькими базами данных SQL исполнителями и узлами обработки данных. Возможно, ошибка в коде, который я предоставил, или, возможно, мне стоит взглянуть на более ранние узлы?
Так как я очень плохо знаком с языком R, я не могу найти решение в Интернете где-либо.
РЕДАКТИРОВАТЬ: Обнаружено, откуда возникает ошибка «вне границ».
p3[i] <- summary(a)$coefficients[4,4]
if (p3[i] < 0.05) {
p3[i] <- p3[i] pard3[,i] <- (a$coefficients[1] + a$coefficients[4]) / a$coefficients[1]
} else {
#p[i] <- 1
pard3[,i] <- 1
}
№ 4 вне привязок, если я изменю его на 3, потому что он работает, но это не так, как работает программирование, просто случайная замена число.
Есть идеи, что это за # 4 и как исправить код?