Я протестировал линейную регрессию, разделив данные в соотношении 80:20: 80% для обучения и 20% для тестирования.
Файл lr_model.R:
source("nn_lr_operation.R")
# Dataset selector handed to readdata(): HVGO = 1, MVGO = 2, VR = 3.
type_data_run <- 2
fullData <- readdata(type_data_run)

# Train/test split. The split is positional: the first tot_train rows are
# used for fitting and every remaining row for evaluation. No statement in
# this section draws random numbers, so the seed does not affect the split.
set.seed(12345)
totRow <- nrow(fullData)
tot_train <- trainvalue(totRow)
tot_test <- tot_train + 1
trainset <- fullData[1:tot_train, ]
testset <- fullData[tot_test:totRow, ]

# Pearson correlation between predictor and response over the full data set.
correlation <- cor(fullData[["Tan"]], fullData[["Value"]])

# Simple linear regression of Value on Tan, fitted on the training rows only.
r <- lm(Value ~ Tan, data = trainset)

# Predictions for the held-out rows.
predicts <- predict(r, testset)

# Absolute prediction error for every held-out row.
percent_error <- data.frame(
  Percentage_Error = abs(testset[["Value"]] - predicts)
)

# Total percentage error: summed absolute error expressed as a percentage of
# the summed actual values of the held-out rows.
total_percent_error <- sum(percent_error / sum(testset[["Value"]])) * 100
Tot_perc_error <- data.frame(Total_percentage_error = total_percent_error)

# Accuracy is reported as the complement of the total percentage error.
accuracy <- 100 - total_percent_error
acrcy <- data.frame(Accuracy = accuracy)
Файл nn_lr_operation.R:
#' Read one of the matched TAN data sets from the `Data` folder.
#'
#' The workbook is looked up below the current working directory and both
#' of its columns are read as numeric.
#'
#' @param x Data set selector: 1 reads the HVGO workbook, 2 the MVGO
#'   workbook, and any other value the VR workbook.
#' @return Whatever `read_excel()` returns for the selected workbook.
readdata <- function(x) {
  base_dir <- getwd()
  workbook <- if (x == 1) {
    "/Data/cr_tan_hvgo_matched_combine.xlsx"
  } else if (x == 2) {
    "/Data/cr_tan_mvgo_matched_combine.xlsx"
  } else {
    "/Data/cr_tan_vr_matched_combine.xlsx"
  }
  # readxl is attached here, on first use, rather than at file level.
  library(readxl)
  read_excel(paste0(base_dir, workbook), col_types = c("numeric", "numeric"))
}
#' Number of rows that go into the training split.
#'
#' @param x Total number of rows in the data set.
#' @param train_fraction Share of the rows used for training, between 0 and
#'   1. Defaults to 0.8, i.e. an 80:20 train/test split.
#' @return `train_fraction * x`, rounded up to the next whole number.
trainvalue <- function(x, train_fraction = 0.8) {
  stopifnot(
    is.numeric(train_fraction),
    length(train_fraction) == 1L,
    !is.na(train_fraction),
    train_fraction >= 0,
    train_fraction <= 1
  )
  ceiling(train_fraction * x)
}
Затем я реализовал оптимизацию методом градиентного спуска. Но почему-то точность, которую я получаю, ниже, чем до применения градиентного спуска.
Код оптимизации методом градиентного спуска:
#' Fit linear-regression coefficients by batch gradient descent.
#'
#' A column of ones is prepended to `x` for the intercept. The cost being
#' minimised is (1 / (2 * n)) * sum((X %*% theta - y)^2). Every coefficient
#' starts at 1, and iteration stops as soon as the absolute change in cost
#' between two consecutive steps is no longer greater than `epsilon`.
#'
#' @param x Feature values, one row per observation; anything `as.matrix()`
#'   accepts (a plain vector is treated as a single feature column).
#' @param y Response values, one per row of `x`.
#' @param alpha Learning rate (step size).
#' @param epsilon Convergence threshold on the absolute change in cost.
#' @return A one-column matrix of coefficients with the intercept first, or
#'   `NULL` (after printing a hint) when a step increased the cost.
GradD <- function(x, y, alpha, epsilon) {
  x <- as.matrix(x)
  y <- as.matrix(y)
  n <- nrow(x)
  if (n == 0L) {
    stop("x must contain at least one row", call. = FALSE)
  }
  if (nrow(y) != n) {
    stop("x and y must have the same number of rows", call. = FALSE)
  }

  x <- cbind(rep(1, n), x)
  # One starting coefficient per design-matrix column, so the function is not
  # tied to the single-feature case.
  theta <- matrix(1, ncol(x), 1)

  # The residual for the current theta is kept between iterations: it feeds
  # both the cost of this step and the gradient of the next one.
  residual <- x %*% theta - y
  cost <- drop((1 / (2 * n)) * t(residual) %*% residual)

  i <- 0
  delta <- 1
  while (delta > epsilon) {
    i <- i + 1
    theta <- theta - (alpha / n) * (t(x) %*% residual)
    residual <- x %*% theta - y
    new_cost <- drop((1 / (2 * n)) * t(residual) %*% residual)
    change <- new_cost - cost
    delta <- abs(change)
    if (change > 0) {
      # A rising cost means the step overshot the minimum; give up.
      print("The cost is increasing. Try reducing alpha.")
      return(NULL)
    }
    # Only the previous cost is needed, so no cost history is accumulated.
    cost <- new_cost
  }
  print(sprintf("Completed in %i iterations.", i))
  theta
}
#' Predict responses from a coefficient matrix and an input matrix.
#'
#' @param theta Coefficient column matrix with the intercept first.
#' @param x Input matrix with one row per observation and no intercept
#'   column; a column of ones is prepended before multiplying.
#' @return The matrix product of the extended input matrix and `theta`.
TPredict <- function(theta, x) {
  n_obs <- nrow(x)
  design <- cbind(rep(1, n_obs), x)
  design %*% theta
}
# Fit the gradient-descent model on the training rows.
# Predictor and response are passed as single-column matrices.
x <- as.matrix(trainset[["Tan"]])
y <- as.matrix(trainset[["Value"]])
theta <- GradD(
  x,
  y,
  alpha = 0.006,
  epsilon = 10^-10
)
# Show the fitted coefficients (intercept first).
theta

# Predict the held-out rows with the fitted coefficients.
z <- as.matrix(testset[["Tan"]])
predicted <- TPredict(theta, z)