#这段程序集成了模型的建立,预测和R2值的计算,发现了一个有趣的问题,R语言输出的参数在EXCEL里进行计算会出现相当大的偏差,因此无法将建立的模型通过excel重新计算输出,全部都应该在R语言内实现
rm(list = ls())
library(ggplot2)
file_path<-file.choose()
library(readxl)
mydata <- read_excel(file_path)
#E:\训练集
linemodel<-lm(y1~x,data=mydata)
summary(linemodel)$r.squared
f<-summary(linemodel)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
linemodel
lnmodel<-lm(y1~log(x,base=exp(1)),data=mydata)
summary(lnmodel)$r.squared
f<-summary(lnmodel)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
lnmodel
expmodel<-lm(log(y1)~x,data=mydata)
summary(expmodel)$r.squared
f<-summary(expmodel)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
expmodel
# x<-mydata$x
# y<-mydata$y1
# nlsmodel <- nls(y ~ a*x^b, start = list(a=2, b= 1.5))
# lines(seq(1, 20, by = 0.1), predict(nlsmodel, data.frame(x=seq(1, 20, by = 0.1))))
# nlsmodel
# summary(nlsmodel)
linemodel2<-lm(y2~x,data=mydata)
summary(linemodel2)$r.squared
f<-summary(linemodel2)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
linemodel2
lnmodel2<-lm(y2~log(x),data=mydata)
summary(lnmodel2)$r.squared
f<-summary(lnmodel2)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
lnmodel2
expmodel2<-lm(log(y2)~x,data=mydata)
summary(expmodel2)$r.squared
f<-summary(expmodel2)$fstatistic
pf(f[1], f[2], f[3], lower.tail=F)
expmodel2
# x<-mydata$x
# y<-mydata$y2
# nlsmodel2 <- nls(y ~ a*x^b, start = list(a=2, b= 1.5))
# lines(seq(1, 20, by = 0.1), predict(nlsmodel2, data.frame(x=seq(1, 20, by = 0.1))))
# nlsmodel2
# summary(nlsmodel2)
mydata$linemodel1<-linemodel$fitted.values
mydata$lnmodel1<-lnmodel$fitted.values
mydata$expmodel1<-exp(expmodel$fitted.values)
mydata$linemodel2<-linemodel2$fitted.values
mydata$lnmodel2<-lnmodel2$fitted.values
mydata$expmodel2<-exp(expmodel2$fitted.values)
file_path<-file.choose()
library(readxl)
predictdata <- read_excel(file_path)
#E:\验证集
predictdata$linemodel1prediction<-predict(linemodel,predictdata)
predictdata$lnmodel1prediction<-predict(lnmodel,predictdata)
predictdata$expmodel1prediction<-exp(predict(expmodel,predictdata))
predictdata$linemodel2prediction<-predict(linemodel2,predictdata)
predictdata$lnmodel2prediction<-predict(lnmodel2,predictdata)
predictdata$expmodel2prediction<-exp(predict(expmodel2,predictdata))
r2.test<-function(y_actual,y_predicted){
avr_y_actual <- mean(y_actual)
ss_total <- sum((y_actual - avr_y_actual)^2)
ss_regression <- sum((y_predicted - avr_y_actual)^2)
ss_residuals <- sum((y_actual - y_predicted)^2)
rsquare <- 1 - ss_residuals / ss_total
return(rsquare)#当模型偏差过大,rsquare很小时,不采用rsquare统计
n1<-length(y_actual)
n2<-length(y_predicted)#要求n1==n2
meansquare<-ss_residuals/(n1-2)
#参考王辰勇译《线性回归分析导论》12页
#return(meansquare)#MS残
RMSE<-(ss_residuals/n1)^0.5
NRMSD<-RMSE/avr_y_actual
#return(NRMSD)
}
r2.test(predictdata$y1,predictdata$linemodel1prediction)
r2.test(predictdata$y1,predictdata$lnmodel1prediction)
r2.test(predictdata$y1,predictdata$expmodel1prediction)
r2.test(predictdata$y2,predictdata$linemodel2prediction)
r2.test(predictdata$y2,predictdata$lnmodel2prediction)
r2.test(predictdata$y2,predictdata$expmodel2prediction)