d <- file.choose()
d <- read.csv(d, header = TRUE)  # read.csv already returns a data frame
library(MASS)  # rlm() for robust regression lives in MASS, not foreign
# The ID number is even, hence we take the first 71 observations
d <- d[1:71, ]
# Check the new data set:
d
# Choose the variables participating in the assignment:
292302%%13+2
[1] 12
2302%%13+2
[1] 3
# Our explanatory variables are variable #3 (lgdp2) and variable #12 (gcony2).
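# As a quick sanity check (a sketch assuming the columns are ordered as in the source file),
# the names of the selected columns can be printed:
names(d)[c(3, 12)]  # expected to print "lgdp2" "gcony2"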
# Now plot a loess regression for variable #3:
scatter.smooth(d[,2] ~ d[,3])                # scatterplot with a loess smooth
abline(lm(d[,2] ~ d[,3]), col="red", lwd=4)  # add the OLS line in red
# Add a robust linear regression as a blue line on the scatterplot and label the OLS fit.
# As a result, we get both fitted lines on one plot:
abline(rlm(d[,2] ~ d[,3]), col="blue", lwd=4)    # robust regression line in blue
text(xcenter, ycenter, "OLS", col="red", cex=2)  # xcenter/ycenter are placeholder coordinates near the OLS line
summary(lm(d[,2] ~ d[,3]))
summary(rlm(d[,2] ~ d[,3]))
# A table comparing the OLS and the robust regression results can be constructed as sketched below.
# Both regressions turn out to be poor estimators of y.net given lgdp2: the coefficients are
# insignificant in both fits, so neither model can be used for forecasting.
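# One way to build such a table is to bind the coefficients of the two fits side by side.
# A minimal sketch (ols3 and rob3 are names introduced here for illustration):
ols3 <- lm(d[,2] ~ d[,3])   # ordinary least squares
rob3 <- rlm(d[,2] ~ d[,3])  # robust regression (MASS)
round(cbind(OLS = coef(ols3), Robust = coef(rob3)), 4)  # coefficient comparison table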
# Now perform the same steps for the second explanatory variable:
scatter.smooth(d[,2] ~ d[,12])                 # scatterplot with a loess smooth
abline(lm(d[,2] ~ d[,12]), lwd=4, col="blue")  # OLS line in blue
abline(rlm(d[,2] ~ d[,12]), lwd=4, col="red")  # robust regression line in red
summary(lm(d[,2] ~ d[,12]))
summary(rlm(d[,2] ~ d[,12]))
# We get similar results: the lm and rlm fits are not significantly different, both slopes are insignificant, and neither model can be used for prediction.
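# A quick way to back up the insignificance claim is to inspect the confidence interval of the
# slope. A minimal sketch (fit12 is a name introduced here for illustration; 95% level assumed):
fit12 <- lm(d[,2] ~ d[,12])
confint(fit12)  # a slope interval that covers 0 is consistent with an insignificant coefficient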
# Now fit a nonparametric regression on both explanatory variables. The formula is slightly different:
fit <- loess(d[,2] ~ d[,3] + d[,12], span=1, degree=2)
summary(fit)
# The output of summary(fit) is the following:
Call:
loess(formula = d[, 2] ~ d[, 3] + d[, 12], span = 1, degree = 2)
Equivalent Number of Parameters: 6.66
Residual Standard Error: 0.01857
Trace of smoother matrix: 7.42 (exact)
Control settings:
span : 1
degree : 2
family : gaussian
surface : interpolate cell = 0.2
normalize: TRUE
parametric: FALSE FALSE
drop.square: FALSE FALSE
# Type the following command to plot the fitted values against lgdp2:
scatter.smooth(predict(fit) ~ d[,3])
# The graph shows the values of y.net predicted by the nonparametric regression against the given values of lgdp2. The fitted line tracks the points on the scatterplot closely, suggesting that the nonparametric regression fits this sample better than the linear one.
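# One way to quantify that impression is to compare the in-sample residual spread of the loess
# fit with that of a parametric linear model on the same variables. A minimal sketch (a smaller
# residual standard deviation indicates a tighter in-sample fit, not guaranteed out-of-sample accuracy):
sd(residuals(fit))                          # loess residuals
sd(residuals(lm(d[,2] ~ d[,3] + d[,12])))   # linear-model residuals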