R Asymptotics
In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.
# Code to demonstrate unbiasedness of OLS estimator # also the bahviour as sample size increases
Npop <- 100000 # Population Size Nsamp1 <- 1000 # Sample Size Nsamp2 <- 10000 # Sample Size Nsamp3 <- 100000 # Sample Size
Q <- 600 # number of samples taken
u <- rnorm(Npop) # true error terms beta <- matrix(c(0.5, 1.5),2,1) # true parameters Xpop <- matrix(1,Npop,2) # initialise population X Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19] Ypop <- Xpop %*% beta + u # %*% is matrix multiplication
save_beta1 <- matrix(0,Q,3)
for (i in 1:Q ) {
sel1 <- ceiling(runif(Nsamp1)*Npop) # select Nsamp1 observations
Ysel <- Ypop[sel1,]
Xsel <- Xpop[sel1,]
reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample
save_beta1[i,1] <- reg_sel$
coefficients[2] # save estimated beta_1
sel2 <- ceiling(runif(Nsamp2)*Npop) # select Nsamp2 observations Ysel <- Ypop[sel2,] Xsel <- Xpop[sel2,] reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample save_beta1[i,2] <- reg_sel$
coefficients[2] # save estimated beta_1 sel3 <- ceiling(runif(Nsamp3)*Npop) # select Nsamp3 observations Ysel <- Ypop[sel3,] Xsel <- Xpop[sel3,] reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample save_beta1[i,3] <- reg_sel$
coefficients[2] # save estimated beta_1 }
# define bins bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match
# Kernel Density Estimates (smooth histograms) d1 <- density(save_beta1[,1]) d2 <- density(save_beta1[,2]) d3 <- density(save_beta1[,3])
# Plots colors <- rainbow(3) plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density") # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1")) lines(d1$x,d1$y, col = colors[1], lwd=3) lines(d2$x,d2$y, col = colors[2], lwd=3) lines(d3$x,d3$y, col = colors[3], lwd=3) title(main = "Distribution of estimated Parameters") legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")