R Asymptotics

From ECLR
Revision as of 09:32, 6 February 2015 by Rb (talk | contribs)
Jump to: navigation, search

In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.

   # Code to demonstrate unbiasedness of OLS estimator
   # also the bahviour as sample size increases
   Npop <- 100000  # Population Size
   Nsamp1 <- 1000    # Sample Size
   Nsamp2 <- 10000    # Sample Size
   Nsamp3 <- 100000    # Sample Size
   Q <- 600  # number of samples taken
   u <- rnorm(Npop)      # true error terms
   beta <- matrix(c(0.5, 1.5),2,1)   # true parameters
   Xpop <- matrix(1,Npop,2) # initialise population X
   Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19]
   Ypop <- Xpop %*% beta + u    # %*% is matrix multiplication
   save_beta1 <- matrix(0,Q,3)
   for (i in 1:Q ) {
     sel1 <- ceiling(runif(Nsamp1)*Npop)  # select Nsamp1 observations
     Ysel <- Ypop[sel1,]    
     Xsel <- Xpop[sel1,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,1] <- reg_sel\$coefficients[2]  # save estimated beta_1
     sel2 <- ceiling(runif(Nsamp2)*Npop)    # select Nsamp2 observations
     Ysel <- Ypop[sel2,]
     Xsel <- Xpop[sel2,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,2] <- reg_sel$coefficients[2]  # save estimated beta_1
 
     sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
     Ysel <- Ypop[sel3,]
     Xsel <- Xpop[sel3,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,3] <- reg_sel\$coefficients[2]  # save estimated beta_1
   }
   # define bins
   bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match
   # Kernel Density Estimates (smooth histograms)
   d1 <- density(save_beta1[,1])
   d2 <- density(save_beta1[,2])
   d3 <- density(save_beta1[,3])
   # Plots
   colors <- rainbow(3) 
   plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density")
   # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
   lines(d1$x,d1$y, col = colors[1], lwd=3)
   lines(d2$x,d2$y, col = colors[2], lwd=3)
   lines(d3$x,d3$y, col = colors[3], lwd=3)
   title(main = "Distribution of estimated Parameters")
   legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")