Difference between revisions of "R Asymptotics"

From ECLR
Jump to: navigation, search
Line 30: Line 30:
 
       Xsel <- Xpop[sel2,]
 
       Xsel <- Xpop[sel2,]
 
       reg_sel <- lm(Ysel~Xsel[,2])  # estimate regression in sample
 
       reg_sel <- lm(Ysel~Xsel[,2])  # estimate regression in sample
       save_beta1[i,2] <- reg_sel$coefficients[2]  # save estimated beta_1
+
       save_beta1[i,2] <- reg_sel<source enclose=none>$</source>coefficients[2]  # save estimated beta_1
 
    
 
    
 
       sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
 
       sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
Line 36: Line 36:
 
       Xsel <- Xpop[sel3,]
 
       Xsel <- Xpop[sel3,]
 
       reg_sel <- lm(Ysel~Xsel[,2])  # estimate regression in sample
 
       reg_sel <- lm(Ysel~Xsel[,2])  # estimate regression in sample
       save_beta1[i,3] <- reg_sel\$coefficients[2]  # save estimated beta_1
+
       save_beta1[i,3] <- reg_sel<source enclose=none>$</source>coefficients[2]  # save estimated beta_1
 
     }
 
     }
  

Revision as of 08:34, 6 February 2015

In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.

   # Code to demonstrate unbiasedness of OLS estimator
   # also the bahviour as sample size increases
   Npop <- 100000  # Population Size
   Nsamp1 <- 1000    # Sample Size
   Nsamp2 <- 10000    # Sample Size
   Nsamp3 <- 100000    # Sample Size
   Q <- 600  # number of samples taken
   u <- rnorm(Npop)      # true error terms
   beta <- matrix(c(0.5, 1.5),2,1)   # true parameters
   Xpop <- matrix(1,Npop,2) # initialise population X
   Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19]
   Ypop <- Xpop %*% beta + u    # %*% is matrix multiplication
   save_beta1 <- matrix(0,Q,3)
   for (i in 1:Q ) {
     sel1 <- ceiling(runif(Nsamp1)*Npop)  # select Nsamp1 observations
     Ysel <- Ypop[sel1,]    
     Xsel <- Xpop[sel1,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,1] <- reg_sel\$coefficients[2]  # save estimated beta_1
     sel2 <- ceiling(runif(Nsamp2)*Npop)    # select Nsamp2 observations
     Ysel <- Ypop[sel2,]
     Xsel <- Xpop[sel2,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,2] <- reg_sel$coefficients[2]  # save estimated beta_1
 
     sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
     Ysel <- Ypop[sel3,]
     Xsel <- Xpop[sel3,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,3] <- reg_sel$coefficients[2]  # save estimated beta_1
   }
   # define bins
   bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match
   # Kernel Density Estimates (smooth histograms)
   d1 <- density(save_beta1[,1])
   d2 <- density(save_beta1[,2])
   d3 <- density(save_beta1[,3])
   # Plots
   colors <- rainbow(3) 
   plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density")
   # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
   lines(d1$x,d1$y, col = colors[1], lwd=3)
   lines(d2$x,d2$y, col = colors[2], lwd=3)
   lines(d3$x,d3$y, col = colors[3], lwd=3)
   title(main = "Distribution of estimated Parameters")
   legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")