Difference between revisions of "R Asymptotics"

Revision as of 17:38, 6 February 2015

In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.

   # Code to demonstrate unbiasedness of OLS estimator
   # also the bahviour as sample size increases

   Npop <- 100000  # Population Size
   Nsamp1 <- 1000    # Sample Size
   Nsamp2 <- 10000    # Sample Size
   Nsamp3 <- 100000    # Sample Size

   Q <- 600  # number of samples taken

   u <- rnorm(Npop)      # true error terms
   beta <- matrix(c(0.5, 1.5),2,1)   # true parameters
   Xpop <- matrix(1,Npop,2) # initialise population X
   Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19]
   Ypop <- Xpop %*% beta + u    # %*% is matrix multiplication

   save_beta1 <- matrix(0,Q,3)

   for (i in 1:Q ) {
     sel1 <- ceiling(runif(Nsamp1)*Npop)  # select Nsamp1 observations
     Ysel <- Ypop[sel1,]    
     Xsel <- Xpop[sel1,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,1] <- reg_sel$coefficients[2]  # save estimated beta_1

     sel2 <- ceiling(runif(Nsamp2)*Npop)    # select Nsamp2 observations
     Ysel <- Ypop[sel2,]
     Xsel <- Xpop[sel2,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,2] <- reg_sel$coefficients[2]  # save estimated beta_1
 
     sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
     Ysel <- Ypop[sel3,]
     Xsel <- Xpop[sel3,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,3] <- reg_sel$coefficients[2]  # save estimated beta_1
   }

   # define bins
   bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match

   # Kernel Density Estimates (smooth histograms)
   d1 <- density(save_beta1[,1])
   d2 <- density(save_beta1[,2])
   d3 <- density(save_beta1[,3])

   # Plots
   colors <- rainbow(3) 
   plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density")
   # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
   lines(d1$x,d1$y, col = colors[1], lwd=3)
   lines(d2$x,d2$y, col = colors[2], lwd=3)
   lines(d3$x,d3$y, col = colors[3], lwd=3)
   title(main = "Distribution of estimated Parameters")
   legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")

Difference between revisions of "R Asymptotics"

Revision as of 17:38, 6 February 2015

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

ECLR

Tools