Difference between revisions of "R Asymptotics"

From ECLR
Jump to: navigation, search
Line 51: Line 51:
 
     plot(range(c(1.35,1.65)), range(c(0,max(d3<source enclose=none>$</source>y)+0.05)), type="n", xlab="beta", ylab="Density")
 
     plot(range(c(1.35,1.65)), range(c(0,max(d3<source enclose=none>$</source>y)+0.05)), type="n", xlab="beta", ylab="Density")
 
     # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
 
     # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
     lines(d1$x,d1$y, col = colors[1], lwd=3)
+
     lines(d1<source enclose=none>$</source>x,d1<source enclose=none>$</source>y, col = colors[1], lwd=3)
     lines(d2$x,d2$y, col = colors[2], lwd=3)
+
     lines(d2<source enclose=none>$</source>x,d2<source enclose=none>$</source>y, col = colors[2], lwd=3)
     lines(d3$x,d3$y, col = colors[3], lwd=3)
+
     lines(d3<source enclose=none>$</source>x,d3<source enclose=none>$</source>y, col = colors[3], lwd=3)
 
     title(main = "Distribution of estimated Parameters")
 
     title(main = "Distribution of estimated Parameters")
 
     legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")
 
     legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")

Revision as of 17:38, 6 February 2015

In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.

   # Code to demonstrate unbiasedness of OLS estimator
   # also the bahviour as sample size increases
   Npop <- 100000  # Population Size
   Nsamp1 <- 1000    # Sample Size
   Nsamp2 <- 10000    # Sample Size
   Nsamp3 <- 100000    # Sample Size
   Q <- 600  # number of samples taken
   u <- rnorm(Npop)      # true error terms
   beta <- matrix(c(0.5, 1.5),2,1)   # true parameters
   Xpop <- matrix(1,Npop,2) # initialise population X
   Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19]
   Ypop <- Xpop %*% beta + u    # %*% is matrix multiplication
   save_beta1 <- matrix(0,Q,3)
   for (i in 1:Q ) {
     sel1 <- ceiling(runif(Nsamp1)*Npop)  # select Nsamp1 observations
     Ysel <- Ypop[sel1,]    
     Xsel <- Xpop[sel1,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,1] <- reg_sel$coefficients[2]  # save estimated beta_1
     sel2 <- ceiling(runif(Nsamp2)*Npop)    # select Nsamp2 observations
     Ysel <- Ypop[sel2,]
     Xsel <- Xpop[sel2,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,2] <- reg_sel$coefficients[2]  # save estimated beta_1
 
     sel3 <- ceiling(runif(Nsamp3)*Npop)    # select Nsamp3 observations
     Ysel <- Ypop[sel3,]
     Xsel <- Xpop[sel3,]
     reg_sel <- lm(Ysel~Xsel[,2])   # estimate regression in sample
     save_beta1[i,3] <- reg_sel$coefficients[2]  # save estimated beta_1
   }
   # define bins
   bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match
   # Kernel Density Estimates (smooth histograms)
   d1 <- density(save_beta1[,1])
   d2 <- density(save_beta1[,2])
   d3 <- density(save_beta1[,3])
   # Plots
   colors <- rainbow(3) 
   plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density")
   # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1"))
   lines(d1$x,d1$y, col = colors[1], lwd=3)
   lines(d2$x,d2$y, col = colors[2], lwd=3)
   lines(d3$x,d3$y, col = colors[3], lwd=3)
   title(main = "Distribution of estimated Parameters")
   legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")