Difference between revisions of "R Asymptotics"
Line 30: | Line 30: | ||
Xsel <- Xpop[sel2,] | Xsel <- Xpop[sel2,] | ||
reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample | reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample | ||
− | save_beta1[i,2] <- reg_sel$coefficients[2] # save estimated beta_1 | + | save_beta1[i,2] <- reg_sel<source enclose=none>$</source>coefficients[2] # save estimated beta_1 |
sel3 <- ceiling(runif(Nsamp3)*Npop) # select Nsamp3 observations | sel3 <- ceiling(runif(Nsamp3)*Npop) # select Nsamp3 observations | ||
Line 36: | Line 36: | ||
Xsel <- Xpop[sel3,] | Xsel <- Xpop[sel3,] | ||
reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample | reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample | ||
− | save_beta1[i,3] <- reg_sel | + | save_beta1[i,3] <- reg_sel<source enclose=none>$</source>coefficients[2] # save estimated beta_1 |
} | } | ||
Revision as of 08:34, 6 February 2015
In this code we draw samples of different size from a population of data. We then apply OLS to each of these samples and obtain OLS parameter estimate distributions. We can show how the parameter estimate variance reduces with increased sample sizes.
# Code to demonstrate unbiasedness of OLS estimator # also the bahviour as sample size increases
Npop <- 100000 # Population Size Nsamp1 <- 1000 # Sample Size Nsamp2 <- 10000 # Sample Size Nsamp3 <- 100000 # Sample Size
Q <- 600 # number of samples taken
u <- rnorm(Npop) # true error terms beta <- matrix(c(0.5, 1.5),2,1) # true parameters Xpop <- matrix(1,Npop,2) # initialise population X Xpop[,2] <- floor(runif(Npop)*12+8) # exp var uniform r.v. in [8,19] Ypop <- Xpop %*% beta + u # %*% is matrix multiplication
save_beta1 <- matrix(0,Q,3)
for (i in 1:Q ) { sel1 <- ceiling(runif(Nsamp1)*Npop) # select Nsamp1 observations Ysel <- Ypop[sel1,] Xsel <- Xpop[sel1,] reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample save_beta1[i,1] <- reg_sel\$coefficients[2] # save estimated beta_1
sel2 <- ceiling(runif(Nsamp2)*Npop) # select Nsamp2 observations Ysel <- Ypop[sel2,] Xsel <- Xpop[sel2,] reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample save_beta1[i,2] <- reg_sel$
coefficients[2] # save estimated beta_1 sel3 <- ceiling(runif(Nsamp3)*Npop) # select Nsamp3 observations Ysel <- Ypop[sel3,] Xsel <- Xpop[sel3,] reg_sel <- lm(Ysel~Xsel[,2]) # estimate regression in sample save_beta1[i,3] <- reg_sel$
coefficients[2] # save estimated beta_1 }
# define bins bins = seq(min(save_beta1[,1])-0.05, max(save_beta1[,1])+0.05, by = 0.005) # match
# Kernel Density Estimates (smooth histograms) d1 <- density(save_beta1[,1]) d2 <- density(save_beta1[,2]) d3 <- density(save_beta1[,3])
# Plots colors <- rainbow(3) plot(range(c(1.35,1.65)), range(c(0,max(d3$y)+0.05)), type="n", xlab="beta", ylab="Density") # axis(1, at = c(1,2,3,4,5), labels=c("Fail","3", "2.2", "2.1", "1")) lines(d1$x,d1$y, col = colors[1], lwd=3) lines(d2$x,d2$y, col = colors[2], lwd=3) lines(d3$x,d3$y, col = colors[3], lwd=3) title(main = "Distribution of estimated Parameters") legend("topleft", inset=.0, title="Sample Size",c("Nsamp1","Nsamp2","Nsamp3"), fill=rainbow(3), bty = "n")