Difference between revisions of "ExampleCodeOLShac"
(3 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
− | Below you can find example code which uses the OLShac function. That function estimates a linear model using OLS. Results are returned in a structure. Amongst the results are White standard errors and Newey-West standard errors. | + | Below you can find example code which uses the OLShac function. That function estimates a linear model using OLS. Results are returned in a structure <source enclose="none">out</source>. Amongst the results are White standard errors and Newey-West standard errors. From the comments in <source enclose="none">OLShac</source> you can see the exact specifications of all outputs. See <source enclose="none">OLShac</source> below. |
− | == | + | == hac_estimators.m == |
− | + | In this code we simulate artificial data. We do so to establish the properties of t-tests based on different standard errors. The code simulates three different processes, one with error terms that meet the Gauss-Markov assumptions, another with heteroskedastic and another with autocorrelated error terms. In order to simulate autocorrelated we use another function <source enclose="none">armasim</source>, which is attached to the end of this file. | |
<source> | <source> | ||
+ | %==================================================== | ||
+ | % In here we | ||
+ | % 1) simulate three different processes | ||
+ | % a) regression model with that meets GM assumptions | ||
+ | % b) regression model with heteroskedastic errors | ||
+ | % c) regression model with AC errors | ||
+ | % 2) Perform parameter inference using normal, White and NW se | ||
+ | % 3) Evaluate whether tests have correct size | ||
+ | % | ||
+ | %==================================================== | ||
+ | function main | ||
+ | clc % Clears the command window | ||
+ | randn('state',123467); % ensures that random number generator starts with same random number | ||
+ | |||
+ | % Parameters for DGP | ||
+ | % y = x*b + u | ||
+ | n = 200; % length of the simulated series | ||
+ | nsim = 10000; % number of simulations | ||
+ | x = [ones(n,1) randn(n,2)]; % constant and two randomly generated series | ||
+ | b = [0.5; 1.0; -0.5]; % true parameter vector | ||
+ | sig = 0.1; % standard deviation of error terms | ||
+ | |||
+ | % Parameters for the AR error terms | ||
+ | p = 1; % AR order | ||
+ | q = 1; % MA order | ||
+ | % process parameters | ||
+ | alpha = 0.0; | ||
+ | phi = [0.9]; | ||
+ | theta = [0.7]; | ||
+ | sigar = 0.1; % standard error of innovation term | ||
+ | |||
+ | |||
+ | % save t-stats for DGP with GM errors here, col1: OLS se, Col2: White, | ||
+ | % Col3: NW; only safe results for b(2) | ||
+ | tsave = zeros(nsim,3); | ||
+ | |||
+ | % simulate model with GM (gmerr), heteroskedatic (hserr) or | ||
+ | % autocorrelated (arerr) errors | ||
+ | for i = 1 : nsim | ||
+ | % gmerr = randn(n,1)*sig; % GM errors | ||
+ | % hserr = [randn(n/2,1)*sig;randn(n/2,1)*(sig/5)]; % heterosk errors | ||
+ | arerr = armasim(n,p,q,alpha,phi,theta,sigar); % AR errors | ||
+ | y = x*b + arerr; % DGP with selected errors | ||
+ | results = OLShac(y,x,0); % estimate and save results in "results" | ||
+ | bmb0 = results.b(2) - b(2); % difference of estimated value from true value | ||
+ | tsave(i,:) = [bmb0/results.bse(2) bmb0/results.wh_bse(2) bmb0/results.nw_bse(2)]; | ||
+ | end | ||
+ | |||
+ | cv5 = norminv(0.975); % 2 tailed 5% cv, Normal | ||
+ | cv1 = norminv(0.995); % 2 tailed 1% cv, Normal | ||
+ | cv5t = tinv(0.975,n-length(b)); % 2 tailed 5% cv, t-Distr | ||
+ | cv1t = tinv(0.995,n-length(b)); % 2 tailed 1% cv, t-Distr | ||
+ | |||
+ | count5 = (abs(tsave(:,2:3))>cv5); % 1 if test stat exceed cv, 0 otherwise | ||
+ | count1 = (abs(tsave(:,2:3))>cv1); % 1 if test stat exceed cv, 0 otherwise | ||
+ | count5 = [(abs(tsave(:,1))>cv5t) count5]; | ||
+ | count1 = [(abs(tsave(:,1))>cv1t) count1]; | ||
+ | |||
+ | prop5 = sum(count5)./nsim; % proportion of rejections of true H0 at 5% | ||
+ | prop1 = sum(count1)./nsim; % proportion of rejections of true H0 at 1% | ||
+ | |||
+ | disp('Rejections for standard OLS, White and NW based t-tests'); | ||
+ | disp(' OLS White NW'); | ||
+ | disp('at 5%'); | ||
+ | disp(prop5); | ||
+ | disp('at 1%'); | ||
+ | disp(prop1); | ||
+ | |||
+ | end | ||
+ | </source> | ||
+ | |||
+ | This is the <source enclose="none">armasim</source> function. Either copy it below the above file (as it is written as a function you can do that), or you can save it in a new function file with the name <source enclose="none">armasim.m</source>. | ||
+ | |||
+ | <source> | ||
+ | function y = armasim(t,p,q,al,ph,th,si); | ||
+ | % Function that simulates ARMA(p,q) process of | ||
+ | % input: t length series to be simulated | ||
+ | % p, AR order | ||
+ | % q, MA order | ||
+ | % al, constant | ||
+ | % ph, AR parameters [ph(1); ph(2); ...; ph(p)] | ||
+ | % th, MA parameters [th(1); th(2); ...; th(q)] | ||
+ | % | ||
+ | % y(t) = al + ph(1)*y(t-1) + ... + ph(p)*y(t-p) | ||
+ | % + th(1)*eps(t-1) + ... + th(q)*eps(t-q) + eps(t) | ||
+ | % where eps(t) comes from ~N(0,si^2) | ||
+ | |||
+ | eps = randn(t+100,1)*si;% error terms, 100 extra for startup | ||
+ | maxlag = max([p;q]); % find max lag | ||
+ | if sum(ph)==1 | ||
+ | um = 1; | ||
+ | else | ||
+ | um = al/(1-sum(ph)); % unconditional mean | ||
+ | end | ||
+ | |||
+ | |||
+ | fph = flipud(ph); % reversed AR parameter vector | ||
+ | fth = flipud(th); % reversed MA parameter vector | ||
+ | y = ones(t+100,1)*um; % in here we safe the series, start with unconditional mean | ||
+ | |||
+ | if p>0 & q>0 % ARMA model | ||
+ | |||
+ | for i = maxlag+1:t+100 | ||
+ | y(i) = al + y(i-p:i-1)'*fph + eps(i-q:i-1)'*fth + eps(i); | ||
+ | end | ||
+ | |||
+ | elseif p>0 & q==0 % AR Model | ||
+ | |||
+ | for i = maxlag+1:t+100 | ||
+ | y(i) = al + y(i-p:i-1)'*fph + eps(i); | ||
+ | end | ||
+ | |||
+ | elseif p==0 & q>0 % MA model | ||
+ | |||
+ | for i = maxlag+1:t+100 | ||
+ | y(i) = al + eps(i-q:i-1)'*fth + eps(i); | ||
+ | end | ||
+ | |||
+ | else % White noise process | ||
+ | |||
+ | for i = maxlag+1:t+100 | ||
+ | y(i) = al + eps(i); | ||
+ | end | ||
+ | end | ||
+ | |||
+ | y = y(101:end); | ||
+ | end | ||
+ | </source> | ||
+ | |||
+ | An example output of this code (using autocorrelated error terms) is as follows: | ||
+ | |||
+ | <source> | ||
+ | OLS White NW | ||
+ | at 5% | ||
+ | 0.0243 0.0299 0.0455 | ||
+ | |||
+ | at 1% | ||
+ | 0.0021 0.0039 0.0079 | ||
+ | </source> | ||
+ | |||
+ | As we are testing the size of the tests (testing a correct null hypothesis) you can see that the empirical sizes are closest to the nominal sizes (0.05 and 0.01) for the t-test using the Newey-West standard errors. | ||
+ | |||
+ | == <div id="OLShacm">OLShac.m </div>== | ||
+ | |||
+ | This is a function that estimates a linear model with OLS. The results are all returned in a structure. Save this as a new function file called <source enclose="none">OLShac.m</source>. | ||
+ | |||
+ | <source> | ||
+ | |||
+ | function out = OLShac(y,x,output,B); | ||
+ | % This function performs an OLS estimation | ||
+ | % input: y, vector with dependent variable | ||
+ | % x, matrix with explanatory variable | ||
+ | % function will automatically add a constant if the first col | ||
+ | % is not a vector of ones | ||
+ | % output, 1 = printed output | ||
+ | % B, lag lengths for Newey-West standard errors | ||
+ | % output: Structure called OLS with the following elements: | ||
+ | % .b, estimated parameters | ||
+ | % .bse, standard errors for bhat | ||
+ | % .wh_bse, White standard errors for bhat | ||
+ | % .nw_bse, Newey-West standard errors for bhat | ||
+ | % .res, estimated residuals | ||
+ | % .n, number of observations used | ||
+ | % .rss, residual sum of squares | ||
+ | % .r2, Rsquared | ||
+ | |||
+ | % This checks how many inputs have been provided and chooses defaults for | ||
+ | % missing inputs | ||
+ | switch nargin | ||
+ | case 2 | ||
+ | output = 1; | ||
+ | B = 0; % will later be replaced by default value | ||
+ | case 3 | ||
+ | B = 0; % will later be replaced by default value | ||
+ | case 4 | ||
+ | % nothing | ||
+ | otherwise | ||
+ | error('2 to 4 inputs are required!') | ||
+ | end | ||
+ | |||
+ | |||
+ | % select those rows that have observations for all variables | ||
+ | ninit = length(y); | ||
+ | testnan = [isnan(y) isnan(x)]; | ||
+ | testnan = (sum(testnan,2)==0); | ||
+ | y = y(testnan); | ||
+ | x = x(testnan,:); | ||
+ | % test whether first column is vector of ones | ||
+ | temp = (x(x(:,1)==1)); | ||
+ | if length(temp) ~= length(x) | ||
+ | x = [ones(length(x),1) x]; % add constant of not included in x | ||
+ | end | ||
+ | |||
+ | [n,k] = size(x); % sample size - n, number of explan vars (incl constant) - k | ||
+ | xxi = inv(x'*x); | ||
+ | |||
+ | n = n; | ||
+ | b = xxi*x'*y; | ||
+ | res = y - x*b; | ||
+ | rss = res'*res; | ||
+ | ssq = rss/(n-k); | ||
+ | s = sqrt(ssq); | ||
+ | bse = ssq*xxi; | ||
+ | bse = sqrt(diag(bse)); | ||
+ | tstat = b./bse; | ||
+ | ym = y - mean(y); | ||
+ | r2 = 1 - (res'*res)/(ym'*ym); | ||
+ | adjr2 = 1 - (n-1)*(1-r2)/(n-k); | ||
+ | fstat = ((((ym'*ym))-(res'*res))/(k-1))/((res'*res)/(n-k)); | ||
+ | dw = corrcoef([res(1:end-1) res(2:end)]); | ||
+ | dw = 2*(1-dw(2,1)); | ||
+ | |||
+ | |||
+ | % calculation of robust standard errors | ||
+ | |||
+ | % White standard errors | ||
+ | resx = repmat(res,1,size(x,2)).*x; | ||
+ | wh_vcm = xxi*(resx'*resx)*xxi; | ||
+ | wh_bse = sqrt(diag(wh_vcm)); | ||
+ | |||
+ | % Newey-West standard errors | ||
+ | if B==0 % recalculate B only if not provided as input | ||
+ | B = ceil(4*(n/100)^(2/9)); % This is the lag length in the Newey West calculation | ||
+ | end | ||
+ | xox = resx(1:end,:)'*resx(1:end,:); | ||
+ | for i = 1:B-1; | ||
+ | w = 1-(i/B); | ||
+ | za = resx(1+i:end,:)'*resx(1:end-i,:); | ||
+ | xox = xox+w*(za+za'); | ||
+ | end; | ||
+ | nw_vcm = xxi*xox*xxi; | ||
+ | nw_bse = sqrt(diag(nw_vcm)); | ||
+ | |||
+ | % Save the outputs | ||
+ | out.b = b; | ||
+ | out.bse = bse; | ||
+ | out.wh_bse = wh_bse; | ||
+ | out.nw_bse = nw_bse; | ||
+ | out.res = res; | ||
+ | out.n = n; | ||
+ | out.rss = rss; | ||
+ | out.r2 = r2; | ||
+ | |||
+ | if output | ||
+ | pval = 2*(1-tcdf(abs(tstat),n-k)); | ||
+ | pvalf = 1- fcdf(fstat,k-1,n-k); | ||
+ | try % if stats toolbox is available | ||
+ | pval = 2*(1-tcdf(abs(tstat),n-k)); | ||
+ | pvalf = 1- fcdf(fstat,k-1,n-k); | ||
+ | catch | ||
+ | try % if NAG toolbox is available | ||
+ | pval = 2*(1-g01eb(abs(tstat),n-k)); | ||
+ | pvalf = g01ed(fstat,k-1,n-k,'tail','U'); | ||
+ | catch | ||
+ | pval = -999*ones(size(tstat)); | ||
+ | pvalf = -999; | ||
+ | end | ||
+ | end | ||
+ | |||
+ | fprintf('===========================================================\n'); | ||
+ | fprintf('===== Regression Output ==================================\n'); | ||
+ | fprintf('Obs used = %4.0f, missing obs = %4.0f \n',n,(ninit-n)); | ||
+ | fprintf('Rsquared = %5.4f \n',r2); | ||
+ | fprintf('adj_Rsq = %5.4f \n',adjr2); | ||
+ | fprintf('===== Estimated Model Parameters ==========================\n'); | ||
+ | fprintf('= Par se se(White) se(NW) ==================\n'); | ||
+ | format short; | ||
+ | disp([b bse wh_bse nw_bse]); | ||
+ | fprintf('===== Model Statistics ====================================\n'); | ||
+ | fprintf(' Fstat = %5.4f (%5.4f)\n',[fstat;pvalf]); | ||
+ | fprintf(' standard error = %5.4f\n',sqrt(ssq)); | ||
+ | fprintf(' RSS = %5.4f\n',rss); | ||
+ | fprintf(' Durbin-Watson = %5.4f\n',dw); | ||
+ | fprintf('===========================================================\n'); | ||
+ | end | ||
</source> | </source> |
Latest revision as of 00:01, 20 November 2012
Below you can find example code which uses the OLShac function. That function estimates a linear model using OLS. Results are returned in a structure out
. Amongst the results are White standard errors and Newey-West standard errors. From the comments in OLShac
you can see the exact specifications of all outputs. See OLShac
below.
hac_estimators.m
In this code we simulate artificial data. We do so to establish the properties of t-tests based on different standard errors. The code simulates three different processes, one with error terms that meet the Gauss-Markov assumptions, another with heteroskedastic and another with autocorrelated error terms. In order to simulate autocorrelated we use another function armasim
, which is attached to the end of this file.
%====================================================
% In here we
% 1) simulate three different processes
% a) regression model with that meets GM assumptions
% b) regression model with heteroskedastic errors
% c) regression model with AC errors
% 2) Perform parameter inference using normal, White and NW se
% 3) Evaluate whether tests have correct size
%
%====================================================
function main
clc % Clears the command window
randn('state',123467); % ensures that random number generator starts with same random number
% Parameters for DGP
% y = x*b + u
n = 200; % length of the simulated series
nsim = 10000; % number of simulations
x = [ones(n,1) randn(n,2)]; % constant and two randomly generated series
b = [0.5; 1.0; -0.5]; % true parameter vector
sig = 0.1; % standard deviation of error terms
% Parameters for the AR error terms
p = 1; % AR order
q = 1; % MA order
% process parameters
alpha = 0.0;
phi = [0.9];
theta = [0.7];
sigar = 0.1; % standard error of innovation term
% save t-stats for DGP with GM errors here, col1: OLS se, Col2: White,
% Col3: NW; only safe results for b(2)
tsave = zeros(nsim,3);
% simulate model with GM (gmerr), heteroskedatic (hserr) or
% autocorrelated (arerr) errors
for i = 1 : nsim
% gmerr = randn(n,1)*sig; % GM errors
% hserr = [randn(n/2,1)*sig;randn(n/2,1)*(sig/5)]; % heterosk errors
arerr = armasim(n,p,q,alpha,phi,theta,sigar); % AR errors
y = x*b + arerr; % DGP with selected errors
results = OLShac(y,x,0); % estimate and save results in "results"
bmb0 = results.b(2) - b(2); % difference of estimated value from true value
tsave(i,:) = [bmb0/results.bse(2) bmb0/results.wh_bse(2) bmb0/results.nw_bse(2)];
end
cv5 = norminv(0.975); % 2 tailed 5% cv, Normal
cv1 = norminv(0.995); % 2 tailed 1% cv, Normal
cv5t = tinv(0.975,n-length(b)); % 2 tailed 5% cv, t-Distr
cv1t = tinv(0.995,n-length(b)); % 2 tailed 1% cv, t-Distr
count5 = (abs(tsave(:,2:3))>cv5); % 1 if test stat exceed cv, 0 otherwise
count1 = (abs(tsave(:,2:3))>cv1); % 1 if test stat exceed cv, 0 otherwise
count5 = [(abs(tsave(:,1))>cv5t) count5];
count1 = [(abs(tsave(:,1))>cv1t) count1];
prop5 = sum(count5)./nsim; % proportion of rejections of true H0 at 5%
prop1 = sum(count1)./nsim; % proportion of rejections of true H0 at 1%
disp('Rejections for standard OLS, White and NW based t-tests');
disp(' OLS White NW');
disp('at 5%');
disp(prop5);
disp('at 1%');
disp(prop1);
end
This is the armasim
function. Either copy it below the above file (as it is written as a function you can do that), or you can save it in a new function file with the name armasim.m
.
function y = armasim(t,p,q,al,ph,th,si);
% Function that simulates ARMA(p,q) process of
% input: t length series to be simulated
% p, AR order
% q, MA order
% al, constant
% ph, AR parameters [ph(1); ph(2); ...; ph(p)]
% th, MA parameters [th(1); th(2); ...; th(q)]
%
% y(t) = al + ph(1)*y(t-1) + ... + ph(p)*y(t-p)
% + th(1)*eps(t-1) + ... + th(q)*eps(t-q) + eps(t)
% where eps(t) comes from ~N(0,si^2)
eps = randn(t+100,1)*si;% error terms, 100 extra for startup
maxlag = max([p;q]); % find max lag
if sum(ph)==1
um = 1;
else
um = al/(1-sum(ph)); % unconditional mean
end
fph = flipud(ph); % reversed AR parameter vector
fth = flipud(th); % reversed MA parameter vector
y = ones(t+100,1)*um; % in here we safe the series, start with unconditional mean
if p>0 & q>0 % ARMA model
for i = maxlag+1:t+100
y(i) = al + y(i-p:i-1)'*fph + eps(i-q:i-1)'*fth + eps(i);
end
elseif p>0 & q==0 % AR Model
for i = maxlag+1:t+100
y(i) = al + y(i-p:i-1)'*fph + eps(i);
end
elseif p==0 & q>0 % MA model
for i = maxlag+1:t+100
y(i) = al + eps(i-q:i-1)'*fth + eps(i);
end
else % White noise process
for i = maxlag+1:t+100
y(i) = al + eps(i);
end
end
y = y(101:end);
end
An example output of this code (using autocorrelated error terms) is as follows:
OLS White NW
at 5%
0.0243 0.0299 0.0455
at 1%
0.0021 0.0039 0.0079
As we are testing the size of the tests (testing a correct null hypothesis) you can see that the empirical sizes are closest to the nominal sizes (0.05 and 0.01) for the t-test using the Newey-West standard errors.
OLShac.m
This is a function that estimates a linear model with OLS. The results are all returned in a structure. Save this as a new function file called OLShac.m
.
function out = OLShac(y,x,output,B);
% This function performs an OLS estimation
% input: y, vector with dependent variable
% x, matrix with explanatory variable
% function will automatically add a constant if the first col
% is not a vector of ones
% output, 1 = printed output
% B, lag lengths for Newey-West standard errors
% output: Structure called OLS with the following elements:
% .b, estimated parameters
% .bse, standard errors for bhat
% .wh_bse, White standard errors for bhat
% .nw_bse, Newey-West standard errors for bhat
% .res, estimated residuals
% .n, number of observations used
% .rss, residual sum of squares
% .r2, Rsquared
% This checks how many inputs have been provided and chooses defaults for
% missing inputs
switch nargin
case 2
output = 1;
B = 0; % will later be replaced by default value
case 3
B = 0; % will later be replaced by default value
case 4
% nothing
otherwise
error('2 to 4 inputs are required!')
end
% select those rows that have observations for all variables
ninit = length(y);
testnan = [isnan(y) isnan(x)];
testnan = (sum(testnan,2)==0);
y = y(testnan);
x = x(testnan,:);
% test whether first column is vector of ones
temp = (x(x(:,1)==1));
if length(temp) ~= length(x)
x = [ones(length(x),1) x]; % add constant of not included in x
end
[n,k] = size(x); % sample size - n, number of explan vars (incl constant) - k
xxi = inv(x'*x);
n = n;
b = xxi*x'*y;
res = y - x*b;
rss = res'*res;
ssq = rss/(n-k);
s = sqrt(ssq);
bse = ssq*xxi;
bse = sqrt(diag(bse));
tstat = b./bse;
ym = y - mean(y);
r2 = 1 - (res'*res)/(ym'*ym);
adjr2 = 1 - (n-1)*(1-r2)/(n-k);
fstat = ((((ym'*ym))-(res'*res))/(k-1))/((res'*res)/(n-k));
dw = corrcoef([res(1:end-1) res(2:end)]);
dw = 2*(1-dw(2,1));
% calculation of robust standard errors
% White standard errors
resx = repmat(res,1,size(x,2)).*x;
wh_vcm = xxi*(resx'*resx)*xxi;
wh_bse = sqrt(diag(wh_vcm));
% Newey-West standard errors
if B==0 % recalculate B only if not provided as input
B = ceil(4*(n/100)^(2/9)); % This is the lag length in the Newey West calculation
end
xox = resx(1:end,:)'*resx(1:end,:);
for i = 1:B-1;
w = 1-(i/B);
za = resx(1+i:end,:)'*resx(1:end-i,:);
xox = xox+w*(za+za');
end;
nw_vcm = xxi*xox*xxi;
nw_bse = sqrt(diag(nw_vcm));
% Save the outputs
out.b = b;
out.bse = bse;
out.wh_bse = wh_bse;
out.nw_bse = nw_bse;
out.res = res;
out.n = n;
out.rss = rss;
out.r2 = r2;
if output
pval = 2*(1-tcdf(abs(tstat),n-k));
pvalf = 1- fcdf(fstat,k-1,n-k);
try % if stats toolbox is available
pval = 2*(1-tcdf(abs(tstat),n-k));
pvalf = 1- fcdf(fstat,k-1,n-k);
catch
try % if NAG toolbox is available
pval = 2*(1-g01eb(abs(tstat),n-k));
pvalf = g01ed(fstat,k-1,n-k,'tail','U');
catch
pval = -999*ones(size(tstat));
pvalf = -999;
end
end
fprintf('===========================================================\n');
fprintf('===== Regression Output ==================================\n');
fprintf('Obs used = %4.0f, missing obs = %4.0f \n',n,(ninit-n));
fprintf('Rsquared = %5.4f \n',r2);
fprintf('adj_Rsq = %5.4f \n',adjr2);
fprintf('===== Estimated Model Parameters ==========================\n');
fprintf('= Par se se(White) se(NW) ==================\n');
format short;
disp([b bse wh_bse nw_bse]);
fprintf('===== Model Statistics ====================================\n');
fprintf(' Fstat = %5.4f (%5.4f)\n',[fstat;pvalf]);
fprintf(' standard error = %5.4f\n',sqrt(ssq));
fprintf(' RSS = %5.4f\n',rss);
fprintf(' Durbin-Watson = %5.4f\n',dw);
fprintf('===========================================================\n');
end