Chapter 3. The Algebra of Least Squares • hansen

Load the data and create subsamples

We process the data with the following code:

library(hansen)
#> 
#> Attaching package: 'hansen'
#> The following object is masked from 'package:stats':
#> 
#>     influence

experience <- cps09mar$age - cps09mar$education_num - 6
mbf <- (cps09mar$race_num == 2) & (cps09mar$marital_num <= 2) &
  (cps09mar$female_num == 1) & (experience == 12)
sam <- (cps09mar$race_num == 4) & (cps09mar$marital_num == 7) &
  (cps09mar$female_num == 0)
dat1 <- cps09mar[mbf, ]
dat2 <- cps09mar[sam, ]

dim(dat1)
#> [1] 20 18
dim(dat2)
#> [1] 268  18

Equation 3.12

The C++ code for the OLS estimator is as follows:

#include "00_main.h"

Mat<double> ols_(const Mat<double>& Y, const Mat<double>& X) {
  Mat<double> XtX = X.t() * X;             // X'X
  Mat<double> XtX_inv = inv(XtX);          // (X'X)^(-1)
  Mat<double> beta = XtX_inv * X.t() * Y;  // (X'X)^(-1)(X'Y)

  return beta;
}

[[cpp11::register]] doubles_matrix<> ols_r_(const doubles_matrix<>& y,
                                            const doubles_matrix<>& x) {
  Mat<double> Y = as_Mat(y);  // Col<double> Y = as_Col(y); also works
  Mat<double> X = as_Mat(x);
  Mat<double> beta = ols_(Y, X);
  return as_doubles_matrix(beta);
}

Every time you need to test a new function or changes to a function, run devtools::load_all() to load the new function or its changes into the R environment.

To use the function, it is required to create a model matrix and a response vector:

y <- as.matrix(log(dat1$earnings / (dat1$hours * dat1$week)))
x <- cbind(1, dat1$education_num)

equation312 <- ols(y, x)

rownames(equation312) <- c("Intercept", "Education")
colnames(equation312) <- "Estimate"

round(equation312, 3)
#>           Estimate
#> Intercept    0.698
#> Education    0.155

Equation 3.13

y <- as.matrix(log(dat2$earnings / (dat2$hours * dat2$week)))

experience <- dat2$age - dat2$education_num - 6
experience2 <- (experience^2) / 100
x <- cbind(1, dat2$education_num, experience, experience2)

equation313 <- ols(y, x)

rownames(equation313) <- c(
  "Intercept", "Education", "Experience",
  "Experience^2"
)
colnames(equation313) <- "Estimate"

round(equation313, 3)
#>              Estimate
#> Intercept       0.575
#> Education       0.143
#> Experience      0.036
#> Experience^2   -0.071

Influence (page 87)

The influence function can be written in C++ to practice, and it is a good idea to separate the residuals from the leverage to keep the code modular:

double influence_(const doubles_matrix<>& y, const doubles_matrix<>& x) {
  Mat<double> Y = as_Mat(y);
  Mat<double> X = as_Mat(x);

  Mat<double> e = Y - (X * ols_(Y, X));
  Mat<double> XXi = inv(X.t() * X);
  Mat<double> leverage = sum(X % (X * XXi), 1);
  Mat<double> ones = Mat<double>(Y.n_rows, 1, fill::ones);
  Mat<double> d = (leverage % e) / (ones - leverage);

  return as_scalar(max(abs(d)));
}

[[cpp11::register]] double influence_r_(const doubles_matrix<>& y,
                                        const doubles_matrix<>& x) {
  return influence_(y, x);
}

The regression from Equation 3.13 has an influence of:

max(abs(influence(y, x)))
#> [1] 0.2926396

Equation 3.49

This is identical to the OLS estimator, but changing the input data:

x_r <- x[x[, "experience"] < 45, ]
y_r <- y[x[, "experience"] < 45, ]

equation349 <- ols(y_r, x_r)

rownames(equation349) <- rownames(equation313)
colnames(equation349) <- colnames(equation313)

round(equation349, 3)
#>              Estimate
#> Intercept       0.531
#> Education       0.144
#> Experience      0.043
#> Experience^2   -0.095