Chapter 5 Fixed effects estimates

5.1 Create fixed effects

This step is very different from Stata. In Stata you need to create 830 0-1 (dummy) columns; here you only need two factor columns:

# Add exporter-year and importer-year identifiers ("<country>_<year>") as
# factor columns, so lm() can expand them into dummy variables itself.
gravity2 <- gravity2 %>%
  mutate(
    exp_time = factor(paste(exporter, year, sep = "_")),
    imp_time = factor(paste(importer, year, sep = "_"))
  )

Now let's see how many dummy variables we are adding to the OLS model. With this many, there is a high risk of collinearity:

# Total number of factor levels across both fixed-effect columns
nlevels(gravity2$exp_time) + nlevels(gravity2$imp_time)
## [1] 828

5.2 Adjust

# Specification: log trade on the pair-level covariates plus the
# exporter-time and importer-time factor columns.
fe_formula <- log_trade ~ log_dist + cntg + lang + clny +
  exp_time + imp_time

# Fit the specification with lm() on gravity2
model5 <- lm(formula = fe_formula, data = gravity2)

5.3 Check for collinear terms

# alias() reports the model terms that are linearly dependent on others
collinear_terms <- alias(model5)

# The "Complete" component has one row per fully aliased term
collinear_matrix <- collinear_terms[["Complete"]]

# Names of the aliased terms
dimnames(collinear_matrix)[[1]]
## [1] "imp_timeZAF_1990" "imp_timeZAF_1994" "imp_timeZAF_1998" "imp_timeZAF_2002" "imp_timeZAF_2006"

5.4 Compute clustered standard errors

# Cluster-robust covariance matrix of model5, clustered on the `pair` column.
# NOTE(review): assumes vcovCL() is sandwich::vcovCL -- confirm against the
# packages loaded earlier in the book. Its cluster adjustment argument is
# `cadjust`; `df_correction` belongs to multiwayvcov::cluster.vcov() and was
# only being absorbed by `...` here.
vcov_cluster5 <- vcovCL(model5, cluster = gravity2[["pair"]], cadjust = TRUE)

# Flag the terms that are NOT exporter-time / importer-time dummies. The
# covariance matrix has the same row and column names, so one mask serves
# both dimensions.
is_structural5 <- !grepl("^exp_time|^imp_time", rownames(vcov_cluster5))

# Test the coefficients using only the flagged sub-matrix of the clustered
# covariance, then convert the result to a tibble with tidy().
# drop = FALSE keeps the subset a matrix.
coef_test5 <- tidy(coeftest(
  model5,
  vcov. = vcov_cluster5[is_structural5, is_structural5, drop = FALSE]
))

coef_test5
## # A tibble: 5 × 5
##   term        estimate std.error statistic   p.value
##   <chr>          <dbl>     <dbl>     <dbl>     <dbl>
## 1 (Intercept)   13.1      0.444      29.4  2.68e-187
## 2 log_dist      -1.22     0.0308    -39.5  0        
## 3 cntg           0.223    0.151       1.48 1.38e-  1
## 4 lang           0.661    0.0676      9.78 1.49e- 22
## 5 clny           0.670    0.116       5.78 7.51e-  9
# R-squared of the fitted model
summary(model5)[["r.squared"]]
## [1] 0.8432398