########################################## # Heckman 2-step selection. # Mahmood Arai, nov 16, 2005. # NOTE: Codes are written to be easy to follow # for undergraduate students. ########################################## # The DATA # use data lnu from http://people.su.se/~ma/R_intro/ load("DataWageMacro.rda") attach(lnu) # manipulate data to get some workers # with wages == 0 (non-participation) # for this I replace all lowest quartile wages with zero. wage <- replace(wage, wage<64, 0) y <- log(wage[wage>0]) # creates the outcome variable sel <- wage # the outcome including non-participants # Data excluding the lowest quartile lnu3quart <- subset(lnu, wage>=64) detach(lnu) attach(lnu3quart) # the model matrix in the main eq. X <- cbind(female,school,exp,expsquare=I(exp^ 2)) # detach(lnu3quart) attach(lnu) # the model matrix for the selection eq. Xsel <- cbind(female,school,exp,public) ########################################## # The function to do the heckman heckman <- function(y, sel, X, Xsel) { # y is the outcome variable, sel is the dependent varaible # in the selection equation # X and Xsel are regressors in the main and selection equation print("The selection equation") print(summary(glm(I(sel>0) ~ Xsel, binomial(link=probit)))) # predict probabilities of being selected phat <- predict(glm(I(sel>0) ~ Xsel, binomial(link=probit))) # Create data with phat and the selection variable phatdata <- data.frame(phat,sel) # Keep only those with sel>0 phatdata <- subset(phatdata,sel>0) # Keep only the phat variable phatdata <- subset(phatdata,select=phat) # Compute lambda lambda <- dnorm(phatdata$phat)/pnorm(phatdata$phat) # Create the model matrix including lambda Xcorr <- cbind(X,lambda) # run the regression print("Xcorr-prefix is just the name of the model matrix.") print( modcorr<- summary(lm(y~Xcorr))) # The White's correction library(car) print("Results with White's correction for non-constant error variance") # Corrected standard error secorr <- matrix(sqrt(diag(hccm(lm(y ~ Xcorr),type="hc1"))) ) # Saving the coefficients from the main regression coefcorr <- coef(modcorr) # Replace the standard error with hccm coefcorr [,2]<- secorr # Replace the t-values coefcorr [,3]<- coefcorr [,1]/secorr # Replace the p-values coefcorr [,4] <- 1- pt(abs(coefcorr[,3]),df=length(y)-1-ncol(Xcorr)) # Print final results print(coefcorr) } ########################################## heckman(y,sel,X,Xsel)