using CSV, DataFrames, DataFramesMeta, Optim
include("../children-cash-transfers/src/Transfers.jl")
# for this simple model we can just drop missing data. When we estimate the model with persistent latent heterogeneity, we will need complete panels (including years where choices are missing).
# Build the estimation sample from the main panel file.
#
# Steps, in order:
#   1. read the CSV, treating the string "NA" as `missing`;
#   2. keep observations with 1985 <= year <= 2010;
#   3. define :AFDC as (:WelfH > 0) and rename :FSInd to :FS;
#   4. define :P = :FS + :AFDC and :H = hours / (52*20), rounded and capped at 2
#      (52*20 = 1040; presumably 20 hours/week over 52 weeks — TODO confirm);
#   5. drop rows where :P or :H is missing;
#   6. recompute :wage as :earn / :hrs / :CPIU when both hours and earnings are
#      positive, and `missing` otherwise.
data = @chain begin
    CSV.read("../children-cash-transfers/data/MainPanelFile.csv",DataFrame,missingstring = "NA")
    #@select :MID :year :wage :hrs :earn :SOI :CPIU :WelfH :FSInd :num_child :age
    @subset :year.>=1985 :year.<=2010
    @transform :AFDC = :WelfH.>0
    @rename :FS = :FSInd
    @transform :P = :FS + :AFDC :H = min.(2,round.(Union{Int64, Missing},:hrs / (52*20)))
    # Rows with a missing :H (hence missing :hrs) are removed here, before the
    # row-wise wage computation below compares :hrs against 0.
    @subset .!ismissing.(:P) .&& .!ismissing.(:H)
    @transform @byrow :wage = begin
        # NOTE(review): a missing :earn on a row with :hrs > 0 would make this
        # condition `missing` and throw — confirm the data never has that case.
        if :hrs>0 && :earn>0
            return :earn / :hrs / :CPIU
        else
            return missing
        end
    end
end
# Collect the columns of `data` used in estimation into a NamedTuple of vectors.
# :P and :H are converted to plain Int64 vectors. Missing wages are replaced by
# 1.0 before taking logs, so `logwage` is 0.0 wherever `wage_missing` is true;
# `wage_missing` records which observations those are.
data_mle = (;P = Int64.(data.P), H = Int64.(data.H), year = data.year, age = data.age,
    soi = data.SOI, num_kids = data.num_child, cpi = data.CPIU,
    logwage = log.(coalesce.(data.wage,1.)),wage_missing = ismissing.(data.wage))
# (P = [2, 2, 2, 1, 0, 0, 0, 2, 1, 0 … 2, 2, 1, 2, 0, 0, 1, 1, 1, 1], H = [0, 0, 0, 0, 0, 0, 0, 0, 0, 2 … 0, 0, 2, 2, 2, 2, 0, 0, 0, 0], year = [1994, 1995, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 1990 … 1991, 1992, 1991, 1992, 1991, 1992, 1991, 1992, 1991, 1992], age = [21, 22, 23, 25, 27, 29, 31, 33, 35, 17 … 21, 22, 23, 24, 39, 40, 30, 31, 25, 26], soi = [43, 43, 43, 43, 43, 43, 43, 43, 43, 17 … 44, 44, 7, 7, 5, 5, 39, 39, 39, 39], num_kids = [2, 2, 3, 3, 3, 3, 3, 3, 3, 1 … 2, 2, 2, 2, 3, 3, 4, 4, 2, 2], cpi = [0.860812349005761, 0.884959812302546, 0.910948243820851, 0.946664188812488, 1.0, 1.04457233785542, 1.09707768072849, 1.17054218546739, 1.25008130459022, 0.758792510685746 … 0.790785866939231, 0.8148346032336, 0.790785866939231, 0.8148346032336, 0.790785866939231, 0.8148346032336, 0.790785866939231, 0.8148346032336, 0.790785866939231, 0.8148346032336], logwage = [0.0, 0.0, 0.0, 0.0, 1.2039728043259361, 0.0, 0.0, 0.0, 0.0, 0.8230555833449215 … 0.0, 0.0, 2.6512574120409043, 2.3314412292534223, 1.265948758245773, 1.8744481199656744, 0.0, 0.0, 0.0, 0.0], wage_missing = Bool[1, 1, 1, 1, 0, 1, 1, 1, 1, 0 … 1, 1, 0, 0, 0, 0, 1, 1, 1, 1])