using CSV, DataFrames, DataFramesMeta, Statistics
# Load the CPS extract and build the analysis sample: an indicator `E`, an
# hourly `wage`, the rows with MONTH == 1, and unemployment duration in months.
data = CSV.read("../data/cps_00019.csv", DataFrame)
data = @chain data begin
    # E is true when the EMPSTAT code is below 21.
    # NOTE(review): presumably these are the "employed" codes — confirm against the codebook.
    @transform :E = :EMPSTAT .< 21
    # Hourly wage, computed row by row; `missing` whenever it cannot be derived.
    @transform @byrow :wage = begin
        if :PAIDHOUR == 2
            # Use the reported hourly wage, but only when it lies strictly
            # inside (0, 99.99).
            if :HOURWAGE > 0 && :HOURWAGE < 99.99
                :HOURWAGE
            else
                missing
            end
        elseif :PAIDHOUR == 1
            # Impute the hourly wage as weekly earnings over usual weekly hours.
            # Hours must be positive (a zero would produce an `Inf` wage) and
            # below 997.
            if :EARNWEEK > 0 && :UHRSWORKT > 0 && :UHRSWORKT < 997
                :EARNWEEK / :UHRSWORKT
            else
                missing
            end
        else
            # PAIDHOUR == 0 and any other code: no usable wage. The explicit
            # fallback keeps the column `Union{Missing, Float64}` instead of
            # letting an unmatched code yield `nothing`.
            missing
        end
    end
    @subset :MONTH .== 1
    @select :AGE :SEX :RACE :EDUC :wage :E :DURUNEMP
    # We convert weekly unemployment durations to monthly since we have a
    # monthly model.
    # NOTE(review): a raw value of 999 becomes round(999 * 12 / 52) == 231.0,
    # which is what every displayed row shows; 999 looks like a sentinel code
    # that may need to be mapped to `missing` — confirm against the codebook.
    @transform :DURUNEMP = round.(:DURUNEMP .* 12 / 52)
end

| Row | AGE | SEX | RACE | EDUC | wage | E | DURUNEMP |
|---|---|---|---|---|---|---|---|
| | Int64 | Int64 | Int64 | Int64 | Float64? | Bool | Float64 |
| 1 | 72 | 1 | 100 | 81 | missing | true | 231.0 |
| 2 | 66 | 1 | 100 | 111 | missing | true | 231.0 |
| 3 | 61 | 2 | 100 | 111 | missing | true | 231.0 |
| 4 | 52 | 2 | 200 | 73 | 20.84 | true | 231.0 |
| 5 | 19 | 2 | 200 | 73 | 10.0 | true | 231.0 |
| 6 | 56 | 2 | 200 | 111 | 25.0 | true | 231.0 |
| 7 | 22 | 2 | 200 | 81 | 9.5 | true | 231.0 |
| 8 | 23 | 2 | 100 | 124 | missing | true | 231.0 |
| 9 | 24 | 2 | 100 | 124 | missing | true | 231.0 |
| 10 | 59 | 2 | 200 | 111 | missing | true | 231.0 |
| 11 | 53 | 1 | 200 | 81 | missing | true | 231.0 |
| 12 | 24 | 2 | 200 | 73 | missing | true | 231.0 |
| 13 | 60 | 1 | 100 | 124 | missing | true | 231.0 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
| 61353 | 41 | 1 | 100 | 111 | missing | true | 231.0 |
| 61354 | 41 | 2 | 100 | 73 | missing | true | 231.0 |
| 61355 | 38 | 1 | 100 | 73 | missing | true | 231.0 |
| 61356 | 29 | 2 | 100 | 73 | missing | true | 231.0 |
| 61357 | 71 | 2 | 100 | 73 | 12.0 | true | 231.0 |
| 61358 | 45 | 1 | 100 | 92 | 21.25 | true | 231.0 |
| 61359 | 41 | 1 | 100 | 73 | missing | true | 231.0 |
| 61360 | 42 | 1 | 100 | 111 | missing | true | 231.0 |
| 61361 | 43 | 2 | 100 | 123 | missing | true | 231.0 |
| 61362 | 17 | 1 | 100 | 60 | missing | true | 231.0 |
| 61363 | 32 | 2 | 100 | 81 | missing | true | 231.0 |
| 61364 | 30 | 2 | 100 | 81 | missing | true | 231.0 |