using CSV, DataFrames, DataFramesMeta, Statistics
# Load the raw CPS extract. The path is relative to the current working directory.
data = CSV.read("../data/cps_00019.csv", DataFrame)

# Build the analysis sample:
#   1. add the indicator :E,
#   2. construct an hourly wage (:wage) row by row,
#   3. keep only rows with MONTH == 1,
#   4. keep the columns used downstream,
#   5. rescale unemployment durations from weeks to months.
data = @chain data begin
    # :E is true when EMPSTAT is below 21.
    # NOTE(review): presumably these are the "employed" EMPSTAT codes; confirm against the codebook.
    @transform :E = :EMPSTAT .< 21

    # Hourly wage. Every path returns either a number or `missing`, so the
    # resulting column is Union{Missing, Float64} and never contains `nothing`.
    @transform @byrow :wage = begin
        if :PAIDHOUR == 2
            # Use the reported hourly wage when it is strictly between 0 and 99.99.
            if :HOURWAGE > 0 && :HOURWAGE < 99.99
                return :HOURWAGE
            else
                return missing
            end
        elseif :PAIDHOUR == 1
            # Impute the wage as weekly earnings over usual weekly hours.
            # Hours must be positive (avoids a division by zero producing Inf)
            # and below 997.
            # NOTE(review): 997 and above look like special codes; confirm against the codebook.
            if :EARNWEEK > 0 && :UHRSWORKT > 0 && :UHRSWORKT < 997
                return :EARNWEEK / :UHRSWORKT
            else
                return missing
            end
        else
            # PAIDHOUR == 0 and any other code: no wage can be constructed.
            return missing
        end
    end

    @subset :MONTH .== 1
    @select :AGE :SEX :RACE :EDUC :wage :E :DURUNEMP

    # Convert weekly unemployment durations to monthly, since the model is monthly.
    # NOTE(review): the displayed result has DURUNEMP == 231.0 on rows with E == true,
    # which matches round(999 * 12/52). Presumably 999 is a not-in-universe code that
    # is being rescaled here; confirm that downstream code only reads DURUNEMP when
    # E is false.
    @transform :DURUNEMP = round.(:DURUNEMP .* 12/52)
end
Row | AGE | SEX | RACE | EDUC | wage | E | DURUNEMP |
---|---|---|---|---|---|---|---|
 | Int64 | Int64 | Int64 | Int64 | Float64? | Bool | Float64 |
1 | 72 | 1 | 100 | 81 | missing | true | 231.0 |
2 | 66 | 1 | 100 | 111 | missing | true | 231.0 |
3 | 61 | 2 | 100 | 111 | missing | true | 231.0 |
4 | 52 | 2 | 200 | 73 | 20.84 | true | 231.0 |
5 | 19 | 2 | 200 | 73 | 10.0 | true | 231.0 |
6 | 56 | 2 | 200 | 111 | 25.0 | true | 231.0 |
7 | 22 | 2 | 200 | 81 | 9.5 | true | 231.0 |
8 | 23 | 2 | 100 | 124 | missing | true | 231.0 |
9 | 24 | 2 | 100 | 124 | missing | true | 231.0 |
10 | 59 | 2 | 200 | 111 | missing | true | 231.0 |
11 | 53 | 1 | 200 | 81 | missing | true | 231.0 |
12 | 24 | 2 | 200 | 73 | missing | true | 231.0 |
13 | 60 | 1 | 100 | 124 | missing | true | 231.0 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
61353 | 41 | 1 | 100 | 111 | missing | true | 231.0 |
61354 | 41 | 2 | 100 | 73 | missing | true | 231.0 |
61355 | 38 | 1 | 100 | 73 | missing | true | 231.0 |
61356 | 29 | 2 | 100 | 73 | missing | true | 231.0 |
61357 | 71 | 2 | 100 | 73 | 12.0 | true | 231.0 |
61358 | 45 | 1 | 100 | 92 | 21.25 | true | 231.0 |
61359 | 41 | 1 | 100 | 73 | missing | true | 231.0 |
61360 | 42 | 1 | 100 | 111 | missing | true | 231.0 |
61361 | 43 | 2 | 100 | 123 | missing | true | 231.0 |
61362 | 17 | 1 | 100 | 60 | missing | true | 231.0 |
61363 | 32 | 2 | 100 | 81 | missing | true | 231.0 |
61364 | 30 | 2 | 100 | 81 | missing | true | 231.0 |