Saturday, March 11, 2023

The do file

 clear all
set more off



/****************************/
/******** QUESTION  1 *******/
/****************************/

*Importing data

use "D:\STATA2023\part1_timeseries.dta"

* sorting the year column
sort year

*set data as time series
tsset year, yearly

*label variables
lab var children "The number of own children under age 5"
lab var unemployed "The share of unemployed women within (25-54) "

* 1. Time series of ’children’ and ’unemployed’ over time

twoway (tsline children) (tsline unemployed, yaxis(2)), ttitle(Years) ///
title(Time series of Unemployed and Children)legend(c(1))



* Simple regression of children on unemployed
regress children unemployed

* 2. Regression of children on unemployed and share_married
regress children unemployed share_married

* 3. Testing for trends in the variables

* Linear time trend: 1, 2, ... following the current row order
generate t = _n

* Regress each series on the trend
regress unemployed t // non linear
regress share_married t
regress children t

* Log transforms of share_married and children
generate ln_share_married = ln(share_married)
* The label names the variable that is actually logged (share_married)
label variable ln_share_married "logarithm of share of married women"
generate ln_children = ln(children)
label variable ln_children "logarithm of No. of children"

* Log-children on unemployed and log share_married
regress ln_children unemployed ln_share_married

* 4. Testing for autocorrelation and unit roots

* Dickey-Fuller (DF) unit-root test on each level series
foreach series of varlist children unemployed {
    dfuller `series'
}

* AR(1) regressions on the logged series: first on the own lag only,
* then on the own lag plus the time trend t
foreach series of varlist ln_children ln_share_married {
    regress `series' L.`series'
    regress `series' L.`series' t
}


 /****************************/
/******** QUESTION  2 *******/
/****************************/
use "D:\STATA2023\part2_panel.dta", clear

* setting the data as panel data
xtset statefip year


* 5
keep if year==2022 // keeping observations in 2022 only

lab var children "The number of own children under age 5" // changes the label of children

twoway scatter children lnincome, ///
    ytitle("Average number of children")///
    xtitle("Natural logarithm of median household income") ///
    title("Relationship between children and lnincome in 2022") ///
    graphregion(color(white)) plotregion(color(white)) || lfit children lnincome

*summary of variable
summarize children lnincome

*summary of control variable

summarize share_married share_women


* 6. Pooled OLS on the full panel
* Reload the complete data set, replacing whatever is in memory
use "D:\STATA2023\part2_panel.dta", clear

* Fertility on log income with year indicators (last year as base category)
regress children lnincome ib(last).year

* Same specification with the additional control variables
regress children lnincome ib(last).year ///
    share_married share_women pop




* 7. Fixed effects and first differences
* Declare the panel with statefip as the panel identifier and year as the
* time variable. Declaring year alone would make year the panel id, so the
* fixed effects would be year effects instead of statefip effects.
xtset statefip year

* Fixed-effects (within) estimator
xtreg children lnincome share_married share_women pop, fe


* First differences. With the panel declared above, the D. operator
* differences each variable within a statefip over consecutive years and
* never subtracts one statefip's value from another's.
foreach v of varlist children lnincome share_married share_women pop {
    generate d`v' = D.`v'
}


* First-difference regression, estimated without a constant
regress dchildren dlnincome dshare_married dshare_women dpop, noconstant






No comments:

Post a Comment

The Need for Efficient Cable Organizer in the Digital Age

    Table of Contents 1       Introduction . 5 2       Literature review .. 5 3       Quality of Theoretical Foundati...