For your project, you want to make sure that you keep your code organized. You also want to make sure that all of your project code is contained in a single file.
Here are some examples of what your code might look like at the end of the semester. Be sure to write comments in your code to help keep yourself organized.
Example R script:
######################################
# Load in project data file
######################################
load("/Volumes/qac201/Studies and Codebooks/AddHealth/Data/addhealth_merged.RData")

# Variables I am interested in (kept together with the ID and sex columns):
#   H1GH1 - general health
#   H1WP4 - parents allow you to make decisions about how much TV you watch
#   H1WP6 - parents allow you to make own decisions about when to go to bed

# Make subset to include only variables of interest
# (m1 is presumably the data frame restored by load() above - confirm the name)
var.to.keep <- c(
  "AID",
  "BIO_SEX",
  "H1GH1",
  "H1WP4",
  "H1WP6"
)
myData <- m1[, var.to.keep]

# Load in necessary libraries
library(ggplot2)  # plotting
library(descr)    # freq() frequency tables
######################################
# Data Management and Frequency Tables
######################################

# General Health: codes 6 and 8 (refusal / legitimate skip) become missing
myData$H1GH1[myData$H1GH1 %in% c(6, 8)] <- NA
freq(myData$H1GH1)

# Bio Sex: code 6 (refusal) becomes missing
myData$BIO_SEX[myData$BIO_SEX %in% 6] <- NA
freq(myData$BIO_SEX)

# Parents allow you to make own decisions about TV:
# codes 6-9 (refusal and legitimate skip options) become missing
myData$H1WP4[myData$H1WP4 %in% 6:9] <- NA
freq(myData$H1WP4)

# Parents allow you to make own decisions about when you go to bed:
# codes 6-9 (refusal and legitimate skip options) become missing
myData$H1WP6[myData$H1WP6 %in% 6:9] <- NA
freq(myData$H1WP6)

# Construct new variable Health, 1 = Lowest Health, 5 = Highest Health.
# H1GH1 is reverse-coded: 1 -> 5, 2 -> 4, 3 -> 3, 4 -> 2, 5 -> 1.
# Any value outside 1-5 (including NA) has no match and stays NA.
myData$Health <- c(5, 4, 3, 2, 1)[match(myData$H1GH1, 1:5)]
freq(myData$Health)

# Construct new variable ParentControl that says whether parent is making
# all the decisions. Rows where either item is missing stay NA.
# NOTE(review): the labels treat a value of 1 as "the parent decides", while
# the item descriptions ("parents allow you to make own decisions ...") read
# as if 1 could mean the child decides - confirm against the codebook.
myData$ParentControl <- local({
  bed <- myData$H1WP6
  tv <- myData$H1WP4
  control <- rep(NA_character_, nrow(myData))
  # which() drops NA comparisons so only definite matches are labelled
  control[which(bed == 1 & tv == 1)] <- "Parent Controlling Decisions"
  control[which(bed == 0 & tv == 0)] <- "Child making own decisions"
  control[which((bed == 0 & tv == 1) | (bed == 1 & tv == 0))] <-
    "Partial Parent Control"
  control
})
freq(myData$ParentControl)

# Re-order so that variable is laid out from least control to most control
myData$ParentControl <- factor(
  myData$ParentControl,
  levels = c(
    "Child making own decisions",
    "Partial Parent Control",
    "Parent Controlling Decisions"
  )
)
# Univariate graphing code
# Drop every row that has a missing value in any column, so the graphs and
# models below are all based on the same complete cases.
myData <- na.omit(myData)

ggplot(myData, aes(x = Health)) +
  geom_bar()

ggplot(myData, aes(x = ParentControl)) +
  geom_bar()

# Bivariate graphing code: bar height is the mean Health rating per group
ggplot(myData, aes(x = ParentControl, y = Health)) +
  stat_summary(fun = "mean", geom = "bar") +
  ylab("Average Health Rating")

# Code to test the association between variables:
# one-way ANOVA followed by Tukey pairwise comparisons
mod <- aov(Health ~ ParentControl, data = myData)
summary(mod)
TukeyHSD(mod)

# Multivariate graph - health, parental control, sex
ggplot(myData, aes(x = ParentControl, y = Health, fill = factor(BIO_SEX))) +
  stat_summary(
    fun = "mean",
    geom = "bar",
    position = "dodge",
    alpha = 0.8
  ) +
  xlab("Parental Control") +
  ylab("Average Health Rating") +
  scale_fill_manual(
    "Biological Sex",
    labels = c("Male", "Female"),
    values = c("dodgerblue", "orchid4")
  )

# Model to predict health: main effects of parental control and sex plus
# their interaction (the * term already expands to main effects + interaction)
mod <- lm(
  Health ~ ParentControl + factor(BIO_SEX) + ParentControl * factor(BIO_SEX),
  data = myData
)
summary(mod)
Example Stata do file:
// Load in data and select variables of interest
// H1GH1 - general health
// H1WP4 - parents allow you to make decisions about how much TV you watch
// H1WP6 - parents allow you to make own decisions about when to go to bed
use AID BIO_SEX H1GH1 H1WP4 H1WP6 ///
using "P:\QAC\qac201\Studies and Codebooks\AddHealth\Data\addhealth_merged.dta", clear
// use /// when the command continues on the next line
****************************************
* Data Management and Frequency Tables
****************************************
// General Health, code out refusal or legitimate skip options
replace H1GH1=. if H1GH1==6|H1GH1==8
/*
or using the recode command
recode H1GH1 (6 8 =.)
*/
tab H1GH1
// Bio Sex, code out refusal
replace BIO_SEX=. if BIO_SEX==6
// tabulate the variable that was just recoded
tab BIO_SEX
// Parents Allow you to make own decisions about TV, code out refusal and legitimate skip options
replace H1WP4=. if H1WP4==6|H1WP4==7|H1WP4==8|H1WP4==9
/* or again with the recode command
recode H1WP4 (6/9=.)
*/
tab H1WP4
// Parents Allow you to make own decisions about when you go to bed, code out refusal and legitimate skip options
replace H1WP6=. if H1WP6==6 | H1WP6==7 | H1WP6==8 | H1WP6==9
tab H1WP6
// Construct new variable Health 1=Lowest Health, 5=Highest Health
// (H1GH1 is reverse-coded; observations with missing H1GH1 stay missing)
gen Health=.
replace Health=1 if H1GH1==5
replace Health=2 if H1GH1==4
replace Health=3 if H1GH1==3
replace Health=4 if H1GH1==2
replace Health=5 if H1GH1==1
tab Health
// Construct new variable ParentControl that says whether parent is making all the decisions
// 0 = both items are 0, 1 = exactly one item is 1, 2 = both items are 1;
// observations with either item missing stay missing
gen ParentControl=.
replace ParentControl=0 if H1WP4==0&H1WP6==0
replace ParentControl=2 if H1WP4==1&H1WP6==1
replace ParentControl=1 if H1WP4==0&H1WP6==1
replace ParentControl=1 if H1WP4==1&H1WP6==0
tab ParentControl
// Univariate graphing code
graph bar, over(ParentControl)
graph bar, over(Health)
// Bivariate graphing code
graph bar Health, over(ParentControl)
// Code to test the association between variables
// (one-way ANOVA with summary table and Sidak multiple comparisons)
oneway Health ParentControl, tabulate sidak
// Multivariate graph - health, parental control, sex
graph bar Health, over(ParentControl) over(BIO_SEX)
// Regression model - health, parental control, sex
// Variable names are case-sensitive: the outcome was generated as Health, not health
reg Health i.ParentControl i.BIO_SEX // i.variable_name for categorical variables (factors)
Example SAS program:
/*Set up library*/
LIBNAME myFolder "P:\QAC\QAC201\Studies and Codebooks\AddHealth\Data";
/*Variables in study include:
H1GH1 - general health
H1WP4 - parents allow you to make decisions about how much TV you watch
H1WP6 - parents allow you to make own decisions about when to go to bed */
/*Load in data and do data management on variables*/
data myData; set myFolder.addhealth_merged;
/*Code out refusals and don't knows*/
if H1GH1 in (6, 8) then H1GH1=.;
if BIO_SEX=6 then BIO_SEX=.;
if H1WP4 in (6, 7, 8, 9) then H1WP4=.;
if H1WP6 in (6, 7, 8, 9) then H1WP6=.;
/*Construct new variable Health 1=Lowest Health, 5=Highest Health
  (H1GH1 is reverse-coded; Health stays missing when H1GH1 is missing)*/
if H1GH1=5 then Health=1;
else if H1GH1=4 then Health=2;
else if H1GH1=3 then Health=3;
else if H1GH1=2 then Health=4;
else if H1GH1=1 then Health=5;
/*Construct new variable ParentControl that says whether parent is making all the decisions
  0 = both items are 0, 1 = exactly one item is 1, 2 = both items are 1;
  ParentControl stays missing when either item is missing*/
if H1WP6=1 and H1WP4=1 then ParentControl=2;
if H1WP6=0 and H1WP4=1 then ParentControl=1;
if H1WP6=1 and H1WP4=0 then ParentControl=1;
if H1WP6=0 and H1WP4=0 then ParentControl=0;
/*End the DATA step explicitly instead of relying on the next PROC*/
run;
/*Frequency Tables*/
proc freq data=myData;
tables Health BIO_SEX ParentControl;
run;
/*Univariate graphing code*/
proc sgplot data=myData;
vbar Health;
run;
proc sgplot data=myData;
vbar ParentControl;
run;
/*Bivariate graphing code: bar height is the mean of Health in each group*/
proc sgplot data=myData;
vbar ParentControl/ response=Health stat=mean;
run;
/*Code to test the association between variables
  (one-way ANOVA with Duncan's multiple range test)*/
proc anova data=myData; class ParentControl;
model Health = ParentControl; means ParentControl /duncan;
run;
quit; /*PROC ANOVA is interactive, so close it explicitly*/
/*Multivariate graph - health, parental control, sex*/
proc sgpanel data=myData;
panelby bio_sex/layout=columnlattice onepanel;
vbar ParentControl/ group=ParentControl response=Health stat=mean;
run;
/*Model to predict health*/
proc sort data=mydata; by ParentControl;
run;
/*BIO_SEX is a category code, so it is listed in CLASS along with ParentControl*/
proc glm data=myData; class ParentControl BIO_SEX;
model Health=ParentControl BIO_SEX /solution;
run;
quit; /*PROC GLM is interactive, so close it explicitly*/