lab:datasets:nlsy:r_codes_for_initial_data_process_for_piat_math
As of 2010-03-10
## process the piat math data
## Read the raw file; a lone "." in the file stands for a missing value.
## The argument is spelled with its documented name `na.strings` rather than
## relying on partial matching of `na.string`.
piat.orig <- read.table("piatmath.dat", na.strings = ".")
## Everything below addresses columns by position (1:8 and 9:14), so stop
## early if the file does not have the 14 columns named here.
stopifnot(ncol(piat.orig) == 14)
## Column layout: id, sex, one age column per year 1997-2002, then one
## PIAT column per year 1997-2002.
names(piat.orig) <- c(
  "id", "sex",
  "age1997", "age1998", "age1999", "age2000", "age2001", "age2002",
  "piat1997", "piat1998", "piat1999", "piat2000", "piat2001", "piat2002"
)
## select data based on missingness
## M[i, j] is TRUE when column j is missing for row i of piat.orig.
M <- is.na(piat.orig)
## Per-row missing counts:
##   M.sum[, 1] = number missing among columns 1-8  (id, sex, ages)
##   M.sum[, 2] = number missing among columns 9-14 (PIAT scores)
## rowSums() does the row-wise counting directly; drop = FALSE keeps the
## slices two-dimensional even if the file holds a single row.
M.sum <- cbind(
  rowSums(M[, 1:8, drop = FALSE]),
  rowSums(M[, 9:14, drop = FALSE])
)
## how many rows have 0, 1, ..., 6 PIAT scores missing
table(M.sum[, 2])
## choose data with complete data on sex and age
## (rows with nothing missing among columns 1-8)
piat.comp.age <- piat.orig[M.sum[, 1] == 0, ]
dim(piat.comp.age)
## choose piat with at least two measurements:
## complete columns 1-8 and at most 4 of the 6 PIAT scores missing
piat <- piat.orig[M.sum[, 1] == 0 & M.sum[, 2] <= 4, ]
dim(piat)
## number of rows kept
n <- nrow(piat)
## PIAT score (columns 9-14) against age (columns 3-8), one trajectory per
## row of piat.
## An empty frame is drawn first and every row is then added inside the
## loop, so the first row gets its points and connecting line on the same
## figure as all the others.
plot(NA, NA, type = "n", xlim = c(12, 19), ylim = c(0, 100),
     xlab = "age", ylab = "PIAT math score")
## seq_len() yields an empty loop when piat has no rows (2:n would count
## backwards for n < 2).
for (i in seq_len(nrow(piat))) {
  ## unlist() turns the one-row data frame slices into plain numeric
  ## vectors; type = "o" draws the points and the line joining them.
  lines(unlist(piat[i, 3:8]), unlist(piat[i, 9:14]), type = "o")
}
## age at 18
## this is the data for analysis
## keep the rows of piat whose 8th column (the last age column) equals 18
piat.18 <- piat[piat[[8]] == 18, ]
dim(piat.18)
## number of rows in the age-18 subset
n <- nrow(piat.18)
## PIAT trajectories (columns 9-14) for at most the first 100 rows of
## piat.18, drawn against the fixed x values 13:18.
## NOTE(review): using 13:18 as the x axis assumes each of these subjects
## was aged 13, 14, ..., 18 over the six waves -- confirm against the age
## columns.
## The frame is set up empty and every row, including the first, is added
## in the loop so that all of them show both points and a connecting line.
plot(NA, NA, type = "n", xlim = c(13, 18), ylim = c(0, 100),
     xlab = "age", ylab = "PIAT math score")
## Cap at 100 rows to keep the figure readable, but never index past the
## end of piat.18 when it has fewer rows than that.
for (i in seq_len(min(nrow(piat.18), 100))) {
  ## type = "o" draws the points and the line joining them
  lines(13:18, unlist(piat.18[i, 9:14]), type = "o")
}
## write data for Laura for mixture modeling
## Plain text output without row or column labels.
## NOTE(review): columns 9:13 are piat1997-piat2001, so piat2002 (column 14)
## is not written, and the rows come from piat rather than piat.18 --
## confirm both are intended.
write.table(
  piat[, 9:13],
  file = "piatmath-trymixture.txt",
  row.names = FALSE,
  col.names = FALSE
)
## some basic analysis
## Per-wave distribution of the PIAT columns (9-14) of piat.18.
## boxplot() and hist() have no `na.rm` argument and already ignore missing
## values, so it is not passed (a named extra argument would only be
## forwarded on as a graphical parameter). Columns are taken by name so each
## figure can be titled with its column, and invisible() keeps the returned
## plot objects from being printed to the console.
invisible(lapply(names(piat.18)[9:14], function(v) {
  boxplot(piat.18[[v]], main = v)
}))
invisible(lapply(names(piat.18)[9:14], function(v) {
  hist(piat.18[[v]], main = v, xlab = "PIAT math score")
}))
## all six waves side by side, on the raw and on the log10 scale
boxplot(piat.18[, 9:14],
        names = c("1997", "1998", "1999", "2000", "2001", "2002"))
boxplot(log10(piat.18[, 9:14]),
        names = c("1997", "1998", "1999", "2000", "2001", "2002"))
## per-wave histograms after a square-root transform
invisible(lapply(names(piat.18)[9:14], function(v) {
  hist(sqrt(piat.18[[v]]), main = paste0("sqrt(", v, ")"),
       xlab = "sqrt(PIAT math score)")
}))
lab/datasets/nlsy/r_codes_for_initial_data_process_for_piat_math.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1
