lab:datasets:nlsy:r_codes_for_initial_data_process_for_piat_math
As of 2010-03-10
## Initial processing of the PIAT math data.
##
## Reads the raw file 'piatmath.dat', keeps subjects with complete sex/age
## information and at least two PIAT scores, plots individual trajectories,
## extracts the subjects who are 18 in the last wave, writes a score file
## for mixture modeling, and draws some basic distribution plots.
##
## Objects created: piat.orig, M, M.sum, piat.comp.age, piat, piat.18, n
## (plus the small helpers age.cols, piat.cols, wave.labels, n.show).

## ---- process the piat math data ----
## '.' is the missing-value code in the raw file.
piat.orig <- read.table("piatmath.dat", na.strings = ".")
names(piat.orig) <- c(
  "id", "sex",
  "age1997", "age1998", "age1999", "age2000", "age2001", "age2002",
  "piat1997", "piat1998", "piat1999", "piat2000", "piat2001", "piat2002"
)

## Column positions used throughout the script.
age.cols <- 3:8     # age1997 ... age2002
piat.cols <- 9:14   # piat1997 ... piat2002
wave.labels <- c("1997", "1998", "1999", "2000", "2001", "2002")

## ---- select data based on missingness ----
## M.sum[, 1]: number of missing values among id, sex and the six ages
## M.sum[, 2]: number of missing values among the six PIAT scores
M <- is.na(piat.orig)
M.sum <- cbind(rowSums(M[, 1:8]), rowSums(M[, piat.cols]))
table(M.sum[, 2])

## choose data with complete data on sex and age
piat.comp.age <- piat.orig[M.sum[, 1] == 0, ]
dim(piat.comp.age)

## choose piat with at least two measurements
## (fewer than 5 of the 6 scores missing) and complete sex/age
piat <- piat.orig[M.sum[, 2] < 5 & M.sum[, 1] == 0, ]
dim(piat)
n <- nrow(piat)

## ---- individual trajectories: score against age ----
## Convert once to numeric matrices so each row can be passed straight to
## the plotting functions (a one-row data frame is a list, not a vector).
ages <- as.matrix(piat[, age.cols])
scores <- as.matrix(piat[, piat.cols])

## type = "o" draws points and connecting lines in a single call, so the
## first subject is shown the same way as all the others.
plot(ages[1, ], scores[1, ],
     ylim = c(0, 100), xlim = c(12, 19), type = "o",
     xlab = "age", ylab = "piat")
## seq_len(n)[-1] is empty when n <= 1, whereas 2:n would count backwards.
for (i in seq_len(n)[-1]) {
  lines(ages[i, ], scores[i, ], type = "o")
}

## ---- age at 18 ----
## this is the data for analysis
## Subjects whose age in the 2002 wave (column 8, age2002) is 18.
piat.18 <- piat[piat$age2002 == 18, ]
dim(piat.18)
n <- nrow(piat.18)

## Trajectories of the first (up to) 100 subjects on a common 13:18 axis.
## NOTE(review): using 13:18 for everyone assumes these subjects were
## 13, 14, ..., 18 at the six successive waves -- confirm against the data.
scores.18 <- as.matrix(piat.18[, piat.cols])
n.show <- min(n, 100)   # never index past the last available subject
plot(13:18, scores.18[1, ], ylim = c(0, 100), type = "o",
     xlab = "age", ylab = "piat")
for (i in seq_len(n.show)[-1]) {
  lines(13:18, scores.18[i, ], type = "o")
}

## ---- write data for Laura for mixture modeling ----
## NOTE(review): columns 9:13 are piat1997 ... piat2001, so piat2002 is NOT
## written. Kept as in the original script; use 9:14 if all six waves are
## wanted.
write.table(piat[, 9:13], "piatmath-trymixture.txt",
            row.names = FALSE, col.names = FALSE)

## ---- some basic analysis ----
## One boxplot / histogram per wave. lapply() walks the data-frame columns
## directly; 'na.rm' is not an argument of boxplot() or hist(), so it is
## not passed.
lapply(piat.18[, piat.cols], boxplot)
lapply(piat.18[, piat.cols], hist)

## Side-by-side boxplots of all waves, on the raw and on the log10 scale.
boxplot(piat.18[, piat.cols], names = wave.labels)
boxplot(log10(piat.18[, piat.cols]), names = wave.labels)

## Histograms of the square-root-transformed scores, one per wave.
lapply(sqrt(piat.18[, piat.cols]), hist)
lab/datasets/nlsy/r_codes_for_initial_data_process_for_piat_math.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1