## data process
## Read the PIAT math data, select analysis subsets by missingness and
## covariates, and draw exploratory plots.

## ---- helpers ---------------------------------------------------------------

## Draw one histogram per column of `dat`, titled with the column name.
## Extra arguments (e.g. breaks = 20) are forwarded to hist(); hist() already
## ignores missing values, so no na.rm argument is needed.
hist_each <- function(dat, ...) {
  dat <- as.data.frame(dat)
  for (nm in names(dat)) {
    hist(dat[[nm]], main = nm, xlab = nm, ...)
  }
  invisible(NULL)
}

## Draw one line-plus-points trajectory per row of `y`.
## `x` is either a vector of common x positions (one per column of `y`) or a
## matrix/data frame of per-row x positions with the same shape as `y`.
plot_trajectories <- function(x, y, xlim = NULL, ylim = c(0, 100),
                              xlab = "", ylab = "") {
  y <- t(as.matrix(y))
  x <- if (is.null(dim(x))) x else t(as.matrix(x))
  matplot(x, y, type = "o", pch = 1, lty = 1, col = "black",
          xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab)
  invisible(NULL)
}

## ---- read the piat math data -------------------------------------------------

## '.' marks a missing value in the raw file.
piat.orig <- read.table("piatmath-age-grade.dat.txt", na.strings = ".")
names(piat.orig) <- c(
  "id", "sex", "race",
  paste0("grade", 1997:2002),
  paste0("age", 1997:2002),
  paste0("piat", 1997:2002)
)

## ---- select data based on missingness ---------------------------------------

## M flags every missing cell. M.sum keeps the id/sex/race flags (columns 1-3)
## and adds per-subject counts of missing grades (4), ages (5) and piat
## scores (6) over the six waves.
M <- is.na(piat.orig)
M.sum <- cbind(
  M[, 1:3],
  grade = rowSums(M[, 4:9]),
  age   = rowSums(M[, 10:15]),
  piat  = rowSums(M[, 16:21])
)
table(M.sum[, "piat"])

## choose data with complete data on sex and age
## (the original tested column 1, i.e. missingness of id, not sex/age)
piat.comp.age <- piat.orig[M.sum[, "sex"] == 0 & M.sum[, "age"] == 0, ]
dim(piat.comp.age)

## TRUE for subjects with no missing id, sex, race, grade or age.
complete.cov <- rowSums(M.sum[, 1:5]) == 0

## choose piat with at least two measurements (fewer than 5 of 6 missing)
piat.2 <- piat.orig[complete.cov & M.sum[, "piat"] < 5, ]
dim(piat.2)

boxplot(piat2000 ~ sex, data = piat.2)
boxplot(piat2000 ~ race, data = piat.2)
boxplot(piat2000 ~ age1997, data = piat.2)
boxplot(piat2000 ~ grade1997, data = piat.2)

table(piat.2$sex)
table(piat.2$race)
table(piat.2$age1997)
table(piat.2$grade1997)

## choose piat with at least three measurements (fewer than 4 of 6 missing)
piat.3 <- piat.orig[complete.cov & M.sum[, "piat"] < 4, ]
dim(piat.3)

## ---- subsets of piat.2 by grade / age / race in 1997 ------------------------
## piat.2 has complete grade, age and race, so these masks contain no NA.
## race == 4 is the code the original comments call "non-black"
## (NOTE(review): confirm against the codebook).
is.g6  <- piat.2$grade1997 == 6
is.a12 <- piat.2$age1997 == 12
is.nb  <- piat.2$race == 4

## at least two data points at grade 6
piat.t2.g6 <- piat.2[is.g6, ]
dim(piat.t2.g6)

## at least two data points at grade 6, age12
piat.t2.g6.a12 <- piat.2[is.g6 & is.a12, ]
dim(piat.t2.g6.a12)

## One histogram per wave in a 2 x 3 grid; the previous layout is restored
## afterwards so that later single plots use the full device.
old.par <- par(mfrow = c(2, 3))

## at least two data points at grade 6, age12, non-black
piat.t2.g6.a12.nb <- piat.2[is.g6 & is.a12 & is.nb, ]
dim(piat.t2.g6.a12.nb)
hist_each(piat.t2.g6.a12.nb[, 16:21])

## at least two data points at grade 6, non-black
## (the original also filtered on age 12, duplicating the subset above)
piat.t2.g6.nb <- piat.2[is.g6 & is.nb, ]
dim(piat.t2.g6.nb)
hist_each(piat.t2.g6.nb[, 16:21], breaks = 20)

## at least two data points, age12, non-black
## (the original also filtered on grade 6, duplicating the subset above)
piat.t2.a12.nb <- piat.2[is.a12 & is.nb, ]
dim(piat.t2.a12.nb)
hist_each(piat.t2.a12.nb[, 16:21], breaks = 20)

par(old.par)

## ---- individual trajectories -------------------------------------------------
## NOTE(review): `piat` is not created anywhere in this part of the script; it
## must already exist in the workspace. The code below uses columns 3:8 as the
## x values (presumably age at each wave) and columns 9:14 as the y values
## (presumably the piat scores), which is NOT the column layout of piat.orig.
## TODO confirm where `piat` comes from.
n <- dim(piat)[1]

## All subjects in a single frame. The original drew subject 1 twice in two
## separate frames and so lost its connecting line in the final figure.
plot_trajectories(piat[, 3:8], piat[, 9:14],
                  xlim = c(12, 19), ylim = c(0, 100),
                  xlab = "age", ylab = "piat")

## age at 18
## this is the data for analysis
## which() drops rows where column 8 is NA instead of returning all-NA rows.
piat.18 <- piat[which(piat[, 8] == 18), ]
dim(piat.18)
n <- dim(piat.18)[1]

## Trajectories of (at most) the first 100 subjects at common ages 13-18.
plot_trajectories(13:18, head(piat.18[, 9:14], 100),
                  ylim = c(0, 100), xlab = "age", ylab = "piat")

## write data for Laura for mixture modeling
## NOTE(review): this exports columns 9:13 (five columns) while the plots above
## use 9:14 (six columns) -- kept as in the original; confirm it is intended.
write.table(piat[9:13], "piatmath-trymixture.txt",
            row.names = FALSE, col.names = FALSE)

## ---- some basic analysis -----------------------------------------------------
wave.labels <- c("1997", "1998", "1999", "2000", "2001", "2002")
scores.18 <- as.data.frame(piat.18[, 9:14])

## Per-wave boxplots and histograms, one panel per wave.
## boxplot() and hist() drop missing values themselves.
old.par <- par(mfrow = c(2, 3))
for (j in seq_along(scores.18)) {
  boxplot(scores.18[[j]], main = wave.labels[j])
}
hist_each(scores.18)
par(old.par)

## Side-by-side boxplots on the raw and log10 scales.
boxplot(scores.18, names = wave.labels)
boxplot(log10(scores.18), names = wave.labels)

## Histograms on the square-root scale.
old.par <- par(mfrow = c(2, 3))
hist_each(sqrt(scores.18))
par(old.par)