User Tools

Site Tools


lab:datasets:nlsy:r_codes_for_initial_data_process_for_piat_math

As of 2010-03-10

## process the piat math data
##
## Reads the raw file 'piatmath.dat' from the working directory; a lone '.'
## in the file marks a missing value. The 14 columns are named: id, sex,
## age at each wave 1997-2002, piat score at each wave 1997-2002.

waves <- 1997:2002
## na.strings is spelled out in full (the original relied on partial
## argument matching of 'na.string')
piat.orig <- read.table("piatmath.dat", na.strings = ".")
names(piat.orig) <- c("id", "sex", paste0("age", waves), paste0("piat", waves))

## select data based on missingness
## M is a logical matrix, TRUE wherever piat.orig has a missing value
M <- is.na(piat.orig)

## per-person missing counts:
##   column 1 = number of missing values among columns 1-8 (id, sex, ages)
##   column 2 = number of missing values among columns 9-14 (piat scores)
M.sum <- cbind(rowSums(M[, 1:8, drop = FALSE]),
               rowSums(M[, 9:14, drop = FALSE]))
table(M.sum[, 2])

## choose data with complete data on sex and age
## (the id column is part of the same check because it sits in columns 1-8)
piat.comp.age <- piat.orig[M.sum[, 1] == 0, ]
dim(piat.comp.age)

## choose piat with at least two measurements
## six score columns, so "at least two observed" means "at most four missing"
n.waves <- length(9:14)
min.obs <- 2
piat <- piat.orig[M.sum[, 2] <= n.waves - min.obs & M.sum[, 1] == 0, ]
dim(piat)

n <- nrow(piat)

## Spaghetti plot of every selected person: piat score (columns 9-14)
## against age (columns 3-8), one trajectory per person.
## Transposing puts one person in each column, which is the layout matplot
## expects; type = "o" draws points and connecting lines for everyone,
## including the first person. Missing scores break the line, as lines() does.
if (n > 0) {
  ages.all <- t(as.matrix(piat[, 3:8]))
  scores.all <- t(as.matrix(piat[, 9:14]))
  matplot(ages.all, scores.all, type = "o", pch = 1, lty = 1, col = 1,
          xlim = c(12, 19), ylim = c(0, 100),
          xlab = "age", ylab = "PIAT math score")
}


## age at 18
## this is the data for analysis
## keep the people whose value in column 8 (age at the 2002 wave) is 18
piat.18 <- piat[piat[, 8] == 18, ]
dim(piat.18)

n <- nrow(piat.18)

## Plot the trajectories of at most the first 100 people in piat.18.
## The x axis is fixed at 13:18 for the six waves.
## NOTE(review): this presumes ages advance by exactly one year per wave for
## everyone who is 18 in 2002 -- confirm against the age columns.
n.show <- min(n, 100)
if (n.show > 0) {
  ## one person per column; type = "o" gives points plus connecting lines
  scores.18 <- t(as.matrix(piat.18[seq_len(n.show), 9:14]))
  matplot(13:18, scores.18, type = "o", pch = 1, lty = 1, col = 1,
          ylim = c(0, 100), xlab = "age", ylab = "PIAT math score")
}

## write data for Laura for mixture modeling
## Writes columns 9-13 of piat (the 1997-2001 scores) without row or column
## names; missing values are written as NA (write.table default).
## NOTE(review): column 14 (the 2002 score) is not exported, and the source is
## piat rather than the age-18 subset piat.18 -- confirm both are intended.
write.table(piat[9:13], "piatmath-trymixture.txt",
            row.names = FALSE, col.names = FALSE)


## some basic analysis
## Per-wave distribution plots for the piat scores (columns 9-14) of piat.18.
## Explicit loops replace apply(): apply() coerces the data frame to a matrix
## and loses the column names for the plot titles. No na.rm argument is
## passed because hist() and boxplot() have none; both drop NAs themselves.
score.cols <- 9:14
wave.labels <- c("1997", "1998", "1999", "2000", "2001", "2002")

## one boxplot per wave
for (j in score.cols) {
  boxplot(piat.18[[j]], main = names(piat.18)[j])
}

## one histogram per wave
for (j in score.cols) {
  hist(piat.18[[j]], main = names(piat.18)[j], xlab = "score")
}

## all waves side by side, raw scale and log10 scale
## NOTE(review): a score of 0 becomes -Inf under log10 -- check the data range.
boxplot(piat.18[, score.cols], names = wave.labels)
boxplot(log10(piat.18[, score.cols]), names = wave.labels)

## histograms of square-root transformed scores, one per wave
for (j in score.cols) {
  hist(sqrt(piat.18[[j]]),
       main = paste0("sqrt(", names(piat.18)[j], ")"), xlab = "sqrt(score)")
}
lab/datasets/nlsy/r_codes_for_initial_data_process_for_piat_math.txt · Last modified: 2016/01/24 09:48 by 127.0.0.1