lab:zhang:process_the_data_for_analysis_2010-01-25
## add NA rows for the missing days so that every subject has 50 occasions

## read data
dset<-read.csv('dynamic-mediation-data-2010-01-25.csv', header=TRUE)
uniqueid<-unique(dset$X.id)
freqid<-table(dset$X.id)              ## number of observed days per subject
k<-length(uniqueid)                   ## number of subjects
id<-rep(1:length(uniqueid), freqid)   ## consecutive numeric id, one value per observed row
dset<-cbind(id, dset)
m<-length(uniqueid)*50                ## total rows after augmentation (50 days per subject)

## sort the data using id and survey
dset<-dset[order(dset$survey), ]
dset<-dset[order(dset$id), ]
dim(dset)

## collect the (id, day) pairs that are missing for each subject
aug<-NULL
len<-1:50
for (i in 1:k){
  temp<-dset$survey[dset$id==i]       ## days observed for subject i
  temp1<-len[-temp]                   ## days missing for subject i
  temp2<-cbind(rep(i,length(temp1)), temp1)
  aug<-rbind(aug, temp2)
}

## build NA rows for the missing days, filling in only id and survey
l<-m-dim(dset)[1]
aug.mat<-array(NA, dim=c(l, dim(dset)[2]))
aug.mat[,1]<-aug[,1]                  ## id (column 1 of dset)
aug.mat[,3]<-aug[,2]                  ## survey day (column 3 of dset)
colnames(aug.mat)<-colnames(dset)
dset<-rbind(dset, aug.mat)

## sort the augmented data using id and survey
dset<-dset[order(dset$survey), ]
dset<-dset[order(dset$id), ]

## attach the number of observed days per subject and write out the full data
freq.id<-rep(freqid, each=50)
dset<-cbind(dset, freq.id)
write.table(dset, file='DMA-data-2010-01-25.txt', row.names=FALSE, quote=FALSE)

## keep only subjects with at least 40 observed occasions
dset40<-dset[freq.id>39,]
write.table(dset40, file='DMA-data-40-2010-01-25.txt', row.names=FALSE, quote=FALSE)

## keep only subjects with at least 45 observed occasions
dset45<-dset[freq.id>44,]
write.table(dset45, file='DMA-data-45-2010-01-25.txt', row.names=FALSE, quote=FALSE)
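A minimal sanity check on the augmented output can be run after the script above; this is a sketch, not part of the original pipeline, and it only assumes the id, survey, and freq.id columns written to DMA-data-2010-01-25.txt by the code above.

## sanity check (sketch): every subject should have 50 rows covering days 1..50
chk <- read.table('DMA-data-2010-01-25.txt', header=TRUE)

## exactly 50 rows per subject after augmentation
stopifnot(all(table(chk$id) == 50))

## within each subject, the survey days should run exactly 1..50
days.ok <- tapply(chk$survey, chk$id, function(x) all(sort(x) == 1:50))
stopifnot(all(days.ok))

## freq.id (number of observed days) should be constant within each subject
stopifnot(all(tapply(chk$freq.id, chk$id, function(x) length(unique(x))) == 1))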