# This code is for the statistical software package R.
# You can download this software for free at http://www.r-project.org/.
# Download all the files, change the WorkDir (work directory)
#  below to whatever fold you dumped the files into,
#  paste the code into R (Control-A, Control-C) and let'er rip!
WorkDir <- "C:/Laboratory/AAAMTELData/";
par(mfrow=c(1,1));

# Start with model with SAT scores, the six sessions
#---------------------------------------------------------------------
# Run a regression, SAT-W vs. SAT-V.
# Interpolate the missing SAT-W.
# Make initial guesses for the missing Score.
# Fit a regression to get beta-coefficients for the Score model.
#  Use separate models/regressions for reading and writing.
# Interpolate the missing Score.
#
# Fit two regressions to get beta-coefficients for models
#  for SAT-W given the rest of the data.
# Get test-by-test estimates for missing SAT-W.
# For each person with a missing SAT-W, take the average
#  of the test-by-test estimates to get a guess for that
#  person's theoretical SAT-W.
# Use the new SAT-W estimates to get new beta-coefficients
#  for the two models for the MTEL-R and MTEL-W.
# Use multiple linear regression to get test-by-test estimates
#  for missing SAT-W.
# Rinse and repeat.
#

#------------------ Phase 1 ------------------------------------------
#------------------ Reading and preparing the data set ---------------
S1F06Date <- 38993 #10/03/06
S2F06Date <- 39000 #10/10/06
S3F06Date <- 39007 #10/17/06
S4F06Date <- 39014 #10/24/06
S5F06Date <- 39021 #10/31/06
S6F06Date <- 39028 #11/07/06

S1S07Date <- 39105 #01/23/07
S2S07Date <- 39112 #01/30/07
S3S07Date <- 39119 #02/06/07
S4S07Date <- 39126 #02/13/07
S5S07Date <- 39133 #02/20/07
S6S07Date <- 39140 #02/27/07

S1F07Date <- 39364 #10/09/07
S2F07Date <- 39371 #10/16/07
S3F07Date <- 39378 #10/23/07
S4F07Date <- 39385 #10/30/07
S5F07Date <- 39392 #11/06/07
S6F07Date <- 39399 #11/13/07

S1S08Date <- 39475 #01/28/08
S2S08Date <- 39482 #02/05/08
S3S08Date <- 39489 #02/12/08
S4S08Date <- 39496 #02/19/08
S5S08Date <- 39503 #02/26/08
S6S08Date <- 39510 #03/04/08

ReadIn <- read.csv(paste(WorkDir,"MTELGenData.csv",sep=""),header=TRUE);
attach(ReadIn);

nTests <- length(TDNumber);
nTestsR <- sum(TestType=="R");
nTestsW <- nTests - nTestsR;
nStud <- StudNum[nTests];
nColZ <- 0; i1 <- nTests; while (i1 > 1) {
   if (StudNum[i1] == StudNum[i1-1]) {i1 = i1 - 1;}
   else if (SATW[i1] > 0) {i1 = i1 - 1;}
   else {
      nColZ = nColZ + 1;
      i1 = i1 - 1;
   }
}
i1 = 1; if (SATW[i1] == 0) {nColZ = nColZ + 1;}
ZMatrix <- matrix(0,nTests,nColZ);
i1 <- nTests; i2 <- nColZ + 1;
   if (SATW[i1]==0) {
      i2 = i2 - 1;
      ZMatrix[i1,i2] = 1;
   }
i1 <- nTests - 1; while (i1 > 0) {
   if (SATW[i1]==0) {
      if (StudNum[i1]<StudNum[i1+1]) {i2 = i2 - 1;}
      ZMatrix[i1,i2] = 1;
   }
   i1 = i1 - 1;
}
AveMat <- ((1 / (matrix(1,nTests,nTests) %*% ZMatrix)) * ZMatrix) %*% t(ZMatrix);
ZMatrixR <- ZMatrix[TestType=="R",];
AveMatR <- ((1 / (matrix(1,nTestsR,nTestsR) %*% ZMatrixR)) * ZMatrixR) %*% t(ZMatrixR);
ZMatrixW <- ZMatrix[TestType=="W",];
AveMatW <- ((1 / (matrix(1,nTestsW,nTestsW) %*% ZMatrixW)) * ZMatrixW) %*% t(ZMatrixW);
rm(i1,i2);

# SATV is a decent predictor of SATW
SATSet <- rep(1,nTests);
i1 = nTests; while (i1 > 1) {
   if (StudNum[i1]==StudNum[i1-1]) {SATSet[i1] = 0;}
   if (SATW[i1]==0) {SATSet[i1] = 0;}
   i1 = i1 - 1;
}
rm(i1);
Fit0 <- lm(SATW~SATV,subset=(SATSet==1));
Sum0 <- summary(Fit0);
Slope0 <- Sum0$coefficients[2,1];
Int0 <- Sum0$coefficients[1,1];

png(file=paste(WorkDir,"Figure1.png",sep=""));
SATVSATW <- plot(SATV,SATW,
                 xlab="SAT-Verbal Score",
                 ylab="SAT-Writing Score",
                 main="SAT-Writing vs. SAT-Verbal"
           );
abline(Int0,Slope0);
#savePlot(file=paste(WorkDir,"SATVSATW",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure1",sep=""),type="jpg");
dev.off()

SATW2 <- Slope0*SATV + Int0;
i1 = nTests; while (i1 > 0) {
   if (SATW[i1] > 0) {SATW2[i1] = SATW[i1];}
   i1 = i1 - 1;
}
rm(i1);

S1R <- rep(0,nTests); S2R <- rep(0,nTests); S3R <- rep(0,nTests);
S4R <- rep(0,nTests); S5R <- rep(0,nTests); S6R <- rep(0,nTests);
S1W <- rep(0,nTests); S2W <- rep(0,nTests); S3W <- rep(0,nTests);
S4W <- rep(0,nTests); S5W <- rep(0,nTests); S6W <- rep(0,nTests);
SATVR <- rep(0,nTests); SATVW <- rep(0,nTests);
SATW2R <- rep(0,nTests); SATW2W <- rep(0,nTests);
AttemptR <- rep(0,nTests); AttemptW <- rep(0,nTests);
SATWM <- rep(0,nTests);
i1 <- nTests; while (i1>0) {
   if (TestType[i1]=="R") {
           if ((TDNumber[i1]>S1F06Date)&(S1F06[i1]>0)) {S1R[i1] = 1;}
      else if ((TDNumber[i1]>S1S07Date)&(S1S07[i1]>0)) {S1R[i1] = 1;}
      else if ((TDNumber[i1]>S1F07Date)&(S1F07[i1]>0)) {S1R[i1] = 1;}
      else if ((TDNumber[i1]>S1S08Date)&(S1S08[i1]>0)) {S1R[i1] = 1;}

           if ((TDNumber[i1]>S2F06Date)&(S2F06[i1]>0)) {S2R[i1] = 1;}
      else if ((TDNumber[i1]>S2S07Date)&(S2S07[i1]>0)) {S2R[i1] = 1;}
      else if ((TDNumber[i1]>S2F07Date)&(S2F07[i1]>0)) {S2R[i1] = 1;}
      else if ((TDNumber[i1]>S2S08Date)&(S2S08[i1]>0)) {S2R[i1] = 1;}

           if ((TDNumber[i1]>S3F06Date)&(S3F06[i1]>0)) {S3R[i1] = 1;}
      else if ((TDNumber[i1]>S3S07Date)&(S3S07[i1]>0)) {S3R[i1] = 1;}
      else if ((TDNumber[i1]>S3F07Date)&(S3F07[i1]>0)) {S3R[i1] = 1;}
      else if ((TDNumber[i1]>S3S08Date)&(S3S08[i1]>0)) {S3R[i1] = 1;}

           if ((TDNumber[i1]>S4F06Date)&(S4F06[i1]>0)) {S4R[i1] = 1;}
      else if ((TDNumber[i1]>S4S07Date)&(S4S07[i1]>0)) {S4R[i1] = 1;}
      else if ((TDNumber[i1]>S4F07Date)&(S4F07[i1]>0)) {S4R[i1] = 1;}
      else if ((TDNumber[i1]>S4S08Date)&(S4S08[i1]>0)) {S4R[i1] = 1;}

           if ((TDNumber[i1]>S5F06Date)&(S5F06[i1]>0)) {S5R[i1] = 1;}
      else if ((TDNumber[i1]>S5S07Date)&(S5S07[i1]>0)) {S5R[i1] = 1;}
      else if ((TDNumber[i1]>S5F07Date)&(S5F07[i1]>0)) {S5R[i1] = 1;}
      else if ((TDNumber[i1]>S5S08Date)&(S5S08[i1]>0)) {S5R[i1] = 1;}

           if ((TDNumber[i1]>S6F06Date)&(S6F06[i1]>0)) {S6R[i1] = 1;}
      else if ((TDNumber[i1]>S6S07Date)&(S6S07[i1]>0)) {S6R[i1] = 1;}
      else if ((TDNumber[i1]>S6F07Date)&(S6F07[i1]>0)) {S6R[i1] = 1;}
      else if ((TDNumber[i1]>S6S08Date)&(S6S08[i1]>0)) {S6R[i1] = 1;}

      SATVR[i1] = SATV[i1];
      SATW2R[i1] = SATW2[i1];
      AttemptR[i1] = Attempt[i1];
   }
   else if (TestType[i1]=="W") {
           if ((TDNumber[i1]>S1F06Date)&(S1F06[i1]>0)) {S1W[i1] = 1;}
      else if ((TDNumber[i1]>S1S07Date)&(S1S07[i1]>0)) {S1W[i1] = 1;}
      else if ((TDNumber[i1]>S1F07Date)&(S1F07[i1]>0)) {S1W[i1] = 1;}
      else if ((TDNumber[i1]>S1S08Date)&(S1S08[i1]>0)) {S1W[i1] = 1;}

           if ((TDNumber[i1]>S2F06Date)&(S2F06[i1]>0)) {S2W[i1] = 1;}
      else if ((TDNumber[i1]>S2S07Date)&(S2S07[i1]>0)) {S2W[i1] = 1;}
      else if ((TDNumber[i1]>S2F07Date)&(S2F07[i1]>0)) {S2W[i1] = 1;}
      else if ((TDNumber[i1]>S2S08Date)&(S2S08[i1]>0)) {S2W[i1] = 1;}

           if ((TDNumber[i1]>S3F06Date)&(S3F06[i1]>0)) {S3W[i1] = 1;}
      else if ((TDNumber[i1]>S3S07Date)&(S3S07[i1]>0)) {S3W[i1] = 1;}
      else if ((TDNumber[i1]>S3F07Date)&(S3F07[i1]>0)) {S3W[i1] = 1;}
      else if ((TDNumber[i1]>S3S08Date)&(S3S08[i1]>0)) {S3R[i1] = 1;}

           if ((TDNumber[i1]>S4F06Date)&(S4F06[i1]>0)) {S4W[i1] = 1;}
      else if ((TDNumber[i1]>S4S07Date)&(S4S07[i1]>0)) {S4W[i1] = 1;}
      else if ((TDNumber[i1]>S4F07Date)&(S4F07[i1]>0)) {S4W[i1] = 1;}
      else if ((TDNumber[i1]>S4S08Date)&(S4S08[i1]>0)) {S4W[i1] = 1;}

           if ((TDNumber[i1]>S5F06Date)&(S5F06[i1]>0)) {S5W[i1] = 1;}
      else if ((TDNumber[i1]>S5S07Date)&(S5S07[i1]>0)) {S5W[i1] = 1;}
      else if ((TDNumber[i1]>S5F07Date)&(S5F07[i1]>0)) {S5W[i1] = 1;}
      else if ((TDNumber[i1]>S5S08Date)&(S5S08[i1]>0)) {S5W[i1] = 1;}

           if ((TDNumber[i1]>S6F06Date)&(S6F06[i1]>0)) {S6W[i1] = 1;}
      else if ((TDNumber[i1]>S6S07Date)&(S6S07[i1]>0)) {S6W[i1] = 1;}
      else if ((TDNumber[i1]>S6F07Date)&(S6F07[i1]>0)) {S6W[i1] = 1;}
      else if ((TDNumber[i1]>S6S08Date)&(S6S08[i1]>0)) {S6W[i1] = 1;}

      SATVW[i1] = SATV[i1];
      SATW2W[i1] = SATW2[i1];
      AttemptW[i1] = Attempt[i1];
   }
   if (SATW[i1]==0) {SATWM[i1] = 1;}
   if (PassFail[i1]=="P") {Score[i1] = 270;}
   i1 = i1 - 1;
}
rm(i1);

DataSet <- data.frame(cbind(DataLine,StudNum,SATSet,
                            SATW,SATWM,
                            SATW2,SATW2R,SATW2W,
                            SATV,SATVR,SATVW,
                            S1R,S1W,
                            S2R,S2W,
                            S3R,S3W,
                            S4R,S4W,
                            S5R,S5W,
                            S6R,S6W,
                            AttemptR,AttemptW,
                            TestType=1*(TestType=="R"),
                            PassFail=1*(PassFail=="P"),
                            Score
           ));

#------------------ Phase 2 ------------------------------------------
#------------------ Using the interative process ---------------------
Fit1R <- list(NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  10
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  20
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  30
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  40
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  50
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  60
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  70
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  80
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  90
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL  # 100
         );
Sum1R <- Fit1R; Pred1R <- Fit1R;
Fit2R <- Fit1R; Sum2R <- Fit1R; Pred2R <- Fit1R;
Fit1W <- Fit1R; Sum1W <- Fit1R; Pred1W <- Fit1R;
Fit2W <- Fit1R; Sum2W <- Fit1R; Pred2W <- Fit1R;
DataSetR <- subset(DataSet,TestType==1);
DataSetW <- subset(DataSet,TestType==0);

j1 <- 1; while (j1 <= 100) {
   Fit1Ra <- lm(Score~
#                     SATW2R
                     +SATVR
#                     +S1R
                     +S2R
                     +S3R
                     +S4R
                     +S5R
                     +S6R
                     +AttemptR
#                     +SATW2W
#                     +SATVW
#                     +S1W
#                     +S2W
#                     +S3W
#                     +S4W
#                     +S5W
#                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetR);
   Sum1Ra <- summary(Fit1Ra);
   Pred1Ra <- predict.lm(Fit1Ra);
   i1 <- nTestsR; while (i1 > 0) {
      if (DataSetR$PassFail[i1]==1) {
         DataSetR$Score[i1] = max(Pred1Ra[i1],240);
      }
      i1 = i1 - 1;
   }

   Fit1Wa <- lm(Score~
#                     SATW2R
#                     +SATVR
#                     +S1R
#                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
#                     +S6R
#                     +AttemptR
                     +SATW2W
                     +SATVW
#                     +S1W
#                     +S2W
#                     +S3W
#                     +S4W
#                     +S5W
                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetW);
   Sum1Wa <- summary(Fit1Wa);
   Pred1Wa <- predict.lm(Fit1Wa);
   i1 <- nTestsW; while (i1 > 0) {
      if (DataSetW$PassFail[i1]==1) {
         DataSetW$Score[i1] = max(Pred1Wa[i1],240);
      }
      i1 = i1 - 1;
   }

   Fit2Ra <- lm(SATW2~
#                     Score
                     +SATVR
#                     +S1R
                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
                     +S6R
#                     +AttemptR
#                     +SATVW
#                     +S1W
#                     +S2W
#                     +S3W
#                     +S4W
#                     +S5W
#                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetR,
               subset=(SATWM==0));
   Sum2Ra <- summary(Fit2Ra);
   Pred2Ra <- predict.lm(Fit2Ra,newdata=DataSetR);

   Fit2Wa <- lm(SATW2~
#                     Score
#                     +SATVR
#                     +S1R
#                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
#                     +S6R
#                     +AttemptR
                     +SATVW
#                     +S1W
#                     +S2W
                     +S3W
#                     +S4W
#                     +S5W
                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetW,
               subset=(SATWM==0));
   Sum2Wa <- summary(Fit2Wa);
   Pred2Wa <- predict.lm(Fit2Wa,newdata=DataSetW);

   Pred2 <- SATW;
   Pred2[DataSetR$DataLine] <- Pred2Ra;
   Pred2[DataSetW$DataLine] <- Pred2Wa;
   Pred2M <- AveMat %*% Pred2;
   Pred2MR <- Pred2M[DataSetR$DataLine];
   Pred2MW <- Pred2M[DataSetW$DataLine];
   i1 <- nTestsR; while(i1 > 0) {
      if (DataSetR$SATW[i1] == 0) {DataSetR$SATW2[i1] <- Pred2MR[i1];}
      if (DataSetR$SATW2R[i1] > 0) {DataSetR$SATW2R[i1] <- DataSetR$SATW2[i1];}
      i1 = i1 - 1;
   }
   i1 <- nTestsW; while(i1 > 0) {
      if (DataSetW$SATW[i1] == 0) {DataSetW$SATW2[i1] <- Pred2MW[i1];}
      if (DataSetW$SATW2W[i1] > 0) {DataSetW$SATW2W[i1] <- DataSetW$SATW2[i1];}
      i1 = i1 - 1;
   }

   Fit1R[[j1]] = Fit1Ra;
   Sum1R[[j1]] = Sum1Ra;
   Pred1R[[j1]] = DataSetR$Score;
   Fit2R[[j1]] = Fit2Ra;
   Sum2R[[j1]] = Sum2Ra;
   Pred2R[[j1]] = DataSetR$SATW2;
   Fit1W[[j1]] = Fit1Wa;
   Sum1W[[j1]] = Sum1Wa;
   Pred1W[[j1]] = DataSetW$Score;
   Fit2W[[j1]] = Fit2Wa;
   Sum2W[[j1]] = Sum2Wa;
   Pred2W[[j1]] = DataSetW$SATW2;
   j1 = j1 + 1;
}
Fit1RFinal <- Fit1R[[100]];
Sum1RFinal <- summary(Fit1RFinal);
Pred1RFinal <- predict(Fit1RFinal);
SE1RFinal <- Sum1RFinal$sigma;
df1RFinal <- Sum1RFinal$df[2];
Fit1WFinal <- Fit1W[[100]];
Sum1WFinal <- summary(Fit1WFinal);
Pred1WFinal <- predict(Fit1WFinal);
SE1WFinal <- Sum1WFinal$sigma;
df1WFinal <- Sum1WFinal$df[2];

#------------------ Phase 3 ------------------------------------------
#------------------ Logistic Regression with Iteration ---------------
Fit3R <- list(NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  10
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  20
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  30
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  40
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  50
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  60
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  70
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  80
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, #  90
              NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL  # 100
         );
Sum3R <- Fit3R; Pred3R <- Fit3R;
Fit4R <- Fit3R; Sum4R <- Fit3R; Pred4R <- Fit3R;
Fit3W <- Fit3R; Sum3W <- Fit3R; Pred3W <- Fit3R;
Fit4W <- Fit3R; Sum4W <- Fit3R; Pred4W <- Fit3R;
DataSetR <- subset(DataSet,TestType==1);
DataSetW <- subset(DataSet,TestType==0);

j1 <- 1; while (j1 <= 100) {
   Fit3Ra <- glm(PassFail~
                         SATW2R
#                         +SATVR
#                         +S1R
#                         +S2R
#                         +S3R
                         +S4R
#                         +S5R
                         +S6R
                         +AttemptR
#                         +SATW2W
#                         +SATVW
#                         +S1W
#                         +S2W
#                         +S3W
#                         +S4W
#                         +S5W
#                         +S6W
#                         +AttemptW
#                         +TestType
                         ,
                data=DataSetR,
                family="binomial");
   Sum3Ra <- summary(Fit3Ra);
   Pred3Ra <- predict.glm(Fit3Ra);
   DataSetR$Score <- Pred3Ra;

   Fit3Wa <- glm(PassFail~
#                         SATW2R
#                         +SATVR
#                         +S1R
#                         +S2R
#                         +S3R
#                         +S4R
#                         +S5R
#                         +S6R
#                         +AttemptR
                         +SATW2W
                         +SATVW
                         +S1W
#                         +S2W
#                         +S3W
#                         +S4W
#                         +S5W
#                         +S6W
#                         +AttemptW
#                         +TestType
                         ,
                data=DataSetW,
                family="binomial");
   Sum3Wa <- summary(Fit3Wa);
   Pred3Wa <- predict.glm(Fit3Wa);
   DataSetW$Score <- Pred3Wa;

   Fit4Ra <- lm(SATW2~
                     Score
#                     +SATVR
#                     +S1R
#                     +S2R
#                     +S3R
                     +S4R
                     +S5R
                     +S6R
                     +AttemptR
#                     +SATVW
#                     +S1W
#                     +S2W
#                     +S3W
#                     +S4W
#                     +S5W
#                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetR,
               subset=(SATWM==0));
   Sum4Ra <- summary(Fit4Ra);
   Pred4Ra <- predict.lm(Fit4Ra,newdata=DataSetR);

   Fit4Wa <- lm(SATW2~
                     Score
#                     +SATVR
#                     +S1R
#                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
#                     +S6R
#                     +AttemptR
                     +SATVW
                     +S1W
#                     +S2W
#                     +S3W
                     +S4W
#                     +S5W
                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetW,
               subset=(SATWM==0));
   Sum4Wa <- summary(Fit4Wa);
   Pred4Wa <- predict.lm(Fit4Wa,newdata=DataSetW);

   Pred4 <- SATW;
   Pred4[DataSetR$DataLine] <- Pred4Ra;
   Pred4[DataSetW$DataLine] <- Pred4Wa;
   Pred4M <- AveMat %*% Pred4;
   Pred4MR <- Pred4M[DataSetR$DataLine];
   Pred4MW <- Pred4M[DataSetW$DataLine];
   i1 <- nTestsR; while(i1 > 0) {
      if (DataSetR$SATW[i1] == 0) {DataSetR$SATW2[i1] <- Pred4MR[i1];}
      if (DataSetR$SATW2R[i1] > 0) {DataSetR$SATW2R[i1] <- DataSetR$SATW2[i1];}
      i1 = i1 - 1;
   }
   i1 <- nTestsW; while(i1 > 0) {
      if (DataSetW$SATW[i1] == 0) {DataSetW$SATW2[i1] <- Pred4MW[i1];}
      if (DataSetW$SATW2W[i1] > 0) {DataSetW$SATW2W[i1] <- DataSetW$SATW2[i1];}
      i1 = i1 - 1;
   }

   Fit3R[[j1]] = Fit3Ra;
   Sum3R[[j1]] = Sum3Ra;
   Pred3R[[j1]] = DataSetR$Score;
   Fit4R[[j1]] = Fit4Ra;
   Sum4R[[j1]] = Sum4Ra;
   Pred4R[[j1]] = DataSetR$SATW2;
   Fit3W[[j1]] = Fit3Wa;
   Sum3W[[j1]] = Sum3Wa;
   Pred3W[[j1]] = DataSetW$Score;
   Fit4W[[j1]] = Fit4Wa;
   Sum4W[[j1]] = Sum4Wa;
   Pred4W[[j1]] = DataSetW$SATW2;
   j1 = j1 + 1;
}
Fit3RFinal <- Fit3R[[100]];
Sum3RFinal <- summary(Fit3RFinal);
Pred3RFinal <- predict(Fit3RFinal);
SE3RFinal <- Sum3RFinal$sigma;
df3RFinal <- Sum3RFinal$df[2];
Fit3WFinal <- Fit3W[[100]];
Sum3WFinal <- summary(Fit3WFinal);
Pred3WFinal <- predict(Fit3WFinal);
SE3WFinal <- Sum3WFinal$sigma;
df3WFinal <- Sum3WFinal$df[2];

Sum3R[[100]]
Sum3W[[100]]
Sum4R[[100]]
Sum4W[[100]]

#------------------ Phase 4 ------------------------------------------
#------------------ One-pass Logistic Regression ----------------------------
DataSetR <- subset(DataSet,TestType==1);
DataSetW <- subset(DataSet,TestType==0);

   Fit5R <- lm(SATW2~
#                     PassFail
                     +SATVR
#                     +S1R
                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
                     +S6R
#                     +AttemptR
#                     +SATVW
#                     +S1W
#                     +S2W
#                     +S3W
#                     +S4W
#                     +S5W
#                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetR,
               subset=(SATWM==0));
   Sum5R <- summary(Fit5R);
   Pred5R <- predict.lm(Fit5R,newdata=DataSetR);

   Fit5W <- lm(SATW2~
                     PassFail
#                     +SATVR
#                     +S1R
#                     +S2R
#                     +S3R
#                     +S4R
#                     +S5R
#                     +S6R
#                     +AttemptR
                     +SATVW
#                     +S1W
#                     +S2W
                     +S3W
#                     +S4W
#                     +S5W
                     +S6W
#                     +AttemptW
#                     +TestType
                     ,
               data=DataSetW,
               subset=(SATWM==0));
   Sum5W <- summary(Fit5W);
   Pred5W <- predict.lm(Fit5W,newdata=DataSetW);

   Pred5 <- SATW;
   Pred5[DataSetR$DataLine] <- Pred5R;
   Pred5[DataSetW$DataLine] <- Pred5W;
   Pred5M <- AveMat %*% Pred5;
   Pred5MR <- Pred5M[DataSetR$DataLine];
   Pred5MW <- Pred5M[DataSetW$DataLine];
   i1 <- nTestsR; while(i1 > 0) {
      if (DataSetR$SATW[i1] == 0) {DataSetR$SATW2[i1] <- Pred5MR[i1];}
      if (DataSetR$SATW2R[i1] > 0) {DataSetR$SATW2R[i1] <- DataSetR$SATW2[i1];}
      i1 = i1 - 1;
   }
   i1 <- nTestsW; while(i1 > 0) {
      if (DataSetW$SATW[i1] == 0) {DataSetW$SATW2[i1] <- Pred5MW[i1];}
      if (DataSetW$SATW2W[i1] > 0) {DataSetW$SATW2W[i1] <- DataSetW$SATW2[i1];}
      i1 = i1 - 1;
   }

   Fit6R <- glm(PassFail~
                         SATW2R
#                         +SATVR
#                         +S1R
#                         +S2R
#                         +S3R
#                         +S4R
#                         +S5R
                         +S6R
#                         +AttemptR
#                         +SATW2W
#                         +SATVW
#                         +S1W
#                         +S2W
#                         +S3W
#                         +S4W
#                         +S5W
#                         +S6W
#                         +AttemptW
#                         +TestType
                         ,
                data=DataSetR,
                family="binomial");
   Sum6R <- summary(Fit6R);
   Pred6R <- predict.glm(Fit6R);

   Fit6W <- glm(PassFail~
#                         SATW2R
#                         +SATVR
#                         +S1R
#                         +S2R
#                         +S3R
#                         +S4R
#                         +S5R
#                         +S6R
#                         +AttemptR
                         +SATW2W
#                         +SATVW
#                         +S1W
#                         +S2W
#                         +S3W
#                         +S4W
#                         +S5W
#                         +S6W
#                         +AttemptW
#                         +TestType
                         ,
                data=DataSetW,
                family="binomial");
   Sum6W <- summary(Fit6W);
   Pred6W <- predict.glm(Fit6W);

Sum5R
Sum5W
Sum6R
Sum6W

#------------------ Phase 5 ------------------------------------------
#------------------ Right-sided confidence intervals -----------------
PredSAT <- rep(100,0);
PredSAT[1] <- 400;
PredSAT[100] <- 700;
PredSAT[1:100] <- 1:100*(PredSAT[100]-PredSAT[1])/100 + PredSAT[1];

DataSetP <- data.frame(cbind(SATW2R=PredSAT,
                             SATVR=PredSAT,
                             S1R=rep(0,100),
                             S2R=rep(0,100),
                             S3R=rep(0,100),
                             S4R=rep(0,100),
                             S5R=rep(0,100),
                             S6R=rep(0,100),
                             AttemptR=rep(1,100),
                             SATW2W=PredSAT,
                             SATVW=PredSAT,
                             S1W=rep(0,100),
                             S2W=rep(0,100),
                             S3W=rep(0,100),
                             S4W=rep(0,100),
                             S5W=rep(0,100),
                             S6W=rep(0,100),
                             AttemptW=rep(1,100)
            ));

PRange1R <- predict(Fit1RFinal,
                    newdata=DataSetP,
                    level=0.90,
                    interval="prediction"         
            );
PRange1W <- predict(Fit1WFinal,
                    newdata=DataSetP,
                    level=0.90,
                    interval="prediction"         
            );
tStarR <- qt(0.95,df1RFinal);
tStarW <- qt(0.95,df1WFinal);
PSE1R <- (PRange1R[1:100,1]-PRange1R[1:100,2])/tStarR;
PSE1W <- (PRange1W[1:100,1]-PRange1W[1:100,2])/tStarW;
Prob1R <- (240-PRange1R[1:100,1])/PSE1R;
   Prob1R <- 1 - pt(Prob1R,df1RFinal);
Prob1W <- (240-PRange1W[1:100,1])/PSE1W;
   Prob1W <- 1 - pt(Prob1W,df1WFinal);

png(file=paste(WorkDir,"Figure2.png",sep=""),
    width=960, height=480);
par(mfrow=c(1,2));
plot(x=PredSAT,
     y=Prob1R,
     xlab="SAT verbal score",
     ylab="Model's probabilities of passing the reading subtest",
     main="Reading subtest probability curve"
);
#savePlot(file=paste(WorkDir,"ProbCurveR",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure2",sep=""),type="jpg");
plot(x=PredSAT,
     y=Prob1W,
     xlab="SAT verbal score with equal SAT writing score",
     ylab="Model's probabilities of passing the writing subtest",
     main="Writing subtest probability curve"
);
#savePlot(file=paste(WorkDir,"ProbCurveW",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure3",sep=""),type="jpg");
par(mfrow=c(1,1));
dev.off();

Prob3R <- predict.glm(Fit3RFinal,newdata=DataSetP);
     Prob3R <- 1/(1+exp(-Prob3R));
Prob3W <- predict.glm(Fit3WFinal,newdata=DataSetP);
     Prob3W <- 1/(1+exp(-Prob3W));
Prob6R <- predict.glm(Fit6R,newdata=DataSetP);
     Prob6R <- 1/(1+exp(-Prob6R));
Prob6W <- predict.glm(Fit6W,newdata=DataSetP);
     Prob6W <- 1/(1+exp(-Prob6W));
png(file=paste(WorkDir,"Figure5.png",sep=""),
    width=960, height=480);
par(mfrow=c(1,2));
plot(x=PredSAT,
     y=Prob1R,ylim=c(0,1),
     xlab="SAT verbal score with equal SAT writing score",
     ylab="Models' probabilities of passing the reading subtest",
     main="Reading subtest probability curve"
);
points(x=PredSAT,y=Prob3R,pch=24)
points(x=PredSAT,y=Prob6R,pch=25)
legend(x=400,y=1,
       legend=c("Original iterative",
                "Logistic iterative",
                "Logistic one-pass"),
       pch=c(21,24,25)
      );
plot(x=PredSAT,
     y=Prob1R,ylim=c(0,1),
     xlab="SAT verbal score with equal SAT writing score",
     ylab="Models' probabilities of passing the writing subtest",
     main="Reading subtest probability curve"
);
points(x=PredSAT,y=Prob3R,pch=24)
points(x=PredSAT,y=Prob6R,pch=25)
legend(x=400,y=1,
       legend=c("Original iterative",
                "Logistic iterative",
                "Logistic one-pass"),
       pch=c(21,24,25)
      );
par(mfrow=c(1,1));
dev.off();

#------------------ Phase 6 ------------------------------------------
#------------------ Sample residual plots ----------------------------
ResPlotX1R <- Pred1RFinal;
ResPlotX1W <- Pred1WFinal;
Correlations <- matrix(0,100,2);
j1 <- 100; while (j1 > 0) {
   ResPlotY1R <- DataSetR$Score;
   i1 <- nTestsR; while (i1 > 0) {
      if (ResPlotY1R[i1] >= 240) {
         ResPlotY1R[i1] = rnorm(1,ResPlotX1R[i1],SE1RFinal);
         while (ResPlotY1R[i1] < 240) {
            ResPlotY1R[i1] = rnorm(1,ResPlotX1R[i1],SE1RFinal);
         }
      }
      i1 = i1 - 1;
   }
   ResPlotY1R <- ResPlotY1R - ResPlotX1R;
   Correlations[j1,1] <- cor(ResPlotY1R,ResPlotX1R);

   ResPlotY1W <- DataSetW$Score;
   i1 <- nTestsW; while (i1 > 0) {
      if (ResPlotY1W[i1] >= 240) {
         ResPlotY1W[i1] = rnorm(1,ResPlotX1W[i1],SE1WFinal);
         while (ResPlotY1W[i1] < 240) {
            ResPlotY1W[i1] = rnorm(1,ResPlotX1W[i1],SE1WFinal);
         }
      }
      i1 = i1 - 1;
   }
   ResPlotY1W <- ResPlotY1W - ResPlotX1W;
   Correlations[j1,2] <- cor(ResPlotY1W,ResPlotX1W);

   j1 = j1 - 1;
}

png(file=paste(WorkDir,"Figure3.png",sep=""),
    width=960, height=480);
par(mfrow=c(1,2));
plot(x=ResPlotX1R,
     y=ResPlotY1R,
     xlab="Predicted MTEL reading subtest scores",
     ylab="Residuals for simulated reading subtest scores",
     main="Residual plot for the reading subtest model"
);
#savePlot(file=paste(WorkDir,"CorrelationsR",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure4",sep=""),type="jpg");
plot(x=ResPlotX1W,
     y=ResPlotY1W,
     xlab="Predicted MTEL writing subtest scores",
     ylab="Residuals for simulated writing subtest scores",
     main="Residual plot for the writing subtest model"
);
#savePlot(file=paste(WorkDir,"CorrelationsW",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure5",sep=""),type="jpg");
par(mfrow=c(1,1));
dev.off()

png(file=paste(WorkDir,"Figure4.png",sep=""),
    width=960, height=480);
par(mfrow=c(1,2));
hist(Correlations[,1],
     main="Histogram for reading subtests' correlations",
     xlab="Correlations for the reading subtest residuals");
#savePlot(file=paste(WorkDir,"HistCorrR",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure6",sep=""),type="jpg");
hist(Correlations[,2],
     main="Histogram for writing subtests' correlations",
     xlab="Correlations for the writing subtest residuals");
#savePlot(file=paste(WorkDir,"HistCorrW",sep=""),type="jpg");
#savePlot(file=paste(WorkDir,"Figure7",sep=""),type="jpg");
par(mfrow=c(1,1));
dev.off()

#------------------ Phase 7 ------------------------------------------
#------------------ Other graphs and histograms -----------------

par(mfrow=c(2,2));
hist(SATV[SATSet==1],
     main="Histogram of SAT Verbal Scores",
     xlab="SAT Verbal Score",
     ylab="Number of Students");
hist(SATW[SATSet==1],
     main="Histogram of SAT Writing Scores",
     xlab="SAT Writing Score",
     ylab="Number of Students");
hist(SATW[SATSet==1]+SATV[SATSet==1],
     main="Histogram of Combined SAT Scores",
     xlab="Combined SAT Score",
     ylab="Number of Students");
hist(Attempt,
     main="Histogram of Attempts",
     xlab="Number of Attempts",
     ylab="Number of Subtests",
     breaks=0:6);

TestSet <- subset(DataSet,AttemptR==1);
cor(cbind(TestSet$S1R,
          TestSet$S2R,
          TestSet$S3R,
          TestSet$S4R,
          TestSet$S5R,
          TestSet$S6R))

# Reading, SATW available, Pass
sum((DataSet$TestType==1)*(DataSet$SATW>0)*(DataSet$PassFail==1));
# Reading, SATW available, Fail
sum((DataSet$TestType==1)*(DataSet$SATW>0)*(DataSet$PassFail==0));
# Reading, SATW not available, Pass
sum((DataSet$TestType==1)*(DataSet$SATW==0)*(DataSet$PassFail==1));
# Reading, SATW not available, Fail
sum((DataSet$TestType==1)*(DataSet$SATW==0)*(DataSet$PassFail==0));
# Writing, SATW available, Pass
sum((DataSet$TestType==0)*(DataSet$SATW>0)*(DataSet$PassFail==1));
# Writing, SATW available, Fail
sum((DataSet$TestType==0)*(DataSet$SATW>0)*(DataSet$PassFail==0));
# Writing, SATW not available, Pass
sum((DataSet$TestType==0)*(DataSet$SATW==0)*(DataSet$PassFail==1));
# Writing, SATW not available, Fail
sum((DataSet$TestType==0)*(DataSet$SATW==0)*(DataSet$PassFail==0));

DataSetR$PassFail;
Guess1R <- (Pred1RFinal > 240);
Guess3R <- ((1/(1+exp(-Pred3RFinal))) > 0.5);
XTableR <- xtabs(~Guess1R+Guess3R+DataSetR$PassFail);

DataSetW$PassFail;
Guess1W <- (Pred1WFinal > 240);
Guess3W <- ((1/(1+exp(-Pred3WFinal))) > 0.5);
XTableW <- xtabs(~Guess1W+Guess3W+DataSetW$PassFail);

# Here are the coefficients and tables for Models 1R and 1W.
# 
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 188.42253   11.76509  16.015  < 2e-16 ***
# SATVR         0.06908    0.02029   3.405  0.00110 ** 
# S2R           7.41384    4.04414   1.833  0.07102 .  
# S3R         -10.43697    3.34023  -3.125  0.00259 ** 
# S4R         -12.32946    3.86958  -3.186  0.00215 ** 
# S5R          -7.10014    3.16406  -2.244  0.02800 *  
# S6R          20.02423    3.39374   5.900 1.17e-07 ***
# AttemptR      8.52730    1.70401   5.004 4.01e-06 ***
# ---
#              Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 173.94343    9.00993  19.306   <2e-16 ***
# SATW2W        0.05445    0.02338   2.329   0.0226 *  
# SATVW         0.05884    0.02603   2.261   0.0267 *  
# S6W           4.09514    2.11881   1.933   0.0571 .  
# ---
# 
# Here are the coefficients and tables for Models 3R and 3W.
# 
#              Estimate Std. Error z value Pr(>|z|)  
# (Intercept) -5.153766   2.551453  -2.020   0.0434 *
# SATW2R       0.009224   0.004359   2.116   0.0343 *
# S4R         -1.873888   0.889126  -2.108   0.0351 *
# S6R          2.087282   0.853973   2.444   0.0145 *
# AttemptR     0.780352   0.466727   1.672   0.0945 .
# ---
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept) -14.473240   3.655077  -3.960  7.5e-05 ***
# SATW2W        0.014859   0.007507   1.979   0.0478 *  
# SATVW         0.013016   0.007303   1.782   0.0747 .  
# S1W           1.042058   0.621473   1.677   0.0936 .  
# ---
# 
# Here are the coefficients for Models 6R and 6W.
# 
#              Estimate Std. Error z value Pr(>|z|)  
# (Intercept) -3.906234   2.160624  -1.808   0.0706 .
# SATW2R       0.008008   0.003954   2.025   0.0429 *
# S6R          0.960070   0.541339   1.774   0.0761 .
# ---
#               Estimate Std. Error z value Pr(>|z|)    
# (Intercept) -10.161367   2.742013  -3.706 0.000211 ***
# SATW2W        0.020315   0.005329   3.812 0.000138 ***

#http://blog.revolutionanalytics.com/2009/01/10-tips-for-making-your-r-graphics-look-their-best.html
#234567890123456789012345678901234567890123456789012345678901234567890

