# Load the cleaned model-usage data set.
allData <- read.csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/Modelusage_cleandataset_July2022_v2.csv", header = TRUE)

# Exams 2 and 3 contain the code "EX" in some rows; turn those entries into NA
# and make the columns numeric. The column is indexed directly (instead of
# assigning into a row subset of the data frame) so the statement is a harmless
# no-op when no row matches; the row-subset form stops with
# "replacement has 1 row, data has 0" in that case.
allData$Exam..2[which(allData$Exam..2 == "EX")] <- NA
allData$Exam..2 <- as.numeric(allData$Exam..2)
allData$Exam..3[which(allData$Exam..3 == "EX")] <- NA
allData$Exam..3 <- as.numeric(allData$Exam..3)

# The main question "Did you use the model kit" has several kinds of answers.
# Decision: anything that is not a yes counts as a no; "Both" counts as a yes.
allData$Did.you.use.the.model.kit.[
  allData$Did.you.use.the.model.kit. %in% c("N/A", "", "No answer")
] <- "N"
allData$Did.you.use.the.model.kit.[
  allData$Did.you.use.the.model.kit. %in% "Both"
] <- "Y"

# Strip all whitespace from the remaining answers (e.g. "Y " becomes "Y").
allData$Did.you.use.the.model.kit. <- gsub("\\s+", "", allData$Did.you.use.the.model.kit.)

# One data frame per exercise, selected by the exact label in the Question column.
newman <- allData[allData$Question %in% "Newman Projection (1.1)", ]
diaste <- allData[allData$Question %in% "Diastereomers model (1.6)", ]
dimeso <- allData[allData$Question %in% "Diastereomer Meso (1.3)", ]
enanto <- allData[allData$Question %in% "Enantiomers Ring (1.2)", ]
didash <- allData[allData$Question %in% "Diastereomers Wedge and Dash (1.5)", ]
# The per-exercise frames are later combined by row position, which assumes the
# students appear in the same order in every subset of allData.
# Checks for that assumption (left disabled):
#all(newman$ID == diaste$ID )
#all(newman$ID == dimeso$ID )
#all(newman$ID == enanto$ID )
#all(newman$ID == didash$ID )
# The first 11 columns of the Newman subset form the base per-student table.
students <- newman[, 1:11]

# Classify every row of `df` into one of three model-use groups.
#
# df: data frame with the columns Did.you.use.the.model.kit.,
#     I.preferred.to.use.other.methods..R.and.S and
#     I.preferred.to.use.other.methods..Visualizing.in.my.head.
# Returns a character vector with one entry per row of `df`:
#   "Using"    - the kit answer is exactly "Y";
#   "NotNeed"  - the kit was not used and at least one of the two
#                "other methods" columns is "Y";
#   "NotUsing" - everything else, including missing answers.
# A zero-row `df` gives character(0).
makeColumn4ModelUse <- function(df) {
  stopifnot(
    !is.null(df$Did.you.use.the.model.kit.),
    !is.null(df$I.preferred.to.use.other.methods..R.and.S),
    !is.null(df$I.preferred.to.use.other.methods..Visualizing.in.my.head)
  )
  # %in% never yields NA, so a missing answer simply counts as "not Y".
  usedKit <- df$Did.you.use.the.model.kit. %in% "Y"
  otherMethod <- df$I.preferred.to.use.other.methods..R.and.S %in% "Y" |
    df$I.preferred.to.use.other.methods..Visualizing.in.my.head %in% "Y"

  # Fill from lowest to highest priority so that "Using" wins over "NotNeed",
  # matching the order of the if / else-if chain this replaces.
  group <- rep("NotUsing", nrow(df))
  group[otherMethod] <- "NotNeed"
  group[usedKit] <- "Using"
  group
}
# Classify every row of `df` into two model-use groups.
#
# df: data frame with the column Did.you.use.the.model.kit.
# Returns a character vector with one entry per row: "Using" when the kit
# answer is exactly "Y", "NotUsing" otherwise (missing answers included).
# A zero-row `df` gives character(0).
makeColumn4ModelUse2 <- function(df) {
  stopifnot(!is.null(df$Did.you.use.the.model.kit.))
  group <- rep("NotUsing", nrow(df))
  # %in% never yields NA, so NA answers stay in the "NotUsing" group.
  group[df$Did.you.use.the.model.kit. %in% "Y"] <- "Using"
  group
}
# Combine model use and question outcome into a single four-level label.
#
# df: data frame with the columns Did.you.use.the.model.kit. and
#     Question.correct.
# Returns a character vector with one entry per row, one of
#   "Using Correct", "Using Incorrect", "Notusing Correct", "Notusing Incorrect".
# Only the exact values "Y" and "correct" count as use / correct; any other
# value, including NA, falls into the "Notusing" / "Incorrect" side.
# A zero-row `df` gives character(0).
makeColumn4ModelUseCorrect <- function(df) {
  stopifnot(
    !is.null(df$Did.you.use.the.model.kit.),
    !is.null(df$Question.correct)
  )
  usedKit <- df$Did.you.use.the.model.kit. %in% "Y"
  correct <- df$Question.correct %in% "correct"
  # FALSE/TRUE + 1L picks the first/second label of each pair.
  paste(
    c("Notusing", "Using")[usedKit + 1L],
    c("Incorrect", "Correct")[correct + 1L]
  )
}
# Bin a numeric score column into five blocks by its position within the
# observed range of that column.
#
# df:  data frame holding the scores.
# cat: name of the score column (string).
# Returns a numeric vector with one entry per row. With low = min and
# range = max - min of the non-missing scores, a score x is labelled
#   100 if x > low + 0.8 * range
#    80 if x > low + 0.6 * range
#    40 if x > low + 0.4 * range
#    20 if x > low + 0.2 * range
#     0 otherwise.
# Missing scores are ignored when computing the range and are returned as NA.
# NOTE(review): the labels jump from 80 to 40 (no 60). They are kept exactly as
# in the original; confirm whether 100/80/60/40/20 was intended.
getExamBlock <- function(df, cat) {
  scores <- df[[cat]]
  # Nothing to bin: empty column or only missing values.
  if (length(scores) == 0 || all(is.na(scores))) {
    return(rep(NA_real_, length(scores)))
  }
  low <- min(scores, na.rm = TRUE)
  range <- max(scores, na.rm = TRUE) - low
  cutoffs <- low + range * c(0.2, 0.4, 0.6, 0.8)
  labels <- c(0, 20, 40, 80, 100)
  # left.open = TRUE counts the cutoffs strictly below each score, which
  # reproduces the strict ">" comparisons listed above.
  labels[findInterval(scores, cutoffs, left.open = TRUE) + 1L]
}


# Per-exercise data frames, in the order their columns are added to `students`.
exerciseData <- list(
  newman = newman,
  diaste = diaste,
  enanto = enanto,
  didash = didash,
  dimeso = dimeso
)

# Three-group classification (Using / NotNeed / NotUsing): columns named after
# the exercise.
for (ex in names(exerciseData)) {
  students[[ex]] <- makeColumn4ModelUse(exerciseData[[ex]])
}

# Two-group classification (Using / NotUsing): columns with suffix "2".
for (ex in names(exerciseData)) {
  students[[paste0(ex, "2")]] <- makeColumn4ModelUse2(exerciseData[[ex]])
}

# Model use combined with correctness: columns with suffix "Comb".
for (ex in names(exerciseData)) {
  students[[paste0(ex, "Comb")]] <- makeColumn4ModelUseCorrect(exerciseData[[ex]])
}

# Raw question outcome: columns with suffix "Q".
for (ex in names(exerciseData)) {
  students[[paste0(ex, "Q")]] <- exerciseData[[ex]]$Question.correct
}
#students$Exam1block = getExamBlock(newman,"Exam..1")

# Export the per-student table to the working directory.
write.csv(students, file = "out.csv")
# Add an `answers` column that combines question outcome and model use, then
# sort the rows by that column.
#
# df: data frame with the columns Question.correct and
#     Did.you.use.the.model.kit.
# Returns `df` with the extra character column `answers`, ordered by it. The
# label is "<correct>&<used>", where each part is "Yes" or "No":
#   "Yes&Yes", "No&Yes", "Yes&No", "No&No".
# Only the exact values "correct" and "Y" count as "Yes"; anything else,
# including NA, counts as "No". A zero-row `df` is returned with an empty
# `answers` column.
addAnswerColumn <- function(df) {
  stopifnot(
    !is.null(df$Question.correct),
    !is.null(df$Did.you.use.the.model.kit.)
  )
  correct <- df$Question.correct %in% "correct"
  usedKit <- df$Did.you.use.the.model.kit. %in% "Y"
  yesNo <- c("No", "Yes")
  # FALSE/TRUE + 1L selects "No"/"Yes"; sep = "&" (rather than a literal "&"
  # argument) keeps the result zero-length for zero-row input.
  df$answers <- paste(yesNo[correct + 1L], yesNo[usedKit + 1L], sep = "&")
  df[order(df$answers), ]
}

library(ggplot2)
library(ggpubr)
library(psych)

# Box plot (with jittered points) of the numeric column `myy` grouped by the
# column `myx`, annotated with significance tests.
#
# df:      data frame; rows with a missing `myy` value are dropped first.
# myx:     name of the grouping column (also used for the colour).
# myy:     name of the numeric column plotted on the y axis.
# mytitle: plot title.
# myylab:  y-axis label.
# Returns the plot object; the caller is responsible for printing it.
plotGGbox <- function(df, myx, myy, mytitle, myylab) {
  plotData <- df[complete.cases(df[[myy]]), ]
  top <- max(plotData[[myy]])
  overallMean <- mean(plotData[[myy]])

  boxes <- ggboxplot(
    plotData,
    x = myx, y = myy,
    title = mytitle,
    color = myx, add = "jitter", legend = "none", ylab = myylab
  )

  boxes +
    rotate_x_text(angle = 45) +
    # dashed horizontal line at the mean over all groups
    geom_hline(yintercept = overallMean, linetype = 2) +
    # global ANOVA p-value, placed above the data
    stat_compare_means(method = "anova", label.y = top * 1.10) +
    coord_cartesian(ylim = c(0, top * 1.2)) +
    # per-group t-test against all observations (ref.group = ".all.")
    stat_compare_means(
      label = "p.format", size = 2.5, method = "t.test",
      ref.group = ".all.", label.y = top * 1.05
    )
}
# Print a table of Tukey HSD pairwise p-values for `myy` across the groups in
# `myx`.
#
# df:      data frame with the data.
# myx:     name of the grouping column.
# myy:     name of the numeric response column.
# mytitle: text appended to "Anova: " in the table caption.
# myylab:  accepted for signature symmetry with the plotting helpers; not used.
getAnova <- function(df, myx, myy, mytitle, myylab) {
  # One-way ANOVA followed by Tukey's honest significant differences.
  fit <- aov(df[[myy]] ~ df[[myx]])
  tukey <- TukeyHSD(fit)
  # Column 4 of the comparison matrix is taken as the p-values to report.
  pvalues <- as.data.frame(tukey$`df[[myx]]`[, 4])
  colnames(pvalues) <- "Testing statistical significance: p-values"
  print(knitr::kable(pvalues, caption = paste("Anova: ", mytitle)))
}
# Print, in order: the box plot, a table of per-group descriptive statistics,
# and the Tukey HSD p-value table for `myy` grouped by `myx`.
#
# df, myx, myy, mytitle, myylab: passed on to plotGGbox() and getAnova();
# `myylab` is also used in the caption of the statistics table.
plotAndTable <- function(df, myx, myy, mytitle, myylab) {
  print(plotGGbox(df, myx, myy, mytitle, myylab))

  groupStats <- describeBy(df[[myy]], df[[myx]], mat = TRUE, digits = 2)
  # Keep only a subset of the describeBy() columns for the printed table.
  shownStats <- groupStats[, c(2, 4, 5, 6, 7, 10, 11, 12)]
  statsCaption <- paste("Statistics of ", myylab, " based on getting the question correct (Yes/No) & using the models (Yes/No)")
  print(knitr::kable(shownStats, caption = statsCaption))

  getAnova(df, myx, myy, mytitle, myylab)
}

library(dplyr)
library(corrplot)
# Run a chi-square test on the contingency table of the columns of `a`, print
# the p-value and an explanatory note as HTML, and plot the residuals.
#
# a: data frame holding the categorical columns to cross-tabulate.
plotChi <- function(a) {
  # droplevels() removes unused factor levels, which otherwise show up in the
  # table as an empty "ghost" category.
  counts <- table(droplevels(a))
  chiTest <- chisq.test(counts)

  cat(paste("<p><b>The Chi-square analysis gives a p=", round(chiTest$p.value, 5), "</b></p>"))
  cat(paste("<p><b>Residuals analysis:</b></p>"))
  cat("A negative residual implies that the measured value is lower than expected and a positive value higher than expected</br>")

  # is.cor = FALSE: the residuals are not a correlation matrix.
  corrplot(chiTest$residuals, is.cor = FALSE)
}
# Print a stacked bar chart of two categorical columns and follow it with the
# chi-square analysis of the same two columns.
#
# df:       data frame with the data.
# myx:      name of the column shown on the x axis (the independent variable).
# myy:      name of the column used as fill (the dependent variable).
# myxlabel, myylabel, mytitle: axis labels and title for the bar chart.
plotBarAndCorr <- function(df, myx, myy, myxlabel, myylabel, mytitle) {
  # Keep only the two categorical variables, dependent variable first.
  twoColumns <- df[, c(myy, myx)]
  barChart <- plotBarCategories(twoColumns, myx, myy, myxlabel, myylabel, mytitle)
  print(barChart)
  plotChi(twoColumns)
}
# Stacked bar chart of counts of `myy` within each level of `myx`, with every
# segment labelled by its percentage of the bar it belongs to.
#
# a:        data frame with the two categorical columns.
# myx, myy: column names (strings) for the x axis and the fill.
# myxlabel, myylabel, mytitle: axis labels and plot title.
# Returns the plot object.
plotBarCategories <- function(a, myx, myy, myxlabel, myylabel, mytitle) {
  # Column names arrive as strings, so turn them into symbols for tidy eval.
  xCol <- sym(myx)
  fillCol <- sym(myy)

  counts <- count(a, !!fillCol, !!xCol)
  counts <- group_by(counts, !!xCol)
  # prop.table() runs per x group, so percentages add up to 100 within a bar.
  counts <- mutate(counts, lab = paste0(round(prop.table(n) * 100, 2), '%'))

  ggplot(counts, aes(!!xCol, n, fill = !!fillCol)) +
    geom_col() +
    geom_text(aes(label = lab), position = 'stack', vjust = 1.5) +
    labs(x = myxlabel, y = myylabel, title = mytitle)
}

1 Models

Newman Diastereomer Enantiomer Ring Wedge Dash Diastereomer Meso

2 Distribution 2 groups: Using Models / Not Using Models

From the table below we can see:

  • The exercise where students used the models the most is the Wedge/Dash exercise.
  • The exercise where students used the models the least is the enantiomer ring.
  • In every exercise except one, the percentage of students who answered correctly is higher among those who used the models. The small exception is the Diastereomer Meso exercise, where the fraction of students answering correctly is very similar with or without the models.
  • Specifically, the largest difference was in the Diastereomer exercise where 76% of students who used the model answered the question correctly, compared to students who did not use the model where only 63% answered it correctly.
# Count correct / incorrect answers for students who used the model kit and
# for those who did not.
#
# df: data frame with the columns Did.you.use.the.model.kit. and
#     Question.correct.
# Returns a list of two vectors, both ordered as
#   using total, using correct, using incorrect,
#   not-using total, not-using correct, not-using incorrect:
#   [[1]] the counts;
#   [[2]] a character vector with "%" in the two "total" slots and the
#         correct / incorrect percentages of each group (2 significant digits).
calcDistribution <- function(df) {
  usedKit <- df$Did.you.use.the.model.kit. == "Y"
  gotRight <- df$Question.correct == "correct"

  # Share of `part` in `whole`, as a percentage with 2 significant digits.
  percent <- function(part, whole) signif(part / whole * 100, digits = 2)

  nUsing <- sum(usedKit)
  nUsingRight <- sum(usedKit & gotRight)
  nUsingWrong <- nUsing - nUsingRight

  nNotUsing <- nrow(df) - nUsing
  nNotUsingRight <- sum(!usedKit & gotRight)
  nNotUsingWrong <- nNotUsing - nNotUsingRight

  counts <- c(nUsing, nUsingRight, nUsingWrong,
              nNotUsing, nNotUsingRight, nNotUsingWrong)
  percents <- c("%", percent(nUsingRight, nUsing), percent(nUsingWrong, nUsing),
                "%", percent(nNotUsingRight, nNotUsing), percent(nNotUsingWrong, nNotUsing))
  list(counts, percents)
}

# Two-group summary table (using / not using the models). For every exercise
# it gets three columns: counts, percentages and the chi-square p-value.
# `stem` names the count column and, with the suffix "Percent", the percentage
# column; `chiName` names the chi-square column.
# The enantiomer percentage column was previously misspelled
# "EnantiomerRingPercert"; it now follows the same pattern as the others.
distributionSpec <- list(
  list(data = newman, stem = "Newman",           chiName = "NewmanChiSquare"),
  list(data = diaste, stem = "Diastereomer",     chiName = "DiasteChiSquare"),
  list(data = enanto, stem = "EnantiomerRing",   chiName = "EnantoChiSquare"),
  list(data = didash, stem = "WedgeDash",        chiName = "DidashChiSquare"),
  list(data = dimeso, stem = "DiastereomerMeso", chiName = "DimesoChiSquare")
)

distribution <- data.frame(matrix(ncol = 0, nrow = 6))

for (spec in distributionSpec) {
  # Compute the distribution once and reuse it for both columns.
  parts <- calcDistribution(spec$data)
  chiTest <- chisq.test(table(spec$data$Did.you.use.the.model.kit.,
                              spec$data$Question.correct))

  # Counts are stored as text so that every column of the table is character.
  distribution[[spec$stem]] <- as.character(parts[[1]])
  distribution[[paste0(spec$stem, "Percent")]] <- parts[[2]]
  distribution[[spec$chiName]] <- c("Chi-squared", "p = ", round(chiTest$p.value, 4), " ", " ", " ")
}

rownames(distribution) <- c("Using Models N-Total", "Using Models N-Correct", "Using Models N-Incorrect",
                            "Not Using N-Total", "Not Using N-Correct", "Not Using N-Incorrect")


knitr::kable(t(distribution), digits = 1)
Using Models N-Total Using Models N-Correct Using Models N-Incorrect Not Using N-Total Not Using N-Correct Not Using N-Incorrect
Newman 80 59 21 70 46 24
NewmanPercent % 74 26 % 66 34
NewmanChiSquare Chi-squared p = 0.3719
Diastereomer 80 61 19 70 44 26
DiastereomerPercent % 76 24 % 63 37
DiasteChiSquare Chi-squared p = 0.108
EnantiomerRing 36 24 12 114 72 42
EnantiomerRingPercert % 67 33 % 63 37
EnantoChiSquare Chi-squared p = 0.8546
WedgeDash 94 64 30 56 37 19
WedgeDashPercent % 68 32 % 66 34
DidashChiSquare Chi-squared p = 0.9407
DiastereomerMeso 49 35 14 101 75 26
DiastereomerMesoPercent % 71 29 % 74 26
DimesoChiSquare Chi-squared p = 0.8645

We will show again the same information as in the table above but with graphs. The idea is that even if the Chi-square test gives us no significance, in some cases there are trends that must be recognized

2.1 Newman

plotBarAndCorr(newman,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Newman projection question")

The Chi-square analysis gives a p= 0.37193

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

2.2 Diaste

plotBarAndCorr(diaste,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Diastereomer question")

The Chi-square analysis gives a p= 0.10802

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

2.3 Enantiomer ring

plotBarAndCorr(enanto,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Enantiomer ring question")

The Chi-square analysis gives a p= 0.85463

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

2.4 Wedge dash

plotBarAndCorr(didash,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Wedge Dash question")

The Chi-square analysis gives a p= 0.9407

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

2.5 Diastereomer Meso

plotBarAndCorr(dimeso,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Diastereomer Meso question")

The Chi-square analysis gives a p= 0.86454

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

3 Distribution 3 groups: Using models / Dont need models / Not using models

We define the group “Dont need models” as those students who did not use the models and replied that this was because they either visualize the molecule in their head or use the R/S approach.

With the performance of these three groups shown in the table below, we can extract the following observations:

  • Even though from the previous table we saw that students using the models had a higher percent of correct answers, we see now that the students who do not use it because they don’t need it are actually the highest performers.
  • This means that wrapping all students in a not-using-models group was inaccurate.
  • The exercise where this observation is most relevant is the wedge and dash exercise, where 85% of students who claim they don’t need the models still got the answer right, while 68% of students using the model got it right.
  • The only exception is the diastereomer exercise where the difference is minimal 76% of students who used the model got it correct while 75% of students who didn’t need it got it correct.
  • In all exercises, students who did not use the model for a reason besides not needing it performed lower than the other two groups.
  • It makes sense that students who claim not to need the models perform better, as they may have already transitioned to type-2 thinking.
# Count correct / incorrect answers in three mutually exclusive groups:
#   using      - the kit answer is "Y";
#   not needed - the kit was NOT used and the student preferred another method
#                (R and S, or visualizing in their head);
#   not using  - the kit was not used and no other method was preferred.
#
# The "not needed" group explicitly excludes kit users, which matches the
# priority used by makeColumn4ModelUse ("Using" wins). Without that exclusion a
# student answering "Y" to both the kit and an "other method" question is
# counted in two groups, and a "not using" total obtained by subtraction from
# the number of rows comes out too small.
#
# df: data frame with the columns Did.you.use.the.model.kit.,
#     I.preferred.to.use.other.methods..R.and.S,
#     I.preferred.to.use.other.methods..Visualizing.in.my.head and
#     Question.correct.
# Returns a list of three vectors (a single vector cannot mix text and numbers):
#   [[1]] counts: total, correct, incorrect for using / not needed / not using;
#   [[2]] character vector in the same layout, "%" in the total slots and the
#         correct / incorrect percentages (2 significant digits) elsewhere;
#   [[3]] p-value of the chi-square test on the 3 x 2 correct/incorrect table.
calcDistribution3 <- function(df) {
  used <- df$Did.you.use.the.model.kit. == "Y"
  preferredOther <- df$I.preferred.to.use.other.methods..R.and.S == "Y" |
    df$I.preferred.to.use.other.methods..Visualizing.in.my.head == "Y"
  correct <- df$Question.correct == "correct"

  inNotNeeded <- !used & preferredOther
  inNotUsing <- !used & !preferredOther

  using <- sum(used)
  usingCorrect <- sum(used & correct)
  usingIncorrect <- using - usingCorrect

  notNeeded <- sum(inNotNeeded)
  notNeededCorrect <- sum(inNotNeeded & correct)
  notNeededIncorrect <- notNeeded - notNeededCorrect

  notusing <- sum(inNotUsing)
  notusingCorrect <- sum(inNotUsing & correct)
  notusingIncorrect <- notusing - notusingCorrect

  # One row per group, columns = correct / incorrect.
  mymat <- matrix(
    c(usingCorrect, usingIncorrect,
      notNeededCorrect, notNeededIncorrect,
      notusingCorrect, notusingIncorrect),
    nrow = 3, ncol = 2, byrow = TRUE
  )
  chiTest <- chisq.test(mymat)

  # Share of `part` in `whole`, as a percentage with 2 significant digits.
  percent <- function(part, whole) signif(part / whole * 100, digits = 2)

  list(
    c(using, usingCorrect, usingIncorrect,
      notNeeded, notNeededCorrect, notNeededIncorrect,
      notusing, notusingCorrect, notusingIncorrect),
    c("%", percent(usingCorrect, using), percent(usingIncorrect, using),
      "%", percent(notNeededCorrect, notNeeded), percent(notNeededIncorrect, notNeeded),
      "%", percent(notusingCorrect, notusing), percent(notusingIncorrect, notusing)),
    c(chiTest$p.value)
  )
}

# Three-group summary table (using / not needed / not using). For every
# exercise it gets three columns: counts, percentages and the chi-square
# p-value. `stem` names the count column and, with the suffix "Percent", the
# percentage column; `chiName` names the chi-square column.
# Two labels differ from the earlier version of this table: the misspelled
# "EnantiomerRingPercert" is now "EnantiomerRingPercent", and the chi-square
# row of the Meso exercise is "DiastereomerMesoChiSquare" instead of the
# ambiguous "DiastereomerChiSquare".
distribution3Spec <- list(
  list(data = newman, stem = "Newman",           chiName = "NewmanChiSquare"),
  list(data = diaste, stem = "Diastereomer",     chiName = "DiasteChiSquare"),
  list(data = enanto, stem = "EnantiomerRing",   chiName = "EnantiomerChiSquare"),
  list(data = didash, stem = "WedgeDash",        chiName = "WedgeDashChiSquare"),
  list(data = dimeso, stem = "DiastereomerMeso", chiName = "DiastereomerMesoChiSquare")
)

distribution3 <- data.frame(matrix(ncol = 0, nrow = 9))

for (spec in distribution3Spec) {
  # calcDistribution3() returns list(counts, percentages, p-value).
  parts <- calcDistribution3(spec$data)

  # Counts are stored as text so that every column of the table is character.
  distribution3[[spec$stem]] <- as.character(parts[[1]])
  distribution3[[paste0(spec$stem, "Percent")]] <- parts[[2]]
  distribution3[[spec$chiName]] <- c("Chi-squared", "p = ", round(parts[[3]], 4),
                                     " ", " ", " ", " ", " ", " ")
}

rownames(distribution3) <- c("Using Models N-Total", "Using Models N-Correct", "Using Models N-Incorrect",
                             "Not Needed N-Total", "Not Needed N-Correct", "Not Needed N-Incorrect",
                             "Not Using N-Total", "Not Using N-Correct", "Not Using N-Incorrect")

knitr::kable(t(distribution3), digits = 1)
Using Models N-Total Using Models N-Correct Using Models N-Incorrect Not Needed N-Total Not Needed N-Correct Not Needed N-Incorrect Not Using N-Total Not Using N-Correct Not Using N-Incorrect
Newman 80 59 21 26 21 5 44 27 17
NewmanPercent % 74 26 % 81 19 % 61 39
NewmanChiSquare Chi-squared p = 0.1738
Diastereomer 80 61 19 28 21 7 42 24 18
DiastereomerPercent % 76 24 % 75 25 % 57 43
DiasteChiSquare Chi-squared p = 0.0757
EnantiomerRing 36 24 12 54 40 14 60 34 26
EnantiomerRingPercert % 67 33 % 74 26 % 57 43
EnantiomerChiSquare Chi-squared p = 0.1466
WedgeDash 94 64 30 13 11 2 43 27 16
WedgeDashPercent % 68 32 % 85 15 % 63 37
WedgeDashChiSquare Chi-squared p = 0.3352
DiastereomerMeso 49 35 14 50 40 10 51 36 15
DiastereomerMesoPercent % 71 29 % 80 20 % 71 29
DiastereomerChiSquare Chi-squared p = 0.4935

3.1 Newman

plotBarAndCorr(students,"newman","newmanQ","Using Models","N of students","Newman projection question")

The Chi-square analysis gives a p= 0.11676

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

3.2 Diaste

plotBarAndCorr(students,"diaste","diasteQ","Using Models","N of students","Diastereomer question")

The Chi-square analysis gives a p= 0.05442

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

3.3 Enantiomer ring

plotBarAndCorr(students,"enanto","enantoQ","Using Models","N of students","Enantiomer ring question")

The Chi-square analysis gives a p= 0.07037

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

3.4 Wedge dash

plotBarAndCorr(students,"didash","didashQ","Using Models","N of students","Wedge Dash question")

The Chi-square analysis gives a p= 0.34409

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

3.5 Diastereomer Meso

plotBarAndCorr(students,"dimeso","dimesoQ","Using Models","N of students","Diastereomer Meso question")

The Chi-square analysis gives a p= 0.16913

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

4 Crossing among groups

4.1 Three categories: Using Models/ Not Needing Models / Not Using Models - Color System Exam 1

import pandas as pd
import plotly.express as px

# Per-student table; presumably the out.csv exported by the R code earlier in
# this document (confirm that the paths agree).
students = pd.read_csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/out.csv")

# Three-group model-use column of each exercise -> axis title, in display order.
axis_titles = {
    "didash": "Wedge/Dash",
    "diaste": "Diastereomers",
    "newman": "Newman",
    "dimeso": "Diastereomer Meso",
    "enanto": "Enantiomer Ring",
}
exercise_columns = list(axis_titles)

# Keep the exercise columns plus the Exam 1 score that drives the colour, and
# sort the rows by the exercise columns in descending order.
modelUse = students[exercise_columns + ["Exam..1"]]
sortedModelUse = modelUse.sort_values(by=exercise_columns, axis=0, ascending=False)

fig = px.parallel_categories(
    sortedModelUse,
    color="Exam..1",
    #color_continuous_scale='Bluered_r',
    labels=axis_titles,
)
fig.show()

4.2 Two categories: Using Models / Not Using Models - Color system Exam 1

# Two-group model-use column of each exercise -> axis title, in display order.
axis_titles = {
    "didash2": "Wedge/Dash",
    "diaste2": "Diastereomers",
    "newman2": "Newman",
    "dimeso2": "Diastereomer Meso",
    "enanto2": "Enantiomer Ring",
}
exercise_columns = list(axis_titles)

# Keep the exercise columns plus the Exam 1 score that drives the colour, and
# sort the rows by the exercise columns in descending order.
modelUse = students[exercise_columns + ["Exam..1"]]
sortedModelUse = modelUse.sort_values(by=exercise_columns, axis=0, ascending=False)

fig = px.parallel_categories(
    sortedModelUse,
    color="Exam..1",
    #color_continuous_scale='Bluered_r',
    labels=axis_titles,
)
fig.show()

4.3 Four categories: Using Models/ Not Using Models & Correct/Incorrect - Color system Exam 1

# Combined use/correctness column of each exercise -> axis title, in display order.
axis_titles = {
    "didashComb": "Wedge/Dash",
    "diasteComb": "Diastereomers",
    "newmanComb": "Newman",
    "dimesoComb": "Diastereomer Meso",
    "enantoComb": "Enantiomer Ring",
}
exercise_columns = list(axis_titles)

# Keep the exercise columns plus the Exam 1 score that drives the colour, and
# sort the rows by the exercise columns in descending order.
modelUse = students[exercise_columns + ["Exam..1"]]
sortedModelUse = modelUse.sort_values(by=exercise_columns, axis=0, ascending=False)

fig = px.parallel_categories(
    sortedModelUse,
    color="Exam..1",
    #color_continuous_scale='Bluered_r',
    labels=axis_titles,
)
fig.show()
#maybe this can be done easier with parallel categories https://plotly.com/python/parallel-categories-diagram/

# Sankey diagram of how students move between model-use groups from one
# exercise to the next.
#
# Nodes are numbered from 0: three nodes per exercise, in the order of
# `categories`, so exercise e (1-based) and category c (1-based) is node
# (e - 1) * 3 + (c - 1). Every node of one exercise links to every node of the
# next one, giving 9 links per transition.

#order from most used to least used
exercises <- c("didash", "newman", "diaste", "dimeso", "enanto")
# Values stored in the `students` columns, in the same order as the node labels
# "Using", "Not Using", "Not Needed" below.
categories <- c("Using", "NotUsing", "NotNeed")
nCat <- length(categories)
nTransitions <- length(exercises) - 1

# Source node of every link: 0,0,0,1,1,1,...,11,11,11.
# (The earlier loop appended to the base function `source` instead of to
# `mysource` and overwrote the result on every iteration.)
mysource <- rep(seq(0, nTransitions * nCat - 1), each = nCat)

# Target node of every link: 3,4,5 three times, then 6,7,8 three times, ...
mytarget <- unlist(lapply(seq_len(nTransitions), function(k) {
  rep(k * nCat + seq_len(nCat) - 1, times = nCat)
}))

# Link value = number of students in group A for one exercise and group B for
# the next. Fixing the factor levels keeps empty combinations as 0, so there are
# always 9 values per transition; t() lists them source by source, matching the
# order of `mysource` / `mytarget`.
myvalue <- unlist(lapply(seq_len(nTransitions), function(k) {
  from <- factor(students[[exercises[k]]], levels = categories)
  to <- factor(students[[exercises[k + 1]]], levels = categories)
  as.vector(t(table(from, to)))
}))
stopifnot(
  length(mysource) == length(mytarget),
  length(mysource) == length(myvalue)
)

library(plotly)
fig <- plot_ly(
    type = "sankey",
    orientation = "h",

    node = list(
      label = rep(c("Using", "Not Using", "Not Needed"), times = length(exercises)),
      color = rep("blue",each=15),
      pad = 15,
      thickness = 20,
      line = list(
        color = "black",
        width = 0.5
      )
    ),

    link = list(
      source = mysource,
      target = mytarget,
      value =  myvalue
    )
  )
fig <- fig %>% layout(
    title = "Students Model Use Through Exercises",
    font = list(
      size = 10
    )
)

fig

5 Newman Projection Exercise

The relevant data that we are going to test against course performance for each exercise is:

  • Did they get the exercise correct? Yes/No
  • Did they use the model? Yes/(any other answer)

Very few students reply “no” to the question “have you found the models helpful”, so the statistics would be skewed. It may be better to just analyze whether they used them or not.

The p-values shown in the boxplot are not an ANOVA analysis. Rather it is a t-test of that group against the rest of students.

5.1 Exam 1

newman = addAnswerColumn(newman)
plotAndTable(newman,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 24 33.26 8.79 36.38 19.75 44.75 25.00
X12 No&Yes 21 35.15 6.51 35.50 24.00 44.25 20.25
X13 Yes&No 46 41.70 5.35 41.88 26.50 49.50 23.00
X14 Yes&Yes 59 41.71 7.23 43.75 22.75 49.75 27.00
Anova: Exam 1: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.7943966
Yes&No-No&No 0.0000178
Yes&Yes-No&No 0.0000073
Yes&No-No&Yes 0.0024101
Yes&Yes-No&Yes 0.0014855
Yes&Yes-Yes&No 0.9999998

5.2 Exam 2

plotAndTable(newman,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 23 36.76 6.52 37.40 23.30 46.0 22.70
X12 No&Yes 21 36.32 7.42 35.75 20.40 47.5 27.10
X13 Yes&No 45 40.98 6.04 41.15 19.75 49.5 29.75
X14 Yes&Yes 59 41.92 5.66 43.75 22.50 50.0 27.50
Anova: Exam 2: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.9954546
Yes&No-No&No 0.0422499
Yes&Yes-No&No 0.0048311
Yes&No-No&Yes 0.0254227
Yes&Yes-No&Yes 0.0027572
Yes&Yes-Yes&No 0.8683489

5.3 Exam 3

plotAndTable(newman,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 24 31.95 11.25 35.17 7.50 45.50 38.00
X12 No&Yes 20 28.43 10.21 28.50 11.50 47.05 35.55
X13 Yes&No 46 37.68 9.13 39.15 0.00 49.50 49.50
X14 Yes&Yes 58 39.81 8.31 41.35 19.75 50.00 30.25
Anova: Exam 3: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.5997337
Yes&No-No&No 0.0746253
Yes&Yes-No&No 0.0038671
Yes&No-No&Yes 0.0017343
Yes&Yes-No&Yes 0.0000361
Yes&Yes-Yes&No 0.6592264

5.4 Final Exam

plotAndTable(newman,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 24 59.95 15.85 62.25 36.00 90.00 54.00
X12 No&Yes 21 54.89 16.59 54.25 29.25 82.75 53.50
X13 Yes&No 46 70.46 17.72 70.88 0.00 92.75 92.75
X14 Yes&Yes 59 74.71 16.29 77.00 34.50 99.00 64.50
Anova: Final Exam: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.7422885
Yes&No-No&No 0.0648681
Yes&Yes-No&No 0.0020852
Yes&No-No&Yes 0.0030441
Yes&Yes-No&Yes 0.0000410
Yes&Yes-Yes&No 0.5695576

5.5 Quizzes

plotAndTable(newman,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")

Statistics of Quizzes Average Score based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 24 76.31 14.80 80.56 53.44 99.44 46.00
X12 No&Yes 21 71.67 16.07 72.78 46.39 97.78 51.39
X13 Yes&No 46 81.04 16.17 82.37 30.00 103.33 73.33
X14 Yes&Yes 59 87.54 13.77 90.63 51.39 105.56 54.17
Anova: Quizzes: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.7300690
Yes&No-No&No 0.5963749
Yes&Yes-No&No 0.0128058
Yes&No-No&Yes 0.0880143
Yes&Yes-No&Yes 0.0003173
Yes&Yes-Yes&No 0.1281489

6 Diastereomer Exercise

6.1 Exam 1

diaste = addAnswerColumn(diaste)
plotAndTable(diaste,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 34.61 7.22 36.50 21.50 47.50 26.00
X12 No&Yes 19 35.36 6.98 35.50 19.75 48.00 28.25
X13 Yes&No 44 40.47 7.27 41.38 19.75 49.75 30.00
X14 Yes&Yes 61 42.02 7.01 44.00 22.75 49.75 27.00
Anova: Exam 1: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.9853766
Yes&No-No&No 0.0059834
Yes&Yes-No&No 0.0001015
Yes&No-No&Yes 0.0476465
Yes&Yes-No&Yes 0.0027828
Yes&Yes-Yes&No 0.6919621

6.2 Exam 2

plotAndTable(diaste,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 24 37.10 6.50 37.10 19.75 49.0 29.25
X12 No&Yes 19 38.42 6.00 36.70 23.30 47.5 24.20
X13 Yes&No 44 39.88 6.45 40.58 20.40 49.5 29.10
X14 Yes&Yes 61 41.82 6.35 44.25 22.50 50.0 27.50
Anova: Exam 2: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.9063228
Yes&No-No&No 0.3161622
Yes&Yes-No&No 0.0130503
Yes&No-No&Yes 0.8370842
Yes&Yes-No&Yes 0.1794463
Yes&Yes-Yes&No 0.4137197

6.3 Exam 3

plotAndTable(diaste,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 29.88 12.93 33.65 0.00 50.0 50.00
X12 No&Yes 18 32.94 8.02 31.32 17.25 45.5 28.25
X13 Yes&No 44 37.47 8.65 38.52 15.75 50.0 34.25
X14 Yes&Yes 60 39.32 8.91 41.02 14.90 49.8 34.90
Anova: Exam 3: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.7237166
Yes&No-No&No 0.0088721
Yes&Yes-No&No 0.0002696
Yes&No-No&Yes 0.3321327
Yes&Yes-No&Yes 0.0676680
Yes&Yes-Yes&No 0.7658246

6.4 Final Exam

plotAndTable(diaste,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 57.58 19.79 57.75 0.00 99 99.00
X12 No&Yes 19 61.64 14.45 56.75 41.50 90 48.50
X13 Yes&No 44 69.45 15.02 68.50 37.00 94 57.00
X14 Yes&Yes 61 74.04 18.14 77.00 29.25 96 66.75
Anova: Final Exam: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.8610101
Yes&No-No&No 0.0295541
Yes&Yes-No&No 0.0004023
Yes&No-No&Yes 0.3504037
Yes&Yes-No&Yes 0.0337593
Yes&Yes-Yes&No 0.5322270

6.5 Quizzes

plotAndTable(diaste,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")

Statistics of Quizzes Average Score based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 72.15 17.57 71.78 30.00 105.56 75.56
X12 No&Yes 19 77.67 13.12 77.19 56.11 100.00 43.89
X13 Yes&No 44 81.57 15.63 85.62 43.13 104.44 61.31
X14 Yes&Yes 61 86.70 14.30 90.56 46.94 104.44 57.50
Anova: Quizzes: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.6245966
Yes&No-No&No 0.0624727
Yes&Yes-No&No 0.0004001
Yes&No-No&Yes 0.7849585
Yes&Yes-No&Yes 0.1105684
Yes&Yes-Yes&No 0.3217175

7 Enantiomer Ring Exercise

7.1 Exam 1

# addAnswerColumn() is defined elsewhere in this file; presumably it adds the
# "answers" column (question correct & model used) that plotAndTable groups by
# below -- TODO confirm against its definition.
enanto = addAnswerColumn(enanto)
# Exam 1 scores of the Enantiomers Ring rows, grouped by the "answers" column.
plotAndTable(enanto,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 42 33.29 7.08 34.62 19.75 43.50 23.75
X12 No&Yes 12 38.46 5.27 39.50 30.75 46.50 15.75
X13 Yes&No 72 41.71 6.41 42.38 20.75 49.75 29.00
X14 Yes&Yes 24 43.85 6.77 46.88 25.50 49.75 24.25
Anova: Exam 1: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.0817935
Yes&No-No&No 0.0000000
Yes&Yes-No&No 0.0000000
Yes&No-No&Yes 0.3909838
Yes&Yes-No&Yes 0.0989274
Yes&Yes-Yes&No 0.5140672

7.2 Exam 2

plotAndTable(enanto,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 41 36.35 6.25 35.95 19.75 46.15 26.40
X12 No&Yes 12 39.68 6.16 39.92 25.50 47.75 22.25
X13 Yes&No 71 41.31 5.90 42.50 22.50 50.00 27.50
X14 Yes&Yes 24 42.76 6.66 45.00 20.40 49.50 29.10
Anova: Exam 2: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.3533042
Yes&No-No&No 0.0003666
Yes&Yes-No&No 0.0004661
Yes&No-No&Yes 0.8289157
Yes&Yes-No&Yes 0.4911216
Yes&Yes-Yes&No 0.7532891

7.3 Exam 3

plotAndTable(enanto,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 42 30.18 11.66 32.05 0.00 49.0 49.00
X12 No&Yes 11 36.21 9.08 38.25 14.90 49.0 34.10
X13 Yes&No 71 38.60 7.81 39.75 17.25 50.0 32.75
X14 Yes&Yes 24 40.48 9.41 43.77 16.00 49.8 33.80
Anova: Exam 3: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.2334518
Yes&No-No&No 0.0000526
Yes&Yes-No&No 0.0001913
Yes&No-No&Yes 0.8619775
Yes&Yes-No&Yes 0.5976589
Yes&Yes-Yes&No 0.8307620

7.4 Final Exam

plotAndTable(enanto,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 42 56.25 18.85 54.62 0.00 90.0 90.00
X12 No&Yes 12 64.90 17.76 70.88 33.75 88.0 54.25
X13 Yes&No 72 73.09 14.00 71.50 34.50 99.0 64.50
X14 Yes&Yes 24 76.51 17.98 83.12 35.75 95.9 60.15
Anova: Final Exam: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.3772369
Yes&No-No&No 0.0000027
Yes&Yes-No&No 0.0000211
Yes&No-No&Yes 0.3813311
Yes&Yes-No&Yes 0.1929542
Yes&Yes-Yes&No 0.8145588

7.5 Quizzes

plotAndTable(enanto,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")

Statistics of Quizzes Average Score based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 42 70.18 16.66 69.31 30.00 97.78 67.78
X12 No&Yes 12 82.65 11.54 84.62 63.75 99.44 35.69
X13 Yes&No 72 85.03 13.07 86.46 54.44 105.56 51.12
X14 Yes&Yes 24 90.32 14.23 96.25 46.67 104.44 57.77
Anova: Quizzes: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.0410941
Yes&No-No&No 0.0000018
Yes&Yes-No&No 0.0000009
Yes&No-No&Yes 0.9500051
Yes&Yes-No&Yes 0.4261788
Yes&Yes-Yes&No 0.3958202

8 Wedge Dash Exercise

8.1 Exam 1

# addAnswerColumn() is defined elsewhere in this file; presumably it adds the
# "answers" column (question correct & model used) that plotAndTable groups by
# below -- TODO confirm against its definition.
didash = addAnswerColumn(didash)
# Exam 1 scores of the Diastereomers Wedge and Dash rows, grouped by the "answers" column.
plotAndTable(didash,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 19 33.29 7.80 36.75 19.75 44.00 24.25
X12 No&Yes 30 34.94 6.64 34.75 22.75 47.25 24.50
X13 Yes&No 37 40.18 7.13 40.75 20.75 49.75 29.00
X14 Yes&Yes 64 42.93 6.23 44.12 19.75 49.75 30.00
Anova: Exam 1: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.8377591
Yes&No-No&No 0.0022821
Yes&Yes-No&No 0.0000011
Yes&No-No&Yes 0.0102264
Yes&Yes-No&Yes 0.0000020
Yes&Yes-Yes&No 0.2026792

8.2 Exam 2

plotAndTable(didash,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 19 35.08 6.24 37.15 23.30 45.0 21.70
X12 No&Yes 30 38.02 7.62 36.27 20.40 50.0 29.60
X13 Yes&No 35 40.15 6.27 40.65 19.75 49.5 29.75
X14 Yes&Yes 64 42.40 5.07 43.95 23.75 49.5 25.75
Anova: Exam 2: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.3559186
Yes&No-No&No 0.0205889
Yes&Yes-No&No 0.0000533
Yes&No-No&Yes 0.4947170
Yes&Yes-No&Yes 0.0076862
Yes&Yes-Yes&No 0.2998958

8.3 Exam 3

plotAndTable(didash,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 19 28.01 11.14 30.30 7.5 40.3 32.8
X12 No&Yes 29 31.67 9.83 29.90 16.0 48.0 32.0
X13 Yes&No 37 37.26 10.02 38.50 0.0 50.0 50.0
X14 Yes&Yes 63 40.45 7.47 41.65 21.3 50.0 28.7
Anova: Exam 3: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.5272977
Yes&No-No&No 0.0025590
Yes&Yes-No&No 0.0000039
Yes&No-No&Yes 0.0699516
Yes&Yes-No&Yes 0.0001966
Yes&Yes-Yes&No 0.3343617

8.4 Final Exam

plotAndTable(didash,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 19 56.90 12.79 60.75 36.00 80.75 44.75
X12 No&Yes 30 60.59 19.63 57.88 29.25 96.00 66.75
X13 Yes&No 37 67.63 19.14 68.50 0.00 95.50 95.50
X14 Yes&Yes 64 75.61 14.85 77.88 39.50 99.00 59.50
Anova: Final Exam: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.8767616
Yes&No-No&No 0.1116913
Yes&Yes-No&No 0.0002093
Yes&No-No&Yes 0.3248693
Yes&Yes-No&Yes 0.0004945
Yes&Yes-Yes&No 0.1025616

8.5 Quizzes

plotAndTable(didash,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")

Statistics of Quizzes Average Score based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 19 68.84 12.57 66.39 46.39 88.61 42.22
X12 No&Yes 30 74.68 16.70 74.59 46.67 102.22 55.55
X13 Yes&No 37 82.90 15.16 86.25 30.00 104.44 74.44
X14 Yes&Yes 64 87.72 13.57 90.60 43.13 105.56 62.43
Anova: Quizzes: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.5209217
Yes&No-No&No 0.0043566
Yes&Yes-No&No 0.0000108
Yes&No-No&Yes 0.1017889
Yes&Yes-No&Yes 0.0004638
Yes&Yes-Yes&No 0.3784543

9 Diastereomer Meso Exercise

9.1 Exam 1

# addAnswerColumn() is defined elsewhere in this file; presumably it adds the
# "answers" column (question correct & model used) that plotAndTable groups by
# below -- TODO confirm against its definition.
dimeso = addAnswerColumn(dimeso)
# Exam 1 scores of the Diastereomer Meso rows, grouped by the "answers" column.
plotAndTable(dimeso,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 34.24 7.58 35.50 19.75 45.00 25.25
X12 No&Yes 14 31.82 7.37 33.62 19.75 47.50 27.75
X13 Yes&No 75 41.07 6.16 41.25 22.75 49.50 26.75
X14 Yes&Yes 35 42.83 7.09 45.50 20.75 49.75 29.00
Anova: Exam 1: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.7020425
Yes&No-No&No 0.0001010
Yes&Yes-No&No 0.0000142
Yes&No-No&Yes 0.0000344
Yes&Yes-No&Yes 0.0000048
Yes&Yes-Yes&No 0.5837784

9.2 Exam 2

plotAndTable(dimeso,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 25 37.51 6.13 37.75 24.50 50.0 25.50
X12 No&Yes 14 34.74 7.80 34.62 20.40 49.5 29.10
X13 Yes&No 74 40.80 6.01 41.10 19.75 49.5 29.75
X14 Yes&Yes 35 42.36 5.87 44.25 23.30 49.0 25.70
Anova: Exam 2: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.5369279
Yes&No-No&No 0.1019028
Yes&Yes-No&No 0.0167917
Yes&No-No&Yes 0.0053259
Yes&Yes-No&Yes 0.0008415
Yes&Yes-Yes&No 0.6108740

9.3 Exam 3

plotAndTable(dimeso,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 33.81 9.43 37.50 8.75 45.5 36.75
X12 No&Yes 14 25.78 11.45 27.15 7.50 46.8 39.30
X13 Yes&No 73 37.71 9.25 38.55 0.00 49.8 49.80
X14 Yes&Yes 35 39.57 8.98 41.00 17.25 50.0 32.75
Anova: Exam 3: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.0544558
Yes&No-No&No 0.2728122
Yes&Yes-No&No 0.0902442
Yes&No-No&Yes 0.0001605
Yes&Yes-No&Yes 0.0000498
Yes&Yes-Yes&No 0.7737299

9.4 Final Exam

plotAndTable(dimeso,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 61.28 15.92 63.50 29.25 96.00 66.75
X12 No&Yes 14 54.45 18.08 52.62 29.25 93.25 64.00
X13 Yes&No 75 70.30 17.15 71.75 0.00 95.50 95.50
X14 Yes&Yes 35 74.63 17.88 77.75 35.75 99.00 63.25
Anova: Final Exam: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.6287530
Yes&No-No&No 0.1021404
Yes&Yes-No&No 0.0167406
Yes&No-No&Yes 0.0101008
Yes&Yes-No&Yes 0.0016576
Yes&Yes-Yes&No 0.6086441

9.5 Quizzes

plotAndTable(dimeso,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")

Statistics of Quizzes Average Score based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 No&No 26 73.73 14.23 73.47 46.94 102.22 55.28
X12 No&Yes 14 66.33 15.93 64.69 46.39 101.11 54.72
X13 Yes&No 75 83.78 14.99 85.56 30.00 103.33 73.33
X14 Yes&Yes 35 88.59 13.13 90.63 55.31 105.56 50.25
Anova: Quizzes: Was the question correct? Did they use the models?
Testing statistical significance: p-values
No&Yes-No&No 0.4186218
Yes&No-No&No 0.0148549
Yes&Yes-No&No 0.0006988
Yes&No-No&Yes 0.0003605
Yes&Yes-No&Yes 0.0000191
Yes&Yes-Yes&No 0.3723377
  • Analyzing those who have already transitioned and don’t need the models: how many are not using the models because they are visualizing the molecule in their head or using R/S assignments?

  • How many students don’t need the models at all, and how many don’t need them only for specific questions? Which questions?

10 Demographics and general course performance

# Attach demographics to `students`: load the merged demographics export, keep
# only its "Newman Projection (1.1)" rows, select the demographic columns, and
# join them onto `students` by ID.
demoData <- read.csv(
  "~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/modelusage_mergedwithdemographics_cleanforexternal.csv",
  header = TRUE
)
# %in% treats a missing Question as "no match", so such rows are dropped here.
demoData <- demoData[demoData$Question %in% "Newman Projection (1.1)", ]
trimDemoData <- demoData[, c(
  "ID", "Sex", "Ethnicity", "SOC", "First.Generation",
  "Underrepresented", "Home.state", "HS.GPA", "LLC"
)]
# merge() defaults to an inner join: students without a matching demographics
# row are not kept in `students`.
students <- merge(students, trimDemoData, by = "ID")

10.1 Sex

plotAndTable(students,"Sex","Exam..1","Exam 1: Sex","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 F 112 39.08 8.05 40.50 19.75 49.75 30.00
X12 M 24 40.73 7.02 40.62 22.75 49.50 26.75
Anova: Exam 1: Sex
Testing statistical significance: p-values
0.3540949
plotAndTable(students,"Sex","Exam..2","Exam 2: Sex","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 F 110 39.94 6.68 40.52 19.75 50.0 30.25
X12 M 24 39.44 6.75 39.90 22.50 49.5 27.00
Anova: Exam 2: Sex
Testing statistical significance: p-values
0.743875
plotAndTable(students,"Sex","Exam..3","Exam 3: Sex","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 F 110 35.66 10.76 37.88 0.0 50 50.0
X12 M 24 38.55 8.09 39.80 19.8 50 30.2
Anova: Exam 3: Sex
Testing statistical significance: p-values
0.2168484
plotAndTable(students,"Sex","Final.Exam","Final Exam: Sex","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 F 112 67.07 19.04 68.50 0.0 96 96.0
X12 M 24 71.83 16.34 68.38 34.5 99 64.5
Anova: Final Exam: Sex
Testing statistical significance: p-values
0.2580761
plotAndTable(students,"Sex","HS.GPA","HS GPA: Sex","HS GPA")

Statistics of HS GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 F 103 3.66 0.36 3.70 2.72 4.46 1.74
X12 M 19 3.56 0.40 3.78 2.77 3.95 1.18
Anova: HS GPA: Sex
Testing statistical significance: p-values
0.2704221

10.2 Ethnicity

plotAndTable(students,"Ethnicity","Exam..1","Exam 1: Ethnicity","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Am. Indian 2 39.75 2.12 39.75 38.25 41.25 3.00
X12 Asian 29 38.90 7.76 39.25 23.75 49.75 26.00
X13 Black 21 39.57 8.74 43.25 19.75 49.50 29.75
X14 Hispanic 10 35.33 6.94 37.75 24.50 45.00 20.50
X15 NS 5 40.80 5.53 39.25 35.25 49.00 13.75
X16 White 69 39.98 8.04 41.50 19.75 49.75 30.00
Anova: Exam 1: Ethnicity
Testing statistical significance: p-values
Asian-Am. Indian 0.9999902
Black-Am. Indian 1.0000000
Hispanic-Am. Indian 0.9791369
NS-Am. Indian 0.9999859
White-Am. Indian 1.0000000
Black-Asian 0.9996832
Hispanic-Asian 0.8222409
NS-Asian 0.9962504
White-Asian 0.9895042
Hispanic-Black 0.7306292
NS-Black 0.9996011
White-Black 0.9999457
NS-Hispanic 0.8056449
White-Hispanic 0.5109012
White-NS 0.9999231
plotAndTable(students,"Ethnicity","Exam..2","Exam 2: Ethnicity","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Am. Indian 2 39.47 4.70 39.47 36.15 42.8 6.65
X12 Asian 29 39.51 6.82 40.60 20.40 49.0 28.60
X13 Black 21 41.49 5.43 43.75 29.15 48.0 18.85
X14 Hispanic 10 36.00 6.69 36.58 25.50 44.9 19.40
X15 NS 5 41.38 6.49 38.65 35.80 50.0 14.20
X16 White 67 39.95 6.98 40.25 19.75 49.5 29.75
Anova: Exam 2: Ethnicity
Testing statistical significance: p-values
Asian-Am. Indian 1.0000000
Black-Am. Indian 0.9985074
Hispanic-Am. Indian 0.9847733
NS-Am. Indian 0.9993764
White-Am. Indian 0.9999986
Black-Asian 0.9036295
Hispanic-Asian 0.7085177
NS-Asian 0.9921739
White-Asian 0.9996706
Hispanic-Black 0.2734223
NS-Black 1.0000000
White-Black 0.9394133
NS-Hispanic 0.6834781
White-Hispanic 0.5058310
White-NS 0.9973010
plotAndTable(students,"Ethnicity","Exam..3","Exam 3: Ethnicity","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Am. Indian 2 38.38 4.07 38.38 35.50 41.25 5.75
X12 Asian 29 35.65 10.49 35.60 7.50 50.00 42.50
X13 Black 20 38.33 8.59 38.50 20.05 49.50 29.45
X14 Hispanic 10 28.84 11.07 30.55 11.50 41.50 30.00
X15 NS 5 34.96 9.26 31.50 24.75 45.00 20.25
X16 White 68 36.88 10.69 40.00 0.00 50.00 50.00
Anova: Exam 3: Ethnicity
Testing statistical significance: p-values
Asian-Am. Indian 0.9991681
Black-Am. Indian 1.0000000
Hispanic-Am. Indian 0.8389091
NS-Am. Indian 0.9987172
White-Am. Indian 0.9999526
Black-Asian 0.9471217
Hispanic-Asian 0.4689982
NS-Asian 0.9999930
White-Asian 0.9943988
Hispanic-Black 0.1723634
NS-Black 0.9865252
White-Black 0.9938049
NS-Hispanic 0.8869511
White-Hispanic 0.2003628
White-NS 0.9986176
plotAndTable(students,"Ethnicity","Final.Exam","Final Exam: Ethnicity","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Am. Indian 2 63.25 2.47 63.25 61.50 65.00 3.5
X12 Asian 29 69.64 15.63 65.60 38.50 94.00 55.5
X13 Black 21 69.76 18.59 75.00 29.25 92.75 63.5
X14 Hispanic 10 56.35 17.11 58.25 29.25 80.75 51.5
X15 NS 5 68.60 21.28 59.75 46.50 96.00 49.5
X16 White 69 68.39 19.95 70.00 0.00 99.00 99.0
Anova: Final Exam: Ethnicity
Testing statistical significance: p-values
Asian-Am. Indian 0.9971308
Black-Am. Indian 0.9970403
Hispanic-Am. Indian 0.9968637
NS-Am. Indian 0.9993634
White-Am. Indian 0.9988986
Black-Asian 1.0000000
Hispanic-Asian 0.3813761
NS-Asian 0.9999971
White-Asian 0.9996445
Hispanic-Black 0.4247844
NS-Black 0.9999956
White-Black 0.9996888
NS-Hispanic 0.8368208
White-Hispanic 0.4030487
White-NS 1.0000000
plotAndTable(students,"Ethnicity","HS.GPA","HS GPA: Ethnicity","HS GPA")

Statistics of HS GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Am. Indian 1 3.94 NA 3.94 3.94 3.94 0.00
X12 Asian 28 3.76 0.25 3.84 3.10 4.17 1.06
X13 Black 21 3.45 0.42 3.55 2.74 4.05 1.31
X14 Hispanic 9 3.42 0.52 3.37 2.83 4.22 1.39
X15 NS 3 3.84 0.29 3.86 3.53 4.12 0.59
X16 White 60 3.68 0.35 3.74 2.72 4.46 1.74
Anova: HS GPA: Ethnicity
Testing statistical significance: p-values
Asian-Am. Indian 0.9960981
Black-Am. Indian 0.7463281
Hispanic-Am. Indian 0.7270919
NS-Am. Indian 0.9998282
White-Am. Indian 0.9786182
Black-Asian 0.0297331
Hispanic-Asian 0.1257678
NS-Asian 0.9993950
White-Asian 0.9269927
Hispanic-Black 0.9999643
NS-Black 0.4844470
White-Black 0.0959457
NS-Hispanic 0.4959608
White-Hispanic 0.3023017
White-NS 0.9788118

10.3 First Generation

plotAndTable(students,"First.Generation","Exam..1","Exam 1: First Generation","Exam 1")

Statistics of Exam 1 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 N 76 40.83 7.05 41.38 20.75 49.75 29
X12 Y 60 37.52 8.52 39.75 19.75 49.75 30
Anova: Exam 1: First Generation
Testing statistical significance: p-values
0.0143914
plotAndTable(students,"First.Generation","Exam..2","Exam 2: First Generation","Exam 2")

Statistics of Exam 2 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 N 74 40.84 6.53 42.20 22.50 50.0 27.50
X12 Y 60 38.62 6.69 38.52 19.75 49.5 29.75
Anova: Exam 2: First Generation
Testing statistical significance: p-values
0.0542465
plotAndTable(students,"First.Generation","Exam..3","Exam 3: First Generation","Exam 3")

Statistics of Exam 3 based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 N 75 37.48 9.96 40.00 8.75 50 41.25
X12 Y 59 34.53 10.71 35.55 0.00 50 50.00
Anova: Exam 3: First Generation
Testing statistical significance: p-values
0.1026239
plotAndTable(students,"First.Generation","Final.Exam","Final Exam: First Generation","Final Exam")

Statistics of Final Exam based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 N 76 70.82 18.24 73.12 29.25 99 69.75
X12 Y 60 64.23 18.60 65.25 0.00 94 94.00
Anova: Final Exam: First Generation
Testing statistical significance: p-values
0.0397817
plotAndTable(students,"First.Generation","HS.GPA","HS GPA: First Generation","HS GPA")

Statistics of HS GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 N 64 3.67 0.38 3.75 2.74 4.46 1.72
X12 Y 58 3.62 0.36 3.69 2.72 4.22 1.50
Anova: HS GPA: First Generation
Testing statistical significance: p-values
0.4748857

10.4 High School GPA

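We can plot high school GPA (y-axis) against the final score (x-axis), and then tabulate the pairwise correlations between the numerical variables: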

plot(students$Final.Score,students$HS.GPA)

# Pairwise correlations between the numeric score columns and HS GPA; each pair
# is computed from the rows where both values are present.
mycor <- cor(
  students[, c(
    "Exam..1", "Exam..2", "Exam..3", "Final.Exam",
    "Quizzes.Final.Score", "Final.Score", "HS.GPA"
  )],
  use = "pairwise.complete.obs"
)

# Round to 3 decimals, then blank the redundant upper triangle. Assigning ""
# turns the matrix into character, so kable prints empty cells there.
upper <- round(mycor, digits = 3)
upper[upper.tri(upper)] <- ""
# Alternative rendering, kept for reference:
# upper <- as.data.frame(upper)
# upper
# library(xtable)
# print(xtable(upper, type = "html"))

knitr::kable(upper, caption = "Correlation between the numerical data")
Correlation between the numerical data
Exam..1 Exam..2 Exam..3 Final.Exam Quizzes.Final.Score Final.Score HS.GPA
Exam..1 1
Exam..2 0.725 1
Exam..3 0.772 0.791 1
Final.Exam 0.813 0.851 0.883 1
Quizzes.Final.Score 0.755 0.778 0.81 0.83 1
Final.Score 0.794 0.854 0.895 0.909 0.897 1
HS.GPA 0.312 0.372 0.282 0.366 0.391 0.405 1

11 Model use by demographics

11.1 Newman Projection

11.1.1 By sex

plotBarAndCorr(students,"Sex","newman","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.20455

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"Sex","newmanComb","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.04451

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.1.2 By Student of color

plotBarAndCorr(students,"SOC","newman","Model use","N of students","Model use by Student of Color (SOC)")

The Chi-square analysis gives a p= 0.14023

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"SOC","newmanComb","Model use","N of students","Model use by Student of Color")

The Chi-square analysis gives a p= 0.04874

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.1.3 By First Generation

plotBarAndCorr(students,"First.Generation","newman","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.50607

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# Newman exercise: combined use/correctness groups by first-generation status.
# The last argument (presumably the plot title) follows the
# "Model use by <factor>" wording used by the other plots in this section.
plotBarAndCorr(students,"First.Generation","newmanComb","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.10626

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.1.4 HS GPA

plotAndTable(students,"newman","HS.GPA","Using/Not Using/Not Needed","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 NotNeed 20 3.78 0.26 3.80 3.19 4.22 1.03
X12 NotUsing 38 3.48 0.36 3.47 2.74 4.17 1.43
X13 Using 64 3.71 0.37 3.82 2.72 4.46 1.74
Anova: Using/Not Using/Not Needed
Testing statistical significance: p-values
NotUsing-NotNeed 0.0072797
Using-NotNeed 0.7495826
Using-NotUsing 0.0043354
plotAndTable(students,"newmanComb","HS.GPA","Using/Not Using - Correct/Incorrect","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Notusing Correct 39 3.56 0.37 3.60 2.74 4.22 1.48
X12 Notusing Incorrect 19 3.63 0.33 3.70 2.83 4.17 1.33
X13 Using Correct 45 3.75 0.37 3.86 2.72 4.46 1.74
X14 Using Incorrect 19 3.61 0.37 3.71 2.82 4.00 1.19
Anova: Using/Not Using - Correct/Incorrect
Testing statistical significance: p-values
Notusing Incorrect-Notusing Correct 0.8933020
Using Correct-Notusing Correct 0.0657781
Using Incorrect-Notusing Correct 0.9603225
Using Correct-Notusing Incorrect 0.5834926
Using Incorrect-Notusing Incorrect 0.9977177
Using Incorrect-Using Correct 0.4463924

11.2 Diastereomers Model

11.2.1 By sex

plotBarAndCorr(students,"Sex","diaste","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.02205

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"Sex","diasteComb","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.0152

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.2.2 By Student of color

plotBarAndCorr(students,"SOC","diaste","Model use","N of students","Model use by Student of Color (SOC)")

The Chi-square analysis gives a p= 0.08004

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"SOC","diasteComb","Model use","N of students","Model use by Student of Color")

The Chi-square analysis gives a p= 0.29166

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.2.3 By First Generation

plotBarAndCorr(students,"First.Generation","diaste","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.42854

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# Diastereomers model exercise: combined use/correctness groups by
# first-generation status. The last argument (presumably the plot title)
# follows the "Model use by <factor>" wording used by the other plots in this section.
plotBarAndCorr(students,"First.Generation","diasteComb","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.46868

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.2.4 HS GPA

plotAndTable(students,"diaste","HS.GPA","Using/Not Using/Not Needed","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 NotNeed 23 3.71 0.30 3.82 2.92 4.17 1.25
X12 NotUsing 37 3.43 0.36 3.44 2.74 4.25 1.51
X13 Using 62 3.75 0.35 3.86 2.72 4.46 1.74
Anova: Using/Not Using/Not Needed
Testing statistical significance: p-values
NotUsing-NotNeed 0.0058891
Using-NotNeed 0.8811543
Using-NotUsing 0.0000336
plotAndTable(students,"diasteComb","HS.GPA","Using/Not Using - Correct/Incorrect","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Notusing Correct 38 3.49 0.38 3.53 2.74 4.17 1.43
X12 Notusing Incorrect 22 3.61 0.32 3.69 2.83 4.25 1.42
X13 Using Correct 48 3.78 0.36 3.90 2.72 4.46 1.74
X14 Using Incorrect 14 3.67 0.31 3.68 3.19 4.22 1.04
Anova: Using/Not Using - Correct/Incorrect
Testing statistical significance: p-values
Notusing Incorrect-Notusing Correct 0.5995632
Using Correct-Notusing Correct 0.0019687
Using Incorrect-Notusing Correct 0.3695345
Using Correct-Notusing Incorrect 0.2741740
Using Incorrect-Notusing Incorrect 0.9568384
Using Incorrect-Using Correct 0.7719006

11.3 Enantiomer Ring

11.3.1 By sex

plotBarAndCorr(students,"Sex","enanto","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.59795

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"Sex","enantoComb","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.02156

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.3.2 By Student of color

plotBarAndCorr(students,"SOC","enanto","Model use","N of students","Model use by Student of Color (SOC)")

The Chi-square analysis gives a p= 0.46662

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"SOC","enantoComb","Model use","N of students","Model use by Student of Color")

The Chi-square analysis gives a p= 0.04216

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.3.3 By First Generation

plotBarAndCorr(students,"First.Generation","enanto","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.04054

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# Enantiomers ring exercise: combined use/correctness groups by
# first-generation status. The last argument (presumably the plot title)
# follows the "Model use by <factor>" wording used by the other plots in this section.
plotBarAndCorr(students,"First.Generation","enantoComb","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.09056

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.3.4 HS GPA

plotAndTable(students,"enanto","HS.GPA","Using/Not Using/Not Needed","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 NotNeed 45 3.80 0.31 3.87 2.83 4.28 1.45
X12 NotUsing 52 3.45 0.35 3.50 2.72 3.98 1.26
X13 Using 25 3.77 0.33 3.88 3.08 4.46 1.38
Anova: Using/Not Using/Not Needed
Testing statistical significance: p-values
NotUsing-NotNeed 0.0000032
Using-NotNeed 0.9452032
Using-NotUsing 0.0003567
plotAndTable(students,"enantoComb","HS.GPA","Using/Not Using - Correct/Incorrect","Highschool GPA")

Statistics of Highschool GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Notusing Correct 59 3.63 0.38 3.74 2.74 4.28 1.54
X12 Notusing Incorrect 38 3.58 0.37 3.66 2.72 4.25 1.53
X13 Using Correct 16 3.90 0.25 3.94 3.31 4.46 1.15
X14 Using Incorrect 9 3.55 0.35 3.53 3.08 4.00 0.92
Anova: Using/Not Using - Correct/Incorrect
Testing statistical significance: p-values
Notusing Incorrect-Notusing Correct 0.9115999
Using Correct-Notusing Correct 0.0480009
Using Incorrect-Notusing Correct 0.9183449
Using Correct-Notusing Incorrect 0.0202872
Using Incorrect-Notusing Incorrect 0.9945846
Using Incorrect-Using Correct 0.0982578

11.4 Wedge Dash

11.4.1 By sex

plotBarAndCorr(students,"Sex","didash","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.08109

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"Sex","didashComb","Model use","N of students","Model use by sex")

The Chi-square analysis gives a p= 0.05317

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.4.2 By Student of color

plotBarAndCorr(students,"SOC","didash","Model use","N of students","Model use by Student of Color (SOC)")

The Chi-square analysis gives a p= 0.62383

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

plotBarAndCorr(students,"SOC","didashComb","Model use","N of students","Model use by Student of Color")

The Chi-square analysis gives a p= 0.89281

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

11.4.3 By First Generation

plotBarAndCorr(students,"First.Generation","didash","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives a p= 0.44463

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# Wedge and dash exercise: combined use/correctness groups by
# first-generation status. The last argument (presumably the plot title)
# follows the "Model use by <factor>" wording used by the other plots in this section.
plotBarAndCorr(students,"First.Generation","didashComb","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives p = 0.73594.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

11.4.4 HS GPA

plotAndTable(students,"didash","HS.GPA","Using/Not Using/Not Needed","Highschool GPA")

Statistics of high school GPA by model-use group (Using / Not Using / Not Needed)
group1 n mean sd median min max range
X11 NotNeed 10 3.72 0.33 3.86 2.92 4.00 1.08
X12 NotUsing 36 3.46 0.34 3.47 2.74 4.17 1.43
X13 Using 76 3.73 0.36 3.82 2.72 4.46 1.74
Anova: Using/Not Using/Not Needed
Testing statistical significance: p-values
NotUsing-NotNeed 0.1128374
Using-NotNeed 0.9949252
Using-NotUsing 0.0008828
plotAndTable(students,"didashComb","HS.GPA","Using/Not Using - Correct/Incorrect","Highschool GPA")

Statistics of high school GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Notusing Correct 31 3.52 0.36 3.55 2.74 4.00 1.26
X12 Notusing Incorrect 15 3.50 0.33 3.49 2.92 4.17 1.25
X13 Using Correct 52 3.76 0.33 3.86 2.72 4.46 1.74
X14 Using Incorrect 24 3.65 0.42 3.74 2.80 4.25 1.45
Anova: Using/Not Using - Correct/Incorrect
Testing statistical significance: p-values
Notusing Incorrect-Notusing Correct 0.9973854
Using Correct-Notusing Correct 0.0197914
Using Incorrect-Notusing Correct 0.5753114
Using Correct-Notusing Incorrect 0.0663154
Using Incorrect-Notusing Incorrect 0.5990924
Using Incorrect-Using Correct 0.5658395

11.5 Dimeso

11.5.1 By sex

plotBarAndCorr(students,"Sex","dimeso","Model use","N of students","Model use by sex")

The Chi-square analysis gives p = 0.81715.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

plotBarAndCorr(students,"Sex","dimesoComb","Model use","N of students","Model use by sex")

The Chi-square analysis gives p = 0.04783.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

11.5.2 By Student of color

plotBarAndCorr(students,"SOC","dimeso","Model use","N of students","Model use by Student of Color (SOC)")

The Chi-square analysis gives p = 0.76848.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

plotBarAndCorr(students,"SOC","dimesoComb","Model use","N of students","Model use by Student of Color")

The Chi-square analysis gives p = 0.69718.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

11.5.3 By First Generation

plotBarAndCorr(students,"First.Generation","dimeso","Model use","N of students","Model use by First Generation")

The Chi-square analysis gives p = 0.36419.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

plotBarAndCorr(students,"First.Generation","dimesoComb","Model use","N of students","Model use First Generation")

The Chi-square analysis gives p = 0.07435.

Residuals analysis:

A negative residual means that the observed count is lower than expected; a positive residual means that it is higher than expected.

11.5.4 HS GPA

plotAndTable(students,"dimeso","HS.GPA","Using/Not Using/Not Needed","Highschool GPA")

Statistics of high school GPA by model-use group (Using / Not Using / Not Needed)
group1 n mean sd median min max range
X11 NotNeed 37 3.73 0.30 3.77 2.92 4.25 1.33
X12 NotUsing 48 3.46 0.36 3.48 2.72 4.05 1.33
X13 Using 37 3.80 0.34 3.93 2.83 4.46 1.63
Anova: Using/Not Using/Not Needed
Testing statistical significance: p-values
NotUsing-NotNeed 0.0009286
Using-NotNeed 0.6585847
Using-NotUsing 0.0000274
plotAndTable(students,"dimesoComb","HS.GPA","Using/Not Using - Correct/Incorrect","Highschool GPA")

Statistics of high school GPA based on getting the question correct (Yes/No) & using the models (Yes/No)
group1 n mean sd median min max range
X11 Notusing Correct 61 3.63 0.33 3.69 2.74 4.22 1.48
X12 Notusing Incorrect 24 3.46 0.41 3.50 2.72 4.25 1.53
X13 Using Correct 26 3.88 0.27 3.95 3.08 4.46 1.38
X14 Using Incorrect 11 3.62 0.42 3.78 2.83 4.28 1.45
Anova: Using/Not Using - Correct/Incorrect
Testing statistical significance: p-values
Notusing Incorrect-Notusing Correct 0.1970409
Using Correct-Notusing Correct 0.0126614
Using Incorrect-Notusing Correct 0.9999967
Using Correct-Notusing Incorrect 0.0002371
Using Incorrect-Notusing Incorrect 0.5652983
Using Incorrect-Using Correct 0.1795673