The analysis of each milestone contains the following items:

  1. Overall Milestone performance after each attempt.

    1. Examine the number of students settling for a lower score.
    2. Comparison with Fall 2019
  2. Attempt 1: Compare averages of questions for three types of scores

  3. Attempt 2: Compare averages of questions for three types of scores

  4. Attempt 3: Compare averages of questions for three types of scores

  5. Analyze whether students performed significantly differently on specific questions within the same question type. Should this be controlled for by the average score of the students who took that question?

  6. Performance in milestone-like questions during exams

# Build the question column headers "q1", "q2", ..., "q<numberOfQuestions>".
#
# numberOfQuestions: number of questions (non-negative whole number).
# Returns a character vector of that length; character(0) when it is 0.
buildHeaders <- function(numberOfQuestions){
  # seq_len() is empty for 0, whereas seq(0) counts down 1, 0 and would
  # produce the bogus headers "q1", "q0".
  paste0("q", seq_len(numberOfQuestions))
}
# Collect the scores of every question variant whose title matches `pat`.
#
# m1:  milestone export. Columns 9/10, 11/12, ... up to totcol-2 are read as
#      (title, score) pairs: the title is the NAME of the odd column and the
#      scores are the VALUES of the following even column.
# pat: regular expression matched (via grepl) against the title, e.g. "q8_".
#
# Returns a list with
#   indi: data frame with one column per matching variant (named by its
#         title) holding the raw scores, one row per row of m1;
#   ave:  one row per matching variant: title, mean score (NAs ignored) and
#         number of non-missing scores. Built from a mixed c(), so all three
#         values are stored as text.
variationWithinQuestionTypes <- function(m1,pat){
  totcol <- ncol(m1)
  aveQ <- data.frame( matrix(ncol=3 ,nrow=0) )
  indivQ <- data.frame( matrix(ncol=0 ,nrow=nrow(m1)) )
  # Without at least one score column, seq(10, totcol-2, 2) would stop with
  # "wrong sign in 'by' argument"; return the empty result instead.
  if (totcol - 2 >= 10) {
    for (q in seq(10, totcol - 2, by = 2)) {
      # q is the score column, q-1 carries the title
      qtitle <- colnames(m1)[q - 1]
      if (grepl(pat, qtitle)) {
        thisQ <- c(
          qtitle,
          mean(m1[, q], na.rm = TRUE),
          length(na.omit(m1[, q]))
        )
        aveQ <- rbind(aveQ, thisQ)
        indivQ[qtitle] <- m1[, q]
      }
    }
  }
  list("indi" = indivQ, "ave" = aveQ)
}
# Pairwise t-test p-values between the columns of `indivQ`.
#
# indivQ: data frame with one column of scores per question variant.
# ndig:   number of decimals shown for each p-value.
#
# Returns a square data frame of strings whose row and column names are the
# column names of indivQ. Cells on and above the diagonal hold the rounded
# p-value, with "*" appended when p < 0.05; cells below the diagonal hold " ".
# A pair that cannot be tested (t.test() stops, e.g. too few observations)
# is reported as "NA" instead of aborting the whole table.
printVariationWithinQuestions <- function(indivQ,ndig){
  qNames <- colnames(indivQ)
  totlen <- length(qNames)
  cells <- matrix(" ", nrow = totlen, ncol = totlen)
  for (i in seq_len(totlen)) {
    # only j >= i is computed; i <= totlen here, so seq() is ascending
    for (j in seq(i, totlen)) {
      pRaw <- tryCatch(
        t.test(indivQ[, i], indivQ[, j])$p.value,
        error = function(e) NA_real_
      )
      if (is.na(pRaw)) {
        cells[i, j] <- "NA"
      } else {
        p <- format(round(pRaw, ndig), nsmall = ndig)
        # decide significance on the number itself, not on its formatted
        # text (string comparison is lexicographic and locale dependent)
        if (pRaw < 0.05) {
          p <- paste0(p, "*")
        }
        cells[i, j] <- p
      }
    }
  }
  pval <- as.data.frame(cells, stringsAsFactors = FALSE)
  colnames(pval) <- qNames
  rownames(pval) <- qNames
  pval
}
# Reshape a raw milestone export into one row per attempt with one column per
# question number.
#
# m1: milestone export. As read here: column 1 is stored as studName, column 8
#     as attNumb, the last column as total, and columns 9/10, 11/12, ... up to
#     totcol-2 are (title, score) pairs where the title is the name of the odd
#     column (e.g. "q8_3" -> question 8) and the score is the value of the
#     even column.
# numberOfQuestions: number of question columns to create.
#
# Returns a data frame with columns q1..qN, total, studName, attNumb. A final
# extra row holds the column means (computed without na.rm, so a column with
# any missing score has an NA mean); its studName and attNumb are NA.
buildDF_fromMilestone <- function(m1,numberOfQuestions){
  totcol <- ncol(m1)
  # Preallocated instead of rbind()-ing one student at a time.
  scores <- matrix(NA_real_, nrow = nrow(m1), ncol = numberOfQuestions + 1)

  if (totcol - 2 >= 10) {
    # Columns are visited left to right, so when two variants of the same
    # question both carry a score for a student the right-most one wins.
    for (q in seq(10, totcol - 2, by = 2)) {
      answered <- which(!is.na(m1[[q]]))
      if (length(answered) == 0) {
        next
      }
      questionNumber <- unlist(strsplit(colnames(m1)[q - 1], "_"))[1]
      questionNumber <- suppressWarnings(as.numeric(gsub("^q", "", questionNumber)))
      if (is.na(questionNumber) || questionNumber < 1 ||
          questionNumber > numberOfQuestions) {
        # Writing outside 1..numberOfQuestions would misalign the row.
        warning("skipping column '", colnames(m1)[q - 1],
                "': question number outside 1..", numberOfQuestions,
                call. = FALSE)
        next
      }
      scores[answered, questionNumber] <- m1[[q]][answered]
    }
  }
  scores[, numberOfQuestions + 1] <- m1[[totcol]]

  m1df <- as.data.frame(scores)
  colnames(m1df) <- c(buildHeaders(numberOfQuestions), "total")
  m1df <- rbind(m1df, colMeans(m1df))
  m1df$studName <- c(m1[[1]], NA)
  m1df$attNumb <- c(m1[[8]], NA)
  m1df
}

# Per-question mean and standard deviation for one attempt, split into three
# score bands by the attempt's total.
#
# m1df: data frame whose first numberOfQuestions columns are the question
#       scores and which has `total` and `attNumb` columns.
# numberOfQuestions: number of leading question columns.
# att:  attempt number to analyse.
#
# Returns a list with
#   ave:      3 x N data frame of means, rows "Avg > 80", "Avg 70><80", "Avg < 70";
#   sd:       3 x N data frame of standard deviations, rows "SD > 80", ...;
#   usestats: c(rows in this attempt, rows in top band, middle band, low band).
# Rows whose total or attNumb is NA belong to no band.
buildStats <- function(m1df,numberOfQuestions,att){
  qCols <- seq_len(numberOfQuestions)
  tot <- m1df$total
  thisAtt <- !is.na(m1df$attNumb) & m1df$attNumb == att

  # The bands share their cut points (79.99 and 69.99) so that a total sitting
  # exactly on a cut point lands in the lower band instead of in none.
  bands <- list(
    which(thisAtt & tot > 79.99),
    which(thisAtt & tot > 69.99 & tot <= 79.99),
    which(thisAtt & tot <= 69.99)
  )

  # drop = FALSE keeps a data frame even for a single question column
  bandMean <- function(rows) colMeans(m1df[rows, qCols, drop = FALSE])
  bandSD <- function(rows) vapply(m1df[rows, qCols, drop = FALSE], sd, numeric(1))

  m1dfat1 <- as.data.frame(do.call(rbind, lapply(bands, bandMean)))
  m1dfat1sd <- as.data.frame(do.call(rbind, lapply(bands, bandSD)))

  colnames(m1dfat1) <- buildHeaders(numberOfQuestions)
  colnames(m1dfat1sd) <- buildHeaders(numberOfQuestions)
  rownames(m1dfat1) <- c("Avg > 80", "Avg 70><80","Avg < 70")
  rownames(m1dfat1sd) <- c("SD > 80", "SD 70><80","SD < 70")

  useStats <- c(sum(thisAtt), lengths(bands))
  list("ave" = m1dfat1, "sd" = m1dfat1sd, "usestats" = useStats)
}
# Turn the upper triangle of a correlation matrix and its matching p-value
# matrix into a long table with one row per pair: row label, column label,
# correlation and p-value. Labels are taken from rownames(cormat).
flattenCorrMatrix <- function(cormat, pmat) {
  upper <- upper.tri(cormat)
  labels <- rownames(cormat)
  rowIdx <- row(cormat)[upper]
  colIdx <- col(cormat)[upper]
  data.frame(
    row = labels[rowIdx],
    column = labels[colIdx],
    cor = cormat[upper],
    p = pmat[upper]
  )
}
# Map a single total score onto its grade band: 0 (below 70), 80 (from 70 up
# to, but not including, 80) or 100 (80 and above).
#
# score: one non-missing number; an NA makes the `if` condition fail.
roundThisScore <- function(score){
  if (score < 70) {
    r <- 0
  } else if (score < 80) {
    r <- 80
  } else {
    # a plain else also covers score == 80, which matched no branch before
    # and left `r` undefined
    r <- 100
  }
  r
}
# Summarise, per student, how the best score evolves over the attempts and who
# stopped early without reaching the top band.
#
# m1df: data frame with `studName`, `attNumb` and `total` columns (rows with an
#       NA studName are ignored).
#
# Returns a list with
#   settle: one row per student whose highest attempt number is below 3 and
#           whose best total is below 80; columns name, MaxAttempt, MaxScore
#           (the latter two numeric, so they can be compared with numbers);
#   flow:   3 x 3 data frame of student counts. Rows are the bands 0, 80, 100
#           (in that order); columns are "After 1st", "After 2nd", "After 3rd",
#           using the best total among attempts numbered below 2, 3 and 4.
flowAndSettling <- function(m1df){
  studs <- unique(m1df$studName)
  studs <- studs[!is.na(studs)]
  nStud <- length(studs)
  bandLevels <- c(0, 80, 100)

  maxAttempt <- rep(NA_real_, nStud)
  maxScore <- rep(NA_real_, nStud)
  flow <- matrix(NA_real_, nrow = nStud, ncol = 3)
  for (s in seq_len(nStud)) {
    thisStDF <- m1df[which(m1df$studName == studs[s]), ]
    maxAttempt[s] <- max(thisStDF$attNumb)
    maxScore[s] <- max(thisStDF$total)
    for (k in 1:3) {
      soFar <- thisStDF$total[which(thisStDF$attNumb < k + 1)]
      # no attempt yet counts as the lowest band (max() of nothing is -Inf)
      best <- if (length(soFar) > 0) max(soFar) else -Inf
      flow[s, k] <- roundThisScore(best)
    }
  }

  # Built column-wise so the numbers stay numeric; c(name, attempt, score)
  # would turn every value into text whenever names are character.
  settled <- which(maxAttempt < 3 & maxScore < 80)
  settle <- data.frame(
    name = studs[settled],
    MaxAttempt = maxAttempt[settled],
    MaxScore = maxScore[settled],
    stringsAsFactors = FALSE
  )

  # Fixed factor levels keep a band with zero students as a 0 count instead of
  # a shorter table that fails (or is recycled) on assignment.
  counts <- vapply(
    1:3,
    function(k) as.vector(table(factor(flow[, k], levels = bandLevels))),
    integer(3)
  )
  flow2 <- as.data.frame(counts)
  colnames(flow2) <- c("After 1st","After 2nd","After 3rd")

  list("settle" = settle, "flow" = flow2)
}

#lets first load the files
# All exports live under one root folder whose location depends on the OS, so
# each file is listed once instead of once per branch.
# NOTE: besides the data frames this defines the helpers `gradesRoot` and
# `readGrades` at top level.
gradesRoot <- if (isTRUE(Sys.info()[["sysname"]] == "Windows")) {
  "G:/My Drive/Teaching/Grades_and_SRT"
} else {
  "~/Teaching/Grades_and_SRT"
}
readGrades <- function(...) {
  read.csv(file.path(gradesRoot, ...), header = TRUE)
}

# Fall 2021 milestones and exams
m1 <- readGrades("Fall2021", "milestone1.csv")
m2 <- readGrades("Fall2021", "milestone2.csv")
m3 <- readGrades("Fall2021", "milestone3.csv")
m4 <- readGrades("Fall2021", "milestone4.csv")
ex1 <- readGrades("Fall2021", "Exam_1_scores.csv")
ex2 <- readGrades("Fall2021", "Exam_2_scores.csv")

# Fall 2019 (CHEM1331) milestones used for comparison
m1f19 <- readGrades("Fall2019", "CHEM1331", "milestone1.csv")
m2f19 <- readGrades("Fall2019", "CHEM1331", "milestone2.csv")
m3f19 <- readGrades("Fall2019", "CHEM1331", "milestone3.csv")
m4f19 <- readGrades("Fall2019", "CHEM1331", "milestone4.csv")

1 Milestone 1

The questions for milestone 1:

  1. Isotopes, number of neutrons, protons, and electrons
  2. Mass spectrometry
  3. Electrostatics
  4. SI prefixes
  5. What’s greater: 1 mole of __ or # molecules
  6. Rank electromagnetic waves
  7. Plots of waves: tell wavelength and frequency
  8. Compare Transition among orbits and id the photon involved
  9. Transitions among atomic orbitals of specific atoms
  10. Electronic configuration of atoms and ions (Qbank)
  11. Common charges and electronic configurations of ions
  12. Periodic table trends with Zeff, shielding and size.

1.1 Overall performance in Milestone 1

Number of students in the three grade categories at different attempts

# Milestone 1 has 12 questions: reshape the export, then count students per
# score band after each attempt.
m1df <- buildDF_fromMilestone(m1, 12)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each band (rows of `flow`: 0, 80, 100).
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format rows (grade letter, percentage, cohort, attempt) collected for
# the cohort comparison chart; bands 0 / 80 / 100 are labelled F / C / A.
stackAndGrouped <- data.frame(matrix(ncol = 4, nrow = 0))
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct1st[band], "F21", "First")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct2nd[band], "F21", "Second")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct3rd[band], "F21", "Third")
  )
}

# Display table: "count / percent" per band and attempt.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow[["After 1st"]] <- paste(flow[["After 1st"]], pct1st, sep = " / ")
flow[["After 2nd"]] <- paste(flow[["After 2nd"]], pct2nd, sep = " / ")
flow[["After 3rd"]] <- paste(flow[["After 3rd"]], pct3rd, sep = " / ")
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)
scores After 1st After 2nd After 3rd
0 26 / 12.32% 11 / 5.213% 5 / 2.37%
80 33 / 15.64% 24 / 11.37% 27 / 12.8%
100 152 / 72.04% 176 / 83.41% 179 / 84.83%
# Stacked bars of the raw counts; rows are labelled by score band.
row.names(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 1: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

1.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count students who stopped after attempt 1 or 2 without reaching the top
# band, split by whether their best score was below 70.
results <- results$settle
# Coerce first: when the settle table was assembled from mixed c() rows its
# columns are text, and "9.5" < 70 would then be decided alphabetically.
settledAttempt <- as.numeric(as.character(results$MaxAttempt))
settledScore <- as.numeric(as.character(results$MaxScore))
n1_70 <- sum(settledAttempt == 1 & settledScore < 70, na.rm = TRUE)
n1_80 <- sum(settledAttempt == 1 & settledScore > 69.99, na.rm = TRUE)
n2_70 <- sum(settledAttempt == 2 & settledScore < 70, na.rm = TRUE)
n2_80 <- sum(settledAttempt == 2 & settledScore > 69.99, na.rm = TRUE)
# Students settling before 3rd attempt with Score < 70 (failing) 70< Score < 80
Settled in 1st attempt 3 7
Settled in 2nd attempt 0 13
Total 3 20

1.1.2 Comparing with Fall 2019

# Same flow summary for the Fall 2019 cohort.
m1df19 <- buildDF_fromMilestone(m1f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the F19 rows to the long-format table that already holds F21.
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct1st[band], "F19", "First")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct2nd[band], "F19", "Second")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct3rd[band], "F19", "Third")
  )
}
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
# the rows were added as mixed character vectors, so Pct arrives as text
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

# One panel per attempt, one stacked bar per cohort.
library(ggplot2)
ggplot() +
  geom_bar(
    data = stackAndGrouped,
    aes(y = Pct, x = Year, fill = Grade),
    stat = "identity",
    position = "stack"
  ) +
  theme_bw() +
  scale_fill_manual(values = c("green", "yellow", "red")) +
  facet_grid(~ Attempt)

# "count / percent" cells for F19, then F21 and F19 side by side.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19[["After 1st"]] <- paste(flow19[["After 1st"]], pct1st, sep = " / ")
flow19[["After 2nd"]] <- paste(flow19[["After 2nd"]], pct2nd, sep = " / ")
flow19[["After 3rd"]] <- paste(flow19[["After 3rd"]], pct3rd, sep = " / ")

colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
mergedFlow <- merge(flow, flow19, by = "scores")
# merge() sorts the text keys ("0", "100", "80"); reorder numerically
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])
scores After 1st After 2nd After 3rd 1st att F19 2nd att F19 3rd att F19
1 0 26 / 12.32% 11 / 5.213% 5 / 2.37% 18 / 9.424% 8 / 4.188% 5 / 2.618%
3 80 33 / 15.64% 24 / 11.37% 27 / 12.8% 25 / 13.09% 17 / 8.901% 13 / 6.806%
2 100 152 / 72.04% 176 / 83.41% 179 / 84.83% 148 / 77.49% 166 / 86.91% 173 / 90.58%
  • Students settling in F19
# Fall 2019: students who stopped after attempt 1 or 2 without reaching the
# top band, split by whether their best score was below 70.
results <- results19$settle
# Coerce first: when the settle table was assembled from mixed c() rows its
# columns are text, and "9.5" < 70 would then be decided alphabetically.
settledAttempt <- as.numeric(as.character(results$MaxAttempt))
settledScore <- as.numeric(as.character(results$MaxScore))
n1_70_19 <- sum(settledAttempt == 1 & settledScore < 70, na.rm = TRUE)
n1_80_19 <- sum(settledAttempt == 1 & settledScore > 69.99, na.rm = TRUE)
n2_70_19 <- sum(settledAttempt == 2 & settledScore < 70, na.rm = TRUE)
n2_80_19 <- sum(settledAttempt == 2 & settledScore > 69.99, na.rm = TRUE)
# Students settling before 3rd attempt (F21 / F19) Score < 70 (F21) Score < 70 (F19) 70< Score < 80 (F21) 70< Score < 80 (F19)
Settled in 1st attempt 3 2 7 4
Settled in 2nd attempt 0 3 13 5
Total 3 5 20 9

1.2 M1 - Attempt 1 averages

# Rebuild the Milestone 1 table, then compute first-attempt statistics per
# score band.
m1df <- buildDF_fromMilestone(m1, numberOfQuestions = 12)
results <- buildStats(m1df, numberOfQuestions = 12, att = 1)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
One 211 152 (72%) 33 (16%) 26 (12%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 8.18 7.12 7.76 6.99 6.64 8.25 8.22 7.40 7.52 8.30 7.76 7.34
Avg 70><80 7.96 5.30 6.67 4.80 2.78 8.09 7.19 4.80 6.37 8.27 6.60 6.31
Avg < 70 6.15 4.70 5.20 3.68 3.85 5.88 5.13 3.84 3.44 6.65 4.38 4.35
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 0.48 1.99 1.26 1.89 3.37 0.50 0.47 2.47 1.44 0.24 1.06 1.55
SD 70><80 0.94 2.44 1.86 2.47 3.99 0.81 1.65 3.69 1.80 0.36 1.76 1.80
SD < 70 2.33 2.58 2.37 2.90 4.24 3.27 2.65 3.25 2.20 2.70 2.13 1.71
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "Average score per question on 1st attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

1.3 M1 - Attempt 2 averages

# Second-attempt statistics per score band (Milestone 1).
results <- buildStats(m1df, numberOfQuestions = 12, att = 2)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Two 49 25 (51%) 11 (22%) 13 (27%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 8.06 6.89 7.74 7.25 6.34 8.34 7.75 7.33 6.91 8.33 7.71 7.04
Avg 70><80 7.83 6.31 5.91 4.16 3.03 7.83 7.57 5.30 6.82 8.33 5.91 5.73
Avg < 70 7.48 3.63 5.00 3.68 0.64 6.84 5.61 5.98 4.97 8.01 5.25 4.94
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 0.57 1.63 1.06 1.36 3.64 0.00 1.13 2.52 1.87 0.00 0.67 1.61
SD 70><80 1.12 2.18 2.28 2.08 4.21 1.12 1.05 3.82 1.64 0.00 1.98 2.37
SD < 70 0.90 2.09 1.36 2.27 2.31 2.44 1.97 3.56 2.76 0.78 1.64 1.52
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M1: Average score per question on 2nd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

1.4 M1 - Attempt 3 averages

# Third-attempt statistics per score band (Milestone 1).
results <- buildStats(m1df, numberOfQuestions = 12, att = 3)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Three 12 3 (25%) 5 (42%) 4 (33%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 7.41 7.40 6.12 5.55 8.34 8.34 6.94 8.33 7.64 8.33 7.03 4.36
Avg 70><80 5.84 5.55 7.01 5.41 3.34 8.34 7.50 8.33 6.66 7.91 6.11 5.95
Avg < 70 6.95 5.55 3.75 3.12 4.17 7.64 7.29 6.25 2.08 8.33 6.11 4.76
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 0.80 1.60 1.93 3.18 0.00 0.00 2.40 0.00 1.20 0.00 1.28 1.82
SD 70><80 2.28 3.40 1.40 3.48 4.57 0.00 1.14 0.00 1.74 0.93 2.08 1.46
SD < 70 1.13 2.27 1.60 2.08 4.82 1.39 1.20 2.66 1.70 0.00 1.20 1.37
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M1: Average score per question on 3rd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

1.5 Variation within question groups

Question 5 in Milestone 1 scores low because it has only one true option, which is also why it has the largest standard deviation. Question 8 allows multiple answers, yet its standard deviation is still large, so we should look into it.

# Pull the scores of every Q8 variant out of the raw Milestone 1 export.
results <- variationWithinQuestionTypes(m1, pat = "q8_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution per variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is expected to come from psych).
library(psych)
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q8_1 1 24 7.520139 1.916603 8.33 7.913500 0 2.776667 8.33 5.553333 -1.8771281 1.7627115 0.3912249
q8_2 2 30 6.016111 3.185421 8.33 6.478889 0 0.000000 8.33 8.330000 -0.8438501 -0.9582137 0.5815757
q8_3 3 23 5.553333 3.744057 8.33 5.845614 0 0.000000 8.33 8.330000 -0.6384380 -1.5269565 0.7806898
q8_4 4 22 5.553333 3.635510 8.33 5.861852 0 0.000000 8.33 8.330000 -0.7290461 -1.3295455 0.7750933
q8_5 5 29 7.181034 2.518454 8.33 7.663600 0 0.000000 8.33 8.330000 -1.9109749 2.2502296 0.4676651
q8_6 6 20 5.969833 3.634314 8.33 6.421042 0 0.000000 8.33 8.330000 -0.9301782 -1.0642571 0.8126573
q8_7 7 12 5.553333 3.551924 8.33 5.831000 0 0.000000 8.33 8.330000 -0.4777275 -1.6929012 1.0253521
q8_8 8 23 7.847101 1.599938 8.33 8.330000 0 2.776667 8.33 5.553333 -2.7426553 5.7790080 0.3336101
q8_9 9 30 6.849111 2.892345 8.33 7.520139 0 0.000000 8.33 8.330000 -1.5070908 0.5914437 0.5280676
q8_10 10 26 7.796026 1.759492 8.33 8.203788 0 0.000000 8.33 8.330000 -3.4802501 12.0347164 0.3450648
q8_11 11 19 5.261053 4.128303 8.33 5.390000 0 0.000000 8.33 8.330000 -0.5030472 -1.8353779 0.9470977
q8_12 12 14 7.140000 3.024927 8.33 7.635833 0 0.000000 8.33 8.330000 -1.8264902 1.4549320 0.8084458

We can already see that some questions are easier than others, but we want to know whether they are significantly different. We can calculate the p-values among the scores of the different Q8 questions to find out. We do not control for whether the student passed or not.

# Pairwise t-test p-values between Q8 variants, shown to 3 decimals.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q8_1 q8_2 q8_3 q8_4 q8_5 q8_6 q8_7 q8_8 q8_9 q8_10 q8_11 q8_12
q8_1 1.000 0.037* 0.031* 0.031* 0.581 0.097 0.094 0.528 0.312 0.599 0.037* 0.677
q8_2 1.000 0.637 0.635 0.124 0.963 0.699 0.009* 0.293 0.011* 0.502 0.269
q8_3 1.000 1.000 0.082 0.714 1.000 0.011* 0.177 0.013* 0.813 0.168
q8_4 1.000 0.081 0.713 1.000 0.011* 0.175 0.013* 0.813 0.166
q8_5 1.000 0.206 0.168 0.252 0.640 0.295 0.080 0.965
q8_6 1.000 0.753 0.042* 0.371 0.049* 0.574 0.315
q8_7 1.000 0.053 0.277 0.058 0.836 0.237
q8_8 1.000 0.117 0.916 0.017* 0.430
q8_9 1.000 0.140 0.154 0.766
q8_10 1.000 0.019* 0.465
q8_11 1.000 0.141
q8_12 1.000

1.6 Performance in Milestone like questions

Question 1 aligns with question 2 in Milestone 1. Question 4a aligns with question 4 in the milestone, about SI prefixes. Question 4b aligns with question 5 in the milestone.

2 Milestone 2

The questions for Milestone 2:

  1. Isotopes, number of protons, neutrons, and electrons
  2. Electrostatics
  3. Transition among orbitals
  4. Electronic configuration
  5. SI prefixes and mole conversion
  6. Type of chemical bond in compounds
  7. Hybridization and angle of small molecules
  8. NMR: number of peaks and number of sp2, sp3 (doubled)
  9. Isomerism (doubled)
  10. Resonance structures (doubled)
  11. Conjugation and UV/Vis (doubled)

q8, q9, q10, and q11 are doubled, so in a way Milestone 2 contains more questions but with less variability.

2.1 Overall performance in Milestone 2

# To avoid typos the Milestone 1 variable names are reused: from here on m1df
# holds Milestone 2 data (11 questions); there is no m2df.
m1df <- buildDF_fromMilestone(m2, 11)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each band (rows of `flow`: 0, 80, 100).
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format rows (grade letter, percentage, cohort, attempt) collected for
# the cohort comparison chart; bands 0 / 80 / 100 are labelled F / C / A.
stackAndGrouped <- data.frame(matrix(ncol = 4, nrow = 0))
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct1st[band], "F21", "First")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct2nd[band], "F21", "Second")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct3rd[band], "F21", "Third")
  )
}

# Display table: "count / percent" per band and attempt.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow[["After 1st"]] <- paste(flow[["After 1st"]], pct1st, sep = " / ")
flow[["After 2nd"]] <- paste(flow[["After 2nd"]], pct2nd, sep = " / ")
flow[["After 3rd"]] <- paste(flow[["After 3rd"]], pct3rd, sep = " / ")
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)
scores After 1st After 2nd After 3rd
0 52 / 25.37% 34 / 16.59% 24 / 11.71%
80 51 / 24.88% 43 / 20.98% 42 / 20.49%
100 102 / 49.76% 128 / 62.44% 139 / 67.8%
# Stacked bars of the raw counts; rows are labelled by score band.
row.names(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 2: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

2.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count students who stopped after attempt 1 or 2 without reaching the top
# band, split by whether their best score was below 70.
results <- results$settle
# Coerce first: when the settle table was assembled from mixed c() rows its
# columns are text, and "9.5" < 70 would then be decided alphabetically.
settledAttempt <- as.numeric(as.character(results$MaxAttempt))
settledScore <- as.numeric(as.character(results$MaxScore))
n1_70 <- sum(settledAttempt == 1 & settledScore < 70, na.rm = TRUE)
n1_80 <- sum(settledAttempt == 1 & settledScore > 69.99, na.rm = TRUE)
n2_70 <- sum(settledAttempt == 2 & settledScore < 70, na.rm = TRUE)
n2_80 <- sum(settledAttempt == 2 & settledScore > 69.99, na.rm = TRUE)
# Students settling before 3rd attempt with Score < 70 (failing) 70< Score < 80
Settled in 1st attempt 2 14
Settled in 2nd attempt 5 11
Total 7 25

2.1.2 Comparing with Fall 2019

# Same flow summary for the Fall 2019 cohort.
# NOTE(review): the F21 Milestone 2 table is built with 11 questions while
# this call passes 12 - confirm the F19 milestone really had 12.
m1df19 <- buildDF_fromMilestone(m2f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the F19 rows to the long-format table that already holds F21.
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct1st[band], "F19", "First")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct2nd[band], "F19", "Second")
  )
}
for (band in 1:3) {
  stackAndGrouped <- rbind(
    stackAndGrouped,
    c(c("F", "C", "A")[band], pct3rd[band], "F19", "Third")
  )
}
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
# the rows were added as mixed character vectors, so Pct arrives as text
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

# One panel per attempt, one stacked bar per cohort.
library(ggplot2)
ggplot() +
  geom_bar(
    data = stackAndGrouped,
    aes(y = Pct, x = Year, fill = Grade),
    stat = "identity",
    position = "stack"
  ) +
  theme_bw() +
  scale_fill_manual(values = c("green", "yellow", "red")) +
  facet_grid(~ Attempt)

# "count / percent" cells for F19, then F21 and F19 side by side.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19[["After 1st"]] <- paste(flow19[["After 1st"]], pct1st, sep = " / ")
flow19[["After 2nd"]] <- paste(flow19[["After 2nd"]], pct2nd, sep = " / ")
flow19[["After 3rd"]] <- paste(flow19[["After 3rd"]], pct3rd, sep = " / ")

colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
mergedFlow <- merge(flow, flow19, by = "scores")
# merge() sorts the text keys ("0", "100", "80"); reorder numerically
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])
scores After 1st After 2nd After 3rd 1st att F19 2nd att F19 3rd att F19
1 0 52 / 25.37% 34 / 16.59% 24 / 11.71% 16 / 8.421% 9 / 4.737% 5 / 2.632%
3 80 51 / 24.88% 43 / 20.98% 42 / 20.49% 22 / 11.58% 10 / 5.263% 10 / 5.263%
2 100 102 / 49.76% 128 / 62.44% 139 / 67.8% 152 / 80% 171 / 90% 175 / 92.11%
  • Students settling in F19
# Fall 2019: students who stopped after attempt 1 or 2 without reaching the
# top band, split by whether their best score was below 70.
results <- results19$settle
# Coerce first: when the settle table was assembled from mixed c() rows its
# columns are text, and "9.5" < 70 would then be decided alphabetically.
settledAttempt <- as.numeric(as.character(results$MaxAttempt))
settledScore <- as.numeric(as.character(results$MaxScore))
n1_70_19 <- sum(settledAttempt == 1 & settledScore < 70, na.rm = TRUE)
n1_80_19 <- sum(settledAttempt == 1 & settledScore > 69.99, na.rm = TRUE)
n2_70_19 <- sum(settledAttempt == 2 & settledScore < 70, na.rm = TRUE)
n2_80_19 <- sum(settledAttempt == 2 & settledScore > 69.99, na.rm = TRUE)
# Students settling before 3rd attempt (F21 / F19) Score < 70 (F21) Score < 70 (F19) 70< Score < 80 (F21) 70< Score < 80 (F19)
Settled in 1st attempt 2 3 14 2
Settled in 2nd attempt 5 0 11 4
Total 7 3 25 6

2.2 M2 - Attempt 1 averages

# First-attempt statistics per score band; m1df holds Milestone 2 data here
# (11 questions).
results <- buildStats(m1df, numberOfQuestions = 11, att = 1)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
One 205 102 (50%) 51 (25%) 52 (25%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
Avg > 80 6.62 6.39 6.48 6.47 5.53 6.15 5.75 5.21 5.21 6.11 5.20
Avg 70><80 6.51 6.09 6.30 6.43 3.43 5.16 4.68 4.60 3.71 4.92 4.06
Avg < 70 6.08 4.69 4.71 5.15 2.31 4.05 3.24 3.10 2.22 4.22 3.00
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
SD > 80 0.22 0.66 0.62 0.43 1.50 1.11 1.10 1.17 1.81 0.83 1.41
SD 70><80 0.54 1.01 0.77 0.46 1.81 1.37 1.48 1.33 2.34 1.60 1.48
SD < 70 1.07 1.55 1.67 1.42 1.90 1.66 1.69 1.79 2.07 1.67 1.57
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/6.33",
  main = "M2: Average score per question on 1st attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

2.3 M2 - Attempt 2 averages

# Second-attempt statistics per score band (Milestone 2).
results <- buildStats(m1df, numberOfQuestions = 11, att = 2)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Two 87 26 (30%) 27 (31%) 34 (39%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
Avg > 80 6.62 6.20 6.21 6.44 4.87 5.33 5.32 6.13 5.64 6.13 5.70
Avg 70><80 6.33 6.12 5.98 6.27 3.58 5.43 4.92 5.01 4.41 5.15 5.32
Avg < 70 5.94 4.86 5.34 5.43 2.20 3.90 3.99 4.53 3.55 3.79 3.77
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
SD > 80 0.22 0.92 1.21 0.47 1.63 1.64 1.23 0.71 1.40 1.15 1.30
SD 70><80 1.30 1.00 1.33 0.53 2.00 1.58 1.67 1.02 2.48 2.24 1.14
SD < 70 1.28 1.76 1.63 0.90 2.04 1.68 1.56 1.31 2.30 2.45 1.42
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
# The plot title previously read "2md attempt".
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/6.33",
  main = "M2: Average score per question on 2nd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

2.4 M2 - Attempt 3 averages

# Third-attempt statistics per score band (Milestone 2).
results <- buildStats(m1df, numberOfQuestions = 11, att = 3)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Three 45 12 (27%) 14 (31%) 19 (42%)

Table of averages per question and their standard deviation

# Mean score per question for each score band.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
Avg > 80 6.66 6.11 6.38 6.59 4.30 5.87 5.79 5.95 6.11 5.47 6.10
Avg 70><80 6.18 5.23 5.47 6.22 3.33 5.40 4.84 5.34 3.81 4.22 4.90
Avg < 70 6.25 5.12 5.52 5.98 2.28 4.37 4.51 4.42 3.33 3.66 4.56
# Standard deviation per question for each score band.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11
SD > 80 0.00 1.06 0.65 0.17 1.66 0.92 1.10 0.60 1.38 2.10 0.65
SD 70><80 0.95 1.33 1.21 0.60 2.53 1.60 1.86 0.91 2.53 2.34 1.25
SD < 70 0.66 1.35 1.57 0.79 1.94 1.88 1.46 1.16 2.22 2.50 1.67
# One series per score band (the rows of m1dfat1), plotted against question
# number; the legend text keys the series digits 1-3 to the bands.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/6.33",
  main = "M2: Average score per question on 3rd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

2.5 Variation within question groups

2.5.1 Question 9 on isomerism

Question 9 in the first attempt of Milestone2 seems to show the highest SD, we can look into it and see whether or not scores are significantly different among questions.

Questions 18 and 19 were the same for everyone during the first attempt, which explains their larger “n”. For the 2nd attempt the new question was q9_2, which was kept and repeated, and still didn’t improve students’ results.

# Pull the scores of every Q9 variant out of the raw Milestone 2 export.
results <- variationWithinQuestionTypes(m2, pat = "q9_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q9_1 1 14 5.717143 1.893459 6.670000 6.114167 0.000000 0.000000 6.67 6.670000 -1.9399650 3.0262130 0.5060481
q9_2 2 139 2.815156 2.476024 2.223333 2.695546 3.296314 0.000000 6.67 6.670000 0.2484774 -1.3314270 0.2100136
q9_3 3 24 4.817222 2.510854 6.670000 5.113667 0.000000 0.000000 6.67 6.670000 -0.8325219 -0.9435558 0.5125258
q9_4 4 27 3.623210 2.553533 2.223333 3.673333 3.296314 0.000000 6.67 6.670000 -0.0127433 -1.5420915 0.4914277
q9_5 5 25 4.268800 2.560858 4.446667 4.446667 3.296314 0.000000 6.67 6.670000 -0.4802299 -1.3430935 0.5121716
q9_6 6 19 4.797719 2.255604 6.670000 4.839020 0.000000 2.223333 6.67 4.446667 -0.2948888 -2.0107026 0.5174710
q9_7 7 23 4.736667 2.706824 6.670000 5.031754 0.000000 0.000000 6.67 6.670000 -0.9591990 -0.8144548 0.5644117
q9_8 8 26 5.707981 1.896737 6.670000 6.063636 0.000000 0.000000 6.67 6.670000 -1.7777175 1.8832951 0.3719808
q9_9 9 23 4.253333 2.991418 6.670000 4.446667 0.000000 0.000000 6.67 6.670000 -0.4925977 -1.6653980 0.6237537
q9_10 10 25 5.380467 1.270741 5.558333 5.505397 1.648157 2.223333 6.67 4.446667 -0.6653471 -0.4869991 0.2541482
q9_11 11 18 3.952593 2.702173 4.446667 4.029792 3.296314 0.000000 6.67 6.670000 -0.3423142 -1.5567310 0.6369082
q9_12 12 20 4.669167 2.269641 4.448333 5.002708 3.293843 0.000000 6.67 6.670000 -0.7503523 -0.7347600 0.5075071
q9_13 13 33 4.311818 2.287766 4.446667 4.446667 3.296314 0.000000 6.67 6.670000 -0.2178465 -1.5036429 0.3982489
q9_14 14 20 5.252625 1.242554 5.002500 5.315156 2.472236 3.335000 6.67 3.335000 -0.2193302 -1.2721324 0.2778435
q9_15 15 19 5.792368 1.020135 5.558333 5.885294 1.648157 3.335000 6.67 3.335000 -0.8003138 -0.4893958 0.2340351
q9_16 16 19 5.616842 1.870248 6.670000 5.885294 0.000000 0.000000 6.67 6.670000 -1.6694941 1.9532684 0.4290643
q9_17 17 2 4.446667 3.144268 4.446667 4.446667 3.296314 2.223333 6.67 4.446667 0.0000000 -2.7500000 2.2233333
q9_18 18 87 4.651188 2.107383 4.446667 4.947793 3.296314 0.000000 6.67 6.670000 -0.8286304 -0.2564302 0.2259352
q9_19 19 111 3.645465 2.431713 4.446667 3.722210 3.296314 0.000000 6.67 6.670000 -0.2103672 -1.2777108 0.2308080

Let’s look into the p-values among question 9 variations.

# Pairwise t-test p-values between Q9 variants, shown to 3 decimals.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q9_1 q9_2 q9_3 q9_4 q9_5 q9_6 q9_7 q9_8 q9_9 q9_10 q9_11 q9_12 q9_13 q9_14 q9_15 q9_16 q9_17 q9_18 q9_19
q9_1 1.000 0.000* 0.220 0.005* 0.052 0.214 0.205 0.988 0.077 0.559 0.038* 0.154 0.037* 0.430 0.894 0.881 0.669 0.070 0.001*
q9_2 1.000 0.001* 0.139 0.013* 0.002* 0.003* 0.000* 0.038* 0.000* 0.105 0.002* 0.002* 0.000* 0.000* 0.000* 0.597 0.000* 0.008*
q9_3 1.000 0.099 0.453 0.979 0.916 0.167 0.489 0.332 0.297 0.838 0.440 0.460 0.093 0.238 0.895 0.769 0.045*
q9_4 1.000 0.367 0.107 0.144 0.001* 0.432 0.003* 0.685 0.146 0.281 0.006* 0.000* 0.004* 0.775 0.065 0.968
q9_5 1.000 0.472 0.542 0.028* 0.985 0.060 0.701 0.582 0.947 0.100 0.011* 0.050 0.950 0.499 0.275
q9_6 1.000 0.937 0.162 0.506 0.321 0.311 0.860 0.461 0.445 0.092 0.231 0.901 0.797 0.052
q9_7 1.000 0.159 0.569 0.306 0.363 0.930 0.542 0.418 0.095 0.222 0.918 0.889 0.084
q9_8 1.000 0.053 0.471 0.024* 0.107 0.013* 0.332 0.849 0.873 0.671 0.019* 0.000*
q9_9 1.000 0.105 0.738 0.608 0.937 0.154 0.028* 0.080 0.945 0.554 0.368
q9_10 1.000 0.049* 0.220 0.028* 0.736 0.240 0.639 0.747 0.036* 0.000*
q9_11 1.000 0.385 0.636 0.074 0.013* 0.038* 0.862 0.313 0.655
q9_12 1.000 0.583 0.321 0.055 0.162 0.937 0.974 0.077
q9_13 1.000 0.058 0.002* 0.031* 0.962 0.462 0.153
q9_14 1.000 0.146 0.481 0.779 0.100 0.000*
q9_15 1.000 0.722 0.653 0.001* 0.000*
q9_16 1.000 0.691 0.056 0.000*
q9_17 1.000 0.942 0.780
q9_18 1.000 0.002*
q9_19 1.000

2.5.2 Question 5 on metric conversions

Question 5 is not only the lowest it also has a significant standard deviation

# Pull the scores of every Q5 variant out of the raw Milestone 2 export.
results <- variationWithinQuestionTypes(m2, pat = "q5_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q5_1 1 85 4.524882 1.950047 4.995 4.681304 2.468529 0 6.66 6.66 -0.5454566 -0.7852274 0.2115123
q5_2 2 86 3.213895 2.223411 3.330 3.187357 2.468529 0 6.66 6.66 0.3015116 -1.1688138 0.2397566
q5_3 3 95 3.522790 2.327748 3.330 3.567857 2.468529 0 6.66 6.66 -0.1115919 -1.3037016 0.2388219
q5_4 4 71 4.268028 2.119804 4.995 4.410789 2.468529 0 6.66 6.66 -0.3853848 -1.1129671 0.2515745

Let’s look into the p-values among question 5 variations.

# Pairwise t-test p-values between Q5 variants, shown to 3 decimals.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q5_1 q5_2 q5_3 q5_4
q5_1 1.000 0.000* 0.002* 0.436
q5_2 1.000 0.363 0.003*
q5_3 1.000 0.033*
q5_4 1.000

2.6 Variations within question groups among different groups of students

Will the big variation that we see in question 5 be different if we look only at the students who passed the milestone on their first attempt?

2.6.1 Question 9 on isomerism passing on 1st attempt

Let’s look at question 9 performance among students who passed the milestone on their 1st attempt. We have to remove some questions because they were not attempted or have too small a sample. This also prevents us from computing p-values.

# Restrict the raw Milestone 2 export to first attempts scoring above 79.99,
# then repeat the Q9 variant analysis on that subset.
st100 <- subset(m2, m2$score > 79.99 & m2$attempt == 1)
results <- variationWithinQuestionTypes(st100, pat = "q9_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]
# drop q9_17 and q9_1: they were not attempted in this subset
indivQ[["q9_17"]] <- NULL
indivQ[["q9_1"]] <- NULL

boxplot(indivQ)

knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q9_2 1 3 5.187778 1.2836421 4.446667 5.187778 0 4.446667 6.67 2.223333 0.3849002 -2.3333333 0.7411111
q9_3 2 5 4.891333 2.4355396 6.670000 4.891333 0 2.223333 6.67 4.446667 -0.2921187 -2.2533333 1.0892064
q9_4 3 9 6.422963 0.7411111 6.670000 6.422963 0 4.446667 6.67 2.223333 -2.0740741 2.6296296 0.2470370
q9_5 4 8 5.280417 2.6407307 6.670000 5.280417 0 0.000000 6.67 6.670000 -1.0840627 -0.7195637 0.9336393
q9_6 5 4 6.670000 0.0000000 6.670000 6.670000 0 6.670000 6.67 0.000000 NaN NaN 0.0000000
q9_7 6 10 6.447667 0.7030797 6.670000 6.670000 0 4.446667 6.67 2.223333 -2.2768399 3.5700000 0.2223333
q9_8 7 10 6.503250 0.5273098 6.670000 6.670000 0 5.002500 6.67 1.667500 -2.2768399 3.5700000 0.1667500
q9_9 8 8 6.114167 1.5721341 6.670000 6.114167 0 2.223333 6.67 4.446667 -1.8561553 1.7031250 0.5558333
q9_10 9 7 6.193571 0.5942108 6.670000 6.193571 0 5.558333 6.67 1.111667 -0.2290811 -2.2040816 0.2245906
q9_11 10 4 6.114167 1.1116667 6.670000 6.114167 0 4.446667 6.67 2.223333 -0.7500000 -1.6875000 0.5558333
q9_12 11 6 6.299444 0.9076720 6.670000 6.299444 0 4.446667 6.67 2.223333 -1.3608276 -0.0833333 0.3705556
q9_13 12 13 5.472564 1.9504554 6.670000 5.659394 0 2.220000 6.67 4.450000 -0.9189644 -1.1160493 0.5409590
q9_14 13 5 6.336500 0.7457287 6.670000 6.336500 0 5.002500 6.67 1.667500 -1.0733126 -0.9200000 0.3335000
q9_15 14 7 6.193571 0.8746547 6.670000 6.193571 0 4.446667 6.67 2.223333 -1.0774380 -0.6242000 0.3305884
q9_16 15 6 6.299444 0.9076720 6.670000 6.299444 0 4.446667 6.67 2.223333 -1.3608276 -0.0833333 0.3705556
q9_18 16 48 5.234236 1.8049936 6.670000 5.447333 0 0.000000 6.67 6.670000 -0.9364927 -0.1702542 0.2605284
q9_19 17 51 5.144183 1.8596530 6.670000 5.422764 0 0.000000 6.67 6.670000 -1.0219086 0.2497358 0.2604035
#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

2.6.2 Question 9 on isomerism 2nd and 3rd attempt

# Second and third attempts of Milestone 2 (attempt number above 1).
st100 <- subset(m2, m2$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q9_")
aveQ <- results[["ave"]]
indivQ <- results[["indi"]]
# Drop q9_17, q9_18 and q9_19 for this group.
# NOTE(review): the earlier comment named q9_17 and q9_1; presumably these
# three variants had no usable answers on later attempts -- confirm.
indivQ$q9_17 <- NULL
indivQ$q9_18 <- NULL
indivQ$q9_19 <- NULL

# One box per remaining variant.
boxplot(indivQ)

# Descriptive statistics per remaining variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q9_1 1 14 5.717143 1.893459 6.670000 6.114167 0.0000000 0.000000 6.670000 6.670000 -1.9399650 3.0262130 0.5060481
q9_2 2 132 2.728636 2.450844 2.223333 2.579906 3.2963140 0.000000 6.670000 6.670000 0.2936334 -1.2908564 0.2133186
q9_3 3 14 5.717143 2.084630 6.670000 6.114167 0.0000000 0.000000 6.670000 6.670000 -1.7508066 1.6485778 0.5571407
q9_4 4 9 1.976296 1.738060 2.223333 1.976296 3.2963140 0.000000 4.446667 4.446667 0.1493123 -1.5356596 0.5793532
q9_5 5 10 3.335000 2.401474 3.335000 3.335000 1.6481570 0.000000 6.670000 6.670000 0.0000000 -1.4846939 0.7594129
q9_6 6 8 5.002500 2.301369 6.670000 5.002500 0.0000000 2.223333 6.670000 4.446667 -0.4226652 -2.0302083 0.8136567
q9_7 7 6 4.817222 2.599177 5.558333 4.817222 1.6481570 0.000000 6.670000 6.670000 -0.8808986 -0.9042732 1.0611096
q9_8 8 13 5.130769 2.497684 6.670000 5.457273 0.0000000 0.000000 6.670000 6.670000 -0.9434920 -0.9411600 0.6927330
q9_9 9 6 6.299444 0.907672 6.670000 6.299444 0.0000000 4.446667 6.670000 2.223333 -1.3608276 -0.0833333 0.3705556
q9_10 10 9 4.323148 1.296944 4.446667 4.323148 1.6481570 2.223333 6.670000 4.446667 0.1848612 -0.8789085 0.4323148
q9_11 11 9 3.211482 2.513230 2.223333 3.211482 3.2963140 0.000000 6.670000 6.670000 0.1215625 -1.5896520 0.8377434
q9_12 12 6 3.335000 2.331852 3.335000 3.335000 1.6481570 0.000000 6.670000 6.670000 0.0000000 -1.5709366 0.9519745
q9_13 13 9 4.199630 2.063166 4.446667 4.199630 3.2963140 2.223333 6.670000 4.446667 0.1819657 -1.9406097 0.6877220
q9_14 14 5 5.002500 1.667500 5.002500 5.002500 2.4722355 3.335000 6.670000 3.335000 0.0000000 -2.2000000 0.7457287
q9_15 15 6 5.743611 1.299589 6.114167 5.743611 0.8240785 3.335000 6.670000 3.335000 -0.8808986 -0.9042733 0.5305548
q9_16 16 6 5.928889 1.815344 6.670000 5.928889 0.0000000 2.223333 6.670000 4.446667 -1.3608276 -0.0833333 0.7411111

And p-values

# Pairwise p-value table between the remaining question-9 variants (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q9_1 q9_2 q9_3 q9_4 q9_5 q9_6 q9_7 q9_8 q9_9 q9_10 q9_11 q9_12 q9_13 q9_14 q9_15 q9_16
q9_1 1.000 0.000* 1.000 0.000* 0.019* 0.470 0.468 0.501 0.366 0.049* 0.023* 0.058 0.094 0.451 0.972 0.818
q9_2 1.000 0.000* 0.250 0.459 0.027* 0.107 0.005* 0.000* 0.006* 0.590 0.559 0.069 0.035* 0.001* 0.006*
q9_3 1.000 0.000* 0.021* 0.481 0.474 0.516 0.396 0.061 0.025* 0.060 0.104 0.463 0.973 0.824
q9_4 1.000 0.174 0.010* 0.047* 0.002* 0.000* 0.006* 0.245 0.255 0.025* 0.011* 0.000* 0.002*
q9_5 1.000 0.154 0.283 0.096 0.004* 0.277 0.914 1.000 0.410 0.145 0.021* 0.029*
q9_6 1.000 0.893 0.906 0.179 0.477 0.146 0.210 0.463 1.000 0.461 0.416
q9_7 1.000 0.810 0.234 0.680 0.261 0.323 0.637 0.890 0.459 0.413
q9_8 1.000 0.156 0.335 0.095 0.157 0.352 0.902 0.492 0.445
q9_9 1.000 0.004* 0.006* 0.025* 0.020* 0.171 0.413 0.668
q9_10 1.000 0.261 0.376 0.881 0.457 0.063 0.097
q9_11 1.000 0.924 0.376 0.138 0.025* 0.031*
q9_12 1.000 0.479 0.202 0.059 0.059
q9_13 1.000 0.447 0.099 0.113
q9_14 1.000 0.443 0.401
q9_15 1.000 0.843
q9_16 1.000

2.6.3 Question 5 on metric conversions passing on first attempt

# First-attempt rows of Milestone 2 with a score above 79.99.
st100 <- subset(m2, m2$attempt == 1 & m2$score > 79.99)
results <- variationWithinQuestionTypes(st100, pat = "q5_")
aveQ <- results[["ave"]]
indivQ <- results[["indi"]]

# One box per question-5 variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q5_1 1 28 5.708571 1.148959 6.660 5.827500 0.000000 3.33 6.66 3.33 -0.7319624 -0.7270408 0.2171329
q5_2 2 24 5.203125 1.498299 4.995 5.244750 2.468529 3.33 6.66 3.33 -0.2305026 -1.7759732 0.3058390
q5_3 3 27 5.365000 1.686211 6.660 5.574130 0.000000 0.00 6.66 6.66 -1.2862884 1.3693107 0.3245115
q5_4 4 23 5.863696 1.655099 6.660 6.221842 0.000000 0.00 6.66 6.66 -2.1818668 4.3766065 0.3451120

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table between the question-5 variants for this group (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q5_1 q5_2 q5_3 q5_4
q5_1 1.000 0.185 0.384 0.706
q5_2 1.000 0.718 0.159
q5_3 1.000 0.298
q5_4 1.000

We see that the difference disappears; therefore questions q5_2 and q5_3 are not unfair. Rather, they discriminate between high-performing and low-performing students.

3 Milestone 3

The questions for Milestone 3:

  1. Organic nomenclature: Suffixes
  2. Organic nomenclature: Func Groups
  3. Peptides
  4. ID the interaction that breaks when melting (doubled)
  5. ID the major factor affecting melting
  6. Enthalpy of phase change
  7. Predict final T of mixture based on heat capacity
  8. Gas Trends
  9. Heating curves
  10. Isomerism
  11. NMR: number of peaks and number of sp2, sp3
  12. Transition among orbitals
  13. SI prefixes and mole conversion

3.1 Overall performance in Milestone 3

# NOTE: m1df is deliberately reused for Milestone 3 (there is no m3df) so the
# statements below can stay identical from one milestone to the next.
m1df <- buildDF_fromMilestone(m3, 13)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each score band (rows of flow) after each attempt.
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format F21 rows for the stacked/faceted comparison chart, built in one
# call rather than nine rbind() steps onto an empty frame (which left
# auto-generated column names). Rows 1-3 of flow are labelled F, C, A.
# Pct is stored as text because the Fall 2019 rows are appended later as
# character vectors; the column is converted with as.numeric() before plotting.
stackAndGrouped <- data.frame(
  Grade = rep(c("F", "C", "A"), times = 3),
  Pct = as.character(c(pct1st[1:3], pct2nd[1:3], pct3rd[1:3])),
  Year = "F21",
  Attempt = rep(c("First", "Second", "Third"), each = 3),
  stringsAsFactors = FALSE
)

# Turn the percentages into display labels such as "43 / 22.05%".
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow$`After 1st` <- paste(flow$`After 1st`, pct1st, sep = " / ")
flow$`After 2nd` <- paste(flow$`After 2nd`, pct2nd, sep = " / ")
flow$`After 3rd` <- paste(flow$`After 3rd`, pct3rd, sep = " / ")

# Prepend the score-band key (also used later to merge with Fall 2019).
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)
scores After 1st After 2nd After 3rd
0 43 / 22.05% 26 / 13.33% 12 / 6.154%
80 30 / 15.38% 28 / 14.36% 27 / 13.85%
100 122 / 62.56% 141 / 72.31% 156 / 80%
# Label the score bands, then draw one stacked bar per attempt.
rownames(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 3: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

3.1.1 Analyzing students who settled

Some students do not use all of their attempts even though they have not reached the highest (>80) score band.

# From here on `results` holds only the settling table.
results <- results$settle
# Counts of students whose last attempt was the 1st (n1_*) or 2nd (n2_*):
# *_70 = MaxScore below 70; *_80 = MaxScore above 69.99 (no upper bound applied).
n1_70 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore < 70, , drop = FALSE])
n1_80 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore > 69.99, , drop = FALSE])
n2_70 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore < 70, , drop = FALSE])
n2_80 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore > 69.99, , drop = FALSE])
# Students settling before 3rd attempt with Score < 70 (failing) 70< Score < 80
Settled in 1st attempt 4 7
Settled in 2nd attempt 3 7
Total 7 14

3.1.2 Comparing with Fall 2019

# Fall 2019 counterpart of the Milestone 3 flow table.
m1df19 <- buildDF_fromMilestone(m3f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the nine F19 rows (grade, percentage, year, attempt) to the chart data.
# The temporary frame borrows the existing column names so rbind() lines up,
# whatever those names currently are; percentages are text until converted below.
f19_rows <- data.frame(
  rep(c("F", "C", "A"), times = 3),
  as.character(c(pct1st[1:3], pct2nd[1:3], pct3rd[1:3])),
  "F19",
  rep(c("First", "Second", "Third"), each = 3),
  stringsAsFactors = FALSE
)
names(f19_rows) <- names(stackAndGrouped)
stackAndGrouped <- rbind(stackAndGrouped, f19_rows)
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

library(ggplot2)
# Stacked bars per year, one facet per attempt. The fill colours are named so
# the grade-to-colour mapping does not depend on the alphabetical level order.
ggplot() +
  geom_bar(
    data = stackAndGrouped,
    aes(y = Pct, x = Year, fill = Grade),
    stat = "identity",
    position = "stack"
  ) +
  theme_bw() +
  scale_fill_manual(values = c("A" = "green", "C" = "yellow", "F" = "red")) +
  facet_grid(~ Attempt)

# Turn the F19 percentages into display labels such as "11 / 6.011%".
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19$`After 1st` <- paste(flow19$`After 1st`, pct1st, sep = " / ")
flow19$`After 2nd` <- paste(flow19$`After 2nd`, pct2nd, sep = " / ")
flow19$`After 3rd` <- paste(flow19$`After 3rd`, pct3rd, sep = " / ")

colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
# merge() sorts on the text key ("100" before "80"), so convert the key to
# numbers and reorder to show the bands as 0, 80, 100.
mergedFlow <- merge(flow, flow19, by = "scores")
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])
scores After 1st After 2nd After 3rd 1st att F19 2nd att F19 3rd att F19
1 0 43 / 22.05% 26 / 13.33% 12 / 6.154% 11 / 6.011% 3 / 1.639% 3 / 1.639%
3 80 30 / 15.38% 28 / 14.36% 27 / 13.85% 21 / 11.48% 14 / 7.65% 9 / 4.918%
2 100 122 / 62.56% 141 / 72.31% 156 / 80% 151 / 82.51% 166 / 90.71% 171 / 93.44%
  • Students settling in F19
# Fall 2019 settling table; same counts as for F21, stored with a _19 suffix.
results <- results19$settle
# *_70 = MaxScore below 70; *_80 = MaxScore above 69.99 (no upper bound applied).
n1_70_19 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore < 70, , drop = FALSE])
n1_80_19 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore > 69.99, , drop = FALSE])
n2_70_19 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore < 70, , drop = FALSE])
n2_80_19 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore > 69.99, , drop = FALSE])
# Students settling before 3rd attempt (F21 / F19) Score < 70 (F21) Score < 70 (F19) 70< Score < 80 (F21) 70< Score < 80 (F19)
Settled in 1st attempt 4 2 7 3
Settled in 2nd attempt 3 0 7 2
Total 7 2 14 5

3.2 M3 - Attempt 1 averages

# Milestone 3, attempt 1: per-question averages and standard deviations.
# presumably the arguments are (data, number of questions, attempt) -- confirm
results <- buildStats(m1df, 13, 1)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
One 195 122 (63%) 30 (15%) 43 (22%)

Table of averages per question and their standard deviation

# Table of per-question averages, shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
Avg > 80 7.46 7.50 7.01 6.60 6.73 6.73 6.54 7.47 6.55 6.96 7.03 7.48 6.54
Avg 70><80 6.26 7.19 5.19 5.99 4.79 5.44 4.88 6.78 5.29 5.32 6.49 6.84 3.86
Avg < 70 4.05 4.38 4.24 4.51 2.75 4.33 4.54 5.44 4.48 4.51 5.71 5.79 3.09
# Table of per-question standard deviations, shown with two decimals.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
SD > 80 0.69 0.43 1.47 1.51 1.52 1.42 1.55 0.55 1.53 1.42 0.76 0.53 1.58
SD 70><80 1.71 1.19 2.73 1.36 2.31 1.44 2.06 1.30 1.93 2.67 0.99 1.18 2.31
SD < 70 2.30 2.42 2.52 1.89 2.40 2.05 2.23 1.86 1.77 2.39 1.57 2.19 2.19
# One series per row of m1dfat1 (score group), one point per question.
# matplot() marks the series with the characters 1, 2, 3 by default, which is
# what the legend text decodes.
# The y-axis label now gives the Milestone 3 per-question maximum (7.7, as in
# the describe() tables) instead of the stale 6.33.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/7.7",
  main = "M3: Average score per question on 1st attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

3.3 M3 - Attempt 2 averages

# Milestone 3, attempt 2. The attempt-1 variable names are reused on purpose.
results <- buildStats(m1df, 13, 2)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Two 60 19 (32%) 17 (28%) 24 (40%)

Table of averages per question and their standard deviation

# Table of per-question averages for attempt 2, shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
Avg > 80 7.05 7.36 6.20 5.74 6.08 5.75 6.62 7.16 6.57 6.93 6.76 7.30 5.70
Avg 70><80 7.07 7.15 4.47 5.89 4.83 5.34 5.13 6.97 4.80 5.36 6.39 6.48 4.58
Avg < 70 4.81 6.02 4.75 5.40 3.32 4.75 4.38 6.46 4.24 4.49 5.48 6.02 2.22
# Table of per-question standard deviations for attempt 2.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
SD > 80 0.93 0.57 2.27 1.56 1.95 2.11 1.56 0.76 1.13 1.15 0.94 0.71 2.00
SD 70><80 1.10 1.17 2.76 1.93 2.25 2.19 1.81 0.94 2.24 2.56 1.17 1.65 2.33
SD < 70 2.42 1.71 2.57 1.77 2.33 2.13 1.93 1.52 1.99 3.01 1.23 2.15 1.91
# One series per row of m1dfat1 (score group), one point per question.
# Title typo fixed ("2md" -> "2nd"); the y-axis label now gives the Milestone 3
# per-question maximum (7.7) instead of the stale 6.33.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/7.7",
  main = "M3: Average score per question on 2nd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

3.4 M3 - Attempt 3 averages

# Milestone 3, attempt 3. The attempt-1 variable names are reused on purpose.
results <- buildStats(m1df, 13, 3)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Three 33 15 (45%) 13 (39%) 5 (15%)

Table of averages per question and their standard deviation

# Table of per-question averages for attempt 3, shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
Avg > 80 7.19 7.40 5.83 6.33 7.19 6.37 5.65 7.30 6.47 7.43 6.33 6.84 5.45
Avg 70><80 6.04 7.02 5.85 5.23 3.95 6.28 6.12 6.72 5.33 6.53 5.32 6.28 3.65
Avg < 70 4.93 5.17 4.56 4.11 1.54 4.93 5.13 4.94 4.00 3.04 5.62 5.32 3.80
# Table of per-question standard deviations for attempt 3.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12 q13
SD > 80 1.26 0.53 1.96 1.64 1.06 1.73 1.74 0.85 1.04 0.65 0.80 1.20 2.47
SD 70><80 1.93 0.99 2.51 1.61 1.69 1.17 1.97 1.06 1.62 1.92 1.64 1.62 2.39
SD < 70 2.01 3.15 3.18 1.07 2.30 1.29 2.57 2.37 1.76 1.92 1.38 1.59 3.00
# One series per row of m1dfat1 (score group), one point per question.
# The y-axis label now gives the Milestone 3 per-question maximum (7.7)
# instead of the stale 6.33.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/7.7",
  main = "M3: Average score per question on 3rd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

3.5 Variation within question groups

3.5.1 Adding one new question on Question 4 on “What Breaks”

Question 4 contained one new question that no one had seen before and that included the “tricky” sodium carboxylate. We can look into it and see whether or not scores are significantly different among questions (the second question, q4_2, was the one everyone took).

This single question was kept in place across attempts, so the fact that some students did not get it right on the second and third attempts also tells us how much they review their previous attempts.

# Milestone 3: gather every question-4 variant (column titles matching "q4_").
results <- variationWithinQuestionTypes(m3, pat = "q4_")
aveQ <- results[["ave"]]
indivQ <- results[["indi"]]

# One box per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q4_1 1 69 6.305072 1.729340 6.416667 6.551754 1.90267 1.283333 7.7 6.416667 -1.0527392 -0.0086944 0.2081880
q4_2 2 288 5.084317 1.691476 5.133333 5.094612 1.90267 1.283333 7.7 6.416667 -0.0119117 -0.9826020 0.0996712
q4_3 3 61 5.995902 1.810688 6.416667 6.207143 1.90267 0.000000 7.7 7.700000 -1.0293704 0.6111612 0.2318348
q4_4 4 61 6.837432 1.439038 7.700000 7.123810 0.00000 2.566667 7.7 5.133333 -1.5621702 1.5296159 0.1842499
q4_5 5 52 6.416667 1.481866 6.416667 6.661111 1.90267 1.283333 7.7 6.416667 -1.2740566 1.5000000 0.2054978

Let’s look into the p-values among question 4 variations.

# Pairwise p-value table between the question-4 variants (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q4_1 q4_2 q4_3 q4_4 q4_5
q4_1 1.000 0.000* 0.323 0.058 0.704
q4_2 1.000 0.001* 0.000* 0.000*
q4_3 1.000 0.005* 0.177
q4_4 1.000 0.130
q4_5 1.000

We can clearly see that q4_2 is significantly lower.

3.5.2 Question 5 on deciding what factor decides the outcome

Question 5 is the lowest for the low performers.

# Milestone 3: gather every question-5 variant (column titles matching "q5_").
results <- variationWithinQuestionTypes(m3, pat = "q5_")
aveQ <- results[["ave"]]
indivQ <- results[["indi"]]

# One box per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q5_1 1 45 5.475556 2.768714 7.700000 5.827027 0.00000 0.000000 7.7 7.700000 -0.8955661 -0.6120356 0.4127355
q5_2 2 60 6.031667 2.354524 7.700000 6.523611 0.00000 0.000000 7.7 7.700000 -1.3753759 0.9725644 0.3039677
q5_3 3 50 5.800667 1.995556 5.133333 6.031667 3.80534 0.000000 7.7 7.700000 -0.7226793 -0.2403807 0.2822143
q5_4 4 52 5.034615 2.639196 5.133333 5.316667 3.80534 0.000000 7.7 7.700000 -0.4559226 -1.1153820 0.3659907
q5_6 5 45 5.561111 1.641646 5.133333 5.618919 1.90267 2.566667 7.7 5.133333 -0.0495421 -1.2184728 0.2447221
q5_5 6 42 3.850000 2.791569 5.133333 3.850000 3.80534 0.000000 7.7 7.700000 -0.0555182 -1.3388992 0.4307485
q5_7 7 39 5.528205 2.602782 7.700000 5.833333 0.00000 0.000000 7.7 7.700000 -0.7420168 -0.8420089 0.4167787

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table between the question-5 variants (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q5_1 q5_2 q5_3 q5_4 q5_6 q5_5 q5_7
q5_1 1.000 0.281 0.517 0.426 0.859 0.008* 0.929
q5_2 1.000 0.579 0.039* 0.231 0.000* 0.332
q5_3 1.000 0.101 0.523 0.000* 0.590
q5_4 1.000 0.235 0.039* 0.376
q5_6 1.000 0.001* 0.946
q5_5 1.000 0.006*
q5_7 1.000

3.6 Variations within question groups among different groups of students

Will the big variation that we see in question 4 be different if we look only at the students who passed the milestone on their first attempt?

3.6.1 Question 4 on “what breaks” passing on 1st attempt

Let’s look at question 4 performance among students who passed the milestone on their 1st attempt. We have to remove questions when they were not attempted or have too small a sample. The small samples also prevent us from computing p-values.

# First-attempt rows of Milestone 3 with a score above 79.99.
st100 <- subset(m3, m3$attempt == 1 & m3$score > 79.99)
results <- variationWithinQuestionTypes(st100, pat = "q4_")
aveQ <- results[["ave"]]
# All five q4_ variants have answers in this group, so no column is dropped.
indivQ <- results[["indi"]]

# One box per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q4_1 1 25 7.238000 0.8183871 7.700000 7.394444 0.00000 5.133333 7.7 2.566667 -1.4333383 0.7753774 0.1636774
q4_2 2 122 5.775000 1.7402506 6.416667 5.932143 1.90267 1.283333 7.7 6.416667 -0.4930767 -0.9452706 0.1575549
q4_3 3 26 6.959615 1.3174316 7.700000 7.233333 0.00000 2.566667 7.7 5.133333 -1.9343090 3.1626093 0.2583696
q4_4 4 24 7.432639 0.7548951 7.700000 7.635833 0.00000 5.133333 7.7 2.566667 -2.4174721 4.3227775 0.1540923
q4_5 5 21 7.088889 0.7720343 7.700000 7.171569 0.00000 5.133333 7.7 2.566667 -0.7419630 -0.5964347 0.1684717
#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

3.6.2 Question 4 on “What breaks” 2nd and 3rd attempt

This is proof that students who don’t do well in Milestones do not revise their own attempt because q4_2 keeps showing up and they keep answering it wrong.

# Second and third attempts of Milestone 3 (attempt number above 1).
st100 <- subset(m3, m3$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q4_")
aveQ <- results[["ave"]]
# All five q4_ variants have answers in this group, so no column is dropped.
indivQ <- results[["indi"]]

# One box per variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q4_1 1 27 6.274074 1.682072 6.416667 6.472464 1.90267 2.566667 7.7 5.133333 -0.8894264 -0.5195975 0.3237149
q4_2 2 93 4.553763 1.484072 3.850000 4.517333 1.90267 1.283333 7.7 6.416667 0.2173147 -0.4985959 0.1538911
q4_3 3 18 5.846296 1.474584 6.416667 5.855208 1.90267 3.850000 7.7 3.850000 -0.2378145 -1.4971689 0.3475627
q4_4 4 21 6.905556 1.374796 7.700000 7.171569 0.00000 2.566667 7.7 5.133333 -1.6807250 2.1954419 0.3000051
q4_5 5 18 6.202778 1.476408 6.416667 6.336458 1.90267 2.566667 7.7 5.133333 -0.5716151 -0.4377183 0.3479926

And p-values

# Pairwise p-value table between the question-4 variants on later attempts (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q4_1 q4_2 q4_3 q4_4 q4_5
q4_1 1.000 0.000* 0.373 0.159 0.882
q4_2 1.000 0.002* 0.000* 0.000*
q4_3 1.000 0.027* 0.474
q4_4 1.000 0.135
q4_5 1.000

3.6.3 Question 5 on “conflicting factors” passing on first attempt

# First-attempt rows of Milestone 3 with a score above 79.99.
st100 <- subset(m3, m3$attempt == 1 & m3$score > 79.99)
results <- variationWithinQuestionTypes(st100, pat = "q5_")
aveQ <- results[["ave"]]
indivQ <- results[["indi"]]

# One box per question-5 variant.
boxplot(indivQ)

# Descriptive statistics per variant.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q5_1 1 16 6.897917 1.545338 7.700000 7.150000 0.00000 2.566667 7.7 5.133333 -1.5503911 1.2301065 0.3863345
q5_2 2 27 7.224691 1.016008 7.700000 7.365217 0.00000 5.133333 7.7 2.566667 -1.5316762 0.3635615 0.1955309
q5_3 3 25 6.776000 1.459468 7.700000 6.966667 0.00000 2.566667 7.7 5.133333 -1.1863254 0.3263893 0.2918937
q5_4 4 21 6.722222 1.717211 7.700000 7.096078 0.00000 2.566667 7.7 5.133333 -1.3686912 0.4808129 0.3747262
q5_6 5 26 5.873718 1.709218 6.416667 6.008333 1.90267 2.566667 7.7 5.133333 -0.4028562 -1.1583659 0.3352052
q5_5 6 13 6.317949 1.694578 7.700000 6.533333 0.00000 2.566667 7.7 5.133333 -0.6737567 -0.8226490 0.4699914
q5_7 7 20 7.058333 1.411974 7.700000 7.379167 0.00000 2.566667 7.7 5.133333 -1.9146034 2.6726134 0.3157269

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table between the question-5 variants for this group (ndig = 3).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)
q5_1 q5_2 q5_3 q5_4 q5_6 q5_5 q5_7
q5_1 1.000 0.458 0.803 0.746 0.053 0.350 0.750
q5_2 1.000 0.208 0.244 0.001* 0.094 0.657
q5_3 1.000 0.910 0.048* 0.417 0.515
q5_4 1.000 0.099 0.507 0.497
q5_6 1.000 0.449 0.014*
q5_5 1.000 0.204
q5_7 1.000

4 Milestone 4

Questions

  1. Heat Curves
  2. Ions in water
  3. Solubility ranking
  4. Solution preparation
  5. Vapor pressure
  6. Major factor in melting point
  7. What breaks when melting
  8. NMR signals
  9. Isomerism
  10. Transition in a Bohr model of atom
  11. Electrostatics
  12. Isotopes

4.1 Overall performance in Milestone 4

# NOTE: m1df is deliberately reused for Milestone 4 (there is no m4df) so the
# statements below can stay identical from one milestone to the next.
m1df <- buildDF_fromMilestone(m4, 12)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each score band (rows of flow) after each attempt.
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format F21 rows for the stacked/faceted comparison chart, built in one
# call rather than nine rbind() steps onto an empty frame (which left
# auto-generated column names). Rows 1-3 of flow are labelled F, C, A.
# Pct is stored as text because the Fall 2019 rows are appended later as
# character vectors; the column is converted with as.numeric() before plotting.
stackAndGrouped <- data.frame(
  Grade = rep(c("F", "C", "A"), times = 3),
  Pct = as.character(c(pct1st[1:3], pct2nd[1:3], pct3rd[1:3])),
  Year = "F21",
  Attempt = rep(c("First", "Second", "Third"), each = 3),
  stringsAsFactors = FALSE
)

# Turn the percentages into display labels such as "33 / 17.65%".
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow$`After 1st` <- paste(flow$`After 1st`, pct1st, sep = " / ")
flow$`After 2nd` <- paste(flow$`After 2nd`, pct2nd, sep = " / ")
flow$`After 3rd` <- paste(flow$`After 3rd`, pct3rd, sep = " / ")

# Prepend the score-band key (also used later to merge with Fall 2019).
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)
scores After 1st After 2nd After 3rd
0 33 / 17.65% 19 / 10.16% 12 / 6.417%
80 38 / 20.32% 29 / 15.51% 26 / 13.9%
100 116 / 62.03% 139 / 74.33% 149 / 79.68%
# Label the score bands, then draw one stacked bar per attempt.
rownames(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 4: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

4.1.1 Analyzing students who settled

Some students do not use all of their attempts even though they have not reached the highest (>80) score band.

# From here on `results` holds only the settling table.
results <- results$settle
# Counts of students whose last attempt was the 1st (n1_*) or 2nd (n2_*):
# *_70 = MaxScore below 70; *_80 = MaxScore above 69.99 (no upper bound applied).
n1_70 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore < 70, , drop = FALSE])
n1_80 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore > 69.99, , drop = FALSE])
n2_70 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore < 70, , drop = FALSE])
n2_80 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore > 69.99, , drop = FALSE])
# Students settling before 3rd attempt with Score < 70 (failing) 70< Score < 80
Settled in 1st attempt 3 11
Settled in 2nd attempt 4 7
Total 7 18

4.1.2 Comparing with Fall 2019

# Fall 2019 counterpart of the Milestone 4 flow table.
m1df19 <- buildDF_fromMilestone(m4f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the nine F19 rows (grade, percentage, year, attempt) to the chart data.
# The temporary frame borrows the existing column names so rbind() lines up,
# whatever those names currently are; percentages are text until converted below.
f19_rows <- data.frame(
  rep(c("F", "C", "A"), times = 3),
  as.character(c(pct1st[1:3], pct2nd[1:3], pct3rd[1:3])),
  "F19",
  rep(c("First", "Second", "Third"), each = 3),
  stringsAsFactors = FALSE
)
names(f19_rows) <- names(stackAndGrouped)
stackAndGrouped <- rbind(stackAndGrouped, f19_rows)
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

library(ggplot2)
# Stacked bars per year, one facet per attempt. The fill colours are named so
# the grade-to-colour mapping does not depend on the alphabetical level order.
ggplot() +
  geom_bar(
    data = stackAndGrouped,
    aes(y = Pct, x = Year, fill = Grade),
    stat = "identity",
    position = "stack"
  ) +
  theme_bw() +
  scale_fill_manual(values = c("A" = "green", "C" = "yellow", "F" = "red")) +
  facet_grid(~ Attempt)

# Turn the F19 percentages into display labels such as "44 / 24.86%".
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19$`After 1st` <- paste(flow19$`After 1st`, pct1st, sep = " / ")
flow19$`After 2nd` <- paste(flow19$`After 2nd`, pct2nd, sep = " / ")
flow19$`After 3rd` <- paste(flow19$`After 3rd`, pct3rd, sep = " / ")

colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
# merge() sorts on the text key ("100" before "80"), so convert the key to
# numbers and reorder to show the bands as 0, 80, 100.
mergedFlow <- merge(flow, flow19, by = "scores")
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])
scores After 1st After 2nd After 3rd 1st att F19 2nd att F19 3rd att F19
1 0 33 / 17.65% 19 / 10.16% 12 / 6.417% 44 / 24.86% 22 / 12.43% 14 / 7.91%
3 80 38 / 20.32% 29 / 15.51% 26 / 13.9% 37 / 20.9% 30 / 16.95% 22 / 12.43%
2 100 116 / 62.03% 139 / 74.33% 149 / 79.68% 96 / 54.24% 125 / 70.62% 141 / 79.66%
  • Students settling in F19
# Fall 2019 settling table; same counts as for F21, stored with a _19 suffix.
results <- results19$settle
# *_70 = MaxScore below 70; *_80 = MaxScore above 69.99 (no upper bound applied).
n1_70_19 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore < 70, , drop = FALSE])
n1_80_19 <- nrow(results[results$MaxAttempt == 1 & results$MaxScore > 69.99, , drop = FALSE])
n2_70_19 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore < 70, , drop = FALSE])
n2_80_19 <- nrow(results[results$MaxAttempt == 2 & results$MaxScore > 69.99, , drop = FALSE])
# Students settling before 3rd attempt (F21 / F19) Score < 70 (F21) Score < 70 (F19) 70< Score < 80 (F21) 70< Score < 80 (F19)
Settled in 1st attempt 3 1 11 4
Settled in 2nd attempt 4 4 7 5
Total 7 5 18 9

4.1.3 Dimensional analysis questions in M4-F19

In Fall2019 there were two dimensional analysis questions, q4 and q6

# Milestone 4, Fall 2019, attempt 1: per-question averages and standard deviations.
# presumably the arguments are (data, number of questions, attempt) -- confirm
results <- buildStats(m1df19, 12, 1)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
One 177 96 (54%) 37 (21%) 44 (25%)

Table of averages per question and their standard deviation

# Table of per-question averages (Fall 2019, attempt 1), shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 7.82 7.41 7.23 6.08 7.29 7.81 7.32 7.75 7.94 8.00 8.05 7.35
Avg 70><80 6.76 5.71 5.90 2.70 6.12 5.63 6.68 7.43 7.54 7.73 7.74 6.05
Avg < 70 6.07 3.29 3.92 0.76 4.86 1.70 5.55 6.97 6.91 6.99 6.06 5.46
# Table of per-question standard deviations (Fall 2019, attempt 1).
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 0.93 1.49 2.27 3.73 1.57 2.03 1.85 0.77 0.87 1.01 0.62 1.40
SD 70><80 1.51 2.07 3.01 3.96 1.75 3.95 2.22 0.86 1.14 1.19 1.26 1.97
SD < 70 1.72 2.89 2.86 2.43 2.11 3.40 2.47 1.12 1.78 1.75 2.30 1.94
# One series per row of m1dfat1 (score group), one point per question.
# The group averages shown above reach 8.05, so the old "/6.33" label could not
# be the per-question maximum.
# NOTE(review): 8.33 assumes 12 equally weighted questions (100 / 12), as in
# the F21 Milestone 4 data -- confirm for Fall 2019.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M4-F19: Average score per question on 1st attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

4.2 M4 - Attempt 1 averages

# Milestone 4 (F21), attempt 1: per-question averages and standard deviations.
# presumably the arguments are (data, number of questions, attempt) -- confirm
results <- buildStats(m1df, 12, 1)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
One 187 116 (62%) 39 (21%) 32 (17%)

Table of averages per question and their standard deviation

# Table of per-question averages for attempt 1, shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 7.54 7.33 7.44 5.46 7.74 7.88 7.79 7.88 7.65 7.76 7.96 8.21
Avg 70><80 7.16 6.42 5.31 1.50 6.76 6.34 6.94 7.33 7.08 5.55 7.13 7.90
Avg < 70 6.21 2.08 3.52 1.04 5.12 4.25 6.72 6.35 5.16 4.95 6.66 7.25
# Table of per-question standard deviations for attempt 1.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 1.23 1.66 1.98 3.98 1.17 1.15 0.96 0.70 1.81 1.89 0.93 0.43
SD 70><80 1.48 2.79 2.99 3.24 1.89 2.54 1.39 1.10 1.90 3.31 1.48 1.01
SD < 70 1.87 2.64 2.95 2.80 2.35 3.15 1.58 1.58 3.08 3.51 1.64 1.99
# One series per row of m1dfat1 (score group), one point per question.
# The y-axis label now gives the Milestone 4 per-question maximum (about 8.33
# for 12 questions) instead of the stale 6.33.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M4: Average score per question on 1st attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

4.3 M4 - Attempt 2 averages

# Milestone 4, attempt 2. The attempt-1 variable names are reused on purpose.
results <- buildStats(m1df, 12, 2)
m1dfat1sd <- results$sd
m1dfat1 <- results$ave
Attempt Total students Students with score > 80 Students with score 70-80 Students with score < 70%
Two 57 23 (40%) 15 (26%) 19 (33%)

Table of averages per question and their standard deviation

# Table of per-question averages for attempt 2, shown with two decimals.
library(knitr)
knitr::kable(m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 7.55 7.13 7.92 4.35 7.48 7.24 7.06 7.68 7.61 7.00 7.97 8.09
Avg 70><80 6.67 5.00 5.65 0.00 7.59 7.22 7.31 6.72 7.40 6.85 7.22 8.24
Avg < 70 6.22 2.63 3.07 0.00 5.33 2.92 5.85 6.44 6.36 4.97 6.84 7.67
# Table of per-question standard deviations for attempt 2.
knitr::kable(m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 1.01 2.02 1.29 4.26 1.66 1.62 1.44 0.83 1.43 2.76 0.86 0.68
SD 70><80 1.41 2.62 3.08 0.00 1.16 1.41 1.33 1.53 1.35 2.54 2.24 0.36
SD < 70 1.87 2.85 3.02 0.00 2.23 2.35 2.29 1.14 2.86 3.41 1.66 1.69
# One series per row of m1dfat1 (score group), one point per question.
# Title typo fixed ("2md" -> "2nd"); the y-axis label now gives the Milestone 4
# per-question maximum (about 8.33 for 12 questions) instead of the stale 6.33.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M4: Average score per question on 2nd attempt"
)
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"))

4.4 M4 - Attempt 3 averages

# Compute the statistics for the third attempt and keep the averages and
# standard deviations used by the tables and plot that follow.
# NOTE(review): presumably 12 is the number of questions and 3 the attempt;
# the m1df / m1dfat1 names are reused here for milestone 4 - confirm that
# m1df holds the M4 data at this point.
results <- buildStats(m1df, 12, 3)
m1dfat1 <- results$ave
m1dfat1sd <- results$sd
Attempt Total students Students with score > 80% Students with score 70-80% Students with score < 70%
Three 23 10 (43%) 7 (30%) 6 (26%)

Table of averages per question and their standard deviation

# Render the third-attempt per-question averages (one row per score band)
# as a table, rounded to two decimals.
library(knitr)
knitr::kable(x = m1dfat1, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
Avg > 80 7.92 7.78 8.34 2.50 7.91 6.94 7.64 6.91 7.77 7.77 8.33 8.33
Avg 70><80 7.35 5.96 5.96 2.38 7.54 5.55 7.34 6.90 5.55 7.14 7.38 7.34
Avg < 70 5.56 2.78 5.56 0.00 4.63 2.78 6.48 6.94 6.02 5.55 6.66 6.48
# Render the third-attempt per-question standard deviations (one row per
# score band) as a table, rounded to two decimals.
knitr::kable(x = m1dfat1sd, digits = 2)
q1 q2 q3 q4 q5 q6 q7 q8 q9 q10 q11 q12
SD > 80 0.94 1.17 0.00 4.03 0.94 2.36 1.35 1.84 1.17 1.76 0.00 0.00
SD 70><80 1.05 3.38 2.97 4.07 1.35 2.27 1.05 1.42 3.21 3.15 1.31 2.08
SD < 70 1.52 3.52 3.05 0.00 2.27 3.04 0.72 1.25 3.25 3.51 2.79 1.90
# Plot the per-question averages for the third attempt. Transposing puts the
# questions on the x axis and turns each score band into one plotted series.
matplot(
  t(m1dfat1),
  type = "b",
  ylim = c(0, 9),
  xlab = "Question number",
  # The describe() tables in this report show a per-question maximum of 8.34,
  # so the label states that scale rather than the previous "/6.33".
  # NOTE(review): confirm 8.34 is the intended full score per question.
  ylab = "Average score/8.34",
  main = "M4: Average score per question on 3rd attempt"
)
# matplot() draws series 1-3 with colours 1:3, line types 1:3 and the digits
# "1"-"3" as point markers by default; passing the same keys makes the legend
# show them instead of bare text.
legend(
  "bottomright",
  legend = c("1:>80", "2:70-80", "3:<70"),
  col = 1:3,
  lty = 1:3,
  pch = c("1", "2", "3")
)

4.5 Variation within question groups

4.5.1 Question 3 on “solubility ranking”

While this is a straightforward question, a single mistake makes the rest of the ranking wrong as well. This is why low performers score significantly lower on this one.

# Collect every variant of question 3 (columns whose title matches "q3_"):
# "indi" holds the individual scores per variant, "ave" the per-variant
# mean and answer count.
results <- variationWithinQuestionTypes(m4, pat = "q3_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q3_1 1 49 6.893265 2.373185 8.34 7.221220 0.000000 0.00 8.34 8.34 -1.2419413 0.0674694 0.3390264
q3_2 2 47 5.412128 3.294878 6.95 5.666923 2.060814 0.00 8.34 8.34 -0.4703762 -1.4660768 0.4806074
q3_3 3 40 6.950000 2.376484 8.34 7.384375 0.000000 1.39 8.34 6.95 -1.2005794 -0.2789127 0.3757551
q3_4 4 38 6.950000 2.524026 8.34 7.384375 0.000000 0.00 8.34 8.34 -1.4767892 0.6065043 0.4094511
q3_5 5 48 6.283958 2.965867 8.34 6.672000 0.000000 0.00 8.34 8.34 -0.8838455 -0.9116062 0.4280860
q3_6 6 45 5.127556 3.203068 4.17 5.334595 6.182442 0.00 8.34 8.34 -0.1608619 -1.6541892 0.4774852

Let’s look into the p-values among question 3 variations.

# Pairwise p-values between the question 3 variants, shown with three digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(x = pval, digits = 3)
q3_1 q3_2 q3_3 q3_4 q3_5 q3_6
q3_1 1.000 0.014* 0.911 0.915 0.267 0.003*
q3_2 1.000 0.014* 0.017* 0.179 0.675
q3_3 1.000 1.000 0.246 0.004*
q3_4 1.000 0.264 0.005*
q3_5 1.000 0.075
q3_6 1.000

Conclusion: Questions q3_2 (column AI) and q3_6 (column ES) score significantly lower than the rest. They are worth looking into.

  • q3_2: 7661924: Consider the following solutes to be dissolved in hexane. Being 1 the most soluble and 6 the least soluble, rank them accordingly: Pentanone=>2,Water=>6,Ethanoic acid=>5,Dipropyl ether=>1,Butanoic acid=>4,Butanone=>3

4.5.2 Question 2 on ions in water

Question 2 requires not only avoiding mistakes but also knowing the nomenclature of ions.

# Collect every variant of question 2 (columns whose title matches "q2_"):
# "indi" holds the individual scores per variant, "ave" the per-variant
# mean and answer count.
results <- variationWithinQuestionTypes(m4, pat = "q2_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q2_1 1 42 6.950000 2.234991 8.34 7.440588 0.000000 0 8.34 8.34 -1.6495299 2.1650520 0.3448666
q2_2 2 61 5.742295 3.039785 5.56 6.127347 4.121628 0 8.34 8.34 -0.8793804 -0.6084872 0.3892046
q2_3 3 65 5.987692 3.038619 8.34 6.399245 0.000000 0 8.34 8.34 -0.9369246 -0.5851230 0.3768944
q2_4 4 55 5.560000 3.210068 5.56 5.868889 4.121628 0 8.34 8.34 -0.6377096 -1.1590909 0.4328454
q2_5 5 44 5.812727 3.046410 5.56 6.177778 4.121628 0 8.34 8.34 -0.9000883 -0.5982777 0.4592636

Let’s look into the p-values among question 2 variations.

# Pairwise p-values between the question 2 variants, shown with three digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(x = pval, digits = 3)
q2_1 q2_2 q2_3 q2_4 q2_5
q2_1 1.000 0.022* 0.062 0.014* 0.051
q2_2 1.000 0.651 0.755 0.907
q2_3 1.000 0.458 0.769
q2_4 1.000 0.690
q2_5 1.000

4.6 Variations within question groups among different groups of students

Will the big variation that we see in question 4 be different if we look only at the students who passed the milestone on their first attempt?

4.6.1 Question 2 on “ions in solution” passing on 1st attempt

Let’s look at question 2 performance among students who passed the milestone on their 1st attempt.

# Keep only first-attempt rows with a score above 79.99, then collect the
# question 2 variants (columns whose title matches "q2_") for that subset.
st100 <- subset(m4, m4$score > 79.99 & m4$attempt == 1)
results <- variationWithinQuestionTypes(st100, pat = "q2_")
indivQ <- results[["indi"]]
# NOTE(review): the disabled lines below refer to q9 columns, which the "q2_"
# pattern never selects - they look like leftovers from another analysis.
# remove q9_17 q9_1 as it was not attempted
# indivQ$q9_17 = NULL
# indivQ$q9_1 = NULL
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q2_1 1 19 7.462105 1.619052 8.34 7.685882 0 2.78 8.34 5.56 -1.4967847 1.145767 0.3714360
q2_2 2 28 7.148571 1.918382 8.34 7.413333 0 0.00 8.34 8.34 -1.8764854 4.132959 0.3625402
q2_3 3 30 7.413333 1.686010 8.34 7.760833 0 2.78 8.34 5.56 -1.5274886 1.161198 0.3078219
q2_4 4 23 7.252174 1.620816 8.34 7.462105 0 2.78 8.34 5.56 -1.0624995 0.013987 0.3379635
q2_5 5 16 7.471250 1.330824 8.34 7.545714 0 5.56 8.34 2.78 -0.7343901 -1.545810 0.3327059
#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

4.6.2 Question 2 on “ions in solution” 2nd and 3rd attempt

These data show how all students who took question 2 during their 2nd and 3rd attempts performed, regardless of whether they passed or not.

# Keep every row from an attempt after the first, then collect the question 2
# variants (columns whose title matches "q2_") for that subset.
st100 <- subset(m4, m4$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q2_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q2_1 1 14 6.552857 2.340475 6.95 6.950000 2.060814 0 8.34 8.34 -1.3777725 1.5729913 0.6255182
q2_2 2 16 4.865000 3.729761 5.56 4.964286 4.121628 0 8.34 8.34 -0.3493856 -1.7594039 0.9324403
q2_3 3 17 5.723529 3.180426 5.56 5.930667 4.121628 0 8.34 8.34 -0.8131637 -0.8903646 0.7713667
q2_4 4 18 4.478889 3.322193 4.17 4.517500 6.182442 0 8.34 8.34 -0.0526470 -1.6368358 0.7830483
q2_5 5 15 5.189333 2.947075 5.56 5.346154 4.121628 0 8.34 8.34 -0.4292674 -1.1933391 0.7609314

And p-values

# Pairwise p-values between the question 2 variants for the later attempts,
# shown with three digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(x = pval, digits = 3)
q2_1 q2_2 q2_3 q2_4 q2_5
q2_1 1.000 0.145 0.411 0.047* 0.178
q2_2 1.000 0.484 0.753 0.790
q2_3 1.000 0.266 0.626
q2_4 1.000 0.520
q2_5 1.000

4.6.3 Question 3 on “solubility ranking” passing on first attempt

# Keep only first-attempt rows with a score above 79.99, then collect the
# question 3 variants (columns whose title matches "q3_") for that subset.
st100 <- subset(m4, m4$score > 79.99 & m4$attempt == 1)
results <- variationWithinQuestionTypes(st100, pat = "q3_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q3_1 1 19 7.535263 1.9280340 8.34 7.767647 0 2.78 8.34 5.56 -1.7979077 1.4091320 0.4423213
q3_2 2 15 7.228000 1.9797482 8.34 7.484615 0 2.78 8.34 5.56 -1.4342811 0.4974912 0.5111688
q3_3 3 20 7.575500 1.9384081 8.34 8.079375 0 1.39 8.34 6.95 -2.1147600 3.1575153 0.4334412
q3_4 4 19 7.388947 2.2246096 8.34 7.685882 0 1.39 8.34 6.95 -2.0321101 2.6167305 0.5103604
q3_5 5 22 8.087273 0.8180009 8.34 8.340000 0 5.56 8.34 2.78 -2.6542231 5.2915289 0.1743984
q3_6 6 21 6.751429 2.5737799 8.34 7.031765 0 2.78 8.34 5.56 -0.8817334 -1.2766440 0.5616448

Let’s look into the p-values among question 3 variations.

# Pairwise p-values between the question 3 variants for first-attempt
# passers, shown with three digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(x = pval, digits = 3)
q3_1 q3_2 q3_3 q3_4 q3_5 q3_6
q3_1 1.000 0.653 0.949 0.830 0.257 0.280
q3_2 1.000 0.608 0.825 0.130 0.535
q3_3 1.000 0.782 0.284 0.253
q3_4 1.000 0.209 0.406
q3_5 1.000 0.032*
q3_6 1.000

4.6.4 Question 3 on “solubility ranking” 2nd and 3rd attempt

These data show how all students who took question 3 during their 2nd and 3rd attempts performed, regardless of whether they passed or not.

# Keep every row from an attempt after the first, then collect the question 3
# variants (columns whose title matches "q3_") for that subset.
st100 <- subset(m4, m4$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q3_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant, rendered as a table.
knitr::kable(describe(indivQ))
vars n mean sd median trimmed mad min max range skew kurtosis se
q3_1 1 17 6.868235 2.223489 8.34 7.042667 0.000000 2.78 8.34 5.56 -0.9452118 -0.8377803 0.5392753
q3_2 2 14 4.765714 3.844136 5.56 4.865000 4.121628 0.00 8.34 8.34 -0.1720159 -1.9361203 1.0273885
q3_3 3 11 6.950000 2.407551 8.34 7.258889 0.000000 2.78 8.34 5.56 -0.9447550 -1.1212121 0.7259038
q3_4 4 12 7.760833 1.618655 8.34 8.201000 0.000000 2.78 8.34 5.56 -2.4003740 4.4904335 0.4672655
q3_5 5 13 6.308461 2.698616 8.34 6.444546 0.000000 2.78 8.34 5.56 -0.4635149 -1.8727478 0.7484615
q3_6 6 13 3.742308 3.469650 2.78 3.664546 4.121628 0.00 8.34 8.34 0.3234434 -1.6330306 0.9623077

And p-values

# Pairwise p-values between the question 3 variants for the later attempts,
# shown with three digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(x = pval, digits = 3)
q3_1 q3_2 q3_3 q3_4 q3_5 q3_6
q3_1 1.000 0.085 0.929 0.222 0.550 0.011*
q3_2 1.000 0.096 0.016* 0.237 0.474
q3_3 1.000 0.361 0.545 0.015*
q3_4 1.000 0.115 0.002*
q3_5 1.000 0.047*
q3_6 1.000