The analysis of each milestone contains the following items:

Overall Milestone performance after each attempt.
1. Examine the number of students settling for a lower score.
2. Comparison with Fall 2019
Attempt 1: Compare averages of questions for three types of scores
Attempt 2: Compare averages of questions for three types of scores
Attempt 3: Compare averages of questions for three types of scores
Analyze whether there are specific questions on which students performed significantly differently within the same question type. Should this be controlled for the average score of the students who took that question?
Performance in milestone-like questions during exams

# Build the question column headers "q1", "q2", ..., "q<numberOfQuestions>".
#
# numberOfQuestions: how many question headers to generate.
# Returns a character vector with one header per question; character(0)
# when numberOfQuestions is 0.
buildHeaders <- function(numberOfQuestions){
  # seq_len() is empty for 0, whereas seq(0) counts down c(1, 0) and would
  # yield the bogus headers "q1", "q0".
  colHeaders = paste0("q", seq_len(numberOfQuestions))
  return(colHeaders)
}
# Collect the score columns whose question title matches a pattern.
#
# m1:  milestone export. Score columns are read at positions
#      10, 12, ..., ncol(m1)-2; the title of each question is the name of the
#      column immediately to its left.
# pat: regular expression matched against the question titles (e.g. "q8_").
#
# Returns a list with
#   "indi": data frame with one column of raw scores per matching question,
#           named after the question title (same number of rows as m1);
#   "ave":  data frame with one row per matching question and the columns
#           question (title), mean (average ignoring NA) and n (non-NA count).
variationWithinQuestionTypes <- function(m1,pat){
  totcol = ncol(m1)
  # seq(10, totcol-2, 2) throws when the input has fewer than 12 columns, so
  # treat such an input as having no score columns at all.
  scoreCols = if (totcol >= 12) seq(10, totcol - 2, by = 2) else integer(0)
  titles = colnames(m1)[scoreCols - 1]
  keep = grepl(pat, titles)
  scoreCols = scoreCols[keep]
  titles = titles[keep]

  indivQ = data.frame( matrix(ncol=0 ,nrow=nrow(m1)) )
  for (k in seq_along(scoreCols)){
    indivQ[titles[k]] = m1[,scoreCols[k]]
  }

  # Built column-wise so the mean and the count stay numeric and the columns
  # get stable names (a c(title, mean, n) row would coerce all three to text).
  aveQ = data.frame(
    question = titles,
    mean = vapply(scoreCols, function(q) mean(m1[,q],na.rm=TRUE), numeric(1)),
    n = vapply(scoreCols, function(q) length( na.omit(m1[,q])), integer(1)),
    stringsAsFactors = FALSE
  )
  results = list("indi" = indivQ, "ave" = aveQ)
  return(results)
}
# Pairwise t-test p-values between the score columns of indivQ, formatted as
# an upper-triangular table.
#
# indivQ: data frame with one column of scores per question variant.
# ndig:   number of decimals used to round and print each p-value.
#
# Returns a character data frame whose row and column names are the column
# names of indivQ. Cells on and above the diagonal hold the formatted p-value,
# suffixed with "*" when the p-value is below 0.05; cells below the diagonal
# hold a single space. A cell is "NA" when t.test() cannot be computed for
# that pair (e.g. a variant without enough attempts).
printVariationWithinQuestions <- function(indivQ,ndig){
  qNames = colnames(indivQ)
  totlen = length(qNames)
  cells = matrix(" ", nrow = totlen, ncol = totlen)
  for (i in seq_len(totlen)){
    for (j in seq_len(totlen)){
      if ( j < i){
        next
      }
      # t.test() stops on too few observations or constant data; one bad pair
      # should not abort the whole table.
      rawP = tryCatch(
        t.test(indivQ[,i],indivQ[,j])$p.value,
        error = function(e) NA_real_
      )
      if (is.na(rawP)){
        cells[i,j] = "NA"
      }else{
        p = format(round( rawP,ndig), nsmall = ndig)
        # Test the numeric p-value: comparing the formatted string with 0.05
        # would be a text comparison and would also act on the rounded value.
        if( rawP < 0.05){ p = paste(p,"*",sep = "")}
        cells[i,j] = p
      }
    }
  }
  pval = as.data.frame(cells, stringsAsFactors = FALSE)
  colnames(pval) = qNames
  rownames(pval) = qNames
  return(pval)
}
# Reshape a milestone export into one row per submission with one column per
# question number.
#
# m1:                milestone export. Column 1 is used as the student name,
#                    column 8 as the attempt number and the last column as the
#                    total. Score columns are read at positions
#                    10, 12, ..., ncol(m1)-2; the column to the left of each is
#                    named "q<number>_<variant>" and gives the question number.
# numberOfQuestions: number of questions in the milestone.
#
# Returns a data frame with columns q1..q<numberOfQuestions>, total, studName
# and attNumb. One row per row of m1, plus a final row holding the column
# means (its studName and attNumb are NA).
buildDF_fromMilestone <- function(m1,numberOfQuestions){
  totcol = ncol(m1)
  nStud = nrow(m1)
  colHeaders = c(buildHeaders(numberOfQuestions),"total")

  # Preallocate instead of growing a data frame with rbind() once per row.
  scores = matrix(NA_real_, nrow = nStud, ncol = numberOfQuestions + 1)

  # seq(10, totcol-2, 2) throws on inputs narrower than 12 columns.
  scoreCols = if (totcol >= 12) seq(10, totcol - 2, by = 2) else integer(0)
  for (q in scoreCols){
    colScores = m1[,q]
    answered = which(!is.na(colScores))
    if (length(answered) == 0){
      next
    }
    questionNumber = colnames(m1)[q-1]
    questionNumber = unlist(strsplit(questionNumber,"_"))[1]
    questionNumber = as.numeric( gsub("^q","",questionNumber))
    # A number outside 1..numberOfQuestions would write past the question
    # columns and shift the total; report it instead of corrupting the row.
    if (is.na(questionNumber) || !(questionNumber %in% seq_len(numberOfQuestions))){
      warning("Ignoring score column '", colnames(m1)[q-1],
              "': question number is not in 1..", numberOfQuestions, call. = FALSE)
      next
    }
    # Columns are visited left to right, so a later variant of the same
    # question overrides an earlier one, as in a per-row scan.
    scores[answered, questionNumber] = colScores[answered]
  }
  if (nStud > 0){
    scores[, numberOfQuestions + 1] = m1[,totcol]
  }

  # Last row: per-column means over all submissions (NA if any score is missing).
  scores = rbind(scores, colMeans(scores))
  m1df = as.data.frame(scores)
  colnames(m1df) = colHeaders

  studNames = m1[,1]
  # Keep names as text rather than factor codes.
  if (is.factor(studNames)){
    studNames = as.character(studNames)
  }
  m1df$studName = c(studNames,NA)
  m1df$attNumb = c(m1[,8],NA)
  return(m1df)
}

# Per-question averages and standard deviations for one attempt, split by the
# total score of the submission.
#
# m1df:              data frame with question columns first, plus the columns
#                    total and attNumb (as returned by buildDF_fromMilestone).
# numberOfQuestions: number of leading question columns to summarise.
# att:               attempt number to select.
#
# Returns a list with
#   "ave":      3-row data frame of question means (rows "Avg > 80",
#               "Avg 70><80", "Avg < 70");
#   "sd":       3-row data frame of question standard deviations;
#   "usestats": c(submissions in the attempt, count > 80, count 70-80, count < 70).
buildStats <- function(m1df,numberOfQuestions,att){
  questionCols = seq_len(numberOfQuestions)
  # Rows with a missing attempt number (e.g. a summary row) are never selected.
  inAttempt = !is.na(m1df$attNumb) & m1df$attNumb == att
  total = m1df$total

  # Contiguous buckets: a total of exactly 69.99 or 79.99 previously matched
  # none of the three strict comparisons and was silently dropped.
  groups = list(
    which(inAttempt & total > 79.99),
    which(inAttempt & total > 69.99 & total <= 79.99),
    which(inAttempt & total <= 69.99)
  )

  # drop = FALSE keeps a data frame even when there is a single question.
  aveRows = lapply(groups, function(rows){
    colMeans(m1df[rows, questionCols, drop = FALSE])
  })
  sdRows = lapply(groups, function(rows){
    apply(m1df[rows, questionCols, drop = FALSE],2,sd)
  })
  m1dfat1 = as.data.frame(do.call(rbind, aveRows))
  m1dfat1sd = as.data.frame(do.call(rbind, sdRows))

  colnames(m1dfat1) = buildHeaders(numberOfQuestions)
  colnames(m1dfat1sd) = buildHeaders(numberOfQuestions)

  rownames(m1dfat1) = c("Avg > 80", "Avg 70><80","Avg < 70")
  rownames(m1dfat1sd) = c("SD > 80", "SD 70><80","SD < 70")

  useStats = c(sum(inAttempt), lengths(groups))
  results = list("ave" = m1dfat1, "sd" = m1dfat1sd, "usestats" = useStats)
  return(results)
}
# Flatten the upper triangle of a correlation matrix into a long table.
#
# cormat: square matrix of correlation coefficients with row names.
# pmat:   matrix of p-values with the same layout as cormat.
#
# Returns a data frame with one row per cell above the diagonal and the
# columns row, column (names of the two variables), cor and p.
flattenCorrMatrix <- function(cormat, pmat) {
  # Linear positions of the cells above the diagonal, in column-major order.
  upperCells <- which(upper.tri(cormat))
  labels <- rownames(cormat)
  rowIndex <- row(cormat)[upperCells]
  colIndex <- col(cormat)[upperCells]
  data.frame(
    row = labels[rowIndex],
    column = labels[colIndex],
    cor = cormat[upperCells],
    p = pmat[upperCells]
  )
}
# Map a total score to its grade bucket.
#
# score: a single numeric total.
# Returns 0 for scores below 70, 80 for scores from 70 up to (not including)
# 80, and 100 for scores of 80 or more.
roundThisScore <- function(score){
  if (score < 70){
    r = 0
  } else if (score < 80){
    r = 80
  } else {
    # Plain else: a score of exactly 80 used to match no branch, leaving r
    # undefined and making the function fail.
    r = 100
  }
  return(r)
}
# Summarise how students move between grade buckets across attempts and which
# students stopped early with a score below 80.
#
# m1df: data frame with the columns studName, attNumb and total
#       (as returned by buildDF_fromMilestone). Rows with NA studName are ignored.
#
# Returns a list with
#   "settle": data frame (name, MaxAttempt, MaxScore) of students whose highest
#             attempt number is below 3 and whose best total is below 80;
#             MaxAttempt and MaxScore are numeric;
#   "flow":   3x3 data frame of student counts. Rows are the buckets 0, 80, 100
#             (see roundThisScore); columns "After 1st", "After 2nd",
#             "After 3rd" use the best total over attempts up to that one.
flowAndSettling <- function(m1df){
  studs = unique(m1df$studName)
  studs = studs[!is.na(studs)]
  nStud = length(studs)

  maxAttempt = rep(NA_real_, nStud)
  maxScore = rep(NA_real_, nStud)
  bestBucket = matrix(NA_real_, nrow = nStud, ncol = 3)
  for (k in seq_len(nStud)){
    thisStDF = m1df[which(m1df$studName == studs[k]),]
    maxAttempt[k] = max(thisStDF$attNumb)
    maxScore[k] = max(thisStDF$total)
    for (att in 1:3){
      soFar = thisStDF$total[which(thisStDF$attNumb < att + 1)]
      # No submission yet counts as the lowest bucket (max() of nothing is
      # -Inf; spelled out here to avoid the warning).
      best = if (length(soFar) > 0) max(soFar) else -Inf
      bestBucket[k, att] = roundThisScore(best)
    }
  }

  #students who did not use all 3 attempts and stayed below 80
  settled = which(maxAttempt < 3 & maxScore < 80)
  # Built column-wise so the attempt and score stay numeric; binding
  # c(name, attempt, score) rows turns them into text.
  settle = data.frame(
    name = as.character(studs[settled]),
    MaxAttempt = maxAttempt[settled],
    MaxScore = maxScore[settled],
    stringsAsFactors = FALSE
  )

  # Fixed factor levels keep three rows even when a bucket is empty; a bare
  # table() would return fewer counts and break the 3-row assignment.
  buckets = c(0, 80, 100)
  flow2 = data.frame(matrix(ncol=3,nrow=3))
  for (att in 1:3){
    flow2[,att] = as.integer(table(factor(bestBucket[,att], levels = buckets)))
  }
  colnames(flow2) = c("After 1st","After 2nd","After 3rd")

  all = list("settle" = settle,"flow" = flow2)
  return(all)
}

#lets first load the files
# Every export lives under the same folder layout; only the root differs
# between Windows (G: drive) and other systems (home directory).
if (Sys.info()["sysname"] == "Windows"){
  gradesRoot = "G:/My Drive/Teaching/Grades_and_SRT"
}else{
  gradesRoot = "~/Teaching/Grades_and_SRT"
}
# Read one CSV export given its path components below gradesRoot.
readGrades = function(...){
  read.csv(file.path(gradesRoot, ...),header = TRUE)
}

# Fall 2021 milestones and exams
m1 = readGrades("Fall2021","milestone1.csv")
m2 = readGrades("Fall2021","milestone2.csv")
m3 = readGrades("Fall2021","milestone3.csv")
m4 = readGrades("Fall2021","milestone4.csv")
ex1 = readGrades("Fall2021","Exam_1_scores.csv")
ex2 = readGrades("Fall2021","Exam_2_scores.csv")

# Fall 2019 milestones used for the year-to-year comparison
m1f19 = readGrades("Fall2019","CHEM1331","milestone1.csv")
m2f19 = readGrades("Fall2019","CHEM1331","milestone2.csv")
m3f19 = readGrades("Fall2019","CHEM1331","milestone3.csv")
m4f19 = readGrades("Fall2019","CHEM1331","milestone4.csv")

1 Milestone 1

The questions for milestone 1:

Isotopes, number of neutrons, protons, and electrons
Mass spectrometry
Electrostatics
SI prefixes
What’s greater: 1 mole of __ or # molecules
Rank electromagnetic waves
Plots of waves: tell wavelength and frequency
Compare transitions among orbits and identify the photon involved
Transitions among atomic orbitals of specific atoms
Electronic configuration of atoms and ions (Qbank)
Common charges and electronic configurations of ions
Periodic table trends with Zeff, shielding and size.

1.1 Overall performance in Milestone 1

Number of students in the three grade categories at different attempts

# Reshape the Milestone 1 export (12 questions) and count the students in each
# grade bucket after each attempt.
m1df = buildDF_fromMilestone(m1,12)
results = flowAndSettling(m1df)
flow = results$flow

# Share of students in each bucket, per attempt column, in percent.
pct1st = flow[,1]/sum(flow[,1])*100
pct2nd = flow[,2]/sum(flow[,2])*100
pct3rd = flow[,3]/sum(flow[,3])*100
# Long-format rows (grade letter, percentage, term, attempt) collected for the
# year-to-year bar chart; the F/C/A rows take the 1st/2nd/3rd bucket row, which
# are labelled "0", "80", "100" below. Column names are assigned later.
stackAndGrouped = data.frame(matrix(ncol = 4,nrow=0))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct1st[1],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct1st[2],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct1st[3],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct2nd[1],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct2nd[2],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct2nd[3],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct3rd[1],"F21","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct3rd[2],"F21","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct3rd[3],"F21","Third"))

# Rewrite every cell of the table as "count / percentage%".
pct1st = paste(signif( pct1st,digits = 4),"%",sep = "")
pct2nd = paste(signif( pct2nd,digits = 4),"%",sep = "")
pct3rd = paste(signif( pct3rd,digits = 4),"%",sep = "")
flow$`After 1st` = paste( flow$`After 1st`, pct1st,sep = " / ")
flow$`After 2nd` = paste( flow$`After 2nd`, pct2nd,sep = " / ")
flow$`After 3rd` = paste( flow$`After 3rd`, pct3rd,sep = " / ")
# Prepend the bucket labels and render the table.
scores = c("0","80","100")
flow = cbind(scores,flow)
knitr::kable(flow)

scores	After 1st	After 2nd	After 3rd
0	26 / 12.32%	11 / 5.213%	5 / 2.37%
80	33 / 15.64%	24 / 11.37%	27 / 12.8%
100	152 / 72.04%	176 / 83.41%	179 / 84.83%

# Bar chart of the raw bucket counts (one bar per attempt column), using the
# untouched counts in results$flow rather than the formatted table.
row.names(results$flow) = c("0","80","100")
barplot(as.matrix(results$flow),col=c("red","yellow","green"),main = "Milestone 1: students scores best of 3 attempts",legend = rownames(results$flow))

1.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count the students who stopped after attempt 1 or 2, split by whether their
# best score was below 70 or at least 70.
results = results$settle
# Compare as numbers: if these columns arrive as text, `< 70` becomes a string
# comparison (e.g. "9.5" would not count as below "70").
settleAttempt = as.numeric(results$MaxAttempt)
settleScore = as.numeric(results$MaxScore)
# sum() over the condition counts matching rows and ignores NA instead of
# counting NA rows as matches.
n1_70 = sum(settleAttempt == 1 & settleScore <70, na.rm = TRUE)
n1_80 = sum(settleAttempt == 1 & settleScore >69.99, na.rm = TRUE)
n2_70 = sum(settleAttempt == 2 & settleScore <70, na.rm = TRUE)
n2_80 = sum(settleAttempt == 2 & settleScore >69.99, na.rm = TRUE)

# Students settling before 3rd attempt	with Score < 70 (failing)	70< Score < 80
Settled in 1st attempt	3	7
Settled in 2nd attempt	0	13
Total	3	20

1.1.2 Comparing with Fall 2019

# Same bucket counts for the Fall 2019 Milestone 1 export.
m1df19 = buildDF_fromMilestone(m1f19,12)
results19 = flowAndSettling(m1df19)
flow19 = results19$flow
# Percentages per attempt column; these overwrite the Fall 2021 pct* variables.
pct1st = flow19[,1]/sum(flow19[,1])*100
pct2nd = flow19[,2]/sum(flow19[,2])*100
pct3rd = flow19[,3]/sum(flow19[,3])*100

# Append the Fall 2019 rows to the long-format table started for Fall 2021.
stackAndGrouped = rbind(stackAndGrouped,c("F",pct1st[1],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct1st[2],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct1st[3],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct2nd[1],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct2nd[2],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct2nd[3],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct3rd[1],"F19","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct3rd[2],"F19","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct3rd[3],"F19","Third"))
# Name the columns and turn the percentages (stored as text by the
# character rows above) back into numbers for plotting.
colnames(stackAndGrouped) = c("Grade","Pct","Year","Attempt")
stackAndGrouped$Pct = as.numeric(stackAndGrouped$Pct)

# Stacked bars of grade percentages: one bar per term, one panel per attempt.
library(ggplot2)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  # Colours are given in the order green, yellow, red for the fill groups.
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

# Format the Fall 2019 cells as "count / percentage%".
pct1st = paste(signif( pct1st,digits = 4),"%",sep = "")
pct2nd = paste(signif( pct2nd,digits = 4),"%",sep = "")
pct3rd = paste(signif( pct3rd,digits = 4),"%",sep = "")

flow19$`After 1st` = paste( flow19$`After 1st`, pct1st,sep = " / ")
flow19$`After 2nd` = paste( flow19$`After 2nd`, pct2nd,sep = " / ")
flow19$`After 3rd` = paste( flow19$`After 3rd`, pct3rd,sep = " / ")

# Rename the Fall 2019 columns so they stay distinct after the merge, then
# join both terms on the bucket label.
colnames(flow19) = c("1st att F19","2nd att F19","3rd att F19")
scores = c("0","80","100")
flow19 = cbind(scores,flow19)
mergedFlow =  merge(flow,flow19,by="scores")
# The bucket labels are text; convert them so the rows sort as 0, 80, 100.
mergedFlow$scores = as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores),] )

	scores	After 1st	After 2nd	After 3rd	1st att F19	2nd att F19	3rd att F19
1	0	26 / 12.32%	11 / 5.213%	5 / 2.37%	18 / 9.424%	8 / 4.188%	5 / 2.618%
3	80	33 / 15.64%	24 / 11.37%	27 / 12.8%	25 / 13.09%	17 / 8.901%	13 / 6.806%
2	100	152 / 72.04%	176 / 83.41%	179 / 84.83%	148 / 77.49%	166 / 86.91%	173 / 90.58%

Students settling in F19

# Fall 2019: count the students who stopped after attempt 1 or 2, split by
# whether their best score was below 70 or at least 70.
results = results19$settle
# Compare as numbers: if these columns arrive as text, `< 70` becomes a string
# comparison (e.g. "9.5" would not count as below "70").
settleAttempt = as.numeric(results$MaxAttempt)
settleScore = as.numeric(results$MaxScore)
# sum() over the condition counts matching rows and ignores NA instead of
# counting NA rows as matches.
n1_70_19 = sum(settleAttempt == 1 & settleScore <70, na.rm = TRUE)
n1_80_19 = sum(settleAttempt == 1 & settleScore >69.99, na.rm = TRUE)
n2_70_19 = sum(settleAttempt == 2 & settleScore <70, na.rm = TRUE)
n2_80_19 = sum(settleAttempt == 2 & settleScore >69.99, na.rm = TRUE)

# Students settling before 3rd attempt (F21 / F19)	Score < 70 (F21)	Score < 70 (F19)	70< Score < 80 (F21)	70< Score < 80 (F19)
Settled in 1st attempt	3	2	7	4
Settled in 2nd attempt	0	3	13	5
Total	3	5	20	9

1.2 M1 - Attempt 1 averages

# Rebuild the Milestone 1 table and compute per-question averages and standard
# deviations for first-attempt submissions, split by total-score bucket.
m1df = buildDF_fromMilestone(m1,12)
# First attempt stats
results = buildStats(m1df,12,1)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
One	211	152 (72%)	33 (16%)	26 (12%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	8.18	7.12	7.76	6.99	6.64	8.25	8.22	7.40	7.52	8.30	7.76	7.34
Avg 70><80	7.96	5.30	6.67	4.80	2.78	8.09	7.19	4.80	6.37	8.27	6.60	6.31
Avg < 70	6.15	4.70	5.20	3.68	3.85	5.88	5.13	3.84	3.44	6.65	4.38	4.35

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	0.48	1.99	1.26	1.89	3.37	0.50	0.47	2.47	1.44	0.24	1.06	1.55
SD 70><80	0.94	2.44	1.86	2.47	3.99	0.81	1.65	3.69	1.80	0.36	1.76	1.80
SD < 70	2.33	2.58	2.37	2.90	4.24	3.27	2.65	3.25	2.20	2.70	2.13	1.71

# One line per score bucket across the questions; the table is transposed so
# that questions run along the x axis.
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/8.33",main="Average score per question on 1st attempt")   
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

1.3 M1 - Attempt 2 averages

# Per-question averages and standard deviations for second-attempt
# submissions; reuses (overwrites) the m1dfat1* variables.
results = buildStats(m1df,12,2)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Two	49	25 (51%)	11 (22%)	13 (27%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	8.06	6.89	7.74	7.25	6.34	8.34	7.75	7.33	6.91	8.33	7.71	7.04
Avg 70><80	7.83	6.31	5.91	4.16	3.03	7.83	7.57	5.30	6.82	8.33	5.91	5.73
Avg < 70	7.48	3.63	5.00	3.68	0.64	6.84	5.61	5.98	4.97	8.01	5.25	4.94

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	0.57	1.63	1.06	1.36	3.64	0.00	1.13	2.52	1.87	0.00	0.67	1.61
SD 70><80	1.12	2.18	2.28	2.08	4.21	1.12	1.05	3.82	1.64	0.00	1.98	2.37
SD < 70	0.90	2.09	1.36	2.27	2.31	2.44	1.97	3.56	2.76	0.78	1.64	1.52

# One line per score bucket across the questions (second attempt).
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/8.33",main="M1: Average score per question on 2nd attempt")   
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

1.4 M1 - Attempt 3 averages

# Per-question averages and standard deviations for third-attempt
# submissions; reuses (overwrites) the m1dfat1* variables.
results = buildStats(m1df,12,3)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Three	12	3 (25%)	5 (42%)	4 (33%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	7.41	7.40	6.12	5.55	8.34	8.34	6.94	8.33	7.64	8.33	7.03	4.36
Avg 70><80	5.84	5.55	7.01	5.41	3.34	8.34	7.50	8.33	6.66	7.91	6.11	5.95
Avg < 70	6.95	5.55	3.75	3.12	4.17	7.64	7.29	6.25	2.08	8.33	6.11	4.76

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	0.80	1.60	1.93	3.18	0.00	0.00	2.40	0.00	1.20	0.00	1.28	1.82
SD 70><80	2.28	3.40	1.40	3.48	4.57	0.00	1.14	0.00	1.74	0.93	2.08	1.46
SD < 70	1.13	2.27	1.60	2.08	4.82	1.39	1.20	2.66	1.70	0.00	1.20	1.37

# One line per score bucket across the questions (third attempt).
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/8.33",main="M1: Average score per question on 3rd attempt")   
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

1.5 Variation within question groups

Question 5 in Milestone 1 scores low because it has only one true option, which is also why it has the largest standard deviation. Question 8 allows multiple answers, yet its standard deviation is still large, so we should look into it.

# Pull the raw scores of every question variant whose title matches "q8_".
results = variationWithinQuestionTypes(m1,"q8_")
indivQ = results$indi
aveQ = results$ave

# One box per question variant.
boxplot(indivQ)

# Descriptive statistics per question variant (describe() comes from psych).
library("psych")
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	min	max	range	skew	kurtosis	se
q8_1	1	24	7.520139	1.916603	8.33	7.913500	2.776667	8.33	5.553333	-1.8771281	1.7627115	0.3912249
q8_2	2	30	6.016111	3.185421	8.33	6.478889	0.000000	8.33	8.330000	-0.8438501	-0.9582137	0.5815757
q8_3	3	23	5.553333	3.744057	8.33	5.845614	0.000000	8.33	8.330000	-0.6384380	-1.5269565	0.7806898
q8_4	4	22	5.553333	3.635510	8.33	5.861852	0.000000	8.33	8.330000	-0.7290461	-1.3295455	0.7750933
q8_5	5	29	7.181034	2.518454	8.33	7.663600	0.000000	8.33	8.330000	-1.9109749	2.2502296	0.4676651
q8_6	6	20	5.969833	3.634314	8.33	6.421042	0.000000	8.33	8.330000	-0.9301782	-1.0642571	0.8126573
q8_7	7	12	5.553333	3.551924	8.33	5.831000	0.000000	8.33	8.330000	-0.4777275	-1.6929012	1.0253521
q8_8	8	23	7.847101	1.599938	8.33	8.330000	2.776667	8.33	5.553333	-2.7426553	5.7790080	0.3336101
q8_9	9	30	6.849111	2.892345	8.33	7.520139	0.000000	8.33	8.330000	-1.5070908	0.5914437	0.5280676
q8_10	10	26	7.796026	1.759492	8.33	8.203788	0.000000	8.33	8.330000	-3.4802501	12.0347164	0.3450648
q8_11	11	19	5.261053	4.128303	8.33	5.390000	0.000000	8.33	8.330000	-0.5030472	-1.8353779	0.9470977
q8_12	12	14	7.140000	3.024927	8.33	7.635833	0.000000	8.33	8.330000	-1.8264902	1.4549320	0.8084458

We can already see that some questions are easier than others, but we want to know whether they are significantly different. We can calculate the p-values among the scores of the different Q8 questions to find out. We do not control for whether the student passed or not.

# Pairwise t-test p-values between the variants, printed with three decimals.
pval = printVariationWithinQuestions(indivQ,3)
knitr::kable(pval,digits=3)

	q8_1	q8_2	q8_3	q8_4	q8_5	q8_6	q8_7	q8_8	q8_9	q8_10	q8_11	q8_12
q8_1	1.000	0.037*	0.031*	0.031*	0.581	0.097	0.094	0.528	0.312	0.599	0.037*	0.677
q8_2		1.000	0.637	0.635	0.124	0.963	0.699	0.009*	0.293	0.011*	0.502	0.269
q8_3			1.000	1.000	0.082	0.714	1.000	0.011*	0.177	0.013*	0.813	0.168
q8_4				1.000	0.081	0.713	1.000	0.011*	0.175	0.013*	0.813	0.166
q8_5					1.000	0.206	0.168	0.252	0.640	0.295	0.080	0.965
q8_6						1.000	0.753	0.042*	0.371	0.049*	0.574	0.315
q8_7							1.000	0.053	0.277	0.058	0.836	0.237
q8_8								1.000	0.117	0.916	0.017*	0.430
q8_9									1.000	0.140	0.154	0.766
q8_10										1.000	0.019*	0.465
q8_11											1.000	0.141
q8_12												1.000

1.6 Performance in Milestone like questions

Question 1 aligns with question 2 in milestone 1. Question 4a aligns with question 4 in the milestone (SI prefixes). Question 4b aligns with question 5 in the milestone.

2 Milestone 2

The questions for Milestone 2:

Isotopes, number of protons, neutrons, and electrons
Electrostatics
Transition among orbitals
Electronic configuration
SI prefixes and mole conversion
Type of chemical bond in compounds
Hybridization and angle of small molecules
NMR: number of peaks and number of sp2, sp3 (doubled)
Isomerism (doubled)
Resonance structures (doubled)
Conjugation and UV/Vis (doubled)

q8, q9, q10, and q11 are doubled, so in a way Milestone 2 contains more questions but with less variability.

2.1 Overall performance in Milestone 2

# This may be confusing: to avoid typos the Milestone 2 table is stored in
# m1df (overwriting the Milestone 1 table); there is no m2df.
m1df = buildDF_fromMilestone(m2,11)
results = flowAndSettling(m1df)
flow = results$flow

# Share of students in each bucket, per attempt column, in percent.
pct1st = flow[,1]/sum(flow[,1])*100
pct2nd = flow[,2]/sum(flow[,2])*100
pct3rd = flow[,3]/sum(flow[,3])*100
# Long-format rows (grade letter, percentage, term, attempt) collected for the
# year-to-year bar chart; the F/C/A rows take the 1st/2nd/3rd bucket row, which
# are labelled "0", "80", "100" below. Column names are assigned later.
stackAndGrouped = data.frame(matrix(ncol = 4,nrow=0))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct1st[1],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct1st[2],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct1st[3],"F21","First"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct2nd[1],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct2nd[2],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct2nd[3],"F21","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct3rd[1],"F21","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct3rd[2],"F21","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct3rd[3],"F21","Third"))

# Rewrite every cell of the table as "count / percentage%".
pct1st = paste(signif( pct1st,digits = 4),"%",sep = "")
pct2nd = paste(signif( pct2nd,digits = 4),"%",sep = "")
pct3rd = paste(signif( pct3rd,digits = 4),"%",sep = "")
flow$`After 1st` = paste( flow$`After 1st`, pct1st,sep = " / ")
flow$`After 2nd` = paste( flow$`After 2nd`, pct2nd,sep = " / ")
flow$`After 3rd` = paste( flow$`After 3rd`, pct3rd,sep = " / ")
# Prepend the bucket labels and render the table.
scores = c("0","80","100")
flow = cbind(scores,flow)
knitr::kable(flow)

scores	After 1st	After 2nd	After 3rd
0	52 / 25.37%	34 / 16.59%	24 / 11.71%
80	51 / 24.88%	43 / 20.98%	42 / 20.49%
100	102 / 49.76%	128 / 62.44%	139 / 67.8%

# Bar chart of the raw bucket counts (one bar per attempt column), using the
# untouched counts in results$flow rather than the formatted table.
row.names(results$flow) = c("0","80","100")
barplot(as.matrix(results$flow),col=c("red","yellow","green"),main = "Milestone 2: students scores best of 3 attempts",legend = rownames(results$flow))

2.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count the students who stopped after attempt 1 or 2, split by whether their
# best score was below 70 or at least 70.
results = results$settle
# Compare as numbers: if these columns arrive as text, `< 70` becomes a string
# comparison (e.g. "9.5" would not count as below "70").
settleAttempt = as.numeric(results$MaxAttempt)
settleScore = as.numeric(results$MaxScore)
# sum() over the condition counts matching rows and ignores NA instead of
# counting NA rows as matches.
n1_70 = sum(settleAttempt == 1 & settleScore <70, na.rm = TRUE)
n1_80 = sum(settleAttempt == 1 & settleScore >69.99, na.rm = TRUE)
n2_70 = sum(settleAttempt == 2 & settleScore <70, na.rm = TRUE)
n2_80 = sum(settleAttempt == 2 & settleScore >69.99, na.rm = TRUE)

# Students settling before 3rd attempt	with Score < 70 (failing)	70< Score < 80
Settled in 1st attempt	2	14
Settled in 2nd attempt	5	11
Total	7	25

2.1.2 Comparing with Fall 2019

# Same bucket counts for the Fall 2019 Milestone 2 export.
# NOTE(review): the Fall 2021 Milestone 2 table is built with 11 questions
# while this call passes 12 - confirm the Fall 2019 milestone had 12 questions.
m1df19 = buildDF_fromMilestone(m2f19,12)
results19 = flowAndSettling(m1df19)
flow19 = results19$flow
# Percentages per attempt column; these overwrite the Fall 2021 pct* variables.
pct1st = flow19[,1]/sum(flow19[,1])*100
pct2nd = flow19[,2]/sum(flow19[,2])*100
pct3rd = flow19[,3]/sum(flow19[,3])*100

# Append the Fall 2019 rows to the long-format table started for Fall 2021.
stackAndGrouped = rbind(stackAndGrouped,c("F",pct1st[1],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct1st[2],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct1st[3],"F19","First"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct2nd[1],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct2nd[2],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct2nd[3],"F19","Second"))
stackAndGrouped = rbind(stackAndGrouped,c("F",pct3rd[1],"F19","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("C",pct3rd[2],"F19","Third"))
stackAndGrouped = rbind(stackAndGrouped,c("A",pct3rd[3],"F19","Third"))
# Name the columns and turn the percentages (stored as text by the
# character rows above) back into numbers for plotting.
colnames(stackAndGrouped) = c("Grade","Pct","Year","Attempt")
stackAndGrouped$Pct = as.numeric(stackAndGrouped$Pct)

# Stacked bars of grade percentages: one bar per term, one panel per attempt.
library(ggplot2)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  # Colours are given in the order green, yellow, red for the fill groups.
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

# Format the Fall 2019 cells as "count / percentage%".
pct1st = paste(signif( pct1st,digits = 4),"%",sep = "")
pct2nd = paste(signif( pct2nd,digits = 4),"%",sep = "")
pct3rd = paste(signif( pct3rd,digits = 4),"%",sep = "")

flow19$`After 1st` = paste( flow19$`After 1st`, pct1st,sep = " / ")
flow19$`After 2nd` = paste( flow19$`After 2nd`, pct2nd,sep = " / ")
flow19$`After 3rd` = paste( flow19$`After 3rd`, pct3rd,sep = " / ")

# Rename the Fall 2019 columns so they stay distinct after the merge, then
# join both terms on the bucket label.
colnames(flow19) = c("1st att F19","2nd att F19","3rd att F19")
scores = c("0","80","100")
flow19 = cbind(scores,flow19)
mergedFlow =  merge(flow,flow19,by="scores")
# The bucket labels are text; convert them so the rows sort as 0, 80, 100.
mergedFlow$scores = as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores),] )

	scores	After 1st	After 2nd	After 3rd	1st att F19	2nd att F19	3rd att F19
1	0	52 / 25.37%	34 / 16.59%	24 / 11.71%	16 / 8.421%	9 / 4.737%	5 / 2.632%
3	80	51 / 24.88%	43 / 20.98%	42 / 20.49%	22 / 11.58%	10 / 5.263%	10 / 5.263%
2	100	102 / 49.76%	128 / 62.44%	139 / 67.8%	152 / 80%	171 / 90%	175 / 92.11%

Students settling in F19

# Fall 2019: count the students who stopped after attempt 1 or 2, split by
# whether their best score was below 70 or at least 70.
results = results19$settle
# Compare as numbers: if these columns arrive as text, `< 70` becomes a string
# comparison (e.g. "9.5" would not count as below "70").
settleAttempt = as.numeric(results$MaxAttempt)
settleScore = as.numeric(results$MaxScore)
# sum() over the condition counts matching rows and ignores NA instead of
# counting NA rows as matches.
n1_70_19 = sum(settleAttempt == 1 & settleScore <70, na.rm = TRUE)
n1_80_19 = sum(settleAttempt == 1 & settleScore >69.99, na.rm = TRUE)
n2_70_19 = sum(settleAttempt == 2 & settleScore <70, na.rm = TRUE)
n2_80_19 = sum(settleAttempt == 2 & settleScore >69.99, na.rm = TRUE)

# Students settling before 3rd attempt (F21 / F19)	Score < 70 (F21)	Score < 70 (F19)	70< Score < 80 (F21)	70< Score < 80 (F19)
Settled in 1st attempt	2	3	14	2
Settled in 2nd attempt	5	0	11	4
Total	7	3	25	6

2.2 M2 - Attempt 1 averages

# First attempt stats: per-question averages and standard deviations for
# Milestone 2 (11 questions), split by total-score bucket. m1df holds the
# Milestone 2 table at this point.
results = buildStats(m1df,11,1)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
One	205	102 (50%)	51 (25%)	52 (25%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
Avg > 80	6.62	6.39	6.48	6.47	5.53	6.15	5.75	5.21	5.21	6.11	5.20
Avg 70><80	6.51	6.09	6.30	6.43	3.43	5.16	4.68	4.60	3.71	4.92	4.06
Avg < 70	6.08	4.69	4.71	5.15	2.31	4.05	3.24	3.10	2.22	4.22	3.00

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
SD > 80	0.22	0.66	0.62	0.43	1.50	1.11	1.10	1.17	1.81	0.83	1.41
SD 70><80	0.54	1.01	0.77	0.46	1.81	1.37	1.48	1.33	2.34	1.60	1.48
SD < 70	1.07	1.55	1.67	1.42	1.90	1.66	1.69	1.79	2.07	1.67	1.57

# One line per score bucket across the questions (first attempt).
# NOTE(review): the y label says 6.33, but the describe() tables in this
# report list 6.67 as the maximum Milestone 2 question score - confirm.
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/6.33",main="M2: Average score per question on 1st attempt")   
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

2.3 M2 - Attempt 2 averages

# Per-question averages and standard deviations for second-attempt
# submissions; reuses (overwrites) the m1dfat1* variables.
results = buildStats(m1df,11,2)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Two	87	26 (30%)	27 (31%)	34 (39%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
Avg > 80	6.62	6.20	6.21	6.44	4.87	5.33	5.32	6.13	5.64	6.13	5.70
Avg 70><80	6.33	6.12	5.98	6.27	3.58	5.43	4.92	5.01	4.41	5.15	5.32
Avg < 70	5.94	4.86	5.34	5.43	2.20	3.90	3.99	4.53	3.55	3.79	3.77

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
SD > 80	0.22	0.92	1.21	0.47	1.63	1.64	1.23	0.71	1.40	1.15	1.30
SD 70><80	1.30	1.00	1.33	0.53	2.00	1.58	1.67	1.02	2.48	2.24	1.14
SD < 70	1.28	1.76	1.63	0.90	2.04	1.68	1.56	1.31	2.30	2.45	1.42

# One line per score bucket across the questions (second attempt).
# NOTE(review): the y label says 6.33, but the describe() tables in this
# report list 6.67 as the maximum Milestone 2 question score - confirm.
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/6.33",main="M2: Average score per question on 2nd attempt")
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

2.4 M2 - Attempt 3 averages

# Per-question averages and standard deviations for third-attempt
# submissions; reuses (overwrites) the m1dfat1* variables.
results = buildStats(m1df,11,3)
m1dfat1 = results$ave
m1dfat1sd = results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Three	45	12 (27%)	14 (31%)	19 (42%)

Table of averages per question and their standard deviation

# Table of per-question averages by score bucket, rounded to two decimals.
library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
Avg > 80	6.66	6.11	6.38	6.59	4.30	5.87	5.79	5.95	6.11	5.47	6.10
Avg 70><80	6.18	5.23	5.47	6.22	3.33	5.40	4.84	5.34	3.81	4.22	4.90
Avg < 70	6.25	5.12	5.52	5.98	2.28	4.37	4.51	4.42	3.33	3.66	4.56

# Table of per-question standard deviations by score bucket.
knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11
SD > 80	0.00	1.06	0.65	0.17	1.66	0.92	1.10	0.60	1.38	2.10	0.65
SD 70><80	0.95	1.33	1.21	0.60	2.53	1.60	1.86	0.91	2.53	2.34	1.25
SD < 70	0.66	1.35	1.57	0.79	1.94	1.88	1.46	1.16	2.22	2.50	1.67

# One line per score bucket across the questions (third attempt).
# NOTE(review): the y label says 6.33, but the describe() tables in this
# report list 6.67 as the maximum Milestone 2 question score - confirm.
matplot(t(m1dfat1),type="b",ylim=c(0,9),xlab = "Question number",ylab = "Average score/6.33",main="M2: Average score per question on 3rd attempt")   
# Text-only legend: the leading 1/2/3 in each label refers to the bucket row order.
legend("bottomright",legend = c("1:>80","2:70-80","3:<70"))

2.5 Variation within question groups

2.5.1 Question 9 on isomerism

Question 9 in the first attempt of Milestone 2 seems to show the highest SD, so we can look into it and see whether or not scores are significantly different among questions.

Questions 18 and 19 were the same for everyone during the first attempt, which explains their larger “n”. For the 2nd attempt the new question was q9_2, which was kept and repeated, and still didn’t improve students’ results.

# Pull the raw scores of every question variant whose title matches "q9_".
results = variationWithinQuestionTypes(m2,"q9_")
indivQ = results$indi
aveQ = results$ave

# One box per question variant.
boxplot(indivQ)

# Descriptive statistics per question variant.
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q9_1	1	14	5.717143	1.893459	6.670000	6.114167	0.000000	0.000000	6.67	6.670000	-1.9399650	3.0262130	0.5060481
q9_2	2	139	2.815156	2.476024	2.223333	2.695546	3.296314	0.000000	6.67	6.670000	0.2484774	-1.3314270	0.2100136
q9_3	3	24	4.817222	2.510854	6.670000	5.113667	0.000000	0.000000	6.67	6.670000	-0.8325219	-0.9435558	0.5125258
q9_4	4	27	3.623210	2.553533	2.223333	3.673333	3.296314	0.000000	6.67	6.670000	-0.0127433	-1.5420915	0.4914277
q9_5	5	25	4.268800	2.560858	4.446667	4.446667	3.296314	0.000000	6.67	6.670000	-0.4802299	-1.3430935	0.5121716
q9_6	6	19	4.797719	2.255604	6.670000	4.839020	0.000000	2.223333	6.67	4.446667	-0.2948888	-2.0107026	0.5174710
q9_7	7	23	4.736667	2.706824	6.670000	5.031754	0.000000	0.000000	6.67	6.670000	-0.9591990	-0.8144548	0.5644117
q9_8	8	26	5.707981	1.896737	6.670000	6.063636	0.000000	0.000000	6.67	6.670000	-1.7777175	1.8832951	0.3719808
q9_9	9	23	4.253333	2.991418	6.670000	4.446667	0.000000	0.000000	6.67	6.670000	-0.4925977	-1.6653980	0.6237537
q9_10	10	25	5.380467	1.270741	5.558333	5.505397	1.648157	2.223333	6.67	4.446667	-0.6653471	-0.4869991	0.2541482
q9_11	11	18	3.952593	2.702173	4.446667	4.029792	3.296314	0.000000	6.67	6.670000	-0.3423142	-1.5567310	0.6369082
q9_12	12	20	4.669167	2.269641	4.448333	5.002708	3.293843	0.000000	6.67	6.670000	-0.7503523	-0.7347600	0.5075071
q9_13	13	33	4.311818	2.287766	4.446667	4.446667	3.296314	0.000000	6.67	6.670000	-0.2178465	-1.5036429	0.3982489
q9_14	14	20	5.252625	1.242554	5.002500	5.315156	2.472236	3.335000	6.67	3.335000	-0.2193302	-1.2721324	0.2778435
q9_15	15	19	5.792368	1.020135	5.558333	5.885294	1.648157	3.335000	6.67	3.335000	-0.8003138	-0.4893958	0.2340351
q9_16	16	19	5.616842	1.870248	6.670000	5.885294	0.000000	0.000000	6.67	6.670000	-1.6694941	1.9532684	0.4290643
q9_17	17	2	4.446667	3.144268	4.446667	4.446667	3.296314	2.223333	6.67	4.446667	0.0000000	-2.7500000	2.2233333
q9_18	18	87	4.651188	2.107383	4.446667	4.947793	3.296314	0.000000	6.67	6.670000	-0.8286304	-0.2564302	0.2259352
q9_19	19	111	3.645465	2.431713	4.446667	3.722210	3.296314	0.000000	6.67	6.670000	-0.2103672	-1.2777108	0.2308080

Let’s look into the p-values among question 9 variations.

# Pairwise t-test p-values between the variants, printed with three decimals.
pval = printVariationWithinQuestions(indivQ,3)
knitr::kable(pval,digits=3)

	q9_1	q9_2	q9_3	q9_4	q9_5	q9_6	q9_7	q9_8	q9_9	q9_10	q9_11	q9_12	q9_13	q9_14	q9_15	q9_16	q9_17	q9_18	q9_19
q9_1	1.000	0.000*	0.220	0.005*	0.052	0.214	0.205	0.988	0.077	0.559	0.038*	0.154	0.037*	0.430	0.894	0.881	0.669	0.070	0.001*
q9_2		1.000	0.001*	0.139	0.013*	0.002*	0.003*	0.000*	0.038*	0.000*	0.105	0.002*	0.002*	0.000*	0.000*	0.000*	0.597	0.000*	0.008*
q9_3			1.000	0.099	0.453	0.979	0.916	0.167	0.489	0.332	0.297	0.838	0.440	0.460	0.093	0.238	0.895	0.769	0.045*
q9_4				1.000	0.367	0.107	0.144	0.001*	0.432	0.003*	0.685	0.146	0.281	0.006*	0.000*	0.004*	0.775	0.065	0.968
q9_5					1.000	0.472	0.542	0.028*	0.985	0.060	0.701	0.582	0.947	0.100	0.011*	0.050	0.950	0.499	0.275
q9_6						1.000	0.937	0.162	0.506	0.321	0.311	0.860	0.461	0.445	0.092	0.231	0.901	0.797	0.052
q9_7							1.000	0.159	0.569	0.306	0.363	0.930	0.542	0.418	0.095	0.222	0.918	0.889	0.084
q9_8								1.000	0.053	0.471	0.024*	0.107	0.013*	0.332	0.849	0.873	0.671	0.019*	0.000*
q9_9									1.000	0.105	0.738	0.608	0.937	0.154	0.028*	0.080	0.945	0.554	0.368
q9_10										1.000	0.049*	0.220	0.028*	0.736	0.240	0.639	0.747	0.036*	0.000*
q9_11											1.000	0.385	0.636	0.074	0.013*	0.038*	0.862	0.313	0.655
q9_12												1.000	0.583	0.321	0.055	0.162	0.937	0.974	0.077
q9_13													1.000	0.058	0.002*	0.031*	0.962	0.462	0.153
q9_14														1.000	0.146	0.481	0.779	0.100	0.000*
q9_15															1.000	0.722	0.653	0.001*	0.000*
q9_16																1.000	0.691	0.056	0.000*
q9_17																	1.000	0.942	0.780
q9_18																		1.000	0.002*
q9_19																			1.000

2.5.2 Question 5 on metric conversions

Question 5 not only has the lowest average, it also has a significant standard deviation.

# Pull the raw scores of every question variant whose title matches "q5_".
results = variationWithinQuestionTypes(m2,"q5_")
indivQ = results$indi
aveQ = results$ave

# One box per question variant.
boxplot(indivQ)

# Descriptive statistics per question variant.
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	max	range	skew	kurtosis	se
q5_1	1	85	4.524882	1.950047	4.995	4.681304	2.468529	6.66	6.66	-0.5454566	-0.7852274	0.2115123
q5_2	2	86	3.213895	2.223411	3.330	3.187357	2.468529	6.66	6.66	0.3015116	-1.1688138	0.2397566
q5_3	3	95	3.522790	2.327748	3.330	3.567857	2.468529	6.66	6.66	-0.1115919	-1.3037016	0.2388219
q5_4	4	71	4.268028	2.119804	4.995	4.410789	2.468529	6.66	6.66	-0.3853848	-1.1129671	0.2515745

Let’s look into the p-values among question 5 variations.

# Pairwise t-test p-values between the variants, printed with three decimals.
pval = printVariationWithinQuestions(indivQ,3)
knitr::kable(pval,digits=3)

	q5_1	q5_2	q5_3	q5_4
q5_1	1.000	0.000*	0.002*	0.436
q5_2		1.000	0.363	0.003*
q5_3			1.000	0.033*
q5_4				1.000

2.6 Variations within question groups among different groups of students

Will the big variation that we see in question 5 be different if we look only at the students who passed the milestone on their first attempt?

2.6.1 Question 9 on isomerism passing on 1st attempt

Let’s look at question 9 performance among students who passed the milestone on their 1st attempt. We have to remove some questions because they were not attempted or have too small a sample. This also prevents us from computing p-values.

# Keep only first-attempt submissions of the raw Milestone 2 export that
# scored above 79.99, then pull the "q9_" variants for that subset.
st100 = subset(m2,m2$score> 79.99 & m2$attempt == 1)
results = variationWithinQuestionTypes(st100,"q9_")
indivQ = results$indi
# Remove q9_17 and q9_1, which were not attempted in this subset.
indivQ$q9_17 = NULL
indivQ$q9_1 = NULL
aveQ = results$ave

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	min	max	range	skew	kurtosis	se
q9_2	1	3	5.187778	1.2836421	4.446667	5.187778	4.446667	6.67	2.223333	0.3849002	-2.3333333	0.7411111
q9_3	2	5	4.891333	2.4355396	6.670000	4.891333	2.223333	6.67	4.446667	-0.2921187	-2.2533333	1.0892064
q9_4	3	9	6.422963	0.7411111	6.670000	6.422963	4.446667	6.67	2.223333	-2.0740741	2.6296296	0.2470370
q9_5	4	8	5.280417	2.6407307	6.670000	5.280417	0.000000	6.67	6.670000	-1.0840627	-0.7195637	0.9336393
q9_6	5	4	6.670000	0.0000000	6.670000	6.670000	6.670000	6.67	0.000000	NaN	NaN	0.0000000
q9_7	6	10	6.447667	0.7030797	6.670000	6.670000	4.446667	6.67	2.223333	-2.2768399	3.5700000	0.2223333
q9_8	7	10	6.503250	0.5273098	6.670000	6.670000	5.002500	6.67	1.667500	-2.2768399	3.5700000	0.1667500
q9_9	8	8	6.114167	1.5721341	6.670000	6.114167	2.223333	6.67	4.446667	-1.8561553	1.7031250	0.5558333
q9_10	9	7	6.193571	0.5942108	6.670000	6.193571	5.558333	6.67	1.111667	-0.2290811	-2.2040816	0.2245906
q9_11	10	4	6.114167	1.1116667	6.670000	6.114167	4.446667	6.67	2.223333	-0.7500000	-1.6875000	0.5558333
q9_12	11	6	6.299444	0.9076720	6.670000	6.299444	4.446667	6.67	2.223333	-1.3608276	-0.0833333	0.3705556
q9_13	12	13	5.472564	1.9504554	6.670000	5.659394	2.220000	6.67	4.450000	-0.9189644	-1.1160493	0.5409590
q9_14	13	5	6.336500	0.7457287	6.670000	6.336500	5.002500	6.67	1.667500	-1.0733126	-0.9200000	0.3335000
q9_15	14	7	6.193571	0.8746547	6.670000	6.193571	4.446667	6.67	2.223333	-1.0774380	-0.6242000	0.3305884
q9_16	15	6	6.299444	0.9076720	6.670000	6.299444	4.446667	6.67	2.223333	-1.3608276	-0.0833333	0.3705556
q9_18	16	48	5.234236	1.8049936	6.670000	5.447333	0.000000	6.67	6.670000	-0.9364927	-0.1702542	0.2605284
q9_19	17	51	5.144183	1.8596530	6.670000	5.422764	0.000000	6.67	6.670000	-1.0219086	0.2497358	0.2604035

#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

2.6.2 Question 9 on isomerism 2nd and 3rd attempt

# Question 9 variants restricted to 2nd and 3rd attempts. The st100 name is
# carried over from the first-attempt chunks; here it holds every row with
# attempt > 1, whatever the score.
st100 <- subset(m2, m2$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q9_")
indivQ <- results$indi
# Drop q9_17, q9_18 and q9_19. The previous note named "q9_17 q9_1", which did
# not match the columns actually removed; presumably these three variants have
# no usable data on later attempts -- TODO confirm the reason.
indivQ <- indivQ[!(names(indivQ) %in% c("q9_17", "q9_18", "q9_19"))]
aveQ <- results$ave

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q9_1	1	14	5.717143	1.893459	6.670000	6.114167	0.0000000	0.000000	6.670000	6.670000	-1.9399650	3.0262130	0.5060481
q9_2	2	132	2.728636	2.450844	2.223333	2.579906	3.2963140	0.000000	6.670000	6.670000	0.2936334	-1.2908564	0.2133186
q9_3	3	14	5.717143	2.084630	6.670000	6.114167	0.0000000	0.000000	6.670000	6.670000	-1.7508066	1.6485778	0.5571407
q9_4	4	9	1.976296	1.738060	2.223333	1.976296	3.2963140	0.000000	4.446667	4.446667	0.1493123	-1.5356596	0.5793532
q9_5	5	10	3.335000	2.401474	3.335000	3.335000	1.6481570	0.000000	6.670000	6.670000	0.0000000	-1.4846939	0.7594129
q9_6	6	8	5.002500	2.301369	6.670000	5.002500	0.0000000	2.223333	6.670000	4.446667	-0.4226652	-2.0302083	0.8136567
q9_7	7	6	4.817222	2.599177	5.558333	4.817222	1.6481570	0.000000	6.670000	6.670000	-0.8808986	-0.9042732	1.0611096
q9_8	8	13	5.130769	2.497684	6.670000	5.457273	0.0000000	0.000000	6.670000	6.670000	-0.9434920	-0.9411600	0.6927330
q9_9	9	6	6.299444	0.907672	6.670000	6.299444	0.0000000	4.446667	6.670000	2.223333	-1.3608276	-0.0833333	0.3705556
q9_10	10	9	4.323148	1.296944	4.446667	4.323148	1.6481570	2.223333	6.670000	4.446667	0.1848612	-0.8789085	0.4323148
q9_11	11	9	3.211482	2.513230	2.223333	3.211482	3.2963140	0.000000	6.670000	6.670000	0.1215625	-1.5896520	0.8377434
q9_12	12	6	3.335000	2.331852	3.335000	3.335000	1.6481570	0.000000	6.670000	6.670000	0.0000000	-1.5709366	0.9519745
q9_13	13	9	4.199630	2.063166	4.446667	4.199630	3.2963140	2.223333	6.670000	4.446667	0.1819657	-1.9406097	0.6877220
q9_14	14	5	5.002500	1.667500	5.002500	5.002500	2.4722355	3.335000	6.670000	3.335000	0.0000000	-2.2000000	0.7457287
q9_15	15	6	5.743611	1.299589	6.114167	5.743611	0.8240785	3.335000	6.670000	3.335000	-0.8808986	-0.9042733	0.5305548
q9_16	16	6	5.928889	1.815344	6.670000	5.928889	0.0000000	2.223333	6.670000	4.446667	-1.3608276	-0.0833333	0.7411111

And p-values

# Pairwise p-value table for the remaining q9 variants (upper triangle only).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q9_1	q9_2	q9_3	q9_4	q9_5	q9_6	q9_7	q9_8	q9_9	q9_10	q9_11	q9_12	q9_13	q9_14	q9_15	q9_16
q9_1	1.000	0.000*	1.000	0.000*	0.019*	0.470	0.468	0.501	0.366	0.049*	0.023*	0.058	0.094	0.451	0.972	0.818
q9_2		1.000	0.000*	0.250	0.459	0.027*	0.107	0.005*	0.000*	0.006*	0.590	0.559	0.069	0.035*	0.001*	0.006*
q9_3			1.000	0.000*	0.021*	0.481	0.474	0.516	0.396	0.061	0.025*	0.060	0.104	0.463	0.973	0.824
q9_4				1.000	0.174	0.010*	0.047*	0.002*	0.000*	0.006*	0.245	0.255	0.025*	0.011*	0.000*	0.002*
q9_5					1.000	0.154	0.283	0.096	0.004*	0.277	0.914	1.000	0.410	0.145	0.021*	0.029*
q9_6						1.000	0.893	0.906	0.179	0.477	0.146	0.210	0.463	1.000	0.461	0.416
q9_7							1.000	0.810	0.234	0.680	0.261	0.323	0.637	0.890	0.459	0.413
q9_8								1.000	0.156	0.335	0.095	0.157	0.352	0.902	0.492	0.445
q9_9									1.000	0.004*	0.006*	0.025*	0.020*	0.171	0.413	0.668
q9_10										1.000	0.261	0.376	0.881	0.457	0.063	0.097
q9_11											1.000	0.924	0.376	0.138	0.025*	0.031*
q9_12												1.000	0.479	0.202	0.059	0.059
q9_13													1.000	0.447	0.099	0.113
q9_14														1.000	0.443	0.401
q9_15															1.000	0.843
q9_16																1.000

2.6.3 Question 5 on metric conversions passing on first attempt

# Question 5 variants among first-attempt rows scoring above 79.99.
passedOnFirst <- m2$score > 79.99 & m2$attempt == 1
st100 <- subset(m2, passedOnFirst)
results <- variationWithinQuestionTypes(st100, pat = "q5_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q5_1	1	28	5.708571	1.148959	6.660	5.827500	0.000000	3.33	6.66	3.33	-0.7319624	-0.7270408	0.2171329
q5_2	2	24	5.203125	1.498299	4.995	5.244750	2.468529	3.33	6.66	3.33	-0.2305026	-1.7759732	0.3058390
q5_3	3	27	5.365000	1.686211	6.660	5.574130	0.000000	0.00	6.66	6.66	-1.2862884	1.3693107	0.3245115
q5_4	4	23	5.863696	1.655099	6.660	6.221842	0.000000	0.00	6.66	6.66	-2.1818668	4.3766065	0.3451120

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table for the q5 variants within the passing subset.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q5_1	q5_2	q5_3	q5_4
q5_1	1.000	0.185	0.384	0.706
q5_2		1.000	0.718	0.159
q5_3			1.000	0.298
q5_4				1.000

We see that the difference disappears, therefore question q5_2 and q5_3 are not unfair, rather, they discriminate among high performing and low performing students.

3 Milestone 3

The questions for Milestone 3:

Organic nomenclature: Suffixes
Organic nomenclature: Func Groups
Peptides
ID the interaction that breaks when melting (doubled)
ID the major factor affecting melting
Enthalpy of phase change
Predict final T of mixture based on heat capacity
Gas Trends
Heating curves
Isomerism
NMR: number of peaks and number of sp2, sp3
Transition among orbitals
SI prefixes and mole conversion

3.1 Overall performance in Milestone 3

# Milestone 3 deliberately reuses the m1df name (there is no m3df); the
# original author did this to avoid typos in the repeated code.
m1df <- buildDF_fromMilestone(m3, 13)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each score band after the 1st, 2nd and 3rd attempt.
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format rows (grade letter, percentage, term, attempt) for the F21 data.
# Rows 1-3 of flow are the 0 / 80 / 100 bands, recorded as F / C / A.
stackAndGrouped <- data.frame(matrix(ncol = 4, nrow = 0))
pctByAttempt <- list(First = pct1st, Second = pct2nd, Third = pct3rd)
gradeLetters <- c("F", "C", "A")
for (attemptName in names(pctByAttempt)) {
  for (band in seq_along(gradeLetters)) {
    stackAndGrouped <- rbind(
      stackAndGrouped,
      c(gradeLetters[band], pctByAttempt[[attemptName]][band], "F21", attemptName)
    )
  }
}

# Turn the percentages into labels and append them to the raw counts.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow[["After 1st"]] <- paste(flow[["After 1st"]], pct1st, sep = " / ")
flow[["After 2nd"]] <- paste(flow[["After 2nd"]], pct2nd, sep = " / ")
flow[["After 3rd"]] <- paste(flow[["After 3rd"]], pct3rd, sep = " / ")
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)

scores	After 1st	After 2nd	After 3rd
0	43 / 22.05%	26 / 13.33%	12 / 6.154%
80	30 / 15.38%	28 / 14.36%	27 / 13.85%
100	122 / 62.56%	141 / 72.31%	156 / 80%

# Stacked bars of the raw counts per score band, one bar per attempt.
row.names(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 3: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

3.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count students whose last attempt was the 1st or 2nd, split at the 70 line.
# `results` is overwritten with the settle table from here on.
results <- results$settle
stoppedAfter1 <- results$MaxAttempt == 1
stoppedAfter2 <- results$MaxAttempt == 2
under70 <- results$MaxScore < 70
from70 <- results$MaxScore > 69.99
n1_70 <- nrow(results[stoppedAfter1 & under70, ])
n1_80 <- nrow(results[stoppedAfter1 & from70, ])
n2_70 <- nrow(results[stoppedAfter2 & under70, ])
n2_80 <- nrow(results[stoppedAfter2 & from70, ])

# Students settling before 3rd attempt	with Score < 70 (failing)	70< Score < 80
Settled in 1st attempt	4	7
Settled in 2nd attempt	3	7
Total	7	14

3.1.2 Comparing with Fall 2019

# Same flow computation for the Fall 2019 Milestone 3 data.
m1df19 <- buildDF_fromMilestone(m3f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the F19 rows (grade letter, percentage, term, attempt) to the F21 rows
# already held in stackAndGrouped.
pctByAttempt <- list(First = pct1st, Second = pct2nd, Third = pct3rd)
gradeLetters <- c("F", "C", "A")
for (attemptName in names(pctByAttempt)) {
  for (band in seq_along(gradeLetters)) {
    stackAndGrouped <- rbind(
      stackAndGrouped,
      c(gradeLetters[band], pctByAttempt[[attemptName]][band], "F19", attemptName)
    )
  }
}
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
# Each row was bound as a character vector, so Pct must be converted back.
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

library(ggplot2)
# Stacked grade shares per term, one facet per attempt.
# The fill colours are named so that A/C/F always map to green/yellow/red,
# instead of relying on the alphabetical order of the Grade values.
gradeColours <- c("A" = "green", "C" = "yellow", "F" = "red")
ggplot(stackAndGrouped, aes(x = Year, y = Pct, fill = Grade)) +
  geom_col(position = "stack") +
  theme_bw() +
  scale_fill_manual(values = gradeColours) +
  facet_grid(~ Attempt)

# Label the F19 percentages and append them to the F19 counts.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19[["After 1st"]] <- paste(flow19[["After 1st"]], pct1st, sep = " / ")
flow19[["After 2nd"]] <- paste(flow19[["After 2nd"]], pct2nd, sep = " / ")
flow19[["After 3rd"]] <- paste(flow19[["After 3rd"]], pct3rd, sep = " / ")

# Rename so the F19 columns do not collide with the F21 ones in the merge.
colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
mergedFlow <- merge(flow, flow19, by = "scores")
# scores is numeric from here on, so ordering by it gives 0, 80, 100.
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])

	scores	After 1st	After 2nd	After 3rd	1st att F19	2nd att F19	3rd att F19
1	0	43 / 22.05%	26 / 13.33%	12 / 6.154%	11 / 6.011%	3 / 1.639%	3 / 1.639%
3	80	30 / 15.38%	28 / 14.36%	27 / 13.85%	21 / 11.48%	14 / 7.65%	9 / 4.918%
2	100	122 / 62.56%	141 / 72.31%	156 / 80%	151 / 82.51%	166 / 90.71%	171 / 93.44%

Students settling in F19

# Fall 2019 counterpart of the settle counts (last attempt was the 1st or 2nd).
results <- results19$settle
stoppedAfter1 <- results$MaxAttempt == 1
stoppedAfter2 <- results$MaxAttempt == 2
under70 <- results$MaxScore < 70
from70 <- results$MaxScore > 69.99
n1_70_19 <- nrow(results[stoppedAfter1 & under70, ])
n1_80_19 <- nrow(results[stoppedAfter1 & from70, ])
n2_70_19 <- nrow(results[stoppedAfter2 & under70, ])
n2_80_19 <- nrow(results[stoppedAfter2 & from70, ])

# Students settling before 3rd attempt (F21 / F19)	Score < 70 (F21)	Score < 70 (F19)	70< Score < 80 (F21)	70< Score < 80 (F19)
Settled in 1st attempt	4	2	7	3
Settled in 2nd attempt	3	0	7	2
Total	7	2	14	5

3.2 M3 - Attempt 1 averages

# Milestone 3, attempt 1: per-question averages and standard deviations.
# (Arguments are presumably the question count and the attempt number.)
results <- buildStats(m1df, 13, 1)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
One	195	122 (63%)	30 (15%)	43 (22%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
Avg > 80	7.46	7.50	7.01	6.60	6.73	6.73	6.54	7.47	6.55	6.96	7.03	7.48	6.54
Avg 70><80	6.26	7.19	5.19	5.99	4.79	5.44	4.88	6.78	5.29	5.32	6.49	6.84	3.86
Avg < 70	4.05	4.38	4.24	4.51	2.75	4.33	4.54	5.44	4.48	4.51	5.71	5.79	3.09

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
SD > 80	0.69	0.43	1.47	1.51	1.52	1.42	1.55	0.55	1.53	1.42	0.76	0.53	1.58
SD 70><80	1.71	1.19	2.73	1.36	2.31	1.44	2.06	1.30	1.93	2.67	0.99	1.18	2.31
SD < 70	2.30	2.42	2.52	1.89	2.40	2.05	2.23	1.86	1.77	2.39	1.57	2.19	2.19

# One line per row of m1dfat1 (the three score bands). matplot draws them with
# the digits 1-3, which is what the "1:", "2:", "3:" legend prefixes refer to.
# The y label now quotes 7.7, the per-question maximum seen in the Milestone 3
# describe() tables (the old label said 6.33), and the legend carries the
# matching colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/7.7",
        main = "M3: Average score per question on 1st attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

3.3 M3 - Attempt 2 averages

# Milestone 3, attempt 2. The m1dfat1 / m1dfat1sd names are kept from the
# attempt-1 chunk even though they now hold attempt-2 values.
results <- buildStats(m1df, 13, 2)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Two	60	19 (32%)	17 (28%)	24 (40%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
Avg > 80	7.05	7.36	6.20	5.74	6.08	5.75	6.62	7.16	6.57	6.93	6.76	7.30	5.70
Avg 70><80	7.07	7.15	4.47	5.89	4.83	5.34	5.13	6.97	4.80	5.36	6.39	6.48	4.58
Avg < 70	4.81	6.02	4.75	5.40	3.32	4.75	4.38	6.46	4.24	4.49	5.48	6.02	2.22

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
SD > 80	0.93	0.57	2.27	1.56	1.95	2.11	1.56	0.76	1.13	1.15	0.94	0.71	2.00
SD 70><80	1.10	1.17	2.76	1.93	2.25	2.19	1.81	0.94	2.24	2.56	1.17	1.65	2.33
SD < 70	2.42	1.71	2.57	1.77	2.33	2.13	1.93	1.52	1.99	3.01	1.23	2.15	1.91

# One line per row of m1dfat1 (the three score bands), drawn with the digits
# 1-3 that the legend prefixes refer to.
# Fixes: title typo "2md" -> "2nd"; y label quotes 7.7, the per-question
# maximum seen in the Milestone 3 describe() tables (the old label said 6.33);
# the legend carries the matching colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/7.7",
        main = "M3: Average score per question on 2nd attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

3.4 M3 - Attempt 3 averages

# Milestone 3, attempt 3. The m1dfat1 / m1dfat1sd names are kept from the
# attempt-1 chunk even though they now hold attempt-3 values.
results <- buildStats(m1df, 13, 3)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Three	33	15 (45%)	13 (39%)	5 (15%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
Avg > 80	7.19	7.40	5.83	6.33	7.19	6.37	5.65	7.30	6.47	7.43	6.33	6.84	5.45
Avg 70><80	6.04	7.02	5.85	5.23	3.95	6.28	6.12	6.72	5.33	6.53	5.32	6.28	3.65
Avg < 70	4.93	5.17	4.56	4.11	1.54	4.93	5.13	4.94	4.00	3.04	5.62	5.32	3.80

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12	q13
SD > 80	1.26	0.53	1.96	1.64	1.06	1.73	1.74	0.85	1.04	0.65	0.80	1.20	2.47
SD 70><80	1.93	0.99	2.51	1.61	1.69	1.17	1.97	1.06	1.62	1.92	1.64	1.62	2.39
SD < 70	2.01	3.15	3.18	1.07	2.30	1.29	2.57	2.37	1.76	1.92	1.38	1.59	3.00

# One line per row of m1dfat1 (the three score bands), drawn with the digits
# 1-3 that the legend prefixes refer to.
# The y label now quotes 7.7, the per-question maximum seen in the Milestone 3
# describe() tables (the old label said 6.33), and the legend carries the
# matching colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/7.7",
        main = "M3: Average score per question on 3rd attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

3.5 Variation within question groups

3.5.1 Adding one new question on Question 4 on “What Breaks”

Question 4 contained one new question that no one had seen before and that it included the “tricky” sodium carboxylate. We can look into it and see whether or not scores are significantly different among questions (the second question q4_2 was the one everyone took).

This single question was kept in every attempt, so the fact that some students still did not get it right on their second and third attempts also tells us how much they review their previous attempts.

# Gather every Milestone 3 question whose title matches "q4_".
results <- variationWithinQuestionTypes(m3, pat = "q4_")
indivQ <- results[["indi"]]  # one score column per question variant
aveQ <- results[["ave"]]     # per-variant title, mean and non-missing count

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q4_1	1	69	6.305072	1.729340	6.416667	6.551754	1.90267	1.283333	7.7	6.416667	-1.0527392	-0.0086944	0.2081880
q4_2	2	288	5.084317	1.691476	5.133333	5.094612	1.90267	1.283333	7.7	6.416667	-0.0119117	-0.9826020	0.0996712
q4_3	3	61	5.995902	1.810688	6.416667	6.207143	1.90267	0.000000	7.7	7.700000	-1.0293704	0.6111612	0.2318348
q4_4	4	61	6.837432	1.439038	7.700000	7.123810	0.00000	2.566667	7.7	5.133333	-1.5621702	1.5296159	0.1842499
q4_5	5	52	6.416667	1.481866	6.416667	6.661111	1.90267	1.283333	7.7	6.416667	-1.2740566	1.5000000	0.2054978

Let’s look into the p-values among question 4 variations.

# Pairwise p-value table for the q4 variants (upper triangle only).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q4_1	q4_2	q4_3	q4_4	q4_5
q4_1	1.000	0.000*	0.323	0.058	0.704
q4_2		1.000	0.001*	0.000*	0.000*
q4_3			1.000	0.005*	0.177
q4_4				1.000	0.130
q4_5					1.000

We can clearly see that q4_2 is significantly lower.

3.5.2 Question 5 on deciding what factor decides the outcome

Question 5 is the lowest for the low performers.

# Gather every Milestone 3 question whose title matches "q5_".
results <- variationWithinQuestionTypes(m3, pat = "q5_")
indivQ <- results[["indi"]]  # one score column per question variant
aveQ <- results[["ave"]]     # per-variant title, mean and non-missing count

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q5_1	1	45	5.475556	2.768714	7.700000	5.827027	0.00000	0.000000	7.7	7.700000	-0.8955661	-0.6120356	0.4127355
q5_2	2	60	6.031667	2.354524	7.700000	6.523611	0.00000	0.000000	7.7	7.700000	-1.3753759	0.9725644	0.3039677
q5_3	3	50	5.800667	1.995556	5.133333	6.031667	3.80534	0.000000	7.7	7.700000	-0.7226793	-0.2403807	0.2822143
q5_4	4	52	5.034615	2.639196	5.133333	5.316667	3.80534	0.000000	7.7	7.700000	-0.4559226	-1.1153820	0.3659907
q5_6	5	45	5.561111	1.641646	5.133333	5.618919	1.90267	2.566667	7.7	5.133333	-0.0495421	-1.2184728	0.2447221
q5_5	6	42	3.850000	2.791569	5.133333	3.850000	3.80534	0.000000	7.7	7.700000	-0.0555182	-1.3388992	0.4307485
q5_7	7	39	5.528205	2.602782	7.700000	5.833333	0.00000	0.000000	7.7	7.700000	-0.7420168	-0.8420089	0.4167787

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table for the q5 variants (upper triangle only).
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q5_1	q5_2	q5_3	q5_4	q5_6	q5_5	q5_7
q5_1	1.000	0.281	0.517	0.426	0.859	0.008*	0.929
q5_2		1.000	0.579	0.039*	0.231	0.000*	0.332
q5_3			1.000	0.101	0.523	0.000*	0.590
q5_4				1.000	0.235	0.039*	0.376
q5_6					1.000	0.001*	0.946
q5_5						1.000	0.006*
q5_7							1.000

3.6 Variations within question groups among different groups of students

Will the big variation that we see in question 4 be different if we look only at the students who passed the milestone on their first attempt?

3.6.1 Question 4 on “what breaks” passing on 1st attempt

Let’s look at question 4 performance among students who passed the milestone on their 1st attempt. We have to remove some questions because they were not attempted or their sample is too small. This also prevents us from computing p-values.

# Question 4 variants among first-attempt rows scoring above 79.99.
# No variant is removed here: the commented-out q9 column removals that used
# to sit in this chunk were leftovers from the Milestone 2 analysis.
st100 <- subset(m3, m3$score > 79.99 & m3$attempt == 1)
results <- variationWithinQuestionTypes(st100, pat = "q4_")
indivQ <- results$indi
aveQ <- results$ave

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q4_1	1	25	7.238000	0.8183871	7.700000	7.394444	0.00000	5.133333	7.7	2.566667	-1.4333383	0.7753774	0.1636774
q4_2	2	122	5.775000	1.7402506	6.416667	5.932143	1.90267	1.283333	7.7	6.416667	-0.4930767	-0.9452706	0.1575549
q4_3	3	26	6.959615	1.3174316	7.700000	7.233333	0.00000	2.566667	7.7	5.133333	-1.9343090	3.1626093	0.2583696
q4_4	4	24	7.432639	0.7548951	7.700000	7.635833	0.00000	5.133333	7.7	2.566667	-2.4174721	4.3227775	0.1540923
q4_5	5	21	7.088889	0.7720343	7.700000	7.171569	0.00000	5.133333	7.7	2.566667	-0.7419630	-0.5964347	0.1684717

#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

3.6.2 Question 4 on “What breaks” 2nd and 3rd attempt

This is proof that students who don’t do well in Milestones do not revise their own attempt because q4_2 keeps showing up and they keep answering it wrong.

# Question 4 variants restricted to 2nd and 3rd attempts. The st100 name is
# carried over from the first-attempt chunks; here it holds every row with
# attempt > 1, whatever the score.
# No variant is removed: the commented-out q9 column removals that used to sit
# in this chunk were leftovers from the Milestone 2 analysis.
st100 <- subset(m3, m3$attempt > 1)
results <- variationWithinQuestionTypes(st100, pat = "q4_")
indivQ <- results$indi
aveQ <- results$ave

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q4_1	1	27	6.274074	1.682072	6.416667	6.472464	1.90267	2.566667	7.7	5.133333	-0.8894264	-0.5195975	0.3237149
q4_2	2	93	4.553763	1.484072	3.850000	4.517333	1.90267	1.283333	7.7	6.416667	0.2173147	-0.4985959	0.1538911
q4_3	3	18	5.846296	1.474584	6.416667	5.855208	1.90267	3.850000	7.7	3.850000	-0.2378145	-1.4971689	0.3475627
q4_4	4	21	6.905556	1.374796	7.700000	7.171569	0.00000	2.566667	7.7	5.133333	-1.6807250	2.1954419	0.3000051
q4_5	5	18	6.202778	1.476408	6.416667	6.336458	1.90267	2.566667	7.7	5.133333	-0.5716151	-0.4377183	0.3479926

And p-values

# Pairwise p-value table for the q4 variants on later attempts.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q4_1	q4_2	q4_3	q4_4	q4_5
q4_1	1.000	0.000*	0.373	0.159	0.882
q4_2		1.000	0.002*	0.000*	0.000*
q4_3			1.000	0.027*	0.474
q4_4				1.000	0.135
q4_5					1.000

3.6.3 Question 5 on “conflicting factors” passing on first attempt

# Question 5 variants among first-attempt rows scoring above 79.99.
passedOnFirst <- m3$score > 79.99 & m3$attempt == 1
st100 <- subset(m3, passedOnFirst)
results <- variationWithinQuestionTypes(st100, pat = "q5_")
indivQ <- results[["indi"]]
aveQ <- results[["ave"]]

boxplot(indivQ)

knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q5_1	1	16	6.897917	1.545338	7.700000	7.150000	0.00000	2.566667	7.7	5.133333	-1.5503911	1.2301065	0.3863345
q5_2	2	27	7.224691	1.016008	7.700000	7.365217	0.00000	5.133333	7.7	2.566667	-1.5316762	0.3635615	0.1955309
q5_3	3	25	6.776000	1.459468	7.700000	6.966667	0.00000	2.566667	7.7	5.133333	-1.1863254	0.3263893	0.2918937
q5_4	4	21	6.722222	1.717211	7.700000	7.096078	0.00000	2.566667	7.7	5.133333	-1.3686912	0.4808129	0.3747262
q5_6	5	26	5.873718	1.709218	6.416667	6.008333	1.90267	2.566667	7.7	5.133333	-0.4028562	-1.1583659	0.3352052
q5_5	6	13	6.317949	1.694578	7.700000	6.533333	0.00000	2.566667	7.7	5.133333	-0.6737567	-0.8226490	0.4699914
q5_7	7	20	7.058333	1.411974	7.700000	7.379167	0.00000	2.566667	7.7	5.133333	-1.9146034	2.6726134	0.3157269

Let’s look into the p-values among question 5 variations.

# Pairwise p-value table for the q5 variants within the passing subset.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q5_1	q5_2	q5_3	q5_4	q5_6	q5_5	q5_7
q5_1	1.000	0.458	0.803	0.746	0.053	0.350	0.750
q5_2		1.000	0.208	0.244	0.001*	0.094	0.657
q5_3			1.000	0.910	0.048*	0.417	0.515
q5_4				1.000	0.099	0.507	0.497
q5_6					1.000	0.449	0.014*
q5_5						1.000	0.204
q5_7							1.000

4 Milestone 4

Questions

Heat Curves
Ions in water
Solubility ranking
Solution preparation
Vapor pressure
Major factor in melting point
What breaks when melting
NMR signals
Isomerism
Transition in a Bohr model of atom
Electrostatics
Isotopes

4.1 Overall performance in Milestone 4

# Milestone 4 deliberately reuses the m1df name (there is no m4df); the
# original author did this to avoid typos in the repeated code.
m1df <- buildDF_fromMilestone(m4, 12)
results <- flowAndSettling(m1df)
flow <- results$flow

# Percentage of students in each score band after the 1st, 2nd and 3rd attempt.
pct1st <- flow[, 1] / sum(flow[, 1]) * 100
pct2nd <- flow[, 2] / sum(flow[, 2]) * 100
pct3rd <- flow[, 3] / sum(flow[, 3]) * 100

# Long-format rows (grade letter, percentage, term, attempt) for the F21 data.
# Rows 1-3 of flow are the 0 / 80 / 100 bands, recorded as F / C / A.
stackAndGrouped <- data.frame(matrix(ncol = 4, nrow = 0))
pctByAttempt <- list(First = pct1st, Second = pct2nd, Third = pct3rd)
gradeLetters <- c("F", "C", "A")
for (attemptName in names(pctByAttempt)) {
  for (band in seq_along(gradeLetters)) {
    stackAndGrouped <- rbind(
      stackAndGrouped,
      c(gradeLetters[band], pctByAttempt[[attemptName]][band], "F21", attemptName)
    )
  }
}

# Turn the percentages into labels and append them to the raw counts.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")
flow[["After 1st"]] <- paste(flow[["After 1st"]], pct1st, sep = " / ")
flow[["After 2nd"]] <- paste(flow[["After 2nd"]], pct2nd, sep = " / ")
flow[["After 3rd"]] <- paste(flow[["After 3rd"]], pct3rd, sep = " / ")
scores <- c("0", "80", "100")
flow <- cbind(scores, flow)
knitr::kable(flow)

scores	After 1st	After 2nd	After 3rd
0	33 / 17.65%	19 / 10.16%	12 / 6.417%
80	38 / 20.32%	29 / 15.51%	26 / 13.9%
100	116 / 62.03%	139 / 74.33%	149 / 79.68%

# Stacked bars of the raw counts per score band, one bar per attempt.
row.names(results$flow) <- c("0", "80", "100")
barplot(
  as.matrix(results$flow),
  col = c("red", "yellow", "green"),
  main = "Milestone 4: students scores best of 3 attempts",
  legend.text = rownames(results$flow)
)

4.1.1 Analyzing students who settled

Some students will not maximize the number of attempts even if they have not achieved the highest >80 score.

# Count students whose last attempt was the 1st or 2nd, split at the 70 line.
# `results` is overwritten with the settle table from here on.
results <- results$settle
stoppedAfter1 <- results$MaxAttempt == 1
stoppedAfter2 <- results$MaxAttempt == 2
under70 <- results$MaxScore < 70
from70 <- results$MaxScore > 69.99
n1_70 <- nrow(results[stoppedAfter1 & under70, ])
n1_80 <- nrow(results[stoppedAfter1 & from70, ])
n2_70 <- nrow(results[stoppedAfter2 & under70, ])
n2_80 <- nrow(results[stoppedAfter2 & from70, ])

# Students settling before 3rd attempt	with Score < 70 (failing)	70< Score < 80
Settled in 1st attempt	3	11
Settled in 2nd attempt	4	7
Total	7	18

4.1.2 Comparing with Fall 2019

# Same flow computation for the Fall 2019 Milestone 4 data.
m1df19 <- buildDF_fromMilestone(m4f19, 12)
results19 <- flowAndSettling(m1df19)
flow19 <- results19$flow
pct1st <- flow19[, 1] / sum(flow19[, 1]) * 100
pct2nd <- flow19[, 2] / sum(flow19[, 2]) * 100
pct3rd <- flow19[, 3] / sum(flow19[, 3]) * 100

# Append the F19 rows (grade letter, percentage, term, attempt) to the F21 rows
# already held in stackAndGrouped.
pctByAttempt <- list(First = pct1st, Second = pct2nd, Third = pct3rd)
gradeLetters <- c("F", "C", "A")
for (attemptName in names(pctByAttempt)) {
  for (band in seq_along(gradeLetters)) {
    stackAndGrouped <- rbind(
      stackAndGrouped,
      c(gradeLetters[band], pctByAttempt[[attemptName]][band], "F19", attemptName)
    )
  }
}
colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
# Each row was bound as a character vector, so Pct must be converted back.
stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

library(ggplot2)
# Stacked grade shares per term, one facet per attempt.
# The fill colours are named so that A/C/F always map to green/yellow/red,
# instead of relying on the alphabetical order of the Grade values.
gradeColours <- c("A" = "green", "C" = "yellow", "F" = "red")
ggplot(stackAndGrouped, aes(x = Year, y = Pct, fill = Grade)) +
  geom_col(position = "stack") +
  theme_bw() +
  scale_fill_manual(values = gradeColours) +
  facet_grid(~ Attempt)

# Label the F19 percentages and append them to the F19 counts.
pct1st <- paste0(signif(pct1st, digits = 4), "%")
pct2nd <- paste0(signif(pct2nd, digits = 4), "%")
pct3rd <- paste0(signif(pct3rd, digits = 4), "%")

flow19[["After 1st"]] <- paste(flow19[["After 1st"]], pct1st, sep = " / ")
flow19[["After 2nd"]] <- paste(flow19[["After 2nd"]], pct2nd, sep = " / ")
flow19[["After 3rd"]] <- paste(flow19[["After 3rd"]], pct3rd, sep = " / ")

# Rename so the F19 columns do not collide with the F21 ones in the merge.
colnames(flow19) <- c("1st att F19", "2nd att F19", "3rd att F19")
scores <- c("0", "80", "100")
flow19 <- cbind(scores, flow19)
mergedFlow <- merge(flow, flow19, by = "scores")
# scores is numeric from here on, so ordering by it gives 0, 80, 100.
mergedFlow$scores <- as.numeric(mergedFlow$scores)
knitr::kable(mergedFlow[order(mergedFlow$scores), ])

	scores	After 1st	After 2nd	After 3rd	1st att F19	2nd att F19	3rd att F19
1	0	33 / 17.65%	19 / 10.16%	12 / 6.417%	44 / 24.86%	22 / 12.43%	14 / 7.91%
3	80	38 / 20.32%	29 / 15.51%	26 / 13.9%	37 / 20.9%	30 / 16.95%	22 / 12.43%
2	100	116 / 62.03%	139 / 74.33%	149 / 79.68%	96 / 54.24%	125 / 70.62%	141 / 79.66%

Students settling in F19

# Fall 2019 counterpart of the settle counts (last attempt was the 1st or 2nd).
results <- results19$settle
stoppedAfter1 <- results$MaxAttempt == 1
stoppedAfter2 <- results$MaxAttempt == 2
under70 <- results$MaxScore < 70
from70 <- results$MaxScore > 69.99
n1_70_19 <- nrow(results[stoppedAfter1 & under70, ])
n1_80_19 <- nrow(results[stoppedAfter1 & from70, ])
n2_70_19 <- nrow(results[stoppedAfter2 & under70, ])
n2_80_19 <- nrow(results[stoppedAfter2 & from70, ])

# Students settling before 3rd attempt (F21 / F19)	Score < 70 (F21)	Score < 70 (F19)	70< Score < 80 (F21)	70< Score < 80 (F19)
Settled in 1st attempt	3	1	11	4
Settled in 2nd attempt	4	4	7	5
Total	7	5	18	9

4.1.3 Dimensional analysis questions in M4-F19

In Fall2019 there were two dimensional analysis questions, q4 and q6

# Fall 2019 Milestone 4, attempt 1: per-question averages and standard
# deviations, computed from the F19 data frame m1df19.
results <- buildStats(m1df19, 12, 1)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
One	177	96 (54%)	37 (21%)	44 (25%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	7.82	7.41	7.23	6.08	7.29	7.81	7.32	7.75	7.94	8.00	8.05	7.35
Avg 70><80	6.76	5.71	5.90	2.70	6.12	5.63	6.68	7.43	7.54	7.73	7.74	6.05
Avg < 70	6.07	3.29	3.92	0.76	4.86	1.70	5.55	6.97	6.91	6.99	6.06	5.46

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	0.93	1.49	2.27	3.73	1.57	2.03	1.85	0.77	0.87	1.01	0.62	1.40
SD 70><80	1.51	2.07	3.01	3.96	1.75	3.95	2.22	0.86	1.14	1.19	1.26	1.97
SD < 70	1.72	2.89	2.86	2.43	2.11	3.40	2.47	1.12	1.78	1.75	2.30	1.94

# One line per row of m1dfat1 (the three score bands), drawn with the digits
# 1-3 that the legend prefixes refer to.
# The y label now quotes 8.33 (100 points over the 12 questions passed to
# buildStats) where the old label said 6.33, a value the averages plotted here
# exceed -- TODO confirm the F19 per-question maximum. The legend carries the
# matching colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/8.33",
        main = "M4-F19: Average score per question on 1st attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

4.2 M4 - Attempt 1 averages

# Milestone 4, attempt 1: per-question averages and standard deviations.
# (Arguments are presumably the question count and the attempt number.)
results <- buildStats(m1df, 12, 1)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
One	187	116 (62%)	39 (21%)	32 (17%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	7.54	7.33	7.44	5.46	7.74	7.88	7.79	7.88	7.65	7.76	7.96	8.21
Avg 70><80	7.16	6.42	5.31	1.50	6.76	6.34	6.94	7.33	7.08	5.55	7.13	7.90
Avg < 70	6.21	2.08	3.52	1.04	5.12	4.25	6.72	6.35	5.16	4.95	6.66	7.25

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	1.23	1.66	1.98	3.98	1.17	1.15	0.96	0.70	1.81	1.89	0.93	0.43
SD 70><80	1.48	2.79	2.99	3.24	1.89	2.54	1.39	1.10	1.90	3.31	1.48	1.01
SD < 70	1.87	2.64	2.95	2.80	2.35	3.15	1.58	1.58	3.08	3.51	1.64	1.99

# One line per row of m1dfat1 (the three score bands), drawn with the digits
# 1-3 that the legend prefixes refer to.
# The y label now quotes 8.33, the per-question maximum for the 12-question
# Milestone 4 (the old label said 6.33), and the legend carries the matching
# colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/8.33",
        main = "M4: Average score per question on 1st attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

4.3 M4 - Attempt 2 averages

# Milestone 4, attempt 2. The m1dfat1 / m1dfat1sd names are kept from the
# attempt-1 chunk even though they now hold attempt-2 values.
results <- buildStats(m1df, 12, 2)
m1dfat1 <- results[["ave"]]
m1dfat1sd <- results[["sd"]]

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Two	57	23 (40%)	15 (26%)	19 (33%)

Table of averages per question and their standard deviation

library(knitr)
knitr::kable(m1dfat1,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	7.55	7.13	7.92	4.35	7.48	7.24	7.06	7.68	7.61	7.00	7.97	8.09
Avg 70><80	6.67	5.00	5.65	0.00	7.59	7.22	7.31	6.72	7.40	6.85	7.22	8.24
Avg < 70	6.22	2.63	3.07	0.00	5.33	2.92	5.85	6.44	6.36	4.97	6.84	7.67

knitr::kable(m1dfat1sd,digits=2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	1.01	2.02	1.29	4.26	1.66	1.62	1.44	0.83	1.43	2.76	0.86	0.68
SD 70><80	1.41	2.62	3.08	0.00	1.16	1.41	1.33	1.53	1.35	2.54	2.24	0.36
SD < 70	1.87	2.85	3.02	0.00	2.23	2.35	2.29	1.14	2.86	3.41	1.66	1.69

# One line per row of m1dfat1 (the three score bands), drawn with the digits
# 1-3 that the legend prefixes refer to.
# Fixes: title typo "2md" -> "2nd"; y label quotes 8.33, the per-question
# maximum for the 12-question Milestone 4 (the old label said 6.33); the
# legend carries the matching colour, line type and symbol of each series.
matplot(t(m1dfat1), type = "b", ylim = c(0, 9),
        xlab = "Question number", ylab = "Average score/8.33",
        main = "M4: Average score per question on 2nd attempt")
legend("bottomright", legend = c("1:>80", "2:70-80", "3:<70"),
       col = 1:3, lty = 1:3, pch = c("1", "2", "3"))

4.4 M4 - Attempt 3 averages

# Recompute the per-question statistics; the last argument is presumably the
# attempt number (3 here) and 12 the number of questions -- TODO confirm
# against buildStats().
results <- buildStats(m1df, 12, 3)
# NOTE(review): the "at1" variable names are reused for this attempt.
m1dfat1 <- results$ave
m1dfat1sd <- results$sd

Attempt	Total students	Students with score > 80	Students with score 70-80	Students with score < 70%
Three	23	10 (43%)	7 (30%)	6 (26%)

Table of averages per question and their standard deviation

library(knitr)
# Per-question averages, one row per score band.
knitr::kable(m1dfat1, digits = 2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
Avg > 80	7.92	7.78	8.34	2.50	7.91	6.94	7.64	6.91	7.77	7.77	8.33	8.33
Avg 70><80	7.35	5.96	5.96	2.38	7.54	5.55	7.34	6.90	5.55	7.14	7.38	7.34
Avg < 70	5.56	2.78	5.56	0.00	4.63	2.78	6.48	6.94	6.02	5.55	6.66	6.48

# Per-question standard deviations, one row per score band.
knitr::kable(m1dfat1sd, digits = 2)

	q1	q2	q3	q4	q5	q6	q7	q8	q9	q10	q11	q12
SD > 80	0.94	1.17	0.00	4.03	0.94	2.36	1.35	1.84	1.17	1.76	0.00	0.00
SD 70><80	1.05	3.38	2.97	4.07	1.35	2.27	1.05	1.42	3.21	3.15	1.31	2.08
SD < 70	1.52	3.52	3.05	0.00	2.27	3.04	0.72	1.25	3.25	3.51	2.79	1.90

# Plot the per-question averages, one series per score band. m1dfat1 holds one
# row per band, so it is transposed to give matplot() one column per band.
# col/lty are spelled out (they equal matplot's defaults for three series) so
# the legend below can reuse exactly the same values.
# The y-axis label uses 8.33: per-question scores in this milestone go up to
# 8.33, so the former "/6.33" label did not match the plotted data.
matplot(
  t(m1dfat1),
  type = "b",
  col = 1:3,
  lty = 1:3,
  ylim = c(0, 9),
  xlab = "Question number",
  ylab = "Average score/8.33",
  main = "M4: Average score per question on 3rd attempt"
)
# The digit prefix in each label matches the plotting character matplot()
# uses for that series; col/lty add the matching line key.
legend(
  "bottomright",
  legend = c("1:>80", "2:70-80", "3:<70"),
  col = 1:3,
  lty = 1:3
)

4.5 Variation within question groups

4.5.1 Question 3 on “solubility ranking”

Although this is a straightforward question, a single mistake throws off the rest of the ranking. This is why low performers score significantly lower on this question.

# Collect every variant of question 3 (columns whose title matches "q3_")
# from the unfiltered m4 data.
results <- variationWithinQuestionTypes(m4, "q3_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q3_1	1	49	6.893265	2.373185	8.34	7.221220	0.000000	0.00	8.34	8.34	-1.2419413	0.0674694	0.3390264
q3_2	2	47	5.412128	3.294878	6.95	5.666923	2.060814	0.00	8.34	8.34	-0.4703762	-1.4660768	0.4806074
q3_3	3	40	6.950000	2.376484	8.34	7.384375	0.000000	1.39	8.34	6.95	-1.2005794	-0.2789127	0.3757551
q3_4	4	38	6.950000	2.524026	8.34	7.384375	0.000000	0.00	8.34	8.34	-1.4767892	0.6065043	0.4094511
q3_5	5	48	6.283958	2.965867	8.34	6.672000	0.000000	0.00	8.34	8.34	-0.8838455	-0.9116062	0.4280860
q3_6	6	45	5.127556	3.203068	4.17	5.334595	6.182442	0.00	8.34	8.34	-0.1608619	-1.6541892	0.4774852

Let’s look into the p-values among question 3 variations.

# Pairwise comparison table between the question variants, 3 digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q3_1	q3_2	q3_3	q3_4	q3_5	q3_6
q3_1	1.000	0.014*	0.911	0.915	0.267	0.003*
q3_2		1.000	0.014*	0.017*	0.179	0.675
q3_3			1.000	1.000	0.246	0.004*
q3_4				1.000	0.264	0.005*
q3_5					1.000	0.075
q3_6						1.000

Conclusion: Questions q3_2 (column AI) and q3_6 (column ES) score significantly lower than q3_1, q3_3 and q3_4 (their differences with q3_5 are not significant). They are worth looking into.

q3_2: 7661924: Consider the following solutes to be dissolved in hexane. Being 1 the most soluble and 6 the least soluble, rank them accordingly: Pentanone=>2,Water=>6,Ethanoic acid=>5,Dipropyl ether=>1,Butanoic acid=>4,Butanone=>3

4.5.2 Question 2 on ions in water

Question 2 requires not only avoiding mistakes but also knowing the nomenclature of ions.

# Collect every variant of question 2 (columns whose title matches "q2_")
# from the unfiltered m4 data.
results <- variationWithinQuestionTypes(m4, "q2_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	max	range	skew	kurtosis	se
q2_1	1	42	6.950000	2.234991	8.34	7.440588	0.000000	8.34	8.34	-1.6495299	2.1650520	0.3448666
q2_2	2	61	5.742295	3.039785	5.56	6.127347	4.121628	8.34	8.34	-0.8793804	-0.6084872	0.3892046
q2_3	3	65	5.987692	3.038619	8.34	6.399245	0.000000	8.34	8.34	-0.9369246	-0.5851230	0.3768944
q2_4	4	55	5.560000	3.210068	5.56	5.868889	4.121628	8.34	8.34	-0.6377096	-1.1590909	0.4328454
q2_5	5	44	5.812727	3.046410	5.56	6.177778	4.121628	8.34	8.34	-0.9000883	-0.5982777	0.4592636

Let’s look into the p-values among question 2 variations.

# Pairwise comparison table between the question variants, 3 digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q2_1	q2_2	q2_3	q2_4	q2_5
q2_1	1.000	0.022*	0.062	0.014*	0.051
q2_2		1.000	0.651	0.755	0.907
q2_3			1.000	0.458	0.769
q2_4				1.000	0.690
q2_5					1.000

4.6 Variations within question groups among different groups of students

Will the big variation that we see in question 4 be different if we look at the students who passed the milestone on their first attempt?

4.6.1 Question 2 on “ions in solution” passing on 1st attempt

Let’s look at question 2 performance among students who passed the milestone on their 1st attempt.

# Keep only first-attempt rows with a score above 79.99.
st100 <- subset(m4, m4$score > 79.99 & m4$attempt == 1)
results <- variationWithinQuestionTypes(st100, "q2_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
# NOTE(review): the commented-out lines below drop q9_* columns, which a
# "q2_" selection cannot contain; presumably left over from another section.
#remove q9_17 q9_1 as it was not attempted
#indivQ$q9_17 = NULL
#indivQ$q9_1 = NULL
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	min	max	range	skew	kurtosis	se
q2_1	1	19	7.462105	1.619052	8.34	7.685882	2.78	8.34	5.56	-1.4967847	1.145767	0.3714360
q2_2	2	28	7.148571	1.918382	8.34	7.413333	0.00	8.34	8.34	-1.8764854	4.132959	0.3625402
q2_3	3	30	7.413333	1.686010	8.34	7.760833	2.78	8.34	5.56	-1.5274886	1.161198	0.3078219
q2_4	4	23	7.252174	1.620816	8.34	7.462105	2.78	8.34	5.56	-1.0624995	0.013987	0.3379635
q2_5	5	16	7.471250	1.330824	8.34	7.545714	5.56	8.34	2.78	-0.7343901	-1.545810	0.3327059

#pval = printVariationWithinQuestions(indivQ,3)
#knitr::kable(pval,digits=3)

4.6.2 Question 2 on “ions in solution” 2nd and 3rd attempt

These data show the performance of all students who took question 2 during their 2nd or 3rd attempt, regardless of whether they passed.

# Keep every row from an attempt after the first, whatever the score.
st100 <- subset(m4, m4$attempt > 1)
results <- variationWithinQuestionTypes(st100, "q2_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	max	range	skew	kurtosis	se
q2_1	1	14	6.552857	2.340475	6.95	6.950000	2.060814	8.34	8.34	-1.3777725	1.5729913	0.6255182
q2_2	2	16	4.865000	3.729761	5.56	4.964286	4.121628	8.34	8.34	-0.3493856	-1.7594039	0.9324403
q2_3	3	17	5.723529	3.180426	5.56	5.930667	4.121628	8.34	8.34	-0.8131637	-0.8903646	0.7713667
q2_4	4	18	4.478889	3.322193	4.17	4.517500	6.182442	8.34	8.34	-0.0526470	-1.6368358	0.7830483
q2_5	5	15	5.189333	2.947075	5.56	5.346154	4.121628	8.34	8.34	-0.4292674	-1.1933391	0.7609314

And p-values

# Pairwise comparison table between the question variants, 3 digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q2_1	q2_2	q2_3	q2_4	q2_5
q2_1	1.000	0.145	0.411	0.047*	0.178
q2_2		1.000	0.484	0.753	0.790
q2_3			1.000	0.266	0.626
q2_4				1.000	0.520
q2_5					1.000

4.6.3 Question 3 on “solubility ranking” passing on first attempt

# Keep only first-attempt rows with a score above 79.99.
st100 <- subset(m4, m4$score > 79.99 & m4$attempt == 1)
results <- variationWithinQuestionTypes(st100, "q3_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	min	max	range	skew	kurtosis	se
q3_1	1	19	7.535263	1.9280340	8.34	7.767647	2.78	8.34	5.56	-1.7979077	1.4091320	0.4423213
q3_2	2	15	7.228000	1.9797482	8.34	7.484615	2.78	8.34	5.56	-1.4342811	0.4974912	0.5111688
q3_3	3	20	7.575500	1.9384081	8.34	8.079375	1.39	8.34	6.95	-2.1147600	3.1575153	0.4334412
q3_4	4	19	7.388947	2.2246096	8.34	7.685882	1.39	8.34	6.95	-2.0321101	2.6167305	0.5103604
q3_5	5	22	8.087273	0.8180009	8.34	8.340000	5.56	8.34	2.78	-2.6542231	5.2915289	0.1743984
q3_6	6	21	6.751429	2.5737799	8.34	7.031765	2.78	8.34	5.56	-0.8817334	-1.2766440	0.5616448

Let’s look into the p-values among question 3 variations.

# Pairwise comparison table between the question variants, 3 digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q3_1	q3_2	q3_3	q3_4	q3_5	q3_6
q3_1	1.000	0.653	0.949	0.830	0.257	0.280
q3_2		1.000	0.608	0.825	0.130	0.535
q3_3			1.000	0.782	0.284	0.253
q3_4				1.000	0.209	0.406
q3_5					1.000	0.032*
q3_6						1.000

4.6.4 Question 3 on “solubility ranking” 2nd and 3rd attempt

These data show the performance of all students who took question 3 during their 2nd or 3rd attempt, regardless of whether they passed.

# Keep every row from an attempt after the first, whatever the score.
st100 <- subset(m4, m4$attempt > 1)
results <- variationWithinQuestionTypes(st100, "q3_")
indivQ <- results[["indi"]]  # scores, one column per matching variant
aveQ <- results[["ave"]]     # title, mean and non-missing count per variant

# Score distribution of each variant.
boxplot(indivQ)

# Descriptive statistics per variant (describe() is presumably
# psych::describe -- confirm against the document's library calls).
knitr::kable(describe(indivQ))

	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
q3_1	1	17	6.868235	2.223489	8.34	7.042667	0.000000	2.78	8.34	5.56	-0.9452118	-0.8377803	0.5392753
q3_2	2	14	4.765714	3.844136	5.56	4.865000	4.121628	0.00	8.34	8.34	-0.1720159	-1.9361203	1.0273885
q3_3	3	11	6.950000	2.407551	8.34	7.258889	0.000000	2.78	8.34	5.56	-0.9447550	-1.1212121	0.7259038
q3_4	4	12	7.760833	1.618655	8.34	8.201000	0.000000	2.78	8.34	5.56	-2.4003740	4.4904335	0.4672655
q3_5	5	13	6.308461	2.698616	8.34	6.444546	0.000000	2.78	8.34	5.56	-0.4635149	-1.8727478	0.7484615
q3_6	6	13	3.742308	3.469650	2.78	3.664546	4.121628	0.00	8.34	8.34	0.3234434	-1.6330306	0.9623077

And p-values

# Pairwise comparison table between the question variants, 3 digits.
pval <- printVariationWithinQuestions(indivQ, ndig = 3)
knitr::kable(pval, digits = 3)

	q3_1	q3_2	q3_3	q3_4	q3_5	q3_6
q3_1	1.000	0.085	0.929	0.222	0.550	0.011*
q3_2		1.000	0.096	0.016*	0.237	0.474
q3_3			1.000	0.361	0.545	0.015*
q3_4				1.000	0.115	0.002*
q3_5					1.000	0.047*
q3_6						1.000

Milestone analysis F21

1 Milestone 1

1.1 Overall performance in Milestone 1

1.1.1 Analyzing students who settled

1.1.2 Comparing with Fall 2019

1.2 M1 - Attempt 1 averages

1.3 M1 - Attempt 2 averages

1.4 M1 - Attempt 3 averages

1.5 Variation within question groups

1.6 Performance in Milestone like questions

2 Milestone 2

2.1 Overall performance in Milestone 2

2.1.1 Analyzing students who settled

2.1.2 Comparing with Fall 2019

2.2 M2 - Attempt 1 averages

2.3 M2 - Attempt 2 averages

2.4 M2 - Attempt 3 averages

2.5 Variation within question groups

2.5.1 Question 9 on isomerism

2.5.2 Question 5 on metric conversions

2.6 Variations within question groups among different groups of students

2.6.1 Question 9 on isomerism passing on 1st attempt

2.6.2 Question 9 on isomerism 2nd and 3rd attempt

2.6.3 Question 5 on metric conversions passing on first attempt

3 Milestone 3

3.1 Overall performance in Milestone 3

3.1.1 Analyzing students who settled

3.1.2 Comparing with Fall 2019

3.2 M3 - Attempt 1 averages

3.3 M3 - Attempt 2 averages

3.4 M3 - Attempt 3 averages

3.5 Variation within question groups

3.5.1 Adding one new question on Question 4 on “What Breaks”

3.5.2 Question 5 on deciding what factor decides the outcome

3.6 Variations within question groups among different groups of students

3.6.1 Question 4 on “what breaks” passing on 1st attempt

3.6.2 Question 4 on “What breaks” 2nd and 3rd attempt

3.6.3 Question 5 on “conflicting factors” passing on first attempt

4 Milestone 4

4.1 Overall performance in Milestone 4

4.1.1 Analyzing students who settled

4.1.2 Comparing with Fall 2019

4.1.3 Dimensional analysis questions in M4-F19

4.2 M4 - Attempt 1 averages

4.3 M4 - Attempt 2 averages

4.4 M4 - Attempt 3 averages

4.5 Variation within question groups

4.5.1 Question 3 on “solubility ranking”

4.5.2 Question 2 on ions in water

4.6 Variations within question groups among different groups of students

4.6.1 Question 2 on “ions in solution” passing on 1st attempt

4.6.2 Question 2 on “ions in solution” 2nd and 3rd attempt

4.6.3 Question 3 on “solubility ranking” passing on first attempt

4.6.4 Question 3 on “solubility ranking” 2nd and 3rd attempt