#lets first load the files
# Load the milestone, gradebook and video-watching CSV exports for each
# semester (Fall 2018, 2019, 2021, 2022, 2023) into one data frame per file.
if (Sys.info()["sysname"] == "Windows"){
  # NOTE(review): this branch is empty, so none of the data frames below are
  # created when the script runs on Windows.
}else{
  #F23
  m1f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1.csv",header = TRUE)
  m2f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2.csv",header = TRUE)
  m3f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3.csv",header = TRUE)
  m4f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4.csv",header = TRUE)
  gradesf23 =     read.csv("~/Teaching/Grades_and_SRT/Fall2023/chem1331_f23_grades_canvas.csv",header = TRUE)
  
  #F22
  m1f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m1.csv",header = TRUE)
  m2f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m2.csv",header = TRUE)
  m3f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m3.csv",header = TRUE)
  m4f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m4.csv",header = TRUE)
  gradesf22 =     read.csv("~/Teaching/Grades_and_SRT/Fall2022/chem1331_f22_grades_canvas.csv",header = TRUE)
  
  #F21
  m1f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone1.csv",header = TRUE)
  m2f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone2.csv",header = TRUE)
  m3f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone3.csv",header = TRUE)
  m4f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone4.csv",header = TRUE)
  #ex1 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/Exam_1_scores.csv",header = TRUE)
  #ex2 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/Exam_2_scores.csv",header = TRUE)
  # NOTE(review): gradesf21 is assigned twice in a row; the second read
  # (the Canvas export) replaces the first (the final-grades file).
  gradesf21 =     read.csv("~/Teaching/Grades_and_SRT/Fall2021/chem1331_f21_finalgrades.csv",header = TRUE)
  gradesf21 =     read.csv("~/Teaching/Grades_and_SRT/Fall2021/chem1331_f21_grades_canvas.csv",header = TRUE)
  #demo21 =       read.csv("~/Teaching/Grades_and_SRT/Fall2021/")
  #use this file to compare among years
  videof21 =      read.csv("~/Teaching/Grades_and_SRT/Fall2021/videowatching_resultsf21.csv",header = TRUE)
  #this video analytics has all the videos of the course
  videof21_all =      read.csv("~/Teaching/Grades_and_SRT/Fall2021/videowatchingAnalytics.csv",header = TRUE)
  #m1practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone1_practice.csv",header = TRUE)
  #m2practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone2_practice.csv",header = TRUE)
  #m3practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone3_practice.csv",header = TRUE)
  #m4practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone4_practice.csv",header = TRUE)

  #F19
  m1f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone1.csv",header = TRUE)
  m2f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone2.csv",header = TRUE)
  m3f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone3.csv",header = TRUE)
  m4f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone4.csv",header = TRUE)
  gradesf19 =     read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/chem1331f19_gradebook.csv",header = TRUE)
  #demo19 =       read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/DISCOVER_chem1331_f19.csv")
  videof19 =      read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/videowatching_resultsf19.csv",header = TRUE)
  #m1practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone1_practice.csv",header = TRUE)
  #m2practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone2_practice.csv",header = TRUE)
  #m3practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone3_practice.csv",header = TRUE)
  #m4practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone4_practice.csv",header = TRUE)

  #F18 (only milestone 1 is loaded for this semester)
  gradesf18 =     read.csv("~/Teaching/Grades_and_SRT/Fall2018/chem1331_f18.csv",header = TRUE)
  gradesf18mood = read.csv("~/Teaching/Grades_and_SRT/Fall2018/c1_f18_moodle_gradebook.csv",header = TRUE)
  m1f18 =         read.csv("~/Teaching/Grades_and_SRT/Fall2018/milestone1.csv",header = TRUE)
  videof18      = read.csv("~/Teaching/Grades_and_SRT/Fall2018/videowatching_resultsf18.csv",header = TRUE)
}

1 Course Grade Distribution

As you will see below, since we’re using different assignments, the grade distribution is not comparable. The main takeaways are

  • In Fall2018 we weren’t sure if we were pushing students too much. When we thought that our Exam1 was too harsh, we went easier, especially in the Final. The final exam was too easy, which explains the overall grade being much higher.
  • The low number of DFW students in F18 can also be explained by the fact that the milestones had no numerical questions. These are well known to bring down the grade. We realized though that it made no sense to “kick the can down the road”, so we brought dimensional analysis back in F19.
  • Probably our Exam1 in F19 was too hard and Exam1 in F21 was too easy. I think the final exam in those last two years is where we want it to be.
  • Exam1 is still an unknown, I think in F21 it was too easy and it made students overconfident
  • In terms of grade distribution, we probably want to be somewhere between F19 and F21

1.1 Letter grade

# Class size per semester, taken as the number of rows in each gradebook.
nf18 <- length(gradesf18$Student)
nf19 <- length(gradesf19$Student)
nf21 <- length(gradesf21$Student)
nf22 <- length(gradesf22$Student)
nf23 <- length(gradesf23$Student)

# Remove "D-": fold it into "D" in the unposted final letter grade.
gradesf18$Unposted.Final.Grade <- gsub('D-','D', gradesf18$Unposted.Final.Grade)
gradesf19$Unposted.Final.Grade <- gsub('D-','D', gradesf19$Unposted.Final.Grade)
gradesf21$Unposted.Final.Grade <- gsub('D-','D', gradesf21$Unposted.Final.Grade)
gradesf22$Unposted.Final.Grade <- gsub('D-','D', gradesf22$Unposted.Final.Grade)
gradesf23$Unposted.Final.Grade <- gsub('D-','D', gradesf23$Unposted.Final.Grade)

# Remove the Final.Grade column to avoid confusion with the unposted columns.
gradesf18$Final.Grade <- NULL
gradesf19$Final.Grade <- NULL
gradesf21$Final.Grade <- NULL
gradesf22$Final.Grade <- NULL
gradesf23$Final.Grade <- NULL

# simpleLetter is the current letter grade with any "+" / "-" removed
# (e.g. "B+" and "B-" both become "B"). It stays a character column.
strip_modifier <- function(grade) gsub('\\+','', gsub('-','', grade))
gradesf18$simpleLetter <- strip_modifier(gradesf18$Unposted.Current.Grade)
gradesf19$simpleLetter <- strip_modifier(gradesf19$Unposted.Current.Grade)
gradesf21$simpleLetter <- strip_modifier(gradesf21$Unposted.Current.Grade)
gradesf22$simpleLetter <- strip_modifier(gradesf22$Unposted.Current.Grade)
gradesf23$simpleLetter <- strip_modifier(gradesf23$Unposted.Current.Grade)

# Tabulate every semester against the SAME set of letters. rbind() aligns
# tables by position, not by name, so a semester in which some letter never
# occurs would otherwise shift (or recycle) its counts into the wrong columns.
letterLevels <- sort(unique(c(
  gradesf18$simpleLetter,
  gradesf19$simpleLetter,
  gradesf21$simpleLetter,
  gradesf22$simpleLetter,
  gradesf23$simpleLetter
)))
count_letters <- function(x) table(factor(x, levels = letterLevels))

# One row per semester, one column per letter.
letterCounts <- rbind(
  count_letters(gradesf18$simpleLetter),
  count_letters(gradesf19$simpleLetter),
  count_letters(gradesf21$simpleLetter),
  count_letters(gradesf22$simpleLetter),
  count_letters(gradesf23$simpleLetter)
)

# Percent of the class earning each letter; the denominator is the full
# class size of that semester.
classSizes <- c(
  length(gradesf18$simpleLetter),
  length(gradesf19$simpleLetter),
  length(gradesf21$simpleLetter),
  length(gradesf22$simpleLetter),
  length(gradesf23$simpleLetter)
)
allLetters <- as.data.frame.matrix(sweep(letterCounts, 1, classSizes, "/") * 100)

# Raw counts per semester plus a final row with the total over all semesters.
allLettersAllYears <- as.data.frame.matrix(
  rbind(letterCounts, as.integer(colSums(letterCounts)))
)

# Five-number summary (plus mean) of the final numerical score per semester.
allStats <- as.data.frame.matrix( rbind(
  summary(gradesf18$Final.Score),
  summary(gradesf19$Final.Score),
  summary(gradesf21$Final.Score),
  summary(gradesf22$Final.Score),
  summary(gradesf23$Final.Score)
))

#allStats = cbind(allStats,allLetters)
allStats <- cbind(Nstudents = c(nf18, nf19, nf21, nf22, nf23), allStats)
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
row.names(allLetters) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
row.names(allLettersAllYears) <- c("Fall18","Fall19","Fall21","Fall22","Fall23","Total")

knitr::kable(allLetters,caption = "Final Grade Letter Percents",digits = 1)
Final Grade Letter Percents
A B C D F
Fall18 50.8 37.6 6.6 2.2 2.8
Fall19 23.6 53.3 17.6 3.8 1.6
Fall21 37.9 35.9 16.9 5.1 4.1
Fall22 29.8 49.0 17.3 0.5 3.4
Fall23 47.0 31.8 16.6 0.7 4.0
knitr::kable(allLettersAllYears,caption = "Final Grade Letter Total Numbers",digits = 1)
Final Grade Letter Total Numbers
A B C D F
Fall18 92 68 12 4 5
Fall19 43 97 32 7 3
Fall21 74 70 33 10 8
Fall22 62 102 36 1 7
Fall23 71 48 25 1 6
Total 342 385 138 23 29
# Function to create a pie chart for a given dataframe
# Build a pie chart of the `simpleLetter` column of `df`, titled `title`.
# Returns the ggplot object without drawing it.
create_pie_chart <- function(df, title) {
  # A single stacked bar (one slice colour per letter)...
  stacked_bar <- ggplot(df, aes(x = "", fill = simpleLetter)) +
    geom_bar(width = 1, color = "white")
  # ...wrapped onto polar coordinates becomes a pie.
  pie <- stacked_bar + coord_polar("y") + ggtitle(title)
  pie + theme_void() + theme(legend.position = "right")
}

# Create pie charts for each dataframe
p1 <- create_pie_chart(gradesf18, "Fall 18")
p2 <- create_pie_chart(gradesf19, "Fall 19")
p3 <- create_pie_chart(gradesf21, "Fall 21")
p4 <- create_pie_chart(gradesf22, "Fall 22")
p5 <- create_pie_chart(gradesf23, "Fall 23")

# Arrange the pie charts in a 2-column grid
library(gridExtra)
grid.arrange(p1, p2, p3, p4, p5, ncol = 2) 

1.2 Numerical grade

knitr::kable(allStats,caption = "Final Grade Statistics",digits = 1)
Final Grade Statistics
Nstudents Min. 1st Qu. Median Mean 3rd Qu. Max.
Fall18 181 26.8 85.4 90.0 87.5 93.6 97.8
Fall19 182 26.2 76.8 81.2 80.2 85.7 94.6
Fall21 195 20.1 79.7 87.3 84.0 92.3 99.1
Fall22 208 37.4 80.5 86.2 84.6 91.8 97.9
Fall23 151 18.6 81.8 89.2 85.7 92.6 97.9
h18 = ggplot(gradesf18, aes(x=Final.Score))+ geom_histogram(binwidth = 5) + ggtitle("Fall 18")
h19 = ggplot(gradesf19, aes(x=Final.Score))+ geom_histogram(binwidth = 5) + ggtitle("Fall 19")
h21 = ggplot(gradesf21, aes(x=Final.Score))+ geom_histogram(binwidth = 5) + ggtitle("Fall 21")
h22 = ggplot(gradesf22, aes(x=Final.Score))+ geom_histogram(binwidth = 5) + ggtitle("Fall 22")
h23 = ggplot(gradesf23, aes(x=Final.Score))+ geom_histogram(binwidth = 5) + ggtitle("Fall 23")
grid.arrange(h18,h19,h21,h22,h23, ncol=2)

1.3 Withdrawn students

The statistics shown above include only the students who were still registered at the end of the semester. Some students withdrew from the course, so we use the number of students who took milestone 1 as the number who started, and compare it with the number of students who finished.

  • In 2018, 181 students took milestone 1 and 181 finished
  • In 2019, 191 students took milestone 1 and 182 finished
  • In 2021, 211 students took milestone 1 and 195 finished.
  • In 2022, 220 students took milestone 1 and 208 finished.
  • In 2023, 163 students took milestone 1 and 151 finished.

Pay attention to this high number of withdrawals. It means that the overall grade average would have been much lower if all the students who dropped had stayed until the end.

f18i = length(m1f18$Last.name)
f18f = length(gradesf18$Final.Score)
f18df = sum(gradesf18$simpleLetter %in% c("D","F"))
f18dfp = paste0(round(f18df/f18i*100,2),"%")
f18dfw = paste0(round(((f18i - f18f)+f18df)/f18i*100,2),"%")

f19i = length(m1f19[which(m1f19$attempt==1),]$name)
f19f= length(gradesf19$Final.Score)
f19df = sum(gradesf19$simpleLetter %in% c("D","F"))
f19dfp = paste0(round(f19df/f19i*100,2),"%")
f19dfw = paste0(round(((f19i - f19f)+f19df)/f19i*100,2),"%")

f21i = length(m1f21[which(m1f21$attempt==1),]$name)
f21f= length(gradesf21$Final.Score)
f21df = sum(gradesf21$simpleLetter %in% c("D","F"))
f21dfp = paste0(round(f21df/f21i*100,2),"%")
f21dfw = paste0(round(((f21i - f21f)+f21df)/f21i*100,2),"%")

f22i = length(m1f22[which(m1f22$attempt==1),]$name)
f22f= length(gradesf22$Final.Score)
f22df = sum(gradesf22$simpleLetter %in% c("D","F"))
f22dfp = paste0(round(f22df/f22i*100,2),"%")
f22dfw = paste0(round(((f22i - f22f)+f22df)/f22i*100,2),"%")

f23i = length(m1f23[which(m1f23$attempt==1),]$name)
f23f= length(gradesf23$Final.Score)
f23df = sum(gradesf23$simpleLetter %in% c("D","F"))
f23dfp = paste0(round(f23df/f23i*100,2),"%")
f23dfw = paste0(round(((f23i - f23f)+f23df)/f23i*100,2),"%")


df = data.frame(
  F18 = c( f18i, f18f, f18f-f18i, paste0(round( (f18i-f18f)/f18i*100,2),"%"),f18df,f18dfp,f18dfw ),
  F19 = c( f19i, f19f, f19f-f19i, paste0(round( (f19i-f19f)/f19i*100,2),"%"),f19df,f19dfp,f19dfw ),
  F21 = c( f21i, f21f, f21f-f21i, paste0(round( (f21i-f21f)/f21i*100,2),"%"),f21df,f21dfp,f21dfw ),
  F22 = c( f22i, f22f, f22f-f22i, paste0(round( (f22i-f22f)/f22i*100,2),"%"),f22df,f22dfp,f22dfw ),
  F23 = c( f23i, f23f, f23f-f23i, paste0(round( (f23i-f23f)/f23i*100,2),"%"),f23df,f23dfp,f23dfw )
)
rownames(df)= c("Students started",
                "Student finished",
                "Difference",
                "Percent of W",
                "Students with D or F",
                "Percent D or F",
                "Total Percent of DFW")

kable(df, format = "markdown", row.names = TRUE)
F18 F19 F21 F22 F23
Students started 181 191 211 220 163
Student finished 181 182 195 208 151
Difference 0 -9 -16 -12 -12
Percent of W 0% 4.71% 7.58% 5.45% 7.36%
Students with D or F 9 10 18 8 7
Percent D or F 4.97% 5.24% 8.53% 3.64% 4.29%
Total Percent of DFW 4.97% 9.95% 16.11% 9.09% 11.66%

2 Open Ended Exams Grade Distribution

gradesf18[which(gradesf18$Exam.1..335345.==0),]$Exam.1..335345. = NA
#gradesf21[which(gradesf21$Exam..1..2161998.==0),]$Exam..1..2161998. = NA
gradesf21[which(gradesf21$Exam..1..2161998.=="EX"),]$Exam..1..2161998. = NA
gradesf21$Exam..1..2161998. = as.numeric(gradesf21$Exam..1..2161998.)/25*100

gradesf22[which(gradesf22$Open.Ended.Exam..1..2592814. == 0),]$Open.Ended.Exam..1..2592814. = NA
gradesf22$Open.Ended.Exam..1..2592814. = as.numeric(gradesf22$Open.Ended.Exam..1..2592814.)/25*100

#gradesf23[which(gradesf23$Open.Ended.Exam..1..3262287. == 0),]$Open.Ended.Exam..1..3262287. = NA
gradesf23$Open.Ended.Exam..1..3262287. = as.numeric(gradesf23$Open.Ended.Exam..1..3262287.)/25*100

allStats = as.data.frame.matrix( rbind(
  summary(gradesf18$Exam.1..335345.),
  summary(gradesf19$Open.Ended.Written.Exam.1..816325.),
  summary(gradesf21$Exam..1..2161998.),
  summary(gradesf22$Open.Ended.Exam..1..2592814.),
  summary(gradesf23$Open.Ended.Exam..1..3262287.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
allStats$`NA's` = NULL
knitr::kable(allStats,caption = "Exam 1 Statistics",digits = 1)
Exam 1 Statistics
Min. 1st Qu. Median Mean 3rd Qu. Max.
Fall18 16.0 52.0 66.0 64.2 76.0 96.0
Fall19 13.1 44.3 58.3 57.4 72.3 95.2
Fall21 0.0 68.0 81.2 77.9 92.0 100.0
Fall22 10.0 41.0 58.0 57.0 74.0 100.0
Fall23 8.0 48.0 65.0 64.9 86.0 98.0
#EXAM 2
gradesf18[which(gradesf18$Exam.2..358394.==0),]$Exam.2..358394. = NA
gradesf19[which(gradesf19$Open.Ended.Exam.2..836125.==0),]$Open.Ended.Exam.2..836125. = NA
gradesf21[which(gradesf21$Exam..2..2171776.==0),]$Exam..2..2171776. = NA
gradesf21$Exam..2..2171776. = as.numeric(gradesf21$Exam..2..2171776.)/25*100

gradesf22[which(gradesf22$Open.Ended.Exam..2..2592815. == 0),]$Open.Ended.Exam..2..2592815. = NA
#out of 26 of all numbers :)
gradesf22$Open.Ended.Exam..2..2592815. = as.numeric(gradesf22$Open.Ended.Exam..2..2592815.)/26*100

gradesf23[which(gradesf23$Open.Ended.Exam..2..3262288. == 0),]$Open.Ended.Exam..2..3262288. = NA
gradesf23$Open.Ended.Exam..2..3262288. = as.numeric(gradesf23$Open.Ended.Exam..2..3262288.)/25*100

allStats = as.data.frame.matrix( rbind(
  summary(gradesf18$Exam.2..358394.),
  summary(gradesf19$Open.Ended.Exam.2..836125.),
  summary(gradesf21$Exam..2..2171776.),
  summary(gradesf22$Open.Ended.Exam..2..2592815.),
  summary(gradesf23$Open.Ended.Exam..2..3262288.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
#allStats$`NA's` = NULL
knitr::kable(allStats,caption = "Exam 2 Statistics",digits = 1)
Exam 2 Statistics
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
Fall18 19.0 59.5 75.0 70.5 84.0 98.0 5
Fall19 19.0 49.0 67.0 63.8 78.0 100.0 1
Fall21 9.0 51.4 65.4 63.6 78.3 98.5 2
Fall22 6.7 42.0 58.0 58.1 78.8 100.0 2
Fall23 6.0 43.2 60.0 58.6 76.0 94.0 1
#EXAM3
# Replace zero Exam 3 scores with NA so summary() reports them as missing
# instead of averaging them in. Indexing the column directly
# (col[which(col == 0)] <- NA) is also safe when no score is zero, whereas
# df[which(cond), ]$col <- NA raises an error on an empty selection.
gradesf18$Exam.3..395005.[which(gradesf18$Exam.3..395005. == 0)] <- NA
gradesf19$Open.Ended.Exam.3..875116.[which(gradesf19$Open.Ended.Exam.3..875116. == 0)] <- NA
gradesf21$Exam..3..2184781.[which(gradesf21$Exam..3..2184781. == 0)] <- NA
# F21-F23 scores are divided by 25 and multiplied by 100 to express them as percentages.
gradesf21$Exam..3..2184781. <- gradesf21$Exam..3..2184781./25*100
gradesf22$Open.Ended.Exam..3..2592816.[which(gradesf22$Open.Ended.Exam..3..2592816. == 0)] <- NA
gradesf22$Open.Ended.Exam..3..2592816. <- gradesf22$Open.Ended.Exam..3..2592816./25*100

gradesf23$Open.Ended.Exam..3..3262289.[which(gradesf23$Open.Ended.Exam..3..3262289. == 0)] <- NA
gradesf23$Open.Ended.Exam..3..3262289. <- gradesf23$Open.Ended.Exam..3..3262289./25*100

# One summary row per semester. The Fall18 row uses the Exam 3 column that
# was cleaned above; it previously summarised the final-exam column, which
# made the Fall18 "Exam 3" row a copy of the Fall18 "Final Exam" row.
allStats <- as.data.frame.matrix( rbind(
  summary(gradesf18$Exam.3..395005.),
  summary(gradesf19$Open.Ended.Exam.3..875116.),
  summary(gradesf21$Exam..3..2184781.),
  summary(gradesf22$Open.Ended.Exam..3..2592816.),
  summary(gradesf23$Open.Ended.Exam..3..3262289.)
))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Exam 3 Statistics",digits = 1)
Exam 3 Statistics
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
Fall18 48.9 72.4 81.4 79.9 88.3 99 5
Fall19 2.5 42.5 55.0 56.4 72.5 95 5
Fall21 9.3 38.0 52.8 51.2 64.7 99 11
Fall22 21.0 50.0 63.0 62.1 76.3 98 5
Fall23 12.0 48.0 64.0 62.5 81.2 98 7
#FINAL EXAM

# Replace zero final-exam scores with NA for F19 and F21 so that summary()
# counts them as missing.
# NOTE(review): the same replacement is commented out for F22 and is absent
# for F18 and F23, so any zeros in those columns stay in the statistics below.
# NOTE(review): confirm whether `df[which(cond),]$col = NA` fails when no row
# matches; that may be why the F22 line is disabled.
gradesf19[which(gradesf19$Final.exam.Unposted.Current.Score == 0),]$Final.exam.Unposted.Current.Score = NA
gradesf21[which(gradesf21$Final.Written.Exam.Current.Score == 0),]$Final.Written.Exam.Current.Score = NA
#gradesf22[which(gradesf22$Final.Written.Exam.Current.Score == 0),]$Final.Written.Exam.Current.Score = NA

# One summary() row per semester, stacked with rbind().
# NOTE(review): summary() only appends an "NA's" entry when NAs exist, so rows
# may differ in length before rbind() — verify the NA's column of the result.
allStats = as.data.frame.matrix( rbind(
  summary(gradesf18$Final.exam..408510.),
  summary(gradesf19$Final.exam.Unposted.Current.Score),
  summary(gradesf21$Final.Written.Exam.Current.Score),
  summary(gradesf22$Final.Written.Exam.Current.Score),
  summary(gradesf23$Final.Written.Exam.Current.Score)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Final Exam Statistics",digits = 1)
Final Exam Statistics
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
Fall18 48.9 72.4 81.4 79.9 88.3 99.0 5
Fall19 33.7 55.3 64.8 65.3 75.7 97.3 5
Fall21 14.7 49.5 63.7 63.5 76.0 100.0 5
Fall22 40.3 60.4 67.2 68.7 77.9 93.9 1
Fall23 0.0 57.6 66.9 65.2 75.8 93.6 0

3 Milestone Grades Distribution

3.1 Milestone 1

3.1.1 First attempt

#filter for at1
m1f19_att1 = m1f19[which(m1f19$attempt == "1"),]
m1f21_att1 = m1f21[which(m1f21$attempt == "1"),]
m1f22_att1 = m1f22[which(m1f22$attempt == "1"),]
m1f23_att1 = m1f23[which(m1f23$attempt == "1"),]
library(kableExtra)
# Summarise one milestone across the four semesters F19, F21, F22 and F23.
#
# Each argument is a data frame with a numeric `score` column (one row per
# student). Returns a list with:
#   table - markdown kable: descriptive statistics plus "# / %" of A, C and F
#   plot  - stacked bar chart of the A/C/F counts (from plotBar)
#   chi   - recorded chi-square residual plot (from plotChi, which also
#           prints the chi-square p-value as a side effect)
makeTable = function(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  ){
  semesters <- list(
    F19 = m1f19_att1,
    F21 = m1f21_att1,
    F22 = m1f22_att1,
    F23 = m1f23_att1
  )

  # Number of score rows per semester (denominator of the percentages).
  n_students <- vapply(semesters, function(d) length(d$score), integer(1))

  # A / C / F counts per semester as a 3 x 4 matrix (rows A, C, F).
  # The cut points 69.99 and 79.99 split the scale into three
  # non-overlapping bands, so every non-missing score is counted exactly
  # once: previously a score of exactly 79.99 was in no band and scores
  # strictly between 69.99 and 70 were counted as both C and F.
  grade_counts <- vapply(semesters, function(d) {
    s <- d$score
    c(
      A = sum(s > 79.99, na.rm = TRUE),
      C = sum(s > 69.99 & s <= 79.99, na.rm = TRUE),
      F = sum(s <= 69.99, na.rm = TRUE)
    )
  }, integer(3))

  # Descriptive statistics, one row per semester.
  combined_summary <- do.call(
    bind_rows,
    unname(lapply(semesters, function(d) describe(d$score)))
  )
  combined_summary <- subset(combined_summary, select = c("n", "mean", "sd", "median", "min","max"))
  colnames(combined_summary)[colnames(combined_summary) == "n"] <- "students"
  rownames(combined_summary) <- names(semesters)

  # "<count> / <percent>%" for one grade band across all semesters.
  count_and_percent <- function(grade) {
    k <- unname(grade_counts[grade, ])
    paste0(k, " / ", round(k / unname(n_students) * 100, digits = 1), "%")
  }
  combined_summary$`# / % A scores` <- count_and_percent("A")
  combined_summary$`# / % C scores` <- count_and_percent("C")
  combined_summary$`# / % F scores` <- count_and_percent("F")

  table <- kable(combined_summary, format = "markdown")

  # plotBar / plotChi take one unnamed c(A, C, F) vector per semester.
  per_semester <- lapply(names(semesters), function(s) unname(grade_counts[, s]))
  plot <- do.call(plotBar, per_semester)
  chi <- do.call(plotChi, per_semester)

  return(list(table = table, plot = plot, chi=chi))

}
# Stacked bar chart of A/C/F counts for the four semesters.
# Each argument is a length-3 vector of counts in the order A, C, F.
# Returns the ggplot object; each segment is labelled with its share of
# that semester's total, rounded to a whole percent.
plotBar= function(mf19,mf21,mf22,mf23){
  grade_labels <- c("A", "C", "F")
  semester_labels <- c("f19", "f21", "f22", "f23")

  # Long format: one row per (semester, grade) pair.
  counts_long <- data.frame(
    Category = rep(grade_labels, 4),
    Count = c(mf19, mf21, mf22, mf23),
    Group = rep(semester_labels, each = 3)
  )

  # Percentage of each grade within its own semester.
  counts_long <- mutate(
    group_by(counts_long, Group),
    Percentage = round(Count / sum(Count) * 100)
  )

  bars <- ggplot(counts_long, aes(x = factor(Group), y = Count, fill = Category)) +
    geom_bar(stat = "identity")
  labelled <- bars +
    geom_text(aes(label = paste0(Percentage, "%")), position = position_stack(vjust = 0.5), size = 3)
  labelled +
    labs(x = "Semester", y = "Students", title = "Milestone ACF grade across semesters") +
    theme_minimal()

}
library(corrplot)
# Chi-square test of independence between semester and A/C/F grade band.
#
# Each argument is a length-3 vector of counts in the order A, C, F.
# Side effects: writes the p-value (as an HTML paragraph) with cat() and
# draws the residual plot on the current graphics device.
# Returns the drawn plot as captured by recordPlot().
plotChi= function(mf19,mf21,mf22,mf23){
  # 4 x 3 contingency table: one row per semester, one column per grade.
  data_matrix <- matrix(c(mf19, mf21, mf22, mf23), nrow = 4, byrow = TRUE)
  rownames(data_matrix) <- c("f19", "f21", "f22", "f23")
  colnames(data_matrix) <- c("A", "C", "F")

  chi_square_result <- chisq.test(data_matrix)

  # chisq.test() already returns Pearson residuals,
  # (observed - expected) / sqrt(expected), so they are plotted as-is.
  # Dividing by sqrt(expected) a second time would shrink the cells with
  # large expected counts and no longer be a residual on any standard scale.
  pearson_residuals <- chi_square_result$residuals

  cat(paste("<p><b>The Chi-square analysis gives a p=",round(chi_square_result$p.value,5),"</b></p>")) 

  # is.corr = FALSE because residuals are not bounded to [-1, 1].
  corrplot(pearson_residuals, is.corr = FALSE, method = "circle",  title = "Chi-square Residuals", tl.col = "black", tl.srt = 45) 
  myplot <- recordPlot()
  return(myplot)
}
# Apply some additional formatting if needed
#table <- table %>%
#  kable_styling(full_width = FALSE) %>%
#  add_header_above(c("Statistic" = 1, "Value" = 1)) %>%
#  row_spec(0, bold = TRUE)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0.01967 </b></p>

library(lme4)
library(lmerTest)
# One-way ANOVA of scores across four groups, shown as a means plot.
#
# vec1..vec4  numeric score vectors, one per group
# listLabels  character vector of four group labels, in the same order
# myTitle     plot title
#
# Prints a plot of the group means with bootstrap confidence intervals
# (mean_cl_boot), annotated with the ANOVA p-value.
runAnovaAmongMilestones = function(vec1,vec2,vec3,vec4,listLabels,myTitle){
  # Long format: one row per score, tagged with its group label.
  data <- data.frame(
    group = rep(listLabels, times = c(length(vec1), length(vec2), length(vec3), length(vec4))),
    value = c(vec1, vec2, vec3, vec4)
  )

  # Plain fixed-effects one-way ANOVA. The earlier mixed model
  # value ~ group + (1 | group) used `group` as both a fixed and a random
  # effect; the two terms explain the same variation, so the random
  # intercept variance is not identifiable and the F test is unreliable.
  model <- stats::lm(value ~ group, data = data)
  anova_result <- stats::anova(model)
  p_value <- anova_result$`Pr(>F)`[1]
   
  base_plot_with_means <- ggplot(data, aes(x = group, y = value)) +
    stat_summary(fun = mean, geom = "point", size = 4) +
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
    theme_minimal() +
    labs(title = myTitle, x = "Group", y = "Score")
  # Annotate the plot with the ANOVA p-value; na.rm keeps the label
  # position defined when some scores are missing.
  plot_with_anova_and_means <- base_plot_with_means +
   annotate("text", x = 1, y = max(data$value, na.rm = TRUE), label = paste("ANOVA p-value:", signif(p_value, digits = 3)), hjust = 0, vjust = 1)

  # Print the plot
  print(plot_with_anova_and_means)
}
runAnovaAmongMilestones(m1f19_att1$score, m1f21_att1$score, m1f22_att1$score, m1f23_att1$score,c("F19","F21","F22","F23"),"Milestone 1 - 1st attempt")

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 191 86.59701 11.47493 89.28000 35.92683 100 150 / 78.5% 23 / 12% 18 / 9.4%
F21 211 84.70227 13.83789 87.80250 11.11667 100 152 / 72% 33 / 15.6% 26 / 12.3%
F22 220 82.94547 13.94072 85.58783 40.71200 100 144 / 65.5% 35 / 15.9% 41 / 18.6%
F23 163 82.12047 16.51057 87.83817 23.49350 100 107 / 65.6% 22 / 13.5% 34 / 20.9%
  • ACF scale
print(result$plot)

print(result$chi)

3.1.2 Second attempt

m1f19_att1 = m1f19[which(m1f19$attempt == "1" | m1f19$attempt == "2" ),]
m1f21_att1 = m1f21[which(m1f21$attempt == "1" | m1f21$attempt == "2" ),]
m1f22_att1 = m1f22[which(m1f22$attempt == "1" | m1f22$attempt == "2" ),]
m1f23_att1 = m1f23[which(m1f23$attempt == "1" | m1f23$attempt == "2" ),]

# Keep, for every student `id`, the single row holding their highest `score`.
#
# df must have `id` and `score` columns. When a student reaches the same
# top score on more than one attempt, only the first such row is kept, so
# each student contributes exactly one row to the downstream counts.
# The result is ungrouped.
chooseHighest = function(df){
  df %>%
    group_by(id) %>%
    filter(score == max(score)) %>%
    # Ties at the maximum would otherwise yield several rows per student.
    filter(row_number() == 1) %>%
    ungroup()
}

m1f19_att1 = chooseHighest(m1f19_att1)
m1f21_att1 = chooseHighest(m1f21_att1)
m1f22_att1 = chooseHighest(m1f22_att1)
m1f23_att1 = chooseHighest(m1f23_att1)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0.21362 </b></p>

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 191 88.45648 8.922470 90.00200 58.64667 100 168 / 88% 15 / 7.9% 8 / 4.2%
F21 211 87.57826 11.124503 89.43867 27.53433 100 177 / 83.9% 23 / 10.9% 11 / 5.2%
F22 220 86.62394 9.930089 88.33600 54.61150 100 173 / 78.6% 33 / 15% 14 / 6.4%
F23 163 86.75313 10.975111 89.29000 23.49350 100 128 / 78.5% 24 / 14.7% 11 / 6.7%
  • ACF scale
print(result$plot)

print(result$chi)

3.1.3 Third Attempt

m1f19_att1 = m1f19[which(m1f19$attempt == "1" | m1f19$attempt == "2" | m1f19$attempt == "3" ),]
m1f21_att1 = m1f21[which(m1f21$attempt == "1" | m1f21$attempt == "2" | m1f21$attempt == "3" ),]
m1f22_att1 = m1f22[which(m1f22$attempt == "1" | m1f22$attempt == "2" | m1f22$attempt == "3" ),]
m1f23_att1 = m1f23[which(m1f23$attempt == "1" | m1f23$attempt == "2" | m1f23$attempt == "3" ),]

m1f19_att1 = chooseHighest(m1f19_att1)
m1f21_att1 = chooseHighest(m1f21_att1)
m1f22_att1 = chooseHighest(m1f22_att1)
m1f23_att1 = chooseHighest(m1f23_att1)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0.25726 </b></p>

runAnovaAmongMilestones(m1f19_att1$score, m1f21_att1$score, m1f22_att1$score, m1f23_att1$score,c("F19","F21","F22","F23"),"Milestone 1 - all attempts")

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 191 89.05406 8.190662 90.28167 58.64667 100 175 / 91.6% 11 / 5.8% 5 / 2.6%
F21 211 88.15861 10.280442 89.43867 27.53433 100 180 / 85.3% 26 / 12.3% 5 / 2.4%
F22 220 88.12103 8.473910 88.89125 55.81883 100 194 / 88.2% 19 / 8.6% 7 / 3.2%
F23 163 88.18607 10.009504 89.60533 23.49350 100 142 / 87.1% 13 / 8% 8 / 4.9%
  • ACF scale
print(result$plot)

print(result$chi)

3.2 Milestone 2

3.2.1 First attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0 </b></p>

runAnovaAmongMilestones(m2f19_att1$score, m2f21_att1$score, m2f22_att1$score, m2f23_att1$score,c("F19","F21","F22","F23"),"Milestone 2 - 1st attempt")

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 190 87.24709 10.81401 90.35901 39.07243 100 152 / 80% 22 / 11.6% 16 / 8.4%
F21 205 77.56799 14.75559 79.77267 37.10763 100 102 / 49.8% 51 / 24.9% 52 / 25.4%
F22 216 81.86074 12.90459 85.01722 38.42122 100 136 / 63% 40 / 18.5% 40 / 18.5%
F23 158 82.34789 14.76793 85.93283 29.16567 100 110 / 69.6% 17 / 10.8% 31 / 19.6%
  • ACF scale
print(result$plot)

print(result$chi)

3.2.2 Second attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" | m2f19$attempt == "2" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" | m2f21$attempt == "2" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" | m2f22$attempt == "2" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" | m2f23$attempt == "2" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0 </b></p>

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 190 88.71861 9.371188 90.43986 39.07243 100 171 / 90% 10 / 5.3% 9 / 4.7%
F21 205 81.02490 12.313678 83.10267 37.10763 100 128 / 62.4% 43 / 21% 34 / 16.6%
F22 216 85.05377 10.152792 86.92933 46.45111 100 165 / 76.4% 31 / 14.4% 20 / 9.3%
F23 158 85.05217 11.825039 87.05144 30.41511 100 121 / 76.6% 20 / 12.7% 17 / 10.8%
  • ACF scale
print(result$plot)

print(result$chi)

3.2.3 Third Attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" | m2f19$attempt == "2" | m2f19$attempt == "3" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" | m2f21$attempt == "2" | m2f21$attempt == "3" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" | m2f22$attempt == "2" | m2f22$attempt == "3" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" | m2f23$attempt == "2" | m2f23$attempt == "3" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )
## <p><b>The Chi-square analysis gives a p= 0 </b></p>

runAnovaAmongMilestones(m2f19_att1$score, m2f21_att1$score, m2f22_att1$score, m2f23_att1$score,c("F19","F21","F22","F23"),"Milestone 2 - all attempts")

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 190 89.20468 8.464771 90.43986 39.07243 100 175 / 92.1% 10 / 5.3% 5 / 2.6%
F21 205 82.64865 10.762921 84.18644 37.10763 100 140 / 68.3% 41 / 20% 24 / 11.7%
F22 216 86.50969 8.566437 87.95989 53.28894 100 183 / 84.7% 21 / 9.7% 12 / 5.6%
F23 158 86.04928 10.614212 87.43781 30.41511 100 128 / 81% 19 / 12% 11 / 7%
  • ACF scale
print(result$plot)

print(result$chi)

3.3 Milestone 3

3.3.1 First attempt

m3f19_att1 = m3f19[which(m3f19$attempt == "1" ),]
m3f21_att1 = m3f21[which(m3f21$attempt == "1" ),]
m3f22_att1 = m3f22[which(m3f22$attempt == "1" ),]
m3f23_att1 = m3f23[which(m3f23$attempt == "1" ),]

m3f19_att1 = chooseHighest(m3f19_att1)
m3f21_att1 = chooseHighest(m3f21_att1)
m3f22_att1 = chooseHighest(m3f22_att1)
m3f23_att1 = chooseHighest(m3f23_att1)

result =  makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1  )
## <p><b>The Chi-square analysis gives a p= 3e-05 </b></p>

runAnovaAmongMilestones(m3f19_att1$score, m3f21_att1$score, m3f22_att1$score, m3f23_att1$score,c("F19","F21","F22","F23"),"Milestone 3 - 1st attempts")

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 183 87.08412 9.75140 88.51711 49.98278 100 151 / 82.5% 21 / 11.5% 11 / 6%
F21 195 81.63402 15.31543 85.99000 24.37333 100 122 / 62.6% 30 / 15.4% 43 / 22.1%
F22 210 79.51752 16.25780 84.11167 9.18000 100 128 / 61% 33 / 15.7% 49 / 23.3%
F23 151 82.07987 13.73418 86.29667 43.91667 100 104 / 68.9% 17 / 11.3% 30 / 19.9%
  • ACF scale
print(result$plot)

print(result$chi)

3.3.2 Second attempt

# Milestone 3 after two attempts: keep attempts 1 and 2 for each cohort and
# pass them through chooseHighest(). The *_att1 names are reused on purpose
# because later chunks read these variables.
m3f19_att1 <- chooseHighest(m3f19[m3f19$attempt %in% c("1", "2"), ])
m3f21_att1 <- chooseHighest(m3f21[m3f21$attempt %in% c("1", "2"), ])
m3f22_att1 <- chooseHighest(m3f22[m3f22$attempt %in% c("1", "2"), ])
m3f23_att1 <- chooseHighest(m3f23[m3f23$attempt %in% c("1", "2"), ])

result <- makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1)
## <p><b>The Chi-square analysis gives a p= 1e-05 </b></p>

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 183 88.55890 7.485728 88.65200 53.23144 100 166 / 90.7% 14 / 7.7% 3 / 1.6%
F21 195 84.64072 11.961618 87.65000 45.64333 100 141 / 72.3% 28 / 14.4% 26 / 13.3%
F22 210 83.22498 12.068709 85.37167 37.95333 100 147 / 70% 34 / 16.2% 29 / 13.8%
F23 151 85.22079 10.182821 86.61667 44.83333 100 122 / 80.8% 18 / 11.9% 11 / 7.3%
  • ACF scale
print(result$plot)

print(result$chi)

3.3.3 Third Attempt

# Milestone 3 after all three attempts: keep attempts 1-3 for each cohort and
# pass them through chooseHighest() before tabulating and running the ANOVA.
m3f19_att1 <- chooseHighest(m3f19[m3f19$attempt %in% c("1", "2", "3"), ])
m3f21_att1 <- chooseHighest(m3f21[m3f21$attempt %in% c("1", "2", "3"), ])
m3f22_att1 <- chooseHighest(m3f22[m3f22$attempt %in% c("1", "2", "3"), ])
m3f23_att1 <- chooseHighest(m3f23[m3f23$attempt %in% c("1", "2", "3"), ])

result <- makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1)
## <p><b>The Chi-square analysis gives a p= 0.0018 </b></p>

runAnovaAmongMilestones(
  m3f19_att1$score, m3f21_att1$score, m3f22_att1$score, m3f23_att1$score,
  c("F19", "F21", "F22", "F23"), "Milestone 3 - all attempts"
)

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 183 88.84171 7.287616 88.88267 53.23144 100 171 / 93.4% 9 / 4.9% 3 / 1.6%
F21 195 86.26802 10.446492 87.77333 45.64333 100 156 / 80% 27 / 13.8% 12 / 6.2%
F22 210 84.88897 10.332357 86.15333 37.95333 100 162 / 77.1% 33 / 15.7% 15 / 7.1%
F23 151 85.81885 9.479026 87.04190 44.83333 100 127 / 84.1% 16 / 10.6% 8 / 5.3%
  • ACF scale
print(result$plot)

print(result$chi)

3.4 Milestone 4

3.4.1 First attempt

# Milestone 4, attempt 1 only: subset each cohort to its first attempt and
# pass the rows through chooseHighest() before building the summary.
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% "1", ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% "1", ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% "1", ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% "1", ])

result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)
## <p><b>The Chi-square analysis gives a p= 0.46484 </b></p>

runAnovaAmongMilestones(
  m4f19_att1$score, m4f21_att1$score, m4f22_att1$score, m4f23_att1$score,
  c("F19", "F21", "F22", "F23"), "Milestone 4 - 1st attempts"
)

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 177 79.27583 14.73094 81.64083 38.43000 100 96 / 54.2% 37 / 20.9% 44 / 24.9%
F21 187 82.10934 13.27249 83.88033 47.22000 100 116 / 62% 39 / 20.9% 33 / 17.6%
F22 206 79.04682 14.39811 81.80700 26.94200 100 114 / 55.3% 41 / 19.9% 53 / 25.7%
F23 148 80.92414 14.35564 84.57617 44.43833 100 91 / 61.5% 28 / 18.9% 29 / 19.6%
print(result$plot)

print(result$chi)

3.4.2 Second attempt

# Milestone 4 after two attempts: keep attempts 1 and 2 for each cohort and
# pass them through chooseHighest(). The *_att1 names are reused on purpose.
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% c("1", "2"), ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% c("1", "2"), ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% c("1", "2"), ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% c("1", "2"), ])

result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)
## <p><b>The Chi-square analysis gives a p= 0.63746 </b></p>

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 177 83.85381 11.883539 86.58267 45.94233 100 125 / 70.6% 30 / 16.9% 22 / 12.4%
F21 187 84.93125 11.022564 86.11333 51.10067 100 139 / 74.3% 29 / 15.5% 19 / 10.2%
F22 206 83.85520 10.368770 84.99433 46.66800 100 146 / 70.9% 43 / 20.9% 18 / 8.7%
F23 148 85.15438 9.762059 86.24550 46.65800 100 112 / 75.7% 24 / 16.2% 12 / 8.1%
  • ACF scale
print(result$plot)

print(result$chi)

3.4.3 Third Attempt

# Milestone 4 after all three attempts: keep attempts 1-3 for each cohort and
# pass them through chooseHighest() before tabulating and running the ANOVA.
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% c("1", "2", "3"), ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% c("1", "2", "3"), ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% c("1", "2", "3"), ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% c("1", "2", "3"), ])

result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)
## <p><b>The Chi-square analysis gives a p= 0.60029 </b></p>

runAnovaAmongMilestones(
  m4f19_att1$score, m4f21_att1$score, m4f22_att1$score, m4f23_att1$score,
  c("F19", "F21", "F22", "F23"), "Milestone 4 - all attempts"
)

result$table
students mean sd median min max # / % A scores # / % C scores # / % F scores
F19 177 85.56451 10.407627 87.21733 45.94233 100 141 / 79.7% 22 / 12.4% 14 / 7.9%
F21 187 86.17210 9.920564 86.94967 51.10067 100 149 / 79.7% 26 / 13.9% 12 / 6.4%
F22 206 84.98724 9.406832 86.10917 46.66800 100 159 / 77.2% 37 / 18% 11 / 5.3%
F23 148 85.99251 8.627701 86.24800 46.65800 100 118 / 79.7% 24 / 16.2% 6 / 4.1%
  • ACF scale
print(result$plot)

print(result$chi)

3.4.4 Conclusions - Take home

grid.arrange(h18,h19,h21,h22,h23, ncol=2)

  • The first takeaway is that the chi-square test seems to highlight the differences between the groups better. An ANOVA on the scores is not as useful because its p-value never reaches significance.

  • M1-att1: looking at chi square on ACF, F19 did much better than F23 with a p=0.01

  • This significance is not present when doing ANOVA on absolute scores

  • This trend persists when looking at highest (or third) attempt, but then you can start seeing how F21 stopped trying

  • M2-att1: The f21 debacle (overconfidence perhaps?) emerges. F19 is still doing very well.

  • Even after all attempts we can see how f21 does not catch up, so it is in milestone 2 where this year really suffers. You see the highest numbers in f21 for students who stopped trying which justifies the highest drop numbers.

  • M3-att1: F19 really excelled at this one. This is a clear case of pre and post-covid

  • F22 seemed to have deflated a little bit

  • M4: after several people dropped f21 is doing better than the rest. Even better than f19.

  • F19 has deflated, perhaps due to people not dropping. The molarity questions may have something to do with it.

  • Even though we simplified the isomerism question in F23, there doesn’t seem to be an effect: F22 and F23 performed similarly.

It’s as though there are too many variables playing out to really see a trend on the cohort, on the milestone or on the attempt.

3.5 Comparing milestones

Each milestone draws most of its questions from one topic of the course: Milestone 1 is mostly about atomic and electronic structure and the theory of light; Milestone 2 about chemical bonding and molecular structure (including NMR); Milestone 3 about functional groups, IR, and intermolecular forces; and Milestone 4 about the thermochemistry of phase changes, solutions, and molarity problems.

Results merging all semesters

library(dplyr)
library(tidyr)
#merge all
# Stack the four cohorts of one milestone and reshape to one row per
# (name, semester), with one score column per attempt.
#
# df1..df4: milestone data frames for F19, F21, F22 and F23 (in that order);
#           each must provide the columns "name", "attempt" and "score".
# Returns the widened table with added columns: `highest` (best score over
# attempts 1-3), `letter1` (letter for attempt 1) and `letter_highest`
# (letter for the best score). Letters: "A" above 79.99, "C" above 69.99,
# otherwise "F". Any `attempt4` column is dropped.
mergeAllMilestones = function(df1,df2,df3,df4){
  keepCols = c("name","attempt","score")
  tagCohort = function(cohort, label){
    piece = cohort[,keepCols]
    piece$semester = label
    piece
  }
  # Reduce() appends the cohorts one after another, in the order F19 -> F23.
  stacked = Reduce(rbind, Map(tagCohort,
                              list(df1,df2,df3,df4),
                              c("f19","f21","f22","f23")))

  toLetter = function(score){
    ifelse(score > 79.99,"A",ifelse(score > 69.99,"C","F"))
  }

  wide = stacked %>%
    pivot_wider(names_from = attempt, values_from = score, names_prefix = "attempt") %>%
    mutate(highest = pmax(attempt1, attempt2, attempt3, na.rm = TRUE))

  wide$attempt4 = NULL
  wide$letter1 = toLetter(wide$attempt1)
  wide$letter_highest = toLetter(wide$highest)
  return(wide)
}
# One merged (all cohorts, wide by attempt) table per milestone.
m1_all <- mergeAllMilestones(df1 = m1f19, df2 = m1f21, df3 = m1f22, df4 = m1f23)
m2_all <- mergeAllMilestones(df1 = m2f19, df2 = m2f21, df3 = m2f22, df4 = m2f23)
m3_all <- mergeAllMilestones(df1 = m3f19, df2 = m3f21, df3 = m3f22, df4 = m3f23)
m4_all <- mergeAllMilestones(df1 = m4f19, df2 = m4f21, df3 = m4f22, df4 = m4f23)

# Summarise a merged milestone table into the flows drawn by the alluvial plot.
#
# df: a table with columns `letter1` and `letter_highest`.
# Returns one row per (letter1, letter_highest) pair with:
#   count       - number of rows in that pair
#   percentage1 - count as a percentage of all rows sharing the same letter1
#   percentage2 - count as a percentage of all rows sharing the same
#                 letter_highest
prepare_alluvial_data <- function(df) {
  df %>%
    group_by(letter1, letter_highest) %>%
    summarise(count = n()) %>%
    ungroup() %>%
    group_by(letter1) %>%
    mutate(percentage1 = count / sum(count) * 100) %>%
    ungroup() %>%
    group_by(letter_highest) %>%
    mutate(percentage2 = count / sum(count) * 100) %>%
    ungroup()
}
# Flow summaries (first-attempt letter -> best-attempt letter) per milestone.
m1_summary <- prepare_alluvial_data(df = m1_all)
m2_summary <- prepare_alluvial_data(df = m2_all)
m3_summary <- prepare_alluvial_data(df = m3_all)
m4_summary <- prepare_alluvial_data(df = m4_all)

library(ggalluvial)
# Draw an alluvial diagram of letter grades: first attempt (left axis) versus
# highest attempt (right axis), weighted by `count`.
#
# df_summary: output of prepare_alluvial_data() (needs letter1,
#             letter_highest and count).
# title:      plot title.
# Returns the ggplot object; strata are labelled with their rounded share.
plot_alluvial <- function(df_summary, title) {
  bandWidth <- 0.2
  letterColours <- c("A" = "green", "C" = "blue", "F" = "red")

  canvas <- ggplot(data = df_summary,
                   aes(axis1 = letter1, axis2 = letter_highest, y = count))

  # Layers are added in this order: flows, the two strata, then the labels.
  # (Alternative label that also shows the stratum name:
  #  paste(after_stat(stratum), round(after_stat(prop) * 100, 1), "%"))
  layers <- list(
    geom_alluvium(aes(fill = letter1), width = bandWidth),
    geom_stratum(aes(fill = letter1), width = bandWidth),
    geom_stratum(aes(fill = letter_highest), width = bandWidth),
    geom_text(stat = "stratum", aes(label = paste(round(after_stat(prop) * 100, 0), "%")), size = 3)
  )

  scales <- list(
    scale_x_discrete(limits = c("letter1", "letter_highest"),
                     labels = c("1st attempt", "Highest attempt"),
                     expand = c(0.15, 0.05)),
    scale_fill_manual(values = letterColours)
  )

  # No legend (use legend.position = "right" to bring it back) and no grid.
  bareTheme <- theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

  canvas +
    layers +
    scales +
    labs(title = title, y = "Students", x = "") +
    theme_minimal() +
    bareTheme
}

# Plotting the alluvial diagrams
plot1 <- plot_alluvial(df_summary = m1_summary, title = "Milestone 1: Atom struct.")
plot2 <- plot_alluvial(df_summary = m2_summary, title = "Milestone 2: Molecular struct.")
plot3 <- plot_alluvial(df_summary = m3_summary, title = "Milestone 3: Functional Groups")
plot4 <- plot_alluvial(df_summary = m4_summary, title = "Milestone 4: Phases & solutions")

# Lay the four diagrams out on a 2 x 2 grid and keep the arranged object.
library(gridExtra)
p <- grid.arrange(grobs = list(plot1, plot2, plot3, plot4), ncol = 2)

p
## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
#ggsave("milestones_by_topics.png", plot = p, bg = "transparent")

3.6 Students who stopped trying

#count
# Keep only the attempts of students who "settled": they neither used all
# their attempts nor ever reached the passing ("A") score.
#
# df:           milestone data frame with columns name, id, attempt, score
#               (one row per attempt).
# pass_score:   a student is treated as having passed if any score is strictly
#               above this value. Default 79.99, the same cut-off used for the
#               "A" letter elsewhere in this report, so a student with exactly
#               80 is no longer counted as having settled.
# max_attempts: students whose id appears at least this many times are
#               treated as having tried everything. Default 3.
# Returns the remaining rows, restricted to name, id, attempt and score.
countGiveUp = function(df, pass_score = 79.99, max_attempts = 3){
  # Number of rows (attempts) per student id
  attempts_per_id <- table(df$id)
  used_all <- names(attempts_per_id[attempts_per_id >= max_attempts])
  # which() drops NA scores, so a missing score never counts as a pass and
  # never injects an NA id into the lookup
  reached <- unique(df$id[which(df$score > pass_score)])
  keep <- !(df$id %in% used_all | df$id %in% reached)
  df[keep, c("name", "id", "attempt", "score"), drop = FALSE]
}
# Milestone 1: students who settled, per cohort.
giveUp19 <- countGiveUp(m1f19)
giveUp21 <- countGiveUp(m1f21)
giveUp22 <- countGiveUp(m1f22)
giveUp23 <- countGiveUp(m1f23)

# For each cohort, count the ids that appear exactly twice / exactly once
# among the rows that countGiveUp() kept.
name_counts <- table(giveUp19$id)
twice19 <- sum(name_counts == 2)
once19 <- sum(name_counts == 1)

name_counts <- table(giveUp21$id)
twice21 <- sum(name_counts == 2)
once21 <- sum(name_counts == 1)

name_counts <- table(giveUp22$id)
twice22 <- sum(name_counts == 2)
once22 <- sum(name_counts == 1)

name_counts <- table(giveUp23$id)
twice23 <- sum(name_counts == 2)
once23 <- sum(name_counts == 1)
# Build a one-row data frame whose columns are named by `titles` and filled
# with `numbers`.
#
# numbers:     values for the single row.
# titles:      column names; must have the same length as `numbers`.
# table_title: accepted for call compatibility; currently not used (it was
#              once stored as a "title" attribute).
# Stops with an error when the two lengths differ.
writeAttemptTable <- function(numbers, titles, table_title) {
  nCols <- length(titles)
  if (length(numbers) != nCols) {
    stop("Length of numbers and titles must be the same.")
  }

  oneRow <- data.frame(matrix(nrow = 1, ncol = nCols))
  names(oneRow) <- titles
  oneRow[1, ] <- numbers
  oneRow
}
# Number of students per milestone (rows) and cohort (columns) who settled for
# a lower score without trying 3 attempts.
#
# The counts are computed inside a function so that the Milestone 1 objects
# giveUp19..giveUp23 built earlier are NOT overwritten with Milestone 4 data;
# text that reports Milestone 1 figures from them therefore stays correct.
titles <- c("F19", "F21", "F22", "F23")
milestoneCohorts <- list(
  "Milestone 1" = list(m1f19, m1f21, m1f22, m1f23),
  "Milestone 2" = list(m2f19, m2f21, m2f22, m2f23),
  "Milestone 3" = list(m3f19, m3f21, m3f22, m3f23),
  "Milestone 4" = list(m4f19, m4f21, m4f22, m4f23)
)
table <- do.call(rbind, lapply(names(milestoneCohorts), function(label) {
  # Distinct student ids left after countGiveUp(), one value per cohort
  settled <- vapply(
    milestoneCohorts[[label]],
    function(cohort) length(unique(countGiveUp(cohort)$id)),
    integer(1)
  )
  writeAttemptTable(
    settled, titles,
    paste0("Students who settled for a lower score without trying 3 attempts. ", label)
  )
}))
rownames(table) = names(milestoneCohorts)
knitr::kable(table, caption="Students who settled for a lower score without trying 3 attempts.")
Students who settled for a lower score without trying 3 attempts.
F19 F21 F22 F23
Milestone 1 14 23 13 12
Milestone 2 9 32 16 14
Milestone 3 7 23 19 14
Milestone 4 14 25 31 18
# Same table as percentages of each cohort's size. The denominators are the
# row counts of the m1f??_att1 objects as they stand at this point.
tablePercent <- table
len1 <- nrow(m1f19_att1)
len2 <- nrow(m1f21_att1)
len3 <- nrow(m1f22_att1)
len4 <- nrow(m1f23_att1)
cohortSizes <- c(len1, len2, len3, len4)

# Columns are F19, F21, F22, F23, in the same order as cohortSizes.
for (k in seq_along(cohortSizes)) {
  tablePercent[[k]] <- paste0(round((tablePercent[[k]] / cohortSizes[k]) * 100, 1), "%")
}

# Average row: mean of the (already rounded) percentages shown in each column.
avgLabels <- vapply(
  tablePercent,
  function(pctCol) paste0(round(mean(as.numeric(sub("%", "", pctCol))), 1), "%"),
  character(1)
)
avg_row <- data.frame(
  A = avgLabels[[1]],
  B = avgLabels[[2]],
  C = avgLabels[[3]],
  D = avgLabels[[4]]
)
colnames(avg_row) <- colnames(tablePercent)
tablePercent <- rbind(tablePercent, avg_row)
colnames(tablePercent) <- paste0(c("F19", "F21", "F22", "F23"), " (n=", cohortSizes, ")")
rownames(tablePercent)[nrow(tablePercent)] <- "Average"


knitr::kable(tablePercent, caption="Students who settled for a lower score without trying 3 attempts.")
Students who settled for a lower score without trying 3 attempts.
F19 (n=191) F21 (n=211) F22 (n=220) F23 (n=163)
Milestone 1 7.3% 10.9% 5.9% 7.4%
Milestone 2 4.7% 15.2% 7.3% 8.6%
Milestone 3 3.7% 10.9% 8.6% 8.6%
Milestone 4 7.3% 11.8% 14.1% 11%
Average 5.8% 12.2% 9% 8.9%

Milestone 1

  • In 2019, 14 students settled for a lower score, without trying the 3 attempts.
  • In 2021, 23 students settled for a lower score, without trying the 3 attempts.
  • In 2022, 13 students settled for a lower score, without trying the 3 attempts.
  • In 2023, 12 students settled for a lower score, without trying the 3 attempts.

3.7 Statistics of raw scores

# Fall 2018 milestone scores come from a different gradebook export; entries
# that cannot be parsed as numbers become NA (the coercion warning is
# silenced on purpose).
f18m1 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.1..Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))
f18m2 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.2...Do.not.take.it.outside.of.class...Requires.Respondus.LockDown.Browser..Real.))
f18m3 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.3...Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))
f18m4 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.4...Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))

# summary() appends an "NA's" entry only when a vector has missing values, so
# binding raw summaries of different lengths would recycle and misalign rows.
# Always keep exactly the six distribution statistics instead.
sixStats <- function(x) {
  unclass(summary(x))[c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")]
}

allStats = as.data.frame.matrix( rbind(
  sixStats(f18m1),
  sixStats(gradesf19$Milestone.1.Quiz..Requires.Respondus.LockDown.Browser..798227.),
  sixStats(gradesf21$Milestone.1..Requires.Respondus.LockDown.Browser..1911986.),
  sixStats(gradesf22$Milestone.1..Requires.Respondus.LockDown.Browser..2592695.),
  sixStats(gradesf23$Milestone.1..Requires.Respondus.LockDown.Browser..3262166.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Milestone 1 raw score",digits = 1)
Milestone 1 raw score
Min. 1st Qu. Median Mean 3rd Qu. Max.
Fall18 51.2 88.2 94.0 92.3 98.4 100
Fall19 71.9 84.5 90.5 89.9 95.9 100
Fall21 51.2 84.4 89.8 89.4 96.0 100
Fall22 61.2 84.0 89.2 89.1 94.5 100
Fall23 23.5 85.1 90.0 89.2 95.6 100
# Milestone 2 raw-score summary per cohort.
# summary() appends an "NA's" entry only when a vector has missing values, so
# binding raw summaries of different lengths would recycle and misalign rows.
# Always keep exactly the six distribution statistics instead.
sixStats <- function(x) {
  unclass(summary(x))[c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")]
}

allStats = as.data.frame.matrix( rbind(
  sixStats(f18m2),
  sixStats(gradesf19$Milestone.2.Quiz..Requires.Respondus.LockDown.Browser..823364.),
  sixStats(gradesf21$Milestone.2..Requires.Respondus.LockDown.Browser..1912009.),
  sixStats(gradesf22$Milestone.2..Requires.Respondus.LockDown.Browser..2592715.),
  sixStats(gradesf23$Milestone.2..Requires.Respondus.LockDown.Browser..3262143.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Milestone 2 raw score",digits = 1)
Milestone 2 raw score
Min. 1st Qu. Median Mean 3rd Qu. Max.
Fall18 55.6 84.4 91.6 90.0 96.0 100
Fall19 67.0 85.6 90.6 90.1 95.7 100
Fall21 37.1 79.3 84.9 84.1 90.1 100
Fall22 53.3 82.8 88.0 87.2 92.4 100
Fall23 30.4 81.7 88.2 87.2 94.2 100
# Milestone 3 raw-score summary per cohort (the "NA's" column is kept here).
m3RawScores <- list(
  f18m3,
  gradesf19$Milestone.3.Quiz..Requires.Respondus.LockDown.Browser..841979.,
  gradesf21$Milestone.3..Requires.Respondus.LockDown.Browser..2192128.,
  gradesf22$Milestone.3..Requires.Respondus.LockDown.Browser..2592728.,
  gradesf23$Milestone.3..Requires.Respondus.LockDown.Browser..3262146.
)
allStats <- as.data.frame.matrix(do.call(rbind, lapply(m3RawScores, summary)))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats, caption = "Milestone 3 raw score", digits = 1)
Milestone 3 raw score
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
Fall18 35.4 86.8 92.4 90.5 96.7 100 3
Fall19 64.4 84.2 89.5 89.2 94.4 100 2
Fall21 48.4 82.3 88.0 87.4 94.1 100 4
Fall22 38.2 81.4 86.3 85.8 91.8 100 1
Fall23 44.8 82.2 87.2 86.4 92.1 100 3
# Milestone 4 raw-score summary per cohort (the "NA's" column is kept here).
m4RawScores <- list(
  f18m4,
  gradesf19$Milestone.4.Quiz..Requires.Respondus.LockDown.Browser..860755.,
  gradesf21$Milestone.4..Requires.Respondus.LockDown.Browser..2216543.,
  gradesf22$Milestone.4..Requires.Respondus.LockDown.Browser..2592732.,
  gradesf23$Milestone.4..Requires.Respondus.LockDown.Browser..3262167.
)
allStats <- as.data.frame.matrix(do.call(rbind, lapply(m4RawScores, summary)))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats, caption = "Milestone 4 raw score", digits = 1)
Milestone 4 raw score
Min. 1st Qu. Median Mean 3rd Qu. Max. NA’s
Fall18 44.0 82.6 88.9 87.2 94.6 100 5
Fall19 46.4 83.4 87.5 86.4 92.2 100 5
Fall21 51.1 81.7 87.0 86.4 94.4 100 8
Fall22 46.7 80.6 86.4 85.3 91.4 100 3
Fall23 46.7 81.6 86.2 86.0 91.7 100 3
  • Passing grades as they attempt milestones
library(ggplot2)
# Build the question column headers "q1", "q2", ..., "q<numberOfQuestions>".
#
# numberOfQuestions: a single non-negative count.
# Returns a character vector of that length. Zero questions gives an empty
# vector (the earlier seq()-based loop produced "q1", "q0" in that case).
buildHeaders <- function(numberOfQuestions){
  paste0("q", seq_len(numberOfQuestions))
}
# Reshape a raw milestone export into one row of per-question scores per
# attempt row, plus a final row of column means.
#
# m1:                raw milestone data frame. The layout is read by POSITION:
#                    column 1 is stored as the student name, column 8 as the
#                    attempt number, the last column as the total, and every
#                    second column from 10 to ncol-2 is read as a score whose
#                    question id comes from the name of the column just before
#                    it (assumes names like "q<number>_..." - TODO confirm
#                    against the export format).
# numberOfQuestions: number of question slots (q1..qN) to allocate per row.
# Returns a data frame with the question columns, "total", "studName" and
# "attNumb"; the last row holds colMeans() and has NA for studName/attNumb.
buildDF_fromMilestone <- function(m1,numberOfQuestions){
  #Build df with just answers 
  totcol = ncol(m1)
  m1df = data.frame( matrix(ncol=numberOfQuestions+3,nrow=0) )
  
  
  colHeaders = c(buildHeaders(numberOfQuestions),"total")
   
  studNames = c()
  attNumb = c()
  #loop over the rows of the export (one row per student attempt, presumably)
  for (st in seq(1,nrow(m1)) ){
    #collect attempt number (column 8) and student name (column 1);
    #they are attached as columns after the loop
    attNumb = c(attNumb,m1[st,8])
    studNames = c(studNames,m1[st,1])
    
    #start from an all-NA score vector and fill in each question that has a score
    thisSt = rep(NA,numberOfQuestions)
    #visit every second column from 10 to totcol-2 and keep the non-NA scores
    for (q in seq(10,totcol-2,2)){
      score = m1[st,q]
      if ( !is.na( score ) ){
       #question id = text before the first "_" in the previous column's name,
       #with the leading "q" removed
       questionNumber = colnames(m1)[q-1]
       questionNumber = unlist(strsplit(questionNumber,"_"))[1]
       questionNumber = as.numeric( gsub("^q","",questionNumber))
       thisSt[questionNumber] = score
      }
    }
    #studNames = c(studNames,m1[st,1])
    #append the row total (last column of the export) after the question scores
    thisSt = c(thisSt, m1[st,totcol])
    m1df = rbind(m1df,thisSt)
  }
  colnames(m1df) = colHeaders
  #extra last row with the column means (no na.rm, so a column containing NA yields NA)
  m1df = rbind(m1df,colMeans(m1df))
  #the NA pads the means row, which belongs to no student/attempt
  m1df$studName = c(studNames,NA)
  m1df$attNumb = c(attNumb,NA)
  return(m1df)
}
# Collapse a single score onto three bands:
#   below 70 -> 0, from 70 up to (not including) 80 -> 80, 80 and above -> 100.
#
# score: one numeric value (not vectorised; NA raises an error in `if`).
# Returns 0, 80 or 100. A score of exactly 80 now falls in the top band;
# previously it matched no branch and the function failed with
# "object 'r' not found".
roundThisScore <- function(score){
  if (score < 70){
    r = 0
  } else if (score < 80){
    r = 80
  } else {
    r = 100
  }
  return(r)
}
# From the per-attempt table built by buildDF_fromMilestone(), derive
#   settle: students whose highest attempt number is below 3 and whose best
#           total is below 80 (columns name, MaxAttempt, MaxScore; values are
#           stored as text because each row is built with c()),
#   flow:   a 3 x 3 data frame of student counts; rows are the score bands
#           0, 80, 100 (in that order) and columns are the best band reached
#           "After 1st", "After 2nd" and "After 3rd" attempt.
#
# m1df: data frame with columns studName, attNumb and total; rows with an NA
#       studName (the means row) are ignored.
# Returns list(settle = ..., flow = ...).
flowAndSettling <- function(m1df){
  studs = unique(m1df$studName)
  studs = studs[!is.na(studs)]
  settle = data.frame( matrix(ncol=3,nrow=0) )
  flow = data.frame(matrix(ncol=3,nrow=0))
  for (stud in studs){
    thisStDF = m1df[which(m1df$studName == stud),]
    #student stopped before a 3rd attempt with a best score lower than 80
    maxNum = max(thisStDF$attNumb)
    maxScore = max(thisStDF$total)
    if ( maxNum < 3 && maxScore < 80 ){
      settle = rbind(settle,c(stud,maxNum,maxScore))
    }
    #best band over the attempts numbered below `upTo`; a student with no such
    #attempt is placed in the lowest band (same result as before, but without
    #the max()-of-empty warning)
    bestBand = function(upTo){
      seen = thisStDF$total[which(thisStDF$attNumb < upTo)]
      if (length(seen) == 0){
        return(0)
      }
      roundThisScore(max(seen))
    }
    flow = rbind(flow,c(bestBand(2),bestBand(3),bestBand(4)))

  }
  colnames(flow) = c("one","two","three")
  #count against the fixed band levels so that a band nobody falls into is
  #reported as 0; a plain table() drops it, which made the 3-row assignment
  #fail (or silently recycle a single count over all three rows)
  bands = c(0,80,100)
  countBands = function(reached){
    as.integer(table(factor(reached, levels = bands)))
  }
  flow2 = data.frame(one = countBands(flow$one),
                     two = countBands(flow$two),
                     three = countBands(flow$three))
  colnames(flow2) = c("After 1st","After 2nd","After 3rd")

  colnames(settle) = c("name","MaxAttempt","MaxScore")
  all = list("settle" = settle,"flow" = flow2)
  return(all)
}

# Build the long table behind the stacked "grade share per attempt" bar plots
# for two cohorts.
#
# m1f21, numb_m1f21: raw milestone export and number of questions for the
#                    first cohort.
# m1f19, numb_m1f19: the same for the second cohort.
# labels:            Year labels for the two cohorts, in argument order.
#                    Defaults to c("F21","F19"), the values that used to be
#                    hard-coded.
# Returns a data frame with 18 rows (2 cohorts x 3 attempts x 3 grades) and
# columns Grade ("F","C","A"), Pct (numeric percentage of students), Year and
# Attempt ("First","Second","Third"). Percentages stay numeric throughout
# instead of being pasted into character rows and parsed back.
compileStackAndGrouped = function(m1f21,numb_m1f21,m1f19,numb_m1f19,labels = c("F21","F19")){
  stopifnot(length(labels) == 2)
  # Rows of the flow table are the bands 0, 80, 100, i.e. F, C, A.
  gradeOrder = c("F","C","A")
  attemptOrder = c("First","Second","Third")

  # One cohort -> 9 rows: for each attempt column, the share of each grade.
  cohortShares = function(milestone, nQuestions, label){
    flow = flowAndSettling(buildDF_fromMilestone(milestone, nQuestions))$flow
    pct = unlist(lapply(flow[, 1:3], function(counts){
      counts = as.numeric(counts)
      counts / sum(counts) * 100
    }), use.names = FALSE)
    data.frame(Grade = rep(gradeOrder, times = 3),
               Pct = pct,
               Year = label,
               Attempt = rep(attemptOrder, each = 3),
               stringsAsFactors = FALSE)
  }

  stackAndGrouped = rbind(
    cohortShares(m1f21, numb_m1f21, labels[1]),
    cohortShares(m1f19, numb_m1f19, labels[2])
  )
  return(stackAndGrouped)
}

# Stacked bars of grade shares per cohort, one facet per attempt.
drawStackedGrades <- function(pctByGrade, plotTitle) {
  ggplot() +
    geom_bar(data = pctByGrade,
             aes(y = Pct, x = Year, fill = Grade),
             stat = "identity", position = "stack") +
    theme_bw() +
    ggtitle(plotTitle) +
    scale_fill_manual(values = c("green","yellow","red" )) +
    facet_grid( ~ Attempt)
}

# The second and fourth arguments are the question counts passed for F21 and F19.
stackAndGrouped <- compileStackAndGrouped(m1f21, 12, m1f19, 12)
drawStackedGrades(stackAndGrouped, "Milestone 1: F19 vs F21")

stackAndGrouped <- compileStackAndGrouped(m2f21, 11, m2f19, 12)
drawStackedGrades(stackAndGrouped, "Milestone 2: F19 vs F21")

stackAndGrouped <- compileStackAndGrouped(m3f21, 13, m3f19, 12)
drawStackedGrades(stackAndGrouped, "Milestone 3: F19 vs F21")

stackAndGrouped <- compileStackAndGrouped(m4f21, 12, m4f19, 12)
drawStackedGrades(stackAndGrouped, "Milestone 4: F19 vs F21")

4 Preclass

  • Students not turning in preclass through the semester
# Preclass columns: name contains "class" and "X" but not "Score".
isPreclassCol <- function(colNames) {
  grepl("class", colNames) & grepl("X", colNames) & !grepl("Score", colNames)
}
f18preclass <- gradesf18[, isPreclassCol(names(gradesf18))]
f19preclass <- gradesf19[, isPreclassCol(names(gradesf19))]
f21preclass <- gradesf21[, isPreclassCol(names(gradesf21))]

# Per student: number of preclass assignments with a score of 0.
gradesf18$missPreclass <- rowSums(f18preclass == 0, na.rm = TRUE)
gradesf19$missPreclass <- rowSums(f19preclass == 0, na.rm = TRUE)
gradesf21$missPreclass <- rowSums(f21preclass == 0, na.rm = TRUE)
l <- length(colnames(f21preclass))
#f21preclass[,l] = NULL
#f21preclass[,l-1] = NULL

# Per class day: number of students missing it. F21 counts scores below 1,
# the other two cohorts count scores equal to 0.
missedDay18 <- 0 + colSums(f18preclass == 0, na.rm = TRUE)
missedDay19 <- 0 + colSums(f19preclass == 0, na.rm = TRUE)
missedDay21 <- 0 + colSums(f21preclass < 1, na.rm = TRUE)
plot(missedDay18, type = "l", col = "black", ylim = c(0, 40),
     xlab = "class day", ylab = "students missing preclass")
lines(missedDay19, type = "l", col = "red")
lines(missedDay21, type = "l", col = "green")
legend(1, 40,
       legend = c(paste("F2018 n=", nf18),
                  paste("F2019 n=", nf19),
                  paste("F2021 n=", nf21)),
       col = c("black", "red", "green"), lty = 1:1, cex = 0.8)
title("Number of students missing each Preclass")

  • Repeating offenders
# Three side-by-side histograms (shared axes limits) of how many preclass
# assignments each student missed, one panel per cohort.
par(mfrow = c(1, 3),
    oma = c(5, 4, 0, 0) + 0.1,
    mar = c(0, 0, 1, 1) + 0.1)
preclassByCohort <- list(Fall18 = f18preclass, Fall19 = f19preclass, Fall21 = f21preclass)
for (cohort in names(preclassByCohort)) {
  hist(rowSums(preclassByCohort[[cohort]] == 0, na.rm = TRUE),
       breaks = 10, xlim = c(0, 15), ylim = c(0, 140), main = cohort,
       xlab = "Number of preclass missed", ylab = "Number of students")
}

5 Homework

  • Students not completing the homework is a sign of students giving up or completely disengaged. Notice that because of the “drop the lowest” policy some students just don’t attempt the last one because they’re happy with their grade. I’m wondering if the fact that the grade is out of 12 and not out of 100 loses sense to them. For example, the 70% of 12 is 8.4 which is not as dramatic.
# Homework score columns per cohort. F19 and F21 scores are divided by 12 and
# multiplied by 100; the F18 columns are used as they are.
f18hw <- gradesf18[, grepl("Thu", names(gradesf18))]
isF19Homework <- grepl("Homework", names(gradesf19)) &
  !grepl("Score", names(gradesf19)) &
  !grepl("Homework.13th.week", names(gradesf19))
f19hw <- gradesf19[, isF19Homework]
f19hw[] <- lapply(f19hw, function(points) as.numeric(points) / 12 * 100)
f21hw <- gradesf21[, grepl("Thu", names(gradesf21))]
f21hw[] <- lapply(f21hw, function(points) as.numeric(points) / 12 * 100)

# One column per homework (as many as F21 has); rows are the three cohorts.
belowCut <- function(scores, cut) sum(scores < cut, na.rm = TRUE)
hwfail <- data.frame(matrix(ncol = 0, nrow = 3))
for (i in 1:ncol(f21hw)) {
  hwfail[paste("hw", i)] <- c(
    belowCut(f18hw[, i], 70),
    belowCut(f19hw[, i], 70),
    belowCut(f21hw[, i], 70)
  )
}
row.names(hwfail) <- c("Fall18","Fall19","Fall21")
knitr::kable(hwfail, caption = "Number of students with homework scores below 70", digits = 1)
Number of students with homework scores below 70
hw 1 hw 2 hw 3 hw 4 hw 5 hw 6 hw 7 hw 8 hw 9
Fall18 2 5 1 9 9 3 2 3 11
Fall19 1 4 4 15 44 4 6 3 21
Fall21 3 18 11 29 26 15 15 29 30
# Same table with a cut-off of 50: one column per homework (as many as F21
# has), one row per cohort.
hwByCohort <- list(f18hw, f19hw, f21hw)
hwfail <- data.frame(matrix(ncol = 0, nrow = 3))
for (i in 1:ncol(f21hw)) {
  hwfail[paste("hw", i)] <- vapply(
    hwByCohort,
    function(hw) sum(hw[, i] < 50, na.rm = TRUE),
    integer(1)
  )
}
row.names(hwfail) <- c("Fall18","Fall19","Fall21")
knitr::kable(hwfail, caption = "Number of students with homework scores below 50", digits = 1)
Number of students with homework scores below 50
hw 1 hw 2 hw 3 hw 4 hw 5 hw 6 hw 7 hw 8 hw 9
Fall18 0 4 0 4 3 2 0 1 5
Fall19 0 4 2 4 11 2 3 1 8
Fall21 1 8 2 18 10 7 8 17 18

6 Lab reports

  • Except for one, lab reports are out of ten. The table shows how many students got scores below 5
# Lab report columns per cohort (F18/F19 exclude the "Score" columns).
f18report <- gradesf18[, grepl("Reporting", names(gradesf18)) & !grepl("Score", names(gradesf18))]
f19report <- gradesf19[, grepl("Reporting", names(gradesf19)) & !grepl("Score", names(gradesf19))]
f21report <- gradesf21[, grepl("Reporting", names(gradesf21))]

# Express every report as a percentage of the highest score seen in its column.
asPercentOfColumnMax <- function(points) {
  numericPoints <- as.numeric(points)
  numericPoints / max(numericPoints, na.rm = TRUE) * 100
}
f18report[] <- lapply(f18report, asPercentOfColumnMax)
f19report[] <- lapply(f19report, asPercentOfColumnMax)
f21report[] <- lapply(f21report, asPercentOfColumnMax)

# F21: sort columns by name, then apply the fixed reordering of the 11 columns.
f21report <- f21report[, order(names(f21report))]
f21report <- f21report[, c(1, 4:11, 2, 3)]

# One column per report (as many as F21 has); rows are the three cohorts.
reportsByCohort <- list(f18report, f19report, f21report)
reportfail <- data.frame(matrix(ncol = 0, nrow = 3))
for (i in 1:ncol(f21report)) {
  reportfail[paste("Report", i)] <- vapply(
    reportsByCohort,
    function(reports) sum(reports[, i] < 50, na.rm = TRUE),
    integer(1)
  )
}
row.names(reportfail) <- c("Fall18","Fall19","Fall21")
knitr::kable(reportfail, caption = "Number of students with report scores below 50", digits = 1)
Number of students with report scores below 50
Report 1 Report 2 Report 3 Report 4 Report 5 Report 6 Report 7 Report 8 Report 9 Report 10 Report 11
Fall18 2 1 5 2 2 4 3 3 3 4 6
Fall19 2 11 26 16 10 10 17 10 18 13 14
Fall21 7 51 30 48 17 24 36 30 48 43 27

7 Prelab

Missing a prelab is a big deal because you are not allowed to turn in your report, and the moment you miss more than two labs you automatically fail the course.

# Prelab columns: name contains "Prelab" but not "Score".
isPrelabCol <- function(colNames) grepl("Prelab", colNames) & !grepl("Score", colNames)
f18prelab <- gradesf18[, isPrelabCol(names(gradesf18))]
f19prelab <- gradesf19[, isPrelabCol(names(gradesf19))]
f21prelab <- gradesf21[, isPrelabCol(names(gradesf21))]

# Per student: number of prelabs with a score of 0.
gradesf18$missPrelab <- rowSums(f18prelab == 0, na.rm = TRUE)
gradesf19$missPrelab <- rowSums(f19prelab == 0, na.rm = TRUE)
gradesf21$missPrelab <- rowSums(f21prelab == 0, na.rm = TRUE)


# Per lab day: number of students missing the prelab. F21 counts scores below
# 1, the other two cohorts count scores equal to 0.
missedLab18 <- 0 + colSums(f18prelab == 0, na.rm = TRUE)
missedLab19 <- 0 + colSums(f19prelab == 0, na.rm = TRUE)
missedLab21 <- 0 + colSums(f21prelab < 1, na.rm = TRUE)
plot(missedLab18, type = "l", col = "black", ylim = c(0, 8),
     xlab = "Lab day", ylab = "students missing prelab")
lines(missedLab19, type = "l", col = "red")
lines(missedLab21, type = "l", col = "green")
legend("topright",
       legend = c(paste("F2018 n=", nf18),
                  paste("F2019 n=", nf19),
                  paste("F2021 n=", nf21)),
       col = c("black", "red", "green"), lty = 1:1, cex = 0.8)
title("Number of students missing each Prelab")

8 Video Watching

  • What days students are not watching the videos. The data only shows up to Module 3 (the first two months of the course). I need to compile all the video watching data from F19 and F18, but Kaltura changed the format so it’s harder to get the data for the date ranges that I need.
# One column per video, one row per cohort; values are taken from the LAST row
# of each video table.
allVideo <- data.frame(matrix(ncol = 0, nrow = 3))
j18 <- nrow(videof18)
j19 <- nrow(videof19)
j21 <- nrow(videof21)
#f19 has the least number of columns, I need more data. The last two columns are averages
nVideos <- ncol(videof19) - 2
for (i in 1:nVideos) {
  # video i sits in column i+1 for F18/F19 and in column i+6 for F21
  allVideo[paste("video", i)] <- c(
    videof18[j18, i + 1],
    videof19[j19, i + 1],
    videof21[j21, i + 6]
  )
}
row.names(allVideo) <- c("Fall18","Fall19","Fall21")
tAllVideo <- t(allVideo)
matplot(tAllVideo, type = "l",
        xlab = "Video instance",
        ylab = "Number students not watching it",
        main = "Students who do not watch that video")
legend("topright", legend = colnames(tAllVideo), col = 1:3, lty = 1:3)

  • Distribution of repeat offenders: are there a lot of students who consistently do not watch the videos? Fall21 stands out again.
# Three side-by-side histograms (one per semester) of how many videos each
# student missed, drawn on common x/y ranges so they can be compared.
# par() returns the previous settings; keep them so the 1x3 layout and the
# shrunken margins do not leak into every plot drawn after this block.
oldPar = par(mfrow = c(1, 3),
             oma = c(5, 4, 0, 0) + 0.1,
             mar = c(0, 0, 1, 1) + 0.1)

hist(videof18$TotalMissed, xlim = c(0, 50), ylim = c(0, 140), main = "Fall18", xlab = "Number of videos missed", ylab = "Number of students")
hist(videof19$TotalMissed, xlim = c(0, 50), ylim = c(0, 140), main = "Fall19", xlab = "Number of videos missed", ylab = "Number of students")
# The Fall 2021 table names the column VideosMissed instead of TotalMissed.
hist(videof21$VideosMissed, xlim = c(0, 50), ylim = c(0, 140), main = "Fall21", xlab = "Number of videos missed", ylab = "Number of students")

# Restore the graphics parameters that were active before this block.
par(oldPar)

#this wont be used until further down
# Attach each student's VideosMissed count to the Fall 2021 gradebook by
# matching gradesf21$SIS.Login.ID against the "name" column of the video table.
missedVideosF21 = videof21_all[c("name", "VideosMissed")]
# merge() sorts its result by the key column, which would silently reorder
# gradesf21. Separate per-student tables (e.g. f21report, presumably also
# f21hw) are plotted row-by-row against gradesf21 later, so the original row
# order is recorded here and restored after the merge.
gradesf21$.rowOrderBeforeMerge = seq_len(nrow(gradesf21))
gradesf21 = merge(gradesf21, missedVideosF21, by.x = "SIS.Login.ID", by.y = "name", all.x = TRUE)
gradesf21 = gradesf21[order(gradesf21$.rowOrderBeforeMerge), ]
gradesf21$.rowOrderBeforeMerge = NULL
row.names(gradesf21) = NULL

9 What Correlates With Course Grades

9.1 Semester Exams

# Final course score against Exam 1: black = Fall 2018, red = Fall 2019,
# green = Fall 2021. Each semester gets its points plus a least-squares line,
# except Fall 2021 whose line is still disabled below.
plot(
  x = gradesf18$Exam.1..335345.,
  y = gradesf18$Unposted.Final.Score,
  type = "p",
  xlab = "Exam1/100",
  ylab = "Final Grade/100",
  main = "Final grade vs Exam1"
)
abline(lm(gradesf18$Unposted.Final.Score ~ gradesf18$Exam.1..335345.))

points(
  x = gradesf19$Open.Ended.Written.Exam.1..816325.,
  y = gradesf19$Unposted.Final.Score,
  col = "red"
)
abline(
  lm(gradesf19$Unposted.Final.Score ~ gradesf19$Open.Ended.Written.Exam.1..816325.),
  col = "red"
)

points(
  x = gradesf21$Exam..1..2161998.,
  y = gradesf21$Unposted.Final.Score,
  col = "green"
)
# NOTE(review): the disabled fit rescales the Fall 2021 exam by /25*100 while
# the points above are plotted unscaled -- confirm which scale is intended.
#abline(lm(
#  gradesf21$Unposted.Final.Score[!is.na(gradesf21$Exam..1..2161998.)] ~ 
#    gradesf21$Exam..1..2161998.[!is.na(gradesf21$Exam..1..2161998.)]/25*100),col="green")

legend(
  "bottomright",
  legend = c("F18", "F19", "F21"),
  col = c("black", "red", "green"),
  lty = 1,
  cex = 0.8
)

# Final course score against Exam 2, with one least-squares line per semester
# (black = Fall 2018, red = Fall 2019, green = Fall 2021).
plot(
  x = gradesf18$Exam.2..358394.,
  y = gradesf18$Unposted.Final.Score,
  type = "p",
  xlab = "Exam2/100",
  ylab = "Final Grade/100",
  main = "Final grade vs Exam2"
)
abline(lm(gradesf18$Unposted.Final.Score ~ gradesf18$Exam.2..358394.))

points(
  x = gradesf19$Open.Ended.Exam.2..836125.,
  y = gradesf19$Unposted.Final.Score,
  col = "red"
)
abline(
  lm(gradesf19$Unposted.Final.Score ~ gradesf19$Open.Ended.Exam.2..836125.),
  col = "red"
)

points(
  x = gradesf21$Exam..2..2171776.,
  y = gradesf21$Unposted.Final.Score,
  col = "green"
)
abline(
  lm(gradesf21$Unposted.Final.Score ~ gradesf21$Exam..2..2171776.),
  col = "green"
)

legend(
  "bottomright",
  legend = c("F18", "F19", "F21"),
  col = c("black", "red", "green"),
  lty = 1,
  cex = 0.8
)

# Final course score against Exam 3, with one least-squares line per semester
# (black = Fall 2018, red = Fall 2019, green = Fall 2021).
plot(
  x = gradesf18$Exam.3..395005.,
  y = gradesf18$Unposted.Final.Score,
  type = "p",
  xlab = "Exam3/100",
  ylab = "Final Grade/100",
  main = "Final grade vs Exam3"
)
abline(lm(gradesf18$Unposted.Final.Score ~ gradesf18$Exam.3..395005.))

points(
  x = gradesf19$Open.Ended.Exam.3..875116.,
  y = gradesf19$Unposted.Final.Score,
  col = "red"
)
abline(
  lm(gradesf19$Unposted.Final.Score ~ gradesf19$Open.Ended.Exam.3..875116.),
  col = "red"
)

points(
  x = gradesf21$Exam..3..2184781.,
  y = gradesf21$Unposted.Final.Score,
  col = "green"
)
abline(
  lm(gradesf21$Unposted.Final.Score ~ gradesf21$Exam..3..2184781.),
  col = "green"
)

legend(
  "bottomright",
  legend = c("F18", "F19", "F21"),
  col = c("black", "red", "green"),
  lty = 1,
  cex = 0.8
)

# Final course score against the final exam, with one least-squares line per
# semester (black = Fall 2018, red = Fall 2019, green = Fall 2021).
plot( gradesf18$Final.exam..408510., gradesf18$Unposted.Final.Score, type='p',xlab='Final Exam/100',ylab='Final Grade/100',main='Final grade vs Final exam',ylim=c(50,100))
abline( lm( gradesf18$Unposted.Final.Score ~ gradesf18$Final.exam..408510. ))
points( gradesf19$Final.exam.Unposted.Current.Score, gradesf19$Unposted.Final.Score, col="red")
abline( lm( gradesf19$Unposted.Final.Score ~ gradesf19$Final.exam.Unposted.Current.Score), col="red")
# Plot the same response the green line is fitted on (Unposted.Final.Score);
# the points previously used Unposted.Current.Score, unlike every other series
# on this axis.
points( gradesf21$Final.Written.Exam.Current.Score, gradesf21$Unposted.Final.Score, col="green")
abline(lm( gradesf21$Unposted.Final.Score ~ gradesf21$Final.Written.Exam.Current.Score),col="green")
legend("bottomright",legend = c("F18","F19","F21"),col=c("black","red","green"),lty=1:1, cex=0.8)

9.2 Milestone Exams and Course Grade

  • A graph of milestone exams is not useful because most students get 100/80/0 so it makes no sense to plot it in an xy coordinate. The table below showing what “A”,“B”,“C”.. students get as average is more informative.

9.3 Homework

# Per-student homework average, stored as an extra "ave" column. Any existing
# "ave" column is excluded from the mean so that re-running this chunk does
# not average the previous average in; on a first run this is rowMeans(f18hw).
f18hw$ave = rowMeans(f18hw[, names(f18hw) != "ave", drop = FALSE])
f19hw$ave = rowMeans(f19hw[, names(f19hw) != "ave", drop = FALSE])
f21hw$ave = rowMeans(f21hw[, names(f21hw) != "ave", drop = FALSE])

# Final course score against the homework average, with one least-squares
# line per semester (black = Fall 2018, red = Fall 2019, green = Fall 2021).
# Assumes the *hw tables are row-aligned with their gradebook -- TODO confirm.
plot(f18hw$ave, gradesf18$Unposted.Final.Score,type='p', xlab='Ave HW/100',ylab='Final Grade/100',main='Final Grade vs Average Homework',ylim=c(0,100))
abline( lm(gradesf18$Unposted.Final.Score ~ f18hw$ave ))
# The F19/F21 points now use Unposted.Final.Score, the response their lines
# are fitted on (they previously plotted Unposted.Current.Score).
points(f19hw$ave, gradesf19$Unposted.Final.Score,col="red")
abline( lm(gradesf19$Unposted.Final.Score ~ f19hw$ave ),col="red")
points(f21hw$ave, gradesf21$Unposted.Final.Score,col="green")
abline( lm(gradesf21$Unposted.Final.Score ~ f21hw$ave ),col="green")
legend("bottomright",legend = c("F18","F19","F21"),col=c("black","red","green"),lty=1:1, cex=0.8)

9.4 Lab reports

# Per-student lab-report average, stored as an extra "ave" column. Any
# existing "ave" column is excluded from the mean so that re-running this
# chunk does not average the previous average in; on a first run this is
# rowMeans(f18report).
f18report$ave = rowMeans(f18report[, names(f18report) != "ave", drop = FALSE])
f19report$ave = rowMeans(f19report[, names(f19report) != "ave", drop = FALSE])
f21report$ave = rowMeans(f21report[, names(f21report) != "ave", drop = FALSE])

# Final course score against the lab-report average, with one least-squares
# line per semester (black = Fall 2018, red = Fall 2019, green = Fall 2021).
# Assumes the *report tables are row-aligned with their gradebook -- TODO confirm.
plot(f18report$ave, gradesf18$Unposted.Final.Score,type='p', xlab='Ave Lab Report/100',ylab='Final Grade/100',main='Final Grade vs Average Lab Report',ylim=c(0,100))
abline( lm(gradesf18$Unposted.Final.Score ~ f18report$ave ))
# The F19/F21 points now use Unposted.Final.Score, the response their lines
# are fitted on (they previously plotted Unposted.Current.Score).
points(f19report$ave, gradesf19$Unposted.Final.Score,col="red")
abline( lm(gradesf19$Unposted.Final.Score ~ f19report$ave ),col="red")
points(f21report$ave, gradesf21$Unposted.Final.Score,col="green")
abline( lm(gradesf21$Unposted.Final.Score ~ f21report$ave ),col="green")
legend("bottomright",legend = c("F18","F19","F21"),col=c("black","red","green"),lty=1:1, cex=0.8)

9.5 Prelab

It looks like in 2018 we didn’t require a 90% prelab score to be allowed in the lab. This shows in the Fall 2018 prelab averages, which are all over the place, whereas Fall 2019 and Fall 2021 show pretty much everyone with an average above 90%.

# Final course score against the prelab group score, with one least-squares
# line per semester (black = Fall 2018, red = Fall 2019, green = Fall 2021).
plot( gradesf18$Lab..Prelab.Quiz.Unposted.Final.Score, gradesf18$Unposted.Final.Score,type='p', xlab='Ave Prelab /100',ylab='Final Grade/100',main='Final Grade vs Average Prelab',ylim=c(0,100) )
abline(lm( gradesf18$Unposted.Final.Score ~ gradesf18$Lab..Prelab.Quiz.Unposted.Final.Score))
# The F19/F21 points now use Unposted.Final.Score, the response their lines
# are fitted on (they previously plotted Unposted.Current.Score).
points(gradesf19$Lab..Prelab.Quiz.Unposted.Final.Score, gradesf19$Unposted.Final.Score,col="red")
abline( lm(gradesf19$Unposted.Final.Score ~  gradesf19$Lab..Prelab.Quiz.Unposted.Final.Score ),col="red")
# The Fall 2021 gradebook names the prelab group column differently.
points(gradesf21$Pre.Lab.Assignments.Unposted.Final.Score, gradesf21$Unposted.Final.Score,col="green")
abline( lm(gradesf21$Unposted.Final.Score ~ gradesf21$Pre.Lab.Assignments.Unposted.Final.Score ),col="green")
legend("bottomright",legend = c("F18","F19","F21"),col=c("black","red","green"),lty=1:1, cex=0.8)

9.6 Tables of Behavioral characteristics of students based on grade

  • Fall 2018
# Summarise one score column per letter grade.
#
# df:    data frame with an Unposted.Final.Grade column and a column named by
#        `score`.
# score: name (string) of the column to summarise.
#
# Returns a 5-row, 1-column data frame whose rows correspond to the letters
# A, B, C, D, F (in that order). Each cell is the string "<mean> +/- <sd>",
# with the mean printed to 2 decimals and the sd to 1 decimal, NAs ignored.
# A student belongs to a letter when Unposted.Final.Grade contains it, so
# "A-" counts towards "A" and "B+" towards "B".
getMeanAndSD = function(df, score){
  gradeLetters = c("A", "B", "C", "D", "F")

  # Formatted "mean +/- sd" of `score` over the rows matching one letter.
  describeLetter = function(letter){
    matching = which(grepl(letter, df$Unposted.Final.Grade))
    values = df[matching, ][[score]]
    paste(
      sprintf("%.2f", mean(values, na.rm = TRUE)),
      '+/-',
      sprintf("%.1f", sd(values, na.rm = TRUE))
    )
  }

  results = data.frame(matrix(ncol=1,nrow=5))
  results[, 1] = vapply(gradeLetters, describeLetter, character(1), USE.NAMES = FALSE)
  results
}

# Fall 2018 summary: one row per letter grade, one column per gradebook
# measure, each cell the "mean +/- sd" string produced by getMeanAndSD().
# NOTE(review): "Final Exam" reads the *Current* score here while the Fall
# 2019/2021 tables read the *Final* score -- confirm which is intended.
f18ave <- local({
  tbl <- data.frame(matrix(ncol = 0, nrow = 5))
  tbl["Written Exams "] <- getMeanAndSD(gradesf18, "Science.Practice.Exercises.Unposted.Final.Score")
  tbl["Final Exam"] <- getMeanAndSD(gradesf18, "Final.exam.Unposted.Current.Score")
  tbl["Milestone "] <- getMeanAndSD(gradesf18, "Milestones.Unposted.Current.Score")
  tbl["HW "] <- getMeanAndSD(gradesf18, "Homework.Current.Score")
  tbl["Lab reports"] <- getMeanAndSD(gradesf18, "Lab..Reporting.Unposted.Current.Score")
  tbl["Miss Preclass"] <- getMeanAndSD(gradesf18, "missPreclass")
  tbl["Miss Prelab"] <- getMeanAndSD(gradesf18, "missPrelab")
  row.names(tbl) <- c("A students", "B students", "C students", "D students", "F students")
  tbl
})
knitr::kable(
  f18ave,
  caption = "Fall 2018: Average +/- Standard Deviation Based on Grade Performance"
)
Fall 2018: Average +/- Standard Deviation Based on Grade Performance
Written Exams Final Exam Milestone HW Lab reports Miss Preclass Miss Prelab
A students 81.51 +/- 6.6 87.55 +/- 6.0 100.00 +/- 0.0 97.69 +/- 2.5 92.12 +/- 3.8 1.58 +/- 1.9 0.04 +/- 0.3
B students 62.77 +/- 9.8 74.05 +/- 7.3 98.82 +/- 3.0 94.33 +/- 5.1 88.05 +/- 5.5 2.99 +/- 2.7 0.12 +/- 0.4
C students 48.98 +/- 7.2 61.31 +/- 7.0 85.42 +/- 10.3 87.27 +/- 7.3 84.94 +/- 7.5 3.17 +/- 2.5 0.08 +/- 0.3
D students 43.92 +/- 11.3 59.91 +/- 6.3 63.75 +/- 13.1 79.92 +/- 14.8 71.19 +/- 10.8 5.25 +/- 3.9 1.00 +/- 1.2
F students 35.88 +/- 10.7 NaN +/- NA 46.05 +/- 25.3 62.19 +/- 17.2 59.30 +/- 14.9 14.40 +/- 5.2 2.40 +/- 1.5
# Fall 2019 summary: one row per letter grade, one column per gradebook
# measure, each cell the "mean +/- sd" string produced by getMeanAndSD().
f19ave <- local({
  tbl <- data.frame(matrix(ncol = 0, nrow = 5))
  tbl["Written Exams "] <- getMeanAndSD(gradesf19, "Open.Ended.Written.Exams.Unposted.Final.Score")
  tbl["Final Exam"] <- getMeanAndSD(gradesf19, "Final.exam.Unposted.Final.Score")
  tbl["Milestone "] <- getMeanAndSD(gradesf19, "Milestones.Unposted.Current.Score")
  tbl["HW "] <- getMeanAndSD(gradesf19, "Homework.Current.Score")
  tbl["Lab reports"] <- getMeanAndSD(gradesf19, "Lab..Reporting.Unposted.Current.Score")
  tbl["Miss Preclass"] <- getMeanAndSD(gradesf19, "missPreclass")
  tbl["Miss Prelab"] <- getMeanAndSD(gradesf19, "missPrelab")
  row.names(tbl) <- c("A students", "B students", "C students", "D students", "F students")
  tbl
})
knitr::kable(
  f19ave,
  caption = "Fall 2019: Average +/- Standard Deviation Based on Grade Performance"
)
Fall 2019: Average +/- Standard Deviation Based on Grade Performance
Written Exams Final Exam Milestone HW Lab reports Miss Preclass Miss Prelab
A students 84.14 +/- 6.0 82.22 +/- 8.0 100.00 +/- 0.0 98.25 +/- 1.6 88.44 +/- 6.6 1.77 +/- 1.9 0.07 +/- 0.3
B students 64.24 +/- 9.7 64.08 +/- 9.4 99.74 +/- 1.1 93.92 +/- 4.8 81.71 +/- 9.5 2.33 +/- 2.0 0.08 +/- 0.3
C students 47.63 +/- 10.8 46.57 +/- 13.8 92.34 +/- 8.6 91.34 +/- 6.4 73.07 +/- 11.8 3.00 +/- 2.7 0.09 +/- 0.4
D students 37.11 +/- 12.5 45.38 +/- 7.8 70.83 +/- 10.2 77.24 +/- 14.3 57.39 +/- 23.2 4.00 +/- 4.0 0.33 +/- 0.8
F students 33.85 +/- 12.9 9.96 +/- 19.9 68.75 +/- 33.8 64.80 +/- 16.5 41.30 +/- 16.4 12.25 +/- 5.9 1.75 +/- 3.5
# Fall 2021 summary: one row per letter grade, one column per gradebook
# measure, each cell the "mean +/- sd" string produced by getMeanAndSD().
# Unlike the earlier semesters it also reports the VideosMissed column.
f21ave <- local({
  tbl <- data.frame(matrix(ncol = 0, nrow = 5))
  tbl["Written Exams "] <- getMeanAndSD(gradesf21, "Open.Ended.Semester.Exams.Unposted.Final.Score")
  tbl["Final Exam"] <- getMeanAndSD(gradesf21, "Final.Written.Exam.Unposted.Final.Score")
  tbl["Milestone "] <- getMeanAndSD(gradesf21, "Milestones.Unposted.Current.Score")
  tbl["HW "] <- getMeanAndSD(gradesf21, "Homework.Current.Score")
  tbl["Lab reports"] <- getMeanAndSD(gradesf21, "Lab.Reports.Unposted.Current.Score")
  tbl["Miss Preclass"] <- getMeanAndSD(gradesf21, "missPreclass")
  tbl["Miss Prelab"] <- getMeanAndSD(gradesf21, "missPrelab")
  tbl["Miss Videos"] <- getMeanAndSD(gradesf21, "VideosMissed")
  row.names(tbl) <- c("A students", "B students", "C students", "D students", "F students")
  tbl
})
knitr::kable(
  f21ave,
  caption = "Fall 2021: Average +/- Standard Deviation Based on Grade Performance"
)
Fall 2021: Average +/- Standard Deviation Based on Grade Performance
Written Exams Final Exam Milestone HW Lab reports Miss Preclass Miss Prelab Miss Videos
A students 86.60 +/- 6.8 78.75 +/- 11.3 99.66 +/- 1.3 96.71 +/- 3.5 91.30 +/- 4.7 0.08 +/- 0.3 0.00 +/- 0.0 8.41 +/- 11.5
B students 70.36 +/- 8.1 58.90 +/- 11.5 98.57 +/- 3.2 91.26 +/- 5.1 78.68 +/- 10.2 0.26 +/- 0.5 0.00 +/- 0.0 19.09 +/- 15.9
C students 54.11 +/- 13.3 46.36 +/- 13.8 85.61 +/- 9.4 82.54 +/- 9.6 65.32 +/- 13.6 0.21 +/- 0.4 0.09 +/- 0.3 25.64 +/- 15.9
D students 45.06 +/- 7.7 44.44 +/- 7.8 66.00 +/- 17.0 73.20 +/- 10.3 52.70 +/- 18.1 0.40 +/- 0.5 0.10 +/- 0.3 35.50 +/- 18.7
F students 34.22 +/- 20.1 17.25 +/- 27.5 30.00 +/- 19.6 52.11 +/- 26.7 40.13 +/- 21.2 0.62 +/- 0.7 0.12 +/- 0.4 44.38 +/- 21.3

Missing four videos is approximately equivalent to missing one day of class

10 Statistical significance

10.1 Written Exams

#install.packages("ggpubr")
library(ggpubr)

# Jittered box plot of one score column split by a grouping column, annotated
# with significance tests.
#
# df:      data frame holding both columns.
# myx:     name (string) of the grouping column; also used for the colours.
# myy:     name (string) of the numeric column to plot.
# mytitle: plot title.
# myylab:  y-axis label.
#
# The plot carries a dashed horizontal line at the overall mean of `myy`, an
# ANOVA label placed at 110% of the largest value, and per-group t-test
# p-values (each group against all data, ref.group = ".all.") at 105%.
# Returns the plot object; callers print() it.
plotGGbox = function(df,myx,myy,mytitle,myylab){
  yvals = df[[myy]]
  # na.rm = TRUE: a single NA used to turn both the label positions and the
  # mean reference line into NA, forcing callers to pre-filter the data.
  maxy = max(yvals, na.rm = TRUE)
  meany = mean(yvals, na.rm = TRUE)
  ggboxplot(df, x = myx, y = myy,  
            title = mytitle,
            color = myx, add = "jitter", legend="none",ylab = myylab) + rotate_x_text(angle = 45) +  
    geom_hline( yintercept = meany, linetype = 2) + 
    stat_compare_means(method = "anova", label.y = maxy*1.10) +
    stat_compare_means(label = "p.format", size=2.5, method = "t.test", ref.group = ".all.",label.y = maxy*1.05)
}

# Sort every gradebook by its simplified letter grade before drawing the box
# plots below.
gradesf18 <- gradesf18[order(gradesf18$simpleLetter), ]
gradesf19 <- gradesf19[order(gradesf19$simpleLetter), ]
gradesf21 <- gradesf21[order(gradesf21$simpleLetter), ]


#Written exams
# Two plots per semester: grouped by simpleLetter, then by the full
# Unposted.Final.Grade.
# NOTE(review): Fall 2021 plots the *Current* score of the written-exam group
# while Fall 2018/2019 plot the *Final* score -- confirm this is intended.
print(plotGGbox(
  df = gradesf18, myx = "simpleLetter",
  myy = "Science.Practice.Exercises.Unposted.Final.Score",
  mytitle = "Fall18: Written Semester Exams:Wrapped Letters",
  myylab = "Average Semester Exams"
))

print(plotGGbox(
  df = gradesf18, myx = "Unposted.Final.Grade",
  myy = "Science.Practice.Exercises.Unposted.Final.Score",
  mytitle = "Fall18: Written Semester Exams:All Letters",
  myylab = "Average Semester Exams"
))

print(plotGGbox(
  df = gradesf19, myx = "simpleLetter",
  myy = "Open.Ended.Written.Exams.Unposted.Final.Score",
  mytitle = "Fall19: Written Semester Exams:Wrapped Letters",
  myylab = "Average Semester Exams"
))

print(plotGGbox(
  df = gradesf19, myx = "Unposted.Final.Grade",
  myy = "Open.Ended.Written.Exams.Unposted.Final.Score",
  mytitle = "Fall19: Written Semester Exams:All Letters",
  myylab = "Average Semester Exams"
))

print(plotGGbox(
  df = gradesf21, myx = "simpleLetter",
  myy = "Open.Ended.Semester.Exams.Unposted.Current.Score",
  mytitle = "Fall21: Written Semester Exams:Wrapped Letters",
  myylab = "Average Semester Exams"
))

print(plotGGbox(
  df = gradesf21, myx = "Unposted.Final.Grade",
  myy = "Open.Ended.Semester.Exams.Unposted.Current.Score",
  mytitle = "Fall21: Written Semester Exams:All Letters",
  myylab = "Average Semester Exams"
))

10.2 Milestone exams

# Milestone group score by simplified letter grade, one plot per semester.
print(plotGGbox(
  df = gradesf18, myx = "simpleLetter",
  myy = "Milestones.Unposted.Current.Score",
  mytitle = "Fall18: Milestone Avg", myylab = "Avg Milestone grade"
))

print(plotGGbox(
  df = gradesf19, myx = "simpleLetter",
  myy = "Milestones.Unposted.Current.Score",
  mytitle = "Fall19: Milestone Avg", myylab = "Avg Milestone grade"
))

print(plotGGbox(
  df = gradesf21, myx = "simpleLetter",
  myy = "Milestones.Unposted.Current.Score",
  mytitle = "Fall21: Milestone Avg", myylab = "Avg Milestone grade"
))

10.3 HW

# Homework group score by simplified letter grade, one plot per semester.
print(plotGGbox(
  df = gradesf18, myx = "simpleLetter",
  myy = "Homework.Current.Score",
  mytitle = "Fall18: Homework Avg", myylab = "Avg Homework grade"
))

print(plotGGbox(
  df = gradesf19, myx = "simpleLetter",
  myy = "Homework.Current.Score",
  mytitle = "Fall19: Homework Avg", myylab = "Avg Homework grade"
))

print(plotGGbox(
  df = gradesf21, myx = "simpleLetter",
  myy = "Homework.Current.Score",
  mytitle = "Fall21: Homework Avg", myylab = "Avg Homework grade"
))

10.4 Lab reports

# Lab-report group score by simplified letter grade, one plot per semester.
# The Fall 2021 gradebook names this column differently from 2018/2019.
print(plotGGbox(
  df = gradesf18, myx = "simpleLetter",
  myy = "Lab..Reporting.Unposted.Current.Score",
  mytitle = "Fall18: Lab reports Avg", myylab = "Avg Lab reports grade"
))

print(plotGGbox(
  df = gradesf19, myx = "simpleLetter",
  myy = "Lab..Reporting.Unposted.Current.Score",
  mytitle = "Fall19: Lab reports Avg", myylab = "Avg Lab reports grade"
))

print(plotGGbox(
  df = gradesf21, myx = "simpleLetter",
  myy = "Lab.Reports.Unposted.Current.Score",
  mytitle = "Fall21: Lab reports Avg", myylab = "Avg Lab reports grade"
))

10.5 Missed preclass

# missPreclass counts by simplified letter grade, one plot per semester.
print(plotGGbox(
  df = gradesf18, myx = "simpleLetter", myy = "missPreclass",
  mytitle = "Fall18: Missed preclass instances", myylab = "Missed preclass"
))

print(plotGGbox(
  df = gradesf19, myx = "simpleLetter", myy = "missPreclass",
  mytitle = "Fall19: Missed preclass instances", myylab = "Missed preclass"
))

print(plotGGbox(
  df = gradesf21, myx = "simpleLetter", myy = "missPreclass",
  mytitle = "Fall21: Missed preclass instances", myylab = "Missed preclass"
))

10.6 Missed prelab

# missPrelab counts by simplified letter grade, one plot per semester.
# The y-axis label now says "Missed prelab"; it previously read "Missed
# preclass", carried over from the preclass plots.
print( plotGGbox(gradesf18,"simpleLetter","missPrelab","Fall18: Missed prelab instances","Missed prelab") )

print( plotGGbox(gradesf19,"simpleLetter","missPrelab","Fall19: Missed prelab instances","Missed prelab") )

print( plotGGbox(gradesf21,"simpleLetter","missPrelab","Fall21: Missed prelab instances","Missed prelab") )

10.7 Missed videos

#theres one student who never opened a single video, so it shows as NA and blows the statistics
# Rows with a missing VideosMissed value are therefore dropped before plotting.

print(plotGGbox(
  df = gradesf21[!is.na(gradesf21$VideosMissed), ],
  myx = "simpleLetter", myy = "VideosMissed",
  mytitle = "Fall21: Videos not watched", myylab = "Missed videos #"
))

11 Good milestone practices

12 Bundling students: Clustering based on effort and performance

Too often, educators lump students together as “they”, as if they were a homogeneous group. When trying to use a finer granularity we may label “good students” and “bad students” based on performance. However, even within these two groups the characteristics are not similar. In fact, as instructors we want to identify the group of students that we can help the most. I propose that we cluster students along two axes, performance and effort, yielding at least four different groups:

  • WE-LP (wrong/low effort and low performance)

  • HE-LP (high effort and low performance)

  • LE-AP (low/lazy effort - acceptable performance)

  • HE-AP (high effort - acceptable performance)

12.1 Measuring effort

We have several indicators that when combined can be used to measure effort. We will start simple and look at what Canvas reports as Canvas participation

# Load the Fall 2021 participation table and plot two of its activity
# columns (Participations, Page.Views) against the overall course grade.
part_f21 <- read.csv(
  file = "~/Teaching/Grades_and_SRT/Fall2021/participation_f21.csv",
  header = TRUE
)
plot(x = part_f21$Overall.course.grade, y = part_f21$Participations)

plot(x = part_f21$Overall.course.grade, y = part_f21$Page.Views)

# Disabled: joining the participation table onto the gradebook.
#part_f21 = 
#align
#gradesf21 = merge(gradesf21,part_f21,by.x = "SIS.User.ID" ,by.y = "SIS.Id", all.x = TRUE)