# Load the raw CSV exports (milestones, gradebooks, video analytics) for every semester.
if (Sys.info()["sysname"] == "Windows"){
  # NOTE(review): no paths are configured for Windows, so this branch loads nothing.
}else{
  #F23
  m1f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1.csv",header = TRUE)
  m2f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2.csv",header = TRUE)
  m3f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3.csv",header = TRUE)
  m4f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4.csv",header = TRUE)
  gradesf23 =     read.csv("~/Teaching/Grades_and_SRT/Fall2023/chem1331_f23_grades_canvas.csv",header = TRUE)
  
  #F22
  m1f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m1.csv",header = TRUE)
  m2f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m2.csv",header = TRUE)
  m3f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m3.csv",header = TRUE)
  m4f22 = read.csv("~/Teaching/Grades_and_SRT/Fall2022/m4.csv",header = TRUE)
  gradesf22 =     read.csv("~/Teaching/Grades_and_SRT/Fall2022/chem1331_f22_grades_canvas.csv",header = TRUE)
  
  #F21
  m1f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone1.csv",header = TRUE)
  m2f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone2.csv",header = TRUE)
  m3f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone3.csv",header = TRUE)
  m4f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone4.csv",header = TRUE)
  #ex1 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/Exam_1_scores.csv",header = TRUE)
  #ex2 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/Exam_2_scores.csv",header = TRUE)
  # gradesf21 was previously read from the finalgrades file and then immediately
  # overwritten by the Canvas export, so only the Canvas export is read now.
  #gradesf21 =     read.csv("~/Teaching/Grades_and_SRT/Fall2021/chem1331_f21_finalgrades.csv",header = TRUE)
  gradesf21 =     read.csv("~/Teaching/Grades_and_SRT/Fall2021/chem1331_f21_grades_canvas.csv",header = TRUE)
  #demo21 =       read.csv("~/Teaching/Grades_and_SRT/Fall2021/")
  #use this file to compare among years
  videof21 =      read.csv("~/Teaching/Grades_and_SRT/Fall2021/videowatching_resultsf21.csv",header = TRUE)
  #this video analytics has all the videos of the course
  videof21_all =      read.csv("~/Teaching/Grades_and_SRT/Fall2021/videowatchingAnalytics.csv",header = TRUE)
  #m1practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone1_practice.csv",header = TRUE)
  #m2practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone2_practice.csv",header = TRUE)
  #m3practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone3_practice.csv",header = TRUE)
  #m4practicef21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/milestone4_practice.csv",header = TRUE)

  #F19
  m1f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone1.csv",header = TRUE)
  m2f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone2.csv",header = TRUE)
  m3f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone3.csv",header = TRUE)
  m4f19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone4.csv",header = TRUE)
  gradesf19 =     read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/chem1331f19_gradebook.csv",header = TRUE)
  #demo19 =       read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/DISCOVER_chem1331_f19.csv")
  videof19 =      read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/videowatching_resultsf19.csv",header = TRUE)
  #m1practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone1_practice.csv",header = TRUE)
  #m2practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone2_practice.csv",header = TRUE)
  #m3practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone3_practice.csv",header = TRUE)
  #m4practicef19 = read.csv("~/Teaching/Grades_and_SRT/Fall2019/CHEM1331/milestone4_practice.csv",header = TRUE)

  #F18
  gradesf18 =     read.csv("~/Teaching/Grades_and_SRT/Fall2018/chem1331_f18.csv",header = TRUE)
  gradesf18mood = read.csv("~/Teaching/Grades_and_SRT/Fall2018/c1_f18_moodle_gradebook.csv",header = TRUE)
  m1f18 =         read.csv("~/Teaching/Grades_and_SRT/Fall2018/milestone1.csv",header = TRUE)
  videof18      = read.csv("~/Teaching/Grades_and_SRT/Fall2018/videowatching_resultsf18.csv",header = TRUE)
}

1 Course Grade Distribution

As you will see below, the grade distributions are not directly comparable across semesters because we used different assignments. The main takeaways are:

In Fall2018 we weren’t sure whether we were pushing students too hard. When we thought that our Exam1 was too harsh, we went easier, especially on the final. The final exam was too easy, which explains why the overall grade was much higher.
The low number of DFW students in F18 can also be explained by the fact that the milestones had no numerical questions. These are well known to bring down the grade. We realized though that it made no sense to “kick the can down the road”, so we brought dimensional analysis back in F19.
Probably our Exam1 in F19 was too hard and Exam1 in F21 was too easy. I think the final exam in those last two years is where we want it to be.
Exam1 is still an unknown; I think in F21 it was too easy, which made students overconfident.
In terms of grade distribution, we probably want to be somewhere between F19 and F21

1.1 Letter grade

# Class size per semester: number of entries in each gradebook's Student column
nf18 <- length(gradesf18$Student)
nf19 <- length(gradesf19$Student)
nf21 <- length(gradesf21$Student)
nf22 <- length(gradesf22$Student)
nf23 <- length(gradesf23$Student)

# Remove D-: fold it into a plain D in the unposted final grade
gradesf18$Unposted.Final.Grade <- gsub("D-", "D", gradesf18$Unposted.Final.Grade, fixed = TRUE)
gradesf19$Unposted.Final.Grade <- gsub("D-", "D", gradesf19$Unposted.Final.Grade, fixed = TRUE)
gradesf21$Unposted.Final.Grade <- gsub("D-", "D", gradesf21$Unposted.Final.Grade, fixed = TRUE)
gradesf22$Unposted.Final.Grade <- gsub("D-", "D", gradesf22$Unposted.Final.Grade, fixed = TRUE)
gradesf23$Unposted.Final.Grade <- gsub("D-", "D", gradesf23$Unposted.Final.Grade, fixed = TRUE)

# Drop the Final.Grade column to avoid confusion with the unposted grades
gradesf18$Final.Grade <- NULL
gradesf19$Final.Grade <- NULL
gradesf21$Final.Grade <- NULL
gradesf22$Final.Grade <- NULL
gradesf23$Final.Grade <- NULL

# simpleLetter is the letter grade with every "+" and "-" modifier stripped.
# NOTE(review): it is derived from Unposted.Current.Grade, whereas the D-
# cleanup above edits Unposted.Final.Grade - confirm which column is intended.
stripModifiers <- function(letters) gsub("[-+]", "", letters)
gradesf18$simpleLetter <- stripModifiers(gradesf18$Unposted.Current.Grade)
gradesf19$simpleLetter <- stripModifiers(gradesf19$Unposted.Current.Grade)
gradesf21$simpleLetter <- stripModifiers(gradesf21$Unposted.Current.Grade)
gradesf22$simpleLetter <- stripModifiers(gradesf22$Unposted.Current.Grade)
gradesf23$simpleLetter <- stripModifiers(gradesf23$Unposted.Current.Grade)
# Letter-grade tables, one row per semester.
#
# table() on a plain character vector only reports the letters that actually
# occur, and rbind() lines the tables up by position rather than by name. A
# semester with no grades in one category would therefore shift (or recycle)
# its counts into the wrong columns. Tabulating every semester against the
# same set of levels keeps the columns aligned.
letterBySemester <- list(
  gradesf18$simpleLetter,
  gradesf19$simpleLetter,
  gradesf21$simpleLetter,
  gradesf22$simpleLetter,
  gradesf23$simpleLetter
)
letterLevels <- sort(unique(unlist(letterBySemester)))
letterCounts <- do.call(
  rbind,
  lapply(letterBySemester, function(x) table(factor(x, levels = letterLevels)))
)

# Percent of each letter; the denominator is the full vector length, NAs included
allLetters <- as.data.frame.matrix(letterCounts / lengths(letterBySemester) * 100)
#table(gradesf21$LetterGrade)/length(gradesf21$LetterGrade)*100

# Raw counts per semester plus a final row with the total over all semesters
letterTotals <- colSums(letterCounts)
storage.mode(letterTotals) <- "integer"  # keep the table integer-valued
allLettersAllYears <- as.data.frame.matrix(rbind(letterCounts, letterTotals))

semesterLabels <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
gradebooks <- list(gradesf18, gradesf19, gradesf21, gradesf22, gradesf23)

# summary() of the final course score, one row per semester
allStats <- as.data.frame.matrix(
  do.call(rbind, lapply(gradebooks, function(g) summary(g$Final.Score)))
)

#allStats = cbind(allStats,allLetters)
# Put the number of students in front of the summary statistics
allStats <- cbind(
  Nstudents = vapply(gradebooks, function(g) length(g$Student), integer(1)),
  allStats
)

# Label the rows of all three tables by semester
row.names(allStats) <- semesterLabels
row.names(allLetters) <- semesterLabels
row.names(allLettersAllYears) <- c(semesterLabels, "Total")

knitr::kable(allLetters, caption = "Final Grade Letter Percents", digits = 1)

Final Grade Letter Percents
	A	B	C	D	F
Fall18	50.8	37.6	6.6	2.2	2.8
Fall19	23.6	53.3	17.6	3.8	1.6
Fall21	37.9	35.9	16.9	5.1	4.1
Fall22	29.8	49.0	17.3	0.5	3.4
Fall23	47.0	31.8	16.6	0.7	4.0

knitr::kable(allLettersAllYears,caption = "Final Grade Letter Total Numbers",digits = 1)

Final Grade Letter Total Numbers
	A	B	C	D	F
Fall18	92	68	12	4	5
Fall19	43	97	32	7	3
Fall21	74	70	33	10	8
Fall22	62	102	36	1	7
Fall23	71	48	25	1	6
Total	342	385	138	23	29

# Build a pie chart of the simpleLetter column of one data frame.
#   df    - data frame containing a simpleLetter column
#   title - plot title
# Returns the ggplot object (a full-width stacked bar in polar coordinates).
create_pie_chart <- function(df, title) {
  pie <- ggplot(df, aes(x = "", fill = simpleLetter))
  pie <- pie + geom_bar(width = 1, color = "white")
  pie <- pie + coord_polar(theta = "y")
  pie <- pie + ggtitle(title)
  pie <- pie + theme_void()
  pie + theme(legend.position = "right")
}

# gridExtra supplies grid.arrange()
library(gridExtra)

# One pie chart per semester gradebook
p1 <- create_pie_chart(df = gradesf18, title = "Fall 18")
p2 <- create_pie_chart(df = gradesf19, title = "Fall 19")
p3 <- create_pie_chart(df = gradesf21, title = "Fall 21")
p4 <- create_pie_chart(df = gradesf22, title = "Fall 22")
p5 <- create_pie_chart(df = gradesf23, title = "Fall 23")

# Lay the five pies out on a grid that is two columns wide
grid.arrange(p1, p2, p3, p4, p5, ncol = 2)

1.2 Numerical grade

knitr::kable(allStats,caption = "Final Grade Statistics",digits = 1)

Final Grade Statistics
	Nstudents	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
Fall18	181	26.8	85.4	90.0	87.5	93.6	97.8
Fall19	182	26.2	76.8	81.2	80.2	85.7	94.6
Fall21	195	20.1	79.7	87.3	84.0	92.3	99.1
Fall22	208	37.4	80.5	86.2	84.6	91.8	97.9
Fall23	151	18.6	81.8	89.2	85.7	92.6	97.9

# Histogram of Final.Score (bins 5 points wide) for one gradebook
scoreHistogram <- function(grades, label) {
  ggplot(grades, aes(x = Final.Score)) +
    geom_histogram(binwidth = 5) +
    ggtitle(label)
}

h18 <- scoreHistogram(gradesf18, "Fall 18")
h19 <- scoreHistogram(gradesf19, "Fall 19")
h21 <- scoreHistogram(gradesf21, "Fall 21")
h22 <- scoreHistogram(gradesf22, "Fall 22")
h23 <- scoreHistogram(gradesf23, "Fall 23")

# Five histograms on a two-column grid
grid.arrange(h18, h19, h21, h22, h23, ncol = 2)

1.3 Withdrawn students

The statistics shown above include only the students who were still registered at the end of the semester. Some students withdrew from the course, so we use the number of students who took milestone 1 as the number of students who started, and compare it with the number of students who finished.

In 2018, 181 students took milestone 1 and 181 finished
In 2019, 191 students took milestone 1 and 182 finished
In 2021, 211 students took milestone 1 and 195 finished.
In 2022, 220 students took milestone 1 and 208 finished.
In 2023, 163 students took milestone 1 and 151 finished.

Pay attention to this high number of withdrawals. It means that the overall grade average would have been much lower if all the students who dropped had stayed until the end.

# Format part/whole as a percentage string rounded to two decimals, e.g. "4.71%"
asPercent <- function(part, whole) paste0(round(part / whole * 100, 2), "%")

# Naming used below, per semester:
#   *i   students who started (rows of the milestone 1 file)
#   *f   students who finished (entries in the gradebook's Final.Score)
#   *df  students whose simpleLetter is D or F
#   *dfp D/F students as a percent of those who started
#   *dfw withdrawn plus D/F students as a percent of those who started

# Fall 2018: counted from the Last.name column, with no attempt filter
f18i <- length(m1f18$Last.name)
f18f <- length(gradesf18$Final.Score)
f18df <- sum(gradesf18$simpleLetter %in% c("D","F"))
f18dfp <- asPercent(f18df, f18i)
f18dfw <- asPercent((f18i - f18f) + f18df, f18i)

# Fall 2019 onwards: one row per student is taken from attempt == 1
f19i <- length(m1f19$name[which(m1f19$attempt == 1)])
f19f <- length(gradesf19$Final.Score)
f19df <- sum(gradesf19$simpleLetter %in% c("D","F"))
f19dfp <- asPercent(f19df, f19i)
f19dfw <- asPercent((f19i - f19f) + f19df, f19i)

f21i <- length(m1f21$name[which(m1f21$attempt == 1)])
f21f <- length(gradesf21$Final.Score)
f21df <- sum(gradesf21$simpleLetter %in% c("D","F"))
f21dfp <- asPercent(f21df, f21i)
f21dfw <- asPercent((f21i - f21f) + f21df, f21i)

f22i <- length(m1f22$name[which(m1f22$attempt == 1)])
f22f <- length(gradesf22$Final.Score)
f22df <- sum(gradesf22$simpleLetter %in% c("D","F"))
f22dfp <- asPercent(f22df, f22i)
f22dfw <- asPercent((f22i - f22f) + f22df, f22i)

f23i <- length(m1f23$name[which(m1f23$attempt == 1)])
f23f <- length(gradesf23$Final.Score)
f23df <- sum(gradesf23$simpleLetter %in% c("D","F"))
f23dfp <- asPercent(f23df, f23i)
f23dfw <- asPercent((f23i - f23f) + f23df, f23i)

# One column of the summary table; mixing numbers and strings in c() makes
# every entry a character string.
dfwColumn <- function(started, finished, nDF, pctDF, pctDFW) {
  c(started, finished, finished - started,
    asPercent(started - finished, started),
    nDF, pctDF, pctDFW)
}

df <- data.frame(
  F18 = dfwColumn(f18i, f18f, f18df, f18dfp, f18dfw),
  F19 = dfwColumn(f19i, f19f, f19df, f19dfp, f19dfw),
  F21 = dfwColumn(f21i, f21f, f21df, f21dfp, f21dfw),
  F22 = dfwColumn(f22i, f22f, f22df, f22dfp, f22dfw),
  F23 = dfwColumn(f23i, f23f, f23df, f23dfp, f23dfw)
)
rownames(df) <- c(
  "Students started",
  "Student finished",
  "Difference",
  "Percent of W",
  "Students with D or F",
  "Percent D or F",
  "Total Percent of DFW"
)

kable(df, format = "markdown", row.names = TRUE)

	F18	F19	F21	F22	F23
Students started	181	191	211	220	163
Student finished	181	182	195	208	151
Difference	0	-9	-16	-12	-12
Percent of W	0%	4.71%	7.58%	5.45%	7.36%
Students with D or F	9	10	18	8	7
Percent D or F	4.97%	5.24%	8.53%	3.64%	4.29%
Total Percent of DFW	4.97%	9.95%	16.11%	9.09%	11.66%

2 Open Ended Exams Grade Distribution

# Exam 1: blank out placeholder scores and rescale the F21-F23 columns by /25*100.
# NOTE(review): presumably a 0 or "EX" marks a student who did not sit the exam,
# and 25 is the raw maximum - confirm.
#
# The NA assignments index the column vector directly. The data-frame form
# df[which(cond), ]$col = NA stops with "replacement has 1 row, data has 0"
# whenever no row matches the condition.
gradesf18$Exam.1..335345.[which(gradesf18$Exam.1..335345. == 0)] <- NA
#gradesf21$Exam..1..2161998.[which(gradesf21$Exam..1..2161998. == 0)] <- NA
gradesf21$Exam..1..2161998.[which(gradesf21$Exam..1..2161998. == "EX")] <- NA
gradesf21$Exam..1..2161998. <- as.numeric(gradesf21$Exam..1..2161998.) / 25 * 100

gradesf22$Open.Ended.Exam..1..2592814.[which(gradesf22$Open.Ended.Exam..1..2592814. == 0)] <- NA
gradesf22$Open.Ended.Exam..1..2592814. <- as.numeric(gradesf22$Open.Ended.Exam..1..2592814.) / 25 * 100

#gradesf23$Open.Ended.Exam..1..3262287.[which(gradesf23$Open.Ended.Exam..1..3262287. == 0)] <- NA
gradesf23$Open.Ended.Exam..1..3262287. <- as.numeric(gradesf23$Open.Ended.Exam..1..3262287.) / 25 * 100

# summary() appends an "NA's" entry only when values are missing, so the rows
# can differ in length and rbind() would recycle them. This table does not show
# the NA count, so only the first six statistics (Min. ... Max.) are kept.
exam1Summary <- function(x) unclass(summary(x))[1:6]
allStats <- as.data.frame.matrix(rbind(
  exam1Summary(gradesf18$Exam.1..335345.),
  exam1Summary(gradesf19$Open.Ended.Written.Exam.1..816325.),
  exam1Summary(gradesf21$Exam..1..2161998.),
  exam1Summary(gradesf22$Open.Ended.Exam..1..2592814.),
  exam1Summary(gradesf23$Open.Ended.Exam..1..3262287.)
))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats, caption = "Exam 1 Statistics", digits = 1)

Exam 1 Statistics
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
Fall18	16.0	52.0	66.0	64.2	76.0	96.0
Fall19	13.1	44.3	58.3	57.4	72.3	95.2
Fall21	0.0	68.0	81.2	77.9	92.0	100.0
Fall22	10.0	41.0	58.0	57.0	74.0	100.0
Fall23	8.0	48.0	65.0	64.9	86.0	98.0

#EXAM 2
# Scores of 0 are set to NA. The assignments index the column vector directly,
# because df[which(cond), ]$col = NA stops with an error when no row matches.
gradesf18$Exam.2..358394.[which(gradesf18$Exam.2..358394. == 0)] <- NA
gradesf19$Open.Ended.Exam.2..836125.[which(gradesf19$Open.Ended.Exam.2..836125. == 0)] <- NA
gradesf21$Exam..2..2171776.[which(gradesf21$Exam..2..2171776. == 0)] <- NA
gradesf21$Exam..2..2171776. <- as.numeric(gradesf21$Exam..2..2171776.) / 25 * 100

gradesf22$Open.Ended.Exam..2..2592815.[which(gradesf22$Open.Ended.Exam..2..2592815. == 0)] <- NA
# Fall 2022 is divided by 26 rather than 25 (original note: "out of 26 of all numbers :)")
gradesf22$Open.Ended.Exam..2..2592815. <- as.numeric(gradesf22$Open.Ended.Exam..2..2592815.) / 26 * 100

gradesf23$Open.Ended.Exam..2..3262288.[which(gradesf23$Open.Ended.Exam..2..3262288. == 0)] <- NA
gradesf23$Open.Ended.Exam..2..3262288. <- as.numeric(gradesf23$Open.Ended.Exam..2..3262288.) / 25 * 100

# summary() appends "NA's" only when values are missing; for a semester with no
# NAs, rbind() would recycle Min. into that column. Counting NAs explicitly
# gives every row the same seven entries.
examSummary <- function(x) c(unclass(summary(x))[1:6], "NA's" = sum(is.na(x)))
allStats <- as.data.frame.matrix(rbind(
  examSummary(gradesf18$Exam.2..358394.),
  examSummary(gradesf19$Open.Ended.Exam.2..836125.),
  examSummary(gradesf21$Exam..2..2171776.),
  examSummary(gradesf22$Open.Ended.Exam..2..2592815.),
  examSummary(gradesf23$Open.Ended.Exam..2..3262288.)
))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
#allStats$`NA's` = NULL
knitr::kable(allStats, caption = "Exam 2 Statistics", digits = 1)

Exam 2 Statistics
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
Fall18	19.0	59.5	75.0	70.5	84.0	98.0	5
Fall19	19.0	49.0	67.0	63.8	78.0	100.0	1
Fall21	9.0	51.4	65.4	63.6	78.3	98.5	2
Fall22	6.7	42.0	58.0	58.1	78.8	100.0	2
Fall23	6.0	43.2	60.0	58.6	76.0	94.0	1

#EXAM3
# Scores of 0 are set to NA. The assignments index the column vector directly,
# because df[which(cond), ]$col = NA stops with an error when no row matches.
gradesf18$Exam.3..395005.[which(gradesf18$Exam.3..395005. == 0)] <- NA
gradesf19$Open.Ended.Exam.3..875116.[which(gradesf19$Open.Ended.Exam.3..875116. == 0)] <- NA
gradesf21$Exam..3..2184781.[which(gradesf21$Exam..3..2184781. == 0)] <- NA
gradesf21$Exam..3..2184781. <- gradesf21$Exam..3..2184781. / 25 * 100
gradesf22$Open.Ended.Exam..3..2592816.[which(gradesf22$Open.Ended.Exam..3..2592816. == 0)] <- NA
gradesf22$Open.Ended.Exam..3..2592816. <- gradesf22$Open.Ended.Exam..3..2592816. / 25 * 100

gradesf23$Open.Ended.Exam..3..3262289.[which(gradesf23$Open.Ended.Exam..3..3262289. == 0)] <- NA
gradesf23$Open.Ended.Exam..3..3262289. <- gradesf23$Open.Ended.Exam..3..3262289. / 25 * 100

# summary() appends "NA's" only when values are missing; for a semester with no
# NAs, rbind() would recycle Min. into that column. Counting NAs explicitly
# gives every row the same seven entries.
examSummary <- function(x) c(unclass(summary(x))[1:6], "NA's" = sum(is.na(x)))

# The Fall18 row uses the Exam 3 column cleaned above. It previously summarised
# Final.exam..408510., which made it a copy of the final-exam row, so the
# rendered table must be re-knit.
allStats <- as.data.frame.matrix(rbind(
  examSummary(gradesf18$Exam.3..395005.),
  examSummary(gradesf19$Open.Ended.Exam.3..875116.),
  examSummary(gradesf21$Exam..3..2184781.),
  examSummary(gradesf22$Open.Ended.Exam..3..2592816.),
  examSummary(gradesf23$Open.Ended.Exam..3..3262289.)
))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats, caption = "Exam 3 Statistics", digits = 1)

Exam 3 Statistics
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
Fall18	48.9	72.4	81.4	79.9	88.3	99	5
Fall19	2.5	42.5	55.0	56.4	72.5	95	5
Fall21	9.3	38.0	52.8	51.2	64.7	99	11
Fall22	21.0	50.0	63.0	62.1	76.3	98	5
Fall23	12.0	48.0	64.0	62.5	81.2	98	7

#FINAL EXAM

# Scores of 0 are set to NA for F19 and F21 only. The assignments index the
# column vector directly, because df[which(cond), ]$col = NA stops with an
# error when no row matches.
gradesf19$Final.exam.Unposted.Current.Score[which(gradesf19$Final.exam.Unposted.Current.Score == 0)] <- NA
gradesf21$Final.Written.Exam.Current.Score[which(gradesf21$Final.Written.Exam.Current.Score == 0)] <- NA
#gradesf22$Final.Written.Exam.Current.Score[which(gradesf22$Final.Written.Exam.Current.Score == 0)] <- NA

# summary() appends "NA's" only when values are missing; for a semester with no
# NAs, rbind() would recycle Min. into that column. Counting NAs explicitly
# gives every row the same seven entries.
examSummary <- function(x) c(unclass(summary(x))[1:6], "NA's" = sum(is.na(x)))
allStats <- as.data.frame.matrix(rbind(
  examSummary(gradesf18$Final.exam..408510.),
  examSummary(gradesf19$Final.exam.Unposted.Current.Score),
  examSummary(gradesf21$Final.Written.Exam.Current.Score),
  examSummary(gradesf22$Final.Written.Exam.Current.Score),
  examSummary(gradesf23$Final.Written.Exam.Current.Score)
))
row.names(allStats) <- c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats, caption = "Final Exam Statistics", digits = 1)

Final Exam Statistics
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
Fall18	48.9	72.4	81.4	79.9	88.3	99.0	5
Fall19	33.7	55.3	64.8	65.3	75.7	97.3	5
Fall21	14.7	49.5	63.7	63.5	76.0	100.0	5
Fall22	40.3	60.4	67.2	68.7	77.9	93.9	1
Fall23	0.0	57.6	66.9	65.2	75.8	93.6	0

3 Milestone Grades Distribution

3.1 Milestone 1

3.1.1 First attempt

library(kableExtra)

# Keep only the rows whose attempt equals "1"; rows with an NA attempt are dropped
firstAttemptRows <- function(milestone) {
  isFirst <- milestone$attempt == "1"
  milestone[which(isFirst), ]
}
m1f19_att1 <- firstAttemptRows(m1f19)
m1f21_att1 <- firstAttemptRows(m1f21)
m1f22_att1 <- firstAttemptRows(m1f22)
m1f23_att1 <- firstAttemptRows(m1f23)
# Summarise one milestone across the four semesters F19, F21, F22 and F23.
#
# Arguments: four data frames (one per semester, in that order), each with a
#   numeric `score` column.
#
# Returns a list with
#   table - markdown kable: describe() statistics plus "# / %" columns for the
#           A, C and F bands
#   plot  - the value returned by plotBar() for the band counts
#   chi   - the value returned by plotChi() for the band counts
#
# Grade bands (an exhaustive, non-overlapping split of the non-missing scores):
#   A: score > 79.99     C: 69.99 < score <= 79.99     F: score <= 69.99
# Percentages use the full length of `score` as the denominator.
makeTable = function(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  ){
  semesters <- c("F19", "F21", "F22", "F23")
  scores <- list(
    F19 = m1f19_att1$score,
    F21 = m1f21_att1$score,
    F22 = m1f22_att1$score,
    F23 = m1f23_att1$score
  )
  n <- lengths(scores)

  # Count the scores in each band; missing scores belong to no band
  countBands <- function(x) {
    c(
      A = sum(x > 79.99, na.rm = TRUE),
      C = sum(x > 69.99 & x <= 79.99, na.rm = TRUE),
      F = sum(x <= 69.99, na.rm = TRUE)
    )
  }
  bands <- lapply(scores, countBands)

  # "<count> / <percent>%" strings for one band, one entry per semester
  bandColumn <- function(letter) {
    vapply(semesters, function(s) {
      k <- bands[[s]][[letter]]
      paste0(k, " / ", round(k / n[[s]] * 100, digits = 1), "%")
    }, character(1), USE.NAMES = FALSE)
  }

  combined_summary <- bind_rows(
    describe(scores$F19),
    describe(scores$F21),
    describe(scores$F22),
    describe(scores$F23)
  )
  combined_summary <- subset(combined_summary, select = c("n", "mean", "sd", "median", "min","max"))
  colnames(combined_summary)[colnames(combined_summary) == "n"] <- "students"
  rownames(combined_summary) <- semesters
  combined_summary$`# / % A scores` <- bandColumn("A")
  combined_summary$`# / % C scores` <- bandColumn("C")
  combined_summary$`# / % F scores` <- bandColumn("F")

  table <- kable(combined_summary, format = "markdown")

  # The plotting helpers receive plain (unnamed) A/C/F count vectors
  plot <- plotBar(
    unname(bands$F19), unname(bands$F21), unname(bands$F22), unname(bands$F23)
  )
  chi <- plotChi(
    unname(bands$F19), unname(bands$F21), unname(bands$F22), unname(bands$F23)
  )

  return(list(table = table, plot = plot, chi = chi))
}
# Stacked bar chart of A/C/F counts per semester.
#   mf19, mf21, mf22, mf23 - count vectors in the order A, C, F
# Each segment is labelled with its share of that semester's total, rounded to
# a whole percent. Returns the ggplot object.
plotBar= function(mf19,mf21,mf22,mf23){
  semesters <- c("f19", "f21", "f22", "f23")
  bands <- c("A", "C", "F")

  # Long format: one row per (semester, band)
  counts <- data.frame(
    Category = rep(bands, times = length(semesters)),
    Count = c(mf19, mf21, mf22, mf23),
    Group = rep(semesters, each = length(bands))
  )

  # Share of each band within its own semester
  counts <- mutate(
    group_by(counts, Group),
    Percentage = round(Count / sum(Count) * 100)
  )

  bars <- ggplot(counts, aes(x = factor(Group), y = Count, fill = Category))
  bars <- bars + geom_bar(stat = "identity")
  bars <- bars + geom_text(
    aes(label = paste0(Percentage, "%")),
    position = position_stack(vjust = 0.5),
    size = 3
  )
  bars <- bars + labs(x = "Semester", y = "Students", title = "Milestone ACF grade across semesters")
  bars <- bars + theme_minimal()
  bars
}
library(corrplot)
# Chi-square test of independence between semester and A/C/F band.
#   mf19, mf21, mf22, mf23 - count vectors in the order A, C, F
# Side effects: writes the p-value as an HTML paragraph with cat() and draws
# the Pearson residuals with corrplot().
# Returns the drawn plot as captured by recordPlot().
plotChi= function(mf19,mf21,mf22,mf23){
  # 4 x 3 contingency table: one row per semester, one column per band
  data_matrix <- matrix(c(mf19, mf21, mf22, mf23), nrow = 4, byrow = TRUE)
  rownames(data_matrix) <- c("f19", "f21", "f22", "f23")
  colnames(data_matrix) <- c("A", "C", "F")

  chi_square_result <- chisq.test(data_matrix)

  # chisq.test() already returns Pearson residuals,
  # (observed - expected) / sqrt(expected), so they are plotted as they are.
  # Dividing by sqrt(expected) a second time would shrink the cells with large
  # expected counts and distort the comparison between cells.
  pearson_residuals <- chi_square_result$residuals

  cat(paste("<p><b>The Chi-square analysis gives a p=",round(chi_square_result$p.value,5),"</b></p>"))

  # is.corr = FALSE because residuals are not confined to [-1, 1]
  corrplot(pearson_residuals, is.corr = FALSE, method = "circle",  title = "Chi-square Residuals", tl.col = "black", tl.srt = 45)
  myplot <- recordPlot()
  return(myplot)
}
# Apply some additional formatting if needed
#table <- table %>%
#  kable_styling(full_width = FALSE) %>%
#  add_header_above(c("Statistic" = 1, "Value" = 1)) %>%
#  row_spec(0, bold = TRUE)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0.01967 </b></p>

library(lme4)
library(lmerTest)
# One-way ANOVA of scores across four groups, plotted as group means with
# bootstrap confidence intervals.
#   vec1..vec4 - numeric score vectors, one per group
#   listLabels - four group labels, in the same order as the vectors
#   myTitle    - plot title
# Prints the plot, annotated with the ANOVA p-value, and returns what print() returns.
runAnovaAmongMilestones = function(vec1,vec2,vec3,vec4,listLabels,myTitle){
  # Long format: one row per score, tagged with its group label
  data <- data.frame(
    group = rep(listLabels, times = c(length(vec1), length(vec2), length(vec3), length(vec4))),
    value = c(vec1, vec2, vec3, vec4)
  )

  # Plain fixed-effects one-way ANOVA. The earlier mixed model,
  # value ~ group + (1 | group), used `group` both as the fixed effect and as
  # the random-intercept grouping factor. The fixed effect already absorbs every
  # group mean, so the random-intercept variance cannot be identified.
  model <- lm(value ~ group, data = data)
  anova_result <- anova(model)
  p_value <- anova_result$`Pr(>F)`[1]

  base_plot_with_means <- ggplot(data, aes(x = group, y = value)) +
    stat_summary(fun = mean, geom = "point", size = 4) +
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
    theme_minimal() +
    labs(title = myTitle, x = "Group", y = "Score")

  # Put the p-value at the top-left; na.rm keeps a missing score from turning
  # the label's y position into NA.
  plot_with_anova_and_means <- base_plot_with_means +
    annotate("text", x = 1, y = max(data$value, na.rm = TRUE), label = paste("ANOVA p-value:", signif(p_value, digits = 3)), hjust = 0, vjust = 1)

  # Print the plot
  print(plot_with_anova_and_means)
}
runAnovaAmongMilestones(m1f19_att1$score, m1f21_att1$score, m1f22_att1$score, m1f23_att1$score,c("F19","F21","F22","F23"),"Milestone 1 - 1st attempt")

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	191	86.59701	11.47493	89.28000	35.92683	100	150 / 78.5%	23 / 12%	18 / 9.4%
F21	211	84.70227	13.83789	87.80250	11.11667	100	152 / 72%	33 / 15.6%	26 / 12.3%
F22	220	82.94547	13.94072	85.58783	40.71200	100	144 / 65.5%	35 / 15.9%	41 / 18.6%
F23	163	82.12047	16.51057	87.83817	23.49350	100	107 / 65.6%	22 / 13.5%	34 / 20.9%

ACF scale

print(result$plot)

print(result$chi)

3.1.2 Second attempt

m1f19_att1 = m1f19[which(m1f19$attempt == "1" | m1f19$attempt == "2" ),]
m1f21_att1 = m1f21[which(m1f21$attempt == "1" | m1f21$attempt == "2" ),]
m1f22_att1 = m1f22[which(m1f22$attempt == "1" | m1f22$attempt == "2" ),]
m1f23_att1 = m1f23[which(m1f23$attempt == "1" | m1f23$attempt == "2" ),]

# Keep exactly one row per student: the attempt with the highest score.
#   df - data frame with an `id` column (student) and a numeric `score` column
# Returns an ungrouped data frame in the original row order.
#   * Attempts with a missing score are ignored, so a single NA attempt no
#     longer removes the student (max() would otherwise be NA for that student).
#   * When the best score occurs more than once, only the first such row is
#     kept, so a student is never counted twice.
#   * Students with no non-missing score are dropped.
chooseHighest = function(df){
  df <- df %>%
    group_by(id) %>%
    filter(!is.na(score)) %>%
    filter(score == max(score)) %>%
    filter(row_number() == 1) %>%
    ungroup()
  return(df)
}

m1f19_att1 = chooseHighest(m1f19_att1)
m1f21_att1 = chooseHighest(m1f21_att1)
m1f22_att1 = chooseHighest(m1f22_att1)
m1f23_att1 = chooseHighest(m1f23_att1)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0.21362 </b></p>

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	191	88.45648	8.922470	90.00200	58.64667	100	168 / 88%	15 / 7.9%	8 / 4.2%
F21	211	87.57826	11.124503	89.43867	27.53433	100	177 / 83.9%	23 / 10.9%	11 / 5.2%
F22	220	86.62394	9.930089	88.33600	54.61150	100	173 / 78.6%	33 / 15%	14 / 6.4%
F23	163	86.75313	10.975111	89.29000	23.49350	100	128 / 78.5%	24 / 14.7%	11 / 6.7%

ACF scale

print(result$plot)

print(result$chi)

3.1.3 Third Attempt

m1f19_att1 = m1f19[which(m1f19$attempt == "1" | m1f19$attempt == "2" | m1f19$attempt == "3" ),]
m1f21_att1 = m1f21[which(m1f21$attempt == "1" | m1f21$attempt == "2" | m1f21$attempt == "3" ),]
m1f22_att1 = m1f22[which(m1f22$attempt == "1" | m1f22$attempt == "2" | m1f22$attempt == "3" ),]
m1f23_att1 = m1f23[which(m1f23$attempt == "1" | m1f23$attempt == "2" | m1f23$attempt == "3" ),]

m1f19_att1 = chooseHighest(m1f19_att1)
m1f21_att1 = chooseHighest(m1f21_att1)
m1f22_att1 = chooseHighest(m1f22_att1)
m1f23_att1 = chooseHighest(m1f23_att1)

result =  makeTable(m1f19_att1, m1f21_att1, m1f22_att1, m1f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0.25726 </b></p>

runAnovaAmongMilestones(m1f19_att1$score, m1f21_att1$score, m1f22_att1$score, m1f23_att1$score,c("F19","F21","F22","F23"),"Milestone 1 - all attempts")

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	191	89.05406	8.190662	90.28167	58.64667	100	175 / 91.6%	11 / 5.8%	5 / 2.6%
F21	211	88.15861	10.280442	89.43867	27.53433	100	180 / 85.3%	26 / 12.3%	5 / 2.4%
F22	220	88.12103	8.473910	88.89125	55.81883	100	194 / 88.2%	19 / 8.6%	7 / 3.2%
F23	163	88.18607	10.009504	89.60533	23.49350	100	142 / 87.1%	13 / 8%	8 / 4.9%

ACF scale

print(result$plot)

print(result$chi)

3.2 Milestone 2

3.2.1 First attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0 </b></p>

runAnovaAmongMilestones(m2f19_att1$score, m2f21_att1$score, m2f22_att1$score, m2f23_att1$score,c("F19","F21","F22","F23"),"Milestone 2 - 1st attempt")

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	190	87.24709	10.81401	90.35901	39.07243	100	152 / 80%	22 / 11.6%	16 / 8.4%
F21	205	77.56799	14.75559	79.77267	37.10763	100	102 / 49.8%	51 / 24.9%	52 / 25.4%
F22	216	81.86074	12.90459	85.01722	38.42122	100	136 / 63%	40 / 18.5%	40 / 18.5%
F23	158	82.34789	14.76793	85.93283	29.16567	100	110 / 69.6%	17 / 10.8%	31 / 19.6%

ACF scale

print(result$plot)

print(result$chi)

3.2.2 Second attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" | m2f19$attempt == "2" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" | m2f21$attempt == "2" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" | m2f22$attempt == "2" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" | m2f23$attempt == "2" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0 </b></p>

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	190	88.71861	9.371188	90.43986	39.07243	100	171 / 90%	10 / 5.3%	9 / 4.7%
F21	205	81.02490	12.313678	83.10267	37.10763	100	128 / 62.4%	43 / 21%	34 / 16.6%
F22	216	85.05377	10.152792	86.92933	46.45111	100	165 / 76.4%	31 / 14.4%	20 / 9.3%
F23	158	85.05217	11.825039	87.05144	30.41511	100	121 / 76.6%	20 / 12.7%	17 / 10.8%

ACF scale

print(result$plot)

print(result$chi)

3.2.3 Third Attempt

m2f19_att1 = m2f19[which(m2f19$attempt == "1" | m2f19$attempt == "2" | m2f19$attempt == "3" ),]
m2f21_att1 = m2f21[which(m2f21$attempt == "1" | m2f21$attempt == "2" | m2f21$attempt == "3" ),]
m2f22_att1 = m2f22[which(m2f22$attempt == "1" | m2f22$attempt == "2" | m2f22$attempt == "3" ),]
m2f23_att1 = m2f23[which(m2f23$attempt == "1" | m2f23$attempt == "2" | m2f23$attempt == "3" ),]

m2f19_att1 = chooseHighest(m2f19_att1)
m2f21_att1 = chooseHighest(m2f21_att1)
m2f22_att1 = chooseHighest(m2f22_att1)
m2f23_att1 = chooseHighest(m2f23_att1)

result =  makeTable(m2f19_att1, m2f21_att1, m2f22_att1, m2f23_att1  )

## <p><b>The Chi-square analysis gives a p= 0 </b></p>

runAnovaAmongMilestones(m2f19_att1$score, m2f21_att1$score, m2f22_att1$score, m2f23_att1$score,c("F19","F21","F22","F23"),"Milestone 2 - all attempts")

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	190	89.20468	8.464771	90.43986	39.07243	100	175 / 92.1%	10 / 5.3%	5 / 2.6%
F21	205	82.64865	10.762921	84.18644	37.10763	100	140 / 68.3%	41 / 20%	24 / 11.7%
F22	216	86.50969	8.566437	87.95989	53.28894	100	183 / 84.7%	21 / 9.7%	12 / 5.6%
F23	158	86.04928	10.614212	87.43781	30.41511	100	128 / 81%	19 / 12%	11 / 7%

ACF scale

print(result$plot)

print(result$chi)

3.3 Milestone 3

3.3.1 First attempt

m3f19_att1 = m3f19[which(m3f19$attempt == "1" ),]
m3f21_att1 = m3f21[which(m3f21$attempt == "1" ),]
m3f22_att1 = m3f22[which(m3f22$attempt == "1" ),]
m3f23_att1 = m3f23[which(m3f23$attempt == "1" ),]

m3f19_att1 = chooseHighest(m3f19_att1)
m3f21_att1 = chooseHighest(m3f21_att1)
m3f22_att1 = chooseHighest(m3f22_att1)
m3f23_att1 = chooseHighest(m3f23_att1)

result =  makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1  )

## <p><b>The Chi-square analysis gives a p= 3e-05 </b></p>

runAnovaAmongMilestones(m3f19_att1$score, m3f21_att1$score, m3f22_att1$score, m3f23_att1$score,c("F19","F21","F22","F23"),"Milestone 3 - 1st attempts")

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	183	87.08412	9.75140	88.51711	49.98278	100	151 / 82.5%	21 / 11.5%	11 / 6%
F21	195	81.63402	15.31543	85.99000	24.37333	100	122 / 62.6%	30 / 15.4%	43 / 22.1%
F22	210	79.51752	16.25780	84.11167	9.18000	100	128 / 61%	33 / 15.7%	49 / 23.3%
F23	151	82.07987	13.73418	86.29667	43.91667	100	104 / 68.9%	17 / 11.3%	30 / 19.9%

ACF scale

print(result$plot)

print(result$chi)

3.3.2 Second attempt

# Milestone 3 after two attempts: keep attempts "1" and "2" of each cohort and
# pass them through chooseHighest() (defined earlier in this file).
# NOTE: the *_att1 names are reused even though two attempts are included.
first_two <- c("1", "2")
m3f19_att1 <- chooseHighest(m3f19[m3f19$attempt %in% first_two, ])
m3f21_att1 <- chooseHighest(m3f21[m3f21$attempt %in% first_two, ])
m3f22_att1 <- chooseHighest(m3f22[m3f22$attempt %in% first_two, ])
m3f23_att1 <- chooseHighest(m3f23[m3f23$attempt %in% first_two, ])

# makeTable() returns the list whose $table, $plot and $chi are shown below.
result <- makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1)

## <p><b>The Chi-square analysis gives a p= 1e-05 </b></p>

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	183	88.55890	7.485728	88.65200	53.23144	100	166 / 90.7%	14 / 7.7%	3 / 1.6%
F21	195	84.64072	11.961618	87.65000	45.64333	100	141 / 72.3%	28 / 14.4%	26 / 13.3%
F22	210	83.22498	12.068709	85.37167	37.95333	100	147 / 70%	34 / 16.2%	29 / 13.8%
F23	151	85.22079	10.182821	86.61667	44.83333	100	122 / 80.8%	18 / 11.9%	11 / 7.3%

ACF scale

print(result$plot)

print(result$chi)

3.3.3 Third Attempt

# Milestone 3 after all three attempts: keep attempts "1"-"3" of each cohort
# and pass them through chooseHighest() (defined earlier in this file).
# NOTE: the *_att1 names are reused even though three attempts are included.
first_three <- c("1", "2", "3")
m3f19_att1 <- chooseHighest(m3f19[m3f19$attempt %in% first_three, ])
m3f21_att1 <- chooseHighest(m3f21[m3f21$attempt %in% first_three, ])
m3f22_att1 <- chooseHighest(m3f22[m3f22$attempt %in% first_three, ])
m3f23_att1 <- chooseHighest(m3f23[m3f23$attempt %in% first_three, ])

# makeTable() returns the list whose $table, $plot and $chi are shown below.
result <- makeTable(m3f19_att1, m3f21_att1, m3f22_att1, m3f23_att1)

## <p><b>The Chi-square analysis gives a p= 0.0018 </b></p>

runAnovaAmongMilestones(
  m3f19_att1$score, m3f21_att1$score, m3f22_att1$score, m3f23_att1$score,
  c("F19", "F21", "F22", "F23"),
  "Milestone 3 - all attempts"
)

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	183	88.84171	7.287616	88.88267	53.23144	100	171 / 93.4%	9 / 4.9%	3 / 1.6%
F21	195	86.26802	10.446492	87.77333	45.64333	100	156 / 80%	27 / 13.8%	12 / 6.2%
F22	210	84.88897	10.332357	86.15333	37.95333	100	162 / 77.1%	33 / 15.7%	15 / 7.1%
F23	151	85.81885	9.479026	87.04190	44.83333	100	127 / 84.1%	16 / 10.6%	8 / 5.3%

ACF scale

print(result$plot)

print(result$chi)

3.4 Milestone 4

3.4.1 First attempt

# Milestone 4, first attempt: keep only the attempt "1" rows of each cohort and
# pass them through chooseHighest() (defined earlier in this file).
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% "1", ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% "1", ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% "1", ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% "1", ])

# makeTable() returns the list whose $table, $plot and $chi are shown below.
result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)

## <p><b>The Chi-square analysis gives a p= 0.46484 </b></p>

runAnovaAmongMilestones(
  m4f19_att1$score, m4f21_att1$score, m4f22_att1$score, m4f23_att1$score,
  c("F19", "F21", "F22", "F23"),
  "Milestone 4 - 1st attempts"
)

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	177	79.27583	14.73094	81.64083	38.43000	100	96 / 54.2%	37 / 20.9%	44 / 24.9%
F21	187	82.10934	13.27249	83.88033	47.22000	100	116 / 62%	39 / 20.9%	33 / 17.6%
F22	206	79.04682	14.39811	81.80700	26.94200	100	114 / 55.3%	41 / 19.9%	53 / 25.7%
F23	148	80.92414	14.35564	84.57617	44.43833	100	91 / 61.5%	28 / 18.9%	29 / 19.6%

print(result$plot)

print(result$chi)

3.4.2 Second attempt

# Milestone 4 after two attempts: keep attempts "1" and "2" of each cohort and
# pass them through chooseHighest() (defined earlier in this file).
# NOTE: the *_att1 names are reused even though two attempts are included.
first_two <- c("1", "2")
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% first_two, ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% first_two, ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% first_two, ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% first_two, ])

# makeTable() returns the list whose $table, $plot and $chi are shown below.
result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)

## <p><b>The Chi-square analysis gives a p= 0.63746 </b></p>

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	177	83.85381	11.883539	86.58267	45.94233	100	125 / 70.6%	30 / 16.9%	22 / 12.4%
F21	187	84.93125	11.022564	86.11333	51.10067	100	139 / 74.3%	29 / 15.5%	19 / 10.2%
F22	206	83.85520	10.368770	84.99433	46.66800	100	146 / 70.9%	43 / 20.9%	18 / 8.7%
F23	148	85.15438	9.762059	86.24550	46.65800	100	112 / 75.7%	24 / 16.2%	12 / 8.1%

ACF scale

print(result$plot)

print(result$chi)

3.4.3 Third Attempt

# Milestone 4 after all three attempts: keep attempts "1"-"3" of each cohort
# and pass them through chooseHighest() (defined earlier in this file).
# NOTE: the *_att1 names are reused even though three attempts are included.
first_three <- c("1", "2", "3")
m4f19_att1 <- chooseHighest(m4f19[m4f19$attempt %in% first_three, ])
m4f21_att1 <- chooseHighest(m4f21[m4f21$attempt %in% first_three, ])
m4f22_att1 <- chooseHighest(m4f22[m4f22$attempt %in% first_three, ])
m4f23_att1 <- chooseHighest(m4f23[m4f23$attempt %in% first_three, ])

# makeTable() returns the list whose $table, $plot and $chi are shown below.
result <- makeTable(m4f19_att1, m4f21_att1, m4f22_att1, m4f23_att1)

## <p><b>The Chi-square analysis gives a p= 0.60029 </b></p>

runAnovaAmongMilestones(
  m4f19_att1$score, m4f21_att1$score, m4f22_att1$score, m4f23_att1$score,
  c("F19", "F21", "F22", "F23"),
  "Milestone 4 - all attempts"
)

result$table

	students	mean	sd	median	min	max	# / % A scores	# / % C scores	# / % F scores
F19	177	85.56451	10.407627	87.21733	45.94233	100	141 / 79.7%	22 / 12.4%	14 / 7.9%
F21	187	86.17210	9.920564	86.94967	51.10067	100	149 / 79.7%	26 / 13.9%	12 / 6.4%
F22	206	84.98724	9.406832	86.10917	46.66800	100	159 / 77.2%	37 / 18%	11 / 5.3%
F23	148	85.99251	8.627701	86.24800	46.65800	100	118 / 79.7%	24 / 16.2%	6 / 4.1%

ACF scale

print(result$plot)

print(result$chi)

3.4.4 Conclusions - Take home

grid.arrange(h18,h19,h21,h22,h23, ncol=2)

The first thing is that it seems that the chi square highlights better the differences between the groups. An anova between the scores is not as useful because the p will never be significant.
M1-att1: looking at chi square on ACF, F19 did much better than F23 with a p=0.01
This significance is not present when doing ANOVA on absolute scores
This trend persists when looking at highest (or third) attempt, but then you can start seeing how F21 stopped trying
M2-att1: The f21 debacle (overconfidence perhaps?) emerges. F19 is still doing very well.
Even after all attempts we can see how f21 does not catch up, so it is in milestone 2 where this year really suffers. You see the highest numbers in f21 for students who stopped trying which justifies the highest drop numbers.
M3-att1: F19 really excelled at this one. This is a clear case of pre and post-covid
F22 seemed to have deflated a little bit
M4: after several people dropped f21 is doing better than the rest. Even better than f19.
F19 has deflated, perhaps due to people not dropping. The molarity questions may have something to do with it.
Although F23 is when we simplified the isomerism question, there doesn't seem to be an effect: F22 and F23 performed similarly.

It’s as though there are too many variables at play to really see a trend by cohort, by milestone, or by attempt.

3.5 Comparing milestones

Each milestone has its majority of questions on a specific topic of the course, being Milestone 1 mostly about atomic and electronic structure and theory of light, Milestone 2 about Chemical bond and molecular structure (including NMR), Milestone 3 functional groups, IR and intermolecular forces, and Milestone 4 about thermochemistry of phase change, solutions, and molarity problems.

Results merging all semesters

library(dplyr)
library(tidyr)
#merge all
# Stack one milestone's attempt tables from four semesters and reshape them to
# one row per (name, semester).
#
# Args:
#   df1..df4: data frames with at least the columns "name", "attempt" and
#             "score"; they are labelled "f19", "f21", "f22" and "f23" in that
#             order.
# Returns:
#   A tibble with columns name, semester, attempt1..attempt3 (attempt4, if
#   present, is dropped), highest (best of attempts 1-3, ignoring NA),
#   letter1 and letter_highest ("A" > 79.99, "C" > 69.99, otherwise "F").
mergeAllMilestones = function(df1,df2,df3,df4){
  # Keep only the three needed columns and tag the rows with their semester.
  tag_semester <- function(df, label) {
    long <- df[, c("name", "attempt", "score")]
    long$semester <- label
    long
  }
  # Map a numeric score vector onto the A / C / F scale.
  to_letter <- function(s) {
    ifelse(s > 79.99, "A", ifelse(s > 69.99, "C", "F"))
  }

  stacked <- rbind(
    tag_semester(df1, "f19"),
    tag_semester(df2, "f21"),
    tag_semester(df3, "f22"),
    tag_semester(df4, "f23")
  )

  # One column per attempt number: attempt1, attempt2, ...
  wide <- pivot_wider(
    stacked,
    names_from = attempt,
    values_from = score,
    names_prefix = "attempt"
  )
  wide <- mutate(wide, highest = pmax(attempt1, attempt2, attempt3, na.rm = TRUE))

  # A fourth attempt is not part of the analysis.
  wide$attempt4 <- NULL
  wide$letter1 <- to_letter(wide$attempt1)
  wide$letter_highest <- to_letter(wide$highest)
  return(wide)
}
# One merged, wide table per milestone, combining the F19, F21, F22 and F23
# attempt data in that order.
m1_all = mergeAllMilestones(m1f19,m1f21,m1f22,m1f23)
m2_all = mergeAllMilestones(m2f19,m2f21,m2f22,m2f23)
m3_all = mergeAllMilestones(m3f19,m3f21,m3f22,m3f23)
m4_all = mergeAllMilestones(m4f19,m4f21,m4f22,m4f23)

# Count students for every (first-attempt letter, highest-attempt letter) pair
# and express each count as a share of its row and column totals.
#
# Args:
#   df: data frame with columns letter1 and letter_highest.
# Returns:
#   An ungrouped tibble with columns letter1, letter_highest, count,
#   percentage1 (share within the same letter1) and percentage2 (share within
#   the same letter_highest).
prepare_alluvial_data <- function(df) {
  pair_counts <- summarise(group_by(df, letter1, letter_highest), count = n())

  pair_counts %>%
    # Regrouping replaces whatever grouping summarise() left behind.
    group_by(letter1) %>%
    mutate(percentage1 = count / sum(count) * 100) %>%
    group_by(letter_highest) %>%
    mutate(percentage2 = count / sum(count) * 100) %>%
    ungroup()
}
# Letter-transition counts (first attempt -> highest attempt) per milestone,
# used as input for the alluvial plots.
m1_summary <- prepare_alluvial_data(m1_all)
m2_summary <- prepare_alluvial_data(m2_all)
m3_summary <- prepare_alluvial_data(m3_all)
m4_summary <- prepare_alluvial_data(m4_all)

library(ggalluvial)
# Draw a two-axis alluvial diagram: letter on the first attempt (left) flowing
# into the letter on the highest attempt (right).
#
# Args:
#   df_summary: data frame with columns letter1, letter_highest and count.
#   title:      plot title.
# Returns:
#   A ggplot object (legend hidden, grid lines removed).
plot_alluvial <- function(df_summary, title) {
  flow_mapping <- aes(axis1 = letter1, axis2 = letter_highest, y = count)
  # Label each stratum with its rounded share; add after_stat(stratum) to the
  # paste() call to also show the letter.
  share_label <- aes(label = paste(round(after_stat(prop) * 100, 0), "%"))
  letter_colours <- c("A" = "green", "C" = "blue", "F" = "red")
  bare_theme <- theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

  ggplot(data = df_summary, mapping = flow_mapping) +
    geom_alluvium(aes(fill = letter1), width = 0.2) +
    geom_stratum(aes(fill = letter1), width = 0.2) +
    geom_stratum(aes(fill = letter_highest), width = 0.2) +
    geom_text(stat = "stratum", share_label, size = 3) +
    scale_x_discrete(
      limits = c("letter1", "letter_highest"),
      labels = c("1st attempt", "Highest attempt"),
      expand = c(0.15, 0.05)
    ) +
    scale_fill_manual(values = letter_colours) +
    labs(title = title, y = "Students", x = "") +
    theme_minimal() +
    bare_theme
}

# Plotting the alluvial diagrams: one per milestone, titled with the
# milestone's main topic.
plot1 <- plot_alluvial(m1_summary, "Milestone 1: Atom struct.")
plot2 <- plot_alluvial(m2_summary, "Milestone 2: Molecular struct.")
plot3 <- plot_alluvial(m3_summary, "Milestone 3: Functional Groups")
plot4 <- plot_alluvial(m4_summary, "Milestone 4: Phases & solutions")

# Display the plots in a 2 x 2 grid; the arranged grob is kept in `p` so it
# can be saved with the (commented-out) ggsave() call below.
library(gridExtra)
p=grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]

#ggsave("milestones_by_topics.png", plot = p, bg = "transparent")

3.6 Students who stopped trying

#count
# Keep the attempt rows of students who "gave up": fewer than three rows for
# their id and no score above 80.
#
# Args:
#   df: data frame with columns name, id, attempt and score (one row per
#       attempt).
# Returns:
#   The rows of df whose id appears at most twice and never has a score
#   strictly greater than 80, restricted to name, id, attempt and score.
countGiveUp = function(df){
  attempts_per_id <- table(df$id)
  # ids that used three or more attempts
  exhausted_ids <- names(attempts_per_id)[attempts_per_id >= 3]
  # ids with at least one score above the 80 cut-off
  passed_ids <- unique(df$id[df$score > 80.00])

  keep <- !(df$id %in% exhausted_ids) & !(df$id %in% passed_ids)
  df[keep, c("name", "id", "attempt", "score")]
}
# Milestone 1: rows of students who stopped early without reaching 80.
giveUp19 <- countGiveUp(m1f19)
giveUp21 <- countGiveUp(m1f21)
giveUp22 <- countGiveUp(m1f22)
giveUp23 <- countGiveUp(m1f23)

# For each cohort, count how many of those students made exactly two attempts
# and how many made exactly one (rows per id in the give-up table).
name_counts <- table(giveUp19$id)
twice19 <- sum(name_counts == 2)
once19 <- sum(name_counts == 1)

name_counts <- table(giveUp21$id)
twice21 <- sum(name_counts == 2)
once21 <- sum(name_counts == 1)

name_counts <- table(giveUp22$id)
twice22 <- sum(name_counts == 2)
once22 <- sum(name_counts == 1)

name_counts <- table(giveUp23$id)
twice23 <- sum(name_counts == 2)
once23 <- sum(name_counts == 1)
# Build a one-row data frame from a vector of values and matching column
# titles.
#
# Args:
#   numbers:     values for the single row.
#   titles:      column names, same length as numbers.
#   table_title: currently unused (kept for callers that pass a caption).
# Returns:
#   A data frame with one row and length(numbers) columns named by titles.
# Raises:
#   An error when numbers and titles differ in length.
writeAttemptTable <- function(numbers, titles, table_title) {
  n_cols <- length(numbers)
  if (n_cols != length(titles)) {
    stop("Length of numbers and titles must be the same.")
  }

  one_row <- data.frame(matrix(NA, nrow = 1, ncol = n_cols))
  names(one_row) <- titles
  one_row[1, ] <- numbers
  return(one_row)
}
# Number of distinct students (by id) in one give-up table.
settledStudents <- function(giveUp) length(unique(giveUp$id))
titles <- c("F19", "F21", "F22", "F23")

#M1 -- giveUp19..giveUp23 still hold the Milestone 1 results at this point
numbers <- vapply(list(giveUp19, giveUp21, giveUp22, giveUp23), settledStudents, integer(1))
table <- writeAttemptTable(numbers, titles,"Students who settled for a lower score without trying 3 attempts. Milestone 1")

#M2
giveUp19 <- countGiveUp(m2f19)
giveUp21 <- countGiveUp(m2f21)
giveUp22 <- countGiveUp(m2f22)
giveUp23 <- countGiveUp(m2f23)
numbers <- vapply(list(giveUp19, giveUp21, giveUp22, giveUp23), settledStudents, integer(1))
table2 <- writeAttemptTable(numbers, titles,"Students who settled for a lower score without trying 3 attempts. Milestone 2")
table <- rbind(table, table2)

#M3
giveUp19 <- countGiveUp(m3f19)
giveUp21 <- countGiveUp(m3f21)
giveUp22 <- countGiveUp(m3f22)
giveUp23 <- countGiveUp(m3f23)
numbers <- vapply(list(giveUp19, giveUp21, giveUp22, giveUp23), settledStudents, integer(1))
table2 <- writeAttemptTable(numbers, titles,"Students who settled for a lower score without trying 3 attempts. Milestone 3")
table <- rbind(table, table2)

#M4 -- giveUp19..giveUp23 are left holding the Milestone 4 results
giveUp19 <- countGiveUp(m4f19)
giveUp21 <- countGiveUp(m4f21)
giveUp22 <- countGiveUp(m4f22)
giveUp23 <- countGiveUp(m4f23)
numbers <- vapply(list(giveUp19, giveUp21, giveUp22, giveUp23), settledStudents, integer(1))
table2 <- writeAttemptTable(numbers, titles,"Students who settled for a lower score without trying 3 attempts. Milestone 4")
table <- rbind(table, table2)

# One row per milestone, one column per cohort.
rownames(table) <- paste("Milestone", 1:4)
knitr::kable(table, caption="Students who settled for a lower score without trying 3 attempts.")

Students who settled for a lower score without trying 3 attempts.
	F19	F21	F22	F23
Milestone 1	14	23	13	12
Milestone 2	9	32	16	14
Milestone 3	7	23	19	14
Milestone 4	14	25	31	18

#now percentage
# Convert the give-up counts into percentages of each cohort's size.
tablePercent=table
# Cohort sizes are the row counts of the Milestone 1 per-student tables as
# last assigned earlier in the file.
# NOTE(review): these Milestone 1 sizes are used as the denominator for the
# Milestone 2-4 rows as well -- confirm this is intended.
len1=nrow(m1f19_att1)
len2=nrow(m1f21_att1)
len3=nrow(m1f22_att1)
len4=nrow(m1f23_att1)

# Each column becomes a character vector such as "7.3%".
tablePercent$F19 = paste0( round( (tablePercent$F19/len1)*100,1),"%")
tablePercent$F21 = paste0( round( (tablePercent$F21/len2)*100,1),"%")
tablePercent$F22 = paste0( round( (tablePercent$F22/len3)*100,1),"%")
tablePercent$F23 = paste0( round( (tablePercent$F23/len4)*100,1),"%")
# Average row: strip the "%" sign, average the already-rounded percentages of
# the four milestones, and format the result the same way.
avg_row <- data.frame(
  A = paste0(round(mean(as.numeric(sub("%", "", tablePercent[,1]))), 1), "%"),
  B = paste0(round(mean(as.numeric(sub("%", "", tablePercent[,2]))), 1), "%"),
  C = paste0(round(mean(as.numeric(sub("%", "", tablePercent[,3]))), 1), "%"),
  D = paste0(round(mean(as.numeric(sub("%", "", tablePercent[,4]))), 1), "%")
)
# Align the placeholder names A-D with the cohort columns so rbind() matches.
colnames(avg_row) <- colnames(tablePercent)
tablePercent <- rbind(tablePercent, avg_row)
# Show the cohort size in each column header.
colnames(tablePercent) <- c(paste0("F19 (n=", len1, ")"), paste0("F21 (n=", len2, ")"), paste0("F22 (n=", len3, ")"), paste0("F23 (n=", len4, ")"))
rownames(tablePercent)[nrow(tablePercent)] <- "Average"


knitr::kable(tablePercent, caption="Students who settled for a lower score without trying 3 attempts.")

Students who settled for a lower score without trying 3 attempts.
	F19 (n=191)	F21 (n=211)	F22 (n=220)	F23 (n=163)
Milestone 1	7.3%	10.9%	5.9%	7.4%
Milestone 2	4.7%	15.2%	7.3%	8.6%
Milestone 3	3.7%	10.9%	8.6%	8.6%
Milestone 4	7.3%	11.8%	14.1%	11%
Average	5.8%	12.2%	9%	8.9%

Milestone 4

In 2019, 14 students settled for a lower score, without trying the 3 attempts.
In 2021, 25 students settled for a lower score, without trying the 3 attempts.
In 2022, 31 students settled for a lower score, without trying the 3 attempts.
In 2023, 18 students settled for a lower score, without trying the 3 attempts.

3.7 Statistics of raw scores

# Fall 2018 milestone scores come from gradesf18mood; they are coerced to
# numeric, and the coercion warnings (non-numeric entries become NA) are
# silenced.
f18m1 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.1..Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))
f18m2 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.2...Do.not.take.it.outside.of.class...Requires.Respondus.LockDown.Browser..Real.))
f18m3 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.3...Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))
f18m4 = suppressWarnings(as.numeric( gradesf18mood$Quiz.Milestone.4...Do.not.take.it.outside.of.class..Requires.Respondus.LockDown.Browser..Real.))

# Milestone 1: one summary() row per semester, taken from the gradebook
# column of that semester.
allStats = as.data.frame.matrix( rbind(
  summary(f18m1),
  summary(gradesf19$Milestone.1.Quiz..Requires.Respondus.LockDown.Browser..798227.),
  summary(gradesf21$Milestone.1..Requires.Respondus.LockDown.Browser..1911986.),
  summary(gradesf22$Milestone.1..Requires.Respondus.LockDown.Browser..2592695.),
  summary(gradesf23$Milestone.1..Requires.Respondus.LockDown.Browser..3262166.)
))
# Drop the NA-count column so only the six summary statistics are shown.
allStats$`NA's` = NULL
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Milestone 1 raw score",digits = 1)

Milestone 1 raw score
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
Fall18	51.2	88.2	94.0	92.3	98.4	100
Fall19	71.9	84.5	90.5	89.9	95.9	100
Fall21	51.2	84.4	89.8	89.4	96.0	100
Fall22	61.2	84.0	89.2	89.1	94.5	100
Fall23	23.5	85.1	90.0	89.2	95.6	100

# Milestone 2: one summary() row per semester, taken from the gradebook
# column of that semester.
allStats = as.data.frame.matrix( rbind(
  summary(f18m2),
  summary(gradesf19$Milestone.2.Quiz..Requires.Respondus.LockDown.Browser..823364.),
  summary(gradesf21$Milestone.2..Requires.Respondus.LockDown.Browser..1912009.),
  summary(gradesf22$Milestone.2..Requires.Respondus.LockDown.Browser..2592715.),
  summary(gradesf23$Milestone.2..Requires.Respondus.LockDown.Browser..3262143.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
# Drop the NA-count column so only the six summary statistics are shown.
allStats$`NA's` = NULL
knitr::kable(allStats,caption = "Milestone 2 raw score",digits = 1)

Milestone 2 raw score
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
Fall18	55.6	84.4	91.6	90.0	96.0	100
Fall19	67.0	85.6	90.6	90.1	95.7	100
Fall21	37.1	79.3	84.9	84.1	90.1	100
Fall22	53.3	82.8	88.0	87.2	92.4	100
Fall23	30.4	81.7	88.2	87.2	94.2	100

# Milestone 3: one summary() row per semester, taken from the gradebook
# column of that semester.
# NOTE(review): unlike the Milestone 1 and 2 tables, the NA's column is not
# removed here, so the rendered table also shows the NA count per semester.
allStats = as.data.frame.matrix( rbind(
  summary(f18m3),
  summary(gradesf19$Milestone.3.Quiz..Requires.Respondus.LockDown.Browser..841979.),
  summary(gradesf21$Milestone.3..Requires.Respondus.LockDown.Browser..2192128.),
  summary(gradesf22$Milestone.3..Requires.Respondus.LockDown.Browser..2592728.),
  summary(gradesf23$Milestone.3..Requires.Respondus.LockDown.Browser..3262146.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Milestone 3 raw score",digits = 1)

Milestone 3 raw score
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
Fall18	35.4	86.8	92.4	90.5	96.7	100	3
Fall19	64.4	84.2	89.5	89.2	94.4	100	2
Fall21	48.4	82.3	88.0	87.4	94.1	100	4
Fall22	38.2	81.4	86.3	85.8	91.8	100	1
Fall23	44.8	82.2	87.2	86.4	92.1	100	3

# Milestone 4: one summary() row per semester, taken from the gradebook
# column of that semester.
# NOTE(review): unlike the Milestone 1 and 2 tables, the NA's column is not
# removed here, so the rendered table also shows the NA count per semester.
allStats = as.data.frame.matrix( rbind(
  summary(f18m4),
  summary(gradesf19$Milestone.4.Quiz..Requires.Respondus.LockDown.Browser..860755.),
  summary(gradesf21$Milestone.4..Requires.Respondus.LockDown.Browser..2216543.),
  summary(gradesf22$Milestone.4..Requires.Respondus.LockDown.Browser..2592732.),
  summary(gradesf23$Milestone.4..Requires.Respondus.LockDown.Browser..3262167.)
))
row.names(allStats) = c("Fall18","Fall19","Fall21","Fall22","Fall23")
knitr::kable(allStats,caption = "Milestone 4 raw score",digits = 1)

Milestone 4 raw score
	Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
Fall18	44.0	82.6	88.9	87.2	94.6	100	5
Fall19	46.4	83.4	87.5	86.4	92.2	100	5
Fall21	51.1	81.7	87.0	86.4	94.4	100	8
Fall22	46.7	80.6	86.4	85.3	91.4	100	3
Fall23	46.7	81.6	86.2	86.0	91.7	100	3

Passing grades as they attempt milestones

library(ggplot2)
# Build the question column headers "q1", "q2", ..., "q<n>".
#
# Args:
#   numberOfQuestions: number of questions (non-negative whole number).
# Returns:
#   A character vector of length numberOfQuestions; character(0) when there
#   are no questions (the previous seq()-based loop returned "q1" "q0").
buildHeaders <- function(numberOfQuestions){
  # sprintf() returns character(0) for a zero-length index, whereas paste0()
  # would return a lone "q".
  sprintf("q%d", seq_len(numberOfQuestions))
}
# Reshape a raw milestone export into one row per attempt with one score
# column per question.
#
# Args:
#   m1:                milestone data frame. The code reads column 1 as the
#                      student name, column 8 as the attempt number, the last
#                      column as the total, and every second column from 10 to
#                      ncol - 2 as a question score whose label sits in the
#                      column name immediately to its left.
#   numberOfQuestions: number of distinct questions in the milestone.
# Returns:
#   A data frame with columns q1..q<n>, total, studName and attNumb. The last
#   row holds the column means (studName and attNumb are NA for that row).
buildDF_fromMilestone <- function(m1,numberOfQuestions){
  #Build df with just answers 
  totcol = ncol(m1)
  m1df = data.frame( matrix(ncol=numberOfQuestions+3,nrow=0) )
  
  
  colHeaders = c(buildHeaders(numberOfQuestions),"total")
   
  studNames = c()
  attNumb = c()
  #loop over students or row
  for (st in seq(1,nrow(m1)) ){
    #build attNumb and studNames. They will be added as columns later
    attNumb = c(attNumb,m1[st,8])
    studNames = c(studNames,m1[st,1])
    
    #empty the score array and build it up as it finds each question
    thisSt = rep(NA,numberOfQuestions)
    #loop over columns to find nonempty scores
    for (q in seq(10,totcol-2,2)){
      score = m1[st,q]
      if ( !is.na( score ) ){
       # The column to the left is named "q<number>_..."; take the part
       # before the first underscore and strip the leading "q" to get the
       # question number this score belongs to.
       questionNumber = colnames(m1)[q-1]
       questionNumber = unlist(strsplit(questionNumber,"_"))[1]
       questionNumber = as.numeric( gsub("^q","",questionNumber))
       thisSt[questionNumber] = score
      }
    }
    #studNames = c(studNames,m1[st,1])
    # Append the total (last column) after the per-question scores.
    thisSt = c(thisSt, m1[st,totcol])
    m1df = rbind(m1df,thisSt)
  }
  colnames(m1df) = colHeaders
  # Extra final row with the column means; no na.rm, so a question with any
  # missing score gets an NA mean.
  m1df = rbind(m1df,colMeans(m1df))
  # NA pads the name / attempt columns for the means row.
  m1df$studName = c(studNames,NA)
  m1df$attNumb = c(attNumb,NA)
  return(m1df)
}
# Collapse a raw score onto the three-level scale used by the flow tables.
#
# Args:
#   score: a single numeric score.
# Returns:
#   0 for scores below 70, 80 for scores from 70 up to (but not including) 80,
#   and 100 for scores of 80 or more.
roundThisScore <- function(score){
  if (score < 70) {
    0
  } else if (score < 80) {
    80
  } else {
    # Includes a score of exactly 80, which previously matched no branch and
    # failed with "object 'r' not found".
    100
  }
}
# Track how students move between the 0 / 80 / 100 buckets as attempts
# accumulate, and list the students who settled for less than 80 without
# using a third attempt.
#
# Args:
#   m1df: data frame with columns studName, attNumb and total (as built by
#         buildDF_fromMilestone); rows with NA studName are ignored.
# Returns:
#   A list with
#     settle: one row per student whose highest attempt number is below 3 and
#             whose best total is below 80 (name, MaxAttempt, MaxScore);
#     flow:   3 x 3 data frame of student counts; rows are the buckets 0, 80
#             and 100 (in that order), columns are "After 1st", "After 2nd"
#             and "After 3rd".
flowAndSettling <- function(m1df){
  #check students who did not attempt a 3rd and got lower than 80
  studs = unique(m1df$studName)
  studs = studs[!is.na(studs)]
  settle = data.frame( matrix(ncol=3,nrow=0) )
  flow = data.frame(matrix(ncol=3,nrow=0))
  for (stud in studs){
    thisStDF = m1df[which(m1df$studName == stud),]
    #if att number lower than 3 and score lower than 80
    maxNum = max(thisStDF$attNumb)
    maxScore = max(thisStDF$total)
    # Scalar condition, so use the short-circuit operator.
    if ( maxNum < 3 && maxScore < 80 ){
      settle = rbind(settle,c(stud,maxNum,maxScore))
    }
    # Best bucket reached using attempts 1, 1-2 and 1-3 respectively.
    sc1 = roundThisScore( max(m1df[which(m1df$studName == stud & m1df$attNumb < 2),]$total) )
    sc2 = roundThisScore( max(m1df[which(m1df$studName == stud & m1df$attNumb < 3),]$total) )
    sc3 = roundThisScore( max(m1df[which(m1df$studName == stud & m1df$attNumb < 4),]$total) )
    flow = rbind(flow,c(sc1,sc2,sc3))
    
  }
  colnames(flow) = c("one","two","three")
  # Tabulate against fixed levels so an empty bucket is counted as 0 instead
  # of shortening the table and breaking (or misaligning) the 3-row assignment.
  buckets = c(0, 80, 100)
  flow2 = data.frame(matrix(ncol=3,nrow=3))
  flow2[,1]=table(factor(flow$one, levels = buckets))
  flow2[,2]=table(factor(flow$two, levels = buckets))
  flow2[,3]=table(factor(flow$three, levels = buckets))
  colnames(flow2) = c("After 1st","After 2nd","After 3rd")
  
  colnames(settle) = c("name","MaxAttempt","MaxScore")
  all = list("settle" = settle,"flow" = flow2)
  return(all)
}

# Build the long table behind the stacked "F19 vs F21" bar charts: the
# percentage of students in each grade bucket after the first, second and
# third attempt.
#
# Args:
#   m1f21, numb_m1f21: F21 milestone data and its number of questions.
#   m1f19, numb_m1f19: F19 milestone data and its number of questions.
# Returns:
#   A data frame with 18 rows (F21 first, then F19; within a cohort ordered
#   by attempt, then grade F, C, A) and columns Grade, Pct (numeric), Year
#   and Attempt.
compileStackAndGrouped = function(m1f21,numb_m1f21,m1f19,numb_m1f19){
  gradeLabels <- c("F", "C", "A")
  attemptLabels <- c("First", "Second", "Third")

  # Append the nine rows (3 attempts x 3 grades) of one cohort to `acc`.
  appendCohort <- function(acc, milestone, nQuestions, yearLabel) {
    cohortFlow <- flowAndSettling(buildDF_fromMilestone(milestone, nQuestions))$flow
    for (a in seq_along(attemptLabels)) {
      pct <- cohortFlow[, a] / sum(cohortFlow[, a]) * 100
      for (g in seq_along(gradeLabels)) {
        acc <- rbind(acc, c(gradeLabels[g], pct[g], yearLabel, attemptLabels[a]))
      }
    }
    acc
  }

  stackAndGrouped <- data.frame(matrix(ncol = 4, nrow = 0))
  stackAndGrouped <- appendCohort(stackAndGrouped, m1f21, numb_m1f21, "F21")
  stackAndGrouped <- appendCohort(stackAndGrouped, m1f19, numb_m1f19, "F19")

  colnames(stackAndGrouped) <- c("Grade", "Pct", "Year", "Attempt")
  # The rows were bound as character vectors, so turn Pct back into numbers.
  stackAndGrouped$Pct <- as.numeric(stackAndGrouped$Pct)

  return(stackAndGrouped)
}

# Stacked bar charts of grade-bucket percentages, F19 vs F21, faceted by
# attempt; one chart per milestone. The numeric arguments are the number of
# questions passed to compileStackAndGrouped() for F21 and F19 respectively.
# NOTE(review): the fill colours are unnamed, so they are matched to the Grade
# values in scale order (presumably A, C, F -> green, yellow, red) -- confirm.
stackAndGrouped = compileStackAndGrouped(m1f21,12,m1f19,12)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  ggtitle("Milestone 1: F19 vs F21")+
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

# Milestone 2: 11 questions for F21, 12 for F19.
stackAndGrouped = compileStackAndGrouped(m2f21,11,m2f19,12)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  ggtitle("Milestone 2: F19 vs F21")+
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

# Milestone 3: 13 questions for F21, 12 for F19.
stackAndGrouped = compileStackAndGrouped(m3f21,13,m3f19,12)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  ggtitle("Milestone 3: F19 vs F21")+
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

# Milestone 4: 12 questions for both cohorts.
stackAndGrouped = compileStackAndGrouped(m4f21,12,m4f19,12)
ggplot() + geom_bar(data=stackAndGrouped, aes(y = Pct, x = Year, fill = Grade), stat="identity",
           position='stack') +
  theme_bw() + 
  ggtitle("Milestone 4: F19 vs F21")+
  scale_fill_manual(values = c("green","yellow","red" )) +
  facet_grid( ~ Attempt)

4 Preclass

Students not turning in preclass through the semester

# Preclass columns per cohort: gradebook columns whose name contains both
# "class" and "X" but not "Score".
f18preclass = gradesf18[,grepl("class",names(gradesf18)) & grepl("X",names(gradesf18)) & !grepl("Score",names(gradesf18))]
f19preclass = gradesf19[,grepl("class",names(gradesf19)) & grepl("X",names(gradesf19)) & !grepl("Score",names(gradesf19))]
f21preclass = gradesf21[,grepl("class",names(gradesf21)) & grepl("X",names(gradesf21)) & !grepl("Score",names(gradesf21))]
# Per student: number of preclass assignments with a score of exactly 0
# (NA entries are not counted).
gradesf18$missPreclass = rowSums( f18preclass == 0, na.rm = TRUE)
gradesf19$missPreclass = rowSums( f19preclass == 0, na.rm = TRUE)
gradesf21$missPreclass = rowSums( f21preclass == 0, na.rm = TRUE)
# l is only referenced by the commented-out lines below.
l = length(colnames(f21preclass))
#f21preclass[,l] = NULL
#f21preclass[,l-1] = NULL
# Per assignment: number of students with a zero score, one line per cohort.
plot(0+colSums(f18preclass == 0, na.rm = TRUE),type="l",col="black",ylim=c(0,40),xlab = "class day",ylab = "students missing preclass")
lines(0+colSums(f19preclass == 0,na.rm = TRUE),type="l",col="red")
# NOTE(review): F21 counts scores below 1 rather than scores equal to 0 as in
# the other cohorts and in missPreclass above -- confirm this is intended.
lines(0+colSums(f21preclass < 1,na.rm = TRUE),type="l",col="green")
# nf18, nf19 and nf21 (cohort sizes) are defined elsewhere in the file.
legend(1, 40, legend=c( paste("F2018 n=",nf18), 
                        paste("F2019 n=",nf19), 
                        paste("F2021 n=",nf21)), col=c("black", "red","green"),lty=1:1, cex=0.8)
title("Number of students missing each Preclass")

Repeating offenders

# Three side-by-side histograms (one per cohort) of how many preclass
# assignments each student missed (score of exactly 0). Shared axis limits
# make the panels directly comparable.
# NOTE: par() is changed here and not restored in this block.
par(mfrow=c(1,3),
    oma = c(5,4,0,0) + 0.1,
    mar = c(0,0,1,1) + 0.1)
hist(rowSums( f18preclass == 0,na.rm = TRUE),breaks=10,xlim = c(0,15),ylim=c(0,140),main="Fall18",xlab="Number of preclass missed",ylab="Number of students")

hist(rowSums( f19preclass == 0,na.rm = TRUE),breaks=10,xlim = c(0,15),ylim=c(0,140),main="Fall19",xlab="Number of preclass missed",ylab="Number of students")
hist(rowSums( f21preclass == 0,na.rm = TRUE),breaks=10,xlim = c(0,15),ylim=c(0,140),main="Fall21",xlab="Number of preclass missed",ylab="Number of students")

5 Homework

Students not completing the homework is a sign of students giving up or completely disengaged. Notice that because of the “drop the lowest” policy some students just don’t attempt the last one because they’re happy with their grade. I’m wondering if the fact that the grade is out of 12 and not out of 100 loses sense to them. For example, the 70% of 12 is 8.4 which is not as dramatic.

# Homework columns per cohort, selected by gradebook column name.
f18hw = gradesf18[,grepl("Thu",names(gradesf18))]
f19hw = gradesf19[,grepl("Homework",names(gradesf19)) & !grepl("Score",names(gradesf19)) & !grepl("Homework.13th.week",names(gradesf19))]
# F19 and F21 homework is rescaled from a 12-point scale to a percentage.
# (F18 columns are used as-is -- presumably already out of 100; TODO confirm.)
# seq_len() keeps the loops from running over c(1, 0) if no column matched.
for (i in seq_len(ncol(f19hw))){ f19hw[,i] = as.numeric(f19hw[,i])/12*100 }
f21hw = gradesf21[,grepl("Thu",names(gradesf21))]
for (i in seq_len(ncol(f21hw))){ f21hw[,i] = as.numeric(f21hw[,i])/12*100 }

# One column per homework, one row per cohort: students scoring below 70.
# NOTE(review): the loop runs over the F21 columns and indexes F18/F19 by the
# same position, so all three cohorts must have at least as many columns.
hwfail = data.frame(matrix(ncol = 0,nrow=3))
for (i in seq_len(ncol(f21hw))){
  hwfail[ paste("hw",i)]=c( 
    sum( f18hw[,i] < 70,na.rm = TRUE),
    sum( f19hw[,i] < 70,na.rm = TRUE),
    sum( f21hw[,i] < 70,na.rm = TRUE)
  )
}
row.names(hwfail) = c("Fall18","Fall19","Fall21")
knitr::kable(hwfail,caption = "Number of students with homework scores below 70",digits = 1)

Number of students with homework scores below 70
	hw 1	hw 2	hw 3	hw 4	hw 5	hw 6	hw 7	hw 8	hw 9
Fall18	2	5	1	9	9	3	2	3	11
Fall19	1	4	4	15	44	4	6	3	21
Fall21	3	18	11	29	26	15	15	29	30

# Same table as above with a stricter cut-off: students scoring below 50 on
# each homework, one row per cohort.
# seq_len() keeps the loop from running over c(1, 0) if f21hw has no columns.
hwfail = data.frame(matrix(ncol = 0,nrow=3))
for (i in seq_len(ncol(f21hw))){
  hwfail[ paste("hw",i)]=c( 
    sum( f18hw[,i] < 50,na.rm = TRUE),
    sum( f19hw[,i] < 50,na.rm = TRUE),
    sum( f21hw[,i] < 50,na.rm = TRUE)
  )
}
row.names(hwfail) = c("Fall18","Fall19","Fall21")
knitr::kable(hwfail,caption = "Number of students with homework scores below 50",digits = 1)

Number of students with homework scores below 50
	hw 1	hw 2	hw 3	hw 4	hw 5	hw 6	hw 7	hw 8	hw 9
Fall18	0	4	0	4	3	2	0	1	5
Fall19	0	4	2	4	11	2	3	1	8
Fall21	1	8	2	18	10	7	8	17	18

6 Lab reports

Except for one, lab reports are out of ten. The table shows how many students got scores below 5

# Lab-report columns per cohort, selected by gradebook column name.
f18report = gradesf18[, grepl("Reporting",names(gradesf18)) & 
                        !grepl("Score",names(gradesf18))
                      ]
f19report = gradesf19[, grepl("Reporting",names(gradesf19)) & 
                        !grepl("Score",names(gradesf19))
                      ]
f21report = gradesf21[,grepl("Reporting",names(gradesf21))]

# Convert every column to numeric and express it as a percentage of the
# highest score observed in that column (reports do not all share the same
# maximum). seq_len() makes a zero-column input a no-op.
rescaleReportsToPercent <- function(reports){
  for (i in seq_len(ncol(reports))){
    reports[,i] = as.numeric(reports[,i])
    thismax = max(reports[,i],na.rm = TRUE)
    reports[,i] = reports[,i]/thismax*100
  }
  reports
}
f18report = rescaleReportsToPercent(f18report)
f19report = rescaleReportsToPercent(f19report)
f21report = rescaleReportsToPercent(f21report)

# Sort the F21 columns by name, then move positions 2 and 3 to the end --
# presumably because reports 10 and 11 sort before report 2 alphabetically;
# confirm against the column names.
f21report = f21report[,order(names(f21report))]
f21report = f21report[,c(1,4,5,6,7,8,9,10,11,2,3)]

# One column per report, one row per cohort: students below 50% of the
# column maximum.
# NOTE(review): the loop runs over the F21 columns and indexes F18/F19 by the
# same position, so all three cohorts must have at least as many columns.
reportfail = data.frame(matrix(ncol = 0,nrow=3))
for (i in seq_len(ncol(f21report))){
  reportfail[ paste("Report",i)]=c( 
    sum( f18report[,i] < 50,na.rm = TRUE),
    sum( f19report[,i] < 50,na.rm = TRUE),
    sum( f21report[,i] < 50,na.rm = TRUE)
  )
}
row.names(reportfail) = c("Fall18","Fall19","Fall21")
knitr::kable(reportfail,caption = "Number of students with report scores below 50",digits = 1)

Number of students with report scores below 50
	Report 1	Report 2	Report 3	Report 4	Report 5	Report 6	Report 7	Report 8	Report 9	Report 10	Report 11
Fall18	2	1	5	2	2	4	3	3	3	4	6
Fall19	2	11	26	16	10	10	17	10	18	13	14
Fall21	7	51	30	48	17	24	36	30	48	43	27

7 Prelab

Missing a prelab is a big deal because you are not allowed to turn in your report, and the moment you miss more than two labs you automatically fail the course.

# Prelab columns per cohort: gradebook columns whose name contains "Prelab"
# but not "Score".
f18prelab = gradesf18[,grepl("Prelab",names(gradesf18)) & !grepl("Score",names(gradesf18))]
f19prelab = gradesf19[,grepl("Prelab",names(gradesf19)) & !grepl("Score",names(gradesf19))]
f21prelab = gradesf21[,grepl("Prelab",names(gradesf21)) & !grepl("Score",names(gradesf21))]
# Per student: number of prelabs with a score of exactly 0 (NA entries are
# not counted).
gradesf18$missPrelab = rowSums( f18prelab == 0, na.rm = TRUE)
gradesf19$missPrelab = rowSums( f19prelab == 0, na.rm = TRUE)
gradesf21$missPrelab = rowSums( f21prelab == 0, na.rm = TRUE)


# Per prelab: number of students with a zero score, one line per cohort.
plot(0+colSums(f18prelab == 0, na.rm = TRUE),type="l",col="black",ylim=c(0,8),xlab = "Lab day",ylab = "students missing prelab")
lines(0+colSums(f19prelab == 0,na.rm = TRUE),type="l",col="red")
# NOTE(review): F21 counts scores below 1 rather than scores equal to 0 as in
# the other cohorts and in missPrelab above -- confirm this is intended.
lines(0+colSums(f21prelab < 1,na.rm = TRUE),type="l",col="green")
# nf18, nf19 and nf21 (cohort sizes) are defined elsewhere in the file.
legend("topright", legend=c( paste("F2018 n=",nf18), 
                        paste("F2019 n=",nf19), 
                        paste("F2021 n=",nf21)), col=c("black", "red","green"),lty=1:1, cex=0.8)
title("Number of students missing each Prelab")

8 Video Watching

What days students are not watching the videos. The data only shows up to Module 3 (the first two months of the course). I need to compile all the video watching data from F19 and F18, but Kaltura changed the format so it’s harder to get the data for the date ranges that I need.

# One column per video, one row per cohort. The value is read from the last
# row of each cohort's video table (presumably a per-video totals row --
# confirm against the CSV files).
allVideo = data.frame(matrix(ncol = 0,nrow=3))
j18 = nrow(videof18)
j19 = nrow(videof19)
j21 = nrow(videof21)
#f19 has the least number of columns, I need more data. The last two columns are averages
# The video columns start at column 2 for F18/F19 and at column 7 for F21.
# seq_len() avoids iterating over c(1, 0) if videof19 has only two columns.
for (i in seq_len(ncol(videof19)-2)){
  allVideo[paste("video",i)] = c(
    videof18[j18,i+1],
    videof19[j19,i+1],
    videof21[j21,i+6]
  )
}
row.names(allVideo) = c("Fall18","Fall19","Fall21")
# Transpose so that matplot() draws one line per cohort.
tAllVideo = t(allVideo)
matplot(tAllVideo,type="l",xlab="Video instance",ylab="Number students not watching it",main="Students who do not watch that video")
legend("topright",legend = colnames(tAllVideo),col=1:3,lty=1:3)

Distribution of repeat offenders: are there a lot of students who consistently do not watch the videos? Fall21 stands out again.

# Three side-by-side histograms (one per semester) of how many videos each
# student missed, drawn on identical axes so the panels can be compared.
par(mfrow = c(1, 3), oma = c(5, 4, 0, 0) + 0.1, mar = c(0, 0, 1, 1) + 0.1)

invisible(Map(
  function(missed, semester) {
    hist(missed, xlim = c(0, 50), ylim = c(0, 140), main = semester,
         xlab = "Number of videos missed", ylab = "Number of students")
  },
  list(videof18$TotalMissed, videof19$TotalMissed, videof21$VideosMissed),
  c("Fall18", "Fall19", "Fall21")
))

#this wont be used until further down
# Attach each F21 student's missed-video count to the gradebook by looking up
# the Canvas login id (SIS.Login.ID) in the `name` column of the video export.
#
# A match() lookup is used instead of merge(): merge() re-sorts the rows of its
# result by the key column, which silently breaks the row alignment between
# gradesf21 and the tables sliced from it earlier (f21prelab, and presumably
# f21hw / f21report) that are later plotted against gradesf21 columns by position.
# Students without a matching entry get NA, as with all.x = TRUE; if a login
# appears more than once in the video export the first entry is used.
# Unlike merge(), this also leaves SIS.Login.ID at its original column position.
missedVideosF21 = videof21_all[c("name","VideosMissed")]
gradesf21$VideosMissed = missedVideosF21$VideosMissed[match(gradesf21$SIS.Login.ID, missedVideosF21$name)]

9 What Correlates With Course Grades

9.1 Semester Exams

# Final course score against Exam 1, with a least-squares line per semester
# (black = F18, red = F19, green = F21 points only).
with(gradesf18, plot(Exam.1..335345., Unposted.Final.Score, type = "p",
                     xlab = "Exam1/100", ylab = "Final Grade/100",
                     main = "Final grade vs Exam1"))
abline(lm(Unposted.Final.Score ~ Exam.1..335345., data = gradesf18))

with(gradesf19, points(Open.Ended.Written.Exam.1..816325., Unposted.Final.Score, col = "red"))
abline(lm(Unposted.Final.Score ~ Open.Ended.Written.Exam.1..816325., data = gradesf19), col = "red")

with(gradesf21, points(Exam..1..2161998., Unposted.Final.Score, col = "green"))
# The F21 fit below is left disabled, as in the original analysis.
# NOTE(review): the disabled code rescales Exam 1 by /25*100, which suggests the
# F21 Exam 1 column may be out of 25 -- confirm whether the green points need
# the same rescaling to be comparable on this axis.
#abline(lm(
#  gradesf21$Unposted.Final.Score[!is.na(gradesf21$Exam..1..2161998.)] ~
#    gradesf21$Exam..1..2161998.[!is.na(gradesf21$Exam..1..2161998.)]/25*100),col="green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

# Final course score against Exam 2, with a least-squares line per semester
# (black = F18, red = F19, green = F21).
with(gradesf18, plot(Exam.2..358394., Unposted.Final.Score, type = "p",
                     xlab = "Exam2/100", ylab = "Final Grade/100",
                     main = "Final grade vs Exam2"))
abline(lm(Unposted.Final.Score ~ Exam.2..358394., data = gradesf18))

with(gradesf19, points(Open.Ended.Exam.2..836125., Unposted.Final.Score, col = "red"))
abline(lm(Unposted.Final.Score ~ Open.Ended.Exam.2..836125., data = gradesf19), col = "red")

with(gradesf21, points(Exam..2..2171776., Unposted.Final.Score, col = "green"))
abline(lm(Unposted.Final.Score ~ Exam..2..2171776., data = gradesf21), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

# Final course score against Exam 3, with a least-squares line per semester
# (black = F18, red = F19, green = F21).
with(gradesf18, plot(Exam.3..395005., Unposted.Final.Score, type = "p",
                     xlab = "Exam3/100", ylab = "Final Grade/100",
                     main = "Final grade vs Exam3"))
abline(lm(Unposted.Final.Score ~ Exam.3..395005., data = gradesf18))

with(gradesf19, points(Open.Ended.Exam.3..875116., Unposted.Final.Score, col = "red"))
abline(lm(Unposted.Final.Score ~ Open.Ended.Exam.3..875116., data = gradesf19), col = "red")

with(gradesf21, points(Exam..3..2184781., Unposted.Final.Score, col = "green"))
abline(lm(Unposted.Final.Score ~ Exam..3..2184781., data = gradesf21), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

# Final course score against the final exam, with a least-squares line per
# semester (black = F18, red = F19, green = F21). The y axis is cut at 50.
plot(gradesf18$Final.exam..408510., gradesf18$Unposted.Final.Score,
     type = "p", xlab = "Final Exam/100", ylab = "Final Grade/100",
     main = "Final grade vs Final exam", ylim = c(50, 100))
abline(lm(gradesf18$Unposted.Final.Score ~ gradesf18$Final.exam..408510.))

points(gradesf19$Final.exam.Unposted.Current.Score, gradesf19$Unposted.Final.Score, col = "red")
abline(lm(gradesf19$Unposted.Final.Score ~ gradesf19$Final.exam.Unposted.Current.Score), col = "red")

# The F21 points are plotted against Unposted.Final.Score (previously
# Unposted.Current.Score) so that they share the response variable of the
# fitted line and of the other two semesters.
points(gradesf21$Final.Written.Exam.Current.Score, gradesf21$Unposted.Final.Score, col = "green")
abline(lm(gradesf21$Unposted.Final.Score ~ gradesf21$Final.Written.Exam.Current.Score), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

9.2 Milestone Exams and Course Grade

A graph of milestone exams is not useful because most students get 100/80/0, so it makes no sense to plot them on x–y coordinates. The table below, showing what “A”, “B”, “C”, … students get on average, is more informative.

9.3 Homework

# Per-student homework average, stored as an extra `ave` column.
# NOTE(review): re-running these lines would fold the existing `ave` column
# into the new mean; they are meant to run once per session.
f18hw$ave <- rowMeans(f18hw)
f19hw$ave <- rowMeans(f19hw)
f21hw$ave <- rowMeans(f21hw)

# Final course score against average homework, with a least-squares line per
# semester (black = F18, red = F19, green = F21). This relies on the rows of
# each *hw table being in the same order as the matching grades table.
plot(f18hw$ave, gradesf18$Unposted.Final.Score,
     type = "p", xlab = "Ave HW/100", ylab = "Final Grade/100",
     main = "Final Grade vs Average Homework", ylim = c(0, 100))
abline(lm(gradesf18$Unposted.Final.Score ~ f18hw$ave))

# F19 and F21 points use Unposted.Final.Score (previously
# Unposted.Current.Score) so that the points and their regression lines share
# the same response variable, as they already did for F18.
points(f19hw$ave, gradesf19$Unposted.Final.Score, col = "red")
abline(lm(gradesf19$Unposted.Final.Score ~ f19hw$ave), col = "red")
points(f21hw$ave, gradesf21$Unposted.Final.Score, col = "green")
abline(lm(gradesf21$Unposted.Final.Score ~ f21hw$ave), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

9.4 Lab reports

# Per-student lab-report average, stored as an extra `ave` column.
# NOTE(review): re-running these lines would fold the existing `ave` column
# into the new mean; they are meant to run once per session.
f18report$ave <- rowMeans(f18report)
f19report$ave <- rowMeans(f19report)
f21report$ave <- rowMeans(f21report)

# Final course score against average lab report, with a least-squares line per
# semester (black = F18, red = F19, green = F21). This relies on the rows of
# each *report table being in the same order as the matching grades table.
plot(f18report$ave, gradesf18$Unposted.Final.Score,
     type = "p", xlab = "Ave Lab Report/100", ylab = "Final Grade/100",
     main = "Final Grade vs Average Lab Report", ylim = c(0, 100))
abline(lm(gradesf18$Unposted.Final.Score ~ f18report$ave))

# F19 and F21 points use Unposted.Final.Score (previously
# Unposted.Current.Score) so that the points and their regression lines share
# the same response variable, as they already did for F18.
points(f19report$ave, gradesf19$Unposted.Final.Score, col = "red")
abline(lm(gradesf19$Unposted.Final.Score ~ f19report$ave), col = "red")
points(f21report$ave, gradesf21$Unposted.Final.Score, col = "green")
abline(lm(gradesf21$Unposted.Final.Score ~ f21report$ave), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

9.5 Prelab

It looks like in 2018 we didn’t require a 90% prelab score to be allowed in the lab. This shows in Fall 2018 having prelab averages all over the place. Fall 2019 and Fall 2021, however, show pretty much everyone with an average above 90%.

# Final course score against the prelab category score, with a least-squares
# line per semester (black = F18, red = F19, green = F21).
plot(gradesf18$Lab..Prelab.Quiz.Unposted.Final.Score, gradesf18$Unposted.Final.Score,
     type = "p", xlab = "Ave Prelab /100", ylab = "Final Grade/100",
     main = "Final Grade vs Average Prelab", ylim = c(0, 100))
abline(lm(gradesf18$Unposted.Final.Score ~ gradesf18$Lab..Prelab.Quiz.Unposted.Final.Score))

# F19 and F21 points use Unposted.Final.Score (previously
# Unposted.Current.Score) so that the points and their regression lines share
# the same response variable, as they already did for F18.
points(gradesf19$Lab..Prelab.Quiz.Unposted.Final.Score, gradesf19$Unposted.Final.Score, col = "red")
abline(lm(gradesf19$Unposted.Final.Score ~ gradesf19$Lab..Prelab.Quiz.Unposted.Final.Score), col = "red")
points(gradesf21$Pre.Lab.Assignments.Unposted.Final.Score, gradesf21$Unposted.Final.Score, col = "green")
abline(lm(gradesf21$Unposted.Final.Score ~ gradesf21$Pre.Lab.Assignments.Unposted.Final.Score), col = "green")

legend("bottomright", legend = c("F18", "F19", "F21"),
       col = c("black", "red", "green"), lty = 1, cex = 0.8)

9.6 Tables of Behavioral characteristics of students based on grade

Fall 2018

# Summarise one score column for each letter-grade band.
#
# df:    data frame with an `Unposted.Final.Grade` column of letter grades.
# score: name (string) of the column of `df` to summarise.
#
# Returns a five-row, one-column data frame (rows in the order A, B, C, D, F)
# whose cells read "<mean> +/- <sd>", the mean printed with two decimals and
# the standard deviation with one. A student belongs to a band when the band
# letter occurs anywhere in the grade (so "A-" counts as "A"). NA scores are
# ignored; an empty band therefore prints "NaN +/- NA".
getMeanAndSD <- function(df, score) {
  describe_band <- function(band) {
    members <- grepl(band, df$Unposted.Final.Grade)
    values <- df[members, ][[score]]
    paste(sprintf("%.2f", mean(values, na.rm = TRUE)),
          "+/-",
          sprintf("%.1f", sd(values, na.rm = TRUE)))
  }
  results <- data.frame(matrix(ncol = 1, nrow = 5))
  results[, 1] <- vapply(c("A", "B", "C", "D", "F"), describe_band,
                         character(1), USE.NAMES = FALSE)
  results
}

# Fall 2018 summary: one row per letter-grade band, one "mean +/- sd" column
# per gradebook measure.
# NOTE(review): "Final Exam" reads the *Current* score here while the F19/F21
# tables read the *Final* score -- confirm which one is intended.
f18ave <- data.frame(matrix(ncol = 0, nrow = 5))
f18ave[["Written Exams "]] <- getMeanAndSD(gradesf18, "Science.Practice.Exercises.Unposted.Final.Score")[[1]]
f18ave[["Final Exam"]]     <- getMeanAndSD(gradesf18, "Final.exam.Unposted.Current.Score")[[1]]
f18ave[["Milestone "]]     <- getMeanAndSD(gradesf18, "Milestones.Unposted.Current.Score")[[1]]
f18ave[["HW "]]            <- getMeanAndSD(gradesf18, "Homework.Current.Score")[[1]]
f18ave[["Lab reports"]]    <- getMeanAndSD(gradesf18, "Lab..Reporting.Unposted.Current.Score")[[1]]
f18ave[["Miss Preclass"]]  <- getMeanAndSD(gradesf18, "missPreclass")[[1]]
f18ave[["Miss Prelab"]]    <- getMeanAndSD(gradesf18, "missPrelab")[[1]]
row.names(f18ave) <- c("A students", "B students", "C students", "D students", "F students")
knitr::kable(f18ave, caption = "Fall 2018: Average +/- Standard Deviation Based on Grade Performance")

Fall 2018: Average +/- Standard Deviation Based on Grade Performance
	Written Exams	Final Exam	Milestone	HW	Lab reports	Miss Preclass	Miss Prelab
A students	81.51 +/- 6.6	87.55 +/- 6.0	100.00 +/- 0.0	97.69 +/- 2.5	92.12 +/- 3.8	1.58 +/- 1.9	0.04 +/- 0.3
B students	62.77 +/- 9.8	74.05 +/- 7.3	98.82 +/- 3.0	94.33 +/- 5.1	88.05 +/- 5.5	2.99 +/- 2.7	0.12 +/- 0.4
C students	48.98 +/- 7.2	61.31 +/- 7.0	85.42 +/- 10.3	87.27 +/- 7.3	84.94 +/- 7.5	3.17 +/- 2.5	0.08 +/- 0.3
D students	43.92 +/- 11.3	59.91 +/- 6.3	63.75 +/- 13.1	79.92 +/- 14.8	71.19 +/- 10.8	5.25 +/- 3.9	1.00 +/- 1.2
F students	35.88 +/- 10.7	NaN +/- NA	46.05 +/- 25.3	62.19 +/- 17.2	59.30 +/- 14.9	14.40 +/- 5.2	2.40 +/- 1.5

# Fall 2019 summary: one row per letter-grade band, one "mean +/- sd" column
# per gradebook measure.
f19ave <- data.frame(matrix(ncol = 0, nrow = 5))
f19ave[["Written Exams "]] <- getMeanAndSD(gradesf19, "Open.Ended.Written.Exams.Unposted.Final.Score")[[1]]
f19ave[["Final Exam"]]     <- getMeanAndSD(gradesf19, "Final.exam.Unposted.Final.Score")[[1]]
f19ave[["Milestone "]]     <- getMeanAndSD(gradesf19, "Milestones.Unposted.Current.Score")[[1]]
f19ave[["HW "]]            <- getMeanAndSD(gradesf19, "Homework.Current.Score")[[1]]
f19ave[["Lab reports"]]    <- getMeanAndSD(gradesf19, "Lab..Reporting.Unposted.Current.Score")[[1]]
f19ave[["Miss Preclass"]]  <- getMeanAndSD(gradesf19, "missPreclass")[[1]]
f19ave[["Miss Prelab"]]    <- getMeanAndSD(gradesf19, "missPrelab")[[1]]
row.names(f19ave) <- c("A students", "B students", "C students", "D students", "F students")
knitr::kable(f19ave, caption = "Fall 2019: Average +/- Standard Deviation Based on Grade Performance")

Fall 2019: Average +/- Standard Deviation Based on Grade Performance
	Written Exams	Final Exam	Milestone	HW	Lab reports	Miss Preclass	Miss Prelab
A students	84.14 +/- 6.0	82.22 +/- 8.0	100.00 +/- 0.0	98.25 +/- 1.6	88.44 +/- 6.6	1.77 +/- 1.9	0.07 +/- 0.3
B students	64.24 +/- 9.7	64.08 +/- 9.4	99.74 +/- 1.1	93.92 +/- 4.8	81.71 +/- 9.5	2.33 +/- 2.0	0.08 +/- 0.3
C students	47.63 +/- 10.8	46.57 +/- 13.8	92.34 +/- 8.6	91.34 +/- 6.4	73.07 +/- 11.8	3.00 +/- 2.7	0.09 +/- 0.4
D students	37.11 +/- 12.5	45.38 +/- 7.8	70.83 +/- 10.2	77.24 +/- 14.3	57.39 +/- 23.2	4.00 +/- 4.0	0.33 +/- 0.8
F students	33.85 +/- 12.9	9.96 +/- 19.9	68.75 +/- 33.8	64.80 +/- 16.5	41.30 +/- 16.4	12.25 +/- 5.9	1.75 +/- 3.5

# Fall 2021 summary: one row per letter-grade band, one "mean +/- sd" column
# per gradebook measure, plus the missed-video count available for this semester.
f21ave <- data.frame(matrix(ncol = 0, nrow = 5))
f21ave[["Written Exams "]] <- getMeanAndSD(gradesf21, "Open.Ended.Semester.Exams.Unposted.Final.Score")[[1]]
f21ave[["Final Exam"]]     <- getMeanAndSD(gradesf21, "Final.Written.Exam.Unposted.Final.Score")[[1]]
f21ave[["Milestone "]]     <- getMeanAndSD(gradesf21, "Milestones.Unposted.Current.Score")[[1]]
f21ave[["HW "]]            <- getMeanAndSD(gradesf21, "Homework.Current.Score")[[1]]
f21ave[["Lab reports"]]    <- getMeanAndSD(gradesf21, "Lab.Reports.Unposted.Current.Score")[[1]]
f21ave[["Miss Preclass"]]  <- getMeanAndSD(gradesf21, "missPreclass")[[1]]
f21ave[["Miss Prelab"]]    <- getMeanAndSD(gradesf21, "missPrelab")[[1]]
f21ave[["Miss Videos"]]    <- getMeanAndSD(gradesf21, "VideosMissed")[[1]]
row.names(f21ave) <- c("A students", "B students", "C students", "D students", "F students")
knitr::kable(f21ave, caption = "Fall 2021: Average +/- Standard Deviation Based on Grade Performance")

Fall 2021: Average +/- Standard Deviation Based on Grade Performance
	Written Exams	Final Exam	Milestone	HW	Lab reports	Miss Preclass	Miss Prelab	Miss Videos
A students	86.60 +/- 6.8	78.75 +/- 11.3	99.66 +/- 1.3	96.71 +/- 3.5	91.30 +/- 4.7	0.08 +/- 0.3	0.00 +/- 0.0	8.41 +/- 11.5
B students	70.36 +/- 8.1	58.90 +/- 11.5	98.57 +/- 3.2	91.26 +/- 5.1	78.68 +/- 10.2	0.26 +/- 0.5	0.00 +/- 0.0	19.09 +/- 15.9
C students	54.11 +/- 13.3	46.36 +/- 13.8	85.61 +/- 9.4	82.54 +/- 9.6	65.32 +/- 13.6	0.21 +/- 0.4	0.09 +/- 0.3	25.64 +/- 15.9
D students	45.06 +/- 7.7	44.44 +/- 7.8	66.00 +/- 17.0	73.20 +/- 10.3	52.70 +/- 18.1	0.40 +/- 0.5	0.10 +/- 0.3	35.50 +/- 18.7
F students	34.22 +/- 20.1	17.25 +/- 27.5	30.00 +/- 19.6	52.11 +/- 26.7	40.13 +/- 21.2	0.62 +/- 0.7	0.12 +/- 0.4	44.38 +/- 21.3

Missing four videos is approximately equivalent to missing one day of class

10 Statistical significance

10.1 Written Exams

#install.packages("ggpubr")
library(ggpubr)

# Box plot (with jittered points) of one score column split by a grouping column.
#
# df:      data frame holding both columns.
# myx:     name (string) of the grouping column, also used to colour the boxes.
# myy:     name (string) of the numeric column to plot.
# mytitle: plot title.
# myylab:  y-axis label.
#
# Returns the ggplot object; wrap the call in print() to draw it.
#
# On top of the boxes the plot carries a dashed horizontal line at the overall
# mean of `myy`, an ANOVA p-value, and a per-group t-test p-value computed with
# ref.group = ".all." (each group against all observations). The two p-value
# labels sit at 110% and 105% of the largest observed value.
#
# The maximum and the mean skip NA values, so a single missing score no longer
# turns the label positions and the mean line into NA.
plotGGbox <- function(df, myx, myy, mytitle, myylab) {
  yvalues <- df[[myy]]
  maxy <- max(yvalues, na.rm = TRUE)
  ggboxplot(df, x = myx, y = myy,
            title = mytitle,
            color = myx, add = "jitter", legend = "none", ylab = myylab) +
    rotate_x_text(angle = 45) +
    geom_hline(yintercept = mean(yvalues, na.rm = TRUE), linetype = 2) +
    stat_compare_means(method = "anova", label.y = maxy * 1.10) +
    stat_compare_means(label = "p.format", size = 2.5, method = "t.test",
                       ref.group = ".all.", label.y = maxy * 1.05)
}

# Sort every gradebook by its simplified letter grade.
# NOTE(review): presumably done so the box-plot groups appear in grade order -- confirm.
gradesf18 <- gradesf18[order(gradesf18[["simpleLetter"]]), ]
gradesf19 <- gradesf19[order(gradesf19[["simpleLetter"]]), ]
gradesf21 <- gradesf21[order(gradesf21[["simpleLetter"]]), ]


# Written exams: for each semester, one plot grouped by the simplified letter
# and one grouped by the full letter grade.
# NOTE(review): F21 reads the *Current* score while F18/F19 read the *Final*
# score -- confirm the difference is intended.
print(plotGGbox(df = gradesf18, myx = "simpleLetter",
                myy = "Science.Practice.Exercises.Unposted.Final.Score",
                mytitle = "Fall18: Written Semester Exams:Wrapped Letters",
                myylab = "Average Semester Exams"))

print(plotGGbox(df = gradesf18, myx = "Unposted.Final.Grade",
                myy = "Science.Practice.Exercises.Unposted.Final.Score",
                mytitle = "Fall18: Written Semester Exams:All Letters",
                myylab = "Average Semester Exams"))

print(plotGGbox(df = gradesf19, myx = "simpleLetter",
                myy = "Open.Ended.Written.Exams.Unposted.Final.Score",
                mytitle = "Fall19: Written Semester Exams:Wrapped Letters",
                myylab = "Average Semester Exams"))

print(plotGGbox(df = gradesf19, myx = "Unposted.Final.Grade",
                myy = "Open.Ended.Written.Exams.Unposted.Final.Score",
                mytitle = "Fall19: Written Semester Exams:All Letters",
                myylab = "Average Semester Exams"))

print(plotGGbox(df = gradesf21, myx = "simpleLetter",
                myy = "Open.Ended.Semester.Exams.Unposted.Current.Score",
                mytitle = "Fall21: Written Semester Exams:Wrapped Letters",
                myylab = "Average Semester Exams"))

print(plotGGbox(df = gradesf21, myx = "Unposted.Final.Grade",
                myy = "Open.Ended.Semester.Exams.Unposted.Current.Score",
                mytitle = "Fall21: Written Semester Exams:All Letters",
                myylab = "Average Semester Exams"))

10.2 Milestone exams

# Milestone average by simplified letter grade, one plot per semester.
print(plotGGbox(df = gradesf18, myx = "simpleLetter",
                myy = "Milestones.Unposted.Current.Score",
                mytitle = "Fall18: Milestone Avg", myylab = "Avg Milestone grade"))

print(plotGGbox(df = gradesf19, myx = "simpleLetter",
                myy = "Milestones.Unposted.Current.Score",
                mytitle = "Fall19: Milestone Avg", myylab = "Avg Milestone grade"))

print(plotGGbox(df = gradesf21, myx = "simpleLetter",
                myy = "Milestones.Unposted.Current.Score",
                mytitle = "Fall21: Milestone Avg", myylab = "Avg Milestone grade"))

10.3 HW

# Homework average by simplified letter grade, one plot per semester.
print(plotGGbox(df = gradesf18, myx = "simpleLetter",
                myy = "Homework.Current.Score",
                mytitle = "Fall18: Homework Avg", myylab = "Avg Homework grade"))

print(plotGGbox(df = gradesf19, myx = "simpleLetter",
                myy = "Homework.Current.Score",
                mytitle = "Fall19: Homework Avg", myylab = "Avg Homework grade"))

print(plotGGbox(df = gradesf21, myx = "simpleLetter",
                myy = "Homework.Current.Score",
                mytitle = "Fall21: Homework Avg", myylab = "Avg Homework grade"))

10.4 Lab reports

# Lab-report average by simplified letter grade, one plot per semester
# (the F21 gradebook names the category column differently).
print(plotGGbox(df = gradesf18, myx = "simpleLetter",
                myy = "Lab..Reporting.Unposted.Current.Score",
                mytitle = "Fall18: Lab reports Avg", myylab = "Avg Lab reports grade"))

print(plotGGbox(df = gradesf19, myx = "simpleLetter",
                myy = "Lab..Reporting.Unposted.Current.Score",
                mytitle = "Fall19: Lab reports Avg", myylab = "Avg Lab reports grade"))

print(plotGGbox(df = gradesf21, myx = "simpleLetter",
                myy = "Lab.Reports.Unposted.Current.Score",
                mytitle = "Fall21: Lab reports Avg", myylab = "Avg Lab reports grade"))

10.5 Missed preclass

# Number of missed preclass assignments by simplified letter grade, one plot
# per semester.
print(plotGGbox(df = gradesf18, myx = "simpleLetter", myy = "missPreclass",
                mytitle = "Fall18: Missed preclass instances", myylab = "Missed preclass"))

print(plotGGbox(df = gradesf19, myx = "simpleLetter", myy = "missPreclass",
                mytitle = "Fall19: Missed preclass instances", myylab = "Missed preclass"))

print(plotGGbox(df = gradesf21, myx = "simpleLetter", myy = "missPreclass",
                mytitle = "Fall21: Missed preclass instances", myylab = "Missed preclass"))

10.6 Missed prelab

# Number of missed prelabs by simplified letter grade, one plot per semester.
# The y-axis label now reads "Missed prelab"; it previously said
# "Missed preclass", which contradicted both the plotted column and the titles.
print(plotGGbox(gradesf18, "simpleLetter", "missPrelab",
                "Fall18: Missed prelab instances", "Missed prelab"))

print(plotGGbox(gradesf19, "simpleLetter", "missPrelab",
                "Fall19: Missed prelab instances", "Missed prelab"))

print(plotGGbox(gradesf21, "simpleLetter", "missPrelab",
                "Fall21: Missed prelab instances", "Missed prelab"))

10.7 Missed videos

# One student never opened a single video, so their count is NA and would
# break the statistics; rows with a missing count are dropped before plotting.
print(plotGGbox(df = gradesf21[!is.na(gradesf21$VideosMissed), ],
                myx = "simpleLetter", myy = "VideosMissed",
                mytitle = "Fall21: Videos not watched", myylab = "Missed videos #"))

11 Good milestone practices

12 Bundling students: Clustering based on effort and performance

Too often, educators refer to students as “they”, as if they were a homogeneous group. When trying to use a finer granularity, we may label “good students” and “bad students” based on performance. However, even within these two groups the characteristics are not similar. In fact, as instructors we want to identify the group of students that we can help the most. I propose that we cluster students along two axes, performance and effort, yielding at least four different groups:

WE-LP (wrong/low effort and low performance)
HE-LP (high effort and low performance)
LE-AP (low/lazy effort - acceptable performance)
HE-AP (high effort - acceptable performance)

12.1 Measuring effort

We have several indicators that, when combined, can be used to measure effort. We will start simple and look at what Canvas reports as Canvas participation.

# Load the Fall 2021 Canvas participation export (path is relative to the
# user's home directory).
part_f21 = read.csv("~/Teaching/Grades_and_SRT/Fall2021/participation_f21.csv",header = TRUE )
# Scatter plot of participations against overall course grade. No axis labels
# are supplied, so R labels the axes with the argument expressions themselves.
plot(part_f21$Overall.course.grade,part_f21$Participations)

# Same view for page views.
plot(part_f21$Overall.course.grade,part_f21$Page.Views)

# Disabled: joining the participation data onto the F21 gradebook by student id.
# NOTE(review): merge() re-sorts rows by the key by default; if this is enabled,
# check that nothing relies on the current row order of gradesf21.
#part_f21 = 
#align
#gradesf21 = merge(gradesf21,part_f21,by.x = "SIS.User.ID" ,by.y = "SIS.Id", all.x = TRUE)

CHEM1331 Performance Analysis

Xavier Prat-Resina

2022-08-08