if (Sys.info()["sysname"] == "Windows"){
}else{
m1f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1.csv",header = TRUE)
m2f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2.csv",header = TRUE)
m3f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3.csv",header = TRUE)
m4f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4.csv",header = TRUE)
p1f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1_practice.csv",header = TRUE)
p2f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2_practice.csv",header = TRUE)
p3f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3_practice.csv",header = TRUE)
p4f23 = read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4_practice.csv",header = TRUE)
justAsk = read.csv("~/Teaching/Grades_and_SRT/Fall2023/justAsk_f23.csv",header = TRUE)
justAsk$name <- paste(justAsk$First, justAsk$Last, sep = " ")
justAsk = justAsk[,c(6,4,5)]
justAsk$justAsk1 = ifelse(
grepl("2|3",justAsk$X1st.half) ,"Often",
ifelse( grepl("1",justAsk$X1st.half ), "Seldom", "Never")
)
justAsk$justAsk2 = ifelse(
grepl("2|3",justAsk$X2nd.half) ,"Often",
ifelse( grepl("1",justAsk$X2nd.half ), "Seldom", "Never")
)
}
library(lubridate)
filterDate = function(df,date_column,thisDate){
# Assuming df is your DataFrame with a column named date_column checking all rows completed BEFORE thisDate
# Convert the date_column to a POSIXct object
df <- df %>%
mutate(date_column = as.POSIXct(date_column, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"))
# Format the date_column to "YYYY-MM-DD" format
df <- df %>%
mutate(date_column = format(date_column, format = "%Y-%m-%d"))
df <- df %>%
filter(submitted <= as.POSIXct(thisDate, tz = "UTC"))
return(df)
}
library(dplyr)
classifyStudents = function(df_practice,df_score,threshold){
#name is the name of students
grouped_df <- df_practice %>% group_by(name)
summary_df <- grouped_df %>%
summarize(
practice_attempt = max(attempt),
practice_score = max(score)
) %>%
ungroup() # Remove the grouping
final_summary_df <- left_join(df_score, summary_df, by = "name") %>%
mutate_at(vars(practice_attempt, practice_score), ~ifelse(is.na(.), 0, .))
final = merge(df_score,final_summary_df, by="name", all.x = TRUE)
final <- final %>%
mutate(by_attempt = case_when(
#not practicing
practice_attempt == 0 & score.x > threshold ~ "LEAP",
practice_attempt == 0 & score.x <= threshold ~ "WELP",
#practicing
practice_attempt > 0 & score.x >= threshold ~ "HEAP",
practice_attempt > 0 & score.x < threshold ~ "HELP",
TRUE ~ "Unknown" # Handle any other cases (optional)
))
final <- final %>%
mutate(by_practice80 = case_when(
#not practicing
practice_score < threshold & score.x >= threshold ~ "LEAP",
practice_score < threshold & score.x < threshold ~ "WELP",
#practicing
practice_score >= threshold & score.x >= threshold ~ "HEAP",
practice_score >= threshold & score.x < threshold ~ "HELP",
TRUE ~ "Unknown" # Handle any other cases (optional)
))
return(final)
}
calculate_transition <- function(df, col1_name, col2_name) {
# Count of transitions
#help_to_welp <- sum(df[[col1_name]] == 'HELP' & df[[col2_name]] == 'WELP')
welp_to_help <- sum(df[[col1_name]] == 'WELP' & df[[col2_name]] == 'HELP')
welp_to_heap <- sum(df[[col1_name]] == 'WELP' & df[[col2_name]] == 'HEAP')
help_to_heap <- sum(df[[col1_name]] == 'HELP' & df[[col2_name]] == 'HEAP')
# Total count of rows
total_rows <- nrow(df)
# Calculating percentages
#help_to_welp_percent <- (help_to_welp / total_rows) * 100
welp_to_help_percent <- (welp_to_help / total_rows) * 100
welp_to_heap_percent <- (welp_to_heap / total_rows) * 100
help_to_heap_percent <- (help_to_heap / total_rows) * 100
# Create DataFrame of results
result_df <- data.frame(
Transition = c("WELP to HELP", "WELP to HEAP", "HELP to HEAP"),
Count = c(welp_to_help, welp_to_heap, help_to_heap),
Percentage = c(welp_to_help_percent, welp_to_heap_percent, help_to_heap_percent)
)
return(result_df)
}
calculate_transition2 <- function(df, col1_name, col2_name, categories) {
# Initialize transition counts
transition_counts <- matrix(0, nrow = length(categories), ncol = length(categories))
colnames(transition_counts) <- categories
rownames(transition_counts) <- categories
# Calculate transition counts
for (i in 1:length(categories)) {
for (j in 1:length(categories)) {
transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
}
}
# Total count of rows
total_rows <- nrow(df)
# Calculate transition percentages
transition_percentages <- (transition_counts / total_rows) * 100
# Create DataFrame of results
result_df <- as.data.frame(transition_counts)
result_df$Transition <- rownames(transition_counts)
rownames(result_df) <- NULL
result_df <- result_df[, c(ncol(result_df), 1:(ncol(result_df)-1))]
colnames(result_df)[1] <- "Transition"
#result_df$Percentage <- transition_percentages[lower.tri(transition_percentages, diag = TRUE)]
#result_df$Percentage <- round(transition_percentages,digits=2)
return(result_df)
}
categories <- c("HELP", "WELP", "HEAP", "LEAP","DROP")
calculate_transition3 <- function(df, col1_name, col2_name, categories) {
# Initialize transition counts
transition_counts <- matrix(0, nrow = length(categories), ncol = length(categories))
colnames(transition_counts) <- categories
rownames(transition_counts) <- categories
# Calculate transition counts
for (i in 1:length(categories)) {
for (j in 1:length(categories)) {
transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
}
}
# Total count of rows
total_rows <- nrow(df)
# Calculate transition percentages
transition_percentages <- (transition_counts / total_rows) * 100
# Initialize a list to store results
result_list <- list()
# Loop through the transition counts and store them in the list
for (i in 1:length(categories)) {
for (j in 1:length(categories)) {
transition <- paste(categories[i], "to", categories[j])
result_list[[length(result_list) + 1]] <- c(Transition = transition, Count = transition_counts[i, j], Percentage = transition_percentages[i, j])
}
}
# Create DataFrame of results
result_df <- do.call(rbind, result_list)
return(result_df)
}
calculate_transition4 <- function(df, col1_name, col2_name, categories) {
# Initialize transition counts
transition_counts <- matrix(0, nrow = length(categories) + 1, ncol = length(categories) + 1)
colnames(transition_counts) <- c(categories, "Total")
rownames(transition_counts) <- c(categories, "Total")
# Calculate transition counts
for (i in 1:length(categories)) {
for (j in 1:length(categories)) {
transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
}
}
# Calculate row sums
transition_counts[, "Total"] <- rowSums(transition_counts)
# Calculate column sums
transition_counts["Total", ] <- colSums(transition_counts)
return(transition_counts)
}
makeHist = function(df,column_name,mytitle){
ggplot(df, aes(x = !!sym(column_name))) +
geom_histogram(fill = "skyblue", color = "black") +
labs(title = mytitle, x = "score / 100", y = "Number of students")
}
Introduction and
preamble
- We often talk about students either as one monolithic group or on
the other extreme, an each student is world case.
- We must be able to talk about students performance in a way that we
can easily convey to colleagues and institutions a sense of
performance
- We define the efficiency as the ratio between performance and
\[
Efficiency = \frac{performance}{effort}
\]
plot(1, 1, xlim = c(0, 3), ylim = c(0, 3), type = "n", xlab = "Performance", ylab = "Effort", axes = FALSE, xaxt = "n", yaxt = "n" )
text(2, 1, "High Efficiency")
text(1, 2, "Low Efficiency")
lines(c(0, 3), c(1.5, 1.5), lty = 2)
arrows(0, 0, 3.1, 0, length = 0.1)
arrows(0, 0, 0, 3.1, length = 0.1)
- While performance can be easily identified as the score, the
quantification of effort is more problematic to identify and
quantify
- Here we propose three different ways to quantify effort: video
watching, quiz preparation, justAsk attendance
- Regardless of how to quantify effort, we can always identify four
quadrants
plot(1, 1, xlim = c(0, 3), ylim = c(0, 3), type = "n", xlab = "Performance", ylab = "Effort", axes = FALSE, xaxt = "n", yaxt = "n" )
text(1, 1, "WELP")
text(1, 0.8, "Withdrawn Effort-Low Performance", cex = 0.5)
text(1, 2, "HELP")
text(1, 1.8, "High Effort-Low Performance", cex = 0.5)
text(2, 1, "LEAP")
text(2, 0.8, "Low Effort-Acceptable Performance", cex = 0.5)
text(2, 2, "HEAP")
text(2, 1.8, "High Effort-Acceptable Performance", cex = 0.5)
lines(c(1.5, 1.5), c(0, 3), lty = 2)
lines(c(0, 3), c(1.5, 1.5), lty = 2)
arrows(0, 0, 3.1, 0, length = 0.1)
arrows(0, 0, 0, 3.1, length = 0.1)
Practice Optional Quiz
to Measure Effort
- HEAP: passed milestone with 100 and practiced with a passing score
in the practice
- LEAP: passed the milestone with 100 without practicing (didn’t open
the practice or tried just once)
- HELP: did not pass the milestone (<80) but practiced at least
more than once
- WELP: did not pass the milestone (<80) and did not open the
practice or just once
#filter practice until the day of first attempt
#m1-at1 in f23 was delivered between oct 2nd and oct3rd of 2023
p1f23_f = filterDate(p1f23,"submitted","2023-10-03 23:59:59 UTC")
allstud = m1f23[which(m1f23$attempt == 1),]
allstud = allstud[,c("name","score")]
#build a df stating each students: times they tried, high score in practice, and score in milestone
m1f23_class = classifyStudents(p1f23_f,allstud,80)
#merge it with justAsk
m1f23_class = merge(m1f23_class, justAsk, by = "name", all.x = TRUE)
#BUILDING HERE THE MASSIVE DF allf23
allf23 = m1f23_class[,-c(2)]
colnames(allf23)[2] <- "score_m1"
colnames(allf23)[3] <- "practice_attempt_m1"
colnames(allf23)[4] <- "practice_score_m1"
colnames(allf23)[5] <- "by_attempt_m1"
colnames(allf23)[6] <- "by_practice80_m1"
#other attempts just use the highest score in the milestone
allstud <- m1f23 %>%
group_by(name) %>%
slice(which.max(score)) %>%
ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p1f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m1_all"
colnames(thisOne)[3] <- "practice_attempt_m1_all"
colnames(thisOne)[4] <- "practice_score_m1_all"
colnames(thisOne)[5] <- "by_attempt_m1_all"
colnames(thisOne)[6] <- "by_practice80_m1_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)
#M2
#M2 was done by Thursday Nov 2nd
# p2f23 score was
p2f23$score = p2f23$score/12*100
p2f23_f = filterDate(p2f23,"submitted","2023-11-02 23:59:59 UTC")
allstud = m2f23[which(m2f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m2f23_class = classifyStudents(p2f23_f,allstud,80)
m2f23_class = m2f23_class[,-c(2)]
colnames(m2f23_class)[2] <- "score_m2"
colnames(m2f23_class)[3] <- "practice_attempt_m2"
colnames(m2f23_class)[4] <- "practice_score_m2"
colnames(m2f23_class)[5] <- "by_attempt_m2"
colnames(m2f23_class)[6] <- "by_practice80_m2"
allf23 = merge(allf23,m2f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m2f23 %>%
group_by(name) %>%
slice(which.max(score)) %>%
ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p2f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m2_all"
colnames(thisOne)[3] <- "practice_attempt_m2_all"
colnames(thisOne)[4] <- "practice_score_m2_all"
colnames(thisOne)[5] <- "by_attempt_m2_all"
colnames(thisOne)[6] <- "by_practice80_m2_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)
#M3
# by the end of Thu Nov 30th everyone should have attempted it
p3f23$score = p3f23$score/12*100
p3f23_f = filterDate(p3f23,"submitted","2023-11-30 23:59:59 UTC")
allstud = m3f23[which(m3f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m3f23_class = classifyStudents(p3f23_f,allstud,80)
m3f23_class = m3f23_class[,-c(2)]
colnames(m3f23_class)[2] <- "score_m3"
colnames(m3f23_class)[3] <- "practice_attempt_m3"
colnames(m3f23_class)[4] <- "practice_score_m3"
colnames(m3f23_class)[5] <- "by_attempt_m3"
colnames(m3f23_class)[6] <- "by_practice80_m3"
allf23 = merge(allf23,m3f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m3f23 %>%
group_by(name) %>%
slice(which.max(score)) %>%
ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p3f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m3_all"
colnames(thisOne)[3] <- "practice_attempt_m3_all"
colnames(thisOne)[4] <- "practice_score_m3_all"
colnames(thisOne)[5] <- "by_attempt_m3_all"
colnames(thisOne)[6] <- "by_practice80_m3_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)
#M4
# by the end of Dec 12th everyone should have attempted it
p4f23$score = p4f23$score/12*100
p4f23_f = filterDate(p4f23,"submitted","2023-12-12 23:59:59 UTC")
allstud = m4f23[which(m4f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m4f23_class = classifyStudents(p4f23_f,allstud,80)
m4f23_class = m4f23_class[,-c(2)]
colnames(m4f23_class)[2] <- "score_m4"
colnames(m4f23_class)[3] <- "practice_attempt_m4"
colnames(m4f23_class)[4] <- "practice_score_m4"
colnames(m4f23_class)[5] <- "by_attempt_m4"
colnames(m4f23_class)[6] <- "by_practice80_m4"
allf23 = merge(allf23,m4f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m4f23 %>%
group_by(name) %>%
slice(which.max(score)) %>%
ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p4f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m4_all"
colnames(thisOne)[3] <- "practice_attempt_m4_all"
colnames(thisOne)[4] <- "practice_score_m4_all"
colnames(thisOne)[5] <- "by_attempt_m4_all"
colnames(thisOne)[6] <- "by_practice80_m4_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)
#substitute the NA from drop students into DROP
bycolumns = grepl("^by",names(allf23))
allf23[,bycolumns] = lapply(
allf23[,bycolumns],
function(x) ifelse(is.na(x), "DROP", x)
)
Tables
library(knitr)
generate_category_table <- function(data, column_name,mytitle) {
# Calculate counts
table_counts <- table(data[[column_name]])
# Create a data frame with counts and percentages
table_df <- data.frame(Category = names(table_counts),
Count = as.numeric(table_counts),
Percentage = paste(round(prop.table(table_counts) * 100,2),"%"))
table_df = table_df[, !grepl("Percentage.Var1", names(table_df))]
# Use kable to create a formatted table
table_output <- kable(table_df,
caption = mytitle,
col.names = c("Category", "Count", "Percentage"),
align = c("l", "c", "c"),
format = "markdown")
return(table_output)
}
# Example usage:
# Assuming your dataframe is df and the categorical variable is in column1
#table_string <- generate_category_table(m1f23_class, "by_attempt","Using times practiced to quantify effort")
# Print the table in the R Markdown document
#table_string
#table_string <- generate_category_table(m1f23_class, "by_practice80","Using score in practice to quantify effort")
# Print the table in the R Markdown document
#table_string
plotEffortVsPerformance <- function(df,practice_col,score_col,attempts_col,justAsk_col,mytitle) {
df = df[,c(practice_col,score_col,attempts_col,justAsk_col),drop = FALSE]
df = df[complete.cases(df),]
p <- ggplot(df, aes_string(y = practice_col, x = score_col, size = attempts_col)) +
geom_point(na.rm = TRUE) +
scale_size_continuous(range = c(1, 5),breaks = c(0, 1, 3, 10)) + # Adjust point size range as needed
labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
scale_y_continuous(breaks = seq(0, 100, by = 10))
q <- ggplot(df, aes_string(y = practice_col, x = score_col)) +
geom_point(na.rm = TRUE) +
scale_size_continuous(range = c(1, 5)) + # Adjust point size range as needed
labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
theme_minimal() +
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
scale_y_continuous(breaks = seq(0, 100, by = 10))
r <- ggplot(df, aes_string(y = practice_col, x = score_col, size = attempts_col, color = justAsk_col)) +
geom_point(na.rm = TRUE) +
scale_size_continuous(range = c(1, 5),breaks = c(0, 1, 3, 10), name = "Attempts") + # Adjust point size range as needed
scale_color_manual(values = c("Often" = "green", "Seldom" = "orange", "Never" = "black"),
name = "Just Ask",
labels = c("Often" = "Often (green)", "Seldom" = "Seldom (orange)", "Never" = "Never (black)")) +
labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
theme_minimal() +
#scale_x_continuous(breaks = pretty(range(df[[practice_col]]), n = 10)) +
#scale_y_continuous(breaks = pretty(range(df[[score_col]]), n = 10))
scale_x_continuous(breaks = seq(0, 100, by = 10)) +
scale_y_continuous(breaks = seq(0, 100, by = 10))
# Print the plot
print(p)
print(q)
print(r)
}
Milestone 1
Distribution table M1 - 1st attempt and at the end
makeHist(allf23,"score_m1","Milestone 1 - 1st attempt")
table_string <- generate_category_table(allf23, "by_practice80_m1","Milestone 1 - attempt 1")
table_string
Milestone 1 - attempt 1
HEAP |
80 |
49.08 % |
HELP |
14 |
8.59 % |
LEAP |
27 |
16.56 % |
WELP |
42 |
25.77 % |
table_string <- generate_category_table(allf23, "by_practice80_m1_all","Milestone 1 - all attempt")
table_string
Milestone 1 - all attempt
HEAP |
107 |
65.64 % |
HELP |
4 |
2.45 % |
LEAP |
35 |
21.47 % |
WELP |
17 |
10.43 % |
result = calculate_transition(allf23,"by_practice80_m1","by_practice80_m1_all")
kable(result, format = "markdown", caption = "M1: Transitions from 1st attempt to final")
M1: Transitions from 1st attempt to final
WELP to HELP |
3 |
1.840491 |
WELP to HEAP |
10 |
6.134969 |
HELP to HEAP |
13 |
7.975460 |
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m1_all",categories)
kable(result, format = "markdown", caption = "M1 1st att vs M1 all ")
M1 1st att vs M1 all
HELP |
1 |
0 |
13 |
0 |
0 |
14 |
WELP |
3 |
17 |
10 |
12 |
0 |
42 |
HEAP |
0 |
0 |
80 |
0 |
0 |
80 |
LEAP |
0 |
0 |
4 |
23 |
0 |
27 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
4 |
17 |
107 |
35 |
0 |
163 |
Scatter Plots
#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m1","score_m1","practice_attempt_m1","justAsk1","Milestone 1 - 1st attempt")
plotEffortVsPerformance(allf23,"practice_score_m1_all","score_m1_all","practice_attempt_m1_all","justAsk1","Milestone 1 - All attempts")
Milestone 2
Distribution table M2 - 1st attempt and at the end
makeHist(allf23,"score_m2","Milestone 2 - 1st attempt")
table_string <- generate_category_table(allf23, "by_practice80_m2","Milestone 2 - attempt 1")
table_string
Milestone 2 - attempt 1
DROP |
5 |
3.07 % |
HEAP |
91 |
55.83 % |
HELP |
15 |
9.2 % |
LEAP |
18 |
11.04 % |
WELP |
34 |
20.86 % |
table_string <- generate_category_table(allf23, "by_practice80_m2_all","Milestone 2 - all attempt")
table_string
Milestone 2 - all attempt
DROP |
5 |
3.07 % |
HEAP |
110 |
67.48 % |
HELP |
12 |
7.36 % |
LEAP |
17 |
10.43 % |
WELP |
19 |
11.66 % |
result = calculate_transition(allf23,"by_practice80_m2","by_practice80_m2_all")
kable(result, format = "markdown", caption = "M2: Transitions from 1st attempt to final")
M2: Transitions from 1st attempt to final
WELP to HELP |
4 |
2.453988 |
WELP to HEAP |
8 |
4.907976 |
HELP to HEAP |
7 |
4.294479 |
result = calculate_transition4(allf23,"by_practice80_m2","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M2 1st att vs M2 all ")
M2 1st att vs M2 all
HELP |
8 |
0 |
7 |
0 |
0 |
15 |
WELP |
4 |
19 |
8 |
3 |
0 |
34 |
HEAP |
0 |
0 |
91 |
0 |
0 |
91 |
LEAP |
0 |
0 |
4 |
14 |
0 |
18 |
DROP |
0 |
0 |
0 |
0 |
5 |
5 |
Total |
12 |
19 |
110 |
17 |
5 |
163 |
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")
M1 vs M2: Transitions 1st attempt
HELP |
3 |
3 |
5 |
1 |
2 |
14 |
WELP |
7 |
26 |
3 |
4 |
2 |
42 |
HEAP |
1 |
5 |
69 |
5 |
0 |
80 |
LEAP |
4 |
0 |
14 |
8 |
1 |
27 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
15 |
34 |
91 |
18 |
5 |
163 |
result = calculate_transition3(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")
M1 vs M2: Transitions 1st attempt
HELP to HELP |
3 |
1.84049079754601 |
HELP to WELP |
3 |
1.84049079754601 |
HELP to HEAP |
5 |
3.06748466257669 |
HELP to LEAP |
1 |
0.613496932515337 |
HELP to DROP |
2 |
1.22699386503067 |
WELP to HELP |
7 |
4.29447852760736 |
WELP to WELP |
26 |
15.9509202453988 |
WELP to HEAP |
3 |
1.84049079754601 |
WELP to LEAP |
4 |
2.45398773006135 |
WELP to DROP |
2 |
1.22699386503067 |
HEAP to HELP |
1 |
0.613496932515337 |
HEAP to WELP |
5 |
3.06748466257669 |
HEAP to HEAP |
69 |
42.3312883435583 |
HEAP to LEAP |
5 |
3.06748466257669 |
HEAP to DROP |
0 |
0 |
LEAP to HELP |
4 |
2.45398773006135 |
LEAP to WELP |
0 |
0 |
LEAP to HEAP |
14 |
8.58895705521472 |
LEAP to LEAP |
8 |
4.9079754601227 |
LEAP to DROP |
1 |
0.613496932515337 |
DROP to HELP |
0 |
0 |
DROP to WELP |
0 |
0 |
DROP to HEAP |
0 |
0 |
DROP to LEAP |
0 |
0 |
DROP to DROP |
0 |
0 |
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions all attempt ")
M1 vs M2: Transitions all attempt
HELP |
1 |
1 |
0 |
0 |
2 |
4 |
WELP |
4 |
10 |
2 |
1 |
0 |
17 |
HEAP |
3 |
4 |
91 |
8 |
1 |
107 |
LEAP |
4 |
4 |
17 |
8 |
2 |
35 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
12 |
19 |
110 |
17 |
5 |
163 |
Scatter Plots
#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m2","score_m2","practice_attempt_m2","justAsk1","Milestone 2 - 1st attempt")
plotEffortVsPerformance(allf23,"practice_score_m2_all","score_m2_all","practice_attempt_m2_all","justAsk1","Milestone 2 - All attempts")
Milestone 3
Distribution table M3 - 1st attempt and at the end
makeHist(allf23,"score_m3","Milestone 3 - 1st attempt")
table_string <- generate_category_table(allf23, "by_practice80_m3","Milestone 3 - attempt 1")
table_string
Milestone 3 - attempt 1
DROP |
12 |
7.36 % |
HEAP |
77 |
47.24 % |
HELP |
11 |
6.75 % |
LEAP |
27 |
16.56 % |
WELP |
36 |
22.09 % |
table_string <- generate_category_table(allf23, "by_practice80_m3_all","Milestone 3 - all attempt")
table_string
Milestone 3 - all attempt
DROP |
12 |
7.36 % |
HEAP |
101 |
61.96 % |
HELP |
8 |
4.91 % |
LEAP |
26 |
15.95 % |
WELP |
16 |
9.82 % |
result = calculate_transition4(allf23,"by_practice80_m3","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M3 1st vs M3 all attempts ")
M3 1st vs M3 all attempts
HELP |
3 |
0 |
8 |
0 |
0 |
11 |
WELP |
5 |
16 |
10 |
5 |
0 |
36 |
HEAP |
0 |
0 |
77 |
0 |
0 |
77 |
LEAP |
0 |
0 |
6 |
21 |
0 |
27 |
DROP |
0 |
0 |
0 |
0 |
12 |
12 |
Total |
8 |
16 |
101 |
26 |
12 |
163 |
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m3",categories)
kable(result, format = "markdown", caption = "M1 1st vs M3 1st ")
M1 1st vs M3 1st
HELP |
2 |
4 |
5 |
1 |
2 |
14 |
WELP |
3 |
25 |
1 |
5 |
8 |
42 |
HEAP |
5 |
5 |
57 |
12 |
1 |
80 |
LEAP |
1 |
2 |
14 |
9 |
1 |
27 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
11 |
36 |
77 |
27 |
12 |
163 |
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M1 vs M3 all attempts ")
M1 vs M3 all attempts
HELP |
1 |
0 |
1 |
0 |
2 |
4 |
WELP |
2 |
7 |
1 |
3 |
4 |
17 |
HEAP |
1 |
7 |
86 |
10 |
3 |
107 |
LEAP |
4 |
2 |
13 |
13 |
3 |
35 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
8 |
16 |
101 |
26 |
12 |
163 |
result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M2 vs M3 all attempts ")
M2 vs M3 all attempts
HELP |
3 |
4 |
5 |
0 |
0 |
12 |
WELP |
4 |
5 |
2 |
2 |
6 |
19 |
HEAP |
1 |
3 |
89 |
17 |
0 |
110 |
LEAP |
0 |
4 |
5 |
7 |
1 |
17 |
DROP |
0 |
0 |
0 |
0 |
5 |
5 |
Total |
8 |
16 |
101 |
26 |
12 |
163 |
Scatter Plots
#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m3","score_m3","practice_attempt_m3","justAsk2","Milestone 3 - 1st attempt")
plotEffortVsPerformance(allf23,"practice_score_m3_all","score_m3_all","practice_attempt_m3_all","justAsk2","Milestone 3 - All attempts")
Milestone 4
Distribution table M4 - 1st attempt and at the end
makeHist(allf23,"score_m4","Milestone 4 - 1st attempt")
table_string <- generate_category_table(allf23, "by_practice80_m4","Milestone 4 - attempt 1")
table_string
Milestone 4 - attempt 1
DROP |
15 |
9.2 % |
HEAP |
72 |
44.17 % |
HELP |
14 |
8.59 % |
LEAP |
18 |
11.04 % |
WELP |
44 |
26.99 % |
table_string <- generate_category_table(allf23, "by_practice80_m4_all","Milestone 4 - all attempt")
table_string
Milestone 4 - all attempt
DROP |
15 |
9.2 % |
HEAP |
91 |
55.83 % |
HELP |
12 |
7.36 % |
LEAP |
25 |
15.34 % |
WELP |
20 |
12.27 % |
result = calculate_transition4(allf23,"by_practice80_m4","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M4 1st vs M4 all attempts ")
M4 1st vs M4 all attempts
HELP |
4 |
0 |
10 |
0 |
0 |
14 |
WELP |
8 |
20 |
5 |
11 |
0 |
44 |
HEAP |
0 |
0 |
72 |
0 |
0 |
72 |
LEAP |
0 |
0 |
4 |
14 |
0 |
18 |
DROP |
0 |
0 |
0 |
0 |
15 |
15 |
Total |
12 |
20 |
91 |
25 |
15 |
163 |
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M1 vs M4 all attempts ")
M1 vs M4 all attempts
HELP |
0 |
1 |
0 |
1 |
2 |
4 |
WELP |
0 |
7 |
0 |
3 |
7 |
17 |
HEAP |
4 |
8 |
81 |
11 |
3 |
107 |
LEAP |
8 |
4 |
10 |
10 |
3 |
35 |
DROP |
0 |
0 |
0 |
0 |
0 |
0 |
Total |
12 |
20 |
91 |
25 |
15 |
163 |
result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M2 vs M4 all attempts ")
M2 vs M4 all attempts
HELP |
2 |
5 |
1 |
3 |
1 |
12 |
WELP |
1 |
7 |
1 |
2 |
8 |
19 |
HEAP |
7 |
6 |
83 |
14 |
0 |
110 |
LEAP |
2 |
2 |
6 |
6 |
1 |
17 |
DROP |
0 |
0 |
0 |
0 |
5 |
5 |
Total |
12 |
20 |
91 |
25 |
15 |
163 |
result = calculate_transition4(allf23,"by_practice80_m3_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M3 vs M4 all attempts ")
M3 vs M4 all attempts
HELP |
3 |
5 |
0 |
0 |
0 |
8 |
WELP |
3 |
8 |
1 |
1 |
3 |
16 |
HEAP |
6 |
3 |
78 |
14 |
0 |
101 |
LEAP |
0 |
4 |
12 |
10 |
0 |
26 |
DROP |
0 |
0 |
0 |
0 |
12 |
12 |
Total |
12 |
20 |
91 |
25 |
15 |
163 |
Scatter Plots
#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m4","score_m4","practice_attempt_m4","justAsk2","Milestone 4 - 1st attempt")
plotEffortVsPerformance(allf23,"practice_score_m4_all","score_m4_all","practice_attempt_m4_all","justAsk2","Milestone 4 - All attempts")
How much does it change if we use <70 as the cutting for high and
low performance?
All together: Flow
Sankey diagrams
library(ggplot2)
library(ggalluvial)
library(scales)
#converting alldf3 into a format that alluvial can understand
makeitAlluvial2 = function(df,colexam1,colexam2,name,mytitle){
melted_df = df[,c(name,colexam1,colexam2)]
melted_df = reshape(melted_df,
varying = c(colexam1,colexam2),
v.names = "group",
timevar = "exam",
times = c(colexam1,colexam2),
direction = "long"
)
rownames(melted_df) <- NULL
ggplot(melted_df,
aes(x = exam, stratum = group, alluvium = id, fill = group)) +
scale_x_discrete(expand = c(.1, .1)) +
geom_flow() +
geom_stratum(alpha = .5) +
theme_minimal() +
geom_text(stat = "stratum",
aes(label = percent(after_stat(prop), accuracy = .1)))+
labs(title = mytitle, x = "", y = "Number of students")
}
makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m1_all","name","Milestone 1: 1st vs final attempt")
makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m3","name","Milestone 1 vs 3: 1st attempt")
makeitAlluvial2(allf23,"by_practice80_m1_all","by_practice80_m3_all","name","Milestone 1 vs 3: final attempt")
#makeitAlluvial2(allf23,"by_practice80_m1","grade_category","name","Milestone 1 1st attempt vs final letter grade")
Looking at their
final course grade
finalgrades = read.csv("~/Teaching/Grades_and_SRT/Fall2023/chem1331_f23_grades.csv", header = TRUE)
allf23 = merge(allf23,finalgrades, by = "name", all.x = TRUE)
categorize_grades <- function(grade) {
if (is.na(grade)) {
return("W")
} else if (startsWith(grade, "A")) {
return("A")
} else if (startsWith(grade, "B")) {
return("B")
} else if (startsWith(grade, "C")) {
return("C")
} else if (startsWith(grade, "D")) {
return("D")
} else if (startsWith(grade, "F")) {
return("F")
} else if (startsWith(grade, "W")) {
return("W")
} else {
return("Other")
}
}
allf23$grade_category <- sapply(allf23$finalLetter, categorize_grades)
# Calculate the fraction of rows for each category
grade_counts <- table(allf23$grade_category)
grade_fraction <- prop.table(grade_counts)
# Create a data frame for the pie chart
pie_data <- data.frame(grade_category = names(grade_fraction), fraction = grade_fraction)
# Plot the pie chart
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
labs(title = "Fraction of Rows by Letter Grade Category") +
theme_void() +
theme(legend.position = "right")
pie_data <- data.frame(grade_category = names(grade_fraction), grade_fraction = grade_fraction)
# Define colors for each category
grade_colors <- c("A" = "blue", "B" = "green", "C" = "yellow", "D" = "orange", "F" = "red", "W" = "gray")
# Plot the pie chart with custom colors
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y", start = 0) +
labs(title = "Fraction of Rows by Letter Grade Category") +
theme_void() +
theme(legend.position = "right") +
scale_fill_manual(values = grade_colors)
generate_col1_percentage_table <- function(df, gradeLetter_col, col1_col,myTitle) {
# Calculate percentages
#df = allf23
#gradeLetter_col = "grade_category"
#col1_col = "by_practice80_m1"
result <- df %>%
group_by(!!sym(gradeLetter_col), !!sym(col1_col)) %>%
summarize(percentage = n() / nrow(df) * 100) %>%
ungroup()
# Pivot the data for easier viewing
result_pivot <- tidyr::pivot_wider(result, names_from = !!sym(col1_col), values_from = percentage, values_fill = 0)
#result_pivot <- round(result_pivot, 1)
round_indices = 2:ncol(result_pivot)
result_pivot[, round_indices] <- round(result_pivot[, round_indices], 1)
#result_pivot[, round_indices] <- paste0(result_pivot[, round_indices], "%")
result_pivot[, round_indices] <- lapply(result_pivot[, round_indices], function(x) paste0(x, "%"))
# Format the table and return
return(kable(result_pivot, format = "markdown", caption = myTitle))
}
col1_percentage_table <- generate_col1_percentage_table(allf23, "grade_category", "by_practice80_m1","Milestone 1 - 1st attempt")
col1_percentage_table
Milestone 1 - 1st attempt
A |
31.3% |
0.6% |
11.7% |
0% |
B |
16% |
4.9% |
3.1% |
5.5% |
C |
1.2% |
1.8% |
1.2% |
11% |
D |
0% |
0% |
0% |
0.6% |
F |
0% |
0% |
0% |
1.2% |
W |
0.6% |
1.2% |
0.6% |
7.4% |
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m1_all","Milestone 1 - all attempts")
col1_percentage_table
Milestone 1 - all attempts
A |
34.4% |
9.2% |
0% |
0% |
B |
22.7% |
6.1% |
0.6% |
0% |
C |
6.7% |
4.3% |
3.7% |
0.6% |
D |
0% |
0% |
0.6% |
0% |
F |
0% |
0% |
0.6% |
0.6% |
W |
1.8% |
1.8% |
4.9% |
1.2% |
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3","Milestone 3 - 1st attempt")
col1_percentage_table
Milestone 3 - 1st attempt
A |
34.4% |
8% |
1.2% |
0% |
0% |
B |
12.9% |
6.7% |
6.7% |
3.1% |
0% |
C |
0% |
1.2% |
11% |
3.1% |
0% |
D |
0% |
0% |
0.6% |
0% |
0% |
F |
0% |
0.6% |
0% |
0.6% |
0% |
W |
0% |
0% |
2.5% |
0% |
7.4% |
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3_all","Milestone 3 - all attempts")
col1_percentage_table
Milestone 3 - all attempts
A |
36.8% |
6.7% |
0% |
0% |
0% |
B |
20.2% |
6.7% |
0.6% |
1.8% |
0% |
C |
4.9% |
1.8% |
3.1% |
5.5% |
0% |
D |
0% |
0% |
0.6% |
0% |
0% |
F |
0% |
0.6% |
0.6% |
0% |
0% |
W |
0% |
0% |
0% |
2.5% |
7.4% |