# Load the Fall 2023 exports: four milestone files, four practice-quiz files
# and the justAsk attendance sheet.
# NOTE(review): nothing is loaded when running on Windows (that branch was
# left empty) -- confirm whether Windows paths were meant to be added.
if (Sys.info()["sysname"] != "Windows") {
  # Milestone results, one CSV per milestone.
  m1f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1.csv", header = TRUE)
  m2f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2.csv", header = TRUE)
  m3f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3.csv", header = TRUE)
  m4f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4.csv", header = TRUE)

  # Optional practice quizzes, one CSV per milestone.
  p1f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1_practice.csv", header = TRUE)
  p2f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2_practice.csv", header = TRUE)
  p3f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3_practice.csv", header = TRUE)
  p4f23 <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4_practice.csv", header = TRUE)

  # justAsk attendance: build a "First Last" key, then keep that key plus
  # columns 4 and 5 of the sheet.
  # NOTE(review): columns 4 and 5 are presumably X1st.half / X2nd.half, which
  # the lines below read -- verify against the CSV.
  justAsk <- read.csv("~/Teaching/Grades_and_SRT/Fall2023/justAsk_f23.csv", header = TRUE)
  justAsk$name <- paste(justAsk$First, justAsk$Last)
  justAsk <- justAsk[, c(6, 4, 5)]

  # Attendance label per half of the term: a value containing 2 or 3 is
  # "Often", one containing 1 is "Seldom", anything else is "Never".
  justAsk$justAsk1 <- ifelse(
    grepl("2|3", justAsk$X1st.half),
    "Often",
    ifelse(grepl("1", justAsk$X1st.half), "Seldom", "Never")
  )
  justAsk$justAsk2 <- ifelse(
    grepl("2|3", justAsk$X2nd.half),
    "Often",
    ifelse(grepl("1", justAsk$X2nd.half), "Seldom", "Never")
  )
}
library(lubridate)
# Keep only the rows of `df` whose timestamp is at or before `thisDate`.
#
# df          data frame with one row per submission.
# date_column name (string) of the column holding the submission timestamp.
#             Character values are parsed as UTC; POSIXct/POSIXlt values are
#             used as they are.
# thisDate    cutoff, anything as.POSIXct() accepts; interpreted as UTC.
#
# Returns `df` with the same columns, restricted to the rows at or before the
# cutoff. Rows whose timestamp is missing are dropped.
filterDate <- function(df, date_column, thisDate) {
  stamps <- df[[date_column]]
  if (is.null(stamps)) {
    stop("column '", date_column, "' not found in df", call. = FALSE)
  }

  # Parse the column that was actually asked for. The previous version parsed
  # the literal column-name string, which added an all-NA `date_column` column.
  if (!inherits(stamps, "POSIXt")) {
    stamps <- as.POSIXct(as.character(stamps), tz = "UTC")
  }
  cutoff <- as.POSIXct(thisDate, tz = "UTC")

  # An NA comparison must not select a row.
  keep <- !is.na(stamps) & stamps <= cutoff
  kept <- df[keep, , drop = FALSE]
  rownames(kept) <- NULL
  kept
}
library(dplyr)
# Classify every student in `df_score` into an effort/performance quadrant.
#
# df_practice practice-quiz attempts; reads columns `name`, `attempt`, `score`.
# df_score    milestone scores; reads columns `name` and `score`.
# threshold   passing cut-off, applied to both milestone and practice scores.
#
# Returns a data frame sorted by `name`. When `df_score` holds only `name` and
# `score`, its columns are: name, score.x, score.y, practice_attempt,
# practice_score, by_attempt, by_practice80. `score.x` and `score.y` both carry
# the milestone score; that layout is kept because callers drop column 2 and
# rename the remaining columns by position.
#
# by_attempt    effort = practiced at all (practice_attempt > 0)
# by_practice80 effort = best practice score reached `threshold`
# In both schemes performance is "acceptable" when the milestone score is
# >= threshold. Labels: HEAP / LEAP / HELP / WELP, or "Unknown" when a value
# is missing.
classifyStudents <- function(df_practice, df_score, threshold) {
  # Highest attempt number and best score each student reached in practice.
  practice_summary <- df_practice %>%
    group_by(name) %>%
    summarize(
      practice_attempt = max(attempt),
      practice_score = max(score)
    ) %>%
    ungroup()

  # Students with no practice rows get 0 attempts and a practice score of 0.
  scored <- left_join(df_score, practice_summary, by = "name") %>%
    mutate(
      practice_attempt = ifelse(is.na(practice_attempt), 0, practice_attempt),
      practice_score = ifelse(is.na(practice_score), 0, practice_score)
    )

  # Joining df_score back in is what creates the score.x / score.y pair.
  final <- merge(df_score, scored, by = "name", all.x = TRUE)

  final %>%
    mutate(
      by_attempt = case_when(
        # not practicing (same >= / < cut-off as the practicing branch, so a
        # score exactly at the threshold counts as passed in both)
        practice_attempt == 0 & score.x >= threshold ~ "LEAP",
        practice_attempt == 0 & score.x < threshold ~ "WELP",
        # practicing
        practice_attempt > 0 & score.x >= threshold ~ "HEAP",
        practice_attempt > 0 & score.x < threshold ~ "HELP",
        TRUE ~ "Unknown"
      ),
      by_practice80 = case_when(
        # practice score below the threshold
        practice_score < threshold & score.x >= threshold ~ "LEAP",
        practice_score < threshold & score.x < threshold ~ "WELP",
        # practice score at or above the threshold
        practice_score >= threshold & score.x >= threshold ~ "HEAP",
        practice_score >= threshold & score.x < threshold ~ "HELP",
        TRUE ~ "Unknown"
      )
    )
}
# Count the three "improving" transitions between two classification columns.
#
# df        data frame holding both classification columns.
# col1_name name of the starting classification column.
# col2_name name of the ending classification column.
#
# Returns a data frame with columns Transition, Count and Percentage for
# WELP -> HELP, WELP -> HEAP and HELP -> HEAP. Percentages are relative to all
# rows of `df`. Rows with a missing label never count as a transition.
calculate_transition <- function(df, col1_name, col2_name) {
  from <- df[[col1_name]]
  to <- df[[col2_name]]

  # na.rm = TRUE: one NA label must not turn every count into NA.
  count_moves <- function(src, dest) {
    sum(from == src & to == dest, na.rm = TRUE)
  }

  counts <- c(
    count_moves("WELP", "HELP"),
    count_moves("WELP", "HEAP"),
    count_moves("HELP", "HEAP")
  )
  total_rows <- nrow(df)

  data.frame(
    Transition = c("WELP to HELP", "WELP to HEAP", "HELP to HEAP"),
    Count = counts,
    Percentage = (counts / total_rows) * 100
  )
}
# Cross-tabulate two classification columns as a data frame.
#
# df         data frame holding both classification columns.
# col1_name  name of the starting classification column (rows of the result).
# col2_name  name of the ending classification column (columns of the result).
# categories labels to tabulate, in display order.
#
# Returns a data frame whose first column, Transition, holds the starting
# label, followed by one count column per ending label. Rows with a missing
# label are not counted.
calculate_transition2 <- function(df, col1_name, col2_name, categories) {
  if (length(categories) == 0) {
    return(data.frame(Transition = character(0)))
  }

  from <- df[[col1_name]]
  to <- df[[col2_name]]

  transition_counts <- matrix(0, nrow = length(categories), ncol = length(categories))
  colnames(transition_counts) <- categories
  rownames(transition_counts) <- categories

  for (i in seq_along(categories)) {
    for (j in seq_along(categories)) {
      # na.rm = TRUE: a missing label must not turn the whole cell into NA.
      transition_counts[i, j] <- sum(
        from == categories[i] & to == categories[j],
        na.rm = TRUE
      )
    }
  }

  # Move the starting label out of the row names into a leading column.
  result_df <- as.data.frame(transition_counts)
  result_df$Transition <- rownames(transition_counts)
  rownames(result_df) <- NULL
  result_df[, c(ncol(result_df), seq_len(ncol(result_df) - 1)), drop = FALSE]
}

# Classification labels, in the row/column order used by the transition
# tables; passed as the `categories` argument of calculate_transition3/4.
categories <- c("HELP", "WELP", "HEAP", "LEAP","DROP")
# List every "from -> to" transition between two classification columns.
#
# df         data frame holding both classification columns.
# col1_name  name of the starting classification column.
# col2_name  name of the ending classification column.
# categories labels to tabulate, in display order.
#
# Returns a character matrix with columns Transition, Count and Percentage and
# one row per ordered pair of categories (the first category varies slowest).
# Percentages are relative to all rows of `df`. Rows with a missing label are
# not counted. Every cell is a string because each row mixes text and numbers.
calculate_transition3 <- function(df, col1_name, col2_name, categories) {
  n_cat <- length(categories)
  if (n_cat == 0) {
    return(matrix(
      character(0),
      nrow = 0, ncol = 3,
      dimnames = list(NULL, c("Transition", "Count", "Percentage"))
    ))
  }

  from <- df[[col1_name]]
  to <- df[[col2_name]]
  total_rows <- nrow(df)

  # One slot per ordered pair, allocated up front instead of growing the list.
  result_list <- vector("list", n_cat * n_cat)
  slot <- 0
  for (i in seq_along(categories)) {
    for (j in seq_along(categories)) {
      # na.rm = TRUE: a missing label must not turn the count into NA.
      moved <- sum(from == categories[i] & to == categories[j], na.rm = TRUE)
      slot <- slot + 1
      result_list[[slot]] <- c(
        Transition = paste(categories[i], "to", categories[j]),
        Count = moved,
        Percentage = (moved / total_rows) * 100
      )
    }
  }

  do.call(rbind, result_list)
}
# Cross-tabulate two classification columns, with row and column totals.
#
# df         data frame holding both classification columns.
# col1_name  name of the starting classification column (rows).
# col2_name  name of the ending classification column (columns).
# categories labels to tabulate, in display order.
#
# Returns a numeric matrix with one row and one column per category plus a
# trailing "Total" row and column. Rows of `df` with a missing label, or a
# label outside `categories`, are not counted.
calculate_transition4 <- function(df, col1_name, col2_name, categories) {
  from <- df[[col1_name]]
  to <- df[[col2_name]]

  transition_counts <- matrix(0, nrow = length(categories) + 1, ncol = length(categories) + 1)
  colnames(transition_counts) <- c(categories, "Total")
  rownames(transition_counts) <- c(categories, "Total")

  for (i in seq_along(categories)) {
    for (j in seq_along(categories)) {
      # na.rm = TRUE: a missing label must not turn the whole cell into NA.
      transition_counts[i, j] <- sum(
        from == categories[i] & to == categories[j],
        na.rm = TRUE
      )
    }
  }

  # Row totals first (the Total column is still all zero at this point),
  # then column totals, which also fills the grand total in the corner.
  transition_counts[, "Total"] <- rowSums(transition_counts)
  transition_counts["Total", ] <- colSums(transition_counts)

  transition_counts
}

# Histogram of one score column.
#
# df          data frame holding the scores.
# column_name name (string) of the column to plot on the x axis.
# mytitle     plot title.
#
# Returns the ggplot object.
makeHist <- function(df, column_name, mytitle) {
  score_axis <- aes(x = !!sym(column_name))
  bars <- geom_histogram(fill = "skyblue", color = "black")
  captions <- labs(title = mytitle, x = "score / 100", y = "Number of students")

  ggplot(df, score_axis) + bars + captions
}

1 Introduction and preamble

We often talk about students either as one monolithic group or, at the other extreme, as if each student were a world of their own.
We must be able to talk about student performance in a way that easily conveys a sense of that performance to colleagues and institutions.
We define efficiency as the ratio between performance and effort:

\[ Efficiency = \frac{performance}{effort} \]

# Sketch of the efficiency idea: an empty 3 x 3 canvas with hand-drawn axes,
# a dashed horizontal divider and one label on each side of it.
plot(
  1, 1,
  type = "n",
  xlim = c(0, 3), ylim = c(0, 3),
  xlab = "Performance", ylab = "Effort",
  axes = FALSE, xaxt = "n", yaxt = "n"
)
text(c(2, 1), c(1, 2), c("High Efficiency", "Low Efficiency"))
lines(c(0, 3), c(1.5, 1.5), lty = 2)
# Both axis arrows (horizontal, then vertical) in one vectorised call.
arrows(c(0, 0), c(0, 0), c(3.1, 0), c(0, 3.1), length = 0.1)

While performance can be readily identified with the score, effort is harder to identify and quantify.
Here we propose three different ways to quantify effort: video watching, quiz preparation, and justAsk attendance.
Regardless of how effort is quantified, we can always identify four quadrants:

# Sketch of the four effort/performance quadrants: an empty 3 x 3 canvas,
# dashed dividers at 1.5 on both axes, and each quadrant's acronym with its
# expansion in smaller type just below it.
plot(
  1, 1,
  type = "n",
  xlim = c(0, 3), ylim = c(0, 3),
  xlab = "Performance", ylab = "Effort",
  axes = FALSE, xaxt = "n", yaxt = "n"
)
text(
  c(1, 1, 2, 2), c(1, 2, 1, 2),
  c("WELP", "HELP", "LEAP", "HEAP")
)
text(
  c(1, 1, 2, 2), c(0.8, 1.8, 0.8, 1.8),
  c(
    "Withdrawn Effort-Low Performance",
    "High Effort-Low Performance",
    "Low Effort-Acceptable Performance",
    "High Effort-Acceptable Performance"
  ),
  cex = 0.5
)
lines(c(1.5, 1.5), c(0, 3), lty = 2)
lines(c(0, 3), c(1.5, 1.5), lty = 2)
# Both axis arrows (horizontal, then vertical) in one vectorised call.
arrows(c(0, 0), c(0, 0), c(3.1, 0), c(0, 3.1), length = 0.1)

2 Helping students to “UMR”

3 Practice Optional Quiz to Measure Effort

HEAP: passed milestone with 100 and practiced with a passing score in the practice
LEAP: passed the milestone with 100 without practicing (didn’t open the practice or tried just once)
HELP: did not pass the milestone (<80) but practiced more than once
WELP: did not pass the milestone (<80) and either did not open the practice or tried it just once

# ---- Milestone 1: start the wide per-student table `allf23` ----
# Keep only the practice rows submitted up to the day of the first attempt.
# Milestone 1, attempt 1, was delivered between Oct 2nd and Oct 3rd, 2023.
# NOTE(review): unlike p2f23-p4f23, p1f23$score is not rescaled before use --
# confirm it is already on a 0-100 scale.
p1f23_f = filterDate(p1f23,"submitted","2023-10-03 23:59:59 UTC")
# First milestone attempt only, reduced to the student name and score.
allstud = m1f23[which(m1f23$attempt == 1),]
allstud = allstud[,c("name","score")]

# One row per student: practice attempts, best practice score, milestone
# score and the two classifications (threshold 80).
m1f23_class = classifyStudents(p1f23_f,allstud,80)
# Add the justAsk attendance columns; students without a justAsk row get NA.
m1f23_class = merge(m1f23_class, justAsk, by = "name", all.x = TRUE)

# Build the wide table allf23, starting from the first-attempt classification.

# Drop column 2 (score.x, a duplicate of score.y produced by the merge inside
# classifyStudents) and rename the next five columns by position.
allf23 = m1f23_class[,-c(2)]
colnames(allf23)[2] <- "score_m1"
colnames(allf23)[3] <- "practice_attempt_m1"
colnames(allf23)[4] <- "practice_score_m1"
colnames(allf23)[5] <- "by_attempt_m1"
colnames(allf23)[6] <- "by_practice80_m1"
# "All attempts" view: each student's row with the highest milestone score,
# classified against the unfiltered practice data.
allstud <- m1f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p1f23,allstud,80)
# Same positional clean-up as above, with the "_all" suffix.
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m1_all"
colnames(thisOne)[3] <- "practice_attempt_m1_all"
colnames(thisOne)[4] <- "practice_score_m1_all"
colnames(thisOne)[5] <- "by_attempt_m1_all"
colnames(thisOne)[6] <- "by_practice80_m1_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

# ---- Milestone 2 ----
# Milestone 2 was done by Thursday, Nov 2nd.
# Rescale the practice score to a percentage.
# NOTE(review): presumably the practice quiz is scored out of 12 -- confirm.
# The column is overwritten in place, so re-running this line rescales again.
p2f23$score = p2f23$score/12*100
# Practice rows submitted up to the cutoff date only.
p2f23_f = filterDate(p2f23,"submitted","2023-11-02 23:59:59 UTC")
# First milestone attempt only, reduced to the student name and score.
allstud = m2f23[which(m2f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m2f23_class = classifyStudents(p2f23_f,allstud,80)
# Drop column 2 (score.x, a duplicate of score.y produced by the merge inside
# classifyStudents) and rename the remaining columns by position.
m2f23_class = m2f23_class[,-c(2)]
colnames(m2f23_class)[2] <- "score_m2"
colnames(m2f23_class)[3] <- "practice_attempt_m2"
colnames(m2f23_class)[4] <- "practice_score_m2"
colnames(m2f23_class)[5] <- "by_attempt_m2"
colnames(m2f23_class)[6] <- "by_practice80_m2"

# all.x = TRUE keeps every student already in allf23; students with no
# milestone 2 row get NA in the new columns.
allf23 = merge(allf23,m2f23_class, by = "name", all.x = TRUE)
# "All attempts" view: each student's row with the highest milestone score,
# classified against the unfiltered practice data.
allstud <- m2f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p2f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m2_all"
colnames(thisOne)[3] <- "practice_attempt_m2_all"
colnames(thisOne)[4] <- "practice_score_m2_all"
colnames(thisOne)[5] <- "by_attempt_m2_all"
colnames(thisOne)[6] <- "by_practice80_m2_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

# ---- Milestone 3 ----
# By the end of Thursday, Nov 30th, everyone should have attempted it.
# Rescale the practice score to a percentage.
# NOTE(review): presumably the practice quiz is scored out of 12 -- confirm.
# The column is overwritten in place, so re-running this line rescales again.
p3f23$score = p3f23$score/12*100
# Practice rows submitted up to the cutoff date only.
p3f23_f = filterDate(p3f23,"submitted","2023-11-30 23:59:59 UTC")
# First milestone attempt only, reduced to the student name and score.
allstud = m3f23[which(m3f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m3f23_class = classifyStudents(p3f23_f,allstud,80)
# Drop column 2 (score.x, a duplicate of score.y produced by the merge inside
# classifyStudents) and rename the remaining columns by position.
m3f23_class = m3f23_class[,-c(2)]
colnames(m3f23_class)[2] <- "score_m3"
colnames(m3f23_class)[3] <- "practice_attempt_m3"
colnames(m3f23_class)[4] <- "practice_score_m3"
colnames(m3f23_class)[5] <- "by_attempt_m3"
colnames(m3f23_class)[6] <- "by_practice80_m3"
# all.x = TRUE keeps every student already in allf23; students with no
# milestone 3 row get NA in the new columns.
allf23 = merge(allf23,m3f23_class, by = "name", all.x = TRUE)
# "All attempts" view: each student's row with the highest milestone score,
# classified against the unfiltered practice data.
allstud <- m3f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p3f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m3_all"
colnames(thisOne)[3] <- "practice_attempt_m3_all"
colnames(thisOne)[4] <- "practice_score_m3_all"
colnames(thisOne)[5] <- "by_attempt_m3_all"
colnames(thisOne)[6] <- "by_practice80_m3_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

# ---- Milestone 4 ----
# By the end of Dec 12th everyone should have attempted it.
# Rescale the practice score to a percentage.
# NOTE(review): presumably the practice quiz is scored out of 12 -- confirm.
# The column is overwritten in place, so re-running this line rescales again.
p4f23$score = p4f23$score/12*100
# Practice rows submitted up to the cutoff date only.
p4f23_f = filterDate(p4f23,"submitted","2023-12-12 23:59:59 UTC")
# First milestone attempt only, reduced to the student name and score.
allstud = m4f23[which(m4f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m4f23_class = classifyStudents(p4f23_f,allstud,80)
# Drop column 2 (score.x, a duplicate of score.y produced by the merge inside
# classifyStudents) and rename the remaining columns by position.
m4f23_class = m4f23_class[,-c(2)]
colnames(m4f23_class)[2] <- "score_m4"
colnames(m4f23_class)[3] <- "practice_attempt_m4"
colnames(m4f23_class)[4] <- "practice_score_m4"
colnames(m4f23_class)[5] <- "by_attempt_m4"
colnames(m4f23_class)[6] <- "by_practice80_m4"
# all.x = TRUE keeps every student already in allf23; students with no
# milestone 4 row get NA in the new columns.
allf23 = merge(allf23,m4f23_class, by = "name", all.x = TRUE)
# "All attempts" view: each student's row with the highest milestone score,
# classified against the unfiltered practice data.
allstud <- m4f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p4f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m4_all"
colnames(thisOne)[3] <- "practice_attempt_m4_all"
colnames(thisOne)[4] <- "practice_score_m4_all"
colnames(thisOne)[5] <- "by_attempt_m4_all"
colnames(thisOne)[6] <- "by_practice80_m4_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)


# Students missing from a milestone have NA in that milestone's
# classification columns (names starting with "by"); label them "DROP".
bycolumns <- grepl("^by", names(allf23))
allf23[, bycolumns] <- lapply(
  allf23[, bycolumns],
  function(labels) replace(labels, is.na(labels), "DROP")
)

Tables

library(knitr)

# Frequency table of one categorical column, rendered as a markdown kable.
#
# data        data frame holding the column.
# column_name name (string) of the categorical column to tabulate.
# mytitle     table caption.
#
# Returns the kable object with columns Category, Count and Percentage; the
# percentage is rounded to two decimals and followed by " %".
generate_category_table <- function(data, column_name, mytitle) {
  freq <- table(data[[column_name]])
  share <- round(prop.table(freq) * 100, 2)

  summary_df <- data.frame(
    Category = names(freq),
    Count = as.numeric(freq),
    Percentage = paste(share, "%")
  )

  kable(
    summary_df,
    caption = mytitle,
    col.names = c("Category", "Count", "Percentage"),
    align = c("l", "c", "c"),
    format = "markdown"
  )
}

# Example usage:
# Assuming your dataframe is df and the categorical variable is in column1
#table_string <- generate_category_table(m1f23_class, "by_attempt","Using times practiced to quantify effort")

# Print the table in the R Markdown document
#table_string

#table_string <- generate_category_table(m1f23_class, "by_practice80","Using score in practice to quantify effort")

# Print the table in the R Markdown document
#table_string

# Print three effort-vs-performance scatter plots for one milestone.
#
# df           wide per-student data frame.
# practice_col name of the best-practice-score column (y axis).
# score_col    name of the milestone-score column (x axis).
# attempts_col name of the practice-attempts column (point size).
# justAsk_col  name of the justAsk attendance column (point colour).
# mytitle      milestone label used in the title and the x-axis label.
#
# Rows with a missing value in any of the four columns are left out.
# Prints, in order: points sized by attempts, plain points, and points sized
# by attempts and coloured by justAsk attendance.
# Columns are mapped with aes(!!sym(...)) rather than the deprecated
# aes_string(), as makeHist does.
plotEffortVsPerformance <- function(df,practice_col,score_col,attempts_col,justAsk_col,mytitle) {
  needed <- c(practice_col, score_col, attempts_col, justAsk_col)
  df <- df[, needed, drop = FALSE]
  df <- df[complete.cases(df), ]

  x_var <- sym(score_col)
  y_var <- sym(practice_col)
  size_var <- sym(attempts_col)
  colour_var <- sym(justAsk_col)

  # Layers shared by all three plots.
  shared_layers <- list(
    geom_point(na.rm = TRUE),
    labs(
      x = paste("Performance: ",mytitle, " score"),
      y = "Practice: Max score during practice",
      title = paste(mytitle, ": Effort vs Performance")
    ),
    theme_minimal(),
    scale_x_continuous(breaks = seq(0, 100, by = 10)),
    scale_y_continuous(breaks = seq(0, 100, by = 10))
  )

  # Point size = number of practice attempts.
  p <- ggplot(df, aes(x = !!x_var, y = !!y_var, size = !!size_var)) +
    shared_layers +
    scale_size_continuous(range = c(1, 5), breaks = c(0, 1, 3, 10))

  # Plain scatter. No size aesthetic is mapped, so no size scale is needed.
  q <- ggplot(df, aes(x = !!x_var, y = !!y_var)) +
    shared_layers

  # Point size = attempts, colour = justAsk attendance.
  r <- ggplot(df, aes(x = !!x_var, y = !!y_var, size = !!size_var, color = !!colour_var)) +
    shared_layers +
    scale_size_continuous(range = c(1, 5), breaks = c(0, 1, 3, 10), name = "Attempts") +
    scale_color_manual(
      values = c("Often" = "green", "Seldom" = "orange", "Never" = "black"),
      name = "Just Ask",
      labels = c("Often" = "Often (green)", "Seldom" = "Seldom (orange)", "Never" = "Never (black)")
    )

  print(p)
  print(q)
  print(r)
}

3.1 Milestone 1

Distribution table M1 - 1st attempt and at the end

makeHist(allf23,"score_m1","Milestone 1 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m1","Milestone 1 - attempt 1")
table_string

Milestone 1 - attempt 1
Category	Count	Percentage
HEAP	80	49.08 %
HELP	14	8.59 %
LEAP	27	16.56 %
WELP	42	25.77 %

table_string <- generate_category_table(allf23, "by_practice80_m1_all","Milestone 1 - all attempt")
table_string

Milestone 1 - all attempt
Category	Count	Percentage
HEAP	107	65.64 %
HELP	4	2.45 %
LEAP	35	21.47 %
WELP	17	10.43 %

result = calculate_transition(allf23,"by_practice80_m1","by_practice80_m1_all")
kable(result, format = "markdown", caption = "M1: Transitions from 1st attempt to final")

M1: Transitions from 1st attempt to final
Transition	Count	Percentage
WELP to HELP	3	1.840491
WELP to HEAP	10	6.134969
HELP to HEAP	13	7.975460

result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m1_all",categories)
kable(result, format = "markdown", caption = "M1 1st att vs M1 all ")

M1 1st att vs M1 all
	HELP	WELP	HEAP	LEAP	Total
HELP	1	0	13	0	14
WELP	3	17	10	12	42
HEAP	0	0	80	0	80
LEAP	0	0	4	23	27
DROP	0	0	0	0	0
Total	4	17	107	35	163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m1","score_m1","practice_attempt_m1","justAsk1","Milestone 1 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m1_all","score_m1_all","practice_attempt_m1_all","justAsk1","Milestone 1 - All attempts")

3.2 Milestone 2

Distribution table M2 - 1st attempt and at the end

makeHist(allf23,"score_m2","Milestone 2 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m2","Milestone 2 - attempt 1")
table_string

Milestone 2 - attempt 1
Category	Count	Percentage
DROP	5	3.07 %
HEAP	91	55.83 %
HELP	15	9.2 %
LEAP	18	11.04 %
WELP	34	20.86 %

table_string <- generate_category_table(allf23, "by_practice80_m2_all","Milestone 2 - all attempt")
table_string

Milestone 2 - all attempt
Category	Count	Percentage
DROP	5	3.07 %
HEAP	110	67.48 %
HELP	12	7.36 %
LEAP	17	10.43 %
WELP	19	11.66 %

result = calculate_transition(allf23,"by_practice80_m2","by_practice80_m2_all")
kable(result, format = "markdown", caption = "M2: Transitions from 1st attempt to final")

M2: Transitions from 1st attempt to final
Transition	Count	Percentage
WELP to HELP	4	2.453988
WELP to HEAP	8	4.907976
HELP to HEAP	7	4.294479

result = calculate_transition4(allf23,"by_practice80_m2","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M2 1st att vs M2 all ")

M2 1st att vs M2 all
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	8	0	7	0	0	15
WELP	4	19	8	3	0	34
HEAP	0	0	91	0	0	91
LEAP	0	0	4	14	0	18
DROP	0	0	0	0	5	5
Total	12	19	110	17	5	163

result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")

M1 vs M2: Transitions 1st attempt
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	3	3	5	1	2	14
WELP	7	26	3	4	2	42
HEAP	1	5	69	5	0	80
LEAP	4	0	14	8	1	27
DROP	0	0	0	0	0	0
Total	15	34	91	18	5	163

result = calculate_transition3(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")

M1 vs M2: Transitions 1st attempt
Transition	Count	Percentage
HELP to HELP	3	1.84049079754601
HELP to WELP	3	1.84049079754601
HELP to HEAP	5	3.06748466257669
HELP to LEAP	1	0.613496932515337
HELP to DROP	2	1.22699386503067
WELP to HELP	7	4.29447852760736
WELP to WELP	26	15.9509202453988
WELP to HEAP	3	1.84049079754601
WELP to LEAP	4	2.45398773006135
WELP to DROP	2	1.22699386503067
HEAP to HELP	1	0.613496932515337
HEAP to WELP	5	3.06748466257669
HEAP to HEAP	69	42.3312883435583
HEAP to LEAP	5	3.06748466257669
HEAP to DROP	0	0
LEAP to HELP	4	2.45398773006135
LEAP to WELP	0	0
LEAP to HEAP	14	8.58895705521472
LEAP to LEAP	8	4.9079754601227
LEAP to DROP	1	0.613496932515337
DROP to HELP	0	0
DROP to WELP	0	0
DROP to HEAP	0	0
DROP to LEAP	0	0
DROP to DROP	0	0

result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions all attempt ")

M1 vs M2: Transitions all attempt
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	1	1	0	0	2	4
WELP	4	10	2	1	0	17
HEAP	3	4	91	8	1	107
LEAP	4	4	17	8	2	35
DROP	0	0	0	0	0	0
Total	12	19	110	17	5	163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m2","score_m2","practice_attempt_m2","justAsk1","Milestone 2 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m2_all","score_m2_all","practice_attempt_m2_all","justAsk1","Milestone 2 - All attempts")

3.3 Milestone 3

Distribution table M3 - 1st attempt and at the end

makeHist(allf23,"score_m3","Milestone 3 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m3","Milestone 3 - attempt 1")
table_string

Milestone 3 - attempt 1
Category	Count	Percentage
DROP	12	7.36 %
HEAP	77	47.24 %
HELP	11	6.75 %
LEAP	27	16.56 %
WELP	36	22.09 %

table_string <- generate_category_table(allf23, "by_practice80_m3_all","Milestone 3 - all attempt")
table_string

Milestone 3 - all attempt
Category	Count	Percentage
DROP	12	7.36 %
HEAP	101	61.96 %
HELP	8	4.91 %
LEAP	26	15.95 %
WELP	16	9.82 %

result = calculate_transition4(allf23,"by_practice80_m3","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M3 1st vs M3 all attempts ")

M3 1st vs M3 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	3	0	8	0	0	11
WELP	5	16	10	5	0	36
HEAP	0	0	77	0	0	77
LEAP	0	0	6	21	0	27
DROP	0	0	0	0	12	12
Total	8	16	101	26	12	163

result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m3",categories)
kable(result, format = "markdown", caption = "M1 1st vs M3 1st ")

M1 1st vs M3 1st
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	2	4	5	1	2	14
WELP	3	25	1	5	8	42
HEAP	5	5	57	12	1	80
LEAP	1	2	14	9	1	27
DROP	0	0	0	0	0	0
Total	11	36	77	27	12	163

result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M1 vs M3 all attempts ")

M1 vs M3 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	1	0	1	0	2	4
WELP	2	7	1	3	4	17
HEAP	1	7	86	10	3	107
LEAP	4	2	13	13	3	35
DROP	0	0	0	0	0	0
Total	8	16	101	26	12	163

result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M2 vs M3 all attempts ")

M2 vs M3 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	3	4	5	0	0	12
WELP	4	5	2	2	6	19
HEAP	1	3	89	17	0	110
LEAP	0	4	5	7	1	17
DROP	0	0	0	0	5	5
Total	8	16	101	26	12	163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m3","score_m3","practice_attempt_m3","justAsk2","Milestone 3 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m3_all","score_m3_all","practice_attempt_m3_all","justAsk2","Milestone 3 - All attempts")

3.4 Milestone 4

Distribution table M4 - 1st attempt and at the end

makeHist(allf23,"score_m4","Milestone 4 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m4","Milestone 4 - attempt 1")
table_string

Milestone 4 - attempt 1
Category	Count	Percentage
DROP	15	9.2 %
HEAP	72	44.17 %
HELP	14	8.59 %
LEAP	18	11.04 %
WELP	44	26.99 %

table_string <- generate_category_table(allf23, "by_practice80_m4_all","Milestone 4 - all attempt")
table_string

Milestone 4 - all attempt
Category	Count	Percentage
DROP	15	9.2 %
HEAP	91	55.83 %
HELP	12	7.36 %
LEAP	25	15.34 %
WELP	20	12.27 %

result = calculate_transition4(allf23,"by_practice80_m4","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M4 1st vs M4 all attempts ")

M4 1st vs M4 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	4	0	10	0	0	14
WELP	8	20	5	11	0	44
HEAP	0	0	72	0	0	72
LEAP	0	0	4	14	0	18
DROP	0	0	0	0	15	15
Total	12	20	91	25	15	163

result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M1 vs M4 all attempts ")

M1 vs M4 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	0	1	0	1	2	4
WELP	0	7	0	3	7	17
HEAP	4	8	81	11	3	107
LEAP	8	4	10	10	3	35
DROP	0	0	0	0	0	0
Total	12	20	91	25	15	163

result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M2 vs M4 all attempts ")

M2 vs M4 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	2	5	1	3	1	12
WELP	1	7	1	2	8	19
HEAP	7	6	83	14	0	110
LEAP	2	2	6	6	1	17
DROP	0	0	0	0	5	5
Total	12	20	91	25	15	163

result = calculate_transition4(allf23,"by_practice80_m3_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M3 vs M4 all attempts ")

M3 vs M4 all attempts
	HELP	WELP	HEAP	LEAP	DROP	Total
HELP	3	5	0	0	0	8
WELP	3	8	1	1	3	16
HEAP	6	3	78	14	0	101
LEAP	0	4	12	10	0	26
DROP	0	0	0	0	12	12
Total	12	20	91	25	15	163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m4","score_m4","practice_attempt_m4","justAsk2","Milestone 4 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m4_all","score_m4_all","practice_attempt_m4_all","justAsk2","Milestone 4 - All attempts")

How much does it change if we use <70 as the cutoff between high and low performance?

3.5 All together: Flow Sankey diagrams

library(ggplot2)
library(ggalluvial)
library(scales)

# Reshape two classification columns of allf23 into the long format that
# ggalluvial understands and draw the flow between them.
#
# df        wide per-student data frame.
# colexam1  name of the first classification column (left axis).
# colexam2  name of the second classification column (right axis).
# name      name of the student-identifier column.
# mytitle   plot title.
#
# Returns the ggplot object. Each stratum is labelled with its share of
# students, formatted as a percentage with one decimal.
makeitAlluvial2 <- function(df, colexam1, colexam2, name, mytitle) {
  exam_cols <- c(colexam1, colexam2)

  # Long format: one row per student and exam. reshape() adds the `id`
  # column that is used as the alluvium below.
  long_df <- reshape(
    df[, c(name, exam_cols)],
    varying = exam_cols,
    v.names = "group",
    timevar = "exam",
    times = exam_cols,
    direction = "long"
  )
  rownames(long_df) <- NULL

  flow_aes <- aes(x = exam, stratum = group, alluvium = id, fill = group)
  share_labels <- geom_text(
    stat = "stratum",
    aes(label = percent(after_stat(prop), accuracy = .1))
  )

  ggplot(long_df, flow_aes) +
    scale_x_discrete(expand = c(.1, .1)) +
    geom_flow() +
    geom_stratum(alpha = .5) +
    theme_minimal() +
    share_labels +
    labs(title = mytitle, x = "", y = "Number of students")
}
makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m1_all","name","Milestone 1: 1st vs final attempt")

makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m3","name","Milestone 1 vs 3: 1st attempt")

makeitAlluvial2(allf23,"by_practice80_m1_all","by_practice80_m3_all","name","Milestone 1 vs 3: final attempt")

#makeitAlluvial2(allf23,"by_practice80_m1","grade_category","name","Milestone 1 1st attempt vs final letter grade")

3.6 Looking at their final course grade

# Attach the final course grades to the wide table. all.x = TRUE keeps every
# student already in allf23, with NA where no grade row matches.
finalgrades <- read.csv(
  "~/Teaching/Grades_and_SRT/Fall2023/chem1331_f23_grades.csv",
  header = TRUE
)
allf23 <- merge(x = allf23, y = finalgrades, by = "name", all.x = TRUE)
# Collapse one letter grade to its leading letter.
#
# grade a single grade string (for example "A-" or "B+"), or NA.
#
# Returns "A", "B", "C", "D", "F" or "W" according to the first character of
# `grade`; NA is reported as "W"; anything else is "Other".
categorize_grades <- function(grade) {
  if (is.na(grade)) {
    return("W")
  }

  known_letters <- c("A", "B", "C", "D", "F", "W")
  hit <- known_letters[startsWith(grade, known_letters)]

  if (length(hit) > 0) hit[1] else "Other"
}
# One grade category per student. vapply() guarantees a plain character
# vector (sapply() would return a list for zero rows and carries names).
allf23$grade_category <- vapply(
  allf23$finalLetter,
  categorize_grades,
  character(1),
  USE.NAMES = FALSE
)

# Share of students in each final letter-grade category.
grade_counts <- table(allf23$grade_category)
grade_fraction <- prop.table(grade_counts)

# Data frame for the pie charts. as.numeric() matters: passing the table
# itself to data.frame() creates `*.Var1` / `*.Freq` columns, so there was no
# `grade_fraction` column and aes(y = grade_fraction) silently fell back to
# the global table of the same name.
pie_data <- data.frame(
  grade_category = names(grade_fraction),
  grade_fraction = as.numeric(grade_fraction)
)

# Pie chart with the default colours
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Fraction of Rows by Letter Grade Category") +
  theme_void() +
  theme(legend.position = "right")

# One colour per grade category.
# NOTE(review): there is no entry for "Other", which categorize_grades() can
# return -- confirm that category never occurs in this data.
grade_colors <- c("A" = "blue", "B" = "green", "C" = "yellow", "D" = "orange", "F" = "red", "W" = "gray")

# Same pie chart with the custom colours
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Fraction of Rows by Letter Grade Category") +
  theme_void() +
  theme(legend.position = "right") +
  scale_fill_manual(values = grade_colors)

# Table of final grade category versus effort/performance classification.
#
# df              wide per-student data frame.
# gradeLetter_col name of the grade-category column (one table row each).
# col1_col        name of the classification column (one table column each).
# myTitle         table caption.
#
# Returns a markdown kable. Each cell is the share of ALL rows of `df` in
# that grade/classification pair, rounded to one decimal and followed by "%";
# pairs that never occur show "0%".
generate_col1_percentage_table <- function(df, gradeLetter_col, col1_col, myTitle) {
  grade_sym <- sym(gradeLetter_col)
  class_sym <- sym(col1_col)

  # Share of all rows for each (grade, classification) pair.
  shares <- df %>%
    group_by(!!grade_sym, !!class_sym) %>%
    summarize(percentage = n() / nrow(df) * 100) %>%
    ungroup()

  # One row per grade, one column per classification.
  wide <- tidyr::pivot_wider(
    shares,
    names_from = !!class_sym,
    values_from = percentage,
    values_fill = 0
  )

  # Every column after the grade column holds percentages.
  value_cols <- 2:ncol(wide)
  wide[, value_cols] <- lapply(
    wide[, value_cols],
    function(x) paste0(round(x, 1), "%")
  )

  kable(wide, format = "markdown", caption = myTitle)
}

col1_percentage_table <- generate_col1_percentage_table(allf23,  "grade_category", "by_practice80_m1","Milestone 1 - 1st attempt")
col1_percentage_table

Milestone 1 - 1st attempt
grade_category	HEAP	HELP	LEAP	WELP
A	31.3%	0.6%	11.7%	0%
B	16%	4.9%	3.1%	5.5%
C	1.2%	1.8%	1.2%	11%
D	0%	0%	0%	0.6%
F	0%	0%	0%	1.2%
W	0.6%	1.2%	0.6%	7.4%

col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m1_all","Milestone 1 - all attempts")
col1_percentage_table

Milestone 1 - all attempts
grade_category	HEAP	LEAP	WELP	HELP
A	34.4%	9.2%	0%	0%
B	22.7%	6.1%	0.6%	0%
C	6.7%	4.3%	3.7%	0.6%
D	0%	0%	0.6%	0%
F	0%	0%	0.6%	0.6%
W	1.8%	1.8%	4.9%	1.2%

col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3","Milestone 3 - 1st attempt")
col1_percentage_table

Milestone 3 - 1st attempt
grade_category	HEAP	LEAP	WELP	HELP	DROP
A	34.4%	8%	1.2%	0%	0%
B	12.9%	6.7%	6.7%	3.1%	0%
C	0%	1.2%	11%	3.1%	0%
D	0%	0%	0.6%	0%	0%
F	0%	0.6%	0%	0.6%	0%
W	0%	0%	2.5%	0%	7.4%

col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3_all","Milestone 3 - all attempts")
col1_percentage_table

Milestone 3 - all attempts
grade_category	HEAP	LEAP	HELP	WELP	DROP
A	36.8%	6.7%	0%	0%	0%
B	20.2%	6.7%	0.6%	1.8%	0%
C	4.9%	1.8%	3.1%	5.5%	0%
D	0%	0%	0.6%	0%	0%
F	0%	0.6%	0.6%	0%	0%
W	0%	0%	0%	2.5%	7.4%

4 Video Watching to Measure Effort

Measuring and Classifying Effort and Performance

Xavier Prat-Resina

10-3-2023

1 Introduction and preamble

2 Helping students to “UMR”

3 Practice Optional Quiz to Measure Effort

3.1 Milestone 1

3.2 Milestone 2

3.3 Milestone 3

3.4 Milestone 4

3.5 All together: Flow Sankey diagrams

3.6 Looking at their final course grade

4 Video Watching to Measure Effort