if (Sys.info()["sysname"] == "Windows"){
}else{
  m1f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1.csv",header = TRUE)
  m2f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2.csv",header = TRUE)
  m3f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3.csv",header = TRUE)
  m4f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4.csv",header = TRUE)
  
  p1f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m1_practice.csv",header = TRUE)
  p2f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m2_practice.csv",header = TRUE)
  p3f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m3_practice.csv",header = TRUE)
  p4f23 =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/m4_practice.csv",header = TRUE)
  
  justAsk =   read.csv("~/Teaching/Grades_and_SRT/Fall2023/justAsk_f23.csv",header = TRUE)
  justAsk$name <- paste(justAsk$First, justAsk$Last, sep = " ")
  justAsk = justAsk[,c(6,4,5)]
  justAsk$justAsk1 = ifelse( 
    grepl("2|3",justAsk$X1st.half) ,"Often",
                             ifelse( grepl("1",justAsk$X1st.half ), "Seldom", "Never")
  )
  justAsk$justAsk2 = ifelse( 
    grepl("2|3",justAsk$X2nd.half) ,"Often",
                             ifelse( grepl("1",justAsk$X2nd.half ), "Seldom", "Never")
  )


}
library(lubridate)
filterDate = function(df,date_column,thisDate){
  # Assuming df is your DataFrame with a column named date_column checking all rows completed BEFORE thisDate
  # Convert the date_column to a POSIXct object
  df <- df %>%
    mutate(date_column = as.POSIXct(date_column, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"))

  # Format the date_column to "YYYY-MM-DD" format
  df <- df %>%
    mutate(date_column = format(date_column, format = "%Y-%m-%d"))
  df <- df %>%
  filter(submitted <= as.POSIXct(thisDate, tz = "UTC"))
  return(df)
}
library(dplyr)
classifyStudents = function(df_practice,df_score,threshold){
  #name is the name of students
  grouped_df <- df_practice %>% group_by(name)
  summary_df <- grouped_df %>%
    summarize(
      practice_attempt = max(attempt),
      practice_score = max(score)
    ) %>%
    ungroup()  # Remove the grouping

  final_summary_df <- left_join(df_score, summary_df, by = "name") %>%
    mutate_at(vars(practice_attempt, practice_score), ~ifelse(is.na(.), 0, .))

  final = merge(df_score,final_summary_df, by="name", all.x = TRUE)

  final <- final %>%
    mutate(by_attempt = case_when(
      #not practicing
      practice_attempt == 0 & score.x > threshold ~ "LEAP",
      practice_attempt == 0 & score.x <= threshold ~ "WELP",
      #practicing
      practice_attempt > 0 & score.x >= threshold ~ "HEAP",
      practice_attempt > 0 & score.x < threshold ~ "HELP",
      TRUE ~ "Unknown"  # Handle any other cases (optional)
    ))
   
  final <- final %>%
    mutate(by_practice80 = case_when(
      #not practicing
      practice_score < threshold & score.x >= threshold ~ "LEAP",
      practice_score < threshold & score.x < threshold ~ "WELP",
      #practicing
      practice_score >= threshold & score.x >= threshold ~ "HEAP",
      practice_score >= threshold & score.x < threshold ~ "HELP",
      TRUE ~ "Unknown"  # Handle any other cases (optional)
    ))
  return(final)
  
}
calculate_transition <- function(df, col1_name, col2_name) {
  # Count of transitions
  #help_to_welp <- sum(df[[col1_name]] == 'HELP' & df[[col2_name]] == 'WELP')
  welp_to_help <- sum(df[[col1_name]] == 'WELP' & df[[col2_name]] == 'HELP')
  welp_to_heap <- sum(df[[col1_name]] == 'WELP' & df[[col2_name]] == 'HEAP')
  help_to_heap <- sum(df[[col1_name]] == 'HELP' & df[[col2_name]] == 'HEAP')
  
  # Total count of rows
  total_rows <- nrow(df)
  
  # Calculating percentages
  #help_to_welp_percent <- (help_to_welp / total_rows) * 100
  welp_to_help_percent <- (welp_to_help / total_rows) * 100
  welp_to_heap_percent <- (welp_to_heap / total_rows) * 100
  help_to_heap_percent <- (help_to_heap / total_rows) * 100
  
  # Create DataFrame of results
  result_df <- data.frame(
    Transition = c("WELP to HELP", "WELP to HEAP", "HELP to HEAP"),
    Count = c(welp_to_help, welp_to_heap, help_to_heap),
    Percentage = c(welp_to_help_percent, welp_to_heap_percent, help_to_heap_percent)
  )
  
  return(result_df)
}
calculate_transition2 <- function(df, col1_name, col2_name, categories) {
  # Initialize transition counts
  transition_counts <- matrix(0, nrow = length(categories), ncol = length(categories))
  colnames(transition_counts) <- categories
  rownames(transition_counts) <- categories
  
  # Calculate transition counts
  for (i in 1:length(categories)) {
    for (j in 1:length(categories)) {
      transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
    }
  }
  
  # Total count of rows
  total_rows <- nrow(df)
  
  # Calculate transition percentages
  transition_percentages <- (transition_counts / total_rows) * 100
  
  # Create DataFrame of results
  result_df <- as.data.frame(transition_counts)
  result_df$Transition <- rownames(transition_counts)
  rownames(result_df) <- NULL
  result_df <- result_df[, c(ncol(result_df), 1:(ncol(result_df)-1))]
  colnames(result_df)[1] <- "Transition"
  #result_df$Percentage <- transition_percentages[lower.tri(transition_percentages, diag = TRUE)]
  #result_df$Percentage <- round(transition_percentages,digits=2)
  
  return(result_df)
}

categories <- c("HELP", "WELP", "HEAP", "LEAP","DROP")
calculate_transition3 <- function(df, col1_name, col2_name, categories) {
  # Initialize transition counts
  transition_counts <- matrix(0, nrow = length(categories), ncol = length(categories))
  colnames(transition_counts) <- categories
  rownames(transition_counts) <- categories
  
  # Calculate transition counts
  for (i in 1:length(categories)) {
    for (j in 1:length(categories)) {
      transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
    }
  }
  
  # Total count of rows
  total_rows <- nrow(df)
  
  # Calculate transition percentages
  transition_percentages <- (transition_counts / total_rows) * 100
  
  # Initialize a list to store results
  result_list <- list()
  
  # Loop through the transition counts and store them in the list
  for (i in 1:length(categories)) {
    for (j in 1:length(categories)) {
      transition <- paste(categories[i], "to", categories[j])
      result_list[[length(result_list) + 1]] <- c(Transition = transition, Count = transition_counts[i, j], Percentage = transition_percentages[i, j])
    }
  }
  
  # Create DataFrame of results
  result_df <- do.call(rbind, result_list)
  
  return(result_df)
}
calculate_transition4 <- function(df, col1_name, col2_name, categories) {
  # Initialize transition counts
  transition_counts <- matrix(0, nrow = length(categories) + 1, ncol = length(categories) + 1)
  colnames(transition_counts) <- c(categories, "Total")
  rownames(transition_counts) <- c(categories, "Total")
  
  # Calculate transition counts
  for (i in 1:length(categories)) {
    for (j in 1:length(categories)) {
      transition_counts[i, j] <- sum(df[[col1_name]] == categories[i] & df[[col2_name]] == categories[j])
    }
  }
  
  # Calculate row sums
  transition_counts[, "Total"] <- rowSums(transition_counts)
  
  # Calculate column sums
  transition_counts["Total", ] <- colSums(transition_counts)
  
  return(transition_counts)
}

makeHist = function(df,column_name,mytitle){
  ggplot(df, aes(x = !!sym(column_name))) +
    geom_histogram(fill = "skyblue", color = "black") +
    labs(title = mytitle, x = "score / 100", y = "Number of students")

}

1 Introduction and preamble

  • We often talk about students either as one monolithic group or on the other extreme, an each student is world case.
  • We must be able to talk about students performance in a way that we can easily convey to colleagues and institutions a sense of performance
  • We define the efficiency as the ratio between performance and

\[ Efficiency = \frac{performance}{effort} \]

plot(1, 1, xlim = c(0, 3), ylim = c(0, 3), type = "n", xlab = "Performance", ylab = "Effort", axes = FALSE,  xaxt = "n", yaxt = "n" )
text(2, 1, "High Efficiency")
text(1, 2, "Low Efficiency")
lines(c(0, 3), c(1.5, 1.5), lty = 2)
arrows(0, 0, 3.1, 0, length = 0.1)
arrows(0, 0, 0, 3.1, length = 0.1)

  • While performance can be easily identified as the score, the quantification of effort is more problematic to identify and quantify
  • Here we propose three different ways to quantify effort: video watching, quiz preparation, justAsk attendance
  • Regardless of how to quantify effort, we can always identify four quadrants
plot(1, 1, xlim = c(0, 3), ylim = c(0, 3), type = "n", xlab = "Performance", ylab = "Effort", axes = FALSE,  xaxt = "n", yaxt = "n" )
text(1, 1, "WELP")
text(1, 0.8, "Withdrawn Effort-Low Performance", cex = 0.5)
text(1, 2, "HELP")
text(1, 1.8, "High Effort-Low Performance", cex = 0.5)
text(2, 1, "LEAP")
text(2, 0.8, "Low Effort-Acceptable Performance", cex = 0.5)
text(2, 2, "HEAP")
text(2, 1.8, "High Effort-Acceptable Performance", cex = 0.5)
lines(c(1.5, 1.5), c(0, 3), lty = 2)
lines(c(0, 3), c(1.5, 1.5), lty = 2)
arrows(0, 0, 3.1, 0, length = 0.1)
arrows(0, 0, 0, 3.1, length = 0.1)

2 Helping students to “UMR”

3 Practice Optional Quiz to Measure Effort

  • HEAP: passed milestone with 100 and practiced with a passing score in the practice
  • LEAP: passed the milestone with 100 without practicing (didn’t open the practice or tried just once)
  • HELP: did not pass the milestone (<80) but practiced at least more than once
  • WELP: did not pass the milestone (<80) and did not open the practice or just once
#filter practice until the day of first attempt
#m1-at1 in f23 was delivered between oct 2nd and oct3rd of 2023
p1f23_f = filterDate(p1f23,"submitted","2023-10-03 23:59:59 UTC")
allstud = m1f23[which(m1f23$attempt == 1),]
allstud = allstud[,c("name","score")]

#build a df stating each students: times they tried, high score in practice, and score in milestone
m1f23_class = classifyStudents(p1f23_f,allstud,80)
#merge it with justAsk
m1f23_class = merge(m1f23_class, justAsk, by = "name", all.x = TRUE)

#BUILDING HERE THE MASSIVE DF allf23

allf23 = m1f23_class[,-c(2)]
colnames(allf23)[2] <- "score_m1"
colnames(allf23)[3] <- "practice_attempt_m1"
colnames(allf23)[4] <- "practice_score_m1"
colnames(allf23)[5] <- "by_attempt_m1"
colnames(allf23)[6] <- "by_practice80_m1"
#other attempts just use the highest score in the milestone
allstud <- m1f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p1f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m1_all"
colnames(thisOne)[3] <- "practice_attempt_m1_all"
colnames(thisOne)[4] <- "practice_score_m1_all"
colnames(thisOne)[5] <- "by_attempt_m1_all"
colnames(thisOne)[6] <- "by_practice80_m1_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

#M2
#M2 was done by Thursday Nov 2nd 
# p2f23 score was 
p2f23$score = p2f23$score/12*100
p2f23_f = filterDate(p2f23,"submitted","2023-11-02 23:59:59 UTC")
allstud = m2f23[which(m2f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m2f23_class = classifyStudents(p2f23_f,allstud,80)
m2f23_class = m2f23_class[,-c(2)]
colnames(m2f23_class)[2] <- "score_m2"
colnames(m2f23_class)[3] <- "practice_attempt_m2"
colnames(m2f23_class)[4] <- "practice_score_m2"
colnames(m2f23_class)[5] <- "by_attempt_m2"
colnames(m2f23_class)[6] <- "by_practice80_m2"

allf23 = merge(allf23,m2f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m2f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p2f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m2_all"
colnames(thisOne)[3] <- "practice_attempt_m2_all"
colnames(thisOne)[4] <- "practice_score_m2_all"
colnames(thisOne)[5] <- "by_attempt_m2_all"
colnames(thisOne)[6] <- "by_practice80_m2_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

#M3
# by the end of Thu Nov 30th everyone should have attempted it
p3f23$score = p3f23$score/12*100
p3f23_f = filterDate(p3f23,"submitted","2023-11-30 23:59:59 UTC")
allstud = m3f23[which(m3f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m3f23_class = classifyStudents(p3f23_f,allstud,80)
m3f23_class = m3f23_class[,-c(2)]
colnames(m3f23_class)[2] <- "score_m3"
colnames(m3f23_class)[3] <- "practice_attempt_m3"
colnames(m3f23_class)[4] <- "practice_score_m3"
colnames(m3f23_class)[5] <- "by_attempt_m3"
colnames(m3f23_class)[6] <- "by_practice80_m3"
allf23 = merge(allf23,m3f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m3f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p3f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m3_all"
colnames(thisOne)[3] <- "practice_attempt_m3_all"
colnames(thisOne)[4] <- "practice_score_m3_all"
colnames(thisOne)[5] <- "by_attempt_m3_all"
colnames(thisOne)[6] <- "by_practice80_m3_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)

#M4
# by the end of Dec 12th everyone should have attempted it
p4f23$score = p4f23$score/12*100
p4f23_f = filterDate(p4f23,"submitted","2023-12-12 23:59:59 UTC")
allstud = m4f23[which(m4f23$attempt == 1),]
allstud = allstud[,c("name","score")]
m4f23_class = classifyStudents(p4f23_f,allstud,80)
m4f23_class = m4f23_class[,-c(2)]
colnames(m4f23_class)[2] <- "score_m4"
colnames(m4f23_class)[3] <- "practice_attempt_m4"
colnames(m4f23_class)[4] <- "practice_score_m4"
colnames(m4f23_class)[5] <- "by_attempt_m4"
colnames(m4f23_class)[6] <- "by_practice80_m4"
allf23 = merge(allf23,m4f23_class, by = "name", all.x = TRUE)
#other attempts just use the highest score in the milestone
allstud <- m4f23 %>%
  group_by(name) %>%
  slice(which.max(score)) %>%
  ungroup()
allstud = allstud[,c("name","score")]
thisOne = classifyStudents(p4f23,allstud,80)
thisOne = thisOne[,-c(2)]
colnames(thisOne)[2] <- "score_m4_all"
colnames(thisOne)[3] <- "practice_attempt_m4_all"
colnames(thisOne)[4] <- "practice_score_m4_all"
colnames(thisOne)[5] <- "by_attempt_m4_all"
colnames(thisOne)[6] <- "by_practice80_m4_all"
allf23 = merge(allf23,thisOne, by = "name", all.x = TRUE)


#substitute the NA from drop students into DROP
bycolumns = grepl("^by",names(allf23))
allf23[,bycolumns] = lapply(
  allf23[,bycolumns],
  function(x) ifelse(is.na(x), "DROP", x)
)

Tables

library(knitr)

generate_category_table <- function(data, column_name,mytitle) {
  # Calculate counts
  table_counts <- table(data[[column_name]])

  # Create a data frame with counts and percentages
  table_df <- data.frame(Category = names(table_counts),
                         Count = as.numeric(table_counts),
                         Percentage = paste(round(prop.table(table_counts) * 100,2),"%"))
  table_df = table_df[, !grepl("Percentage.Var1", names(table_df))]

  # Use kable to create a formatted table
  table_output <- kable(table_df, 
                        caption = mytitle,
                        col.names = c("Category", "Count", "Percentage"),
                        align = c("l", "c", "c"),
                        format = "markdown")
  return(table_output)
}

# Example usage:
# Assuming your dataframe is df and the categorical variable is in column1
#table_string <- generate_category_table(m1f23_class, "by_attempt","Using times practiced to quantify effort")

# Print the table in the R Markdown document
#table_string

#table_string <- generate_category_table(m1f23_class, "by_practice80","Using score in practice to quantify effort")

# Print the table in the R Markdown document
#table_string
plotEffortVsPerformance <- function(df,practice_col,score_col,attempts_col,justAsk_col,mytitle) {
  df = df[,c(practice_col,score_col,attempts_col,justAsk_col),drop = FALSE]
  df = df[complete.cases(df),]
  
  p <- ggplot(df, aes_string(y = practice_col, x = score_col, size = attempts_col)) +
    geom_point(na.rm = TRUE) +
    scale_size_continuous(range = c(1, 5),breaks = c(0, 1, 3, 10)) +  # Adjust point size range as needed
    labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 100, by = 10)) +
    scale_y_continuous(breaks = seq(0, 100, by = 10)) 
  
  q <- ggplot(df, aes_string(y = practice_col, x = score_col)) +
    geom_point(na.rm = TRUE) +
    scale_size_continuous(range = c(1, 5)) +  # Adjust point size range as needed
    labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 100, by = 10)) +
    scale_y_continuous(breaks = seq(0, 100, by = 10)) 
  
  r <- ggplot(df, aes_string(y = practice_col, x = score_col, size = attempts_col, color = justAsk_col)) +
    geom_point(na.rm = TRUE) +
    scale_size_continuous(range = c(1, 5),breaks = c(0, 1, 3, 10), name = "Attempts") +  # Adjust point size range as needed
    scale_color_manual(values = c("Often" = "green", "Seldom" = "orange", "Never" = "black"),
                       name = "Just Ask",
                     labels = c("Often" = "Often (green)", "Seldom" = "Seldom (orange)", "Never" = "Never (black)")) +
    labs(x = paste("Performance: ",mytitle, " score"), y = "Practice: Max score during practice", title = paste(mytitle, ": Effort vs Performance")) +
    theme_minimal() +
    #scale_x_continuous(breaks = pretty(range(df[[practice_col]]), n = 10)) +
    #scale_y_continuous(breaks = pretty(range(df[[score_col]]), n = 10))
    scale_x_continuous(breaks = seq(0, 100, by = 10)) +
    scale_y_continuous(breaks = seq(0, 100, by = 10)) 
  
  # Print the plot
  print(p)
  print(q)
  print(r)
}

3.1 Milestone 1

Distribution table M1 - 1st attempt and at the end

makeHist(allf23,"score_m1","Milestone 1 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m1","Milestone 1 - attempt 1")
table_string
Milestone 1 - attempt 1
Category Count Percentage
HEAP 80 49.08 %
HELP 14 8.59 %
LEAP 27 16.56 %
WELP 42 25.77 %
table_string <- generate_category_table(allf23, "by_practice80_m1_all","Milestone 1 - all attempt")
table_string
Milestone 1 - all attempt
Category Count Percentage
HEAP 107 65.64 %
HELP 4 2.45 %
LEAP 35 21.47 %
WELP 17 10.43 %
result = calculate_transition(allf23,"by_practice80_m1","by_practice80_m1_all")
kable(result, format = "markdown", caption = "M1: Transitions from 1st attempt to final")
M1: Transitions from 1st attempt to final
Transition Count Percentage
WELP to HELP 3 1.840491
WELP to HEAP 10 6.134969
HELP to HEAP 13 7.975460
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m1_all",categories)
kable(result, format = "markdown", caption = "M1 1st att vs M1 all ")
M1 1st att vs M1 all
HELP WELP HEAP LEAP DROP Total
HELP 1 0 13 0 0 14
WELP 3 17 10 12 0 42
HEAP 0 0 80 0 0 80
LEAP 0 0 4 23 0 27
DROP 0 0 0 0 0 0
Total 4 17 107 35 0 163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m1","score_m1","practice_attempt_m1","justAsk1","Milestone 1 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m1_all","score_m1_all","practice_attempt_m1_all","justAsk1","Milestone 1 - All attempts")

3.2 Milestone 2

Distribution table M2 - 1st attempt and at the end

makeHist(allf23,"score_m2","Milestone 2 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m2","Milestone 2 - attempt 1")
table_string
Milestone 2 - attempt 1
Category Count Percentage
DROP 5 3.07 %
HEAP 91 55.83 %
HELP 15 9.2 %
LEAP 18 11.04 %
WELP 34 20.86 %
table_string <- generate_category_table(allf23, "by_practice80_m2_all","Milestone 2 - all attempt")
table_string
Milestone 2 - all attempt
Category Count Percentage
DROP 5 3.07 %
HEAP 110 67.48 %
HELP 12 7.36 %
LEAP 17 10.43 %
WELP 19 11.66 %
result = calculate_transition(allf23,"by_practice80_m2","by_practice80_m2_all")
kable(result, format = "markdown", caption = "M2: Transitions from 1st attempt to final")
M2: Transitions from 1st attempt to final
Transition Count Percentage
WELP to HELP 4 2.453988
WELP to HEAP 8 4.907976
HELP to HEAP 7 4.294479
result = calculate_transition4(allf23,"by_practice80_m2","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M2 1st att vs M2 all ")
M2 1st att vs M2 all
HELP WELP HEAP LEAP DROP Total
HELP 8 0 7 0 0 15
WELP 4 19 8 3 0 34
HEAP 0 0 91 0 0 91
LEAP 0 0 4 14 0 18
DROP 0 0 0 0 5 5
Total 12 19 110 17 5 163
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")
M1 vs M2: Transitions 1st attempt
HELP WELP HEAP LEAP DROP Total
HELP 3 3 5 1 2 14
WELP 7 26 3 4 2 42
HEAP 1 5 69 5 0 80
LEAP 4 0 14 8 1 27
DROP 0 0 0 0 0 0
Total 15 34 91 18 5 163
result = calculate_transition3(allf23,"by_practice80_m1","by_practice80_m2",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions 1st attempt ")
M1 vs M2: Transitions 1st attempt
Transition Count Percentage
HELP to HELP 3 1.84049079754601
HELP to WELP 3 1.84049079754601
HELP to HEAP 5 3.06748466257669
HELP to LEAP 1 0.613496932515337
HELP to DROP 2 1.22699386503067
WELP to HELP 7 4.29447852760736
WELP to WELP 26 15.9509202453988
WELP to HEAP 3 1.84049079754601
WELP to LEAP 4 2.45398773006135
WELP to DROP 2 1.22699386503067
HEAP to HELP 1 0.613496932515337
HEAP to WELP 5 3.06748466257669
HEAP to HEAP 69 42.3312883435583
HEAP to LEAP 5 3.06748466257669
HEAP to DROP 0 0
LEAP to HELP 4 2.45398773006135
LEAP to WELP 0 0
LEAP to HEAP 14 8.58895705521472
LEAP to LEAP 8 4.9079754601227
LEAP to DROP 1 0.613496932515337
DROP to HELP 0 0
DROP to WELP 0 0
DROP to HEAP 0 0
DROP to LEAP 0 0
DROP to DROP 0 0
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m2_all",categories)
kable(result, format = "markdown", caption = "M1 vs M2: Transitions all attempt ")
M1 vs M2: Transitions all attempt
HELP WELP HEAP LEAP DROP Total
HELP 1 1 0 0 2 4
WELP 4 10 2 1 0 17
HEAP 3 4 91 8 1 107
LEAP 4 4 17 8 2 35
DROP 0 0 0 0 0 0
Total 12 19 110 17 5 163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m2","score_m2","practice_attempt_m2","justAsk1","Milestone 2 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m2_all","score_m2_all","practice_attempt_m2_all","justAsk1","Milestone 2 - All attempts")

3.3 Milestone 3

Distribution table M3 - 1st attempt and at the end

makeHist(allf23,"score_m3","Milestone 3 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m3","Milestone 3 - attempt 1")
table_string
Milestone 3 - attempt 1
Category Count Percentage
DROP 12 7.36 %
HEAP 77 47.24 %
HELP 11 6.75 %
LEAP 27 16.56 %
WELP 36 22.09 %
table_string <- generate_category_table(allf23, "by_practice80_m3_all","Milestone 3 - all attempt")
table_string
Milestone 3 - all attempt
Category Count Percentage
DROP 12 7.36 %
HEAP 101 61.96 %
HELP 8 4.91 %
LEAP 26 15.95 %
WELP 16 9.82 %
result = calculate_transition4(allf23,"by_practice80_m3","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M3 1st vs M3 all attempts ")
M3 1st vs M3 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 3 0 8 0 0 11
WELP 5 16 10 5 0 36
HEAP 0 0 77 0 0 77
LEAP 0 0 6 21 0 27
DROP 0 0 0 0 12 12
Total 8 16 101 26 12 163
result = calculate_transition4(allf23,"by_practice80_m1","by_practice80_m3",categories)
kable(result, format = "markdown", caption = "M1 1st vs M3 1st ")
M1 1st vs M3 1st
HELP WELP HEAP LEAP DROP Total
HELP 2 4 5 1 2 14
WELP 3 25 1 5 8 42
HEAP 5 5 57 12 1 80
LEAP 1 2 14 9 1 27
DROP 0 0 0 0 0 0
Total 11 36 77 27 12 163
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M1 vs M3 all attempts ")
M1 vs M3 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 1 0 1 0 2 4
WELP 2 7 1 3 4 17
HEAP 1 7 86 10 3 107
LEAP 4 2 13 13 3 35
DROP 0 0 0 0 0 0
Total 8 16 101 26 12 163
result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m3_all",categories)
kable(result, format = "markdown", caption = "M2 vs M3 all attempts ")
M2 vs M3 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 3 4 5 0 0 12
WELP 4 5 2 2 6 19
HEAP 1 3 89 17 0 110
LEAP 0 4 5 7 1 17
DROP 0 0 0 0 5 5
Total 8 16 101 26 12 163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m3","score_m3","practice_attempt_m3","justAsk2","Milestone 3 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m3_all","score_m3_all","practice_attempt_m3_all","justAsk2","Milestone 3 - All attempts")

3.4 Milestone 4

Distribution table M4 - 1st attempt and at the end

makeHist(allf23,"score_m4","Milestone 4 - 1st attempt")

table_string <- generate_category_table(allf23, "by_practice80_m4","Milestone 4 - attempt 1")
table_string
Milestone 4 - attempt 1
Category Count Percentage
DROP 15 9.2 %
HEAP 72 44.17 %
HELP 14 8.59 %
LEAP 18 11.04 %
WELP 44 26.99 %
table_string <- generate_category_table(allf23, "by_practice80_m4_all","Milestone 4 - all attempt")
table_string
Milestone 4 - all attempt
Category Count Percentage
DROP 15 9.2 %
HEAP 91 55.83 %
HELP 12 7.36 %
LEAP 25 15.34 %
WELP 20 12.27 %
result = calculate_transition4(allf23,"by_practice80_m4","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M4 1st vs M4 all attempts ")
M4 1st vs M4 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 4 0 10 0 0 14
WELP 8 20 5 11 0 44
HEAP 0 0 72 0 0 72
LEAP 0 0 4 14 0 18
DROP 0 0 0 0 15 15
Total 12 20 91 25 15 163
result = calculate_transition4(allf23,"by_practice80_m1_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M1 vs M4 all attempts ")
M1 vs M4 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 0 1 0 1 2 4
WELP 0 7 0 3 7 17
HEAP 4 8 81 11 3 107
LEAP 8 4 10 10 3 35
DROP 0 0 0 0 0 0
Total 12 20 91 25 15 163
result = calculate_transition4(allf23,"by_practice80_m2_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M2 vs M4 all attempts ")
M2 vs M4 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 2 5 1 3 1 12
WELP 1 7 1 2 8 19
HEAP 7 6 83 14 0 110
LEAP 2 2 6 6 1 17
DROP 0 0 0 0 5 5
Total 12 20 91 25 15 163
result = calculate_transition4(allf23,"by_practice80_m3_all","by_practice80_m4_all",categories)
kable(result, format = "markdown", caption = "M3 vs M4 all attempts ")
M3 vs M4 all attempts
HELP WELP HEAP LEAP DROP Total
HELP 3 5 0 0 0 8
WELP 3 8 1 1 3 16
HEAP 6 3 78 14 0 101
LEAP 0 4 12 10 0 26
DROP 0 0 0 0 12 12
Total 12 20 91 25 15 163

Scatter Plots

#plotEffortVsPerformance(m1f23_class,"practice_score","score.x","practice_attempt","justAsk1")
plotEffortVsPerformance(allf23,"practice_score_m4","score_m4","practice_attempt_m4","justAsk2","Milestone 4 - 1st attempt")

plotEffortVsPerformance(allf23,"practice_score_m4_all","score_m4_all","practice_attempt_m4_all","justAsk2","Milestone 4 - All attempts")

How much does it change if we use <70 as the cutting for high and low performance?

3.5 All together: Flow Sankey diagrams

library(ggplot2)
library(ggalluvial)
library(scales)

#converting alldf3 into a format that alluvial can understand
makeitAlluvial2 = function(df,colexam1,colexam2,name,mytitle){
  melted_df = df[,c(name,colexam1,colexam2)]
  melted_df = reshape(melted_df,
                      varying = c(colexam1,colexam2),
                      v.names = "group",
                      timevar = "exam",
                      times = c(colexam1,colexam2),
                      direction = "long"
                      )
  rownames(melted_df) <- NULL
  ggplot(melted_df,
         aes(x = exam, stratum = group, alluvium = id, fill = group)) +
    scale_x_discrete(expand = c(.1, .1)) +
    geom_flow() +
    geom_stratum(alpha = .5) +
    theme_minimal() +
    geom_text(stat = "stratum",
              aes(label = percent(after_stat(prop), accuracy = .1)))+
    labs(title = mytitle, x = "", y = "Number of students")

}
makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m1_all","name","Milestone 1: 1st vs final attempt")

makeitAlluvial2(allf23,"by_practice80_m1","by_practice80_m3","name","Milestone 1 vs 3: 1st attempt")

makeitAlluvial2(allf23,"by_practice80_m1_all","by_practice80_m3_all","name","Milestone 1 vs 3: final attempt")

#makeitAlluvial2(allf23,"by_practice80_m1","grade_category","name","Milestone 1 1st attempt vs final letter grade")

3.6 Looking at their final course grade

finalgrades = read.csv("~/Teaching/Grades_and_SRT/Fall2023/chem1331_f23_grades.csv", header = TRUE)
allf23 = merge(allf23,finalgrades, by = "name", all.x = TRUE)
categorize_grades <- function(grade) {
  if (is.na(grade)) {
    return("W")
  } else if (startsWith(grade, "A")) {
    return("A")
  } else if (startsWith(grade, "B")) {
    return("B")
  } else if (startsWith(grade, "C")) {
    return("C")
  } else if (startsWith(grade, "D")) {
    return("D")
  } else if (startsWith(grade, "F")) {
    return("F")
  } else if (startsWith(grade, "W")) {
    return("W")
  } else {
    return("Other")
  }
}
allf23$grade_category <- sapply(allf23$finalLetter, categorize_grades)

# Calculate the fraction of rows for each category
grade_counts <- table(allf23$grade_category)
grade_fraction <- prop.table(grade_counts)

# Create a data frame for the pie chart
pie_data <- data.frame(grade_category = names(grade_fraction), fraction = grade_fraction)

# Plot the pie chart
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Fraction of Rows by Letter Grade Category") +
  theme_void() +
  theme(legend.position = "right")

pie_data <- data.frame(grade_category = names(grade_fraction), grade_fraction = grade_fraction)

# Define colors for each category
grade_colors <- c("A" = "blue", "B" = "green", "C" = "yellow", "D" = "orange", "F" = "red", "W" = "gray")

# Plot the pie chart with custom colors
ggplot(pie_data, aes(x = "", y = grade_fraction, fill = grade_category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Fraction of Rows by Letter Grade Category") +
  theme_void() +
  theme(legend.position = "right") +
  scale_fill_manual(values = grade_colors)

generate_col1_percentage_table <- function(df, gradeLetter_col, col1_col,myTitle) {
  # Calculate percentages
  #df = allf23
  #gradeLetter_col = "grade_category"
  #col1_col = "by_practice80_m1"
  result <- df %>%
    group_by(!!sym(gradeLetter_col), !!sym(col1_col)) %>%
    summarize(percentage = n() / nrow(df) * 100) %>%
    ungroup()
  
  # Pivot the data for easier viewing
  result_pivot <- tidyr::pivot_wider(result, names_from = !!sym(col1_col), values_from = percentage, values_fill = 0)
  #result_pivot <- round(result_pivot, 1)
  round_indices = 2:ncol(result_pivot)
  result_pivot[, round_indices] <- round(result_pivot[, round_indices], 1)
  #result_pivot[, round_indices] <- paste0(result_pivot[, round_indices], "%")
  
  result_pivot[, round_indices] <- lapply(result_pivot[, round_indices], function(x) paste0(x, "%"))
  
  # Format the table and return
  return(kable(result_pivot, format = "markdown", caption = myTitle))
}

col1_percentage_table <- generate_col1_percentage_table(allf23,  "grade_category", "by_practice80_m1","Milestone 1 - 1st attempt")
col1_percentage_table
Milestone 1 - 1st attempt
grade_category HEAP HELP LEAP WELP
A 31.3% 0.6% 11.7% 0%
B 16% 4.9% 3.1% 5.5%
C 1.2% 1.8% 1.2% 11%
D 0% 0% 0% 0.6%
F 0% 0% 0% 1.2%
W 0.6% 1.2% 0.6% 7.4%
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m1_all","Milestone 1 - all attempts")
col1_percentage_table
Milestone 1 - all attempts
grade_category HEAP LEAP WELP HELP
A 34.4% 9.2% 0% 0%
B 22.7% 6.1% 0.6% 0%
C 6.7% 4.3% 3.7% 0.6%
D 0% 0% 0.6% 0%
F 0% 0% 0.6% 0.6%
W 1.8% 1.8% 4.9% 1.2%
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3","Milestone 3 - 1st attempt")
col1_percentage_table
Milestone 3 - 1st attempt
grade_category HEAP LEAP WELP HELP DROP
A 34.4% 8% 1.2% 0% 0%
B 12.9% 6.7% 6.7% 3.1% 0%
C 0% 1.2% 11% 3.1% 0%
D 0% 0% 0.6% 0% 0%
F 0% 0.6% 0% 0.6% 0%
W 0% 0% 2.5% 0% 7.4%
col1_percentage_table <- generate_col1_percentage_table(allf23, gradeLetter_col = "grade_category", col1_col ="by_practice80_m3_all","Milestone 3 - all attempts")
col1_percentage_table
Milestone 3 - all attempts
grade_category HEAP LEAP HELP WELP DROP
A 36.8% 6.7% 0% 0% 0%
B 20.2% 6.7% 0.6% 1.8% 0%
C 4.9% 1.8% 3.1% 5.5% 0%
D 0% 0% 0.6% 0% 0%
F 0% 0.6% 0.6% 0% 0%
W 0% 0% 0% 2.5% 7.4%

4 Video Watching to Measure Effort