1 This dataset

The file “umr_all_preFall22.csv” is missing 2 rows for students who didn’t have PLC or NS data (not sure why) and 11 more students whose course enrollment I was not able to determine. Merging it with “bioc3321_f22_allquestions.csv” produces “UMR_all_for_R_with_courses.csv”.

Experts dataset: Experts_all_for_R.csv

Other campuses dataset: “Dennison_UNL_UT_all_for_R.csv”

# NOTE(review): setwd() ties this script to one machine's folder layout; every
# relative file name below is resolved against this directory.
setwd("~/Research/02b Neural Network Research UMR/Data + Analysis/Clustering_Xavier")

# One-off merge, kept disabled for reference: it stacked the pre-Fall-2022 UMR
# file on top of the Fall 2022 BIOC3321 file (course relabelled "Biochem 1",
# actual_year set to "third_year") and wrote "UMR_all_for_R_with_courses.csv".
#umr_pref22 = read.csv("umr_all_preFall22.csv",header = TRUE)
#biocf22 = read.csv("bioc3321_f22_allquestions.csv",header = TRUE)
#biocf22$Course_collected = gsub('BIOC3321', 'Biochem 1', biocf22$Course_collected)
#biocf22$actual_year = "third_year"
#umr = rbind(umr_pref22,biocf22)
#write.csv(umr,"UMR_all_for_R_with_courses.csv",row.names=FALSE)
# Load the merged UMR dataset.
umr = read.csv("UMR_all_for_R_with_courses.csv", header = TRUE)

# Keep only the "Biochem 1" rows from Fall 2021 and relabel them BIOC3321_F21.
allBioc = umr[which(umr$Course_collected == "Biochem 1" & umr$Term_collected == "Fall2021"),]
allBioc$Course_collected = gsub('Biochem 1','BIOC3321_F21',allBioc$Course_collected)
# Append the "Biochem 1" rows from Fall 2022.
allBioc = rbind(allBioc,
                umr[which(umr$Course_collected == "Biochem 1" & umr$Term_collected == "Fall2022"),])
# Order matters: the Fall 2021 rows were already renamed above, so the only rows
# still containing 'Biochem 1' at this point are the Fall 2022 ones.
allBioc$Course_collected = gsub('Biochem 1','BIOC3321_F22',allBioc$Course_collected)
######


# Load the expert responses and the responses collected at the other campuses.
expert = read.csv("Experts_all_for_R.csv",header = TRUE)
other = read.csv("Dennison_UNL_UT_all_for_R.csv",header = TRUE)
# Drop the CHEM131 course from the other-campus data.
other = other[which(other$Course_collected != "CHEM131"),]
# Reduce both data frames to the same nine columns so they can be stacked.
allBioc = allBioc[,c("Institution", "Survey", "Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","NS","PLC")]
other =     other[,c("Institution", "Survey","Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","NS","PLC")]
#other = na.omit(other)
# Only rows without a PLC score are removed; NAs in other columns are kept.
other = other[!is.na(other$PLC),]
allBioc = rbind(allBioc,other)
# Placeholder value: analyzeUMRCourses() selects an actual_year column, so the
# column has to exist even though the year is not tracked for this combined set.
allBioc$actual_year = "Whatever"

# Split the expert responses into one data frame per survey item.
# NOTE(review): two expert labels differ from the student labels used below:
# "ES_Chemical_Equation" vs "ES_Chemical_Reaction", and "Protein_Strcuture" vs
# "Protein_Structure". Presumably each mirrors the spelling stored in its own
# CSV -- confirm, because a label that matches nothing silently yields an empty
# data frame.
exs1 = expert[which(expert$Survey=="ES_Chemical_Equation"),]
exs2 = expert[which(expert$Survey=="ES_Glucosidase"),]
exs3 = expert[which(expert$Survey=="Nucleic_Acids"),]
exs4 = expert[which(expert$Survey=="Oxygen_Binding"),]
exs5 = expert[which(expert$Survey=="Protein_Strcuture"),]

# Split the student responses the same way; allBiocN pairs with exsN.
allBioc1 = allBioc[which(allBioc$Survey=="ES_Chemical_Reaction"),]
allBioc2 = allBioc[which(allBioc$Survey=="ES_Glucosidase"),]
allBioc3 = allBioc[which(allBioc$Survey=="Nucleic_Acids"),]
allBioc4 = allBioc[which(allBioc$Survey=="Oxygen_Binding"),]
allBioc5 = allBioc[which(allBioc$Survey=="Protein_Structure"),]

library(psych)

# Cluster the students of one survey item on their standardized (PLC, NS)
# scores and label the three clusters as High / Intermediate / Low performers.
#
# Args:
#   umrs1: data frame containing the columns Institution, Course_collected,
#          Deidentifier, Sex_birth, Race_ethnicity, Coherency, NS, actual_year
#          and PLC.
#
# Returns:
#   Those nine columns (Coherency, NS and PLC coerced to numeric) plus
#     race_binary   - "White" for "White/Caucasian", otherwise "Non-white"
#     cluster       - k-means cluster id (1..3)
#     clusterLetter - "HP" for the cluster with the highest mean PLC, "LP" for
#                     the lowest, "IP" for the remaining one
#
# Side effect: calls set.seed(42), so the global RNG state is reset each call.
analyzeUMRCourses = function(umrs1){
  allBiochem = umrs1[,c("Institution", "Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","NS","actual_year","PLC")]
  allBiochem$Coherency = as.numeric(allBiochem$Coherency)
  allBiochem$NS = as.numeric(allBiochem$NS)
  allBiochem$PLC = as.numeric(allBiochem$PLC)
  allBiochem$race_binary <- ifelse(allBiochem$Race_ethnicity == "White/Caucasian" , 'White', "Non-white")

  # k-means starts from random centers; the fixed seed keeps the partition
  # reproducible from run to run.
  set.seed(42)
  df <- matrix(data=c(allBiochem$PLC,allBiochem$NS),ncol=2)
  allBiochem$cluster = kmeans(scale(df[,1:2]),3)$cluster

  # k-means numbers its clusters arbitrarily, so rank them by mean PLC.
  # which.max()/which.min() always return exactly one cluster (no exact float
  # comparison, no failure on ties) and setdiff() yields whatever id is left,
  # so the intermediate group can never end up undefined.
  meanPLCbyCluster = tapply(allBiochem$PLC, allBiochem$cluster, mean, na.rm = TRUE)
  clusterIds = as.numeric(names(meanPLCbyCluster))
  HPgroup = clusterIds[which.max(meanPLCbyCluster)]
  LPgroup = clusterIds[which.min(meanPLCbyCluster)]
  IPgroup = setdiff(clusterIds, c(HPgroup, LPgroup))
  allBiochem$clusterLetter = ifelse(allBiochem$cluster == HPgroup, "HP",
                                    ifelse(allBiochem$cluster == LPgroup,"LP",
                                           ifelse(allBiochem$cluster %in% IPgroup,"IP","Oops")))
  #allBiochem$Course_collected = factor(allBiochem$Course_collected,levels = c(
  #  "Gen + Organic 1","O Chem 1","O Chem 2","Gen Chem 2","Biochem 1","Biochem 2"))
  return(allBiochem)
}

# Print six descriptive-statistics tables of PLC, one per grouping column of
# allBiochem: cluster letter, institution, actual year, course, sex and
# binary race. Each table shows columns 2 and 4-12 of psych::describeBy()'s
# matrix output (rounded to 2 digits) rendered with knitr::kable().
buildTables = function(allBiochem){
  # grouping column -> caption, in the order the tables are printed
  captions = c(
    clusterLetter    = "PLC by cluster group",
    Institution      = "PLC by institution",
    actual_year      = "PLC by Actual Year",
    Course_collected = "PLC by course",
    Sex_birth        = "PLC by Sex",
    race_binary      = "PLC by Race"
  )
  shownColumns = c(2, 4:12)
  for (grouping in names(captions)){
    stats = describeBy(allBiochem$PLC, allBiochem[[grouping]], mat=TRUE, digits = 2)
    print(knitr::kable(stats[, shownColumns], caption = captions[[grouping]]))
  }
}
# For every value of allBiochem$Course_collected except "Expert", print (with
# cat) an HTML heading followed by an HTML table giving the share of High /
# Intermediate / Low performers (clusterLetter "HP" / "IP" / "LP"): overall,
# split by Sex_birth ("Male" / "Female") and split by race_binary
# ("White" / "Non-white"). Percentages are relative to the size of each
# subgroup within the course and are rounded to 2 significant digits.
#
# Args:
#   allBiochem: data frame with columns Course_collected, clusterLetter,
#               Sex_birth and race_binary.
#   mycategory: accepted but never read; the grouping column is always
#               Course_collected.
calcStats = function(allBiochem,mycategory){
  # Row masks that do not depend on the course being reported.
  clusterOf = allBiochem$clusterLetter
  isMale = allBiochem$Sex_birth == "Male"
  isFemale = allBiochem$Sex_birth == "Female"
  isWhite = allBiochem$race_binary == "White"
  isNonwhite = allBiochem$race_binary == "Non-white"
  everyone = TRUE

  for (course in unique(allBiochem$Course_collected)){
    if ( course == "Expert") next
    cat(paste("<b>Results for category: ",course,"</b></br></br>"))

    inGroup = allBiochem$Course_collected == course
    # number of rows of this course that fall in the subgroup
    size = function(mask) sum(inGroup & mask)
    # percentage of the subgroup that carries the given cluster letter
    share = function(mask, letter){
      signif(sum(inGroup & mask & clusterOf == letter)/size(mask)*100,digits=2)
    }

    cat(paste("<table >
<thead>
<tr>
  <th colspan='2'></th>
  <th colspan='2'>High Performers</th>
  <th colspan='2'>Intermediate Performers</th>
  <th colspan='2'>Low Performers</th>
  
</tr>
</thead>
<tbody>
  <tr>
    <td rowspan='5'>",course," </td>
    <td>Total N=", size(everyone),"</td>
    <td colspan='2'>", share(everyone,"HP"),"% </td>
    <td colspan='2'>", share(everyone,"IP"),"%</td>
    <td colspan='2'>", share(everyone,"LP"),"% </td>
  </tr>
  <tr>
    <td rowspan='2'>Sex: males N=",size(isMale),"; females N=",size(isFemale),"</td>
    <td>male</td>
    <td>female</td>
    <td>male</td>
    <td>female</td>
    <td>male</td>
    <td>female</td>
  </tr>
  <tr>
    <td>", share(isMale,"HP"),"%</td>
    <td>", share(isFemale,"HP"),"%</td>
    <td>", share(isMale,"IP"),"%</td>
    <td>", share(isFemale,"IP"),"%</td>
    <td>", share(isMale,"LP"),"%</td>
    <td>", share(isFemale,"LP"),"%</td>
  </tr>
  <tr>
    <td rowspan='2'>Race: White N=",size(isWhite),"; Non-white N=",size(isNonwhite),"</td>
    <td>white</td>
    <td>non-white</td>
    <td>white</td>
    <td>non-white</td>
    <td>white</td>
    <td>non-white</td>
  </tr>
  <tr>
    <td>", share(isWhite,"HP"),"%</td>
    <td>", share(isNonwhite,"HP"),"%</td>
    <td>", share(isWhite,"IP"),"%</td>
    <td>", share(isNonwhite,"IP"),"%</td>
    <td>", share(isWhite,"LP"),"%</td>
    <td>", share(isNonwhite,"LP"),"%</td>
  </tr>
</tbody>
</table> "))
  }

}


# Same report as calcStats(), but grouped by allBiochem$actual_year: for every
# value of that column except "Expert", print (with cat) an HTML heading and an
# HTML table giving the share of High / Intermediate / Low performers
# (clusterLetter "HP" / "IP" / "LP") overall, by Sex_birth and by race_binary.
# Percentages are relative to the size of each subgroup within the year and are
# rounded to 2 significant digits.
#
# Args:
#   allBiochem: data frame with columns actual_year, clusterLetter, Sex_birth
#               and race_binary.
#   mycategory: accepted but never read; the grouping column is always
#               actual_year.
calcStats2 = function(allBiochem,mycategory){
  # Row masks that do not depend on the year being reported.
  clusterOf = allBiochem$clusterLetter
  isMale = allBiochem$Sex_birth == "Male"
  isFemale = allBiochem$Sex_birth == "Female"
  isWhite = allBiochem$race_binary == "White"
  isNonwhite = allBiochem$race_binary == "Non-white"
  everyone = TRUE

  for (course in unique(allBiochem$actual_year)){
    if ( course == "Expert") next
    cat(paste("<b>Results for category: ",course,"</b></br></br>"))

    inGroup = allBiochem$actual_year == course
    # number of rows of this year that fall in the subgroup
    size = function(mask) sum(inGroup & mask)
    # percentage of the subgroup that carries the given cluster letter
    share = function(mask, letter){
      signif(sum(inGroup & mask & clusterOf == letter)/size(mask)*100,digits=2)
    }

    cat(paste("<table >
<thead>
<tr>
  <th colspan='2'></th>
  <th colspan='2'>High Performers</th>
  <th colspan='2'>Intermediate Performers</th>
  <th colspan='2'>Low Performers</th>
  
</tr>
</thead>
<tbody>
  <tr>
    <td rowspan='5'>",course," </td>
    <td>Total N=", size(everyone),"</td>
    <td colspan='2'>", share(everyone,"HP"),"% </td>
    <td colspan='2'>", share(everyone,"IP"),"%</td>
    <td colspan='2'>", share(everyone,"LP"),"% </td>
  </tr>
  <tr>
    <td rowspan='2'>Sex: males N=",size(isMale),"; females N=",size(isFemale),"</td>
    <td>male</td>
    <td>female</td>
    <td>male</td>
    <td>female</td>
    <td>male</td>
    <td>female</td>
  </tr>
  <tr>
    <td>", share(isMale,"HP"),"%</td>
    <td>", share(isFemale,"HP"),"%</td>
    <td>", share(isMale,"IP"),"%</td>
    <td>", share(isFemale,"IP"),"%</td>
    <td>", share(isMale,"LP"),"%</td>
    <td>", share(isFemale,"LP"),"%</td>
  </tr>
  <tr>
    <td rowspan='2'>Race: White N=",size(isWhite),"; Non-white N=",size(isNonwhite),"</td>
    <td>white</td>
    <td>non-white</td>
    <td>white</td>
    <td>non-white</td>
    <td>white</td>
    <td>non-white</td>
  </tr>
  <tr>
    <td>", share(isWhite,"HP"),"%</td>
    <td>", share(isNonwhite,"HP"),"%</td>
    <td>", share(isWhite,"IP"),"%</td>
    <td>", share(isNonwhite,"IP"),"%</td>
    <td>", share(isWhite,"LP"),"%</td>
    <td>", share(isNonwhite,"LP"),"%</td>
  </tr>
</tbody>
</table> "))
  }

}



library(ggplot2)
library(ggpubr)
library(psych)

# Box plot (with jittered points) of column `myy` grouped by column `myx`.
#
# Args:
#   df:      data frame holding both columns; rows with a missing `myy` value
#            are dropped before plotting.
#   myx:     name of the grouping column (x axis and colour).
#   myy:     name of the numeric column (y axis).
#   mytitle: plot title.
#   myylab:  y-axis label.
#
# Returns: the ggplot object. It also carries a dashed line at the overall mean
#   of `myy`, a global ANOVA p-value label, and one t-test p-value per group
#   against all observations (ref.group = ".all.").
plotGGbox = function(df,myx,myy,mytitle,myylab){
  df = df[complete.cases(df[[myy]]),]
  maxy = max(df[[myy]])
  # Scores can be negative; a fixed lower limit of 0 would push those points
  # and whiskers outside the panel. Keep 0 as the floor only when every value
  # is non-negative.
  miny = min(0, df[[myy]])
  ggboxplot(df, x = myx, y = myy,  
            title = mytitle,
            color = myx, add = "jitter", legend="none",ylab = myylab) + rotate_x_text(angle = 45) +  
    geom_hline( yintercept = mean(df[[myy]]), linetype = 2) + 
    stat_compare_means(method = "anova", label.y = maxy*1.10) +
    # head-room above the data leaves space for the two rows of p-value labels
    coord_cartesian(ylim = c(miny, maxy*1.2)) + 
    stat_compare_means(label = "p.format", size=2.5, method = "t.test", ref.group = ".all.",label.y = maxy*1.05)
}
# Run a one-way ANOVA of df[[myy]] on df[[myx]], follow it with Tukey's HSD and
# print the adjusted p-value of every pairwise comparison as a kable table.
#
# Args:
#   df:      data frame holding both columns.
#   myx:     name of the grouping column.
#   myy:     name of the numeric response column.
#   mytitle: text appended to the table caption.
#   myylab:  unused; kept so the signature matches plotGGbox().
getAnova = function(df,myx,myy,mytitle,myylab){
  #get anova
  a<- TukeyHSD( aov(df[[myy]] ~ df[[myx]])) 
  # The model has a single term, so the result holds a single comparison
  # matrix; its 4th column is the adjusted p-value. drop = FALSE keeps the
  # matrix shape, so the comparison label (row name) survives even when there
  # is only one pair of groups.
  b<-as.data.frame(a[[1]][, 4, drop = FALSE])
  colnames(b) = c("Testing statistical significance: p-values")
  print(knitr::kable(b, caption = paste("Anova: ",mytitle)))
}
# Print the box plot, the descriptive-statistics table and the Tukey p-value
# table for column `myy` grouped by column `myx`.
#
# When grouping by "Sex_birth" or "race_binary", rows whose Course_collected
# matches "Expert" and rows whose Sex_birth matches "Prefer not to answer" are
# removed first.
#
# Args:
#   df:      data frame with the columns named by myx and myy, plus
#            Course_collected and Sex_birth.
#   myx:     name of the grouping column.
#   myy:     name of the numeric column.
#   mytitle: title for the plot and the ANOVA caption.
#   myylab:  y-axis label, also used in the statistics caption.
plotAndTable = function(df,myx,myy,mytitle,myylab){
  demographicSplit = myx=="Sex_birth" || myx=="race_binary"
  if (demographicSplit){
    isExpert = grepl("(?i)Expert", df$Course_collected)
    df = df[!isExpert,]
    declined = grepl("(?)Prefer not to answer",df$Sex_birth)
    df = df[!declined,]
  }

  print(plotGGbox(df,myx,myy,mytitle,myylab))

  shownColumns = c(2,4,5,6,7,10,11,12)
  table = describeBy(df[[myy]],df[[myx]],mat=TRUE,digits = 2)
  statsCaption = paste("Statistics of ",myylab," based on the category",myx)
  print(knitr::kable(table[,shownColumns],caption=statsCaption))

  getAnova(df,myx,myy,mytitle,myylab)
}
# Append the expert responses to a student data frame.
#
# Args:
#   alldf:   student data frame (for example the output of analyzeUMRCourses());
#            it must contain the columns PLC, NS and Coherency.
#   experts: data frame with columns PLC, NS and Coherency.
#
# Returns:
#   alldf with one extra row per expert. In those rows PLC, NS and Coherency
#   come from `experts`, and every other column holds the string "Expert".
addExperts = function(alldf, experts){
  # Build the expert rows from the `alldf` argument itself (not from a global
  # of the same role) and size them to however many columns it has.
  ex_new = as.data.frame(
    matrix("Expert", nrow = nrow(experts), ncol = ncol(alldf)),
    stringsAsFactors = FALSE
  )
  colnames(ex_new) =  colnames(alldf)
  ex_new$PLC = experts$PLC
  ex_new$NS = experts$NS
  ex_new$Coherency = experts$Coherency
  alldf=rbind(alldf,ex_new)
  return(alldf)
}

library(dplyr)
library(corrplot)
# Chi-square test on the contingency table of a two-column data frame, printed
# as HTML, followed by a corrplot of the test's residuals.
#
# Args:
#   a: data frame with the two categorical columns to cross-tabulate.
plotChi = function(a){
  # droplevels() removes unused factor levels, which otherwise show up as an
  # all-zero category (e.g. "Expert") in the table.
  crossTab = table(droplevels(a))
  chi = chisq.test(crossTab)
  cat(paste("<p><b>The Chi-square analysis gives a p=",round(chi$p.value,5),"</b></p>"))
  cat("<p><b>Residuals analysis:</b></p>")
  cat("A negative residual implies that the measured value is lower than expected and a positive value higher than expected</br>")
  # Alternative kept for reference: plot each cell's percentage contribution
  #   contrib <- 100*chi$residuals^2/chi$statistic
  #   corrplot(contrib, is.cor = FALSE)
  corrplot(chi$residuals, is.cor = FALSE)
}
# Stacked bar chart of `myy` within each level of `myx`, followed by the
# chi-square / residuals analysis of the same two columns.
#
# Args:
#   df:       data frame; rows whose FIRST column matches "Expert" are dropped,
#             since experts are not useful for the chi-square analysis.
#   myx:      independent variable (course or demographic column name). When it
#             is "Sex_birth", rows matching "Prefer not to answer" are dropped.
#   myy:      dependent variable, typically "clusterLetter".
#   myxlabel, myylabel, mytitle: axis labels and title passed to the bar chart.
plotBarAndCorr = function(df,myx,myy,myxlabel,myylabel,mytitle){
  students = df[!grepl("Expert",df[,1]),]
  if (myx=="Sex_birth"){
    answered = !grepl("(?)Prefer not to answer",students$Sex_birth)
    students = students[answered,]
  }
  # keep only the two categorical variables under study
  pair = students[,c(myy,myx)]
  print(plotBarCategories(pair,myx,myy,myxlabel,myylabel,mytitle))
  plotChi(pair)
}
# Stacked bar chart: one bar per level of column `myx`, split and filled by
# column `myy`, with every segment labelled by its percentage within the bar.
#
# Args:
#   a:        data frame containing the columns named by myx and myy.
#   myx, myy: column names given as strings.
#   myxlabel, myylabel, mytitle: axis labels and plot title.
#
# Returns: the ggplot object.
plotBarCategories = function(a,myx,myy,myxlabel,myylabel,mytitle){
  # The column names arrive as strings, so turn them into symbols once and
  # unquote them wherever a column is expected.
  xCol = sym(myx)
  fillCol = sym(myy)

  # rows per (fill, x) combination, then each combination's share of its bar
  tallied = count(a, !!fillCol, !!xCol)
  tallied = group_by(tallied, !!xCol)
  tallied = mutate(tallied, lab = paste0(round(prop.table(n) * 100, 2), '%'))

  bars = ggplot(tallied, aes(!!xCol,n, fill=!!fillCol)) +
    geom_col() +
    geom_text(aes(label=lab),position='stack',vjust=1.5)
  bars + labs(x=myxlabel,y=myylabel,title=mytitle)
}

2 Introduction

Refer to this link: http://chem.r.umn.edu/visual_literacy/ for an introduction to what we are doing and what this file is trying to analyze.

3 AllBiochem: ES Chemical Equation

3.1 PLC only: Anova

We test whether the PLC score differs significantly across the levels of each category: “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.

#
allBiochem = analyzeUMRCourses(allBioc1)
allBiochem = addExperts(allBiochem,exs1)
#adding experts
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")

Statistics of PLC based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	67	0.42	0.17	0.42	-0.06	0.71	0.77
X12	BCH339M	45	0.37	0.17	0.39	-0.08	0.69	0.77
X13	BCH369	434	0.38	0.17	0.41	-0.16	0.74	0.89
X14	BIO206	15	0.27	0.22	0.35	-0.33	0.51	0.84
X15	BIOC3321_F21	58	0.40	0.18	0.45	-0.18	0.67	0.85
X16	BIOC3321_F22	43	0.46	0.11	0.47	0.23	0.72	0.49
X17	BIOC431	106	0.38	0.17	0.39	-0.12	0.66	0.78
X18	Expert	6	0.67	0.12	0.69	0.49	0.82	0.33

Anova: PLC: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.7440374
BCH369-BCH339F	0.6379475
BIO206-BCH339F	0.0397217
BIOC3321_F21-BCH339F	0.9963575
BIOC3321_F22-BCH339F	0.9128459
BIOC431-BCH339F	0.6869647
Expert-BCH339F	0.0133771
BCH369-BCH339M	0.9996829
BIO206-BCH339M	0.5198322
BIOC3321_F21-BCH339M	0.9850332
BIOC3321_F22-BCH339M	0.1535356
BIOC431-BCH339M	0.9999967
Expert-BCH339M	0.0011671
BIO206-BCH369	0.1952398
BIOC3321_F21-BCH369	0.9957465
BIOC3321_F22-BCH369	0.0563817
BIOC431-BCH369	0.9999892
Expert-BCH369	0.0009626
BIOC3321_F21-BIO206	0.1477599
BIOC3321_F22-BIO206	0.0040136
BIOC431-BIO206	0.3141028
Expert-BIO206	0.0000332
BIOC3321_F22-BIOC3321_F21	0.5738702
BIOC431-BIOC3321_F21	0.9912266
Expert-BIOC3321_F21	0.0049232
BIOC431-BIOC3321_F22	0.0890571
Expert-BIOC3321_F22	0.0928535
Expert-BIOC431	0.0009923

#plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")

Statistics of PLC based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	472	0.37	0.17	0.40	-0.33	0.74	1.06
X12	White	295	0.41	0.16	0.44	-0.12	0.72	0.83

Anova: PLC: White/Non-white
Testing statistical significance: p-values
0.0006445

plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")

Statistics of PLC based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	536	0.38	0.16	0.42	-0.33	0.74	1.06
X12	Male	231	0.39	0.18	0.41	-0.18	0.72	0.90

Anova: PLC: Sex
Testing statistical significance: p-values
0.6741292

3.2 NS only: Anova

plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")

Statistics of NS based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	67	0.23	0.08	0.22	0.09	0.44	0.35
X12	BCH339M	45	0.25	0.11	0.22	0.05	0.64	0.60
X13	BCH369	434	0.23	0.08	0.22	0.04	0.50	0.46
X14	BIO206	15	0.26	0.11	0.25	0.09	0.53	0.44
X15	BIOC3321_F21	58	0.22	0.07	0.23	0.09	0.41	0.32
X16	BIOC3321_F22	43	0.24	0.07	0.25	0.13	0.42	0.29
X17	BIOC431	106	0.23	0.08	0.23	0.05	0.47	0.43
X18	Expert	6	0.37	0.11	0.34	0.28	0.57	0.29

Anova: NS: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.9584618
BCH369-BCH339F	0.9999960
BIO206-BCH339F	0.8954802
BIOC3321_F21-BCH339F	0.9999382
BIOC3321_F22-BCH339F	0.9827704
BIOC431-BCH339F	0.9999993
Expert-BCH339F	0.0008977
BCH369-BCH339M	0.7729823
BIO206-BCH339M	0.9993777
BIOC3321_F21-BCH339M	0.8527668
BIOC3321_F22-BCH339M	1.0000000
BIOC431-BCH339M	0.9735230
Expert-BCH339M	0.0080767
BIO206-BCH369	0.7776656
BIOC3321_F21-BCH369	0.9999966
BIOC3321_F22-BCH369	0.8775129
BIOC431-BCH369	0.9988141
Expert-BCH369	0.0003255
BIOC3321_F21-BIO206	0.7904576
BIOC3321_F22-BIO206	0.9983188
BIOC431-BIO206	0.9192574
Expert-BIO206	0.0736462
BIOC3321_F22-BIOC3321_F21	0.9158007
BIOC431-BIOC3321_F21	0.9986318
Expert-BIOC3321_F21	0.0005035
BIOC431-BIOC3321_F22	0.9910969
Expert-BIOC3321_F22	0.0066300
Expert-BIOC431	0.0009379

plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")

Statistics of NS based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	472	0.23	0.08	0.22	0.04	0.64	0.60
X12	White	295	0.24	0.08	0.23	0.05	0.47	0.43

Anova: NS: White/Non-white
Testing statistical significance: p-values
0.1406698

plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")

Statistics of NS based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	536	0.23	0.08	0.22	0.04	0.50	0.46
X12	Male	231	0.23	0.08	0.23	0.05	0.64	0.60

Anova: NS: Sex
Testing statistical significance: p-values
0.3742489

3.3 PLC/NS clustering

The problem with clustering is that k-means is an iterative method, and different initial seeds can yield different results. The results below are reproducible only because the seed is fixed with “set.seed(42)” before k-means is run.

plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")

Statistics of PLC based on the category clusterLetter
	group1	n	mean	sd	median	min	max	range
X11	Expert	6	0.67	0.12	0.69	0.49	0.82	0.33
X12	HP	228	0.51	0.10	0.51	0.18	0.74	0.55
X13	IP	346	0.44	0.09	0.43	0.24	0.72	0.47
X14	LP	194	0.15	0.12	0.19	-0.33	0.38	0.71

Anova: PLC: Cluster letter
	Testing statistical significance: p-values
HP-Expert	0.0005333
IP-Expert	0.0000001
LP-Expert	0.0000000
IP-HP	0.0000000
LP-HP	0.0000000
LP-IP	0.0000000

Are cluster groups unevenly distributed among these categories? A chi-square test of independence gives the p-value for the null hypothesis that cluster membership (HP, IP, LP) is unrelated to the category (course, year, sex, race…); a small p-value indicates that the proportions of the three clusters differ between the groups of that category.

3.3.1 Analysis by course

plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")

The Chi-square analysis gives a p= 0.18695

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# One integer code per course, used as the plotting colour.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# NS against PLC; pch is given the cluster label, so each point is drawn as a
# letter (H, I, L per the title).
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "ES_Chemical_Reaction - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-appearance order in both vectors, so course names and
# colours stay paired in the legend.
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)

calcStats(allBiochem,"Course_collected")

Results for category: BIOC3321_F21

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F21	Total N= 58	28 %		53 %		19 %
	Sex: males N= 18 ; females N= 40	male	female	male	female	male	female
	Sex: males N= 18 ; females N= 40	50 %	18 %	39 %	60 %	11 %	22 %
	Race: White N= 32 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 32 ; Non-white N= 26	28 %	27 %	56 %	50 %	16 %	23 %

Results for category: BIOC3321_F22

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F22	Total N= 43	44 %		47 %		9.3 %
	Sex: males N= 5 ; females N= 38	male	female	male	female	male	female
	Sex: males N= 5 ; females N= 38	40 %	45 %	20 %	50 %	40 %	5.3 %
	Race: White N= 28 ; Non-white N= 15	white	non-white	white	non-white	white	non-white
	Race: White N= 28 ; Non-white N= 15	54 %	27 %	43 %	53 %	3.6 %	20 %

Results for category: BIOC431

		High Performers		Intermediate Performers		Low Performers
BIOC431	Total N= 106	30 %		42 %		28 %
	Sex: males N= 42 ; females N= 64	male	female	male	female	male	female
	Sex: males N= 42 ; females N= 64	36 %	27 %	43 %	41 %	21 %	33 %
	Race: White N= 80 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 80 ; Non-white N= 26	31 %	27 %	40 %	46 %	29 %	27 %

Results for category: BCH339F

		High Performers		Intermediate Performers		Low Performers
BCH339F	Total N= 67	28 %		52 %		19 %
	Sex: males N= 26 ; females N= 41	male	female	male	female	male	female
	Sex: males N= 26 ; females N= 41	38 %	22 %	42 %	59 %	19 %	20 %
	Race: White N= 19 ; Non-white N= 48	white	non-white	white	non-white	white	non-white
	Race: White N= 19 ; Non-white N= 48	26 %	29 %	74 %	44 %	0 %	27 %

Results for category: BCH339M

		High Performers		Intermediate Performers		Low Performers
BCH339M	Total N= 45	33 %		33 %		33 %
	Sex: males N= 14 ; females N= 31	male	female	male	female	male	female
	Sex: males N= 14 ; females N= 31	43 %	29 %	21 %	39 %	36 %	32 %
	Race: White N= 11 ; Non-white N= 34	white	non-white	white	non-white	white	non-white
	Race: White N= 11 ; Non-white N= 34	27 %	35 %	27 %	35 %	45 %	29 %

Results for category: BCH369

		High Performers		Intermediate Performers		Low Performers
BCH369	Total N= 434	28 %		45 %		27 %
	Sex: males N= 124 ; females N= 309	male	female	male	female	male	female
	Sex: males N= 124 ; females N= 309	27 %	29 %	45 %	45 %	28 %	26 %
	Race: White N= 122 ; Non-white N= 312	white	non-white	white	non-white	white	non-white
	Race: White N= 122 ; Non-white N= 312	34 %	26 %	48 %	44 %	18 %	30 %

Results for category: BIO206

		High Performers		Intermediate Performers		Low Performers
BIO206	Total N= 15	33 %		33 %		33 %
	Sex: males N= 2 ; females N= 13	male	female	male	female	male	female
	Sex: males N= 2 ; females N= 13	50 %	31 %	0 %	38 %	50 %	31 %
	Race: White N= 3 ; Non-white N= 12	white	non-white	white	non-white	white	non-white
	Race: White N= 3 ; Non-white N= 12	33 %	33 %	33 %	33 %	33 %	33 %

4 All Biochem: ES Glucosidase

4.1 PLC only: Anova

We test whether the PLC score differs significantly across the levels of each category: “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.

#
allBiochem = analyzeUMRCourses(allBioc2)
allBiochem = addExperts(allBiochem,exs2)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")

Statistics of PLC based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	67	0.47	0.16	0.50	-0.02	0.74	0.77
X12	BCH339M	45	0.42	0.16	0.42	0.06	0.70	0.64
X13	BCH369	434	0.42	0.17	0.46	-0.18	0.74	0.92
X14	BIO206	15	0.24	0.24	0.30	-0.28	0.51	0.79
X15	BIOC3321_F21	58	0.44	0.17	0.46	-0.16	0.68	0.84
X16	BIOC3321_F22	43	0.47	0.10	0.48	0.24	0.65	0.41
X17	BIOC431	106	0.42	0.18	0.44	-0.10	0.80	0.89
X18	Expert	8	0.72	0.09	0.70	0.59	0.82	0.23

Anova: PLC: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.8080831
BCH369-BCH339F	0.3210822
BIO206-BCH339F	0.0000776
BIOC3321_F21-BCH339F	0.9642580
BIOC3321_F22-BCH339F	1.0000000
BIOC431-BCH339F	0.6205090
Expert-BCH339F	0.0022142
BCH369-BCH339M	1.0000000
BIO206-BCH339M	0.0097112
BIOC3321_F21-BCH339M	0.9996817
BIOC3321_F22-BCH339M	0.8710228
BIOC431-BCH339M	1.0000000
Expert-BCH339M	0.0001342
BIO206-BCH369	0.0017564
BIOC3321_F21-BCH369	0.9952487
BIOC3321_F22-BCH369	0.5723920
BIOC431-BCH369	1.0000000
Expert-BCH369	0.0000233
BIOC3321_F21-BIO206	0.0018300
BIOC3321_F22-BIO206	0.0002082
BIOC431-BIO206	0.0030260
Expert-BIO206	0.0000000
BIOC3321_F22-BIOC3321_F21	0.9794716
BIOC431-BIOC3321_F21	0.9993384
Expert-BIOC3321_F21	0.0003043
BIOC431-BIOC3321_F22	0.7678388
Expert-BIOC3321_F22	0.0035366
Expert-BIOC431	0.0000544

plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")

Statistics of PLC based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	472	0.41	0.18	0.44	-0.28	0.79	1.06
X12	White	295	0.45	0.16	0.47	-0.10	0.80	0.89

Anova: PLC: White/Non-white
Testing statistical significance: p-values
0.0027902

plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")

Statistics of PLC based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	536	0.42	0.17	0.46	-0.28	0.79	1.06
X12	Male	231	0.42	0.19	0.46	-0.18	0.80	0.97

Anova: PLC: Sex
Testing statistical significance: p-values
0.8856167

4.2 NS only: Anova

plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")

Statistics of NS based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	67	0.27	0.10	0.26	0.09	0.56	0.48
X12	BCH339M	45	0.28	0.10	0.27	0.09	0.57	0.48
X13	BCH369	434	0.25	0.09	0.24	0.03	0.71	0.68
X14	BIO206	15	0.25	0.10	0.24	0.05	0.44	0.40
X15	BIOC3321_F21	58	0.25	0.09	0.26	0.04	0.45	0.41
X16	BIOC3321_F22	43	0.25	0.07	0.23	0.10	0.41	0.31
X17	BIOC431	106	0.26	0.09	0.26	0.04	0.53	0.49
X18	Expert	8	0.40	0.06	0.42	0.29	0.47	0.17

Anova: NS: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.9995223
BCH369-BCH339F	0.8252287
BIO206-BCH339F	0.9975207
BIOC3321_F21-BCH339F	0.9777935
BIOC3321_F22-BCH339F	0.9671558
BIOC431-BCH339F	0.9999998
Expert-BCH339F	0.0032583
BCH369-BCH339M	0.5672256
BIO206-BCH339M	0.9757107
BIOC3321_F21-BCH339M	0.8611781
BIOC3321_F22-BCH339M	0.8411231
BIOC431-BCH339M	0.9963071
Expert-BCH339M	0.0121561
BIO206-BCH369	1.0000000
BIOC3321_F21-BCH369	1.0000000
BIOC3321_F22-BCH369	1.0000000
BIOC431-BCH369	0.7936262
Expert-BCH369	0.0001522
BIOC3321_F21-BIO206	1.0000000
BIOC3321_F22-BIO206	1.0000000
BIOC431-BIO206	0.9988230
Expert-BIO206	0.0050687
BIOC3321_F22-BIOC3321_F21	0.9999999
BIOC431-BIOC3321_F21	0.9856284
Expert-BIOC3321_F21	0.0005597
BIOC431-BIOC3321_F22	0.9771820
Expert-BIOC3321_F22	0.0005873
Expert-BIOC431	0.0017943

plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")

Statistics of NS based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	472	0.25	0.09	0.24	0.03	0.62	0.59
X12	White	295	0.26	0.09	0.26	0.04	0.71	0.67

Anova: NS: White/Non-white
Testing statistical significance: p-values
0.0241297

plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")

Statistics of NS based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	536	0.25	0.09	0.25	0.03	0.71	0.68
X12	Male	231	0.26	0.09	0.26	0.03	0.57	0.54

Anova: NS: Sex
Testing statistical significance: p-values
0.3837504

4.3 PLC/NS clustering

The problem with clustering is that k-means is an iterative method, and different initial seeds can yield different results. The results below are reproducible only because the seed is fixed with “set.seed(42)” before k-means is run.

plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")

Statistics of PLC based on the category clusterLetter
	group1	n	mean	sd	median	min	max	range
X11	Expert	8	0.72	0.09	0.70	0.59	0.82	0.23
X12	HP	205	0.54	0.10	0.55	0.23	0.80	0.57
X13	IP	407	0.47	0.09	0.47	0.27	0.70	0.43
X14	LP	156	0.16	0.13	0.17	-0.28	0.34	0.62

Anova: PLC: Cluster letter
	Testing statistical significance: p-values
HP-Expert	1.46e-05
IP-Expert	0.00e+00
LP-Expert	0.00e+00
IP-HP	0.00e+00
LP-HP	0.00e+00
LP-IP	0.00e+00

4.3.1 Analysis by course

plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")

The Chi-square analysis gives a p= 0.0192

Residuals analysis:

A negative residual implies that the measured value is lower than expected and a positive value higher than expected

# One integer code per course, used as the plotting colour.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# NS against PLC; pch is given the cluster label, so each point is drawn as a
# letter (H, I, L per the title).
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "ES Glucosidase - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-appearance order in both vectors, so course names and
# colours stay paired in the legend.
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)

calcStats(allBiochem,"Course_collected")

Results for category: BIOC3321_F21

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F21	Total N= 58	26 %		57 %		17 %
	Sex: males N= 18 ; females N= 40	male	female	male	female	male	female
	Sex: males N= 18 ; females N= 40	28 %	25 %	56 %	57 %	17 %	18 %
	Race: White N= 32 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 32 ; Non-white N= 26	34 %	15 %	53 %	62 %	12 %	23 %

Results for category: BIOC3321_F22

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F22	Total N= 43	19 %		74 %		7 %
	Sex: males N= 5 ; females N= 38	male	female	male	female	male	female
	Sex: males N= 5 ; females N= 38	20 %	18 %	60 %	76 %	20 %	5.3 %
	Race: White N= 28 ; Non-white N= 15	white	non-white	white	non-white	white	non-white
	Race: White N= 28 ; Non-white N= 15	25 %	6.7 %	71 %	80 %	3.6 %	13 %

Results for category: BIOC431

		High Performers		Intermediate Performers		Low Performers
BIOC431	Total N= 106	33 %		48 %		19 %
	Sex: males N= 42 ; females N= 64	male	female	male	female	male	female
	Sex: males N= 42 ; females N= 64	38 %	30 %	50 %	47 %	12 %	23 %
	Race: White N= 80 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 80 ; Non-white N= 26	35 %	27 %	45 %	58 %	20 %	15 %

Results for category: BCH339F

		High Performers		Intermediate Performers		Low Performers
BCH339F	Total N= 67	27 %		58 %		15 %
	Sex: males N= 26 ; females N= 41	male	female	male	female	male	female
	Sex: males N= 26 ; females N= 41	31 %	24 %	58 %	59 %	12 %	17 %
	Race: White N= 19 ; Non-white N= 48	white	non-white	white	non-white	white	non-white
	Race: White N= 19 ; Non-white N= 48	42 %	21 %	53 %	60 %	5.3 %	19 %

Results for category: BCH339M

		High Performers		Intermediate Performers		Low Performers
BCH339M	Total N= 45	31 %		47 %		22 %
	Sex: males N= 14 ; females N= 31	male	female	male	female	male	female
	Sex: males N= 14 ; females N= 31	29 %	32 %	50 %	45 %	21 %	23 %
	Race: White N= 11 ; Non-white N= 34	white	non-white	white	non-white	white	non-white
	Race: White N= 11 ; Non-white N= 34	18 %	35 %	45 %	47 %	36 %	18 %

Results for category: BCH369

		High Performers		Intermediate Performers		Low Performers
BCH369	Total N= 434	26 %		52 %		22 %
	Sex: males N= 124 ; females N= 309	male	female	male	female	male	female
	Sex: males N= 124 ; females N= 309	24 %	26 %	53 %	52 %	23 %	22 %
	Race: White N= 122 ; Non-white N= 312	white	non-white	white	non-white	white	non-white
	Race: White N= 122 ; Non-white N= 312	24 %	27 %	59 %	50 %	17 %	24 %

Results for category: BIO206

		High Performers		Intermediate Performers		Low Performers
BIO206	Total N= 15	20 %		27 %		53 %
	Sex: males N= 2 ; females N= 13	male	female	male	female	male	female
	Sex: males N= 2 ; females N= 13	50 %	15 %	0 %	31 %	50 %	54 %
	Race: White N= 3 ; Non-white N= 12	white	non-white	white	non-white	white	non-white
	Race: White N= 3 ; Non-white N= 12	0 %	25 %	33 %	25 %	67 %	50 %

5 All Biochem: Nucleic Acids

5.1 PLC only: Anova

We test whether the PLC score differs significantly across the levels of each category: “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.

# Nucleic Acids survey: build the analysis table, then compare PLC across courses.
# NOTE(review): allBioc3 is defined outside this section; presumably the
# Nucleic_Acids subset of allBioc, mirroring exs3 (expert Survey == "Nucleic_Acids") - confirm.
allBiochem = analyzeUMRCourses(allBioc3)
# Add the expert responses (exs3); "Expert" then shows up as its own group in the tables below.
allBiochem = addExperts(allBiochem,exs3)
# buildTables() is disabled (commented out) for this run.
#buildTables(allBiochem)
# PLC by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")

Statistics of PLC based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	54	0.21	0.17	0.24	-0.33	0.59	0.93
X12	BCH339M	39	0.24	0.20	0.27	-0.23	0.61	0.84
X13	BCH369	140	0.16	0.17	0.15	-0.29	0.92	1.21
X14	BIO206	8	0.13	0.12	0.14	-0.03	0.31	0.35
X15	BIOC3321_F21	53	0.14	0.15	0.14	-0.17	0.56	0.72
X16	BIOC3321_F22	36	0.16	0.12	0.15	-0.08	0.39	0.47
X17	BIOC431	105	0.19	0.15	0.19	-0.18	0.60	0.78
X18	Expert	7	0.71	0.08	0.69	0.60	0.82	0.22

Anova: PLC: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.9935714
BCH369-BCH339F	0.3958337
BIO206-BCH339F	0.8735208
BIOC3321_F21-BCH339F	0.3778839
BIOC3321_F22-BCH339F	0.7614683
BIOC431-BCH339F	0.9972204
Expert-BCH339F	0.0000000
BCH369-BCH339M	0.0967327
BIO206-BCH339M	0.6488155
BIOC3321_F21-BCH339M	0.1060527
BIOC3321_F22-BCH339M	0.3594868
BIOC431-BCH339M	0.8060640
Expert-BCH339M	0.0000000
BIO206-BCH369	0.9997373
BIOC3321_F21-BCH369	0.9997653
BIOC3321_F22-BCH369	1.0000000
BIOC431-BCH369	0.6543179
Expert-BCH369	0.0000000
BIOC3321_F21-BIO206	0.9999960
BIOC3321_F22-BIO206	0.9998297
BIOC431-BIO206	0.9582419
Expert-BIO206	0.0000000
BIOC3321_F22-BIOC3321_F21	0.9999632
BIOC431-BIOC3321_F21	0.6251432
Expert-BIOC3321_F21	0.0000000
BIOC431-BIOC3321_F22	0.9419214
Expert-BIOC3321_F22	0.0000000
Expert-BIOC431	0.0000000

# PLC by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")

Statistics of PLC based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	229	0.16	0.16	0.17	-0.33	0.59	0.93
X12	White	206	0.20	0.16	0.19	-0.24	0.92	1.16

Anova: PLC: White/Non-white
Testing statistical significance: p-values
0.0050469

# PLC by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")

Statistics of PLC based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	295	0.17	0.15	0.18	-0.23	0.92	1.15
X12	Male	140	0.19	0.18	0.17	-0.33	0.60	0.94

Anova: PLC: Sex
Testing statistical significance: p-values
0.4114192

5.2 NS only: Anova

# NS by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")

Statistics of NS based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	54	0.20	0.08	0.19	0.04	0.44	0.40
X12	BCH339M	39	0.20	0.09	0.20	0.04	0.42	0.38
X13	BCH369	140	0.17	0.08	0.17	0.04	0.53	0.49
X14	BIO206	8	0.15	0.06	0.15	0.09	0.28	0.18
X15	BIOC3321_F21	53	0.17	0.08	0.15	0.00	0.35	0.35
X16	BIOC3321_F22	36	0.17	0.07	0.18	0.03	0.30	0.27
X17	BIOC431	105	0.21	0.09	0.21	0.04	0.50	0.46
X18	Expert	7	0.43	0.08	0.44	0.33	0.53	0.20

Anova: NS: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	1.0000000
BCH369-BCH339F	0.3298772
BIO206-BCH339F	0.8215999
BIOC3321_F21-BCH339F	0.5291601
BIOC3321_F22-BCH339F	0.6943732
BIOC431-BCH339F	0.9994788
Expert-BCH339F	0.0000000
BCH369-BCH339M	0.4155710
BIO206-BCH339M	0.8108479
BIOC3321_F21-BCH339M	0.5697093
BIOC3321_F22-BCH339M	0.7091461
BIOC431-BCH339M	0.9999622
Expert-BCH339M	0.0000000
BIO206-BCH369	0.9993766
BIOC3321_F21-BCH369	1.0000000
BIOC3321_F22-BCH369	1.0000000
BIOC431-BCH369	0.0131812
Expert-BCH369	0.0000000
BIOC3321_F21-BIO206	0.9997178
BIOC3321_F22-BIO206	0.9996690
BIOC431-BIO206	0.6444092
Expert-BIO206	0.0000000
BIOC3321_F22-BIOC3321_F21	1.0000000
BIOC431-BIOC3321_F21	0.1115039
Expert-BIOC3321_F21	0.0000000
BIOC431-BIOC3321_F22	0.2722941
Expert-BIOC3321_F22	0.0000000
Expert-BIOC431	0.0000000

# NS by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")

Statistics of NS based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	229	0.18	0.08	0.17	0.03	0.44	0.41
X12	White	206	0.20	0.09	0.19	0.00	0.53	0.53

Anova: NS: White/Non-white
Testing statistical significance: p-values
0.0129279

# NS by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")

Statistics of NS based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	295	0.18	0.08	0.18	0.00	0.44	0.44
X12	Male	140	0.19	0.10	0.17	0.04	0.53	0.50

Anova: NS: Sex
Testing statistical significance: p-values
0.6659064

5.3 PLC/NS clustering

The problem with clustering is that it is an iterative method, so different “initial seeds” can yield different results. It is only reproducible when the seed is fixed before k-means runs, here with “set.seed(42)”.

# PLC by clusterLetter (Expert / HP / IP / LP): summary statistics and pairwise p-values.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")

Statistics of PLC based on the category clusterLetter
	group1	n	mean	sd	median	min	max	range
X11	Expert	7	0.71	0.08	0.69	0.60	0.82	0.22
X12	HP	81	0.37	0.11	0.36	0.16	0.61	0.45
X13	IP	176	0.24	0.10	0.23	0.03	0.92	0.89
X14	LP	178	0.03	0.10	0.05	-0.33	0.23	0.57

Anova: PLC: Cluster letter
	Testing statistical significance: p-values
HP-Expert	0
IP-Expert	0
LP-Expert	0
IP-HP	0
LP-HP	0
LP-IP	0

5.3.1 Analysis by course

# Course vs. performance cluster: the output below reports a chi-square p-value
# and a residuals analysis for this cross-tabulation.
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")

The Chi-square analysis gives a p= 0.07602

Residuals analysis:

A negative residual means that the measured value is lower than expected; a positive residual means that it is higher than expected.

# Scatter of NS against PLC for the Nucleic Acids survey.
# Colour encodes the course: integer codes of the course factor index the palette.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# The plotting symbol comes from the cluster label (for character input base
# graphics draws only the first character, matching the H/I/L key in the title).
plot(
  x = allBiochem$PLC,
  y = allBiochem$NS,
  pch = allBiochem$clusterLetter,
  col = markerIntegers,
  main = "Nucleic Acids - High(H), Intermediate(I), Low(L) performers",
  xlab = "PLC",
  ylab = "NS"
)
# unique() keeps first-appearance order for both vectors, so the i-th course
# label is paired with the i-th colour code.
legend(
  "topleft",
  legend = unique(allBiochem$Course_collected),
  col = unique(markerIntegers),
  lty = 1,
  cex = 0.8
)

# Per-course breakdown: total N and the share of High / Intermediate / Low
# performers, also split by sex and by White / Non-white (tables below).
calcStats(allBiochem,"Course_collected")

Results for category: BIOC3321_F21

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F21	Total N= 53	13 %		38 %		49 %
	Sex: males N= 15 ; females N= 38	male	female	male	female	male	female
	Sex: males N= 15 ; females N= 38	13 %	13 %	47 %	34 %	40 %	53 %
	Race: White N= 29 ; Non-white N= 24	white	non-white	white	non-white	white	non-white
	Race: White N= 29 ; Non-white N= 24	10 %	17 %	45 %	29 %	45 %	54 %

Results for category: BIOC3321_F22

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F22	Total N= 36	14 %		39 %		47 %
	Sex: males N= 4 ; females N= 32	male	female	male	female	male	female
	Sex: males N= 4 ; females N= 32	0 %	16 %	25 %	41 %	75 %	44 %
	Race: White N= 24 ; Non-white N= 12	white	non-white	white	non-white	white	non-white
	Race: White N= 24 ; Non-white N= 12	12 %	17 %	42 %	33 %	46 %	50 %

Results for category: BIOC431

		High Performers		Intermediate Performers		Low Performers
BIOC431	Total N= 105	27 %		38 %		35 %
	Sex: males N= 41 ; females N= 64	male	female	male	female	male	female
	Sex: males N= 41 ; females N= 64	29 %	25 %	37 %	39 %	34 %	36 %
	Race: White N= 79 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 79 ; Non-white N= 26	29 %	19 %	42 %	27 %	29 %	54 %

Results for category: BCH339F

		High Performers		Intermediate Performers		Low Performers
BCH339F	Total N= 54	22 %		50 %		28 %
	Sex: males N= 20 ; females N= 34	male	female	male	female	male	female
	Sex: males N= 20 ; females N= 34	20 %	24 %	40 %	56 %	40 %	21 %
	Race: White N= 16 ; Non-white N= 38	white	non-white	white	non-white	white	non-white
	Race: White N= 16 ; Non-white N= 38	31 %	18 %	44 %	53 %	25 %	29 %

Results for category: BCH339M

		High Performers		Intermediate Performers		Low Performers
BCH339M	Total N= 39	28 %		38 %		33 %
	Sex: males N= 14 ; females N= 25	male	female	male	female	male	female
	Sex: males N= 14 ; females N= 25	21 %	32 %	29 %	44 %	50 %	24 %
	Race: White N= 9 ; Non-white N= 30	white	non-white	white	non-white	white	non-white
	Race: White N= 9 ; Non-white N= 30	44 %	23 %	22 %	43 %	33 %	33 %

Results for category: BCH369

		High Performers		Intermediate Performers		Low Performers
BCH369	Total N= 140	13 %		41 %		46 %
	Sex: males N= 45 ; females N= 95	male	female	male	female	male	female
	Sex: males N= 45 ; females N= 95	22 %	8.4 %	24 %	48 %	53 %	43 %
	Race: White N= 48 ; Non-white N= 92	white	non-white	white	non-white	white	non-white
	Race: White N= 48 ; Non-white N= 92	19 %	9.8 %	38 %	42 %	44 %	48 %

Results for category: BIO206

		High Performers		Intermediate Performers		Low Performers
BIO206	Total N= 8	0 %		38 %		62 %
	Sex: males N= 1 ; females N= 7	male	female	male	female	male	female
	Sex: males N= 1 ; females N= 7	0 %	0 %	100 %	29 %	0 %	71 %
	Race: White N= 1 ; Non-white N= 7	white	non-white	white	non-white	white	non-white
	Race: White N= 1 ; Non-white N= 7	0 %	0 %	0 %	43 %	100 %	57 %

6 All Biochem: Oxygen Binding

6.1 PLC only: Anova

We test whether the PLC score differs significantly across the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.

# Oxygen Binding survey: build the analysis table, then compare PLC across courses.
# NOTE(review): allBioc4 and exs4 are defined outside this section; presumably the
# Oxygen Binding subsets of the student and expert data - confirm.
allBiochem = analyzeUMRCourses(allBioc4)
# Add the expert responses (exs4); "Expert" then shows up as its own group in the tables below.
allBiochem = addExperts(allBiochem,exs4)
# buildTables() is disabled (commented out) for this run.
#buildTables(allBiochem)
# PLC by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")

Statistics of PLC based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	53	0.18	0.15	0.19	-0.18	0.67	0.84
X12	BCH339M	32	0.19	0.17	0.19	-0.32	0.50	0.82
X13	BCH369	123	0.18	0.14	0.18	-0.21	0.53	0.74
X14	BIOC3321_F21	53	0.22	0.26	0.18	-0.25	0.86	1.11
X15	BIOC3321_F22	37	0.20	0.13	0.20	-0.04	0.42	0.46
X16	BIOC431	110	0.18	0.16	0.19	-0.19	0.57	0.76
X17	Expert	15	0.69	0.13	0.66	0.52	0.89	0.38

Anova: PLC: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.9999523
BCH369-BCH339F	0.9999997
BIOC3321_F21-BCH339F	0.9136631
BIOC3321_F22-BCH339F	0.9999127
BIOC431-BCH339F	0.9999979
Expert-BCH339F	0.0000000
BCH369-BCH339M	0.9995189
BIOC3321_F21-BCH339M	0.9922864
BIOC3321_F22-BCH339M	1.0000000
BIOC431-BCH339M	0.9992457
Expert-BCH339M	0.0000000
BIOC3321_F21-BCH369	0.7574619
BIOC3321_F22-BCH369	0.9991373
BIOC431-BCH369	1.0000000
Expert-BCH369	0.0000000
BIOC3321_F22-BIOC3321_F21	0.9914685
BIOC431-BIOC3321_F21	0.7440660
Expert-BIOC3321_F21	0.0000000
BIOC431-BIOC3321_F22	0.9986926
Expert-BIOC3321_F22	0.0000000
Expert-BIOC431	0.0000000

# PLC by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")

Statistics of PLC based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	226	0.18	0.17	0.18	-0.32	0.86	1.17
X12	White	182	0.20	0.17	0.19	-0.19	0.69	0.88

Anova: PLC: White/Non-white
Testing statistical significance: p-values
0.260965

# PLC by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")

Statistics of PLC based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	275	0.19	0.16	0.18	-0.25	0.80	1.05
X12	Male	133	0.19	0.18	0.19	-0.32	0.86	1.17

Anova: PLC: Sex
Testing statistical significance: p-values
0.7977316

6.2 NS only: Anova

# NS by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")

Statistics of NS based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	53	0.20	0.09	0.19	0.05	0.45	0.40
X12	BCH339M	32	0.18	0.07	0.18	0.04	0.33	0.29
X13	BCH369	123	0.18	0.07	0.17	0.00	0.35	0.35
X14	BIOC3321_F21	53	0.21	0.09	0.21	0.06	0.44	0.38
X15	BIOC3321_F22	37	0.17	0.06	0.17	0.04	0.30	0.26
X16	BIOC431	110	0.19	0.07	0.18	0.00	0.38	0.38
X17	Expert	15	0.35	0.09	0.35	0.25	0.53	0.28

Anova: NS: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.9243116
BCH369-BCH339F	0.2818569
BIOC3321_F21-BCH339F	0.9994112
BIOC3321_F22-BCH339F	0.3596134
BIOC431-BCH339F	0.8045716
Expert-BCH339F	0.0000000
BCH369-BCH339M	0.9970343
BIOC3321_F21-BCH339M	0.7461135
BIOC3321_F22-BCH339M	0.9823159
BIOC431-BCH339M	1.0000000
Expert-BCH339M	0.0000000
BIOC3321_F21-BCH369	0.0875734
BIOC3321_F22-BCH369	0.9994760
BIOC431-BCH369	0.9505337
Expert-BCH369	0.0000000
BIOC3321_F22-BIOC3321_F21	0.1638216
BIOC431-BIOC3321_F21	0.4739195
Expert-BIOC3321_F21	0.0000000
BIOC431-BIOC3321_F22	0.9184516
Expert-BIOC3321_F22	0.0000000
Expert-BIOC431	0.0000000

# NS by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")

Statistics of NS based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	226	0.18	0.08	0.18	0	0.45	0.45
X12	White	182	0.19	0.07	0.19	0	0.44	0.44

Anova: NS: White/Non-white
Testing statistical significance: p-values
0.5478315

# NS by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")

Statistics of NS based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	275	0.19	0.07	0.18	0	0.45	0.45
X12	Male	133	0.19	0.08	0.19	0	0.44	0.44

Anova: NS: Sex
Testing statistical significance: p-values
0.4978964

6.3 PLC/NS clustering

The problem with clustering is that it is an iterative method, so different “initial seeds” can yield different results. It is only reproducible when the seed is fixed before k-means runs, here with “set.seed(42)”.

# PLC by clusterLetter (Expert / HP / IP / LP): summary statistics and pairwise p-values.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")

Statistics of PLC based on the category clusterLetter
	group1	n	mean	sd	median	min	max	range
X11	Expert	15	0.69	0.13	0.66	0.52	0.89	0.38
X12	HP	78	0.38	0.15	0.36	0.09	0.86	0.77
X13	IP	195	0.23	0.09	0.22	0.04	0.53	0.49
X14	LP	135	0.01	0.09	0.02	-0.32	0.18	0.50

Anova: PLC: Cluster letter
	Testing statistical significance: p-values
HP-Expert	0
IP-Expert	0
LP-Expert	0
IP-HP	0
LP-HP	0
LP-IP	0

6.3.1 Analysis by course

# Course vs. performance cluster: the output below reports a chi-square p-value
# and a residuals analysis for this cross-tabulation.
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")

The Chi-square analysis gives a p= 0.02553

Residuals analysis:

A negative residual means that the measured value is lower than expected; a positive residual means that it is higher than expected.

# Scatter of NS against PLC for the Oxygen Binding survey.
# Colour encodes the course: integer codes of the course factor index the palette.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# The plotting symbol comes from the cluster label (for character input base
# graphics draws only the first character, matching the H/I/L key in the title).
plot(
  x = allBiochem$PLC,
  y = allBiochem$NS,
  pch = allBiochem$clusterLetter,
  col = markerIntegers,
  main = "Oxygen Binding - High(H), Intermediate(I), Low(L) performers",
  xlab = "PLC",
  ylab = "NS"
)
# unique() keeps first-appearance order for both vectors, so the i-th course
# label is paired with the i-th colour code.
legend(
  "topleft",
  legend = unique(allBiochem$Course_collected),
  col = unique(markerIntegers),
  lty = 1,
  cex = 0.8
)

# Per-course breakdown: total N and the share of High / Intermediate / Low
# performers, also split by sex and by White / Non-white (tables below).
calcStats(allBiochem,"Course_collected")

Results for category: BIOC3321_F21

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F21	Total N= 53	34 %		25 %		42 %
	Sex: males N= 16 ; females N= 37	male	female	male	female	male	female
	Sex: males N= 16 ; females N= 37	44 %	30 %	31 %	22 %	25 %	49 %
	Race: White N= 29 ; Non-white N= 24	white	non-white	white	non-white	white	non-white
	Race: White N= 29 ; Non-white N= 24	34 %	33 %	34 %	12 %	31 %	54 %

Results for category: BIOC3321_F22

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F22	Total N= 37	8.1 %		62 %		30 %
	Sex: males N= 4 ; females N= 33	male	female	male	female	male	female
	Sex: males N= 4 ; females N= 33	0 %	9.1 %	75 %	61 %	25 %	30 %
	Race: White N= 24 ; Non-white N= 13	white	non-white	white	non-white	white	non-white
	Race: White N= 24 ; Non-white N= 13	8.3 %	7.7 %	71 %	46 %	21 %	46 %

Results for category: BIOC431

		High Performers		Intermediate Performers		Low Performers
BIOC431	Total N= 110	18 %		49 %		33 %
	Sex: males N= 41 ; females N= 69	male	female	male	female	male	female
	Sex: males N= 41 ; females N= 69	20 %	17 %	46 %	51 %	34 %	32 %
	Race: White N= 84 ; Non-white N= 26	white	non-white	white	non-white	white	non-white
	Race: White N= 84 ; Non-white N= 26	19 %	15 %	48 %	54 %	33 %	31 %

Results for category: BCH339F

		High Performers		Intermediate Performers		Low Performers
BCH339F	Total N= 53	25 %		47 %		28 %
	Sex: males N= 19 ; females N= 34	male	female	male	female	male	female
	Sex: males N= 19 ; females N= 34	16 %	29 %	53 %	44 %	32 %	26 %
	Race: White N= 14 ; Non-white N= 39	white	non-white	white	non-white	white	non-white
	Race: White N= 14 ; Non-white N= 39	21 %	26 %	43 %	49 %	36 %	26 %

Results for category: BCH339M

		High Performers		Intermediate Performers		Low Performers
BCH339M	Total N= 32	19 %		50 %		31 %
	Sex: males N= 11 ; females N= 21	male	female	male	female	male	female
	Sex: males N= 11 ; females N= 21	27 %	14 %	45 %	52 %	27 %	33 %
	Race: White N= 8 ; Non-white N= 24	white	non-white	white	non-white	white	non-white
	Race: White N= 8 ; Non-white N= 24	25 %	17 %	50 %	50 %	25 %	33 %

Results for category: BCH369

		High Performers		Intermediate Performers		Low Performers
BCH369	Total N= 123	15 %		52 %		33 %
	Sex: males N= 42 ; females N= 81	male	female	male	female	male	female
	Sex: males N= 42 ; females N= 81	14 %	15 %	50 %	53 %	36 %	32 %
	Race: White N= 23 ; Non-white N= 100	white	non-white	white	non-white	white	non-white
	Race: White N= 23 ; Non-white N= 100	13 %	15 %	52 %	52 %	35 %	33 %

7 All Biochem: Protein Structure

7.1 PLC only: Anova

We test whether the PLC score differs significantly across the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.

# Protein Structure survey: build the analysis table, then compare PLC across courses.
# NOTE(review): allBioc5 and exs5 are defined outside this section; presumably the
# Protein Structure subsets of the student and expert data - confirm.
# NOTE(review): in the output below BCH339F is the only course with negative NS
# values (min -0.29), while its PLC spread (sd 0.08, min 0.04) resembles the NS
# spread of the other courses - the PLC and NS columns may be swapped for that
# course in the input data; verify before interpreting sections 7.2 and 7.3.
allBiochem = analyzeUMRCourses(allBioc5)
# Add the expert responses (exs5); "Expert" then shows up as its own group in the tables below.
allBiochem = addExperts(allBiochem,exs5)
# buildTables() is disabled (commented out) for this run.
#buildTables(allBiochem)
# PLC by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")

Statistics of PLC based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	60	0.17	0.08	0.17	0.04	0.47	0.44
X12	BCH339M	39	0.29	0.21	0.30	-0.20	0.80	1.00
X13	BCH369	133	0.19	0.17	0.20	-0.24	0.58	0.83
X14	BIO206	11	0.21	0.11	0.25	0.02	0.35	0.33
X15	BIOC3321_F21	51	0.26	0.15	0.29	-0.13	0.54	0.67
X16	BIOC3321_F22	35	0.27	0.13	0.27	0.01	0.51	0.51
X17	BIOC431	102	0.32	0.20	0.35	-0.12	0.78	0.90
X18	Expert	7	0.76	0.10	0.79	0.59	0.89	0.30

Anova: PLC: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.0168274
BCH369-BCH339F	0.9961941
BIO206-BCH339F	0.9964481
BIOC3321_F21-BCH339F	0.1383356
BIOC3321_F22-BCH339F	0.1466410
BIOC431-BCH339F	0.0000025
Expert-BCH339F	0.0000000
BCH369-BCH339M	0.0298873
BIO206-BCH339M	0.8757653
BIOC3321_F21-BCH339M	0.9854992
BIOC3321_F22-BCH339M	0.9989217
BIOC431-BCH339M	0.9760226
Expert-BCH339M	0.0000000
BIO206-BCH369	0.9999347
BIOC3321_F21-BCH369	0.2492654
BIOC3321_F22-BCH369	0.2666801
BIOC431-BCH369	0.0000002
Expert-BCH369	0.0000000
BIOC3321_F21-BIO206	0.9922509
BIOC3321_F22-BIO206	0.9819973
BIOC431-BIO206	0.4518129
Expert-BIO206	0.0000000
BIOC3321_F22-BIOC3321_F21	0.9999973
BIOC431-BIOC3321_F21	0.3494096
Expert-BIOC3321_F21	0.0000000
BIOC431-BIOC3321_F22	0.7147123
Expert-BIOC3321_F22	0.0000000
Expert-BIOC431	0.0000000

# PLC by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")

Statistics of PLC based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	238	0.22	0.17	0.22	-0.24	0.80	1.05
X12	White	192	0.27	0.18	0.27	-0.15	0.76	0.91

Anova: PLC: White/Non-white
Testing statistical significance: p-values
0.0031158

# PLC by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")

Statistics of PLC based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	300	0.24	0.16	0.26	-0.24	0.78	1.02
X12	Male	130	0.24	0.19	0.23	-0.20	0.80	1.00

Anova: PLC: Sex
Testing statistical significance: p-values
0.9303274

7.2 NS only: Anova

# NS by Course_collected: summary statistics followed by pairwise p-values.
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")

Statistics of NS based on the category Course_collected
	group1	n	mean	sd	median	min	max	range
X11	BCH339F	60	0.36	0.30	0.40	-0.29	0.84	1.13
X12	BCH339M	39	0.18	0.08	0.18	0.03	0.38	0.35
X13	BCH369	133	0.15	0.07	0.15	0.00	0.38	0.38
X14	BIO206	11	0.18	0.05	0.19	0.07	0.24	0.17
X15	BIOC3321_F21	51	0.17	0.07	0.17	0.04	0.33	0.29
X16	BIOC3321_F22	35	0.18	0.06	0.17	0.08	0.42	0.34
X17	BIOC431	102	0.18	0.08	0.18	0.04	0.40	0.36
X18	Expert	7	0.35	0.08	0.35	0.24	0.44	0.21

Anova: NS: Course
	Testing statistical significance: p-values
BCH339M-BCH339F	0.0000000
BCH369-BCH339F	0.0000000
BIO206-BCH339F	0.0004226
BIOC3321_F21-BCH339F	0.0000000
BIOC3321_F22-BCH339F	0.0000000
BIOC431-BCH339F	0.0000000
Expert-BCH339F	0.9999811
BCH369-BCH339M	0.9162571
BIO206-BCH339M	1.0000000
BIOC3321_F21-BCH339M	0.9999193
BIOC3321_F22-BCH339M	1.0000000
BIOC431-BCH339M	0.9999995
Expert-BCH339M	0.0472545
BIO206-BCH369	0.9990061
BIOC3321_F21-BCH369	0.9891985
BIOC3321_F22-BCH369	0.9428209
BIOC431-BCH369	0.8328689
Expert-BCH369	0.0036271
BIOC3321_F21-BIO206	1.0000000
BIOC3321_F22-BIO206	1.0000000
BIOC431-BIO206	1.0000000
Expert-BIO206	0.1279153
BIOC3321_F22-BIOC3321_F21	0.9999646
BIOC431-BIOC3321_F21	0.9999922
Expert-BIOC3321_F21	0.0207016
BIOC431-BIOC3321_F22	0.9999999
Expert-BIOC3321_F22	0.0484295
Expert-BIOC431	0.0220915

# NS by race_binary (White / Non-white): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")

Statistics of NS based on the category race_binary
	group1	n	mean	sd	median	min	max	range
X11	Non-white	238	0.20	0.16	0.17	-0.29	0.84	1.13
X12	White	192	0.19	0.12	0.18	-0.19	0.79	0.97

Anova: NS: White/Non-white
Testing statistical significance: p-values
0.6012536

# NS by Sex_birth (Female / Male): summary statistics and a single ANOVA p-value.
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")

Statistics of NS based on the category Sex_birth
	group1	n	mean	sd	median	min	max	range
X11	Female	300	0.20	0.14	0.17	-0.29	0.80	1.08
X12	Male	130	0.19	0.16	0.17	-0.19	0.84	1.03

Anova: NS: Sex
Testing statistical significance: p-values
0.8330036

7.3 PLC/NS clustering

The problem with clustering is that it is an iterative method, so different “initial seeds” can yield different results. It is only reproducible when the seed is fixed before k-means runs, here with “set.seed(42)”.

# PLC by clusterLetter (Expert / HP / IP / LP): summary statistics and pairwise p-values.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")

Statistics of PLC based on the category clusterLetter
	group1	n	mean	sd	median	min	max	range
X11	Expert	7	0.76	0.10	0.79	0.59	0.89	0.30
X12	HP	221	0.37	0.12	0.36	0.19	0.80	0.62
X13	IP	33	0.19	0.07	0.21	0.04	0.29	0.25
X14	LP	177	0.09	0.11	0.11	-0.24	0.32	0.56

Anova: PLC: Cluster letter
	Testing statistical significance: p-values
HP-Expert	0.0e+00
IP-Expert	0.0e+00
LP-Expert	0.0e+00
IP-HP	0.0e+00
LP-HP	0.0e+00
LP-IP	5.9e-06

7.3.1 Analysis by course

# Course vs. performance cluster: the output below reports a chi-square p-value
# and a residuals analysis for this cross-tabulation.
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")

The Chi-square analysis gives a p= 0

Residuals analysis:

A negative residual means that the measured value is lower than expected; a positive residual means that it is higher than expected.

# Scatter of NS against PLC for the Protein Structure survey.
# Colour encodes the course: integer codes of the course factor index the palette.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# The plotting symbol comes from the cluster label (for character input base
# graphics draws only the first character, matching the H/I/L key in the title).
plot(
  x = allBiochem$PLC,
  y = allBiochem$NS,
  pch = allBiochem$clusterLetter,
  col = markerIntegers,
  main = "Protein Structure - High(H), Intermediate(I), Low(L) performers",
  xlab = "PLC",
  ylab = "NS"
)
# unique() keeps first-appearance order for both vectors, so the i-th course
# label is paired with the i-th colour code.
legend(
  "topleft",
  legend = unique(allBiochem$Course_collected),
  col = unique(markerIntegers),
  lty = 1,
  cex = 0.8
)

# Per-course breakdown: total N and the share of High / Intermediate / Low
# performers, also split by sex and by White / Non-white (tables below).
calcStats(allBiochem,"Course_collected")

Results for category: BIOC3321_F21

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F21	Total N= 51	63 %		0 %		37 %
	Sex: males N= 15 ; females N= 36	male	female	male	female	male	female
	Sex: males N= 15 ; females N= 36	67 %	61 %	0 %	0 %	33 %	39 %
	Race: White N= 28 ; Non-white N= 23	white	non-white	white	non-white	white	non-white
	Race: White N= 28 ; Non-white N= 23	61 %	65 %	0 %	0 %	39 %	35 %

Results for category: BIOC3321_F22

		High Performers		Intermediate Performers		Low Performers
BIOC3321_F22	Total N= 35	66 %		0 %		34 %
	Sex: males N= 4 ; females N= 31	male	female	male	female	male	female
	Sex: males N= 4 ; females N= 31	50 %	68 %	0 %	0 %	50 %	32 %
	Race: White N= 23 ; Non-white N= 12	white	non-white	white	non-white	white	non-white
	Race: White N= 23 ; Non-white N= 12	65 %	67 %	0 %	0 %	35 %	33 %

Results for category: BIOC431

		High Performers		Intermediate Performers		Low Performers
BIOC431	Total N= 102	69 %		0.98 %		30 %
	Sex: males N= 41 ; females N= 61	male	female	male	female	male	female
	Sex: males N= 41 ; females N= 61	66 %	70 %	2.4 %	0 %	32 %	30 %
	Race: White N= 75 ; Non-white N= 27	white	non-white	white	non-white	white	non-white
	Race: White N= 75 ; Non-white N= 27	69 %	67 %	0 %	3.7 %	31 %	30 %

Results for category: BCH339F

		High Performers		Intermediate Performers		Low Performers
BCH339F	Total N= 60	12 %		53 %		35 %
	Sex: males N= 24 ; females N= 36	male	female	male	female	male	female
	Sex: males N= 24 ; females N= 36	12 %	11 %	46 %	58 %	42 %	31 %
	Race: White N= 16 ; Non-white N= 44	white	non-white	white	non-white	white	non-white
	Race: White N= 16 ; Non-white N= 44	19 %	9.1 %	56 %	52 %	25 %	39 %

Results for category: BCH339M

		High Performers		Intermediate Performers		Low Performers
BCH339M	Total N= 39	59 %		0 %		41 %
	Sex: males N= 13 ; females N= 26	male	female	male	female	male	female
	Sex: males N= 13 ; females N= 26	54 %	62 %	0 %	0 %	46 %	38 %
	Race: White N= 10 ; Non-white N= 29	white	non-white	white	non-white	white	non-white
	Race: White N= 10 ; Non-white N= 29	80 %	52 %	0 %	0 %	20 %	48 %

Results for category: BCH369

		High Performers		Intermediate Performers		Low Performers
BCH369	Total N= 133	45 %		0 %		55 %
	Sex: males N= 31 ; females N= 101	male	female	male	female	male	female
	Sex: males N= 31 ; females N= 101	32 %	49 %	0 %	0 %	68 %	51 %
	Race: White N= 38 ; Non-white N= 95	white	non-white	white	non-white	white	non-white
	Race: White N= 38 ; Non-white N= 95	42 %	46 %	0 %	0 %	58 %	54 %

Results for category: BIO206

		High Performers		Intermediate Performers		Low Performers
BIO206	Total N= 11	55 %		0 %		45 %
	Sex: males N= 2 ; females N= 9	male	female	male	female	male	female
	Sex: males N= 2 ; females N= 9	50 %	56 %	0 %	0 %	50 %	44 %
	Race: White N= 2 ; Non-white N= 9	white	non-white	white	non-white	white	non-white
	Race: White N= 2 ; Non-white N= 9	100 %	44 %	0 %	0 %	0 %	56 %

Comparing Biochemistry Courses in different campuses

Xavier Prat-Resina

2023-02-28

1 This dataset

2 Introduction

3 AllBiochem: ES Chemical Equation

3.1 PLC only: Anova

3.2 NS only: Anova

3.3 PLC/NS clustering

3.3.1 Analysis by course

4 All Biochem: ES Glucosidase

4.1 PLC only: Anova

4.2 NS only: Anova

4.3 PLC/NS clustering

4.3.1 Analysis by course

5 All Biochem: Nucleic Acids

5.1 PLC only: Anova

5.2 NS only: Anova

5.3 PLC/NS clustering

5.3.1 Analysis by course

6 All Biochem: Oxygen Binding

6.1 PLC only: Anova

6.2 NS only: Anova

6.3 PLC/NS clustering

6.3.1 Analysis by course

7 All Biochem: Protein Structure

7.1 PLC only: Anova

7.2 NS only: Anova

7.3 PLC/NS clustering

7.3.1 Analysis by course