The file “umr_all_preFall22.csv” is missing 2 rows for students who didn’t have PLC or NS data (not sure why) and 11 more students whose course enrollment I was not able to determine. When I merge it with “bioc3321_f22_allquestions.csv”, we write the result to “UMR_all_for_R_with_courses.csv”.
Experts dataset: Experts_all_for_R.csv
Other campuses dataset: “Dennison_UNL_UT_all_for_R.csv”
# Load the UMR, expert, and other-campus survey data, harmonise their columns,
# and split students and experts into one data frame per survey.
# NOTE(review): setwd() ties the script to one machine's directory layout; the
# relative CSV paths below depend on it, so it is kept unchanged.
setwd("~/Research/02b Neural Network Research UMR/Data + Analysis/Clustering_Xavier")

# One-off merge that produced "UMR_all_for_R_with_courses.csv" (kept for reference).
#umr_pref22 = read.csv("umr_all_preFall22.csv",header = TRUE)
#biocf22 = read.csv("bioc3321_f22_allquestions.csv",header = TRUE)
#biocf22$Course_collected = gsub('BIOC3321', 'Biochem 1', biocf22$Course_collected)
#biocf22$actual_year = "third_year"
#umr = rbind(umr_pref22,biocf22)
#write.csv(umr,"UMR_all_for_R_with_courses.csv",row.names=FALSE)
umr <- read.csv("UMR_all_for_R_with_courses.csv", header = TRUE)

# Keep only the UMR "Biochem 1" rows from Fall 2021 and Fall 2022, relabelling
# each term so the two cohorts show up as separate courses.
allBioc <- umr[which(umr$Course_collected == "Biochem 1" & umr$Term_collected == "Fall2021"), ]
allBioc$Course_collected <- gsub("Biochem 1", "BIOC3321_F21", allBioc$Course_collected)
allBioc <- rbind(
  allBioc,
  umr[which(umr$Course_collected == "Biochem 1" & umr$Term_collected == "Fall2022"), ]
)
# The Fall 2021 rows were already renamed, so only the Fall 2022 rows still
# contain "Biochem 1" at this point.
allBioc$Course_collected <- gsub("Biochem 1", "BIOC3321_F22", allBioc$Course_collected)

######
expert <- read.csv("Experts_all_for_R.csv", header = TRUE)
other <- read.csv("Dennison_UNL_UT_all_for_R.csv", header = TRUE)
other <- other[which(other$Course_collected != "CHEM131"), ]

# Both student sources are reduced to the same columns so they can be stacked.
sharedColumns <- c(
  "Institution", "Survey", "Course_collected", "Deidentifier", "Sex_birth",
  "Race_ethnicity", "Coherency", "NS", "PLC"
)
allBioc <- allBioc[, sharedColumns]
other <- other[, sharedColumns]
#other = na.omit(other)
other <- other[!is.na(other$PLC), ]
allBioc <- rbind(allBioc, other)
allBioc$actual_year <- "Whatever"

# Expert responses, one data frame per survey.
# NOTE(review): survey 1 is labelled "ES_Chemical_Equation" for experts but
# "ES_Chemical_Reaction" for students -- confirm both labels are intended.
exs1 <- expert[which(expert$Survey == "ES_Chemical_Equation"), ]
exs2 <- expert[which(expert$Survey == "ES_Glucosidase"), ]
exs3 <- expert[which(expert$Survey == "Nucleic_Acids"), ]
exs4 <- expert[which(expert$Survey == "Oxygen_Binding"), ]
# The original filter used the misspelt label "Protein_Strcuture". Both
# spellings are accepted so the rows are found whichever one the CSV contains.
exs5 <- expert[which(expert$Survey %in% c("Protein_Structure", "Protein_Strcuture")), ]

# Student responses, one data frame per survey.
allBioc1 <- allBioc[which(allBioc$Survey == "ES_Chemical_Reaction"), ]
allBioc2 <- allBioc[which(allBioc$Survey == "ES_Glucosidase"), ]
allBioc3 <- allBioc[which(allBioc$Survey == "Nucleic_Acids"), ]
allBioc4 <- allBioc[which(allBioc$Survey == "Oxygen_Binding"), ]
allBioc5 <- allBioc[which(allBioc$Survey == "Protein_Structure"), ]
library(psych)
# Prepare one survey's responses and label every student as a high (HP),
# intermediate (IP), or low (LP) performer.
#
# umrs1:   data frame with columns Institution, Course_collected, Deidentifier,
#          Sex_birth, Race_ethnicity, Coherency, NS, actual_year and PLC.
# Returns: those nine columns (Coherency, NS and PLC coerced to numeric) plus
#          race_binary ("White"/"Non-white"), cluster (k-means cluster number)
#          and clusterLetter ("HP"/"IP"/"LP").
analyzeUMRCourses <- function(umrs1) {
  allBiochem <- umrs1[, c(
    "Institution", "Course_collected", "Deidentifier", "Sex_birth",
    "Race_ethnicity", "Coherency", "NS", "actual_year", "PLC"
  )]
  for (score in c("Coherency", "NS", "PLC")) {
    allBiochem[[score]] <- as.numeric(allBiochem[[score]])
  }
  allBiochem$race_binary <- ifelse(
    allBiochem$Race_ethnicity == "White/Caucasian", "White", "Non-white"
  )

  # Cluster on standardised (PLC, NS). k-means starts from random centres, so
  # the seed is fixed to make the cluster assignment reproducible.
  set.seed(42)
  scores <- cbind(allBiochem$PLC, allBiochem$NS)
  allBiochem$cluster <- kmeans(scale(scores), 3)$cluster

  # k-means numbers its clusters arbitrarily, so find which cluster number has
  # the highest and the lowest mean PLC; the remaining one is intermediate.
  meanPLCbyCluster <- tapply(allBiochem$PLC, allBiochem$cluster, mean)
  clusterIds <- as.integer(names(meanPLCbyCluster))
  HPgroup <- clusterIds[which.max(meanPLCbyCluster)]
  LPgroup <- clusterIds[which.min(meanPLCbyCluster)]
  IPgroup <- setdiff(clusterIds, c(HPgroup, LPgroup))

  allBiochem$clusterLetter <- ifelse(
    allBiochem$cluster == HPgroup, "HP",
    ifelse(
      allBiochem$cluster == LPgroup, "LP",
      ifelse(allBiochem$cluster %in% IPgroup, "IP", "Oops")
    )
  )
  #allBiochem$Course_collected = factor(allBiochem$Course_collected,levels = c(
  # "Gen + Organic 1","O Chem 1","O Chem 2","Gen Chem 2","Biochem 1","Biochem 2"))
  allBiochem
}
# Print descriptive statistics of PLC (via psych::describeBy) as knitr tables,
# once for each grouping column: cluster letter, institution, year, course,
# sex, and race.
#
# allBiochem: data frame with PLC, clusterLetter, Institution, actual_year,
#             Course_collected, Sex_birth and race_binary columns.
# Returns NULL invisibly; called for its printed output.
buildTables <- function(allBiochem) {
  captions <- c(
    clusterLetter = "PLC by cluster group",
    Institution = "PLC by institution",
    actual_year = "PLC by Actual Year",
    Course_collected = "PLC by course",
    Sex_birth = "PLC by Sex",
    race_binary = "PLC by Race"
  )
  for (column in names(captions)) {
    mata <- describeBy(allBiochem$PLC, allBiochem[[column]], mat = TRUE, digits = 2)
    print(knitr::kable(mata[, c(2, 4, 5, 6, 7, 8, 9, 10, 11, 12)], caption = captions[[column]]))
  }
  invisible(NULL)
}

# Shared implementation of calcStats() and calcStats2(): for every value of
# `column` except "Expert", cat() an HTML table giving the share of HP/IP/LP
# students overall, by sex, and by white/non-white.
.clusterBreakdownHtml <- function(allBiochem, column) {
  category <- allBiochem[[column]]
  # TRUE & NA is NA, so one missing demographic value would turn a whole count
  # into NA; missing values are therefore not counted.
  countRows <- function(mask) sum(mask, na.rm = TRUE)
  pct <- function(part, whole) signif(part / whole * 100, digits = 2)

  # Masks that do not depend on the category are computed once.
  isHP <- allBiochem$clusterLetter == "HP"
  isIP <- allBiochem$clusterLetter == "IP"
  isLP <- allBiochem$clusterLetter == "LP"
  isMale <- allBiochem$Sex_birth == "Male"
  isFemale <- allBiochem$Sex_birth == "Female"
  isWhite <- allBiochem$race_binary == "White"
  isNonwhite <- allBiochem$race_binary == "Non-white"

  #using the term course as a generic category
  for (course in unique(category)) {
    # An NA category would make the `if` condition NA and stop with an error.
    if (is.na(course) || course == "Expert") next
    header <- paste("<b>Results for category: ", course, "</b></br></br>")
    cat(header)
    inCourse <- category == course

    umrTot <- countRows(inCourse)
    umrHP <- countRows(inCourse & isHP)
    umrIP <- countRows(inCourse & isIP)
    umrLP <- countRows(inCourse & isLP)

    umrMale <- countRows(inCourse & isMale)
    umrHPmale <- countRows(inCourse & isMale & isHP)
    umrIPmale <- countRows(inCourse & isMale & isIP)
    umrLPmale <- countRows(inCourse & isMale & isLP)

    umrFemale <- countRows(inCourse & isFemale)
    umrHPfemale <- countRows(inCourse & isFemale & isHP)
    umrIPfemale <- countRows(inCourse & isFemale & isIP)
    umrLPfemale <- countRows(inCourse & isFemale & isLP)

    umrWhite <- countRows(inCourse & isWhite)
    umrHPWhite <- countRows(inCourse & isWhite & isHP)
    umrIPWhite <- countRows(inCourse & isWhite & isIP)
    umrLPWhite <- countRows(inCourse & isWhite & isLP)

    umrNonwhite <- countRows(inCourse & isNonwhite)
    umrHPNonwhite <- countRows(inCourse & isNonwhite & isHP)
    umrIPNonwhite <- countRows(inCourse & isNonwhite & isIP)
    umrLPNonwhite <- countRows(inCourse & isNonwhite & isLP)

    # The HTML literal is kept flush-left so no code indentation leaks into
    # the emitted markup.
    output <- paste("<table >
<thead>
<tr>
<th colspan='2'></th>
<th colspan='2'>High Performers</th>
<th colspan='2'>Intermediate Performers</th>
<th colspan='2'>Low Performers</th>
</tr>
</thead>
<tbody>
<tr>
<td rowspan='5'>",course," </td>
<td>Total N=", umrTot,"</td>
<td colspan='2'>", pct(umrHP, umrTot),"% </td>
<td colspan='2'>", pct(umrIP, umrTot),"%</td>
<td colspan='2'>", pct(umrLP, umrTot),"% </td>
</tr>
<tr>
<td rowspan='2'>Sex: males N=",umrMale,"; females N=",umrFemale,"</td>
<td>male</td>
<td>female</td>
<td>male</td>
<td>female</td>
<td>male</td>
<td>female</td>
</tr>
<tr>
<td>", pct(umrHPmale, umrMale),"%</td>
<td>", pct(umrHPfemale, umrFemale),"%</td>
<td>", pct(umrIPmale, umrMale),"%</td>
<td>", pct(umrIPfemale, umrFemale),"%</td>
<td>", pct(umrLPmale, umrMale),"%</td>
<td>", pct(umrLPfemale, umrFemale),"%</td>
</tr>
<tr>
<td rowspan='2'>Race: White N=",umrWhite,"; Non-white N=",umrNonwhite,"</td>
<td>white</td>
<td>non-white</td>
<td>white</td>
<td>non-white</td>
<td>white</td>
<td>non-white</td>
</tr>
<tr>
<td>", pct(umrHPWhite, umrWhite),"%</td>
<td>", pct(umrHPNonwhite, umrNonwhite),"%</td>
<td>", pct(umrIPWhite, umrWhite),"%</td>
<td>", pct(umrIPNonwhite, umrNonwhite),"%</td>
<td>", pct(umrLPWhite, umrWhite),"%</td>
<td>", pct(umrLPNonwhite, umrNonwhite),"%</td>
</tr>
</tbody>
</table> ")
    cat(output)
  }
  invisible(NULL)
}

# Emit one HTML cluster-breakdown table per course (Course_collected).
#
# allBiochem: data frame with Course_collected, clusterLetter, Sex_birth and
#             race_binary columns.
# mycategory: accepted for compatibility with existing calls but not used; the
#             grouping column is always Course_collected.
calcStats <- function(allBiochem, mycategory) {
  .clusterBreakdownHtml(allBiochem, "Course_collected")
}

# Same as calcStats(), but grouped by student year (actual_year).
# mycategory is accepted for compatibility but not used.
calcStats2 <- function(allBiochem, mycategory) {
  .clusterBreakdownHtml(allBiochem, "actual_year")
}
library(ggplot2)
library(ggpubr)
library(psych)
# Box plot (with jittered points) of the numeric column `myy` for each level of
# `myx`, annotated with an overall ANOVA p-value and per-group t-tests against
# all data. The dashed line marks the overall mean of `myy`.
#
# df: data frame; myx/myy: column names (strings); mytitle: plot title;
# myylab: y-axis label. Returns the plot object.
plotGGbox <- function(df, myx, myy, mytitle, myylab) {
  df <- df[complete.cases(df[[myy]]), ]
  maxy <- max(df[[myy]])
  # The y-axis originally started at 0, which cut off negative scores; it now
  # extends downward whenever the data do.
  miny <- min(0, min(df[[myy]]))
  ggboxplot(df,
    x = myx, y = myy,
    title = mytitle,
    color = myx, add = "jitter", legend = "none", ylab = myylab
  ) + rotate_x_text(angle = 45) +
    geom_hline(yintercept = mean(df[[myy]]), linetype = 2) +
    stat_compare_means(method = "anova", label.y = maxy * 1.10) +
    coord_cartesian(ylim = c(miny, maxy * 1.2)) +
    stat_compare_means(
      label = "p.format", size = 2.5, method = "t.test",
      ref.group = ".all.", label.y = maxy * 1.05
    )
}

# Print the Tukey HSD adjusted p-values for every pairwise comparison of `myy`
# between the levels of `myx`. `myylab` is accepted but not used.
getAnova <- function(df, myx, myy, mytitle, myylab) {
  #get anova
  response <- df[[myy]]
  group <- factor(df[[myx]])
  a <- TukeyHSD(aov(response ~ group))
  # drop = FALSE keeps the row name when there is a single comparison (two
  # groups); without it the table lost its "A-B" label.
  b <- as.data.frame(a$group[, 4, drop = FALSE])
  colnames(b) <- c("Testing statistical significance: p-values")
  print(knitr::kable(b, caption = paste("Anova: ", mytitle)))
}

# Print the box plot, the descriptive statistics table, and the Tukey p-values
# of `myy` grouped by `myx`.
plotAndTable <- function(df, myx, myy, mytitle, myylab) {
  if (myx == "Sex_birth" || myx == "race_binary") {
    # Demographic comparisons exclude the expert rows and anyone who declined
    # to state their sex.
    # NOTE(review): the sex filter also applies to race_binary comparisons --
    # confirm that is intended.
    df <- df[!grepl("Expert", df$Course_collected, ignore.case = TRUE), ]
    df <- df[!grepl("Prefer not to answer", df$Sex_birth, fixed = TRUE), ]
  }
  print(plotGGbox(df, myx, myy, mytitle, myylab))
  stats <- describeBy(df[[myy]], df[[myx]], mat = TRUE, digits = 2)
  print(knitr::kable(stats[,c(2,4,5,6,7,10,11,12)],caption=paste("Statistics of ",myylab," based on the category",myx)))
  getAnova(df, myx, myy, mytitle, myylab)
}

# Append one row per expert to the student data frame. Every column of the new
# rows is set to "Expert" except PLC, NS and Coherency, which are copied from
# `experts`.
#
# alldf:   student data frame (must contain PLC, NS and Coherency columns).
# experts: data frame with PLC, NS and Coherency columns.
# Returns `alldf` with nrow(experts) rows appended.
addExperts <- function(alldf, experts) {
  # The original body began with `alldf = allBiochem`, silently replacing the
  # argument with a global variable; the argument is now used as passed.
  if (nrow(experts) == 0) {
    return(alldf)
  }
  ex_new <- as.data.frame(matrix(ncol = ncol(alldf), nrow = nrow(experts)))
  colnames(ex_new) <- colnames(alldf)
  #colnames(ex_new) = c("Institution", "Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","NS","actual_year","PLC","cluster","race_binary","clusterLeter")
  # Fill every column (previously hard-coded as 1:12) with the "Expert" label.
  ex_new[, seq_len(ncol(alldf))] <- "Expert"
  ex_new$PLC <- experts$PLC
  ex_new$NS <- experts$NS
  ex_new$Coherency <- experts$Coherency
  rbind(alldf, ex_new)
}
library(dplyr)
library(corrplot)
# Run a chi-square test on the contingency table of the two columns of `a`,
# print the p-value as HTML, and plot the Pearson residuals with corrplot.
#
# a: two-column data frame of categorical variables.
plotChi <- function(a) {
  #I need to use droplevels otherwise it was showing Expert with zeros as a ghost category?
  b <- chisq.test(table(droplevels(a)))
  cat(paste("<p><b>The Chi-square analysis gives a p=",round(b$p.value,5),"</b></p>"))
  cat(paste("<p><b>Residuals analysis:</b></p>"))
  cat("A negative residual implies that the measured value is lower than expected and a positive value higher than expected</br>")
  corrplot(b$residuals, is.cor = FALSE)
  #normalize it
  #contrib <- 100*b$residuals^2/b$statistic
  #round(contrib, 3)
  #corrplot(contrib, is.cor = FALSE)
  #corrplot(contrib, is.cor = FALSE, col.lim = c(0.3,1) )
}

# Print a stacked bar chart of `myy` within each level of `myx`, then run the
# chi-square analysis on the same two columns.
#
# myx is the course or demographic variable, the independent variable.
# myy is typically the clusterLetter, the dependent variable.
plotBarAndCorr <- function(df, myx, myy, myxlabel, myylabel, mytitle) {
  #remove experts, not useful for the chisquare analysis
  # Expert rows carry "Expert" in the first column. [[1]] extracts that column
  # as a vector whether df is a data.frame or a tibble.
  a <- df[!grepl("Expert", df[[1]]), ]
  if (myx == "Sex_birth") {
    a <- a[!grepl("Prefer not to answer", a$Sex_birth, fixed = TRUE), ]
  }
  #select the two categorical variables
  a <- a[, c(myy, myx)]
  print(plotBarCategories(a, myx, myy, myxlabel, myylabel, mytitle))
  plotChi(a)
}

# Stacked bar chart of counts of `myy` within each level of `myx`; each segment
# is labelled with its percentage of that `myx` level. Returns the ggplot.
plotBarCategories <- function(a, myx, myy, myxlabel, myylabel, mytitle) {
  #using aes_string instead of aes because colnames are variables
  #ggplot(a, aes_string(x=myx,fill=myy)) + geom_bar()
  #c=prop.table(table(a$clusterLetter))
  #scales::percent(as.double(z))
  #a %>% select(clusterLetter) %>% table() %>% prop.table() %>% as.double() %>% scales::percent()
  #this one
  #myx = enquo(myx)
  #myy = enquo(myy)
  # Column names arrive as strings, so they are converted to symbols and
  # unquoted with !! inside the dplyr/ggplot2 calls.
  xcol <- sym(myx)
  ycol <- sym(myy)
  a %>%
    count(!!ycol, !!xcol) %>%
    group_by(!!xcol) %>%
    mutate(lab = paste0(round(prop.table(n) * 100, 2), '%')) %>%
    ggplot(aes(!!xcol, n, fill = !!ycol)) +
    geom_col() + geom_text(aes(label = lab), position = 'stack', vjust = 1.5) +
    labs(x = myxlabel, y = myylabel, title = mytitle)
}
Refer to this link: http://chem.r.umn.edu/visual_literacy/ for an introduction to what we are doing and what this file is trying to analyze.
We are testing whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
# Survey 1: cluster the students in allBioc1, then append the exs1 experts.
allBiochem <- analyzeUMRCourses(allBioc1)
#adding experts
allBiochem <- addExperts(allBiochem, exs1)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 67 | 0.42 | 0.17 | 0.42 | -0.06 | 0.71 | 0.77 |
X12 | BCH339M | 45 | 0.37 | 0.17 | 0.39 | -0.08 | 0.69 | 0.77 |
X13 | BCH369 | 434 | 0.38 | 0.17 | 0.41 | -0.16 | 0.74 | 0.89 |
X14 | BIO206 | 15 | 0.27 | 0.22 | 0.35 | -0.33 | 0.51 | 0.84 |
X15 | BIOC3321_F21 | 58 | 0.40 | 0.18 | 0.45 | -0.18 | 0.67 | 0.85 |
X16 | BIOC3321_F22 | 43 | 0.46 | 0.11 | 0.47 | 0.23 | 0.72 | 0.49 |
X17 | BIOC431 | 106 | 0.38 | 0.17 | 0.39 | -0.12 | 0.66 | 0.78 |
X18 | Expert | 6 | 0.67 | 0.12 | 0.69 | 0.49 | 0.82 | 0.33 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.7440374 |
BCH369-BCH339F | 0.6379475 |
BIO206-BCH339F | 0.0397217 |
BIOC3321_F21-BCH339F | 0.9963575 |
BIOC3321_F22-BCH339F | 0.9128459 |
BIOC431-BCH339F | 0.6869647 |
Expert-BCH339F | 0.0133771 |
BCH369-BCH339M | 0.9996829 |
BIO206-BCH339M | 0.5198322 |
BIOC3321_F21-BCH339M | 0.9850332 |
BIOC3321_F22-BCH339M | 0.1535356 |
BIOC431-BCH339M | 0.9999967 |
Expert-BCH339M | 0.0011671 |
BIO206-BCH369 | 0.1952398 |
BIOC3321_F21-BCH369 | 0.9957465 |
BIOC3321_F22-BCH369 | 0.0563817 |
BIOC431-BCH369 | 0.9999892 |
Expert-BCH369 | 0.0009626 |
BIOC3321_F21-BIO206 | 0.1477599 |
BIOC3321_F22-BIO206 | 0.0040136 |
BIOC431-BIO206 | 0.3141028 |
Expert-BIO206 | 0.0000332 |
BIOC3321_F22-BIOC3321_F21 | 0.5738702 |
BIOC431-BIOC3321_F21 | 0.9912266 |
Expert-BIOC3321_F21 | 0.0049232 |
BIOC431-BIOC3321_F22 | 0.0890571 |
Expert-BIOC3321_F22 | 0.0928535 |
Expert-BIOC431 | 0.0009923 |
#plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 472 | 0.37 | 0.17 | 0.40 | -0.33 | 0.74 | 1.06 |
X12 | White | 295 | 0.41 | 0.16 | 0.44 | -0.12 | 0.72 | 0.83 |
Testing statistical significance: p-values |
---|
0.0006445 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 536 | 0.38 | 0.16 | 0.42 | -0.33 | 0.74 | 1.06 |
X12 | Male | 231 | 0.39 | 0.18 | 0.41 | -0.18 | 0.72 | 0.90 |
Testing statistical significance: p-values |
---|
0.6741292 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 67 | 0.23 | 0.08 | 0.22 | 0.09 | 0.44 | 0.35 |
X12 | BCH339M | 45 | 0.25 | 0.11 | 0.22 | 0.05 | 0.64 | 0.60 |
X13 | BCH369 | 434 | 0.23 | 0.08 | 0.22 | 0.04 | 0.50 | 0.46 |
X14 | BIO206 | 15 | 0.26 | 0.11 | 0.25 | 0.09 | 0.53 | 0.44 |
X15 | BIOC3321_F21 | 58 | 0.22 | 0.07 | 0.23 | 0.09 | 0.41 | 0.32 |
X16 | BIOC3321_F22 | 43 | 0.24 | 0.07 | 0.25 | 0.13 | 0.42 | 0.29 |
X17 | BIOC431 | 106 | 0.23 | 0.08 | 0.23 | 0.05 | 0.47 | 0.43 |
X18 | Expert | 6 | 0.37 | 0.11 | 0.34 | 0.28 | 0.57 | 0.29 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.9584618 |
BCH369-BCH339F | 0.9999960 |
BIO206-BCH339F | 0.8954802 |
BIOC3321_F21-BCH339F | 0.9999382 |
BIOC3321_F22-BCH339F | 0.9827704 |
BIOC431-BCH339F | 0.9999993 |
Expert-BCH339F | 0.0008977 |
BCH369-BCH339M | 0.7729823 |
BIO206-BCH339M | 0.9993777 |
BIOC3321_F21-BCH339M | 0.8527668 |
BIOC3321_F22-BCH339M | 1.0000000 |
BIOC431-BCH339M | 0.9735230 |
Expert-BCH339M | 0.0080767 |
BIO206-BCH369 | 0.7776656 |
BIOC3321_F21-BCH369 | 0.9999966 |
BIOC3321_F22-BCH369 | 0.8775129 |
BIOC431-BCH369 | 0.9988141 |
Expert-BCH369 | 0.0003255 |
BIOC3321_F21-BIO206 | 0.7904576 |
BIOC3321_F22-BIO206 | 0.9983188 |
BIOC431-BIO206 | 0.9192574 |
Expert-BIO206 | 0.0736462 |
BIOC3321_F22-BIOC3321_F21 | 0.9158007 |
BIOC431-BIOC3321_F21 | 0.9986318 |
Expert-BIOC3321_F21 | 0.0005035 |
BIOC431-BIOC3321_F22 | 0.9910969 |
Expert-BIOC3321_F22 | 0.0066300 |
Expert-BIOC431 | 0.0009379 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 472 | 0.23 | 0.08 | 0.22 | 0.04 | 0.64 | 0.60 |
X12 | White | 295 | 0.24 | 0.08 | 0.23 | 0.05 | 0.47 | 0.43 |
Testing statistical significance: p-values |
---|
0.1406698 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 536 | 0.23 | 0.08 | 0.22 | 0.04 | 0.50 | 0.46 |
X12 | Male | 231 | 0.23 | 0.08 | 0.23 | 0.05 | 0.64 | 0.60 |
Testing statistical significance: p-values |
---|
0.3742489 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only because the k-means step is run after “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 6 | 0.67 | 0.12 | 0.69 | 0.49 | 0.82 | 0.33 |
X12 | HP | 228 | 0.51 | 0.10 | 0.51 | 0.18 | 0.74 | 0.55 |
X13 | IP | 346 | 0.44 | 0.09 | 0.43 | 0.24 | 0.72 | 0.47 |
X14 | LP | 194 | 0.15 | 0.12 | 0.19 | -0.33 | 0.38 | 0.71 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0.0005333 |
IP-Expert | 0.0000001 |
LP-Expert | 0.0000000 |
IP-HP | 0.0000000 |
LP-HP | 0.0000000 |
LP-IP | 0.0000000 |
Are cluster groups unevenly distributed among these categories? A chi-square test of independence checks whether the three cluster groups (HP, IP, LP) occur in similar proportions across the levels of a category (course, year, sex, race…); a small p-value indicates that the proportions differ more than would be expected by chance.
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.18695
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC: the plotting symbol comes from the cluster
# letter and the colour from the course.
markerIntegers <- as.integer(as.factor(allBiochem$Course_collected))
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "ES_Chemical_Reaction - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-appearance order for both vectors, so each course name
# lines up with its colour.
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F21 | Total N= 58 | 28 % | 53 % | 19 % | |||
Sex: males N= 18 ; females N= 40 | male | female | male | female | male | female | |
50 % | 18 % | 39 % | 60 % | 11 % | 22 % | ||
Race: White N= 32 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
28 % | 27 % | 56 % | 50 % | 16 % | 23 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F22 | Total N= 43 | 44 % | 47 % | 9.3 % | |||
Sex: males N= 5 ; females N= 38 | male | female | male | female | male | female | |
40 % | 45 % | 20 % | 50 % | 40 % | 5.3 % | ||
Race: White N= 28 ; Non-white N= 15 | white | non-white | white | non-white | white | non-white | |
54 % | 27 % | 43 % | 53 % | 3.6 % | 20 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC431 | Total N= 106 | 30 % | 42 % | 28 % | |||
Sex: males N= 42 ; females N= 64 | male | female | male | female | male | female | |
36 % | 27 % | 43 % | 41 % | 21 % | 33 % | ||
Race: White N= 80 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
31 % | 27 % | 40 % | 46 % | 29 % | 27 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339F | Total N= 67 | 28 % | 52 % | 19 % | |||
Sex: males N= 26 ; females N= 41 | male | female | male | female | male | female | |
38 % | 22 % | 42 % | 59 % | 19 % | 20 % | ||
Race: White N= 19 ; Non-white N= 48 | white | non-white | white | non-white | white | non-white | |
26 % | 29 % | 74 % | 44 % | 0 % | 27 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339M | Total N= 45 | 33 % | 33 % | 33 % | |||
Sex: males N= 14 ; females N= 31 | male | female | male | female | male | female | |
43 % | 29 % | 21 % | 39 % | 36 % | 32 % | ||
Race: White N= 11 ; Non-white N= 34 | white | non-white | white | non-white | white | non-white | |
27 % | 35 % | 27 % | 35 % | 45 % | 29 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH369 | Total N= 434 | 28 % | 45 % | 27 % | |||
Sex: males N= 124 ; females N= 309 | male | female | male | female | male | female | |
27 % | 29 % | 45 % | 45 % | 28 % | 26 % | ||
Race: White N= 122 ; Non-white N= 312 | white | non-white | white | non-white | white | non-white | |
34 % | 26 % | 48 % | 44 % | 18 % | 30 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIO206 | Total N= 15 | 33 % | 33 % | 33 % | |||
Sex: males N= 2 ; females N= 13 | male | female | male | female | male | female | |
50 % | 31 % | 0 % | 38 % | 50 % | 31 % | ||
Race: White N= 3 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
33 % | 33 % | 33 % | 33 % | 33 % | 33 % |
We are testing whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
# Survey 2: cluster the students in allBioc2, then append the exs2 experts.
allBiochem <- analyzeUMRCourses(allBioc2)
allBiochem <- addExperts(allBiochem, exs2)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 67 | 0.47 | 0.16 | 0.50 | -0.02 | 0.74 | 0.77 |
X12 | BCH339M | 45 | 0.42 | 0.16 | 0.42 | 0.06 | 0.70 | 0.64 |
X13 | BCH369 | 434 | 0.42 | 0.17 | 0.46 | -0.18 | 0.74 | 0.92 |
X14 | BIO206 | 15 | 0.24 | 0.24 | 0.30 | -0.28 | 0.51 | 0.79 |
X15 | BIOC3321_F21 | 58 | 0.44 | 0.17 | 0.46 | -0.16 | 0.68 | 0.84 |
X16 | BIOC3321_F22 | 43 | 0.47 | 0.10 | 0.48 | 0.24 | 0.65 | 0.41 |
X17 | BIOC431 | 106 | 0.42 | 0.18 | 0.44 | -0.10 | 0.80 | 0.89 |
X18 | Expert | 8 | 0.72 | 0.09 | 0.70 | 0.59 | 0.82 | 0.23 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.8080831 |
BCH369-BCH339F | 0.3210822 |
BIO206-BCH339F | 0.0000776 |
BIOC3321_F21-BCH339F | 0.9642580 |
BIOC3321_F22-BCH339F | 1.0000000 |
BIOC431-BCH339F | 0.6205090 |
Expert-BCH339F | 0.0022142 |
BCH369-BCH339M | 1.0000000 |
BIO206-BCH339M | 0.0097112 |
BIOC3321_F21-BCH339M | 0.9996817 |
BIOC3321_F22-BCH339M | 0.8710228 |
BIOC431-BCH339M | 1.0000000 |
Expert-BCH339M | 0.0001342 |
BIO206-BCH369 | 0.0017564 |
BIOC3321_F21-BCH369 | 0.9952487 |
BIOC3321_F22-BCH369 | 0.5723920 |
BIOC431-BCH369 | 1.0000000 |
Expert-BCH369 | 0.0000233 |
BIOC3321_F21-BIO206 | 0.0018300 |
BIOC3321_F22-BIO206 | 0.0002082 |
BIOC431-BIO206 | 0.0030260 |
Expert-BIO206 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 0.9794716 |
BIOC431-BIOC3321_F21 | 0.9993384 |
Expert-BIOC3321_F21 | 0.0003043 |
BIOC431-BIOC3321_F22 | 0.7678388 |
Expert-BIOC3321_F22 | 0.0035366 |
Expert-BIOC431 | 0.0000544 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 472 | 0.41 | 0.18 | 0.44 | -0.28 | 0.79 | 1.06 |
X12 | White | 295 | 0.45 | 0.16 | 0.47 | -0.10 | 0.80 | 0.89 |
Testing statistical significance: p-values |
---|
0.0027902 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 536 | 0.42 | 0.17 | 0.46 | -0.28 | 0.79 | 1.06 |
X12 | Male | 231 | 0.42 | 0.19 | 0.46 | -0.18 | 0.80 | 0.97 |
Testing statistical significance: p-values |
---|
0.8856167 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 67 | 0.27 | 0.10 | 0.26 | 0.09 | 0.56 | 0.48 |
X12 | BCH339M | 45 | 0.28 | 0.10 | 0.27 | 0.09 | 0.57 | 0.48 |
X13 | BCH369 | 434 | 0.25 | 0.09 | 0.24 | 0.03 | 0.71 | 0.68 |
X14 | BIO206 | 15 | 0.25 | 0.10 | 0.24 | 0.05 | 0.44 | 0.40 |
X15 | BIOC3321_F21 | 58 | 0.25 | 0.09 | 0.26 | 0.04 | 0.45 | 0.41 |
X16 | BIOC3321_F22 | 43 | 0.25 | 0.07 | 0.23 | 0.10 | 0.41 | 0.31 |
X17 | BIOC431 | 106 | 0.26 | 0.09 | 0.26 | 0.04 | 0.53 | 0.49 |
X18 | Expert | 8 | 0.40 | 0.06 | 0.42 | 0.29 | 0.47 | 0.17 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.9995223 |
BCH369-BCH339F | 0.8252287 |
BIO206-BCH339F | 0.9975207 |
BIOC3321_F21-BCH339F | 0.9777935 |
BIOC3321_F22-BCH339F | 0.9671558 |
BIOC431-BCH339F | 0.9999998 |
Expert-BCH339F | 0.0032583 |
BCH369-BCH339M | 0.5672256 |
BIO206-BCH339M | 0.9757107 |
BIOC3321_F21-BCH339M | 0.8611781 |
BIOC3321_F22-BCH339M | 0.8411231 |
BIOC431-BCH339M | 0.9963071 |
Expert-BCH339M | 0.0121561 |
BIO206-BCH369 | 1.0000000 |
BIOC3321_F21-BCH369 | 1.0000000 |
BIOC3321_F22-BCH369 | 1.0000000 |
BIOC431-BCH369 | 0.7936262 |
Expert-BCH369 | 0.0001522 |
BIOC3321_F21-BIO206 | 1.0000000 |
BIOC3321_F22-BIO206 | 1.0000000 |
BIOC431-BIO206 | 0.9988230 |
Expert-BIO206 | 0.0050687 |
BIOC3321_F22-BIOC3321_F21 | 0.9999999 |
BIOC431-BIOC3321_F21 | 0.9856284 |
Expert-BIOC3321_F21 | 0.0005597 |
BIOC431-BIOC3321_F22 | 0.9771820 |
Expert-BIOC3321_F22 | 0.0005873 |
Expert-BIOC431 | 0.0017943 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 472 | 0.25 | 0.09 | 0.24 | 0.03 | 0.62 | 0.59 |
X12 | White | 295 | 0.26 | 0.09 | 0.26 | 0.04 | 0.71 | 0.67 |
Testing statistical significance: p-values |
---|
0.0241297 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 536 | 0.25 | 0.09 | 0.25 | 0.03 | 0.71 | 0.68 |
X12 | Male | 231 | 0.26 | 0.09 | 0.26 | 0.03 | 0.57 | 0.54 |
Testing statistical significance: p-values |
---|
0.3837504 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only because the k-means step is run after “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 8 | 0.72 | 0.09 | 0.70 | 0.59 | 0.82 | 0.23 |
X12 | HP | 205 | 0.54 | 0.10 | 0.55 | 0.23 | 0.80 | 0.57 |
X13 | IP | 407 | 0.47 | 0.09 | 0.47 | 0.27 | 0.70 | 0.43 |
X14 | LP | 156 | 0.16 | 0.13 | 0.17 | -0.28 | 0.34 | 0.62 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 1.46e-05 |
IP-Expert | 0.00e+00 |
LP-Expert | 0.00e+00 |
IP-HP | 0.00e+00 |
LP-HP | 0.00e+00 |
LP-IP | 0.00e+00 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.0192
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC for the current allBiochem data set.
# Each course gets an integer code (its factor-level index), which is used
# as the colour index of the points.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# pch is taken from the cluster label; the title suggests the points are
# drawn as H / I / L -- NOTE(review): confirm clusterLetter is character.
plot(allBiochem$PLC, allBiochem$NS,
     pch = allBiochem$clusterLetter,
     main = "ES Glucosidase - High(H), Intermediate(I), Low(L) performers",
     ylab = "NS", xlab = "PLC",
     col = markerIntegers)
# unique() keeps first-occurrence order for both vectors, so each course
# label is paired with the colour code used for its points.
legend("topleft", legend = unique(allBiochem$Course_collected),
       col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F21 | Total N= 58 | 26 % | 57 % | 17 % | |||
Sex: males N= 18 ; females N= 40 | male | female | male | female | male | female | |
28 % | 25 % | 56 % | 57 % | 17 % | 18 % | ||
Race: White N= 32 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
34 % | 15 % | 53 % | 62 % | 12 % | 23 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F22 | Total N= 43 | 19 % | 74 % | 7 % | |||
Sex: males N= 5 ; females N= 38 | male | female | male | female | male | female | |
20 % | 18 % | 60 % | 76 % | 20 % | 5.3 % | ||
Race: White N= 28 ; Non-white N= 15 | white | non-white | white | non-white | white | non-white | |
25 % | 6.7 % | 71 % | 80 % | 3.6 % | 13 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC431 | Total N= 106 | 33 % | 48 % | 19 % | |||
Sex: males N= 42 ; females N= 64 | male | female | male | female | male | female | |
38 % | 30 % | 50 % | 47 % | 12 % | 23 % | ||
Race: White N= 80 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
35 % | 27 % | 45 % | 58 % | 20 % | 15 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339F | Total N= 67 | 27 % | 58 % | 15 % | |||
Sex: males N= 26 ; females N= 41 | male | female | male | female | male | female | |
31 % | 24 % | 58 % | 59 % | 12 % | 17 % | ||
Race: White N= 19 ; Non-white N= 48 | white | non-white | white | non-white | white | non-white | |
42 % | 21 % | 53 % | 60 % | 5.3 % | 19 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339M | Total N= 45 | 31 % | 47 % | 22 % | |||
Sex: males N= 14 ; females N= 31 | male | female | male | female | male | female | |
29 % | 32 % | 50 % | 45 % | 21 % | 23 % | ||
Race: White N= 11 ; Non-white N= 34 | white | non-white | white | non-white | white | non-white | |
18 % | 35 % | 45 % | 47 % | 36 % | 18 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH369 | Total N= 434 | 26 % | 52 % | 22 % | |||
Sex: males N= 124 ; females N= 309 | male | female | male | female | male | female | |
24 % | 26 % | 53 % | 52 % | 23 % | 22 % | ||
Race: White N= 122 ; Non-white N= 312 | white | non-white | white | non-white | white | non-white | |
24 % | 27 % | 59 % | 50 % | 17 % | 24 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIO206 | Total N= 15 | 20 % | 27 % | 53 % | |||
Sex: males N= 2 ; females N= 13 | male | female | male | female | male | female | |
50 % | 15 % | 0 % | 31 % | 50 % | 54 % | ||
Race: White N= 3 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
0 % | 25 % | 33 % | 25 % | 67 % | 50 % |
We test whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
# Build the analysis data set for this survey: run the course-level analysis
# on allBioc3, then append the expert rows in exs3 (per the file head,
# presumably the "Nucleic_Acids" expert subset -- TODO confirm).
allBiochem = analyzeUMRCourses(allBioc3)
allBiochem = addExperts(allBiochem, exs3)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 54 | 0.21 | 0.17 | 0.24 | -0.33 | 0.59 | 0.93 |
X12 | BCH339M | 39 | 0.24 | 0.20 | 0.27 | -0.23 | 0.61 | 0.84 |
X13 | BCH369 | 140 | 0.16 | 0.17 | 0.15 | -0.29 | 0.92 | 1.21 |
X14 | BIO206 | 8 | 0.13 | 0.12 | 0.14 | -0.03 | 0.31 | 0.35 |
X15 | BIOC3321_F21 | 53 | 0.14 | 0.15 | 0.14 | -0.17 | 0.56 | 0.72 |
X16 | BIOC3321_F22 | 36 | 0.16 | 0.12 | 0.15 | -0.08 | 0.39 | 0.47 |
X17 | BIOC431 | 105 | 0.19 | 0.15 | 0.19 | -0.18 | 0.60 | 0.78 |
X18 | Expert | 7 | 0.71 | 0.08 | 0.69 | 0.60 | 0.82 | 0.22 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.9935714 |
BCH369-BCH339F | 0.3958337 |
BIO206-BCH339F | 0.8735208 |
BIOC3321_F21-BCH339F | 0.3778839 |
BIOC3321_F22-BCH339F | 0.7614683 |
BIOC431-BCH339F | 0.9972204 |
Expert-BCH339F | 0.0000000 |
BCH369-BCH339M | 0.0967327 |
BIO206-BCH339M | 0.6488155 |
BIOC3321_F21-BCH339M | 0.1060527 |
BIOC3321_F22-BCH339M | 0.3594868 |
BIOC431-BCH339M | 0.8060640 |
Expert-BCH339M | 0.0000000 |
BIO206-BCH369 | 0.9997373 |
BIOC3321_F21-BCH369 | 0.9997653 |
BIOC3321_F22-BCH369 | 1.0000000 |
BIOC431-BCH369 | 0.6543179 |
Expert-BCH369 | 0.0000000 |
BIOC3321_F21-BIO206 | 0.9999960 |
BIOC3321_F22-BIO206 | 0.9998297 |
BIOC431-BIO206 | 0.9582419 |
Expert-BIO206 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 0.9999632 |
BIOC431-BIOC3321_F21 | 0.6251432 |
Expert-BIOC3321_F21 | 0.0000000 |
BIOC431-BIOC3321_F22 | 0.9419214 |
Expert-BIOC3321_F22 | 0.0000000 |
Expert-BIOC431 | 0.0000000 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 229 | 0.16 | 0.16 | 0.17 | -0.33 | 0.59 | 0.93 |
X12 | White | 206 | 0.20 | 0.16 | 0.19 | -0.24 | 0.92 | 1.16 |
Testing statistical significance: p-values |
---|
0.0050469 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 295 | 0.17 | 0.15 | 0.18 | -0.23 | 0.92 | 1.15 |
X12 | Male | 140 | 0.19 | 0.18 | 0.17 | -0.33 | 0.60 | 0.94 |
Testing statistical significance: p-values |
---|
0.4114192 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 54 | 0.20 | 0.08 | 0.19 | 0.04 | 0.44 | 0.40 |
X12 | BCH339M | 39 | 0.20 | 0.09 | 0.20 | 0.04 | 0.42 | 0.38 |
X13 | BCH369 | 140 | 0.17 | 0.08 | 0.17 | 0.04 | 0.53 | 0.49 |
X14 | BIO206 | 8 | 0.15 | 0.06 | 0.15 | 0.09 | 0.28 | 0.18 |
X15 | BIOC3321_F21 | 53 | 0.17 | 0.08 | 0.15 | 0.00 | 0.35 | 0.35 |
X16 | BIOC3321_F22 | 36 | 0.17 | 0.07 | 0.18 | 0.03 | 0.30 | 0.27 |
X17 | BIOC431 | 105 | 0.21 | 0.09 | 0.21 | 0.04 | 0.50 | 0.46 |
X18 | Expert | 7 | 0.43 | 0.08 | 0.44 | 0.33 | 0.53 | 0.20 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 1.0000000 |
BCH369-BCH339F | 0.3298772 |
BIO206-BCH339F | 0.8215999 |
BIOC3321_F21-BCH339F | 0.5291601 |
BIOC3321_F22-BCH339F | 0.6943732 |
BIOC431-BCH339F | 0.9994788 |
Expert-BCH339F | 0.0000000 |
BCH369-BCH339M | 0.4155710 |
BIO206-BCH339M | 0.8108479 |
BIOC3321_F21-BCH339M | 0.5697093 |
BIOC3321_F22-BCH339M | 0.7091461 |
BIOC431-BCH339M | 0.9999622 |
Expert-BCH339M | 0.0000000 |
BIO206-BCH369 | 0.9993766 |
BIOC3321_F21-BCH369 | 1.0000000 |
BIOC3321_F22-BCH369 | 1.0000000 |
BIOC431-BCH369 | 0.0131812 |
Expert-BCH369 | 0.0000000 |
BIOC3321_F21-BIO206 | 0.9997178 |
BIOC3321_F22-BIO206 | 0.9996690 |
BIOC431-BIO206 | 0.6444092 |
Expert-BIO206 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 1.0000000 |
BIOC431-BIOC3321_F21 | 0.1115039 |
Expert-BIOC3321_F21 | 0.0000000 |
BIOC431-BIOC3321_F22 | 0.2722941 |
Expert-BIOC3321_F22 | 0.0000000 |
Expert-BIOC431 | 0.0000000 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 229 | 0.18 | 0.08 | 0.17 | 0.03 | 0.44 | 0.41 |
X12 | White | 206 | 0.20 | 0.09 | 0.19 | 0.00 | 0.53 | 0.53 |
Testing statistical significance: p-values |
---|
0.0129279 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 295 | 0.18 | 0.08 | 0.18 | 0.00 | 0.44 | 0.44 |
X12 | Male | 140 | 0.19 | 0.10 | 0.17 | 0.04 | 0.53 | 0.50 |
Testing statistical significance: p-values |
---|
0.6659064 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only when the k-means method uses a fixed seed, “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.71 | 0.08 | 0.69 | 0.60 | 0.82 | 0.22 |
X12 | HP | 81 | 0.37 | 0.11 | 0.36 | 0.16 | 0.61 | 0.45 |
X13 | IP | 176 | 0.24 | 0.10 | 0.23 | 0.03 | 0.92 | 0.89 |
X14 | LP | 178 | 0.03 | 0.10 | 0.05 | -0.33 | 0.23 | 0.57 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0 |
IP-Expert | 0 |
LP-Expert | 0 |
IP-HP | 0 |
LP-HP | 0 |
LP-IP | 0 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.07602
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC for the current allBiochem data set.
# Each course gets an integer code (its factor-level index), which is used
# as the colour index of the points.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# pch is taken from the cluster label; the title suggests the points are
# drawn as H / I / L -- NOTE(review): confirm clusterLetter is character.
plot(allBiochem$PLC, allBiochem$NS,
     pch = allBiochem$clusterLetter,
     main = "Nucleic Acids - High(H), Intermediate(I), Low(L) performers",
     ylab = "NS", xlab = "PLC",
     col = markerIntegers)
# unique() keeps first-occurrence order for both vectors, so each course
# label is paired with the colour code used for its points.
legend("topleft", legend = unique(allBiochem$Course_collected),
       col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F21 | Total N= 53 | 13 % | 38 % | 49 % | |||
Sex: males N= 15 ; females N= 38 | male | female | male | female | male | female | |
13 % | 13 % | 47 % | 34 % | 40 % | 53 % | ||
Race: White N= 29 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
10 % | 17 % | 45 % | 29 % | 45 % | 54 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F22 | Total N= 36 | 14 % | 39 % | 47 % | |||
Sex: males N= 4 ; females N= 32 | male | female | male | female | male | female | |
0 % | 16 % | 25 % | 41 % | 75 % | 44 % | ||
Race: White N= 24 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
12 % | 17 % | 42 % | 33 % | 46 % | 50 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC431 | Total N= 105 | 27 % | 38 % | 35 % | |||
Sex: males N= 41 ; females N= 64 | male | female | male | female | male | female | |
29 % | 25 % | 37 % | 39 % | 34 % | 36 % | ||
Race: White N= 79 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
29 % | 19 % | 42 % | 27 % | 29 % | 54 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339F | Total N= 54 | 22 % | 50 % | 28 % | |||
Sex: males N= 20 ; females N= 34 | male | female | male | female | male | female | |
20 % | 24 % | 40 % | 56 % | 40 % | 21 % | ||
Race: White N= 16 ; Non-white N= 38 | white | non-white | white | non-white | white | non-white | |
31 % | 18 % | 44 % | 53 % | 25 % | 29 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339M | Total N= 39 | 28 % | 38 % | 33 % | |||
Sex: males N= 14 ; females N= 25 | male | female | male | female | male | female | |
21 % | 32 % | 29 % | 44 % | 50 % | 24 % | ||
Race: White N= 9 ; Non-white N= 30 | white | non-white | white | non-white | white | non-white | |
44 % | 23 % | 22 % | 43 % | 33 % | 33 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH369 | Total N= 140 | 13 % | 41 % | 46 % | |||
Sex: males N= 45 ; females N= 95 | male | female | male | female | male | female | |
22 % | 8.4 % | 24 % | 48 % | 53 % | 43 % | ||
Race: White N= 48 ; Non-white N= 92 | white | non-white | white | non-white | white | non-white | |
19 % | 9.8 % | 38 % | 42 % | 44 % | 48 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIO206 | Total N= 8 | 0 % | 38 % | 62 % | |||
Sex: males N= 1 ; females N= 7 | male | female | male | female | male | female | |
0 % | 0 % | 100 % | 29 % | 0 % | 71 % | ||
Race: White N= 1 ; Non-white N= 7 | white | non-white | white | non-white | white | non-white | |
0 % | 0 % | 0 % | 43 % | 100 % | 57 % |
We test whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
# Build the analysis data set for this survey: run the course-level analysis
# on allBioc4, then append the expert rows in exs4 (per the file head,
# presumably the "Oxygen_Binding" expert subset -- TODO confirm).
allBiochem = analyzeUMRCourses(allBioc4)
allBiochem = addExperts(allBiochem, exs4)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 53 | 0.18 | 0.15 | 0.19 | -0.18 | 0.67 | 0.84 |
X12 | BCH339M | 32 | 0.19 | 0.17 | 0.19 | -0.32 | 0.50 | 0.82 |
X13 | BCH369 | 123 | 0.18 | 0.14 | 0.18 | -0.21 | 0.53 | 0.74 |
X14 | BIOC3321_F21 | 53 | 0.22 | 0.26 | 0.18 | -0.25 | 0.86 | 1.11 |
X15 | BIOC3321_F22 | 37 | 0.20 | 0.13 | 0.20 | -0.04 | 0.42 | 0.46 |
X16 | BIOC431 | 110 | 0.18 | 0.16 | 0.19 | -0.19 | 0.57 | 0.76 |
X17 | Expert | 15 | 0.69 | 0.13 | 0.66 | 0.52 | 0.89 | 0.38 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.9999523 |
BCH369-BCH339F | 0.9999997 |
BIOC3321_F21-BCH339F | 0.9136631 |
BIOC3321_F22-BCH339F | 0.9999127 |
BIOC431-BCH339F | 0.9999979 |
Expert-BCH339F | 0.0000000 |
BCH369-BCH339M | 0.9995189 |
BIOC3321_F21-BCH339M | 0.9922864 |
BIOC3321_F22-BCH339M | 1.0000000 |
BIOC431-BCH339M | 0.9992457 |
Expert-BCH339M | 0.0000000 |
BIOC3321_F21-BCH369 | 0.7574619 |
BIOC3321_F22-BCH369 | 0.9991373 |
BIOC431-BCH369 | 1.0000000 |
Expert-BCH369 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 0.9914685 |
BIOC431-BIOC3321_F21 | 0.7440660 |
Expert-BIOC3321_F21 | 0.0000000 |
BIOC431-BIOC3321_F22 | 0.9986926 |
Expert-BIOC3321_F22 | 0.0000000 |
Expert-BIOC431 | 0.0000000 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 226 | 0.18 | 0.17 | 0.18 | -0.32 | 0.86 | 1.17 |
X12 | White | 182 | 0.20 | 0.17 | 0.19 | -0.19 | 0.69 | 0.88 |
Testing statistical significance: p-values |
---|
0.260965 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 275 | 0.19 | 0.16 | 0.18 | -0.25 | 0.80 | 1.05 |
X12 | Male | 133 | 0.19 | 0.18 | 0.19 | -0.32 | 0.86 | 1.17 |
Testing statistical significance: p-values |
---|
0.7977316 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 53 | 0.20 | 0.09 | 0.19 | 0.05 | 0.45 | 0.40 |
X12 | BCH339M | 32 | 0.18 | 0.07 | 0.18 | 0.04 | 0.33 | 0.29 |
X13 | BCH369 | 123 | 0.18 | 0.07 | 0.17 | 0.00 | 0.35 | 0.35 |
X14 | BIOC3321_F21 | 53 | 0.21 | 0.09 | 0.21 | 0.06 | 0.44 | 0.38 |
X15 | BIOC3321_F22 | 37 | 0.17 | 0.06 | 0.17 | 0.04 | 0.30 | 0.26 |
X16 | BIOC431 | 110 | 0.19 | 0.07 | 0.18 | 0.00 | 0.38 | 0.38 |
X17 | Expert | 15 | 0.35 | 0.09 | 0.35 | 0.25 | 0.53 | 0.28 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.9243116 |
BCH369-BCH339F | 0.2818569 |
BIOC3321_F21-BCH339F | 0.9994112 |
BIOC3321_F22-BCH339F | 0.3596134 |
BIOC431-BCH339F | 0.8045716 |
Expert-BCH339F | 0.0000000 |
BCH369-BCH339M | 0.9970343 |
BIOC3321_F21-BCH339M | 0.7461135 |
BIOC3321_F22-BCH339M | 0.9823159 |
BIOC431-BCH339M | 1.0000000 |
Expert-BCH339M | 0.0000000 |
BIOC3321_F21-BCH369 | 0.0875734 |
BIOC3321_F22-BCH369 | 0.9994760 |
BIOC431-BCH369 | 0.9505337 |
Expert-BCH369 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 0.1638216 |
BIOC431-BIOC3321_F21 | 0.4739195 |
Expert-BIOC3321_F21 | 0.0000000 |
BIOC431-BIOC3321_F22 | 0.9184516 |
Expert-BIOC3321_F22 | 0.0000000 |
Expert-BIOC431 | 0.0000000 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 226 | 0.18 | 0.08 | 0.18 | 0 | 0.45 | 0.45 |
X12 | White | 182 | 0.19 | 0.07 | 0.19 | 0 | 0.44 | 0.44 |
Testing statistical significance: p-values |
---|
0.5478315 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 275 | 0.19 | 0.07 | 0.18 | 0 | 0.45 | 0.45 |
X12 | Male | 133 | 0.19 | 0.08 | 0.19 | 0 | 0.44 | 0.44 |
Testing statistical significance: p-values |
---|
0.4978964 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only when the k-means method uses a fixed seed, “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 15 | 0.69 | 0.13 | 0.66 | 0.52 | 0.89 | 0.38 |
X12 | HP | 78 | 0.38 | 0.15 | 0.36 | 0.09 | 0.86 | 0.77 |
X13 | IP | 195 | 0.23 | 0.09 | 0.22 | 0.04 | 0.53 | 0.49 |
X14 | LP | 135 | 0.01 | 0.09 | 0.02 | -0.32 | 0.18 | 0.50 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0 |
IP-Expert | 0 |
LP-Expert | 0 |
IP-HP | 0 |
LP-HP | 0 |
LP-IP | 0 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.02553
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC for the current allBiochem data set.
# Each course gets an integer code (its factor-level index), which is used
# as the colour index of the points.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# pch is taken from the cluster label; the title suggests the points are
# drawn as H / I / L -- NOTE(review): confirm clusterLetter is character.
plot(allBiochem$PLC, allBiochem$NS,
     pch = allBiochem$clusterLetter,
     main = "Oxygen Binding - High(H), Intermediate(I), Low(L) performers",
     ylab = "NS", xlab = "PLC",
     col = markerIntegers)
# unique() keeps first-occurrence order for both vectors, so each course
# label is paired with the colour code used for its points.
legend("topleft", legend = unique(allBiochem$Course_collected),
       col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F21 | Total N= 53 | 34 % | 25 % | 42 % | |||
Sex: males N= 16 ; females N= 37 | male | female | male | female | male | female | |
44 % | 30 % | 31 % | 22 % | 25 % | 49 % | ||
Race: White N= 29 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
34 % | 33 % | 34 % | 12 % | 31 % | 54 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F22 | Total N= 37 | 8.1 % | 62 % | 30 % | |||
Sex: males N= 4 ; females N= 33 | male | female | male | female | male | female | |
0 % | 9.1 % | 75 % | 61 % | 25 % | 30 % | ||
Race: White N= 24 ; Non-white N= 13 | white | non-white | white | non-white | white | non-white | |
8.3 % | 7.7 % | 71 % | 46 % | 21 % | 46 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC431 | Total N= 110 | 18 % | 49 % | 33 % | |||
Sex: males N= 41 ; females N= 69 | male | female | male | female | male | female | |
20 % | 17 % | 46 % | 51 % | 34 % | 32 % | ||
Race: White N= 84 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
19 % | 15 % | 48 % | 54 % | 33 % | 31 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339F | Total N= 53 | 25 % | 47 % | 28 % | |||
Sex: males N= 19 ; females N= 34 | male | female | male | female | male | female | |
16 % | 29 % | 53 % | 44 % | 32 % | 26 % | ||
Race: White N= 14 ; Non-white N= 39 | white | non-white | white | non-white | white | non-white | |
21 % | 26 % | 43 % | 49 % | 36 % | 26 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339M | Total N= 32 | 19 % | 50 % | 31 % | |||
Sex: males N= 11 ; females N= 21 | male | female | male | female | male | female | |
27 % | 14 % | 45 % | 52 % | 27 % | 33 % | ||
Race: White N= 8 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
25 % | 17 % | 50 % | 50 % | 25 % | 33 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH369 | Total N= 123 | 15 % | 52 % | 33 % | |||
Sex: males N= 42 ; females N= 81 | male | female | male | female | male | female | |
14 % | 15 % | 50 % | 53 % | 36 % | 32 % | ||
Race: White N= 23 ; Non-white N= 100 | white | non-white | white | non-white | white | non-white | |
13 % | 15 % | 52 % | 52 % | 35 % | 33 % |
We test whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
# Build the analysis data set for this survey: run the course-level analysis
# on allBioc5, then append the expert rows in exs5 (the plot title in this
# section suggests the Protein Structure survey -- TODO confirm).
allBiochem = analyzeUMRCourses(allBioc5)
allBiochem = addExperts(allBiochem, exs5)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 60 | 0.17 | 0.08 | 0.17 | 0.04 | 0.47 | 0.44 |
X12 | BCH339M | 39 | 0.29 | 0.21 | 0.30 | -0.20 | 0.80 | 1.00 |
X13 | BCH369 | 133 | 0.19 | 0.17 | 0.20 | -0.24 | 0.58 | 0.83 |
X14 | BIO206 | 11 | 0.21 | 0.11 | 0.25 | 0.02 | 0.35 | 0.33 |
X15 | BIOC3321_F21 | 51 | 0.26 | 0.15 | 0.29 | -0.13 | 0.54 | 0.67 |
X16 | BIOC3321_F22 | 35 | 0.27 | 0.13 | 0.27 | 0.01 | 0.51 | 0.51 |
X17 | BIOC431 | 102 | 0.32 | 0.20 | 0.35 | -0.12 | 0.78 | 0.90 |
X18 | Expert | 7 | 0.76 | 0.10 | 0.79 | 0.59 | 0.89 | 0.30 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.0168274 |
BCH369-BCH339F | 0.9961941 |
BIO206-BCH339F | 0.9964481 |
BIOC3321_F21-BCH339F | 0.1383356 |
BIOC3321_F22-BCH339F | 0.1466410 |
BIOC431-BCH339F | 0.0000025 |
Expert-BCH339F | 0.0000000 |
BCH369-BCH339M | 0.0298873 |
BIO206-BCH339M | 0.8757653 |
BIOC3321_F21-BCH339M | 0.9854992 |
BIOC3321_F22-BCH339M | 0.9989217 |
BIOC431-BCH339M | 0.9760226 |
Expert-BCH339M | 0.0000000 |
BIO206-BCH369 | 0.9999347 |
BIOC3321_F21-BCH369 | 0.2492654 |
BIOC3321_F22-BCH369 | 0.2666801 |
BIOC431-BCH369 | 0.0000002 |
Expert-BCH369 | 0.0000000 |
BIOC3321_F21-BIO206 | 0.9922509 |
BIOC3321_F22-BIO206 | 0.9819973 |
BIOC431-BIO206 | 0.4518129 |
Expert-BIO206 | 0.0000000 |
BIOC3321_F22-BIOC3321_F21 | 0.9999973 |
BIOC431-BIOC3321_F21 | 0.3494096 |
Expert-BIOC3321_F21 | 0.0000000 |
BIOC431-BIOC3321_F22 | 0.7147123 |
Expert-BIOC3321_F22 | 0.0000000 |
Expert-BIOC431 | 0.0000000 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 238 | 0.22 | 0.17 | 0.22 | -0.24 | 0.80 | 1.05 |
X12 | White | 192 | 0.27 | 0.18 | 0.27 | -0.15 | 0.76 | 0.91 |
Testing statistical significance: p-values |
---|
0.0031158 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 300 | 0.24 | 0.16 | 0.26 | -0.24 | 0.78 | 1.02 |
X12 | Male | 130 | 0.24 | 0.19 | 0.23 | -0.20 | 0.80 | 1.00 |
Testing statistical significance: p-values |
---|
0.9303274 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | BCH339F | 60 | 0.36 | 0.30 | 0.40 | -0.29 | 0.84 | 1.13 |
X12 | BCH339M | 39 | 0.18 | 0.08 | 0.18 | 0.03 | 0.38 | 0.35 |
X13 | BCH369 | 133 | 0.15 | 0.07 | 0.15 | 0.00 | 0.38 | 0.38 |
X14 | BIO206 | 11 | 0.18 | 0.05 | 0.19 | 0.07 | 0.24 | 0.17 |
X15 | BIOC3321_F21 | 51 | 0.17 | 0.07 | 0.17 | 0.04 | 0.33 | 0.29 |
X16 | BIOC3321_F22 | 35 | 0.18 | 0.06 | 0.17 | 0.08 | 0.42 | 0.34 |
X17 | BIOC431 | 102 | 0.18 | 0.08 | 0.18 | 0.04 | 0.40 | 0.36 |
X18 | Expert | 7 | 0.35 | 0.08 | 0.35 | 0.24 | 0.44 | 0.21 |
Testing statistical significance: p-values | |
---|---|
BCH339M-BCH339F | 0.0000000 |
BCH369-BCH339F | 0.0000000 |
BIO206-BCH339F | 0.0004226 |
BIOC3321_F21-BCH339F | 0.0000000 |
BIOC3321_F22-BCH339F | 0.0000000 |
BIOC431-BCH339F | 0.0000000 |
Expert-BCH339F | 0.9999811 |
BCH369-BCH339M | 0.9162571 |
BIO206-BCH339M | 1.0000000 |
BIOC3321_F21-BCH339M | 0.9999193 |
BIOC3321_F22-BCH339M | 1.0000000 |
BIOC431-BCH339M | 0.9999995 |
Expert-BCH339M | 0.0472545 |
BIO206-BCH369 | 0.9990061 |
BIOC3321_F21-BCH369 | 0.9891985 |
BIOC3321_F22-BCH369 | 0.9428209 |
BIOC431-BCH369 | 0.8328689 |
Expert-BCH369 | 0.0036271 |
BIOC3321_F21-BIO206 | 1.0000000 |
BIOC3321_F22-BIO206 | 1.0000000 |
BIOC431-BIO206 | 1.0000000 |
Expert-BIO206 | 0.1279153 |
BIOC3321_F22-BIOC3321_F21 | 0.9999646 |
BIOC431-BIOC3321_F21 | 0.9999922 |
Expert-BIOC3321_F21 | 0.0207016 |
BIOC431-BIOC3321_F22 | 0.9999999 |
Expert-BIOC3321_F22 | 0.0484295 |
Expert-BIOC431 | 0.0220915 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 238 | 0.20 | 0.16 | 0.17 | -0.29 | 0.84 | 1.13 |
X12 | White | 192 | 0.19 | 0.12 | 0.18 | -0.19 | 0.79 | 0.97 |
Testing statistical significance: p-values |
---|
0.6012536 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 300 | 0.20 | 0.14 | 0.17 | -0.29 | 0.80 | 1.08 |
X12 | Male | 130 | 0.19 | 0.16 | 0.17 | -0.19 | 0.84 | 1.03 |
Testing statistical significance: p-values |
---|
0.8330036 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only when the k-means method uses a fixed seed, “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.76 | 0.10 | 0.79 | 0.59 | 0.89 | 0.30 |
X12 | HP | 221 | 0.37 | 0.12 | 0.36 | 0.19 | 0.80 | 0.62 |
X13 | IP | 33 | 0.19 | 0.07 | 0.21 | 0.04 | 0.29 | 0.25 |
X14 | LP | 177 | 0.09 | 0.11 | 0.11 | -0.24 | 0.32 | 0.56 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0.0e+00 |
IP-Expert | 0.0e+00 |
LP-Expert | 0.0e+00 |
IP-HP | 0.0e+00 |
LP-HP | 0.0e+00 |
LP-IP | 5.9e-06 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC for the current allBiochem data set.
# Each course gets an integer code (its factor-level index), which is used
# as the colour index of the points.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# pch is taken from the cluster label; the title suggests the points are
# drawn as H / I / L -- NOTE(review): confirm clusterLetter is character.
plot(allBiochem$PLC, allBiochem$NS,
     pch = allBiochem$clusterLetter,
     main = "Protein Structure - High(H), Intermediate(I), Low(L) performers",
     ylab = "NS", xlab = "PLC",
     col = markerIntegers)
# unique() keeps first-occurrence order for both vectors, so each course
# label is paired with the colour code used for its points.
legend("topleft", legend = unique(allBiochem$Course_collected),
       col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F21 | Total N= 51 | 63 % | 0 % | 37 % | |||
Sex: males N= 15 ; females N= 36 | male | female | male | female | male | female | |
67 % | 61 % | 0 % | 0 % | 33 % | 39 % | ||
Race: White N= 28 ; Non-white N= 23 | white | non-white | white | non-white | white | non-white | |
61 % | 65 % | 0 % | 0 % | 39 % | 35 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC3321_F22 | Total N= 35 | 66 % | 0 % | 34 % | |||
Sex: males N= 4 ; females N= 31 | male | female | male | female | male | female | |
50 % | 68 % | 0 % | 0 % | 50 % | 32 % | ||
Race: White N= 23 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
65 % | 67 % | 0 % | 0 % | 35 % | 33 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIOC431 | Total N= 102 | 69 % | 0.98 % | 30 % | |||
Sex: males N= 41 ; females N= 61 | male | female | male | female | male | female | |
66 % | 70 % | 2.4 % | 0 % | 32 % | 30 % | ||
Race: White N= 75 ; Non-white N= 27 | white | non-white | white | non-white | white | non-white | |
69 % | 67 % | 0 % | 3.7 % | 31 % | 30 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339F | Total N= 60 | 12 % | 53 % | 35 % | |||
Sex: males N= 24 ; females N= 36 | male | female | male | female | male | female | |
12 % | 11 % | 46 % | 58 % | 42 % | 31 % | ||
Race: White N= 16 ; Non-white N= 44 | white | non-white | white | non-white | white | non-white | |
19 % | 9.1 % | 56 % | 52 % | 25 % | 39 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH339M | Total N= 39 | 59 % | 0 % | 41 % | |||
Sex: males N= 13 ; females N= 26 | male | female | male | female | male | female | |
54 % | 62 % | 0 % | 0 % | 46 % | 38 % | ||
Race: White N= 10 ; Non-white N= 29 | white | non-white | white | non-white | white | non-white | |
80 % | 52 % | 0 % | 0 % | 20 % | 48 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BCH369 | Total N= 133 | 45 % | 0 % | 55 % | |||
Sex: males N= 31 ; females N= 101 | male | female | male | female | male | female | |
32 % | 49 % | 0 % | 0 % | 68 % | 51 % | ||
Race: White N= 38 ; Non-white N= 95 | white | non-white | white | non-white | white | non-white | |
42 % | 46 % | 0 % | 0 % | 58 % | 54 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BIO206 | Total N= 11 | 55 % | 0 % | 45 % | |||
Sex: males N= 2 ; females N= 9 | male | female | male | female | male | female | |
50 % | 56 % | 0 % | 0 % | 50 % | 44 % | ||
Race: White N= 2 ; Non-white N= 9 | white | non-white | white | non-white | white | non-white | |
100 % | 44 % | 0 % | 0 % | 0 % | 56 % |