# Load the student (UMR) and expert survey exports, then split each one by
# survey so every survey can be analysed on its own.
#
# NOTE(review): setwd() ties the script to one machine's directory layout;
# it is kept because the relative read.csv() paths below depend on it.
setwd("~/Research/02b Neural Network Research UMR/Data + Analysis/Clustering_Xavier")

umr <- read.csv("UMR_all_for_R_with_courses.csv", header = TRUE)

# "Biochem 1" appears in two terms; relabel it per term so the Fall 2021 and
# Fall 2022 offerings show up as separate courses downstream.
fall22_rows <- which(umr$Term_collected == "Fall2022")
umr$Course_collected[fall22_rows] <- gsub(
  "Biochem 1", "BiocF22", umr$Course_collected[fall22_rows]
)
fall21_rows <- which(umr$Term_collected == "Fall2021")
umr$Course_collected[fall21_rows] <- gsub(
  "Biochem 1", "BiocF21", umr$Course_collected[fall21_rows]
)

# One data frame of student responses per survey.
umrs1 <- umr[which(umr$Survey == "ES_Chemical_Reaction"), ]
umrs2 <- umr[which(umr$Survey == "ES_Glucosidase"), ]
umrs3 <- umr[which(umr$Survey == "Nucleic_Acids"), ]
umrs4 <- umr[which(umr$Survey == "Oxygen_Binding"), ]
umrs5 <- umr[which(umr$Survey == "Protein_Structure"), ]

expert <- read.csv("Experts_all_for_R.csv", header = TRUE)

# One data frame of expert responses per survey, numbered like umrs1..umrs5.
# NOTE(review): the expert file is filtered on "ES_Chemical_Equation" while the
# student file uses "ES_Chemical_Reaction" -- presumably the two exports label
# the same survey differently; confirm against the CSV.
exs1 <- expert[which(expert$Survey == "ES_Chemical_Equation"), ]
exs2 <- expert[which(expert$Survey == "ES_Glucosidase"), ]
exs3 <- expert[which(expert$Survey == "Nucleic_Acids"), ]
exs4 <- expert[which(expert$Survey == "Oxygen_Binding"), ]
# NOTE(review): "Protein_Strcuture" looks like a misspelling of
# "Protein_Structure". The literal is left untouched because it must match the
# Survey values in Experts_all_for_R.csv; if the file spells it correctly this
# filter selects zero rows. Confirm and fix whichever side is wrong.
exs5 <- expert[which(expert$Survey == "Protein_Strcuture"), ]
library(psych)
# Select the analysis columns for one survey, cluster students on (PLC, NS)
# and label the clusters as high / intermediate / low performers.
#
# Args:
#   umrs1: data frame of student responses for a single survey. Must contain
#     the columns Institution, Course_collected, Deidentifier, Sex_birth,
#     Race_ethnicity, Coherency, NS, actual_year and PLC.
#
# Returns:
#   A data frame with those nine columns (Coherency, NS and PLC coerced to
#   numeric; Course_collected as a factor with a fixed level order) plus:
#     race_binary   "White" when Race_ethnicity is "White/Caucasian",
#                   otherwise "Non-white"
#     cluster       k-means cluster id (1..3)
#     clusterLetter "HP" for the cluster with the highest mean PLC, "LP" for
#                   the lowest, "IP" for the remaining one
analyzeUMRCourses <- function(umrs1) {
  # Disabled code retained from the original:
  #allBiochem = data.frame(matrix(ncol = 8,nrow=0))
  #myCols = c("Institution", "Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","PLC","NS")
  #colnames(allBiochem) = myCols
  #allBiochem = rbind(allBiochem,otherss1[myCols])
  ##remove UT/BIO206 and all Dennison and non-Bioc3321 at UMR
  #allBiochem = allBiochem[! grepl("Dennison",allBiochem$Institution ),]
  #allBiochem = allBiochem[! grepl("BIO206",allBiochem$Course_collected),]
  #tempo = umrs1[grep("BIOC3321",umrs1$Course_collected),]
  #allBiochem = rbind(allBiochem,tempo[myCols])

  keep_cols <- c(
    "Institution", "Course_collected", "Deidentifier", "Sex_birth",
    "Race_ethnicity", "Coherency", "NS", "actual_year", "PLC"
  )
  allBiochem <- umrs1[, keep_cols]

  allBiochem$Coherency <- as.numeric(allBiochem$Coherency)
  allBiochem$NS <- as.numeric(allBiochem$NS)
  allBiochem$PLC <- as.numeric(allBiochem$PLC)

  allBiochem$race_binary <- ifelse(
    allBiochem$Race_ethnicity == "White/Caucasian", "White", "Non-white"
  )

  # k-means starts from randomly chosen centres, so the seed is fixed to make
  # the cluster assignment reproducible from run to run.
  set.seed(42)
  scores <- cbind(allBiochem$PLC, allBiochem$NS)
  allBiochem$cluster <- kmeans(scale(scores), 3)$cluster

  # Cluster ids are arbitrary, so find which id is HP / LP from the mean PLC
  # of each cluster. which.max()/which.min() avoid comparing floating-point
  # means with `==`, and setdiff() replaces the sum-of-ids if-ladder.
  mean_plc <- tapply(allBiochem$PLC, allBiochem$cluster, mean, na.rm = TRUE)
  HPgroup <- as.integer(names(mean_plc)[which.max(mean_plc)])
  LPgroup <- as.integer(names(mean_plc)[which.min(mean_plc)])
  IPgroup <- setdiff(seq_len(3), c(HPgroup, LPgroup))[1]

  # "Oops" is kept as a visible marker should a row match none of the ids.
  allBiochem$clusterLetter <- ifelse(
    allBiochem$cluster == HPgroup, "HP",
    ifelse(
      allBiochem$cluster == LPgroup, "LP",
      ifelse(allBiochem$cluster == IPgroup, "IP", "Oops")
    )
  )

  # Fixed level order controls the course order in plots and tables; a course
  # name that is not listed here becomes NA.
  allBiochem$Course_collected <- factor(
    allBiochem$Course_collected,
    levels = c(
      "Gen + Organic 1", "O Chem 1", "O Chem 2", "Gen Chem 2",
      "BiocF21", "BiocF22", "Biochem 2"
    )
  )

  allBiochem
}
# Print one descriptive-statistics table of PLC per grouping variable.
#
# Args:
#   allBiochem: data frame with a PLC column and the grouping columns
#     clusterLetter, Institution, actual_year, Course_collected, Sex_birth
#     and race_binary.
#
# Side effects: prints six knitr::kable tables. Requires psych (describeBy)
# and knitr.
buildTables <- function(allBiochem) {
  # grouping column -> table caption, in the order the tables are printed
  captions <- c(
    clusterLetter = "PLC by cluster group",
    Institution = "PLC by institution",
    actual_year = "PLC by Actual Year",
    Course_collected = "PLC by course",
    Sex_birth = "PLC by Sex",
    race_binary = "PLC by Race"
  )
  shown_cols <- c(2, 4, 5, 6, 7, 8, 9, 10, 11, 12)
  for (group_col in names(captions)) {
    mata <- describeBy(
      allBiochem$PLC, allBiochem[[group_col]],
      mat = TRUE, digits = 2
    )
    print(knitr::kable(mata[, shown_cols], caption = captions[[group_col]]))
  }
  invisible(NULL)
}

# Private helper shared by calcStats() and calcStats2(): for every distinct
# value of `column` (except "Expert" and NA) emit an HTML heading and a table
# giving the share of HP / IP / LP students overall, by sex and by race.
#
# Args:
#   allBiochem: data frame with `column`, clusterLetter, Sex_birth and
#     race_binary.
#   column: name of the column whose values define the categories.
#
# Side effects: writes raw HTML with cat(); meant for an R Markdown chunk that
# emits results as-is.
.print_cluster_breakdown <- function(allBiochem, column) {
  category <- allBiochem[[column]]
  cluster_letter <- allBiochem$clusterLetter

  # Percentage of a subgroup that falls in one performance cluster, to two
  # significant digits (NaN when the subgroup is empty).
  pct <- function(counts, level) {
    signif(counts[[level]] / counts[["N"]] * 100, digits = 2)
  }

  # as.character(): a factor would otherwise be iterated by its level labels
  # only implicitly; make the coercion explicit.
  for (course in as.character(unique(category))) {
    # An NA category would make the comparison below fail inside if().
    if (is.na(course) || course == "Expert") next

    header <- paste("<b>Results for category: ", course, "</b></br></br>")
    cat(header)

    in_category <- category == course
    # Size of a subgroup and its split over the three clusters. Rows with
    # missing values are left out instead of turning every count into NA.
    tally <- function(mask) {
      selected <- in_category & mask
      c(
        N = sum(selected, na.rm = TRUE),
        HP = sum(selected & cluster_letter == "HP", na.rm = TRUE),
        IP = sum(selected & cluster_letter == "IP", na.rm = TRUE),
        LP = sum(selected & cluster_letter == "LP", na.rm = TRUE)
      )
    }
    total <- tally(TRUE)
    male <- tally(allBiochem$Sex_birth == "Male")
    female <- tally(allBiochem$Sex_birth == "Female")
    white <- tally(allBiochem$race_binary == "White")
    nonwhite <- tally(allBiochem$race_binary == "Non-white")

    output <- paste("<table >
  <thead>
    <tr>
      <th colspan='2'></th>
      <th colspan='2'>High Performers</th>
      <th colspan='2'>Intermediate Performers</th>
      <th colspan='2'>Low Performers</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td rowspan='5'>", course, " </td>
      <td>Total N=", total[["N"]], "</td>
      <td colspan='2'>", pct(total, "HP"), "% </td>
      <td colspan='2'>", pct(total, "IP"), "%</td>
      <td colspan='2'>", pct(total, "LP"), "% </td>
    </tr>
    <tr>
      <td rowspan='2'>Sex: males N=", male[["N"]], "; females N=", female[["N"]], "</td>
      <td>male</td>
      <td>female</td>
      <td>male</td>
      <td>female</td>
      <td>male</td>
      <td>female</td>
    </tr>
    <tr>
      <td>", pct(male, "HP"), "%</td>
      <td>", pct(female, "HP"), "%</td>
      <td>", pct(male, "IP"), "%</td>
      <td>", pct(female, "IP"), "%</td>
      <td>", pct(male, "LP"), "%</td>
      <td>", pct(female, "LP"), "%</td>
    </tr>
    <tr>
      <td rowspan='2'>Race: White N=", white[["N"]], "; Non-white N=", nonwhite[["N"]], "</td>
      <td>white</td>
      <td>non-white</td>
      <td>white</td>
      <td>non-white</td>
      <td>white</td>
      <td>non-white</td>
    </tr>
    <tr>
      <td>", pct(white, "HP"), "%</td>
      <td>", pct(nonwhite, "HP"), "%</td>
      <td>", pct(white, "IP"), "%</td>
      <td>", pct(nonwhite, "IP"), "%</td>
      <td>", pct(white, "LP"), "%</td>
      <td>", pct(nonwhite, "LP"), "%</td>
    </tr>
  </tbody>
</table> ")
    cat(output)
  }
  invisible(NULL)
}

# Emit the HP / IP / LP breakdown tables for every course.
#
# Args:
#   allBiochem: data frame with Course_collected, clusterLetter, Sex_birth
#     and race_binary.
#   mycategory: accepted for backward compatibility but not used; the
#     breakdown is always by Course_collected, as in the original code.
calcStats <- function(allBiochem, mycategory) {
  .print_cluster_breakdown(allBiochem, "Course_collected")
}

# Emit the HP / IP / LP breakdown tables for every student year.
#
# Args:
#   allBiochem: data frame with actual_year, clusterLetter, Sex_birth and
#     race_binary.
#   mycategory: accepted for backward compatibility but not used; the
#     breakdown is always by actual_year, as in the original code.
calcStats2 <- function(allBiochem, mycategory) {
  .print_cluster_breakdown(allBiochem, "actual_year")
}
library(ggplot2)
library(ggpubr)
library(psych)
# Box plot (with jittered points) of `myy` for each level of `myx`.
#
# Args:
#   df: data frame holding both columns.
#   myx: name of the grouping column (x axis, also used for colour).
#   myy: name of the numeric column (y axis).
#   mytitle: plot title.
#   myylab: y-axis label.
#
# Returns:
#   A ggplot object: ggpubr box plot with a dashed line at the overall mean,
#   an ANOVA p-value and per-group t-test p-values against all data.
plotGGbox <- function(df, myx, myy, mytitle, myylab) {
  # Rows with a missing y value cannot be plotted or tested.
  df <- df[complete.cases(df[[myy]]), ]
  maxy <- max(df[[myy]])
  # The lower limit used to be fixed at 0, which hid every negative value
  # (PLC goes below zero). Extend it downward only when the data require it,
  # so plots of non-negative data are unchanged.
  miny <- min(0, min(df[[myy]]))

  ggboxplot(
    df,
    x = myx, y = myy,
    title = mytitle,
    color = myx, add = "jitter", legend = "none", ylab = myylab
  ) +
    rotate_x_text(angle = 45) +
    geom_hline(yintercept = mean(df[[myy]]), linetype = 2) +
    stat_compare_means(method = "anova", label.y = maxy * 1.10) +
    coord_cartesian(ylim = c(miny, maxy * 1.2)) +
    stat_compare_means(
      label = "p.format", size = 2.5, method = "t.test",
      ref.group = ".all.", label.y = maxy * 1.05
    )
}

# Print the Tukey HSD adjusted p-values for all pairwise comparisons of
# `myy` between the levels of `myx`.
#
# Args: as for plotGGbox(); `mytitle` goes into the table caption and
#   `myylab` is unused.
#
# Side effects: prints a knitr::kable table.
getAnova <- function(df, myx, myy, mytitle, myylab) {
  #get anova
  a <- TukeyHSD(aov(df[[myy]] ~ df[[myx]]))
  # The model has a single term, so its comparison matrix is the first
  # element; column 4 holds the adjusted p-values. drop = FALSE keeps the
  # comparison label as a row name even when there is only one comparison
  # (two-level factors), which plain `[, 4]` would discard.
  b <- as.data.frame(a[[1]][, 4, drop = FALSE])
  colnames(b) <- c("Testing statistical significance: p-values")
  print(knitr::kable(b, caption = paste("Anova: ", mytitle)))
}

# Print the box plot, a descriptive-statistics table and the Tukey p-values
# for `myy` grouped by `myx`.
#
# Args: as for plotGGbox().
#
# For the demographic groupings ("Sex_birth", "race_binary") expert rows and
# "Prefer not to answer" responses are removed first.
plotAndTable <- function(df, myx, myy, mytitle, myylab) {
  if (myx %in% c("Sex_birth", "race_binary")) {
    df <- df[!grepl("(?i)Expert", df$Course_collected), ]
    # NOTE(review): "(?)" is reproduced unchanged; it may have been meant as
    # "(?i)" like the pattern above -- confirm before altering the regex.
    df <- df[!grepl("(?)Prefer not to answer", df$Sex_birth), ]
  }

  print(plotGGbox(df, myx, myy, mytitle, myylab))

  stats_table <- describeBy(df[[myy]], df[[myx]], mat = TRUE, digits = 2)
  print(knitr::kable(
    stats_table[, c(2, 4, 5, 6, 7, 10, 11, 12)],
    caption = paste("Statistics of ", myylab, " based on the category", myx)
  ))

  getAnova(df, myx, myy, mytitle, myylab)
}

# Append the expert records to the student data as an extra "Expert" group.
#
# Args:
#   alldf: student data frame containing (at least) PLC, NS and Coherency.
#   experts: expert data frame with PLC, NS and Coherency columns.
#
# Returns:
#   `alldf` with one extra row per expert. In those rows PLC, NS and
#   Coherency come from `experts`; every other column holds the string
#   "Expert".
addExperts <- function(alldf, experts) {
  # The original body overwrote `alldf` with the global `allBiochem`, so the
  # argument was silently ignored. Only the argument is used now.
  if (nrow(experts) == 0) {
    return(alldf)
  }

  ex_new <- as.data.frame(matrix(ncol = ncol(alldf), nrow = nrow(experts)))
  colnames(ex_new) <- colnames(alldf)
  #colnames(ex_new) = c("Institution", "Course_collected", "Deidentifier","Sex_birth","Race_ethnicity","Coherency","NS","actual_year","PLC","cluster","race_binary","clusterLeter")

  # Fill every column (previously a hard-coded 1:12) with the group label,
  # then overwrite the three score columns with the real expert values.
  ex_new[, seq_len(ncol(alldf))] <- "Expert"
  ex_new$PLC <- experts$PLC
  ex_new$NS <- experts$NS
  ex_new$Coherency <- experts$Coherency

  rbind(alldf, ex_new)
}
library(dplyr)
library(corrplot)
# Run a chi-square test of independence on a two-column categorical data
# frame, print the p-value as HTML and plot the residuals.
#
# Args:
#   a: data frame whose columns are the two categorical variables.
#
# Side effects: writes HTML with cat() and draws a corrplot of the residuals.
plotChi <- function(a) {
  #I need to use droplevels otherwise it was showing Expert with zeros as a ghost category?
  b <- chisq.test(table(droplevels(a)))
  cat(paste("<p><b>The Chi-square analysis gives a p=", round(b$p.value, 5), "</b></p>"))
  cat(paste("<p><b>Residuals analysis:</b></p>"))
  cat("A negative residual implies that the measured value is lower than expected and a positive value higher than expected</br>")
  corrplot(b$residuals, is.cor = FALSE)
  #normalize it
  #contrib <- 100*b$residuals^2/b$statistic
  #round(contrib, 3)
  #corrplot(contrib, is.cor = FALSE)
  #corrplot(contrib, is.cor = FALSE, col.lim = c(0.3,1) )
}

# Stacked bar chart of `myy` within each level of `myx`, followed by the
# chi-square analysis of the same two variables.
#
# Args:
#   df: data frame; its first column is searched for "Expert" to drop the
#     expert rows.
#   myx: the course or demographic variable (independent variable).
#   myy: typically "clusterLetter" (dependent variable).
#   myxlabel, myylabel, mytitle: axis labels and plot title.
plotBarAndCorr <- function(df, myx, myy, myxlabel, myylabel, mytitle) {
  #remove experts, not useful for the chisquare analysis
  a <- df[!grepl("Expert", df[, 1]), ]
  if (myx == "Sex_birth") {
    # NOTE(review): "(?)" is reproduced unchanged; possibly meant "(?i)".
    a <- a[!grepl("(?)Prefer not to answer", a$Sex_birth), ]
  }
  #select the two categorical variables
  a <- a[, c(myy, myx)]
  print(plotBarCategories(a, myx, myy, myxlabel, myylabel, mytitle))
  plotChi(a)
}

# Stacked bar chart: one bar per level of `myx`, filled by `myy`, each
# segment labelled with its percentage of the bar.
#
# Args:
#   a: data frame containing the columns named by `myx` and `myy`.
#   myx, myy: column names given as strings.
#   myxlabel, myylabel, mytitle: axis labels and plot title.
#
# Returns:
#   A ggplot object.
plotBarCategories <- function(a, myx, myy, myxlabel, myylabel, mytitle) {
  #using aes_string instead of aes because colnames are variables
  #ggplot(a, aes_string(x=myx,fill=myy)) + geom_bar()
  #c=prop.table(table(a$clusterLetter))
  #scales::percent(as.double(z))
  #a %>% select(clusterLetter) %>% table() %>% prop.table() %>% as.double() %>% scales::percent()
  #this one
  #myx = enquo(myx)
  #myy = enquo(myy)
  a %>%
    count(!!sym(myy), !!sym(myx)) %>%
    group_by(!!sym(myx)) %>%
    mutate(lab = paste0(round(prop.table(n) * 100, 2), '%')) %>%
    ggplot(aes(!!sym(myx), n, fill = !!sym(myy))) +
    geom_col() +
    geom_text(aes(label = lab), position = 'stack', vjust = 1.5) +
    labs(x = myxlabel, y = myylabel, title = mytitle)
}
What was learned from “clustering_indeces_v2” analysis about indicators
Meaning of PLC and NS:
* PLC: path length correlation. How connected two nodes are; the closer to one, the more connected. Compared to the experts.
* NS: neighborhood similarity. Compared to the experts.
First we will compare how the PLC score is distributed among demographics and courses. Then we will combine students’ PLC and NS indices as a measurement of student performance. We then cluster the (PLC, NS) pairs into three groups: low performers, intermediate performers, and high performers. We then analyze the composition of those three groups by demographics, year, and course. The year is not based on the number of credits; rather, students in CHEM1 or CHEM2 are labeled first_year, students in CHEM3 or CHEM4 second_year, and students in Bioc1 or Bioc2 third_year.
We are comparing how the PLC score is significantly different among the different categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”
#
# Survey 1: cluster the students (umrs1), then append the matching expert
# records (exs1) so experts appear as their own group in the plots and tables.
allBiochem <- analyzeUMRCourses(umrs1)
#adding experts
allBiochem <- addExperts(allBiochem, exs1)
#buildTables(allBiochem)
plotAndTable(allBiochem, "Course_collected", "PLC", "PLC: Course", "PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 109 | 0.23 | 0.17 | 0.26 | -0.20 | 0.51 | 0.71 |
X12 | O Chem 1 | 100 | 0.26 | 0.16 | 0.28 | -0.16 | 0.55 | 0.71 |
X13 | O Chem 2 | 76 | 0.29 | 0.15 | 0.30 | -0.40 | 0.54 | 0.94 |
X14 | Gen Chem 2 | 57 | 0.25 | 0.15 | 0.24 | -0.05 | 0.53 | 0.58 |
X15 | BiocF21 | 58 | 0.40 | 0.18 | 0.45 | -0.18 | 0.67 | 0.85 |
X16 | BiocF22 | 43 | 0.46 | 0.11 | 0.47 | 0.23 | 0.72 | 0.49 |
X17 | Biochem 2 | 22 | 0.40 | 0.20 | 0.43 | -0.06 | 0.67 | 0.73 |
X18 | Expert | 6 | 0.67 | 0.12 | 0.69 | 0.49 | 0.82 | 0.33 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.9652073 |
O Chem 2-Gen + Organic 1 | 0.3643457 |
Gen Chem 2-Gen + Organic 1 | 0.9997580 |
BiocF21-Gen + Organic 1 | 0.0000000 |
BiocF22-Gen + Organic 1 | 0.0000000 |
Biochem 2-Gen + Organic 1 | 0.0005200 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 0.9368395 |
Gen Chem 2-O Chem 1 | 0.9999042 |
BiocF21-O Chem 1 | 0.0000038 |
BiocF22-O Chem 1 | 0.0000000 |
Biochem 2-O Chem 1 | 0.0068022 |
Expert-O Chem 1 | 0.0000001 |
Gen Chem 2-O Chem 2 | 0.8486153 |
BiocF21-O Chem 2 | 0.0016387 |
BiocF22-O Chem 2 | 0.0000004 |
Biochem 2-O Chem 2 | 0.0934819 |
Expert-O Chem 2 | 0.0000009 |
BiocF21-Gen Chem 2 | 0.0000134 |
BiocF22-Gen Chem 2 | 0.0000000 |
Biochem 2-Gen Chem 2 | 0.0057281 |
Expert-Gen Chem 2 | 0.0000001 |
BiocF22-BiocF21 | 0.5102566 |
Biochem 2-BiocF21 | 1.0000000 |
Expert-BiocF21 | 0.0025478 |
Biochem 2-BiocF22 | 0.7580255 |
Expert-BiocF22 | 0.0645927 |
Expert-Biochem 2 | 0.0058951 |
plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 6 | 0.67 | 0.12 | 0.69 | 0.49 | 0.82 | 0.33 |
X12 | first_year | 209 | 0.24 | 0.17 | 0.27 | -0.20 | 0.55 | 0.75 |
X13 | second_year | 133 | 0.27 | 0.15 | 0.28 | -0.40 | 0.54 | 0.94 |
X14 | third_year | 123 | 0.42 | 0.16 | 0.46 | -0.18 | 0.72 | 0.90 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0014380 |
second_year-first_year | 0.5354066 |
third_year-first_year | 0.0000000 |
third_year-second_year | 0.0000000 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 189 | 0.26 | 0.18 | 0.26 | -0.40 | 0.72 | 1.12 |
X12 | White | 272 | 0.32 | 0.17 | 0.35 | -0.16 | 0.67 | 0.83 |
Testing statistical significance: p-values |
---|
0.0001431 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 369 | 0.30 | 0.18 | 0.31 | -0.40 | 0.67 | 1.07 |
X12 | Male | 92 | 0.31 | 0.19 | 0.33 | -0.18 | 0.72 | 0.90 |
Testing statistical significance: p-values |
---|
0.547014 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 109 | 0.22 | 0.08 | 0.22 | 0.04 | 0.41 | 0.37 |
X12 | O Chem 1 | 100 | 0.22 | 0.09 | 0.21 | 0.04 | 0.47 | 0.43 |
X13 | O Chem 2 | 76 | 0.25 | 0.08 | 0.24 | 0.08 | 0.41 | 0.33 |
X14 | Gen Chem 2 | 57 | 0.21 | 0.06 | 0.22 | 0.08 | 0.37 | 0.29 |
X15 | BiocF21 | 58 | 0.22 | 0.07 | 0.23 | 0.09 | 0.41 | 0.32 |
X16 | BiocF22 | 43 | 0.24 | 0.07 | 0.25 | 0.13 | 0.42 | 0.29 |
X17 | Biochem 2 | 22 | 0.23 | 0.09 | 0.21 | 0.07 | 0.42 | 0.35 |
X18 | Expert | 6 | 0.37 | 0.11 | 0.34 | 0.28 | 0.57 | 0.29 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 1.0000000 |
O Chem 2-Gen + Organic 1 | 0.1841246 |
Gen Chem 2-Gen + Organic 1 | 0.9985103 |
BiocF21-Gen + Organic 1 | 0.9999784 |
BiocF22-Gen + Organic 1 | 0.6710577 |
Biochem 2-Gen + Organic 1 | 0.9999959 |
Expert-Gen + Organic 1 | 0.0001402 |
O Chem 2-O Chem 1 | 0.1649445 |
Gen Chem 2-O Chem 1 | 0.9994987 |
BiocF21-O Chem 1 | 0.9999028 |
BiocF22-O Chem 1 | 0.6324514 |
Biochem 2-O Chem 1 | 0.9999833 |
Expert-O Chem 1 | 0.0001260 |
Gen Chem 2-O Chem 2 | 0.1120421 |
BiocF21-O Chem 2 | 0.5781042 |
BiocF22-O Chem 2 | 0.9999652 |
Biochem 2-O Chem 2 | 0.8941579 |
Expert-O Chem 2 | 0.0066311 |
BiocF21-Gen Chem 2 | 0.9908048 |
BiocF22-Gen Chem 2 | 0.4529292 |
Biochem 2-Gen Chem 2 | 0.9980973 |
Expert-Gen Chem 2 | 0.0000762 |
BiocF22-BiocF21 | 0.9076963 |
Biochem 2-BiocF21 | 1.0000000 |
Expert-BiocF21 | 0.0003856 |
Biochem 2-BiocF22 | 0.9805426 |
Expert-BiocF22 | 0.0053866 |
Expert-Biochem 2 | 0.0014475 |
plotAndTable(allBiochem,"actual_year","NS","NS: Year","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 6 | 0.37 | 0.11 | 0.34 | 0.28 | 0.57 | 0.29 |
X12 | first_year | 209 | 0.22 | 0.08 | 0.21 | 0.04 | 0.47 | 0.43 |
X13 | second_year | 133 | 0.23 | 0.08 | 0.23 | 0.08 | 0.41 | 0.33 |
X14 | third_year | 123 | 0.23 | 0.07 | 0.23 | 0.07 | 0.42 | 0.35 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000246 |
second_year-Expert | 0.0001911 |
third_year-Expert | 0.0001494 |
second_year-first_year | 0.3752155 |
third_year-first_year | 0.5451535 |
third_year-second_year | 0.9963083 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 189 | 0.22 | 0.08 | 0.21 | 0.07 | 0.41 | 0.34 |
X12 | White | 272 | 0.24 | 0.08 | 0.23 | 0.04 | 0.47 | 0.43 |
Testing statistical significance: p-values |
---|
0.0104875 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 369 | 0.23 | 0.08 | 0.22 | 0.04 | 0.47 | 0.43 |
X12 | Male | 92 | 0.22 | 0.07 | 0.22 | 0.04 | 0.40 | 0.36 |
Testing statistical significance: p-values |
---|
0.5026976 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. It is only reproducible when the random seed is fixed before k-means is run, here with “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 6 | 0.67 | 0.12 | 0.69 | 0.49 | 0.82 | 0.33 |
X12 | HP | 183 | 0.41 | 0.10 | 0.41 | 0.23 | 0.72 | 0.49 |
X13 | IP | 133 | 0.36 | 0.12 | 0.37 | 0.07 | 0.63 | 0.56 |
X14 | LP | 149 | 0.10 | 0.12 | 0.14 | -0.40 | 0.29 | 0.69 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0.0000005 |
IP-Expert | 0.0000000 |
LP-Expert | 0.0000000 |
IP-HP | 0.0007267 |
LP-HP | 0.0000000 |
LP-IP | 0.0000000 |
Are cluster groups unevenly distributed among these categories? A chi-square analysis will give us the probability that all three cluster groups (HP,IP,LP) contain statistically similar proportions of this category (course, year, sex, race…)
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC. Each point is drawn with its cluster label
# as the plotting symbol (the title refers to H / I / L markers) and coloured
# by course via an integer code per course.
markerIntegers <- as.integer(as.factor(allBiochem$Course_collected))
plot(
  allBiochem$PLC, allBiochem$NS,
  pch = allBiochem$clusterLetter,
  main = "ES_Chemical_Reaction - High(H), Intermediate(I), Low(L) performers",
  ylab = "NS", xlab = "PLC", col = markerIntegers
)
# unique() is applied to both vectors in row order, so each label lines up
# with its colour code.
legend("topleft", legend = unique(allBiochem$Course_collected), col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats(allBiochem, "Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen + Organic 1 | Total N= 109 | 27 % | 29 % | 44 % | |||
Sex: males N= 8 ; females N= 99 | male | female | male | female | male | female | |
50 % | 25 % | 0 % | 31 % | 50 % | 43 % | ||
Race: White N= 62 ; Non-white N= 47 | white | non-white | white | non-white | white | non-white | |
31 % | 21 % | 32 % | 26 % | 37 % | 53 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 2 | Total N= 76 | 33 % | 39 % | 28 % | |||
Sex: males N= 15 ; females N= 60 | male | female | male | female | male | female | |
60 % | 27 % | 27 % | 43 % | 13 % | 30 % | ||
Race: White N= 48 ; Non-white N= 28 | white | non-white | white | non-white | white | non-white | |
38 % | 25 % | 44 % | 32 % | 19 % | 43 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF21 | Total N= 58 | 62 % | 21 % | 17 % | |||
Sex: males N= 18 ; females N= 40 | male | female | male | female | male | female | |
61 % | 62 % | 28 % | 18 % | 11 % | 20 % | ||
Race: White N= 32 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
59 % | 65 % | 25 % | 15 % | 16 % | 19 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen Chem 2 | Total N= 57 | 37 % | 21 % | 42 % | |||
Sex: males N= 18 ; females N= 39 | male | female | male | female | male | female | |
44 % | 33 % | 11 % | 26 % | 44 % | 41 % | ||
Race: White N= 33 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
30 % | 46 % | 27 % | 12 % | 42 % | 42 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 1 | Total N= 100 | 31 % | 30 % | 39 % | |||
Sex: males N= 19 ; females N= 80 | male | female | male | female | male | female | |
32 % | 31 % | 26 % | 31 % | 42 % | 38 % | ||
Race: White N= 56 ; Non-white N= 44 | white | non-white | white | non-white | white | non-white | |
36 % | 25 % | 32 % | 27 % | 32 % | 48 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Biochem 2 | Total N= 22 | 59 % | 23 % | 18 % | |||
Sex: males N= 9 ; females N= 13 | male | female | male | female | male | female | |
56 % | 62 % | 11 % | 31 % | 33 % | 7.7 % | ||
Race: White N= 14 ; Non-white N= 8 | white | non-white | white | non-white | white | non-white | |
64 % | 50 % | 29 % | 12 % | 7.1 % | 38 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF22 | Total N= 43 | 65 % | 28 % | 7 % | |||
Sex: males N= 5 ; females N= 38 | male | female | male | female | male | female | |
60 % | 66 % | 20 % | 29 % | 20 % | 5.3 % | ||
Race: White N= 28 ; Non-white N= 15 | white | non-white | white | non-white | white | non-white | |
61 % | 73 % | 39 % | 6.7 % | 0 % | 20 % |
plotBarAndCorr(allBiochem,"actual_year","clusterLetter","Year","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# Scatter plot of NS against PLC. Each point is drawn with its cluster label
# as the plotting symbol (the title refers to H / I / L markers) and coloured
# by student year via an integer code per year.
markerIntegers <- as.integer(as.factor(allBiochem$actual_year))
plot(
  allBiochem$PLC, allBiochem$NS,
  pch = allBiochem$clusterLetter,
  main = "ES_Chemical_Reaction - High(H), Intermediate(I), Low(L) performers",
  ylab = "NS", xlab = "PLC", col = markerIntegers
)
# unique() is applied to both vectors in row order, so each label lines up
# with its colour code.
legend("topleft", legend = unique(allBiochem$actual_year), col = unique(markerIntegers), lty = 1:1, cex = 0.8)
calcStats2(allBiochem, "actual_year")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
first_year | Total N= 209 | 29 % | 30 % | 42 % | |||
Sex: males N= 27 ; females N= 179 | male | female | male | female | male | female | |
37 % | 28 % | 19 % | 31 % | 44 % | 41 % | ||
Race: White N= 118 ; Non-white N= 91 | white | non-white | white | non-white | white | non-white | |
33 % | 23 % | 32 % | 26 % | 35 % | 51 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
second_year | Total N= 133 | 35 % | 32 % | 34 % | |||
Sex: males N= 33 ; females N= 99 | male | female | male | female | male | female | |
52 % | 29 % | 18 % | 36 % | 30 % | 34 % | ||
Race: White N= 81 ; Non-white N= 52 | white | non-white | white | non-white | white | non-white | |
35 % | 35 % | 37 % | 23 % | 28 % | 42 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
third_year | Total N= 123 | 63 % | 24 % | 14 % | |||
Sex: males N= 32 ; females N= 91 | male | female | male | female | male | female | |
59 % | 64 % | 22 % | 24 % | 19 % | 12 % | ||
Race: White N= 74 ; Non-white N= 49 | white | non-white | white | non-white | white | non-white | |
61 % | 65 % | 31 % | 12 % | 8.1 % | 22 % |
cat("<b>Chi-square analysis of Performance by Sex and Race considering different years</b></br>")
Chi-square analysis of Performance by Sex and Race considering different years
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 1st year")
The Chi-square analysis gives a p= 0.36146
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 2nd year")
The Chi-square analysis gives a p= 0.04505
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 3rd year")
The Chi-square analysis gives a p= 0.64232
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 1st year")
The Chi-square analysis gives a p= 0.06549
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 2nd year")
The Chi-square analysis gives a p= 0.15212
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 3rd year")
The Chi-square analysis gives a p= 0.01157
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
We are comparing how the PLC score is significantly different among the different categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”
#
# Survey 2: cluster the students (umrs2), then append the matching expert
# records (exs2) so experts appear as their own group in the plots and tables.
allBiochem <- analyzeUMRCourses(umrs2)
allBiochem <- addExperts(allBiochem, exs2)
#buildTables(allBiochem)
plotAndTable(allBiochem, "Course_collected", "PLC", "PLC: Course", "PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 109 | 0.24 | 0.15 | 0.26 | -0.15 | 0.50 | 0.65 |
X12 | O Chem 1 | 100 | 0.28 | 0.17 | 0.27 | -0.19 | 0.60 | 0.78 |
X13 | O Chem 2 | 76 | 0.29 | 0.14 | 0.28 | -0.25 | 0.56 | 0.81 |
X14 | Gen Chem 2 | 57 | 0.24 | 0.18 | 0.25 | -0.09 | 0.61 | 0.70 |
X15 | BiocF21 | 58 | 0.44 | 0.17 | 0.46 | -0.16 | 0.68 | 0.84 |
X16 | BiocF22 | 43 | 0.47 | 0.10 | 0.48 | 0.24 | 0.65 | 0.41 |
X17 | Biochem 2 | 22 | 0.41 | 0.19 | 0.45 | 0.03 | 0.72 | 0.69 |
X18 | Expert | 8 | 0.72 | 0.09 | 0.70 | 0.59 | 0.82 | 0.23 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.5497471 |
O Chem 2-Gen + Organic 1 | 0.3654800 |
Gen Chem 2-Gen + Organic 1 | 0.9999957 |
BiocF21-Gen + Organic 1 | 0.0000000 |
BiocF22-Gen + Organic 1 | 0.0000000 |
Biochem 2-Gen + Organic 1 | 0.0001120 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 0.9999098 |
Gen Chem 2-O Chem 1 | 0.8876521 |
BiocF21-O Chem 1 | 0.0000001 |
BiocF22-O Chem 1 | 0.0000000 |
Biochem 2-O Chem 1 | 0.0116580 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.7414446 |
BiocF21-O Chem 2 | 0.0000027 |
BiocF22-O Chem 2 | 0.0000001 |
Biochem 2-O Chem 2 | 0.0364355 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.0000000 |
BiocF22-Gen Chem 2 | 0.0000000 |
Biochem 2-Gen Chem 2 | 0.0009537 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 0.9665517 |
Biochem 2-BiocF21 | 0.9963039 |
Expert-BiocF21 | 0.0000540 |
Biochem 2-BiocF22 | 0.8097147 |
Expert-BiocF22 | 0.0009480 |
Expert-Biochem 2 | 0.0000488 |
plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 8 | 0.72 | 0.09 | 0.70 | 0.59 | 0.82 | 0.23 |
X12 | first_year | 209 | 0.26 | 0.16 | 0.27 | -0.19 | 0.60 | 0.78 |
X13 | second_year | 133 | 0.27 | 0.16 | 0.27 | -0.25 | 0.61 | 0.86 |
X14 | third_year | 123 | 0.44 | 0.15 | 0.47 | -0.16 | 0.72 | 0.88 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000105 |
second_year-first_year | 0.8952110 |
third_year-first_year | 0.0000000 |
third_year-second_year | 0.0000000 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 189 | 0.28 | 0.18 | 0.28 | -0.25 | 0.65 | 0.90 |
X12 | White | 272 | 0.34 | 0.17 | 0.36 | -0.15 | 0.72 | 0.87 |
Testing statistical significance: p-values |
---|
0.0002627 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 369 | 0.31 | 0.17 | 0.32 | -0.25 | 0.72 | 0.97 |
X12 | Male | 92 | 0.31 | 0.19 | 0.34 | -0.16 | 0.65 | 0.81 |
Testing statistical significance: p-values |
---|
0.9661744 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 109 | 0.31 | 0.24 | 0.27 | -0.36 | 0.83 | 1.19 |
X12 | O Chem 1 | 100 | 0.23 | 0.08 | 0.24 | 0.04 | 0.45 | 0.41 |
X13 | O Chem 2 | 76 | 0.24 | 0.08 | 0.25 | 0.04 | 0.41 | 0.37 |
X14 | Gen Chem 2 | 57 | 0.21 | 0.07 | 0.21 | 0.09 | 0.47 | 0.38 |
X15 | BiocF21 | 58 | 0.25 | 0.09 | 0.26 | 0.04 | 0.45 | 0.41 |
X16 | BiocF22 | 43 | 0.25 | 0.07 | 0.23 | 0.10 | 0.41 | 0.31 |
X17 | Biochem 2 | 22 | 0.25 | 0.07 | 0.26 | 0.13 | 0.39 | 0.26 |
X18 | Expert | 8 | 0.40 | 0.06 | 0.42 | 0.29 | 0.47 | 0.17 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.0015567 |
O Chem 2-Gen + Organic 1 | 0.0187765 |
Gen Chem 2-Gen + Organic 1 | 0.0004946 |
BiocF21-Gen + Organic 1 | 0.1457790 |
BiocF22-Gen + Organic 1 | 0.2016682 |
Biochem 2-Gen + Organic 1 | 0.5667910 |
Expert-Gen + Organic 1 | 0.5832677 |
O Chem 2-O Chem 1 | 0.9999463 |
Gen Chem 2-O Chem 1 | 0.9875516 |
BiocF21-O Chem 1 | 0.9931104 |
BiocF22-O Chem 1 | 0.9987084 |
Biochem 2-O Chem 1 | 0.9995817 |
Expert-O Chem 1 | 0.0181461 |
Gen Chem 2-O Chem 2 | 0.9412761 |
BiocF21-O Chem 2 | 0.9998844 |
BiocF22-O Chem 2 | 0.9999929 |
Biochem 2-O Chem 2 | 0.9999957 |
Expert-O Chem 2 | 0.0335475 |
BiocF21-Gen Chem 2 | 0.8113878 |
BiocF22-Gen Chem 2 | 0.9067041 |
Biochem 2-Gen Chem 2 | 0.9632280 |
Expert-Gen Chem 2 | 0.0065152 |
BiocF22-BiocF21 | 1.0000000 |
Biochem 2-BiocF21 | 1.0000000 |
Expert-BiocF21 | 0.0677658 |
Biochem 2-BiocF22 | 1.0000000 |
Expert-BiocF22 | 0.0692078 |
Expert-Biochem 2 | 0.1226520 |
plotAndTable(allBiochem,"actual_year","NS","NS: Year","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 8 | 0.40 | 0.06 | 0.42 | 0.29 | 0.47 | 0.17 |
X12 | first_year | 209 | 0.27 | 0.19 | 0.25 | -0.36 | 0.83 | 1.19 |
X13 | second_year | 133 | 0.23 | 0.07 | 0.24 | 0.04 | 0.47 | 0.43 |
X14 | third_year | 123 | 0.25 | 0.08 | 0.25 | 0.04 | 0.45 | 0.41 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0482549 |
second_year-Expert | 0.0035754 |
third_year-Expert | 0.0143046 |
second_year-first_year | 0.0228005 |
third_year-first_year | 0.4577446 |
third_year-second_year | 0.6250317 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 189 | 0.23 | 0.14 | 0.22 | -0.36 | 0.71 | 1.07 |
X12 | White | 272 | 0.27 | 0.13 | 0.25 | -0.13 | 0.83 | 0.96 |
Testing statistical significance: p-values |
---|
0.0004332 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 369 | 0.26 | 0.14 | 0.24 | -0.36 | 0.83 | 1.19 |
X12 | Male | 92 | 0.24 | 0.11 | 0.24 | -0.21 | 0.64 | 0.85 |
Testing statistical significance: p-values |
---|
0.2258991 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The clustering is only reproducible when the seed is fixed before running k-means, here with “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 8 | 0.72 | 0.09 | 0.70 | 0.59 | 0.82 | 0.23 |
X12 | HP | 259 | 0.43 | 0.10 | 0.42 | 0.23 | 0.72 | 0.49 |
X13 | IP | 38 | 0.29 | 0.11 | 0.29 | 0.07 | 0.46 | 0.40 |
X14 | LP | 168 | 0.13 | 0.12 | 0.16 | -0.25 | 0.39 | 0.64 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0 |
IP-Expert | 0 |
LP-Expert | 0 |
IP-HP | 0 |
LP-HP | 0 |
LP-IP | 0 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# One integer code per course; used below as the colour index of each point.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# Scatter of NS against PLC; each point is drawn with its clusterLetter value as the plotting symbol.
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "ES Glucosidase - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-occurrence order on both vectors, so each course label
# lines up with the colour code assigned to it above.
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen + Organic 1 | Total N= 109 | 28 % | 32 % | 40 % | |||
Sex: males N= 8 ; females N= 99 | male | female | male | female | male | female | |
0 % | 30 % | 38 % | 30 % | 62 % | 39 % | ||
Race: White N= 62 ; Non-white N= 47 | white | non-white | white | non-white | white | non-white | |
29 % | 26 % | 35 % | 28 % | 35 % | 47 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 2 | Total N= 76 | 61 % | 1.3 % | 38 % | |||
Sex: males N= 15 ; females N= 60 | male | female | male | female | male | female | |
67 % | 58 % | 0 % | 1.7 % | 33 % | 40 % | ||
Race: White N= 48 ; Non-white N= 28 | white | non-white | white | non-white | white | non-white | |
71 % | 43 % | 2.1 % | 0 % | 27 % | 57 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF21 | Total N= 58 | 83 % | 0 % | 17 % | |||
Sex: males N= 18 ; females N= 40 | male | female | male | female | male | female | |
83 % | 82 % | 0 % | 0 % | 17 % | 18 % | ||
Race: White N= 32 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
88 % | 77 % | 0 % | 0 % | 12 % | 23 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen Chem 2 | Total N= 57 | 46 % | 1.8 % | 53 % | |||
Sex: males N= 18 ; females N= 39 | male | female | male | female | male | female | |
33 % | 51 % | 0 % | 2.6 % | 67 % | 46 % | ||
Race: White N= 33 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
45 % | 46 % | 3 % | 0 % | 52 % | 54 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 1 | Total N= 100 | 51 % | 1 % | 48 % | |||
Sex: males N= 19 ; females N= 80 | male | female | male | female | male | female | |
42 % | 54 % | 0 % | 1.2 % | 58 % | 45 % | ||
Race: White N= 56 ; Non-white N= 44 | white | non-white | white | non-white | white | non-white | |
55 % | 45 % | 0 % | 2.3 % | 45 % | 52 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Biochem 2 | Total N= 22 | 77 % | 0 % | 23 % | |||
Sex: males N= 9 ; females N= 13 | male | female | male | female | male | female | |
78 % | 77 % | 0 % | 0 % | 22 % | 23 % | ||
Race: White N= 14 ; Non-white N= 8 | white | non-white | white | non-white | white | non-white | |
86 % | 62 % | 0 % | 0 % | 14 % | 38 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF22 | Total N= 43 | 95 % | 0 % | 4.7 % | |||
Sex: males N= 5 ; females N= 38 | male | female | male | female | male | female | |
100 % | 95 % | 0 % | 0 % | 0 % | 5.3 % | ||
Race: White N= 28 ; Non-white N= 15 | white | non-white | white | non-white | white | non-white | |
100 % | 87 % | 0 % | 0 % | 0 % | 13 % |
plotBarAndCorr(allBiochem,"actual_year","clusterLetter","Year","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# One integer code per student year; used below as the colour index of each point.
markerIntegers = as.integer(as.factor(allBiochem$actual_year))
# Scatter of NS against PLC; each point is drawn with its clusterLetter value as the plotting symbol.
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "ES Glucosidase - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-occurrence order on both vectors, so each year label
# lines up with the colour code assigned to it above.
legend("topleft", legend=unique(allBiochem$actual_year), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats2(allBiochem,"actual_year")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
first_year | Total N= 209 | 39 % | 17 % | 44 % | |||
Sex: males N= 27 ; females N= 179 | male | female | male | female | male | female | |
30 % | 41 % | 11 % | 17 % | 59 % | 42 % | ||
Race: White N= 118 ; Non-white N= 91 | white | non-white | white | non-white | white | non-white | |
42 % | 35 % | 19 % | 15 % | 40 % | 49 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
second_year | Total N= 133 | 54 % | 1.5 % | 44 % | |||
Sex: males N= 33 ; females N= 99 | male | female | male | female | male | female | |
48 % | 56 % | 0 % | 2 % | 52 % | 42 % | ||
Race: White N= 81 ; Non-white N= 52 | white | non-white | white | non-white | white | non-white | |
60 % | 44 % | 2.5 % | 0 % | 37 % | 56 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
third_year | Total N= 123 | 86 % | 0 % | 14 % | |||
Sex: males N= 32 ; females N= 91 | male | female | male | female | male | female | |
84 % | 87 % | 0 % | 0 % | 16 % | 13 % | ||
Race: White N= 74 ; Non-white N= 49 | white | non-white | white | non-white | white | non-white | |
92 % | 78 % | 0 % | 0 % | 8.1 % | 22 % |
# Emit a bold HTML heading for the per-year chi-square section.
# "<br/>" replaces the invalid closing tag "</br>" (br is a void element).
cat("<b>Chi-square analysis of Performance by Sex and Race considering different years</b><br/>")
Chi-square analysis of Performance by Sex and Race considering different years
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 1st year")
The Chi-square analysis gives a p= 0.2357
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 2nd year")
The Chi-square analysis gives a p= 0.50805
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 3rd year")
The Chi-square analysis gives a p= 0.96331
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 1st year")
The Chi-square analysis gives a p= 0.38034
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 2nd year")
The Chi-square analysis gives a p= 0.06938
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 3rd year")
The Chi-square analysis gives a p= 0.04667
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
We compare the PLC score across the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”, and test whether the differences between groups are statistically significant.
#
# Rebuild the working table for the umrs3 survey subset, then pass it together
# with the matching expert subset (exs3) to addExperts().
# NOTE(review): addExperts() is defined elsewhere; the tables below show an
# "Expert" group, so it presumably appends the expert rows -- confirm.
allBiochem = analyzeUMRCourses(umrs3)
allBiochem = addExperts(allBiochem,exs3)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 82 | 0.06 | 0.14 | 0.07 | -0.79 | 0.43 | 1.23 |
X12 | O Chem 1 | 107 | 0.11 | 0.12 | 0.11 | -0.21 | 0.37 | 0.58 |
X13 | O Chem 2 | 61 | 0.12 | 0.12 | 0.14 | -0.33 | 0.40 | 0.73 |
X14 | Gen Chem 2 | 58 | 0.10 | 0.14 | 0.09 | -0.27 | 0.42 | 0.69 |
X15 | BiocF21 | 53 | 0.14 | 0.15 | 0.14 | -0.17 | 0.56 | 0.72 |
X16 | BiocF22 | 36 | 0.16 | 0.12 | 0.15 | -0.08 | 0.39 | 0.47 |
X17 | Biochem 2 | 24 | 0.19 | 0.19 | 0.22 | -0.18 | 0.56 | 0.74 |
X18 | Expert | 7 | 0.71 | 0.08 | 0.69 | 0.60 | 0.82 | 0.22 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.3117001 |
O Chem 2-Gen + Organic 1 | 0.1952197 |
Gen Chem 2-Gen + Organic 1 | 0.5777922 |
BiocF21-Gen + Organic 1 | 0.0128166 |
BiocF22-Gen + Organic 1 | 0.0107056 |
Biochem 2-Gen + Organic 1 | 0.0006625 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 0.9992456 |
Gen Chem 2-O Chem 1 | 1.0000000 |
BiocF21-O Chem 1 | 0.7089232 |
BiocF22-O Chem 1 | 0.5273523 |
Biochem 2-O Chem 1 | 0.0750461 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.9991966 |
BiocF21-O Chem 2 | 0.9729913 |
BiocF22-O Chem 2 | 0.8814935 |
Biochem 2-O Chem 2 | 0.2727323 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.7815819 |
BiocF22-Gen Chem 2 | 0.6046273 |
Biochem 2-Gen Chem 2 | 0.1090752 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 0.9998746 |
Biochem 2-BiocF21 | 0.7937217 |
Expert-BiocF21 | 0.0000000 |
Biochem 2-BiocF22 | 0.9624613 |
Expert-BiocF22 | 0.0000000 |
Expert-Biochem 2 | 0.0000000 |
plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.71 | 0.08 | 0.69 | 0.60 | 0.82 | 0.22 |
X12 | first_year | 189 | 0.09 | 0.13 | 0.09 | -0.79 | 0.43 | 1.23 |
X13 | second_year | 119 | 0.11 | 0.13 | 0.12 | -0.33 | 0.42 | 0.76 |
X14 | third_year | 113 | 0.16 | 0.15 | 0.16 | -0.18 | 0.56 | 0.74 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000000 |
second_year-first_year | 0.3984704 |
third_year-first_year | 0.0000594 |
third_year-second_year | 0.0414213 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 173 | 0.10 | 0.15 | 0.09 | -0.79 | 0.56 | 1.35 |
X12 | White | 244 | 0.12 | 0.13 | 0.12 | -0.27 | 0.52 | 0.79 |
Testing statistical significance: p-values |
---|
0.1649182 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 338 | 0.11 | 0.14 | 0.11 | -0.79 | 0.56 | 1.35 |
X12 | Male | 79 | 0.14 | 0.13 | 0.14 | -0.17 | 0.43 | 0.59 |
Testing statistical significance: p-values |
---|
0.0802224 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 82 | 0.13 | 0.06 | 0.13 | 0.04 | 0.29 | 0.25 |
X12 | O Chem 1 | 107 | 0.16 | 0.07 | 0.15 | 0.04 | 0.40 | 0.36 |
X13 | O Chem 2 | 61 | 0.16 | 0.06 | 0.15 | 0.04 | 0.27 | 0.23 |
X14 | Gen Chem 2 | 58 | 0.16 | 0.07 | 0.15 | 0.06 | 0.32 | 0.25 |
X15 | BiocF21 | 53 | 0.17 | 0.08 | 0.15 | 0.00 | 0.35 | 0.35 |
X16 | BiocF22 | 36 | 0.17 | 0.07 | 0.18 | 0.03 | 0.30 | 0.27 |
X17 | Biochem 2 | 24 | 0.18 | 0.09 | 0.17 | 0.04 | 0.41 | 0.37 |
X18 | Expert | 7 | 0.43 | 0.08 | 0.44 | 0.33 | 0.53 | 0.20 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.1222132 |
O Chem 2-Gen + Organic 1 | 0.4688702 |
Gen Chem 2-Gen + Organic 1 | 0.2024925 |
BiocF21-Gen + Organic 1 | 0.0570943 |
BiocF22-Gen + Organic 1 | 0.1233538 |
Biochem 2-Gen + Organic 1 | 0.0502438 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 0.9999693 |
Gen Chem 2-O Chem 1 | 0.9999997 |
BiocF21-O Chem 1 | 0.9941082 |
BiocF22-O Chem 1 | 0.9955670 |
Biochem 2-O Chem 1 | 0.8716951 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.9997855 |
BiocF21-O Chem 2 | 0.9749227 |
BiocF22-O Chem 2 | 0.9809149 |
Biochem 2-O Chem 2 | 0.7995603 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.9994726 |
BiocF22-Gen Chem 2 | 0.9994520 |
Biochem 2-Gen Chem 2 | 0.9448838 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 1.0000000 |
Biochem 2-BiocF21 | 0.9962882 |
Expert-BiocF21 | 0.0000000 |
Biochem 2-BiocF22 | 0.9984874 |
Expert-BiocF22 | 0.0000000 |
Expert-Biochem 2 | 0.0000000 |
plotAndTable(allBiochem,"actual_year","NS","NS: Year","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.43 | 0.08 | 0.44 | 0.33 | 0.53 | 0.20 |
X12 | first_year | 189 | 0.15 | 0.06 | 0.14 | 0.04 | 0.40 | 0.36 |
X13 | second_year | 119 | 0.16 | 0.06 | 0.15 | 0.04 | 0.32 | 0.28 |
X14 | third_year | 113 | 0.17 | 0.08 | 0.16 | 0.00 | 0.41 | 0.41 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000000 |
second_year-first_year | 0.5347359 |
third_year-first_year | 0.0210884 |
third_year-second_year | 0.4892358 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 173 | 0.15 | 0.07 | 0.14 | 0.03 | 0.40 | 0.37 |
X12 | White | 244 | 0.16 | 0.07 | 0.15 | 0.00 | 0.41 | 0.41 |
Testing statistical significance: p-values |
---|
0.3201503 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 338 | 0.15 | 0.07 | 0.14 | 0.00 | 0.41 | 0.41 |
X12 | Male | 79 | 0.17 | 0.07 | 0.16 | 0.04 | 0.40 | 0.36 |
Testing statistical significance: p-values |
---|
0.1498567 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The clustering is only reproducible when the seed is fixed before running k-means, here with “set.seed(42)”.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.71 | 0.08 | 0.69 | 0.60 | 0.82 | 0.22 |
X12 | HP | 75 | 0.29 | 0.09 | 0.28 | 0.09 | 0.56 | 0.47 |
X13 | IP | 201 | 0.14 | 0.07 | 0.14 | -0.08 | 0.38 | 0.46 |
X14 | LP | 145 | -0.01 | 0.11 | 0.01 | -0.79 | 0.14 | 0.94 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0 |
IP-Expert | 0 |
LP-Expert | 0 |
IP-HP | 0 |
LP-HP | 0 |
LP-IP | 0 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 1e-04
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# One integer code per course; used below as the colour index of each point.
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
# Scatter of NS against PLC; each point is drawn with its clusterLetter value as the plotting symbol.
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Nucleic Acids - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-occurrence order on both vectors, so each course label
# lines up with the colour code assigned to it above.
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen + Organic 1 | Total N= 82 | 6.1 % | 41 % | 52 % | |||
Sex: males N= 3 ; females N= 77 | male | female | male | female | male | female | |
0 % | 6.5 % | 33 % | 42 % | 67 % | 52 % | ||
Race: White N= 46 ; Non-white N= 36 | white | non-white | white | non-white | white | non-white | |
4.3 % | 8.3 % | 41 % | 42 % | 54 % | 50 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 2 | Total N= 61 | 11 % | 67 % | 21 % | |||
Sex: males N= 11 ; females N= 49 | male | female | male | female | male | female | |
9.1 % | 12 % | 73 % | 67 % | 18 % | 20 % | ||
Race: White N= 36 ; Non-white N= 25 | white | non-white | white | non-white | white | non-white | |
14 % | 8 % | 75 % | 56 % | 11 % | 36 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF21 | Total N= 53 | 25 % | 47 % | 28 % | |||
Sex: males N= 15 ; females N= 38 | male | female | male | female | male | female | |
27 % | 24 % | 53 % | 45 % | 20 % | 32 % | ||
Race: White N= 29 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
28 % | 21 % | 55 % | 38 % | 17 % | 42 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 1 | Total N= 107 | 17 % | 48 % | 36 % | |||
Sex: males N= 19 ; females N= 87 | male | female | male | female | male | female | |
21 % | 16 % | 47 % | 47 % | 32 % | 37 % | ||
Race: White N= 61 ; Non-white N= 46 | white | non-white | white | non-white | white | non-white | |
18 % | 15 % | 52 % | 41 % | 30 % | 43 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen Chem 2 | Total N= 58 | 19 % | 45 % | 36 % | |||
Sex: males N= 18 ; females N= 40 | male | female | male | female | male | female | |
28 % | 15 % | 44 % | 45 % | 28 % | 40 % | ||
Race: White N= 32 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
22 % | 15 % | 41 % | 50 % | 38 % | 35 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Biochem 2 | Total N= 24 | 38 % | 33 % | 29 % | |||
Sex: males N= 9 ; females N= 15 | male | female | male | female | male | female | |
22 % | 47 % | 56 % | 20 % | 22 % | 33 % | ||
Race: White N= 17 ; Non-white N= 7 | white | non-white | white | non-white | white | non-white | |
41 % | 29 % | 24 % | 57 % | 35 % | 14 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF22 | Total N= 36 | 33 % | 44 % | 22 % | |||
Sex: males N= 4 ; females N= 32 | male | female | male | female | male | female | |
25 % | 34 % | 75 % | 41 % | 0 % | 25 % | ||
Race: White N= 24 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
33 % | 33 % | 50 % | 33 % | 17 % | 33 % |
plotBarAndCorr(allBiochem,"actual_year","clusterLetter","Year","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.00012
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
# One integer code per student year; used below as the colour index of each point.
markerIntegers = as.integer(as.factor(allBiochem$actual_year))
# Scatter of NS against PLC; each point is drawn with its clusterLetter value as the plotting symbol.
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Nucleic Acids - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
# unique() keeps first-occurrence order on both vectors, so each year label
# lines up with the colour code assigned to it above.
legend("topleft", legend=unique(allBiochem$actual_year), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats2(allBiochem,"actual_year")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
first_year | Total N= 189 | 12 % | 45 % | 43 % | |||
Sex: males N= 22 ; females N= 164 | male | female | male | female | male | female | |
18 % | 12 % | 45 % | 45 % | 36 % | 44 % | ||
Race: White N= 107 ; Non-white N= 82 | white | non-white | white | non-white | white | non-white | |
12 % | 12 % | 48 % | 41 % | 40 % | 46 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
second_year | Total N= 119 | 15 % | 56 % | 29 % | |||
Sex: males N= 29 ; females N= 89 | male | female | male | female | male | female | |
21 % | 13 % | 55 % | 57 % | 24 % | 29 % | ||
Race: White N= 68 ; Non-white N= 51 | white | non-white | white | non-white | white | non-white | |
18 % | 12 % | 59 % | 53 % | 24 % | 35 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
third_year | Total N= 113 | 30 % | 43 % | 27 % | |||
Sex: males N= 28 ; females N= 85 | male | female | male | female | male | female | |
25 % | 32 % | 57 % | 39 % | 18 % | 29 % | ||
Race: White N= 70 ; Non-white N= 43 | white | non-white | white | non-white | white | non-white | |
33 % | 26 % | 46 % | 40 % | 21 % | 35 % |
# Emit a bold HTML heading for the per-year chi-square section.
# "<br/>" replaces the invalid closing tag "</br>" (br is a void element).
cat("<b>Chi-square analysis of Performance by Sex and Race considering different years</b><br/>")
Chi-square analysis of Performance by Sex and Race considering different years
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 1st year")
The Chi-square analysis gives a p= 0.62414
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 2nd year")
The Chi-square analysis gives a p= 0.61766
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 3rd year")
The Chi-square analysis gives a p= 0.22207
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 1st year")
The Chi-square analysis gives a p= 0.66791
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 2nd year")
The Chi-square analysis gives a p= 0.32343
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 3rd year")
The Chi-square analysis gives a p= 0.2837
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
We compare the PLC score across the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”, and test whether the differences between groups are statistically significant.
#
# Rebuild the working table for the umrs4 survey subset, then pass it together
# with the matching expert subset (exs4) to addExperts().
# NOTE(review): addExperts() is defined elsewhere; the tables below show an
# "Expert" group, so it presumably appends the expert rows -- confirm.
allBiochem = analyzeUMRCourses(umrs4)
allBiochem = addExperts(allBiochem,exs4)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 94 | 0.17 | 0.13 | 0.19 | -0.17 | 0.51 | 0.68 |
X12 | O Chem 1 | 126 | 0.18 | 0.12 | 0.18 | -0.19 | 0.41 | 0.60 |
X13 | O Chem 2 | 65 | 0.18 | 0.14 | 0.19 | -0.20 | 0.48 | 0.68 |
X14 | Gen Chem 2 | 67 | 0.21 | 0.13 | 0.24 | -0.21 | 0.42 | 0.63 |
X15 | BiocF21 | 53 | 0.22 | 0.26 | 0.18 | -0.25 | 0.86 | 1.11 |
X16 | BiocF22 | 37 | 0.20 | 0.13 | 0.20 | -0.04 | 0.42 | 0.46 |
X17 | Biochem 2 | 26 | 0.32 | 0.23 | 0.30 | -0.09 | 0.70 | 0.80 |
X18 | Expert | 15 | 0.69 | 0.13 | 0.66 | 0.52 | 0.89 | 0.38 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.9995797 |
O Chem 2-Gen + Organic 1 | 0.9997234 |
Gen Chem 2-Gen + Organic 1 | 0.7022747 |
BiocF21-Gen + Organic 1 | 0.5671070 |
BiocF22-Gen + Organic 1 | 0.9934641 |
Biochem 2-Gen + Organic 1 | 0.0005910 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 1.0000000 |
Gen Chem 2-O Chem 1 | 0.8956474 |
BiocF21-O Chem 1 | 0.7851551 |
BiocF22-O Chem 1 | 0.9998398 |
Biochem 2-O Chem 1 | 0.0013717 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.9585310 |
BiocF21-O Chem 2 | 0.8909605 |
BiocF22-O Chem 2 | 0.9999571 |
Biochem 2-O Chem 2 | 0.0048459 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.9999902 |
BiocF22-Gen Chem 2 | 0.9992966 |
Biochem 2-Gen Chem 2 | 0.0661446 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 0.9937191 |
Biochem 2-BiocF21 | 0.1530198 |
Expert-BiocF21 | 0.0000000 |
Biochem 2-BiocF22 | 0.0417856 |
Expert-BiocF22 | 0.0000000 |
Expert-Biochem 2 | 0.0000000 |
plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 15 | 0.69 | 0.13 | 0.66 | 0.52 | 0.89 | 0.38 |
X12 | first_year | 220 | 0.18 | 0.12 | 0.18 | -0.19 | 0.51 | 0.71 |
X13 | second_year | 132 | 0.20 | 0.13 | 0.22 | -0.21 | 0.48 | 0.69 |
X14 | third_year | 116 | 0.23 | 0.22 | 0.20 | -0.25 | 0.86 | 1.11 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000000 |
second_year-first_year | 0.6208045 |
third_year-first_year | 0.0082881 |
third_year-second_year | 0.2616615 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 195 | 0.17 | 0.16 | 0.17 | -0.25 | 0.86 | 1.11 |
X12 | White | 269 | 0.22 | 0.15 | 0.22 | -0.21 | 0.70 | 0.92 |
Testing statistical significance: p-values |
---|
0.001165 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 372 | 0.19 | 0.16 | 0.2 | -0.25 | 0.80 | 1.05 |
X12 | Male | 92 | 0.21 | 0.16 | 0.2 | -0.09 | 0.86 | 0.95 |
Testing statistical significance: p-values |
---|
0.4297313 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 94 | 0.18 | 0.06 | 0.17 | 0.04 | 0.32 | 0.28 |
X12 | O Chem 1 | 126 | 0.18 | 0.06 | 0.17 | 0.04 | 0.37 | 0.33 |
X13 | O Chem 2 | 65 | 0.18 | 0.08 | 0.17 | 0.00 | 0.41 | 0.41 |
X14 | Gen Chem 2 | 67 | 0.19 | 0.07 | 0.19 | 0.04 | 0.44 | 0.39 |
X15 | BiocF21 | 53 | 0.21 | 0.09 | 0.21 | 0.06 | 0.44 | 0.38 |
X16 | BiocF22 | 37 | 0.17 | 0.06 | 0.17 | 0.04 | 0.30 | 0.26 |
X17 | Biochem 2 | 26 | 0.22 | 0.08 | 0.21 | 0.05 | 0.41 | 0.36 |
X18 | Expert | 15 | 0.35 | 0.09 | 0.35 | 0.25 | 0.53 | 0.28 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.9999998 |
O Chem 2-Gen + Organic 1 | 1.0000000 |
Gen Chem 2-Gen + Organic 1 | 0.9996865 |
BiocF21-Gen + Organic 1 | 0.2186001 |
BiocF22-Gen + Organic 1 | 0.9949659 |
Biochem 2-Gen + Organic 1 | 0.3220053 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 1.0000000 |
Gen Chem 2-O Chem 1 | 0.9975965 |
BiocF21-O Chem 1 | 0.1205696 |
BiocF22-O Chem 1 | 0.9978219 |
Biochem 2-O Chem 1 | 0.2314305 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.9996240 |
BiocF21-O Chem 2 | 0.2807652 |
BiocF22-O Chem 2 | 0.9977051 |
Biochem 2-O Chem 2 | 0.3582391 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.5773117 |
BiocF22-Gen Chem 2 | 0.9555810 |
Biochem 2-Gen Chem 2 | 0.6071783 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 0.1393354 |
Biochem 2-BiocF21 | 0.9999812 |
Expert-BiocF21 | 0.0000000 |
Biochem 2-BiocF22 | 0.1865267 |
Expert-BiocF22 | 0.0000000 |
Expert-Biochem 2 | 0.0000001 |
plotAndTable(allBiochem,"actual_year","NS","NS: Year","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 15 | 0.35 | 0.09 | 0.35 | 0.25 | 0.53 | 0.28 |
X12 | first_year | 220 | 0.18 | 0.06 | 0.17 | 0.04 | 0.37 | 0.33 |
X13 | second_year | 132 | 0.18 | 0.07 | 0.18 | 0.00 | 0.44 | 0.44 |
X14 | third_year | 116 | 0.20 | 0.08 | 0.20 | 0.04 | 0.44 | 0.40 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000000 |
second_year-first_year | 0.9715375 |
third_year-first_year | 0.0892992 |
third_year-second_year | 0.3068437 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 195 | 0.18 | 0.07 | 0.17 | 0.04 | 0.44 | 0.40 |
X12 | White | 269 | 0.19 | 0.07 | 0.18 | 0.00 | 0.44 | 0.44 |
Testing statistical significance: p-values |
---|
0.1920829 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 372 | 0.18 | 0.07 | 0.18 | 0.00 | 0.44 | 0.44 |
X12 | Male | 92 | 0.19 | 0.08 | 0.18 | 0.05 | 0.44 | 0.39 |
Testing statistical significance: p-values |
---|
0.4814695 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only when the seed is fixed with “set.seed(42)” before the k-means method is run.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 15 | 0.69 | 0.13 | 0.66 | 0.52 | 0.89 | 0.38 |
X12 | HP | 78 | 0.40 | 0.15 | 0.36 | 0.13 | 0.86 | 0.72 |
X13 | IP | 243 | 0.22 | 0.09 | 0.22 | -0.01 | 0.43 | 0.44 |
X14 | LP | 147 | 0.05 | 0.11 | 0.06 | -0.25 | 0.32 | 0.57 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0 |
IP-Expert | 0 |
LP-Expert | 0 |
IP-HP | 0 |
LP-HP | 0 |
LP-IP | 0 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 1e-05
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Oxygen Binding - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen + Organic 1 | Total N= 94 | 9.6 % | 55 % | 35 % | |||
Sex: males N= 7 ; females N= 85 | male | female | male | female | male | female | |
0 % | 9.4 % | 29 % | 58 % | 71 % | 33 % | ||
Race: White N= 50 ; Non-white N= 44 | white | non-white | white | non-white | white | non-white | |
6 % | 14 % | 62 % | 48 % | 32 % | 39 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 2 | Total N= 65 | 17 % | 55 % | 28 % | |||
Sex: males N= 12 ; females N= 52 | male | female | male | female | male | female | |
8.3 % | 19 % | 83 % | 50 % | 8.3 % | 31 % | ||
Race: White N= 40 ; Non-white N= 25 | white | non-white | white | non-white | white | non-white | |
25 % | 4 % | 52 % | 60 % | 22 % | 36 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF21 | Total N= 53 | 34 % | 25 % | 42 % | |||
Sex: males N= 16 ; females N= 37 | male | female | male | female | male | female | |
44 % | 30 % | 19 % | 27 % | 38 % | 43 % | ||
Race: White N= 29 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
34 % | 33 % | 31 % | 17 % | 34 % | 50 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 1 | Total N= 126 | 10 % | 58 % | 32 % | |||
Sex: males N= 22 ; females N= 103 | male | female | male | female | male | female | |
9.1 % | 11 % | 59 % | 57 % | 32 % | 32 % | ||
Race: White N= 71 ; Non-white N= 55 | white | non-white | white | non-white | white | non-white | |
11 % | 9.1 % | 59 % | 56 % | 30 % | 35 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Biochem 2 | Total N= 26 | 46 % | 38 % | 15 % | |||
Sex: males N= 11 ; females N= 15 | male | female | male | female | male | female | |
18 % | 67 % | 64 % | 20 % | 18 % | 13 % | ||
Race: White N= 19 ; Non-white N= 7 | white | non-white | white | non-white | white | non-white | |
42 % | 57 % | 42 % | 29 % | 16 % | 14 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen Chem 2 | Total N= 67 | 18 % | 55 % | 27 % | |||
Sex: males N= 20 ; females N= 47 | male | female | male | female | male | female | |
20 % | 17 % | 60 % | 53 % | 20 % | 30 % | ||
Race: White N= 37 ; Non-white N= 30 | white | non-white | white | non-white | white | non-white | |
14 % | 23 % | 65 % | 43 % | 22 % | 33 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF22 | Total N= 37 | 8.1 % | 59 % | 32 % | |||
Sex: males N= 4 ; females N= 33 | male | female | male | female | male | female | |
0 % | 9.1 % | 75 % | 58 % | 25 % | 33 % | ||
Race: White N= 24 ; Non-white N= 13 | white | non-white | white | non-white | white | non-white | |
8.3 % | 7.7 % | 67 % | 46 % | 25 % | 46 % |
plotBarAndCorr(allBiochem,"actual_year","clusterLetter","Year","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0.00023
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
markerIntegers = as.integer(as.factor(allBiochem$actual_year))
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Oxygen Binding - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
legend("topleft", legend=unique(allBiochem$actual_year), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats2(allBiochem,"actual_year")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
first_year | Total N= 220 | 10 % | 57 % | 33 % | |||
Sex: males N= 29 ; females N= 188 | male | female | male | female | male | female | |
6.9 % | 10 % | 52 % | 57 % | 41 % | 32 % | ||
Race: White N= 121 ; Non-white N= 99 | white | non-white | white | non-white | white | non-white | |
9.1 % | 11 % | 60 % | 53 % | 31 % | 36 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
second_year | Total N= 132 | 17 % | 55 % | 27 % | |||
Sex: males N= 32 ; females N= 99 | male | female | male | female | male | female | |
16 % | 18 % | 69 % | 52 % | 16 % | 30 % | ||
Race: White N= 77 ; Non-white N= 55 | white | non-white | white | non-white | white | non-white | |
19 % | 15 % | 58 % | 51 % | 22 % | 35 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
third_year | Total N= 116 | 28 % | 39 % | 33 % | |||
Sex: males N= 31 ; females N= 85 | male | female | male | female | male | female | |
29 % | 28 % | 42 % | 38 % | 29 % | 34 % | ||
Race: White N= 72 ; Non-white N= 44 | white | non-white | white | non-white | white | non-white | |
28 % | 30 % | 46 % | 27 % | 26 % | 43 % |
cat("<b>Chi-square analysis of Performance by Sex and Race considering different years</b></br>")
Chi-square analysis of Performance by Sex and Race considering different years
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 1st year")
The Chi-square analysis gives a p= 0.60394
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 2nd year")
The Chi-square analysis gives a p= 0.18927
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 3rd year")
The Chi-square analysis gives a p= 0.86412
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 1st year")
The Chi-square analysis gives a p= 0.50782
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 2nd year")
The Chi-square analysis gives a p= 0.27169
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 3rd year")
The Chi-square analysis gives a p= 0.09043
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
We are testing whether the PLC score differs significantly among the categories “Course collected”, “Student year”, “White/Non-white”, and “Sex at birth”.
#
allBiochem = analyzeUMRCourses(umrs5)
allBiochem = addExperts(allBiochem,exs5)
#buildTables(allBiochem)
plotAndTable(allBiochem,"Course_collected","PLC","PLC: Course","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 89 | 0.16 | 0.06 | 0.15 | 0.00 | 0.35 | 0.35 |
X12 | O Chem 1 | 109 | 0.14 | 0.15 | 0.13 | -0.18 | 0.46 | 0.64 |
X13 | O Chem 2 | 59 | 0.18 | 0.17 | 0.18 | -0.18 | 0.62 | 0.80 |
X14 | Gen Chem 2 | 61 | 0.16 | 0.15 | 0.17 | -0.13 | 0.47 | 0.60 |
X15 | BiocF21 | 51 | 0.26 | 0.15 | 0.29 | -0.13 | 0.54 | 0.67 |
X16 | BiocF22 | 35 | 0.27 | 0.13 | 0.27 | 0.01 | 0.51 | 0.51 |
X17 | Biochem 2 | 23 | 0.26 | 0.19 | 0.22 | -0.18 | 0.59 | 0.77 |
X18 | Expert | 7 | 0.76 | 0.10 | 0.79 | 0.59 | 0.89 | 0.30 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.9933172 |
O Chem 2-Gen + Organic 1 | 0.9601136 |
Gen Chem 2-Gen + Organic 1 | 1.0000000 |
BiocF21-Gen + Organic 1 | 0.0011686 |
BiocF22-Gen + Organic 1 | 0.0023723 |
Biochem 2-Gen + Organic 1 | 0.0547524 |
Expert-Gen + Organic 1 | 0.0000000 |
O Chem 2-O Chem 1 | 0.6013307 |
Gen Chem 2-O Chem 1 | 0.9898798 |
BiocF21-O Chem 1 | 0.0000313 |
BiocF22-O Chem 1 | 0.0001285 |
Biochem 2-O Chem 1 | 0.0094669 |
Expert-O Chem 1 | 0.0000000 |
Gen Chem 2-O Chem 2 | 0.9883586 |
BiocF21-O Chem 2 | 0.0893150 |
BiocF22-O Chem 2 | 0.0891965 |
Biochem 2-O Chem 2 | 0.3981718 |
Expert-O Chem 2 | 0.0000000 |
BiocF21-Gen Chem 2 | 0.0060084 |
BiocF22-Gen Chem 2 | 0.0083529 |
Biochem 2-Gen Chem 2 | 0.1004083 |
Expert-Gen Chem 2 | 0.0000000 |
BiocF22-BiocF21 | 0.9999921 |
Biochem 2-BiocF21 | 1.0000000 |
Expert-BiocF21 | 0.0000000 |
Biochem 2-BiocF22 | 0.9999882 |
Expert-BiocF22 | 0.0000000 |
Expert-Biochem 2 | 0.0000000 |
plotAndTable(allBiochem,"actual_year","PLC","PLC: Year","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.76 | 0.10 | 0.79 | 0.59 | 0.89 | 0.30 |
X12 | first_year | 198 | 0.15 | 0.12 | 0.14 | -0.18 | 0.46 | 0.64 |
X13 | second_year | 120 | 0.17 | 0.16 | 0.18 | -0.18 | 0.62 | 0.80 |
X14 | third_year | 109 | 0.26 | 0.15 | 0.27 | -0.18 | 0.59 | 0.77 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000000 |
second_year-Expert | 0.0000000 |
third_year-Expert | 0.0000000 |
second_year-first_year | 0.4895237 |
third_year-first_year | 0.0000000 |
third_year-second_year | 0.0000105 |
plotAndTable(allBiochem,"race_binary","PLC","PLC: White/Non-white","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 173 | 0.17 | 0.15 | 0.17 | -0.18 | 0.46 | 0.64 |
X12 | White | 250 | 0.19 | 0.15 | 0.18 | -0.17 | 0.62 | 0.79 |
Testing statistical significance: p-values |
---|
0.057055 |
plotAndTable(allBiochem,"Sex_birth","PLC","PLC: Sex","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 343 | 0.18 | 0.14 | 0.17 | -0.18 | 0.62 | 0.80 |
X12 | Male | 80 | 0.18 | 0.17 | 0.16 | -0.18 | 0.48 | 0.66 |
Testing statistical significance: p-values |
---|
0.7431887 |
plotAndTable(allBiochem,"Course_collected","NS","NS: Course","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Gen + Organic 1 | 89 | 0.18 | 0.16 | 0.20 | -0.22 | 0.50 | 0.72 |
X12 | O Chem 1 | 109 | 0.16 | 0.07 | 0.16 | 0.03 | 0.35 | 0.32 |
X13 | O Chem 2 | 59 | 0.17 | 0.06 | 0.17 | 0.04 | 0.35 | 0.31 |
X14 | Gen Chem 2 | 61 | 0.17 | 0.07 | 0.17 | 0.04 | 0.41 | 0.37 |
X15 | BiocF21 | 51 | 0.17 | 0.07 | 0.17 | 0.04 | 0.33 | 0.29 |
X16 | BiocF22 | 35 | 0.18 | 0.06 | 0.17 | 0.08 | 0.42 | 0.34 |
X17 | Biochem 2 | 23 | 0.19 | 0.10 | 0.19 | 0.04 | 0.39 | 0.35 |
X18 | Expert | 7 | 0.35 | 0.08 | 0.35 | 0.24 | 0.44 | 0.21 |
Testing statistical significance: p-values | |
---|---|
O Chem 1-Gen + Organic 1 | 0.9097223 |
O Chem 2-Gen + Organic 1 | 0.9856908 |
Gen Chem 2-Gen + Organic 1 | 0.9989850 |
BiocF21-Gen + Organic 1 | 0.9995746 |
BiocF22-Gen + Organic 1 | 1.0000000 |
Biochem 2-Gen + Organic 1 | 0.9988883 |
Expert-Gen + Organic 1 | 0.0003529 |
O Chem 2-O Chem 1 | 0.9999996 |
Gen Chem 2-O Chem 1 | 0.9995544 |
BiocF21-O Chem 1 | 0.9994740 |
BiocF22-O Chem 1 | 0.9724458 |
Biochem 2-O Chem 1 | 0.8569256 |
Expert-O Chem 1 | 0.0000370 |
Gen Chem 2-O Chem 2 | 0.9999908 |
BiocF21-O Chem 2 | 0.9999839 |
BiocF22-O Chem 2 | 0.9939313 |
Biochem 2-O Chem 2 | 0.9355910 |
Expert-O Chem 2 | 0.0000924 |
BiocF21-Gen Chem 2 | 1.0000000 |
BiocF22-Gen Chem 2 | 0.9994642 |
Biochem 2-Gen Chem 2 | 0.9779878 |
Expert-Gen Chem 2 | 0.0001638 |
BiocF22-BiocF21 | 0.9997260 |
Biochem 2-BiocF21 | 0.9841815 |
Expert-BiocF21 | 0.0002177 |
Biochem 2-BiocF22 | 0.9997919 |
Expert-BiocF22 | 0.0010806 |
Expert-Biochem 2 | 0.0062939 |
plotAndTable(allBiochem,"actual_year","NS","NS: Year","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.35 | 0.08 | 0.35 | 0.24 | 0.44 | 0.21 |
X12 | first_year | 198 | 0.17 | 0.12 | 0.17 | -0.22 | 0.50 | 0.72 |
X13 | second_year | 120 | 0.17 | 0.07 | 0.17 | 0.04 | 0.41 | 0.37 |
X14 | third_year | 109 | 0.18 | 0.07 | 0.17 | 0.04 | 0.42 | 0.38 |
Testing statistical significance: p-values | |
---|---|
first_year-Expert | 0.0000153 |
second_year-Expert | 0.0000145 |
third_year-Expert | 0.0000606 |
second_year-first_year | 0.9965851 |
third_year-first_year | 0.8674005 |
third_year-second_year | 0.8145313 |
plotAndTable(allBiochem,"race_binary","NS","NS: White/Non-white","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Non-white | 173 | 0.16 | 0.09 | 0.17 | -0.11 | 0.50 | 0.61 |
X12 | White | 250 | 0.18 | 0.09 | 0.17 | -0.22 | 0.46 | 0.68 |
Testing statistical significance: p-values |
---|
0.0386413 |
plotAndTable(allBiochem,"Sex_birth","NS","NS: Sex","NS")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Female | 343 | 0.17 | 0.10 | 0.17 | -0.22 | 0.50 | 0.72 |
X12 | Male | 80 | 0.17 | 0.07 | 0.17 | 0.03 | 0.33 | 0.30 |
Testing statistical significance: p-values |
---|
0.4421795 |
The problem with clustering is that it is an iterative method, and different “initial seeds” will yield different results. The results are reproducible only when the seed is fixed with “set.seed(42)” before the k-means method is run.
plotAndTable(allBiochem,"clusterLetter","PLC","PLC: Cluster letter","PLC")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Expert | 7 | 0.76 | 0.10 | 0.79 | 0.59 | 0.89 | 0.30 |
X12 | HP | 174 | 0.28 | 0.09 | 0.27 | 0.14 | 0.57 | 0.43 |
X13 | IP | 91 | 0.24 | 0.12 | 0.21 | 0.04 | 0.62 | 0.58 |
X14 | LP | 162 | 0.04 | 0.09 | 0.07 | -0.18 | 0.21 | 0.40 |
Testing statistical significance: p-values | |
---|---|
HP-Expert | 0.0000000 |
IP-Expert | 0.0000000 |
LP-Expert | 0.0000000 |
IP-HP | 0.0012838 |
LP-HP | 0.0000000 |
LP-IP | 0.0000000 |
plotBarAndCorr(allBiochem,"Course_collected","clusterLetter","Course","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
markerIntegers = as.integer(as.factor(allBiochem$Course_collected))
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Protein Structure - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
legend("topleft", legend=unique(allBiochem$Course_collected), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats(allBiochem,"Course_collected")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen + Organic 1 | Total N= 89 | 18 % | 44 % | 38 % | |||
Sex: males N= 2 ; females N= 85 | male | female | male | female | male | female | |
0 % | 19 % | 50 % | 44 % | 50 % | 38 % | ||
Race: White N= 51 ; Non-white N= 38 | white | non-white | white | non-white | white | non-white | |
16 % | 21 % | 55 % | 29 % | 29 % | 50 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 2 | Total N= 59 | 47 % | 10 % | 42 % | |||
Sex: males N= 11 ; females N= 47 | male | female | male | female | male | female | |
45 % | 47 % | 9.1 % | 11 % | 45 % | 43 % | ||
Race: White N= 35 ; Non-white N= 24 | white | non-white | white | non-white | white | non-white | |
54 % | 38 % | 8.6 % | 12 % | 37 % | 50 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF21 | Total N= 51 | 63 % | 14 % | 24 % | |||
Sex: males N= 15 ; females N= 36 | male | female | male | female | male | female | |
60 % | 64 % | 6.7 % | 17 % | 33 % | 19 % | ||
Race: White N= 28 ; Non-white N= 23 | white | non-white | white | non-white | white | non-white | |
64 % | 61 % | 11 % | 17 % | 25 % | 22 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
O Chem 1 | Total N= 109 | 36 % | 15 % | 50 % | |||
Sex: males N= 19 ; females N= 89 | male | female | male | female | male | female | |
26 % | 38 % | 16 % | 15 % | 58 % | 47 % | ||
Race: White N= 63 ; Non-white N= 46 | white | non-white | white | non-white | white | non-white | |
37 % | 35 % | 16 % | 13 % | 48 % | 52 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Gen Chem 2 | Total N= 61 | 38 % | 20 % | 43 % | |||
Sex: males N= 19 ; females N= 42 | male | female | male | female | male | female | |
53 % | 31 % | 26 % | 17 % | 21 % | 52 % | ||
Race: White N= 35 ; Non-white N= 26 | white | non-white | white | non-white | white | non-white | |
49 % | 23 % | 8.6 % | 35 % | 43 % | 42 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
Biochem 2 | Total N= 23 | 43 % | 35 % | 22 % | |||
Sex: males N= 10 ; females N= 13 | male | female | male | female | male | female | |
30 % | 54 % | 30 % | 38 % | 40 % | 7.7 % | ||
Race: White N= 16 ; Non-white N= 7 | white | non-white | white | non-white | white | non-white | |
44 % | 43 % | 44 % | 14 % | 12 % | 43 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
BiocF22 | Total N= 35 | 74 % | 8.6 % | 17 % | |||
Sex: males N= 4 ; females N= 31 | male | female | male | female | male | female | |
75 % | 74 % | 0 % | 9.7 % | 25 % | 16 % | ||
Race: White N= 23 ; Non-white N= 12 | white | non-white | white | non-white | white | non-white | |
70 % | 83 % | 13 % | 0 % | 17 % | 17 % |
plotBarAndCorr(allBiochem,"actual_year","clusterLetter","Year","N of students","High, Intermediate, Low Performance cluster")
The Chi-square analysis gives a p= 0
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
markerIntegers = as.integer(as.factor(allBiochem$actual_year))
plot(allBiochem$PLC,allBiochem$NS,pch=allBiochem$clusterLetter,main = "Protein Structure - High(H), Intermediate(I), Low(L) performers",ylab="NS",xlab="PLC",col=markerIntegers)
legend("topleft", legend=unique(allBiochem$actual_year), col=unique(markerIntegers), lty=1:1, cex=0.8)
calcStats2(allBiochem,"actual_year")
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
first_year | Total N= 198 | 28 % | 28 % | 44 % | |||
Sex: males N= 21 ; females N= 174 | male | female | male | female | male | female | |
24 % | 29 % | 19 % | 29 % | 57 % | 43 % | ||
Race: White N= 114 ; Non-white N= 84 | white | non-white | white | non-white | white | non-white | |
27 % | 29 % | 33 % | 20 % | 39 % | 51 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
second_year | Total N= 120 | 42 % | 15 % | 42 % | |||
Sex: males N= 30 ; females N= 89 | male | female | male | female | male | female | |
50 % | 39 % | 20 % | 13 % | 30 % | 47 % | ||
Race: White N= 70 ; Non-white N= 50 | white | non-white | white | non-white | white | non-white | |
51 % | 30 % | 8.6 % | 24 % | 40 % | 46 % |
High Performers | Intermediate Performers | Low Performers | |||||
---|---|---|---|---|---|---|---|
third_year | Total N= 109 | 62 % | 17 % | 21 % | |||
Sex: males N= 29 ; females N= 80 | male | female | male | female | male | female | |
52 % | 66 % | 14 % | 18 % | 34 % | 16 % | ||
Race: White N= 67 ; Non-white N= 42 | white | non-white | white | non-white | white | non-white | |
61 % | 64 % | 19 % | 12 % | 19 % | 24 % |
cat("<b>Chi-square analysis of Performance by Sex and Race considering different years</b></br>")
Chi-square analysis of Performance by Sex and Race considering different years
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 1st year")
The Chi-square analysis gives a p= 0.42661
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 2nd year")
The Chi-square analysis gives a p= 0.24838
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"Sex_birth","clusterLetter","Sex","N of students","Performance by Sex 3rd year")
The Chi-square analysis gives a p= 0.11942
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="first_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 1st year")
The Chi-square analysis gives a p= 0.10473
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="second_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 2nd year")
The Chi-square analysis gives a p= 0.01807
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected
plotBarAndCorr(allBiochem[which(allBiochem$actual_year=="third_year"),],"race_binary","clusterLetter","Race","N of students","Performance by Race 3rd year")
The Chi-square analysis gives a p= 0.561
Residuals analysis:
A negative residual implies that the measured value is lower than expected and a positive value higher than expected