# Load the cleaned model-usage survey data (one row per student per question).
# NOTE(review): absolute home-relative path — works only on the author's machine.
allData = read.csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/Modelusage_cleandataset_July2022_v2.csv",header = TRUE)
# "EX" (presumably an excused exam — confirm) cannot be averaged; recode to NA
# and convert the exam columns to numeric.
allData[which(allData$Exam..2 == "EX"),]$Exam..2 = NA
allData$Exam..2 = as.numeric(allData$Exam..2)
allData[which(allData$Exam..3 == "EX"),]$Exam..3 = NA
allData$Exam..3 = as.numeric(allData$Exam..3)
# There are many different free-text answers to the main question
# "Did you use the model kit?". Decision: anything that is not a clear yes
# becomes "N"; "Both" counts as "Y".
allData[which(allData$Did.you.use.the.model.kit. == "N/A"),]$Did.you.use.the.model.kit. = "N"
allData[which(allData$Did.you.use.the.model.kit. == ""),]$Did.you.use.the.model.kit. = "N"
allData[which(allData$Did.you.use.the.model.kit. == "No answer"),]$Did.you.use.the.model.kit. = "N"
allData[which(allData$Did.you.use.the.model.kit. == "Both"),]$Did.you.use.the.model.kit. = "Y"
# strip all whitespace so variants like "Y " / " N" collapse to "Y" / "N"
allData$Did.you.use.the.model.kit. = gsub("\\s+", "", allData$Did.you.use.the.model.kit.)
# One data frame per exercise question.
newman = allData[which(allData$Question == "Newman Projection (1.1)"),]
diaste = allData[which(allData$Question == "Diastereomers model (1.6)"),]
dimeso = allData[which(allData$Question == "Diastereomer Meso (1.3)"),]
enanto = allData[which(allData$Question == "Enantiomers Ring (1.2)"),]
didash = allData[which(allData$Question == "Diastereomers Wedge and Dash (1.5)"),]
#assuming the order of students is the same in allData;
#the commented checks below verify ID alignment between the subsets:
#all(newman$ID == diaste$ID )
#all(newman$ID == dimeso$ID )
#all(newman$ID == enanto$ID )
#all(newman$ID == didash$ID )
# Columns 1-11 hold the per-student fields (ID, names, exam scores, ...) —
# presumably identical across question subsets; verify against the CSV header.
students = newman[,c(1:11)]
# Classify each student's approach to one exercise:
#   "Using"    - answered "Y" to "Did you use the model kit?"
#   "NotNeed"  - did not use the kit but preferred R/S rules or
#                visualizing in their head
#   "NotUsing" - everything else
# Returns a character vector with one entry per row of df.
# Vectorized rewrite: the original grew a vector with append() in a
# 1:nrow(df) loop, which is O(n^2) and fails on zero-row input.
makeColumn4ModelUse = function(df){
  used <- df$Did.you.use.the.model.kit. == "Y"
  other <- df$I.preferred.to.use.other.methods..R.and.S == "Y" |
    df$I.preferred.to.use.other.methods..Visualizing.in.my.head == "Y"
  column <- rep("NotUsing", nrow(df))
  column[other] <- "NotNeed"
  # kit use takes precedence over the "other methods" answers,
  # matching the original if/else ordering
  column[used] <- "Using"
  return(column)
}
# Binary version of makeColumn4ModelUse: "Using" if the student answered
# "Y" to the model-kit question, otherwise "NotUsing".
# Returns a character vector with one entry per row of df.
# Vectorized rewrite: replaces the append()-in-loop pattern, which was
# O(n^2) and errored on zero-row input via 1:nrow(df).
makeColumn4ModelUse2 = function(df){
  column <- rep("NotUsing", nrow(df))
  column[df$Did.you.use.the.model.kit. == "Y"] <- "Using"
  return(column)
}
# Combine kit use and question correctness into one of four labels:
# "Using Correct", "Using Incorrect", "Notusing Correct", "Notusing Incorrect".
# (The lowercase "Notusing" spelling is kept — downstream output uses these
# exact strings.) Returns a character vector with one entry per row of df.
# Vectorized rewrite of the original nested if/else loop (which grew the
# result with append() and failed on zero-row input).
makeColumn4ModelUseCorrect = function(df){
  use <- ifelse(df$Did.you.use.the.model.kit. == "Y", "Using", "Notusing")
  outcome <- ifelse(df$Question.correct == "correct", "Correct", "Incorrect")
  # paste() joins with a single space, matching the original labels
  return(paste(use, outcome))
}
# Bucket the scores in df[[cat]] into bands by position within the observed
# score range, with cut points at 20/40/60/80% of the range.
# Band values follow the original implementation: 0, 20, 40, 80, 100.
# NOTE(review): the jump 40 -> 80 skips 60 — possibly a typo for 60/80 in the
# original; confirm before relying on the band values. (This function is
# currently only referenced from a commented-out line.)
# Vectorized rewrite: avoids growing a vector with append() in a loop and no
# longer shadows base::range with a local variable.
getExamBlock = function(df,cat){
  x <- df[[cat]]
  low <- min(x)
  span <- max(x) - low
  cuts <- low + span * c(0.2, 0.4, 0.6, 0.8)
  bands <- c(0, 20, 40, 80, 100)
  # left.open = TRUE reproduces the original strict ">" comparisons:
  # a score exactly on a cut point falls into the lower band
  grade <- bands[findInterval(x, cuts, left.open = TRUE) + 1]
  return(grade)
}
# Attach the per-exercise classification columns to the per-student table.
# Groups are created in the same order as the original script (three-level,
# binary "*2", combined "*Comb", raw correctness "*Q") so out.csv keeps the
# same column layout for the Python scripts that read it.
exerciseFrames <- list(newman = newman, diaste = diaste, enanto = enanto,
                       didash = didash, dimeso = dimeso)
for (nm in names(exerciseFrames)) {
  students[[nm]] <- makeColumn4ModelUse(exerciseFrames[[nm]])
}
for (nm in names(exerciseFrames)) {
  students[[paste0(nm, "2")]] <- makeColumn4ModelUse2(exerciseFrames[[nm]])
}
for (nm in names(exerciseFrames)) {
  students[[paste0(nm, "Comb")]] <- makeColumn4ModelUseCorrect(exerciseFrames[[nm]])
}
for (nm in names(exerciseFrames)) {
  students[[paste0(nm, "Q")]] <- exerciseFrames[[nm]]$Question.correct
}
#students$Exam1block = getExamBlock(newman,"Exam..1")
# R and Python cannot share variables directly, so the table is written out
# for the Python code to read.
write.csv(students,file = "out.csv")
# Add an `answers` column encoding "<correct?>&<used models?>" as Yes/No
# pairs (e.g. "Yes&No" = correct answer without using the model kit), then
# sort the rows by that label so grouped plots come out in a fixed order.
# Vectorized rewrite of the original four separate per-row if-statements,
# which also errored on zero-row input via 1:nrow(df).
addAnswerColumn = function(df){
  gotIt  <- ifelse(df$Question.correct == "correct", "Yes", "No")
  usedIt <- ifelse(df$Did.you.use.the.model.kit. == "Y", "Yes", "No")
  df$answers <- paste(gotIt, usedIt, sep = "&")
  df <- df[order(df$answers),]
  return(df)
}
library(ggplot2)
library(ggpubr)
library(psych)
# Boxplot of column `myy` grouped by `myx` using ggpubr::ggboxplot:
# jittered points, a dashed line at the overall mean, a global ANOVA
# p-value at the top, and per-group t-tests against the pooled sample
# (ref.group = ".all."). myx/myy are column-name strings.
# Returns the ggplot object.
plotGGbox = function(df,myx,myy,mytitle,myylab){
# drop rows with a missing response value before computing the mean/max
df = df[complete.cases(df[[myy]]),]
maxy = max(df[[myy]])
ggboxplot(df, x = myx, y = myy,
title = mytitle,
color = myx, add = "jitter", legend="none",ylab = myylab) + rotate_x_text(angle = 45) +
geom_hline( yintercept = mean(df[[myy]]), linetype = 2) +
# label.y positions keep the p-value annotations above the tallest box
stat_compare_means(method = "anova", label.y = maxy*1.10) +
coord_cartesian(ylim = c(0, maxy*1.2)) +
stat_compare_means(label = "p.format", size=2.5, method = "t.test", ref.group = ".all.",label.y = maxy*1.05)
}
# Despite its name, this prints the Tukey HSD post-hoc p-values from a
# one-way ANOVA of df[[myy]] by df[[myx]], as a knitr table.
# mytitle feeds the caption; myylab is unused (kept for call symmetry
# with plotGGbox / plotAndTable).
getAnova = function(df,myx,myy,mytitle,myylab){
#get anova
a<- TukeyHSD( aov(df[[myy]] ~ df[[myx]]))
# the result element is literally named "df[[myx]]" because that is the
# term name in the formula above; column 4 holds the adjusted p-values
b<-as.data.frame(a$`df[[myx]]`[,4])
colnames(b) = c("Testing statistical significance: p-values")
print(knitr::kable(b, caption = paste("Anova: ",mytitle)))
}
# Print, for df[[myy]] grouped by df[[myx]]: the grouped boxplot, a
# psych::describeBy summary table, and the Tukey post-hoc p-values.
plotAndTable = function(df,myx,myy,mytitle,myylab){
print(plotGGbox(df,myx,myy,mytitle,myylab))
# mat=TRUE returns a matrix-like frame; the column subset keeps
# group, n, mean, sd, median, min, max and range
table = describeBy(df[[myy]],df[[myx]],mat=TRUE,digits = 2)
print(knitr::kable(table[,c(2,4,5,6,7,10,11,12)],caption=paste("Statistics of ",myylab," based on getting the question correct (Yes/No) & using the models (Yes/No)")))
getAnova(df,myx,myy,mytitle,myylab)
}
library(dplyr)
library(corrplot)
# Run a chi-square test on the cross-table of the two columns of `a`
# (a two-column categorical data frame) and emit, as raw HTML for the
# knitted report, the p-value plus a correlogram of the residuals.
plotChi = function(a){
  # droplevels() removes empty factor levels that would otherwise show up
  # as all-zero "ghost" categories (e.g. "Expert") in the table
  fit <- chisq.test(table(droplevels(a)))
  cat(paste("<p><b>The Chi-square analysis gives a p=", round(fit$p.value, 5), "</b></p>"))
  cat(paste("<p><b>Residuals analysis:</b></p>"))
  cat("A negative residual implies that the measured value is lower than expected and a positive value higher than expected</br>")
  # is.cor = FALSE because residuals are not bounded to [-1, 1]
  corrplot(fit$residuals, is.cor = FALSE)
}
# Print a stacked bar chart of `myy` within `myx`, then run the chi-square
# analysis (plotChi) on the same pair of categorical columns.
# myx: independent variable (course/demographic); myy: dependent variable
# (typically the correctness / cluster column). Both are name strings.
plotBarAndCorr = function(df,myx,myy,myxlabel,myylabel,mytitle){
  # keep just the two categorical columns, dependent variable first
  pair <- df[, c(myy, myx)]
  print(plotBarCategories(pair, myx, myy, myxlabel, myylabel, mytitle))
  plotChi(pair)
}
# Stacked bar chart of counts of `myy` within each level of `myx`, labelled
# with the percentage each `myy` category represents inside its `myx` group.
# myx/myy are column-name strings, hence the !!sym() tidy-eval unquoting.
plotBarCategories = function(a,myx,myy,myxlabel,myylabel,mytitle){
# !!sym() is used instead of the deprecated aes_string() because the
# column names arrive as character variables
#ggplot(a, aes_string(x=myx,fill=myy)) + geom_bar()
a %>%
count(!!sym(myy),!!sym(myx)) %>%
group_by(!!sym(myx)) %>%
# lab: percentage of each myy category within its myx group
mutate(lab = paste0(round(prop.table(n) * 100, 2), '%')) %>%
ggplot(aes(!!sym(myx),n, fill=!!sym(myy))) +
geom_col() + geom_text(aes(label=lab),position='stack',vjust=1.5) +
labs(x=myxlabel,y=myylabel,title=mytitle)
}
| Newman | Diastereomer | Enantiomer Ring | Wedge Dash | Diastereomer Meso |
|---|---|---|---|---|
|  |  |  |  |  |
From the table below we can see:
# Two-way breakdown of kit use vs correctness for one exercise.
# Returns a list of two parallel vectors:
#   [[1]] counts: using, usingCorrect, usingIncorrect,
#                 notusing, notusingCorrect, notusingIncorrect
#   [[2]] percentages: "%" markers plus each group's correct/incorrect
#                      share, to 2 significant digits
# (A list is returned because the second element mixes "%" strings
# with numbers.)
calcDistribution = function(df){
  total <- nrow(df)
  used <- df$Did.you.use.the.model.kit. == "Y"
  right <- df$Question.correct == "correct"
  nUse <- sum(used)
  nUseOk <- sum(used & right)
  nUseBad <- nUse - nUseOk
  nSkip <- total - nUse
  nSkipOk <- sum(!used & right)
  nSkipBad <- nSkip - nSkipOk
  share <- function(part, whole) signif(part / whole * 100, digits = 2)
  return(
    list(
      c(nUse, nUseOk, nUseBad, nSkip, nSkipOk, nSkipBad),
      c("%", share(nUseOk, nUse), share(nUseBad, nUse),
        "%", share(nSkipOk, nSkip), share(nSkipBad, nSkip))
    )
  )
}
# Summary table: for each exercise, the raw counts (6 rows), the
# within-group percentages, and a chi-squared test of kit use vs
# correctness. Column names are kept exactly as published in the report.
distribution = data.frame(matrix(ncol=0,nrow=6))
distTables <- list(
  list(df = newman, counts = "Newman",           pct = "NewmanPercent",           chi = "NewmanChiSquare"),
  list(df = diaste, counts = "Diastereomer",     pct = "DiastereomerPercent",     chi = "DiasteChiSquare"),
  # "Percert" typo preserved: it is the exact column name in the report
  list(df = enanto, counts = "EnantiomerRing",   pct = "EnantiomerRingPercert",   chi = "EnantoChiSquare"),
  list(df = didash, counts = "WedgeDash",        pct = "WedgeDashPercent",        chi = "DidashChiSquare"),
  list(df = dimeso, counts = "DiastereomerMeso", pct = "DiastereomerMesoPercent", chi = "DimesoChiSquare")
)
for (entry in distTables) {
  # unlist() coerces counts and percentage strings to one character vector
  vals <- unlist(calcDistribution(entry$df))
  distribution[[entry$counts]] <- vals[1:6]
  distribution[[entry$pct]] <- vals[7:12]
  a <- chisq.test(table(entry$df$Did.you.use.the.model.kit., entry$df$Question.correct))
  distribution[[entry$chi]] <- c("Chi-squared", "p = ", round(a$p.value, 4), " ", " ", " ")
}
rownames(distribution) = c("Using Models N-Total","Using Models N-Correct","Using Models N-Incorrect",
"Not Using N-Total","Not Using N-Correct","Not Using N-Incorrect")
knitr::kable(t(distribution),digits = 1)
Using Models N-Total | Using Models N-Correct | Using Models N-Incorrect | Not Using N-Total | Not Using N-Correct | Not Using N-Incorrect | |
---|---|---|---|---|---|---|
Newman | 80 | 59 | 21 | 70 | 46 | 24 |
NewmanPercent | % | 74 | 26 | % | 66 | 34 |
NewmanChiSquare | Chi-squared | p = | 0.3719 | |||
Diastereomer | 80 | 61 | 19 | 70 | 44 | 26 |
DiastereomerPercent | % | 76 | 24 | % | 63 | 37 |
DiasteChiSquare | Chi-squared | p = | 0.108 | |||
EnantiomerRing | 36 | 24 | 12 | 114 | 72 | 42 |
EnantiomerRingPercert | % | 67 | 33 | % | 63 | 37 |
EnantoChiSquare | Chi-squared | p = | 0.8546 | |||
WedgeDash | 94 | 64 | 30 | 56 | 37 | 19 |
WedgeDashPercent | % | 68 | 32 | % | 66 | 34 |
DidashChiSquare | Chi-squared | p = | 0.9407 | |||
DiastereomerMeso | 49 | 35 | 14 | 101 | 75 | 26 |
DiastereomerMesoPercent | % | 71 | 29 | % | 74 | 26 |
DimesoChiSquare | Chi-squared | p = | 0.8645 |
We will now show the same information as in the table above, but with graphs. The idea is that even when the chi-square test shows no significance, in some cases there are trends worth recognizing.
library(dplyr)
library(tidyr)
library(ggplot2)
# Reshape only the known columns into long format: one row per student per
# (item, measure), where "*2" columns hold kit usage (Using/NotUsing) and
# "*Q" columns hold correctness (correct/incorrect).
df_long <- students %>%
pivot_longer(
cols = c(newman2, diaste2, enanto2, didash2, dimeso2,
newmanQ, diasteQ, enantoQ, didashQ, dimesoQ),
names_to = "variable",
values_to = "value"
) %>%
mutate(
item = case_when(
variable %in% c("enanto2", "enantoQ") ~ "enanto",
variable %in% c("dimeso2", "dimesoQ") ~ "dimeso",
variable %in% c("diaste2", "diasteQ") ~ "diaste",
variable %in% c("newman2", "newmanQ") ~ "newman",
variable %in% c("didash2", "didashQ") ~ "didash"
),
# a trailing "2" marks the usage columns; everything else is correctness
type = ifelse(grepl("2$", variable), "Usage", "Correctness"),
item = factor(item, levels = c("enanto", "dimeso", "diaste", "newman", "didash"))
)
# Side-by-side counts of usage and correctness per item.
ggplot(df_long, aes(x = item, fill = value)) +
geom_bar(position = "dodge") +
facet_wrap(~type, scales = "free_y") +
labs(x = "Item", y = "Count", fill = "Response",
title = "Distribution of Usage and Correctness") +
theme_minimal()
# Same data as stacked proportions.
ggplot(df_long, aes(x = item, fill = value)) +
geom_bar(position = "fill") +
facet_wrap(~type) +
labs(x = "Item", y = "Proportion", fill = "Response") +
theme_minimal()
# Correctness proportions only.
ggplot(df_long %>% filter(type == "Correctness"),
aes(x = item, fill = value)) +
geom_bar(position = "fill") +
scale_fill_brewer(palette = "Set1", name = "Correctness") +
labs(
title = "Distribution of Usage and Correctness",
x = "Item",
y = "Proportion"
) +
theme_minimal() +
theme(legend.position = "left")
# Usage proportions only.
ggplot(df_long %>% filter(type == "Usage"),
aes(x = item, fill = value)) +
geom_bar(position = "fill") +
scale_fill_brewer(palette = "Paired", name = "Model usage") +
labs(
x = "Item",
y = "Proportion"
) +
theme_minimal() +
theme(legend.position = "right")
# Joint (usage x correctness) counts per item for the binary classification.
df_combo <- students %>%
select(enanto2, dimeso2, diaste2, newman2, didash2,
enantoQ, dimesoQ, diasteQ, newmanQ, didashQ) %>%
mutate(id = row_number()) %>%
pivot_longer(
cols = c(enanto2, dimeso2, diaste2, newman2, didash2),
names_to = "item2", values_to = "usage"
) %>%
mutate(
# NOTE(review): correctness is looked up in `students` via the pre-pivot
# row id; the retained *Q columns of the pivoted frame would serve the
# same purpose — verify they agree
correctness = case_when(
item2 == "enanto2" ~ students$enantoQ[id],
item2 == "dimeso2" ~ students$dimesoQ[id],
item2 == "diaste2" ~ students$diasteQ[id],
item2 == "newman2" ~ students$newmanQ[id],
item2 == "didash2" ~ students$didashQ[id]
),
# strip the trailing "2" to get the plain item name
item = case_when(
item2 == "enanto2" ~ "enanto",
item2 == "dimeso2" ~ "dimeso",
item2 == "diaste2" ~ "diaste",
item2 == "newman2" ~ "newman",
item2 == "didash2" ~ "didash"
),
item = factor(item, levels = c("enanto", "dimeso", "diaste", "newman", "didash"))
) %>%
count(item, usage, correctness)
# Plot heatmap of joint distributions
ggplot(df_combo, aes(x = usage, y = correctness, fill = n)) +
geom_tile(color = "white") +
facet_wrap(~item) +
scale_fill_gradient(low = "white", high = "steelblue") +
labs(title = "Usage × Correctness per Item", x = "Usage", y = "Correctness", fill = "Count") +
theme_minimal()
# Same counts as dodged bars.
ggplot(df_combo, aes(x = usage, y = n, fill = correctness)) +
geom_col(position = "dodge") +
facet_wrap(~item) +
labs(
title = "Usage × Correctness per Item",
x = "Usage",
y = "Count",
fill = "Correctness"
) +
theme_minimal()
# Percentages within each (item, usage) cell, plus label positions at the
# middle of each stacked segment.
df_combo2 <- df_combo %>%
group_by(item, usage) %>%
# reorder correctness so the larger n is plotted last (top of stack)
# NOTE(review): factor() inside a grouped mutate builds a per-group level
# order; dplyr then combines the groups' differing levels — confirm the
# resulting stacking order is the one intended
mutate(
correctness = factor(correctness, levels = correctness[order(n)]),
pct = n / sum(n) * 100,
pct_label = paste0(round(pct, 1), "%"),
ypos = cumsum(n) - n/2
)
# Stacked counts with percentage labels.
ggplot(df_combo2, aes(x = usage, y = n, fill = correctness)) +
geom_col(position = "stack") +
geom_text(aes(y = ypos, label = pct_label),
color = "black", size = 3) +
facet_wrap(~item) +
labs(
title = "Usage × Correctness per Item",
x = "Usage",
y = "Count",
fill = "Correctness"
) +
theme_minimal()
# Chi-square + residual correlogram of kit use vs correctness for each
# exercise. The unindented text between the calls is the knitted output.
plotBarAndCorr(newman,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Newman projection question")
The Chi-square analysis gives a p= 0.37193
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(diaste,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Diastereomer question")
The Chi-square analysis gives a p= 0.10802
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(enanto,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Enantiomer ring question")
The Chi-square analysis gives a p= 0.85463
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(didash,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Wedge Dash question")
The Chi-square analysis gives a p= 0.9407
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(dimeso,"Did.you.use.the.model.kit.","Question.correct","Using Models","N of students","Diastereomer Meso question")
The Chi-square analysis gives a p= 0.86454
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
We define the group “Don’t need models” as those students who did not use the models and replied that it is because they either visualize the structure in their head or use the R/S approach.
With the performance of these three groups shown in the table below, we can extract the following observations:
# Three-way breakdown for one exercise: Using / Not Needed (prefers R-S
# rules or mental visualization) / Not Using, each split by correctness,
# plus a chi-squared test over the 3x2 count matrix.
# Returns a list: [[1]] the nine counts, [[2]] the "%"-interleaved
# percentages (2 significant digits), [[3]] the chi-squared p-value.
# (A list, not a vector, because the elements mix characters and numbers.)
# NOTE(review): "not needed" is counted independently of kit use, so a
# student who used the kit AND marked an alternative method would be
# counted in both groups (making the totals exceed nrow(df)); this
# apparently does not happen in the dataset — verify.
calcDistribution3 = function(df){
  total <- nrow(df)
  used <- df$Did.you.use.the.model.kit. == "Y"
  right <- df$Question.correct == "correct"
  altMethod <- df$I.preferred.to.use.other.methods..R.and.S == "Y" |
    df$I.preferred.to.use.other.methods..Visualizing.in.my.head == "Y"
  nUse <- sum(used)
  nUseOk <- sum(used & right)
  nUseBad <- nUse - nUseOk
  nAlt <- sum(altMethod)
  nAltOk <- sum(altMethod & right)
  nAltBad <- nAlt - nAltOk
  nSkip <- total - nUse - nAlt
  nSkipOk <- sum(!used & !altMethod & right)
  nSkipBad <- nSkip - nSkipOk
  # rows: Using / Not Needed / Not Using; columns: correct / incorrect
  counts <- matrix(c(nUseOk, nUseBad, nAltOk, nAltBad, nSkipOk, nSkipBad),
                   nrow = 3, ncol = 2, byrow = TRUE)
  fit <- chisq.test(counts)
  share <- function(part, whole) signif(part / whole * 100, digits = 2)
  return(
    list(
      c(nUse, nUseOk, nUseBad, nAlt, nAltOk, nAltBad, nSkip, nSkipOk, nSkipBad),
      c("%", share(nUseOk, nUse), share(nUseBad, nUse),
        "%", share(nAltOk, nAlt), share(nAltBad, nAlt),
        "%", share(nSkipOk, nSkip), share(nSkipBad, nSkip)),
      c(fit$p.value)
    )
  )
}
# Same summary table as `distribution`, now with three groups:
# Using / Not Needed / Not Using. Column names kept exactly as published.
distribution3 = data.frame(matrix(ncol=0,nrow=9))
dist3Tables <- list(
  list(df = newman, counts = "Newman",           pct = "NewmanPercent",           chi = "NewmanChiSquare"),
  list(df = diaste, counts = "Diastereomer",     pct = "DiastereomerPercent",     chi = "DiasteChiSquare"),
  # "Percert" typo preserved: it is the exact column name in the report
  list(df = enanto, counts = "EnantiomerRing",   pct = "EnantiomerRingPercert",   chi = "EnantiomerChiSquare"),
  list(df = didash, counts = "WedgeDash",        pct = "WedgeDashPercent",        chi = "WedgeDashChiSquare"),
  # NOTE: chi column name "DiastereomerChiSquare" (not *Meso*) kept as-is
  list(df = dimeso, counts = "DiastereomerMeso", pct = "DiastereomerMesoPercent", chi = "DiastereomerChiSquare")
)
for (entry in dist3Tables) {
  # unlist() coerces everything to character; slot 19 is the p-value and is
  # converted back to numeric before rounding, as in the original
  a <- unlist(calcDistribution3(entry$df))
  distribution3[[entry$counts]] <- a[1:9]
  distribution3[[entry$pct]] <- a[10:18]
  distribution3[[entry$chi]] <- c("Chi-squared", "p = ", round(as.numeric(a[19]), 4), rep(" ", 6))
}
rownames(distribution3) = c("Using Models N-Total","Using Models N-Correct","Using Models N-Incorrect",
"Not Needed N-Total","Not Needed N-Correct","Not Needed N-Incorrect",
"Not Using N-Total","Not Using N-Correct","Not Using N-Incorrect")
knitr::kable(t(distribution3),digits = 1)
Using Models N-Total | Using Models N-Correct | Using Models N-Incorrect | Not Needed N-Total | Not Needed N-Correct | Not Needed N-Incorrect | Not Using N-Total | Not Using N-Correct | Not Using N-Incorrect | |
---|---|---|---|---|---|---|---|---|---|
Newman | 80 | 59 | 21 | 26 | 21 | 5 | 44 | 27 | 17 |
NewmanPercent | % | 74 | 26 | % | 81 | 19 | % | 61 | 39 |
NewmanChiSquare | Chi-squared | p = | 0.1738 | ||||||
Diastereomer | 80 | 61 | 19 | 28 | 21 | 7 | 42 | 24 | 18 |
DiastereomerPercent | % | 76 | 24 | % | 75 | 25 | % | 57 | 43 |
DiasteChiSquare | Chi-squared | p = | 0.0757 | ||||||
EnantiomerRing | 36 | 24 | 12 | 54 | 40 | 14 | 60 | 34 | 26 |
EnantiomerRingPercert | % | 67 | 33 | % | 74 | 26 | % | 57 | 43 |
EnantiomerChiSquare | Chi-squared | p = | 0.1466 | ||||||
WedgeDash | 94 | 64 | 30 | 13 | 11 | 2 | 43 | 27 | 16 |
WedgeDashPercent | % | 68 | 32 | % | 85 | 15 | % | 63 | 37 |
WedgeDashChiSquare | Chi-squared | p = | 0.3352 | ||||||
DiastereomerMeso | 49 | 35 | 14 | 50 | 40 | 10 | 51 | 36 | 15 |
DiastereomerMesoPercent | % | 71 | 29 | % | 80 | 20 | % | 71 | 29 |
DiastereomerChiSquare | Chi-squared | p = | 0.4935 |
# Rebuild df_combo using the three-level classification (Using / NotUsing /
# NotNeed) instead of the binary "*2" columns, then repeat the stacked plot.
df_combo <- students %>%
select(enanto, dimeso, diaste, newman, didash,
enantoQ, dimesoQ, diasteQ, newmanQ, didashQ) %>%
mutate(id = row_number()) %>%
pivot_longer(
cols = c(enanto, dimeso, diaste, newman, didash),
names_to = "item2", values_to = "usage"
) %>%
mutate(
# correctness looked up in `students` via the pre-pivot row id
correctness = case_when(
item2 == "enanto" ~ students$enantoQ[id],
item2 == "dimeso" ~ students$dimesoQ[id],
item2 == "diaste" ~ students$diasteQ[id],
item2 == "newman" ~ students$newmanQ[id],
item2 == "didash" ~ students$didashQ[id]
),
# identity mapping, kept for symmetry with the binary version above
item = case_when(
item2 == "enanto" ~ "enanto",
item2 == "dimeso" ~ "dimeso",
item2 == "diaste" ~ "diaste",
item2 == "newman" ~ "newman",
item2 == "didash" ~ "didash"
),
item = factor(item, levels = c("enanto", "dimeso", "diaste", "newman", "didash"))
) %>%
count(item, usage, correctness)
# Percentages within each (item, usage) cell (same approach as before).
df_combo2 <- df_combo %>%
group_by(item, usage) %>%
# reorder correctness so the larger n is plotted last (top of stack)
mutate(
correctness = factor(correctness, levels = correctness[order(n)]),
pct = n / sum(n) * 100,
pct_label = paste0(round(pct, 1), "%"),
ypos = cumsum(n) - n/2
)
ggplot(df_combo2, aes(x = usage, y = n, fill = correctness)) +
geom_col(position = "stack") +
geom_text(aes(y = ypos, label = pct_label),
color = "black", size = 3) +
facet_wrap(~item) +
labs(
title = "Usage × Correctness per Item",
x = "Usage",
y = "Count",
fill = "Correctness"
) +
theme_minimal()
# Chi-square analyses of the three-level classification vs correctness.
plotBarAndCorr(students,"newman","newmanQ","Using Models","N of students","Newman projection question")
The Chi-square analysis gives a p= 0.11676
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
# NOTE(review): the output below (p= 0.05442) has no visible call — the
# diaste analysis call appears to be missing from this chunk.
The Chi-square analysis gives a p= 0.05442
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"enanto","enantoQ","Using Models","N of students","Enantiomer ring question")
The Chi-square analysis gives a p= 0.07037
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
# NOTE(review): likewise, the call producing the next output (p= 0.34409,
# presumably the didash / Wedge-Dash analysis) is not visible here.
The Chi-square analysis gives a p= 0.34409
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"dimeso","dimesoQ","Using Models","N of students","Diastereomer Meso question")
The Chi-square analysis gives a p= 0.16913
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
# Final-exam scores; stereochemistry-related sub-questions (Q3/Q4/Q5 below)
# are summed into stereochem_grade and joined onto `students` by name.
finalExam = read.csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/Final_Exam_Scores.csv",header = TRUE)
#Q3 is multiple choice on stereochemistry
#X3.1..A..2.0.pts." "X3.2..B..2.0.pts." "X3.3..C..2.0.pts." "X3.4..D..2.0.pts.
#Q4 X4..Q4..2.0.pts is assigning S/R
#Q5 is declaring achiral, chiral, meso: "X5.1..1..1.0.pts.""X5.2..2..1.0.pts." "X5.3..3..1.0.pts."
# Make sure finalExam$Name matches (no space after comma)
finalExam <- finalExam %>%
mutate(
stereochem_grade = rowSums(select(., starts_with("X3."), starts_with("X4."), starts_with("X5.")), na.rm = TRUE),
Name = paste(Last.Name, First.Name, sep = ",")
) %>%
relocate(Name, .before = 1) %>%
relocate(stereochem_grade, .before = 2)
library(stringr)
# Normalize students$Name to "Last,First" (drop middle names and any space
# after the comma) so it matches finalExam$Name for the join below.
students <- students %>%
mutate(
# Remove middle name if it exists
Name = str_replace(Name, "^([^,]+,[^ ]+).*", "\\1"),
# Remove any space after comma
Name = str_replace(Name, ",\\s+", ",")
)%>%
relocate(Name, .before = 1)
# Attach the stereochemistry subscore; students without a match get NA.
students <- students %>%
left_join(
finalExam %>% select(Name, stereochem_grade),
by = "Name"
)
How do students who use, do not use, or do not need the models perform in the final exam?
#3.1
# Final-exam performance by usage group (Using / NotUsing / NotNeed).
# Students with a recorded score of 0 (presumably did not take the exam —
# confirm) are excluded.
students_long <- students %>%
filter(Final.Exam != 0) %>%
pivot_longer(
cols = c(newman, diaste, enanto, didash, dimeso),
names_to = "Category",
values_to = "Usage"
)
ggplot(students_long, aes(x = Usage, y = Final.Exam, fill = Usage)) +
geom_boxplot(alpha = 0.7, outlier.color = "red") +
facet_wrap(~ Category) +
labs(
title = "Final Exam Performance by Usage Group",
x = "Usage Group",
y = "Final Exam Score"
) +
theme_minimal() +
theme(legend.position = "none")
# Mean +/- standard error of the final exam per usage group and exercise.
students_long %>%
group_by(Category, Usage) %>%
summarise(
mean_exam = mean(Final.Exam, na.rm = TRUE),
se_exam = sd(Final.Exam, na.rm = TRUE)/sqrt(n()),
.groups = "drop"
) %>%
ggplot(aes(x = Usage, y = mean_exam, fill = Usage)) +
geom_col(position = position_dodge()) +
geom_errorbar(aes(ymin = mean_exam - se_exam, ymax = mean_exam + se_exam),
width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~ Category) +
labs(
title = "Average Final Exam by Usage Group (Excluding 0s)",
x = "Usage Group",
y = "Mean Final Exam"
) +
theme_minimal()
Let’s look at the actual questions in the final exam that may contain stereochemistry questions
# Same analysis restricted to the stereochemistry subscore of the final
# exam (stereochem_grade, built from Q3/Q4/Q5 above); zeros excluded.
students_long <- students %>%
filter(stereochem_grade != 0) %>%
pivot_longer(
cols = c(newman, diaste, enanto, didash, dimeso),
names_to = "Category",
values_to = "Usage"
)
ggplot(students_long, aes(x = Usage, y = stereochem_grade, fill = Usage)) +
geom_boxplot(alpha = 0.7, outlier.color = "red") +
facet_wrap(~ Category) +
labs(
title = "Final Exam Stereochem questions by Usage Group",
x = "Usage Group",
y = "Final Stereochem Score"
) +
theme_minimal() +
theme(legend.position = "none")
# Mean +/- standard error of the stereochem subscore per group.
students_long %>%
group_by(Category, Usage) %>%
summarise(
mean_exam = mean(stereochem_grade, na.rm = TRUE),
se_exam = sd(stereochem_grade, na.rm = TRUE)/sqrt(n()),
.groups = "drop"
) %>%
ggplot(aes(x = Usage, y = mean_exam, fill = Usage)) +
geom_col(position = position_dodge()) +
geom_errorbar(aes(ymin = mean_exam - se_exam, ymax = mean_exam + se_exam),
width = 0.2, position = position_dodge(0.9)) +
facet_wrap(~ Category) +
labs(
title = "Average Final Stereochem by Usage Group (Excluding 0s)",
x = "Usage Group",
y = "Mean Final Stereochem"
) +
theme_minimal()
#install.packages("circlize")
library(circlize)
# Chord diagram of co-usage: for each pair of exercises, the number of
# students classified "Using" in both.
# assume 'students' dataframe exists with columns:
cols <- c("newman", "diaste", "enanto", "didash", "dimeso")
# safeguard: keep only those columns if present
cols <- intersect(cols, names(students))
st <- students[ , cols, drop = FALSE]
# build symmetric co-usage matrix: counts of students Using in both columns
n <- length(cols)
mat <- matrix(0, nrow = n, ncol = n, dimnames = list(cols, cols))
for(i in seq_len(n)) {
for(j in seq_len(n)) {
mat[i, j] <- sum(st[[i]] == "Using" & st[[j]] == "Using", na.rm = TRUE)
}
}
diag(mat) <- 0 # optional: remove self-links
# quick check (the ## lines below are the recorded output of this print)
print(mat)
## newman diaste enanto didash dimeso
## newman 0 60 28 71 37
## diaste 60 0 30 73 37
## enanto 28 30 0 30 25
## didash 71 73 30 0 42
## dimeso 37 37 25 42 0
# choose a color for each sector
grid_col <- structure(rainbow(n), names = cols)
# draw chord diagram
circos.clear()
circos.par(gap.degree = 6) # small gap between sectors
chordDiagram(mat, annotationTrack = c("name", "grid"), preAllocateTracks = 1,
grid.col = grid_col)
# add title
title("Chord diagram — co-usage of 'Using' across columns")
library(ggplot2)
library(ggalluvial)
library(dplyr)
# Alluvial (flow) diagram of binary model use (Using / NotUsing) across the
# five exercises.
# columns in the order you want
cols <- c("enanto2", "dimeso2", "diaste2", "newman2", "didash2")
colsRev <- rev(cols)  # NOTE(review): unused; kept for compatibility
# fix factor levels so "Using" / "NotUsing" keep a stable stratum order
students_long <- students %>%
  mutate(across(all_of(cols), ~factor(.x, levels = c("Using", "NotUsing"))))
# plot
ggplot(students_long,
       aes(axis1 = enanto2, axis2 = dimeso2, axis3 = diaste2,
           axis4 = newman2, axis5 = didash2)) +
  geom_alluvium(aes(fill = enanto2), width = 1/12) +
  geom_stratum(width = 1/8, fill = "grey80", color = "black") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  theme_minimal() +
  theme(axis.title.x = element_blank()) +
  # FIX: the original added scale_x_discrete twice (expand .05, then .2);
  # ggplot replaces the first with the last (with a console message), so
  # the dead first call was removed. The rendered plot is unchanged.
  scale_x_discrete(limits = cols, expand = c(.2, .2)) +
  labs(x = NULL, y = "Number of students")
# reverse the order
cols <- rev(cols)
students_long <- students %>%
  mutate(across(all_of(cols), ~factor(.x, levels = c("Using", "NotUsing"))))
ggplot(students_long,
       aes(axis1 = didash2, axis2 = newman2, axis3 = diaste2,
           axis4 = dimeso2, axis5 = enanto2)) +
  geom_alluvium(aes(fill = didash2), width = 1/12) +
  geom_stratum(width = 1/8, fill = "grey80", color = "black") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  theme_minimal() +
  theme(axis.title.x = element_blank()) +
  # FIX: duplicate scale_x_discrete removed here as well
  scale_x_discrete(limits = cols, expand = c(.2, .2)) +
  labs(x = NULL, y = "Number of students")
#```{python }
#modelUse = students[["didash2","diaste2","newman2","dimeso2","enanto2","Exam..1"]]
#sortedModelUse = modelUse.sort_values(by=['didash2','diaste2','newman2','dimeso2','enanto2'], axis=0, ascending=False)
#fig = px.parallel_categories(sortedModelUse,
# color="Exam..1",
# #color_continuous_scale='Bluered_r',
# labels={
# "didash2":"Wedge/Dash",
# "diaste2":"Diastereomers",
# "newman2":"Newman",
# "dimeso2":"Diastereomer Meso",
# "enanto2":"Enantiomer Ring",
# })
#fig.show()
library(ggalluvial)
# Alluvial diagram of the three-level classification
# (Using / NotUsing / NotNeed) across the five exercises.
# columns in the order you want
cols <- c("enanto", "dimeso", "diaste", "newman", "didash")
# fix factor levels so the strata keep a stable order
students_long <- students %>%
  mutate(across(all_of(cols), ~factor(.x, levels = c("Using", "NotUsing", "NotNeed"))))
# plot
ggplot(students_long,
       aes(axis1 = enanto, axis2 = dimeso, axis3 = diaste,
           axis4 = newman, axis5 = didash)) +
  geom_alluvium(aes(fill = enanto), width = 1/12) +
  geom_stratum(width = 1/8, fill = "grey80", color = "black") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  theme_minimal() +
  theme(axis.title.x = element_blank()) +
  # FIX: the original added scale_x_discrete twice (expand .05, then .2);
  # ggplot replaces the first with the last, so the dead first call was
  # removed. The rendered plot is unchanged.
  scale_x_discrete(limits = cols, expand = c(.2, .2)) +
  labs(x = NULL, y = "Number of students")
# same plot, axis order reversed
cols <- rev(cols)
students_long <- students %>%
  mutate(across(all_of(cols), ~factor(.x, levels = c("Using", "NotUsing", "NotNeed"))))
ggplot(students_long,
       aes(axis1 = didash, axis2 = newman, axis3 = diaste,
           axis4 = dimeso, axis5 = enanto)) +
  geom_alluvium(aes(fill = didash), width = 1/12) +
  geom_stratum(width = 1/8, fill = "grey80", color = "black") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  theme_minimal() +
  theme(axis.title.x = element_blank()) +
  # FIX: duplicate scale_x_discrete removed here as well
  scale_x_discrete(limits = cols, expand = c(.2, .2)) +
  labs(x = NULL, y = "Number of students")
#```{python }
#import pandas as pd
#import plotly.express as px
#students = pd.read_csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/out.csv")
#modelUse = students[["didash","diaste","newman","dimeso","enanto","Exam..1"]]
#sortedModelUse = modelUse.sort_values(by=['didash','diaste','newman','dimeso','enanto'], axis=0, ascending=False)
#fig = px.parallel_categories(sortedModelUse,
# color="Exam..1",
# #color_continuous_scale='Bluered_r',
# labels={
# "didash":"Wedge/Dash",
# "diaste":"Diastereomers",
# "newman":"Newman",
# "dimeso":"Diastereomer Meso",
# "enanto":"Enantiomer Ring",
# })
#fig.show()
#```{python}
#modelUse = students[["didashComb","diasteComb","newmanComb","dimesoComb","enantoComb","Exam..1"]]
#sortedModelUse = modelUse.sort_values(by=['didashComb','diasteComb','newmanComb','dimesoComb','enantoComb'], axis=0, ascending=False)
#fig = px.parallel_categories(sortedModelUse,
# color="Exam..1",
# #color_continuous_scale='Bluered_r',
# labels={
# "didashComb":"Wedge/Dash",
# "diasteComb":"Diastereomers",
# "newmanComb":"Newman",
# "dimesoComb":"Diastereomer Meso",
# "enantoComb":"Enantiomer Ring",
# })
#fig.show()
#maybe this can be done easier with parallel categories https://plotly.com/python/parallel-categories-diagram/
# Link definitions for the Sankey diagram below: 12 source nodes (3 model-use
# states for each of the first 4 exercises), each fanning out to the 3 states
# of the following exercise.
#
# Bug fix: the original loop did `mysource = append(source, i)`, appending to
# the base R *function* `source` instead of the vector `mysource`, producing a
# list containing a closure rather than the intended 0,0,0,1,1,1,... sequence.
mysource <- rep(0:11, each = 3)
mytarget <- c(3, 4, 5, 3, 4, 5, 3, 4, 5,
              6, 7, 8, 6, 7, 8, 6, 7, 8,
              9, 10, 11, 9, 10, 11, 9, 10, 11,
              12, 13, 14, 12, 13, 14, 12, 13, 14)
# One unit of flow per link (placeholder weights); must match the link count.
# Bug fix: this was built with the misused `rep(1, each = 45)` (45 values for
# 36 links) and then clobbered to c() by a leftover debug loop, which left the
# plotly `value` argument empty.
myvalue <- rep(1, length(mytarget))
# Exercises ordered from most used to least used.
exercises <- c("didash", "newman", "diaste", "dimeso", "enanto")
library(plotly)
# The same three model-use states label the nodes of each of the five
# exercise columns in the Sankey diagram.
node_labels <- rep(c("Using", "Not Using", "Not Needed"), times = 5)
# Sankey of how students flow between model-use states across exercises;
# mysource / mytarget / myvalue are built above.
fig <- plot_ly(
  type = "sankey",
  orientation = "h",
  node = list(
    label = node_labels,
    color = rep("blue", 15),
    pad = 15,
    thickness = 20,
    line = list(color = "black", width = 0.5)
  ),
  link = list(
    source = mysource,
    target = mytarget,
    value = myvalue
  )
)
fig <- fig %>%
  layout(
    title = "Students Model Use Through Exercises",
    font = list(size = 10)
  )
fig
The relevant data that we are going to test against course performance for each exercise is presented below.
Very few students reply “no” to the question “have you found the models helpful?”, so the statistics would be skewed; it may be better to just analyze whether they used the models or not.
The p-values shown in the boxplot are not from an ANOVA analysis. Rather, each is a t-test of that group against the rest of the students.
# Add the combined "correct answer & model use" label (values like "Yes&No",
# per the tables below) for the Newman projection exercise.
# addAnswerColumn is defined earlier in this file.
newman = addAnswerColumn(newman)
# Boxplot plus summary and p-value tables of Exam 1 scores, split by that
# label. NOTE(review): the p-values appear to be pairwise t-tests between
# groups rather than an ANOVA -- confirm against plotAndTable's definition.
plotAndTable(newman,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 24 | 33.26 | 8.79 | 36.38 | 19.75 | 44.75 | 25.00 |
X12 | No&Yes | 21 | 35.15 | 6.51 | 35.50 | 24.00 | 44.25 | 20.25 |
X13 | Yes&No | 46 | 41.70 | 5.35 | 41.88 | 26.50 | 49.50 | 23.00 |
X14 | Yes&Yes | 59 | 41.71 | 7.23 | 43.75 | 22.75 | 49.75 | 27.00 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.7943966 |
Yes&No-No&No | 0.0000178 |
Yes&Yes-No&No | 0.0000073 |
Yes&No-No&Yes | 0.0024101 |
Yes&Yes-No&Yes | 0.0014855 |
Yes&Yes-Yes&No | 0.9999998 |
plotAndTable(newman,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 23 | 36.76 | 6.52 | 37.40 | 23.30 | 46.0 | 22.70 |
X12 | No&Yes | 21 | 36.32 | 7.42 | 35.75 | 20.40 | 47.5 | 27.10 |
X13 | Yes&No | 45 | 40.98 | 6.04 | 41.15 | 19.75 | 49.5 | 29.75 |
X14 | Yes&Yes | 59 | 41.92 | 5.66 | 43.75 | 22.50 | 50.0 | 27.50 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.9954546 |
Yes&No-No&No | 0.0422499 |
Yes&Yes-No&No | 0.0048311 |
Yes&No-No&Yes | 0.0254227 |
Yes&Yes-No&Yes | 0.0027572 |
Yes&Yes-Yes&No | 0.8683489 |
plotAndTable(newman,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 24 | 31.95 | 11.25 | 35.17 | 7.50 | 45.50 | 38.00 |
X12 | No&Yes | 20 | 28.43 | 10.21 | 28.50 | 11.50 | 47.05 | 35.55 |
X13 | Yes&No | 46 | 37.68 | 9.13 | 39.15 | 0.00 | 49.50 | 49.50 |
X14 | Yes&Yes | 58 | 39.81 | 8.31 | 41.35 | 19.75 | 50.00 | 30.25 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.5997337 |
Yes&No-No&No | 0.0746253 |
Yes&Yes-No&No | 0.0038671 |
Yes&No-No&Yes | 0.0017343 |
Yes&Yes-No&Yes | 0.0000361 |
Yes&Yes-Yes&No | 0.6592264 |
plotAndTable(newman,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 24 | 59.95 | 15.85 | 62.25 | 36.00 | 90.00 | 54.00 |
X12 | No&Yes | 21 | 54.89 | 16.59 | 54.25 | 29.25 | 82.75 | 53.50 |
X13 | Yes&No | 46 | 70.46 | 17.72 | 70.88 | 0.00 | 92.75 | 92.75 |
X14 | Yes&Yes | 59 | 74.71 | 16.29 | 77.00 | 34.50 | 99.00 | 64.50 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.7422885 |
Yes&No-No&No | 0.0648681 |
Yes&Yes-No&No | 0.0020852 |
Yes&No-No&Yes | 0.0030441 |
Yes&Yes-No&Yes | 0.0000410 |
Yes&Yes-Yes&No | 0.5695576 |
plotAndTable(newman,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 24 | 76.31 | 14.80 | 80.56 | 53.44 | 99.44 | 46.00 |
X12 | No&Yes | 21 | 71.67 | 16.07 | 72.78 | 46.39 | 97.78 | 51.39 |
X13 | Yes&No | 46 | 81.04 | 16.17 | 82.37 | 30.00 | 103.33 | 73.33 |
X14 | Yes&Yes | 59 | 87.54 | 13.77 | 90.63 | 51.39 | 105.56 | 54.17 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.7300690 |
Yes&No-No&No | 0.5963749 |
Yes&Yes-No&No | 0.0128058 |
Yes&No-No&Yes | 0.0880143 |
Yes&Yes-No&Yes | 0.0003173 |
Yes&Yes-Yes&No | 0.1281489 |
# Add the combined "correct answer & model use" label (values like "Yes&No",
# per the tables below) for the diastereomers exercise.
# addAnswerColumn is defined earlier in this file.
diaste = addAnswerColumn(diaste)
# Boxplot plus summary and p-value tables of Exam 1 scores, split by that
# label. NOTE(review): the p-values appear to be pairwise t-tests between
# groups rather than an ANOVA -- confirm against plotAndTable's definition.
plotAndTable(diaste,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 34.61 | 7.22 | 36.50 | 21.50 | 47.50 | 26.00 |
X12 | No&Yes | 19 | 35.36 | 6.98 | 35.50 | 19.75 | 48.00 | 28.25 |
X13 | Yes&No | 44 | 40.47 | 7.27 | 41.38 | 19.75 | 49.75 | 30.00 |
X14 | Yes&Yes | 61 | 42.02 | 7.01 | 44.00 | 22.75 | 49.75 | 27.00 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.9853766 |
Yes&No-No&No | 0.0059834 |
Yes&Yes-No&No | 0.0001015 |
Yes&No-No&Yes | 0.0476465 |
Yes&Yes-No&Yes | 0.0027828 |
Yes&Yes-Yes&No | 0.6919621 |
plotAndTable(diaste,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 24 | 37.10 | 6.50 | 37.10 | 19.75 | 49.0 | 29.25 |
X12 | No&Yes | 19 | 38.42 | 6.00 | 36.70 | 23.30 | 47.5 | 24.20 |
X13 | Yes&No | 44 | 39.88 | 6.45 | 40.58 | 20.40 | 49.5 | 29.10 |
X14 | Yes&Yes | 61 | 41.82 | 6.35 | 44.25 | 22.50 | 50.0 | 27.50 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.9063228 |
Yes&No-No&No | 0.3161622 |
Yes&Yes-No&No | 0.0130503 |
Yes&No-No&Yes | 0.8370842 |
Yes&Yes-No&Yes | 0.1794463 |
Yes&Yes-Yes&No | 0.4137197 |
plotAndTable(diaste,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 29.88 | 12.93 | 33.65 | 0.00 | 50.0 | 50.00 |
X12 | No&Yes | 18 | 32.94 | 8.02 | 31.32 | 17.25 | 45.5 | 28.25 |
X13 | Yes&No | 44 | 37.47 | 8.65 | 38.52 | 15.75 | 50.0 | 34.25 |
X14 | Yes&Yes | 60 | 39.32 | 8.91 | 41.02 | 14.90 | 49.8 | 34.90 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.7237166 |
Yes&No-No&No | 0.0088721 |
Yes&Yes-No&No | 0.0002696 |
Yes&No-No&Yes | 0.3321327 |
Yes&Yes-No&Yes | 0.0676680 |
Yes&Yes-Yes&No | 0.7658246 |
plotAndTable(diaste,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 57.58 | 19.79 | 57.75 | 0.00 | 99 | 99.00 |
X12 | No&Yes | 19 | 61.64 | 14.45 | 56.75 | 41.50 | 90 | 48.50 |
X13 | Yes&No | 44 | 69.45 | 15.02 | 68.50 | 37.00 | 94 | 57.00 |
X14 | Yes&Yes | 61 | 74.04 | 18.14 | 77.00 | 29.25 | 96 | 66.75 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.8610101 |
Yes&No-No&No | 0.0295541 |
Yes&Yes-No&No | 0.0004023 |
Yes&No-No&Yes | 0.3504037 |
Yes&Yes-No&Yes | 0.0337593 |
Yes&Yes-Yes&No | 0.5322270 |
plotAndTable(diaste,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 72.15 | 17.57 | 71.78 | 30.00 | 105.56 | 75.56 |
X12 | No&Yes | 19 | 77.67 | 13.12 | 77.19 | 56.11 | 100.00 | 43.89 |
X13 | Yes&No | 44 | 81.57 | 15.63 | 85.62 | 43.13 | 104.44 | 61.31 |
X14 | Yes&Yes | 61 | 86.70 | 14.30 | 90.56 | 46.94 | 104.44 | 57.50 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.6245966 |
Yes&No-No&No | 0.0624727 |
Yes&Yes-No&No | 0.0004001 |
Yes&No-No&Yes | 0.7849585 |
Yes&Yes-No&Yes | 0.1105684 |
Yes&Yes-Yes&No | 0.3217175 |
# Add the combined "correct answer & model use" label (values like "Yes&No",
# per the tables below) for the enantiomers-ring exercise.
# addAnswerColumn is defined earlier in this file.
enanto = addAnswerColumn(enanto)
# Boxplot plus summary and p-value tables of Exam 1 scores, split by that
# label. NOTE(review): the p-values appear to be pairwise t-tests between
# groups rather than an ANOVA -- confirm against plotAndTable's definition.
plotAndTable(enanto,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 42 | 33.29 | 7.08 | 34.62 | 19.75 | 43.50 | 23.75 |
X12 | No&Yes | 12 | 38.46 | 5.27 | 39.50 | 30.75 | 46.50 | 15.75 |
X13 | Yes&No | 72 | 41.71 | 6.41 | 42.38 | 20.75 | 49.75 | 29.00 |
X14 | Yes&Yes | 24 | 43.85 | 6.77 | 46.88 | 25.50 | 49.75 | 24.25 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.0817935 |
Yes&No-No&No | 0.0000000 |
Yes&Yes-No&No | 0.0000000 |
Yes&No-No&Yes | 0.3909838 |
Yes&Yes-No&Yes | 0.0989274 |
Yes&Yes-Yes&No | 0.5140672 |
plotAndTable(enanto,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 41 | 36.35 | 6.25 | 35.95 | 19.75 | 46.15 | 26.40 |
X12 | No&Yes | 12 | 39.68 | 6.16 | 39.92 | 25.50 | 47.75 | 22.25 |
X13 | Yes&No | 71 | 41.31 | 5.90 | 42.50 | 22.50 | 50.00 | 27.50 |
X14 | Yes&Yes | 24 | 42.76 | 6.66 | 45.00 | 20.40 | 49.50 | 29.10 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.3533042 |
Yes&No-No&No | 0.0003666 |
Yes&Yes-No&No | 0.0004661 |
Yes&No-No&Yes | 0.8289157 |
Yes&Yes-No&Yes | 0.4911216 |
Yes&Yes-Yes&No | 0.7532891 |
plotAndTable(enanto,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 42 | 30.18 | 11.66 | 32.05 | 0.00 | 49.0 | 49.00 |
X12 | No&Yes | 11 | 36.21 | 9.08 | 38.25 | 14.90 | 49.0 | 34.10 |
X13 | Yes&No | 71 | 38.60 | 7.81 | 39.75 | 17.25 | 50.0 | 32.75 |
X14 | Yes&Yes | 24 | 40.48 | 9.41 | 43.77 | 16.00 | 49.8 | 33.80 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.2334518 |
Yes&No-No&No | 0.0000526 |
Yes&Yes-No&No | 0.0001913 |
Yes&No-No&Yes | 0.8619775 |
Yes&Yes-No&Yes | 0.5976589 |
Yes&Yes-Yes&No | 0.8307620 |
plotAndTable(enanto,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 42 | 56.25 | 18.85 | 54.62 | 0.00 | 90.0 | 90.00 |
X12 | No&Yes | 12 | 64.90 | 17.76 | 70.88 | 33.75 | 88.0 | 54.25 |
X13 | Yes&No | 72 | 73.09 | 14.00 | 71.50 | 34.50 | 99.0 | 64.50 |
X14 | Yes&Yes | 24 | 76.51 | 17.98 | 83.12 | 35.75 | 95.9 | 60.15 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.3772369 |
Yes&No-No&No | 0.0000027 |
Yes&Yes-No&No | 0.0000211 |
Yes&No-No&Yes | 0.3813311 |
Yes&Yes-No&Yes | 0.1929542 |
Yes&Yes-Yes&No | 0.8145588 |
plotAndTable(enanto,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 42 | 70.18 | 16.66 | 69.31 | 30.00 | 97.78 | 67.78 |
X12 | No&Yes | 12 | 82.65 | 11.54 | 84.62 | 63.75 | 99.44 | 35.69 |
X13 | Yes&No | 72 | 85.03 | 13.07 | 86.46 | 54.44 | 105.56 | 51.12 |
X14 | Yes&Yes | 24 | 90.32 | 14.23 | 96.25 | 46.67 | 104.44 | 57.77 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.0410941 |
Yes&No-No&No | 0.0000018 |
Yes&Yes-No&No | 0.0000009 |
Yes&No-No&Yes | 0.9500051 |
Yes&Yes-No&Yes | 0.4261788 |
Yes&Yes-Yes&No | 0.3958202 |
# Add the combined "correct answer & model use" label (values like "Yes&No",
# per the tables below) for the wedge-and-dash exercise.
# addAnswerColumn is defined earlier in this file.
didash = addAnswerColumn(didash)
# Boxplot plus summary and p-value tables of Exam 1 scores, split by that
# label. NOTE(review): the p-values appear to be pairwise t-tests between
# groups rather than an ANOVA -- confirm against plotAndTable's definition.
plotAndTable(didash,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 19 | 33.29 | 7.80 | 36.75 | 19.75 | 44.00 | 24.25 |
X12 | No&Yes | 30 | 34.94 | 6.64 | 34.75 | 22.75 | 47.25 | 24.50 |
X13 | Yes&No | 37 | 40.18 | 7.13 | 40.75 | 20.75 | 49.75 | 29.00 |
X14 | Yes&Yes | 64 | 42.93 | 6.23 | 44.12 | 19.75 | 49.75 | 30.00 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.8377591 |
Yes&No-No&No | 0.0022821 |
Yes&Yes-No&No | 0.0000011 |
Yes&No-No&Yes | 0.0102264 |
Yes&Yes-No&Yes | 0.0000020 |
Yes&Yes-Yes&No | 0.2026792 |
plotAndTable(didash,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 19 | 35.08 | 6.24 | 37.15 | 23.30 | 45.0 | 21.70 |
X12 | No&Yes | 30 | 38.02 | 7.62 | 36.27 | 20.40 | 50.0 | 29.60 |
X13 | Yes&No | 35 | 40.15 | 6.27 | 40.65 | 19.75 | 49.5 | 29.75 |
X14 | Yes&Yes | 64 | 42.40 | 5.07 | 43.95 | 23.75 | 49.5 | 25.75 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.3559186 |
Yes&No-No&No | 0.0205889 |
Yes&Yes-No&No | 0.0000533 |
Yes&No-No&Yes | 0.4947170 |
Yes&Yes-No&Yes | 0.0076862 |
Yes&Yes-Yes&No | 0.2998958 |
plotAndTable(didash,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 19 | 28.01 | 11.14 | 30.30 | 7.5 | 40.3 | 32.8 |
X12 | No&Yes | 29 | 31.67 | 9.83 | 29.90 | 16.0 | 48.0 | 32.0 |
X13 | Yes&No | 37 | 37.26 | 10.02 | 38.50 | 0.0 | 50.0 | 50.0 |
X14 | Yes&Yes | 63 | 40.45 | 7.47 | 41.65 | 21.3 | 50.0 | 28.7 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.5272977 |
Yes&No-No&No | 0.0025590 |
Yes&Yes-No&No | 0.0000039 |
Yes&No-No&Yes | 0.0699516 |
Yes&Yes-No&Yes | 0.0001966 |
Yes&Yes-Yes&No | 0.3343617 |
plotAndTable(didash,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 19 | 56.90 | 12.79 | 60.75 | 36.00 | 80.75 | 44.75 |
X12 | No&Yes | 30 | 60.59 | 19.63 | 57.88 | 29.25 | 96.00 | 66.75 |
X13 | Yes&No | 37 | 67.63 | 19.14 | 68.50 | 0.00 | 95.50 | 95.50 |
X14 | Yes&Yes | 64 | 75.61 | 14.85 | 77.88 | 39.50 | 99.00 | 59.50 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.8767616 |
Yes&No-No&No | 0.1116913 |
Yes&Yes-No&No | 0.0002093 |
Yes&No-No&Yes | 0.3248693 |
Yes&Yes-No&Yes | 0.0004945 |
Yes&Yes-Yes&No | 0.1025616 |
plotAndTable(didash,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 19 | 68.84 | 12.57 | 66.39 | 46.39 | 88.61 | 42.22 |
X12 | No&Yes | 30 | 74.68 | 16.70 | 74.59 | 46.67 | 102.22 | 55.55 |
X13 | Yes&No | 37 | 82.90 | 15.16 | 86.25 | 30.00 | 104.44 | 74.44 |
X14 | Yes&Yes | 64 | 87.72 | 13.57 | 90.60 | 43.13 | 105.56 | 62.43 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.5209217 |
Yes&No-No&No | 0.0043566 |
Yes&Yes-No&No | 0.0000108 |
Yes&No-No&Yes | 0.1017889 |
Yes&Yes-No&Yes | 0.0004638 |
Yes&Yes-Yes&No | 0.3784543 |
# Add the combined "correct answer & model use" label (values like "Yes&No",
# per the tables below) for the diastereomer-meso exercise.
# addAnswerColumn is defined earlier in this file.
dimeso = addAnswerColumn(dimeso)
# Boxplot plus summary and p-value tables of Exam 1 scores, split by that
# label. NOTE(review): the p-values appear to be pairwise t-tests between
# groups rather than an ANOVA -- confirm against plotAndTable's definition.
plotAndTable(dimeso,"answers","Exam..1","Exam 1: Was the question correct? Did they use the models?","Exam 1")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 34.24 | 7.58 | 35.50 | 19.75 | 45.00 | 25.25 |
X12 | No&Yes | 14 | 31.82 | 7.37 | 33.62 | 19.75 | 47.50 | 27.75 |
X13 | Yes&No | 75 | 41.07 | 6.16 | 41.25 | 22.75 | 49.50 | 26.75 |
X14 | Yes&Yes | 35 | 42.83 | 7.09 | 45.50 | 20.75 | 49.75 | 29.00 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.7020425 |
Yes&No-No&No | 0.0001010 |
Yes&Yes-No&No | 0.0000142 |
Yes&No-No&Yes | 0.0000344 |
Yes&Yes-No&Yes | 0.0000048 |
Yes&Yes-Yes&No | 0.5837784 |
plotAndTable(dimeso,"answers","Exam..2","Exam 2: Was the question correct? Did they use the models?","Exam 2")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 25 | 37.51 | 6.13 | 37.75 | 24.50 | 50.0 | 25.50 |
X12 | No&Yes | 14 | 34.74 | 7.80 | 34.62 | 20.40 | 49.5 | 29.10 |
X13 | Yes&No | 74 | 40.80 | 6.01 | 41.10 | 19.75 | 49.5 | 29.75 |
X14 | Yes&Yes | 35 | 42.36 | 5.87 | 44.25 | 23.30 | 49.0 | 25.70 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.5369279 |
Yes&No-No&No | 0.1019028 |
Yes&Yes-No&No | 0.0167917 |
Yes&No-No&Yes | 0.0053259 |
Yes&Yes-No&Yes | 0.0008415 |
Yes&Yes-Yes&No | 0.6108740 |
plotAndTable(dimeso,"answers","Exam..3","Exam 3: Was the question correct? Did they use the models?","Exam 3")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 33.81 | 9.43 | 37.50 | 8.75 | 45.5 | 36.75 |
X12 | No&Yes | 14 | 25.78 | 11.45 | 27.15 | 7.50 | 46.8 | 39.30 |
X13 | Yes&No | 73 | 37.71 | 9.25 | 38.55 | 0.00 | 49.8 | 49.80 |
X14 | Yes&Yes | 35 | 39.57 | 8.98 | 41.00 | 17.25 | 50.0 | 32.75 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.0544558 |
Yes&No-No&No | 0.2728122 |
Yes&Yes-No&No | 0.0902442 |
Yes&No-No&Yes | 0.0001605 |
Yes&Yes-No&Yes | 0.0000498 |
Yes&Yes-Yes&No | 0.7737299 |
plotAndTable(dimeso,"answers","Final.Exam","Final Exam: Was the question correct? Did they use the models?","Final Exam")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 61.28 | 15.92 | 63.50 | 29.25 | 96.00 | 66.75 |
X12 | No&Yes | 14 | 54.45 | 18.08 | 52.62 | 29.25 | 93.25 | 64.00 |
X13 | Yes&No | 75 | 70.30 | 17.15 | 71.75 | 0.00 | 95.50 | 95.50 |
X14 | Yes&Yes | 35 | 74.63 | 17.88 | 77.75 | 35.75 | 99.00 | 63.25 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.6287530 |
Yes&No-No&No | 0.1021404 |
Yes&Yes-No&No | 0.0167406 |
Yes&No-No&Yes | 0.0101008 |
Yes&Yes-No&Yes | 0.0016576 |
Yes&Yes-Yes&No | 0.6086441 |
plotAndTable(dimeso,"answers","Quizzes.Final.Score","Quizzes: Was the question correct? Did they use the models?","Quizzes Average Score")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | No&No | 26 | 73.73 | 14.23 | 73.47 | 46.94 | 102.22 | 55.28 |
X12 | No&Yes | 14 | 66.33 | 15.93 | 64.69 | 46.39 | 101.11 | 54.72 |
X13 | Yes&No | 75 | 83.78 | 14.99 | 85.56 | 30.00 | 103.33 | 73.33 |
X14 | Yes&Yes | 35 | 88.59 | 13.13 | 90.63 | 55.31 | 105.56 | 50.25 |
Testing statistical significance: p-values | |
---|---|
No&Yes-No&No | 0.4186218 |
Yes&No-No&No | 0.0148549 |
Yes&Yes-No&No | 0.0006988 |
Yes&No-No&Yes | 0.0003605 |
Yes&Yes-No&Yes | 0.0000191 |
Yes&Yes-Yes&No | 0.3723377 |
Analyzing those who have already transitioned and don’t need the models: how many are not using the models because they visualize the structures in their head or use the R/S assignments instead?
How many students don’t need the models at all, or only for some specific question — and which question?
# Merge demographic covariates into the `students` table, keyed on student ID.
demoData <- read.csv("~/Papers/22_Ochem_Models_across_the_Curriculum/Analysis/modelusage_mergedwithdemographics_cleanforexternal.csv", header = TRUE)
# The demographics file has one row per (student, question); keep a single
# question so each student appears exactly once before merging.
# which() is used deliberately so NA comparisons drop rows instead of
# producing NA-filled rows.
demoData <- demoData[which(demoData$Question == "Newman Projection (1.1)"), ]
demo_cols <- c("ID", "Sex", "Ethnicity", "SOC", "First.Generation",
               "Underrepresented", "Home.state", "HS.GPA", "LLC")
trimDemoData <- demoData[, demo_cols]
students <- merge(students, trimDemoData, by = "ID")
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | F | 112 | 39.08 | 8.05 | 40.50 | 19.75 | 49.75 | 30.00 |
X12 | M | 24 | 40.73 | 7.02 | 40.62 | 22.75 | 49.50 | 26.75 |
Testing statistical significance: p-values |
---|
0.3540949 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | F | 110 | 39.94 | 6.68 | 40.52 | 19.75 | 50.0 | 30.25 |
X12 | M | 24 | 39.44 | 6.75 | 39.90 | 22.50 | 49.5 | 27.00 |
Testing statistical significance: p-values |
---|
0.743875 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | F | 110 | 35.66 | 10.76 | 37.88 | 0.0 | 50 | 50.0 |
X12 | M | 24 | 38.55 | 8.09 | 39.80 | 19.8 | 50 | 30.2 |
Testing statistical significance: p-values |
---|
0.2168484 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | F | 112 | 67.07 | 19.04 | 68.50 | 0.0 | 96 | 96.0 |
X12 | M | 24 | 71.83 | 16.34 | 68.38 | 34.5 | 99 | 64.5 |
Testing statistical significance: p-values |
---|
0.2580761 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | F | 103 | 3.66 | 0.36 | 3.70 | 2.72 | 4.46 | 1.74 |
X12 | M | 19 | 3.56 | 0.40 | 3.78 | 2.77 | 3.95 | 1.18 |
Testing statistical significance: p-values |
---|
0.2704221 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 74 | 40.04 | 7.87 | 41.38 | 19.75 | 49.75 | 30 |
X12 | Y | 62 | 38.58 | 7.88 | 39.50 | 19.75 | 49.75 | 30 |
Testing statistical significance: p-values |
---|
0.2832614 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 72 | 40.05 | 6.91 | 40.25 | 19.75 | 50 | 30.25 |
X12 | Y | 62 | 39.61 | 6.43 | 40.62 | 20.40 | 49 | 28.60 |
Testing statistical significance: p-values |
---|
0.7077274 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 73 | 36.75 | 10.55 | 40.00 | 0.0 | 50 | 50.0 |
X12 | Y | 61 | 35.50 | 10.18 | 37.25 | 7.5 | 50 | 42.5 |
Testing statistical significance: p-values |
---|
0.4889579 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 74 | 68.40 | 19.89 | 69.25 | 0.00 | 99 | 99.00 |
X12 | Y | 62 | 67.33 | 17.14 | 66.25 | 29.25 | 94 | 64.75 |
Testing statistical significance: p-values |
---|
0.7404699 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 63 | 3.69 | 0.34 | 3.77 | 2.72 | 4.46 | 1.74 |
X12 | Y | 59 | 3.60 | 0.39 | 3.69 | 2.74 | 4.22 | 1.48 |
Testing statistical significance: p-values |
---|
0.1785735 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 76 | 40.83 | 7.05 | 41.38 | 20.75 | 49.75 | 29 |
X12 | Y | 60 | 37.52 | 8.52 | 39.75 | 19.75 | 49.75 | 30 |
Testing statistical significance: p-values |
---|
0.0143914 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 74 | 40.84 | 6.53 | 42.20 | 22.50 | 50.0 | 27.50 |
X12 | Y | 60 | 38.62 | 6.69 | 38.52 | 19.75 | 49.5 | 29.75 |
Testing statistical significance: p-values |
---|
0.0542465 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 75 | 37.48 | 9.96 | 40.00 | 8.75 | 50 | 41.25 |
X12 | Y | 59 | 34.53 | 10.71 | 35.55 | 0.00 | 50 | 50.00 |
Testing statistical significance: p-values |
---|
0.1026239 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 76 | 70.82 | 18.24 | 73.12 | 29.25 | 99 | 69.75 |
X12 | Y | 60 | 64.23 | 18.60 | 65.25 | 0.00 | 94 | 94.00 |
Testing statistical significance: p-values |
---|
0.0397817 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | N | 64 | 3.67 | 0.38 | 3.75 | 2.74 | 4.46 | 1.72 |
X12 | Y | 58 | 3.62 | 0.36 | 3.69 | 2.72 | 4.22 | 1.50 |
Testing statistical significance: p-values |
---|
0.4748857 |
We can also plot the final score.
# Pairwise-complete correlations among the numeric performance measures.
score_cols <- c("Exam..1", "Exam..2", "Exam..3", "Final.Exam",
                "Quizzes.Final.Score", "Final.Score", "HS.GPA")
mycor <- cor(students[, score_cols], use = "pairwise.complete.obs")
# Round for display, then blank out the upper triangle so only the lower
# triangle is printed. (Assigning "" coerces the matrix to character, which
# is fine since it is only used for rendering.)
upper <- round(mycor, 3)
upper[upper.tri(upper)] <- ""
knitr::kable(upper , caption = "Correlation between the numerical data")
Exam..1 | Exam..2 | Exam..3 | Final.Exam | Quizzes.Final.Score | Final.Score | HS.GPA | |
---|---|---|---|---|---|---|---|
Exam..1 | 1 | ||||||
Exam..2 | 0.725 | 1 | |||||
Exam..3 | 0.772 | 0.791 | 1 | ||||
Final.Exam | 0.813 | 0.851 | 0.883 | 1 | |||
Quizzes.Final.Score | 0.755 | 0.778 | 0.81 | 0.83 | 1 | ||
Final.Score | 0.794 | 0.854 | 0.895 | 0.909 | 0.897 | 1 | |
HS.GPA | 0.312 | 0.372 | 0.282 | 0.366 | 0.391 | 0.405 | 1 |
The Chi-square analysis gives a p= 0.20455
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
The Chi-square analysis gives a p= 0.04451
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","newman","Model use","N of students","Model use by Student of Color (SOC)")
The Chi-square analysis gives a p= 0.14023
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","newmanComb","Model use","N of students","Model use by Student of Color")
The Chi-square analysis gives a p= 0.04874
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","newman","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.50607
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","newmanComb","Model use","N of students","Model use First Generation")
The Chi-square analysis gives a p= 0.10626
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | NotNeed | 20 | 3.78 | 0.26 | 3.80 | 3.19 | 4.22 | 1.03 |
X12 | NotUsing | 38 | 3.48 | 0.36 | 3.47 | 2.74 | 4.17 | 1.43 |
X13 | Using | 64 | 3.71 | 0.37 | 3.82 | 2.72 | 4.46 | 1.74 |
Testing statistical significance: p-values | |
---|---|
NotUsing-NotNeed | 0.0072797 |
Using-NotNeed | 0.7495826 |
Using-NotUsing | 0.0043354 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Notusing Correct | 39 | 3.56 | 0.37 | 3.60 | 2.74 | 4.22 | 1.48 |
X12 | Notusing Incorrect | 19 | 3.63 | 0.33 | 3.70 | 2.83 | 4.17 | 1.33 |
X13 | Using Correct | 45 | 3.75 | 0.37 | 3.86 | 2.72 | 4.46 | 1.74 |
X14 | Using Incorrect | 19 | 3.61 | 0.37 | 3.71 | 2.82 | 4.00 | 1.19 |
Testing statistical significance: p-values | |
---|---|
Notusing Incorrect-Notusing Correct | 0.8933020 |
Using Correct-Notusing Correct | 0.0657781 |
Using Incorrect-Notusing Correct | 0.9603225 |
Using Correct-Notusing Incorrect | 0.5834926 |
Using Incorrect-Notusing Incorrect | 0.9977177 |
Using Incorrect-Using Correct | 0.4463924 |
The Chi-square analysis gives a p= 0.02205
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
The Chi-square analysis gives a p= 0.0152
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","diaste","Model use","N of students","Model use by Student of Color (SOC)")
The Chi-square analysis gives a p= 0.08004
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","diasteComb","Model use","N of students","Model use by Student of Color")
The Chi-square analysis gives a p= 0.29166
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","diaste","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.42854
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","diasteComb","Model use","N of students","Model use First Generation")
The Chi-square analysis gives a p= 0.46868
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | NotNeed | 23 | 3.71 | 0.30 | 3.82 | 2.92 | 4.17 | 1.25 |
X12 | NotUsing | 37 | 3.43 | 0.36 | 3.44 | 2.74 | 4.25 | 1.51 |
X13 | Using | 62 | 3.75 | 0.35 | 3.86 | 2.72 | 4.46 | 1.74 |
Testing statistical significance: p-values | |
---|---|
NotUsing-NotNeed | 0.0058891 |
Using-NotNeed | 0.8811543 |
Using-NotUsing | 0.0000336 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Notusing Correct | 38 | 3.49 | 0.38 | 3.53 | 2.74 | 4.17 | 1.43 |
X12 | Notusing Incorrect | 22 | 3.61 | 0.32 | 3.69 | 2.83 | 4.25 | 1.42 |
X13 | Using Correct | 48 | 3.78 | 0.36 | 3.90 | 2.72 | 4.46 | 1.74 |
X14 | Using Incorrect | 14 | 3.67 | 0.31 | 3.68 | 3.19 | 4.22 | 1.04 |
Testing statistical significance: p-values | |
---|---|
Notusing Incorrect-Notusing Correct | 0.5995632 |
Using Correct-Notusing Correct | 0.0019687 |
Using Incorrect-Notusing Correct | 0.3695345 |
Using Correct-Notusing Incorrect | 0.2741740 |
Using Incorrect-Notusing Incorrect | 0.9568384 |
Using Incorrect-Using Correct | 0.7719006 |
The Chi-square analysis gives a p= 0.59795
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
The Chi-square analysis gives a p= 0.02156
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","enanto","Model use","N of students","Model use by Student of Color (SOC)")
The Chi-square analysis gives a p= 0.46662
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","enantoComb","Model use","N of students","Model use by Student of Color")
The Chi-square analysis gives a p= 0.04216
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","enanto","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.04054
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","enantoComb","Model use","N of students","Model use First Generation")
The Chi-square analysis gives a p= 0.09056
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | NotNeed | 45 | 3.80 | 0.31 | 3.87 | 2.83 | 4.28 | 1.45 |
X12 | NotUsing | 52 | 3.45 | 0.35 | 3.50 | 2.72 | 3.98 | 1.26 |
X13 | Using | 25 | 3.77 | 0.33 | 3.88 | 3.08 | 4.46 | 1.38 |
Testing statistical significance: p-values | |
---|---|
NotUsing-NotNeed | 0.0000032 |
Using-NotNeed | 0.9452032 |
Using-NotUsing | 0.0003567 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Notusing Correct | 59 | 3.63 | 0.38 | 3.74 | 2.74 | 4.28 | 1.54 |
X12 | Notusing Incorrect | 38 | 3.58 | 0.37 | 3.66 | 2.72 | 4.25 | 1.53 |
X13 | Using Correct | 16 | 3.90 | 0.25 | 3.94 | 3.31 | 4.46 | 1.15 |
X14 | Using Incorrect | 9 | 3.55 | 0.35 | 3.53 | 3.08 | 4.00 | 0.92 |
Testing statistical significance: p-values | |
---|---|
Notusing Incorrect-Notusing Correct | 0.9115999 |
Using Correct-Notusing Correct | 0.0480009 |
Using Incorrect-Notusing Correct | 0.9183449 |
Using Correct-Notusing Incorrect | 0.0202872 |
Using Incorrect-Notusing Incorrect | 0.9945846 |
Using Incorrect-Using Correct | 0.0982578 |
The Chi-square analysis gives a p= 0.08109
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
The Chi-square analysis gives a p= 0.05317
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","didash","Model use","N of students","Model use by Student of Color (SOC)")
The Chi-square analysis gives a p= 0.62383
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","didashComb","Model use","N of students","Model use by Student of Color")
The Chi-square analysis gives a p= 0.89281
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","didash","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.44463
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","didashComb","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.73594
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | NotNeed | 10 | 3.72 | 0.33 | 3.86 | 2.92 | 4.00 | 1.08 |
X12 | NotUsing | 36 | 3.46 | 0.34 | 3.47 | 2.74 | 4.17 | 1.43 |
X13 | Using | 76 | 3.73 | 0.36 | 3.82 | 2.72 | 4.46 | 1.74 |
Testing statistical significance: p-values | |
---|---|
NotUsing-NotNeed | 0.1128374 |
Using-NotNeed | 0.9949252 |
Using-NotUsing | 0.0008828 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Notusing Correct | 31 | 3.52 | 0.36 | 3.55 | 2.74 | 4.00 | 1.26 |
X12 | Notusing Incorrect | 15 | 3.50 | 0.33 | 3.49 | 2.92 | 4.17 | 1.25 |
X13 | Using Correct | 52 | 3.76 | 0.33 | 3.86 | 2.72 | 4.46 | 1.74 |
X14 | Using Incorrect | 24 | 3.65 | 0.42 | 3.74 | 2.80 | 4.25 | 1.45 |
Testing statistical significance: p-values | |
---|---|
Notusing Incorrect-Notusing Correct | 0.9973854 |
Using Correct-Notusing Correct | 0.0197914 |
Using Incorrect-Notusing Correct | 0.5753114 |
Using Correct-Notusing Incorrect | 0.0663154 |
Using Incorrect-Notusing Incorrect | 0.5990924 |
Using Incorrect-Using Correct | 0.5658395 |
The Chi-square analysis gives a p= 0.81715
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
The Chi-square analysis gives a p= 0.04783
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","dimeso","Model use","N of students","Model use by Student of Color (SOC)")
The Chi-square analysis gives a p= 0.76848
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"SOC","dimesoComb","Model use","N of students","Model use by Student of Color")
The Chi-square analysis gives a p= 0.69718
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","dimeso","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.36419
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
plotBarAndCorr(students,"First.Generation","dimesoComb","Model use","N of students","Model use by First Generation")
The Chi-square analysis gives a p= 0.07435
Residuals analysis:
A negative residual implies that the measured value is lower than
expected and a positive value higher than
expected
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | NotNeed | 37 | 3.73 | 0.30 | 3.77 | 2.92 | 4.25 | 1.33 |
X12 | NotUsing | 48 | 3.46 | 0.36 | 3.48 | 2.72 | 4.05 | 1.33 |
X13 | Using | 37 | 3.80 | 0.34 | 3.93 | 2.83 | 4.46 | 1.63 |
Testing statistical significance: p-values | |
---|---|
NotUsing-NotNeed | 0.0009286 |
Using-NotNeed | 0.6585847 |
Using-NotUsing | 0.0000274 |
group1 | n | mean | sd | median | min | max | range | |
---|---|---|---|---|---|---|---|---|
X11 | Notusing Correct | 61 | 3.63 | 0.33 | 3.69 | 2.74 | 4.22 | 1.48 |
X12 | Notusing Incorrect | 24 | 3.46 | 0.41 | 3.50 | 2.72 | 4.25 | 1.53 |
X13 | Using Correct | 26 | 3.88 | 0.27 | 3.95 | 3.08 | 4.46 | 1.38 |
X14 | Using Incorrect | 11 | 3.62 | 0.42 | 3.78 | 2.83 | 4.28 | 1.45 |
Testing statistical significance: p-values | |
---|---|
Notusing Incorrect-Notusing Correct | 0.1970409 |
Using Correct-Notusing Correct | 0.0126614 |
Using Incorrect-Notusing Correct | 0.9999967 |
Using Correct-Notusing Incorrect | 0.0002371 |
Using Incorrect-Notusing Incorrect | 0.5652983 |
Using Incorrect-Using Correct | 0.1795673 |