pkgs <- c("pipeR", "dplyr", "tidyr", "ggplot2", "readr", "readxl")
lapply(pkgs, require, character.only = TRUE)
d <- read_csv("teaching_methods.csv")
head(d)
str(d)
names(d) <- c("id", "name", "sex","math", "stat", "psy_test", "stat_test1", "stat_test2", "teaching_method")
library(rlist)
library(purrr)
d %>>%
# dplyr::select(stat_test2, teaching_method) %>>%
split(.$teaching_method) %>>%
purrr::map(~ .[["stat_test2"]]) %>>%
rlist::list.cbind()
?df
options(repr.plot.width = 4, repr.plot.height = 4)
ggplot(data.frame(x = c(0:5)), aes(x)) +
stat_function(fun = df, args = list(df1 = 3, df2 = 16))
dd <- d %>>%
select(stat_test2, teaching_method) %>>%
mutate(teaching_method = factor(teaching_method))
str(dd)
oneway.test()
oneway.test(dd$stat_test2 ~ dd$teaching_method, var.equal = TRUE)
aov()
aov(dd$stat_test2 ~ dd$teaching_method)
aov(dd$stat_test2 ~ dd$teaching_method) %>>% summary
anova()
lm(dd$stat_test2 ~ dd$teaching_method) %>>% anova
intra_group_sq_sum <- dd %>>%
group_by(teaching_method) %>>%
mutate_each(funs(my_fun = stat_test2 - mean(stat_test2))) %>>%
ungroup %>>%
mutate(stat_test2 = stat_test2^2) %>>%
summarise(res = sum(stat_test2)) %>>%
(.[["res"]])
intra_group_sq_sum
all_mean <- dd$stat_test2 %>>% mean
inter_group_sq_sum <- dd %>>%
group_by(teaching_method) %>>%
summarise_each(funs(mean, n())) %>>%
mutate(dev_sq_sum = (mean - all_mean)^2 * n) %>>%
summarise(res = sum(dev_sq_sum)) %>>%
(.[["res"]])
inter_group_sq_sum
all_sq_sum <- dd %>>%
mutate(dev_sq_sum = (stat_test2 - all_mean)^2) %>>%
summarise(res = sum(dev_sq_sum)) %>>%
(.[["res"]])
all_sq_sum
分散分析表との対応を見る
aov(dd$stat_test2 ~ dd$teaching_method) %>>% summary
inter_group_sq_sum + intra_group_sq_sum
自由度
df_inter <- (unique(dd$teaching_method) %>>% length) - 1
df_inter
df_intra <- dd %>>%
group_by(teaching_method) %>>%
summarise(n = n()) %>>%
ungroup %>>%
summarise(res = sum(n - 1)) %>>%
(.[["res"]])
df_intra
df_all <- length(dd$stat_test2) - 1
df_all
全データ = 群間 + 群内
df_inter + df_intra
群間平均平方
inter_mean_sq <- inter_group_sq_sum / df_inter
inter_mean_sq
群内平均平方
intra_mean_sq <- intra_group_sq_sum / df_intra
intra_mean_sq
全体平方和を全体の自由度で割った平均平方は、全データの不偏分散となる
all_sq_sum / df_all
var(d$stat_test2)
f_value <- inter_mean_sq / intra_mean_sq
f_value
combn(c("A", "B", "C", "D"), 2)
dd %>>% group_by(teaching_method) %>>% summarise(n = n())
n <- dd %>>% group_by(teaching_method) %>>% summarise(n = n()) %>>% (.[["n"]][1])
n
dd %>>%
group_by(teaching_method) %>>%
summarise(gr_mean = mean(stat_test2)) %>>%
(function(x){
ret <- x$gr_mean
names(ret) <- x$teaching_method
ret
})() %>>%
(function(x){
names(x) %>>%
combn(2) %>>%
apply(2, function(y){
a <- y[1]
b <- y[2]
q <- abs(x[a] - x[b]) / sqrt(intra_mean_sq / n)
cat(a, " and ", b, ": ", q, "\n")
})
invisible()
})()
ls.str()
?qtukey
qtukey()
ngroup <- dd$teaching_method %>>% unique %>>% length
ngroup
qtukey(0.95, ngroup, df_intra)
TukeyHSD()
を用いると,aov(dd$stat_test2 ~ dd$teaching_method) %>>% TukeyHSD
d2 <- read_csv("chap07_2.csv", locale=locale(encoding="CP932")) # this csv file was generated by Excel
d2
names(d2) <- c("student", "algebra", "calculus", "probability")
str(d2)
d2 %>>%
gather(subject, point , -student) %>>% head()
d2 %>>%
gather(subject, point , -student) %>>%
{aov(point ~ subject, data = .)} %>>%
summary
d2 %>>% gather(subject, point, -student) %>>%
{aov(point ~ subject + student, data = .)} %>>%
summary
棄却域は,
qf(0.05, 2, 8, lower.tail = FALSE)
d2 %>>% gather(subject, point, -student) %>>%
{aov(point ~ subject + student, data = .)} %>>%
TukeyHSD()
d2 %>>% gather(subject, point, -student) %>>%
ggplot(aes(x = subject, y = point)) +
geom_boxplot()
全体平方和
d2.long <- d2 %>>% gather(subject, point, -student)
all_mean <- mean(d2.long$point)
all_mean
all_sq_sum <- d2.long %>>%
mutate(dev_sq_sum = (point - all_mean)^2) %>>%
summarise(res = sum(dev_sq_sum)) %>>%
(.[["res"]])
all_sq_sum
条件平方和
subject_sq_sum <- d2.long %>>%
group_by(subject) %>>%
summarise_each(funs(mean, n()), point) %>>%
mutate(dev_sq_sum = (mean - all_mean)^2 * n) %>>%
summarise(res = sum(dev_sq_sum)) %>>%
(.[["res"]])
subject_sq_sum
個人差平方和
student_sq_sum <- d2.long %>>%
group_by(student) %>>%
mutate_each(funs(my_fun = mean(point) - all_mean), point) %>>%
ungroup %>>%
mutate(point = point^2) %>>%
summarise(res = sum(point)) %>>%
(.[["res"]])
student_sq_sum
残差平方和
residuals_sq_sum <- d2.long %>>%
group_by(student) %>>%
mutate(mean.student = mean(point) - all_mean) %>>%
group_by(subject) %>>%
mutate(mean.subject = mean(point) - all_mean) %>>%
ungroup %>>%
mutate(point = point - all_mean - mean.student - mean.subject) %>>%
mutate(point = point^2) %>>%
summarise(res = sum(point)) %>>%
(.[["res"]])
residuals_sq_sum
分解の確認
all_sq_sum
subject_sq_sum + student_sq_sum + residuals_sq_sum
all_sq_sum == (subject_sq_sum + student_sq_sum + residuals_sq_sum)
identical(all_sq_sum, (subject_sq_sum + student_sq_sum + residuals_sq_sum))
all.equal(all_sq_sum, (subject_sq_sum + student_sq_sum + residuals_sq_sum))
残差の自由度 $=$ 条件自由度 $\times$ 個人差の自由度
$F$値は,
(subject_sq_sum / (3 - 1)) / (residuals_sq_sum / ((3 - 1) * (5 - 1)))
d3 <- read_csv("chap07_3.csv", locale = locale(encoding="CP932"))
d3
d3.long <- d3 %>>%
gather(key, taste) %>>%
separate(key, into = c("water", "condition"), sep = "\\.")
head(d3.long)
要因A,要因B,AとBの交互作用効果のそれぞれに関して設定する
aov(taste ~ condition * water, data = d3.long) %>>% summary
aov(taste ~ condition + water + water:condition, data = d3.long) %>>% summary
平方和の分解の確認
d3.res <- aov(taste ~ condition * water, data = d3.long) %>>% summary
str(d3.res)
all_sq_sum <- d3.long %>>%
mutate(dev_sq_sum = (taste - mean(taste))^2) %>>%
summarise(res = sum(dev_sq_sum)) %>>%
(.[["res"]])
all_sq_sum
sum(d3.res[[1]]$`Sum Sq`)
all.equal(all_sq_sum, sum(d3.res[[1]]$`Sum Sq`))
library(Cairo)
CairoFonts(regular = "IPAexGothic")
options(repr.plot.width = 8, repr.plot.height = 4)
Cairo(type="raster")
par(family = "IPAexGothic")
par(cex.axis = 0.8)
layout(matrix(c(1, 2), 1, 2, byrow = TRUE))
with(d3.long, {
interaction.plot(condition, water, taste)
interaction.plot(water, condition, taste)
})
dev.off()
library(grid)
library(gridExtra)
Cairo(type = "raster")
gp1 <- d3.long %>>%
ggplot(aes(x = condition, y = taste, group = water, linetype = water)) +
stat_summary(fun.y=mean, geom="line") +
theme(legend.position = c(.85, .85),
legend.text = element_text(family="IPAexGothic"))
gp2 <- d3.long %>>%
ggplot(aes(x = water, y = taste, group = condition, linetype = condition)) +
stat_summary(fun.y=mean, geom="line") +
theme(legend.position = c(.9, .9),
legend.text = element_text(family="IPAexGothic"))
grid.arrange(gp1, gp2, ncol = 2)
dev.off()
aov(taste ~ condition, data = d3.long) %>>% summary()
d3.res[[1]]$`Sum Sq`[1]
sum(d3.res[[1]]$`Sum Sq`[2:4])
d4 <- d3 %>>% mutate(person = c("村松","川崎","井口","松中","城島"))
d4
d4.long <- d4 %>>%
gather(key, taste, -person) %>>%
separate(key, into = c("water", "condition"), sep = "\\.")
str(d4.long)
aov(taste ~ condition*water +
Error(person + person:condition + person:water + person:condition:water), data = d4.long) %>>%
summary
d5 <- c("A", "B") %>>%
lapply(function(x){
switch(x,
"A" = {person <- c("村松","川崎","井口","松中","城島")},
"B" = {person <- c("斉藤","和田","寺原","杉内","新垣")})
d3 %>>%
select(ends_with(x)) %>>%
mutate(person = person) %>>%
gather(key, taste, -person) %>>%
separate(key, into = c("water", "condition"))
}) %>>%
bind_rows
glimpse(d5)
aov(taste ~ condition * water +
Error(person:condition + person:condition:water), data = d5) %>>% summary
aov(taste ~ condition * water +
Error(person + person:condition + person:water + person:condition:water), data = d5) %>>% summary
aov(taste ~ condition * water +
Error(person), data = d5) %>>% summary
d.ex1 <- read_csv("chap07_ex1.csv", locale = locale(encoding = "cp932"), col_names = FALSE)
d.ex1
d.ex1 %>>%
(function(x){
faculty <- x[[1]]
ret <- x[,-1] %>>% t %>>% data.frame
names(ret) <- faculty
ret
})() %>>%
gather(fuculty, point) %>>%
(~ d.ex1.long) %>>%
{aov(point ~ fuculty, data = .)} %>>%
summary
aov(point ~ fuculty, data = d.ex1.long) %>>% TukeyHSD
d.ex2 <- read_csv("chap07_ex2.csv", locale = locale(encoding = "cp932"), col_names = FALSE, col_types = "cccccccc")
d.ex2
d.ex2 %>>%
(function(x){
header <- enc2native(x[[1]]) # avoid UTF-8 Encoding on Windows
student <- x[1,] %>>% unlist
ret <- x[-1,-1] %>>% t %>>% apply(2, as.numeric) %>>% data.frame()
ret[[student[1]]] <- student[2:length(student)]
ret <- ret[c(4, 1:3)]
names(ret) <- header
ret
})() %>>%
gather(type, point, -学生) %>>%
rename("student" = 学生) %>>%
(~ d.ex2.long) %>>%
{aov(point ~ type + student, data = .)} %>>%
summary
aov(point ~ type + student, data = d.ex2.long) %>>% TukeyHSD
devtools::session_info()