options(repr.plot.width= 4, repr.plot.height = 4)
library(readr)
d <- read_csv("teaching_methods.csv") # UTF-8
d
names(d) <- c("id", "name", "gender", "math", "stat", "psy_test", "stat_test1", "stat_test2", "teach_method")
d
psy_test <- d$psy_test
(4) 統計検定量の計算
z <- (mean(psy_test) - 12) / (sqrt(10 / length(psy_test)))
z
棄却域を計算する
上側確率は,
qnorm(1 - 0.05 / 2)
下側確率は,
qnorm(0.05 / 2)
棄却域は,
$Z \lt -1.95996$, $Z \gt 1.95996$
library(ggplot2)
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
stat_function(fun = dnorm, args = list(mean = 0, sd = 1)) +
scale_x_continuous(breaks = c(-3:3, 1)) +
geom_vline(xintercept = c(qnorm(0.025), qnorm(0.975)))
$p$値を計算する.
検定統計量z が左側の棄却域に入る確率は,
pnorm(z)
両側検定なので,
2 * pnorm(-z, lower.tail = FALSE)
$p$値が有意水準0.05より小さいので,帰無仮説は棄却される.
(標本平均は母平均と等しいとは言えない)
$t = \frac{\bar{X} - \mu}{\hat{\sigma} / \sqrt{n}}$
ggplot(data.frame(x = c(-5, 5)), aes(x)) +
stat_function(fun = dt, args = list(df = 1)) +
stat_function(fun = dt, args = list(df = 2), col = "green") +
stat_function(fun = dt, args = list(df = 4), col = "red") +
stat_function(fun = dt, args = list(df = 8), col = "blue") +
scale_x_continuous(breaks = c(-4:4, 1))
母集団分布は, $N(12, \sigma^2)$
検定統計量$t$は,
t <- (mean(psy_test) - 12) / sqrt(var(psy_test) / length(psy_test))
t
棄却域は,
lower <- qt(0.05 / 2, df = length(psy_test) - 1)
upper <- qt(0.05 / 2, df = length(psy_test) - 1, lower.tail = FALSE)
cat("t < ", lower, ", t > ", upper)
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
stat_function(fun = dt, args = list(df = 19)) +
scale_x_continuous(breaks = c(-3:3, 1)) +
geom_vline(xintercept = c(qt(0.025, df = 19), qt(0.975, df = 19)))
帰無仮説は棄却された
$p$ 値は,
2 * pt(t, df = length(psy_test) - 1)
t.test()
で計算
t.test(psy_test, mu = 12)
H0: 母集団において相間が0である($\rho = 0$)
$t = \frac{r\sqrt{n - 2}}{\sqrt{1 - r^2}}$
stat_test1 <- d$stat_test1
stat_test2 <- d$stat_test2
検定統計量は,
r <- cor(stat_test1, stat_test2)
n <- length(stat_test1)
t <- (r * sqrt(n - 2)) / sqrt(1 - r^2)
t
棄却域は,
lower <- qt(0.05 / 2, df = n-2)
upper <- qt(0.05 / 2, df = n - 2, lower.tail = FALSE)
cat("t < ", lower, ", t > ", upper)
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
stat_function(fun = dt, args = list(df = n - 2)) +
scale_x_continuous(breaks = c(-3:3, 1)) +
geom_vline(xintercept = c(qt(0.025, df = n - 2), qt(0.975, df = n - 2)))
帰無仮説は棄却された
$p$ 値は,
2 * pt(-t, df = n - 2)
cor.test()
で計算
cor.test(stat_test1, stat_test2)
math <- d$math
stat <- d$stat
table(math, stat)
検定統計量
$$\chi^2 = \sum_{i = 1}^{k} \frac{(O_i - E_i)^2}{E_i}$$カイ二乗分布
ggplot(data.frame(x = c(0, 20)), aes(x)) +
stat_function(fun = dchisq, args = list(df = 1)) +
stat_function(fun = dchisq, args = list(df = 2), col = "green") +
stat_function(fun = dchisq, args = list(df = 4), col = "red") +
stat_function(fun = dchisq, args = list(df = 8), col = "blue")
観測度数
library(pipeR)
m.o <- table(math, stat)
m.o
m.o %>>% rowSums
m.o %>>% colSums
m.o %>>% sum
期待度数
m.o.c <- colSums(m.o)
m.e <- rbind(m.o.c, m.o.c) * rowSums(m.o) / sum(m.o)
m.e
chi2 <- sum((m.o - m.e)^2 / m.e)
chi2
棄却域は,
n <- 2
qchisq(0.05, df = (n - 1) * (n - 1), lower.tail = FALSE)
ggplot(data.frame(x = c(0, 6)), aes(x)) +
stat_function(fun = dchisq, args = list(df = (n - 1) * (n - 1))) +
scale_x_continuous(breaks = c(0:6, 1)) +
geom_vline(xintercept = c(qchisq(0.95, df = (n - 1) * (n - 1))))
帰無仮説は棄却されなかった.
$p$ 値を計算すると,
pchisq(chi2, df = (n - 1) * (n - 1), lower.tail = FALSE)
chisq.test()
で計算すると,
chisq.test(m.o, correct = FALSE)
サンプルサイズが大きくなると,検定結果が有意になりやすい
reg_a <- matrix(c(16, 12, 4, 8), 2, 2)
rownames(reg_a) <- c("文系", "理系")
colnames(reg_a) <- c("履修した", "履修してない")
reg_a
chisq.test(reg_a, correct = FALSE)
reg_b <- reg_a * 10
reg_b
chisq.test(reg_b, correct = FALSE)
height <- c(165,150,170,168,159,170,167,178,155,159,161,162,166,171,155,160,168,172,155,167)
t.test(height, mu = 170)
帰無仮説が棄却されたので,無作為標本とは言えない
d.study <- data.frame(time = c(1, 3, 10, 12, 6, 3, 8, 4, 1, 5), score = c(20, 40, 100, 80, 50, 50, 70, 50, 10, 60))
d.study
cor.test(d.study$time, d.study$score)
帰無仮説が棄却されたので,無相関ではない
スピアマンの順位相関係数
cor(d.study$time, d.study$score, method = "spearman")
cor.test(d.study$time, d.study$score, method = "spearman")
ケンドールの順位相関係数
cor(d.study$time, d.study$score, method = "kendall")
cor.test(d.study$time, d.study$score, method = "kendall")
library(readxl)
d.food <- read_excel("Chap03_food.xlsx", sheet = 1)
head(d.food)
m.o <- table(d.food)
m.o
chisq.test(m.o, correct = FALSE)
帰無仮説を棄却できない
jap <- c(60, 40, 30, 70, 55)
soc <- c(80, 25, 35, 70, 50)
cor.test(jap, soc)
jap <- rep(jap, 2)
soc <- rep(soc, 2)
cor.test(jap, soc)
サンプルサイズが2倍になると,相関係数は変わらないが,$p$値が小さくなった
devtools::session_info()