options(repr.plot.width = 4, repr.plot.height = 4)
curve(dnorm(x,mean=0,sd=1),from=-4,to=4)
library(ggplot2)
ggplot(data.frame(x = c(-4, 4)), aes(x)) +
stat_function(fun = dnorm, args = list(mean = 0, sd = 1))
last_plot() +
stat_function(fun = dnorm, args = list(mean = 1, sd = 1), colour = "red") +
stat_function(fun = dnorm, args = list(mean = 0, sd = 2), colour = "green")
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
stat_function(fun=function(x){y <- dnorm(x); y[x < -1| x > 1] <- NA; return(y);} , geom="area", alpha = 0.5) +
stat_function(fun = dnorm, args = list(mean = 0, sd = 1)) +
scale_x_continuous(breaks = c(-3:3, 1))
samples = rnorm(n = 5, mean = 50, sd = 10)
ggplot(data.frame(samples), aes(x = samples)) +
geom_histogram()
samples.large <- rnorm(n = 10000, mean = 50, sd = 10)
ggplot(data.frame(samples.large), aes(x = samples.large)) +
geom_histogram()
for (i in 1:3) {
x <- rnorm(n = 10, mean = 50, sd = 10)
print(mean(x))
}
sample_means <- numeric(length = 10000)
for(i in 1:10000){
x <- rnorm(n = 10, mean = 50, sd = 10)
sample_means[i] <- mean(x)
}
ggplot(data.frame(sample_means), aes(x = sample_means)) +
geom_histogram()
library(pipeR)
abs(sample_means - 50) %>>%
sapply(function(x){ifelse(x <= 5, 1, 0)}) %>>%
table()
$N(\mu, \sigma^2)$ の母集団からn標本を抽出したとき,標本平均の標本分布は $N(\mu, \frac{\sigma^2}{n})$ となる.
つまり,$N(50, 10^2)$ から n = 10 抽出すると, $N(50, 10^2 / 10) = N(50, 10)$ となる
mean(sample_means)
var(sample_means)
ggplot(data.frame(sample_means), aes(x = sample_means)) +
geom_histogram(aes(y = ..density..), col = "gray", alpha = 0.8) +
stat_function(fun = dnorm, args = list(mean = 50, sd = sqrt(10)))
sample_means <- numeric(length = 10000)
for(i in 1:10000){
x <- rnorm(n = 100, mean = 50, sd = 10)
sample_means[i] <- mean(x)
}
var(sample_means)
ggplot(data.frame(sample_means), aes(x = sample_means)) +
geom_histogram()
varps <- numeric(length = 10^4)
vars <- numeric(length = 10^4)
for (i in 1:10^4) {
x <- rnorm(n = 10, mean = 50, sd = 10)
varps[i] <- mean((x - mean(x))^2)
vars[i] <- var(x)
}
標本分散
mean(varps)
不偏分散
mean(vars)
sd(varps)
sd(vars)
library(tidyr)
library(Cairo)
options(repr.plot.width = 8, repr.plot.height = 4)
lab <- as_labeller(c(`varps` = "標本分散", `vars` = "不偏分散"))
data.frame(varps, vars) %>>%
gather(key, val) %>>%
ggplot(aes(val)) +
geom_histogram(breaks = seq(0, 500, 10)) +
ylab("Frequency") +
theme(strip.text.x = element_text(family = "IPAexGothic"), axis.title.x = element_blank()) -> gp
Cairo(type = "raster")
gp + facet_wrap(~key, labeller = lab)
dev.off()
library(dplyr)
data.frame(varps, vars) %>>%
mutate_each(funs(whatever = ifelse(. >= 200, 1, 0))) %>>%
rename(標本分散=varps, 不偏分散=vars) %>>%
gather(key, val) %>>%
group_by(key, val) %>>%
tally %>>%
spread(val, n)
sqrt(vars) %>>% mean
means <- numeric(length = 10^4)
medians <- numeric(length = 10^4)
for (i in 1:10^4) {
x <- rnorm(n = 10, mean = 50, sd = 10)
means[i] <- mean(x)
medians[i] <- median(x)
}
mean(means)
mean(medians)
sd(means)
sd(medians)
lab <- as_labeller(c(`means` = "標本平均", `medians` = "標本中央値"))
data.frame(means, medians) %>>%
gather(key, val) %>>%
ggplot(aes(val)) +
geom_histogram() +
ylab("Frequency") +
theme(strip.text.x = element_text(family = "IPAexGothic"), axis.title.x = element_blank()) -> gp
Cairo(type = "raster")
gp + facet_wrap(~key, labeller = lab)
dev.off()
sample_means <- numeric(length = 5000)
for(i in 1:5000) {
x <- rnorm(n = 20, mean = 50, sd = 10)
sample_means[i] <- mean(x)
}
options(repr.plot.width = 4, repr.plot.height = 4)
ggplot(data.frame(sample_means), aes(x = sample_means)) +
geom_histogram(aes(y = ..density..), col = "gray", alpha = 0.8) +
stat_function(fun = dnorm, args = list(mean = 50, sd = sqrt(10^2/20)))
library(RColorBrewer)
options(repr.plot.width = 4, repr.plot.height = 8)
display.brewer.all()
options(repr.plot.width = 4, repr.plot.height = 4)
col = brewer.pal(n = 9, name = "YlGnBu")
ggplot(data.frame(x = c(-3, 3)), aes(x)) +
stat_function(fun = dnorm, args = list(mean = 0, sd = sqrt(1/1)), col = col[1]) +
stat_function(fun = dnorm, args = list(mean = 0, sd = sqrt(1/4)), col = col[3]) +
stat_function(fun = dnorm, args = list(mean = 0, sd = sqrt(1/9)), col = col[5]) +
stat_function(fun = dnorm, args = list(mean = 0, sd = sqrt(1/16)), col = col[7]) +
stat_function(fun = dnorm, args = list(mean = 0, sd = sqrt(1/25)), col = col[9])
devtools::session_info()