sapply(c("pipeR", "dplyr", "tidyr", "ggplot2", "readr", "readxl", "Cairo", "grid", "gridExtra"),
require, character.only = TRUE)
# Load the exercise data set; the CSV is decoded as cp932 text.
d <- read_csv(
  file = "chap11_1.csv",
  locale = locale(encoding = "cp932")
)
# Display the loaded table.
d
(1.1)x, y それぞれについて度数分布,代表値,散布度
# Figure size for the two-panel histogram (repr.* options; presumably read by
# the Jupyter/IRkernel display -- confirm against the runtime in use).
options(repr.plot.height = 6, repr.plot.width = 8)

# Frequency distribution per variable: stack every column except 親子 into
# key/val pairs, bin val into 60-wide classes over 0-600, one panel per key.
d %>>%
  gather(key, val, -親子) %>>%
  ggplot(aes(x = val)) +
  geom_histogram(breaks = seq(from = 0, to = 600, by = 60)) +
  facet_wrap(~ key, nrow = 2)

# Central tendency (mean, then median) of every column except 親子.
summarise_each(d, funs(mean), -親子)
summarise_each(d, funs(median), -親子)
#' Population variance (sum of squared deviations divided by n, not n - 1).
#'
#' Computed directly from the definition so that a single observation yields
#' 0; rescaling `var()` returns NA in that case because the sample variance
#' is undefined for n = 1.
#'
#' @param x A numeric vector.
#' @param na.rm If `TRUE`, missing values are dropped before computing.
#'   Defaults to `FALSE`, so a vector containing `NA` still yields `NA`.
#' @return A single numeric value: the population variance of `x`
#'   (`NaN` for an empty vector).
varp <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  mean((x - mean(x))^2)
}
# Dispersion of every column except 親子: variance via varp(), which uses
# n (not n - 1) as the divisor.
summarise_each(d, funs(varp), -親子)
# Corresponding standard deviation; `.` stands for the column inside funs().
summarise_each(d, funs(sqrt(varp(.))), -親子)
(1.2)xとyの差について,度数分布,代表値,散布度
# Row-wise difference y - x, stored as the new column yx.
d <- mutate(d, yx = y - x)

# Smaller square figure for the single histogram.
options(repr.plot.height = 4, repr.plot.width = 4)

# Frequency distribution of the difference in 10-wide classes from 20 to 90.
ggplot(d, aes(x = yx)) +
  geom_histogram(breaks = seq(from = 20, to = 90, by = 10))

# Mean, median, varp() variance and its square root for yx in one call.
summarise_each(d, funs(mean, median, varp, sqrt(varp(.))), yx)
(1.3)xとyの相関係数
# Correlation coefficient between columns x and y of d (cor() defaults).
with(d, cor(x, y))
(1.4)y~x の回帰式
# Fit the linear regression of y on x and keep the model in res.lm.
res.lm <- lm(y ~ x, data = d)

# Full model summary, then just the fitted coefficients.
summary(res.lm)
coef(res.lm)

# Scatter plot of the data with the fitted regression line drawn from the
# model's intercept and slope.
ggplot(d, aes(x = x, y = y)) +
  geom_point() +
  geom_abline(
    intercept = coef(res.lm)["(Intercept)"],
    slope = coef(res.lm)["x"]
  )
(2)(1)からわかること,わからないこと
(3)(2)より,実態と認識のずれを調べる方法は
# Print session information (R and package versions) at the end of the analysis.
devtools::session_info()