8章 相関

ピアソンの積率相関係数$r$

$$ r = \frac{SP_{XY}}{\sqrt{SS_{X} SS_{Y}}} = \frac{\sum\limits^n_{i=1}(X_i - \bar{X})(Y_i - \bar{Y})}{\sqrt{\sum\limits^n_{i=1}(X_i - \bar{X})^2 \sum\limits^n_{i=1}(Y_i - \bar{Y})^2}} $$
In [1]:
r <- 0.5
x <- rnorm(1000, mean=0, sd=5)
y <- r*x + sqrt(1 - r**2) * rnorm(1000, mean=0, sd=5)
cor(x, y)
0.528401287914892
In [3]:
options(repr.plot.width=4, repr.plot.height=4)
In [4]:
plot(x, y)

相関係数の検定: $t$検定

$$ t = \frac{r}{\frac{1-r^2}{n-2}} $$
In [5]:
d <- data.frame(
    X = c(11.7, 11.9, 10.1, 13.6, 12.8, 10.5, 9.8, 10.9, 11.6, 11.8, 13.1, 12.4, 12.2), 
    Y = c(8.2, 8.2, 7.1, 9.2, 8.7, 7.7, 6.5, 7.7, 7.8, 8.0, 9.3, 8.6, 8.3)
)
d
XY
11.78.2
11.98.2
10.17.1
13.69.2
12.88.7
10.57.7
9.86.5
10.97.7
11.67.8
11.88.0
13.19.3
12.48.6
12.28.3
In [9]:
with(data=d, plot(X, Y))
In [12]:
with(data=d, cor.test(x = X, y = Y))
	Pearson's product-moment correlation

data:  X and Y
t = 12.015, df = 11, p-value = 1.149e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.8807398 0.9894269
sample estimates:
      cor 
0.9639463 
In [13]:
d2 <- data.frame(
    X = c(12.5, 13.1, 18.9, 9.7, 16.4, 8.3, 13.7, 17.5, 11.4, 16.2, 19.3, 15.3), 
    Y = c(10.5, 8.9, 13.6, 6.3, 12.5, 10.3, 10.8, 16.7, 8.3, 9.5, 12.4, 10.1)
)
d2
XY
12.510.5
13.1 8.9
18.913.6
9.7 6.3
16.412.5
8.310.3
13.710.8
17.516.7
11.4 8.3
16.2 9.5
19.312.4
15.310.1
In [15]:
with(plot(X, Y), data=d2)

演習問題

In [16]:
with(cor.test(X, Y), data=d2)
	Pearson's product-moment correlation

data:  X and Y
t = 3.1475, df = 10, p-value = 0.01038
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.2210441 0.9106632
sample estimates:
      cor 
0.7054536 
In [17]:
my_cor_test <- function(x, y){
    n <- length(x)
    r <- cor(x, y)
    t <- r / (sqrt((1 - r**2) / (n - 2)))
    return(t)
}
with(my_cor_test(X, Y), data=d2)
3.14754248186613
In [19]:
nrow(d2)
12
In [20]:
qt(0.05, df=10)
-1.81246112281168
In [21]:
qt(0.95, df=10)
1.81246112281168
In [26]:
2 * (1 - pt(q=3.1475, df=10))
0.0103769750396936
In [27]:
my_cor <- function(x, y){
    sp.xy <- sum((x - mean(x)) * (y - mean(y)))
    sp.x <- sum((x - mean(x))**2)
    sp.y <- sum((y - mean(y))**2)
    return(sp.xy / sqrt(sp.x * sp.y))
}
with(my_cor(X, Y), data=d2)
0.705453567333828
In [31]:
my_cor_test <- function(x, y){
    n <- length(x)
    r <- my_cor(x, y)
    cat("r = ", r, "\n")
    t <- r / (sqrt((1 - r**2) / (n - 2)))
    cat("t = ", t, "\n")
    p_val <- 2 * (1 - pt(q=t, df=n - 2))
    cat("p = ", p_val, "\n")
    
}
with(my_cor_test(X, Y), data=d2)
r =  0.7054536 
t =  3.147542 
p =  0.01037623 
In [32]:
devtools::session_info()
Session info -------------------------------------------------------------------
Packages -----------------------------------------------------------------------
 setting  value                       
 version  R version 3.3.2 (2016-10-31)
 system   x86_64, mingw32             
 ui       RTerm                       
 language en_US.UTF-8                 
 collate  Japanese_Japan.932          
 tz       Asia/Tokyo                  
 date     2017-06-21                  

 package   * version    date       source                            
 Cairo       1.5-9      2015-09-26 CRAN (R 3.2.2)                    
 crayon      1.3.2      2016-06-28 CRAN (R 3.3.1)                    
 devtools    1.12.0     2016-06-24 CRAN (R 3.3.1)                    
 digest      0.6.12     2017-01-27 CRAN (R 3.3.3)                    
 evaluate    0.10       2016-10-11 CRAN (R 3.3.2)                    
 IRdisplay   0.4.4      2016-08-02 CRAN (R 3.3.1)                    
 IRkernel    0.8.6.9000 2017-04-13 Github (IRkernel/IRkernel@29ae7df)
 jsonlite    1.4        2017-04-08 CRAN (R 3.3.3)                    
 magrittr    1.5        2014-11-22 CRAN (R 3.2.1)                    
 memoise     1.0.0      2016-01-29 CRAN (R 3.2.3)                    
 pbdZMQ      0.2-5      2016-12-18 CRAN (R 3.3.3)                    
 R6          2.2.1      2017-05-10 CRAN (R 3.3.3)                    
 repr        0.12.0     2017-04-07 CRAN (R 3.3.3)                    
 stringi     1.1.5      2017-04-07 CRAN (R 3.3.3)                    
 stringr     1.2.0      2017-02-18 CRAN (R 3.3.3)                    
 uuid        0.1-2      2015-07-28 CRAN (R 3.3.1)                    
 withr       1.0.2      2016-06-20 CRAN (R 3.2.5)