第II部

第10章 外れ値が相関係数に及ぼす影響

In [1]:
# Attach every package used in this chapter. The result is a named logical
# vector (one entry per package) reporting whether require() succeeded.
vapply(
    c(
        "pipeR", "dplyr", "tidyr",
        "ggplot2", "readr", "readxl",
        "Cairo", "grid", "gridExtra"
    ),
    require,
    logical(1),
    character.only = TRUE
)
Loading required package: pipeR
Loading required package: dplyr
Warning message:
: package 'dplyr' was built under R version 3.3.1
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Loading required package: tidyr
Loading required package: ggplot2
Loading required package: readr
Loading required package: readxl
Loading required package: Cairo
Loading required package: grid
Loading required package: gridExtra
Warning message:
: package 'gridExtra' was built under R version 3.3.1
Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

pipeR
TRUE
dplyr
TRUE
tidyr
TRUE
ggplot2
TRUE
readr
TRUE
readxl
TRUE
Cairo
TRUE
grid
TRUE
gridExtra
TRUE
In [2]:
# Read the chapter's data set from "chap10_1.csv" into d.brain.
d.brain <- read_csv("chap10_1.csv")
# Display the full table that was read (Species, Body_weight, Brain_weight).
d.brain
SpeciesBody_weightBrain_weight
1Mountain_Beaver1.35 8.1
2Cow465423
3Grey_Wolf36.33 119.5
4Goat 27.66115
5Guinea_Pig1.04 5.5
6Diplodocus11700 50
7Asian_Elephant2547 4603
8Donkey187.1 419
9Horse521 655
10Potar_Monkey10 115
11Cat 3.3 25.6
12Giraffe529 680
13Gorilla207 406
14Human62 1320
15African_Elephant6654 5712
16Triceratops9400 70
17Rhesus_Monkey6.8 179
18Kangaroo35 56
19Hamster0.12 1
20Mouse0.0230.4
21Rabbit2.5 12.1
22Sheep55.5 175
23Jaguar100 157
24Chimpanzee52.16 440
25Brachiosaurus87000 154.5
26Rat 0.281.9
27Mole 0.1223
28Pig192180
In [3]:
# Set both repr plot-size options to 4 so the plots below render square.
# NOTE(review): the unit is presumably inches -- confirm against the repr docs.
options(repr.plot.width = 4, repr.plot.height = 4)
In [4]:
# Scatter plot of Brain_weight against Body_weight for the full data set.
ggplot(data = d.brain, mapping = aes(x = Body_weight, y = Brain_weight)) +
    geom_point()

In [5]:
# Correlation between Body_weight and Brain_weight over all rows of d.brain.
with(d.brain, cor(Body_weight, Brain_weight))
-0.00534116256125113
In [6]:
# Keep only the rows whose Body_weight is below 80000, then display them.
d.brain2 <- filter(d.brain, Body_weight < 80000)
d.brain2
SpeciesBody_weightBrain_weight
1Mountain_Beaver1.35 8.1
2Cow465423
3Grey_Wolf36.33 119.5
4Goat 27.66115
5Guinea_Pig1.04 5.5
6Diplodocus11700 50
7Asian_Elephant2547 4603
8Donkey187.1 419
9Horse521 655
10Potar_Monkey10 115
11Cat 3.3 25.6
12Giraffe529 680
13Gorilla207 406
14Human62 1320
15African_Elephant6654 5712
16Triceratops9400 70
17Rhesus_Monkey6.8 179
18Kangaroo35 56
19Hamster0.12 1
20Mouse0.0230.4
21Rabbit2.5 12.1
22Sheep55.5 175
23Jaguar100 157
24Chimpanzee52.16 440
25Rat 0.281.9
26Mole 0.1223
27Pig192180
In [7]:
# Scatter plot of Brain_weight against Body_weight for d.brain2.
ggplot(data = d.brain2, mapping = aes(x = Body_weight, y = Brain_weight)) +
    geom_point()

In [8]:
# Correlation between Body_weight and Brain_weight within d.brain2.
with(d.brain2, cor(Body_weight, Brain_weight))
0.308242505587905
In [9]:
# Keep only the rows whose Body_weight is below 2000, then display them.
d.brain3 <- filter(d.brain, Body_weight < 2000)
d.brain3
SpeciesBody_weightBrain_weight
1Mountain_Beaver1.35 8.1
2Cow465423
3Grey_Wolf36.33 119.5
4Goat 27.66115
5Guinea_Pig1.04 5.5
6Donkey187.1 419
7Horse521 655
8Potar_Monkey10 115
9Cat 3.3 25.6
10Giraffe529 680
11Gorilla207 406
12Human62 1320
13Rhesus_Monkey6.8 179
14Kangaroo35 56
15Hamster0.12 1
16Mouse0.0230.4
17Rabbit2.5 12.1
18Sheep55.5 175
19Jaguar100 157
20Chimpanzee52.16 440
21Rat 0.281.9
22Mole 0.1223
23Pig192180
In [10]:
# Scatter plot of Brain_weight against Body_weight for d.brain3.
ggplot(data = d.brain3, mapping = aes(x = Body_weight, y = Brain_weight)) +
    geom_point()

In [11]:
# Correlation between Body_weight and Brain_weight within d.brain3.
with(d.brain3, cor(Body_weight, Brain_weight))
0.542350796141974
In [12]:
# Keep only the rows with Body_weight below 2000 and Brain_weight below 1000
# (filter() combines comma-separated conditions with AND), then display them.
d.brain4 <- filter(d.brain, Body_weight < 2000, Brain_weight < 1000)
d.brain4
SpeciesBody_weightBrain_weight
1Mountain_Beaver1.35 8.1
2Cow465423
3Grey_Wolf36.33 119.5
4Goat 27.66115
5Guinea_Pig1.04 5.5
6Donkey187.1 419
7Horse521 655
8Potar_Monkey10 115
9Cat 3.3 25.6
10Giraffe529 680
11Gorilla207 406
12Rhesus_Monkey6.8 179
13Kangaroo35 56
14Hamster0.12 1
15Mouse0.0230.4
16Rabbit2.5 12.1
17Sheep55.5 175
18Jaguar100 157
19Chimpanzee52.16 440
20Rat 0.281.9
21Mole 0.1223
22Pig192180
In [13]:
# Scatter plot of Brain_weight against Body_weight for d.brain4.
ggplot(data = d.brain4, mapping = aes(x = Body_weight, y = Brain_weight)) +
    geom_point()

In [14]:
# Correlation between Body_weight and Brain_weight within d.brain4.
with(d.brain4, cor(Body_weight, Brain_weight))
0.882233613797128
In [15]:
# Print the R session settings and the versions of the packages in use.
devtools::session_info()
Session info -------------------------------------------------------------------
 setting  value                       
 version  R version 3.3.0 (2016-05-03)
 system   x86_64, mingw32             
 ui       RTerm                       
 language (EN)                        
 collate  Japanese_Japan.932          
 tz       Asia/Tokyo                  
 date     2016-07-07                  

Packages -----------------------------------------------------------------------
 package    * version    date       source                             
 assertthat   0.1        2013-12-06 CRAN (R 3.3.0)                     
 base64enc    0.1-3      2015-07-28 CRAN (R 3.3.0)                     
 Cairo      * 1.5-9      2015-09-26 CRAN (R 3.3.0)                     
 colorspace   1.2-6      2015-03-11 CRAN (R 3.3.0)                     
 DBI          0.4-1      2016-05-08 CRAN (R 3.3.0)                     
 devtools     1.11.1     2016-04-21 CRAN (R 3.3.0)                     
 digest       0.6.9      2016-01-08 CRAN (R 3.3.0)                     
 dplyr      * 0.5.0      2016-06-24 CRAN (R 3.3.1)                     
 evaluate     0.9        2016-04-29 CRAN (R 3.3.0)                     
 ggplot2    * 2.1.0      2016-03-01 CRAN (R 3.3.0)                     
 gridExtra  * 2.2.1      2016-02-29 CRAN (R 3.3.1)                     
 gtable       0.2.0      2016-02-26 CRAN (R 3.3.0)                     
 IRdisplay    0.3.0.9000 2016-06-20 Github (IRkernel/IRdisplay@69fd5d8)
 IRkernel     0.6        2016-06-20 Github (IRkernel/IRkernel@735d4ff) 
 jsonlite     0.9.22     2016-06-15 CRAN (R 3.3.0)                     
 labeling     0.3        2014-08-23 CRAN (R 3.3.0)                     
 lazyeval     0.2.0      2016-06-12 CRAN (R 3.3.0)                     
 magrittr     1.5        2014-11-22 CRAN (R 3.3.0)                     
 memoise      1.0.0      2016-01-29 CRAN (R 3.3.0)                     
 munsell      0.4.3      2016-02-13 CRAN (R 3.3.0)                     
 pbdZMQ       0.2-3      2016-05-20 CRAN (R 3.3.0)                     
 pipeR      * 0.6.1.3    2016-04-04 CRAN (R 3.3.0)                     
 plyr         1.8.4      2016-06-08 CRAN (R 3.3.0)                     
 R6           2.1.2      2016-01-26 CRAN (R 3.3.0)                     
 Rcpp         0.12.5     2016-05-14 CRAN (R 3.3.0)                     
 readr      * 0.2.2      2015-10-22 CRAN (R 3.3.0)                     
 readxl     * 0.1.1      2016-03-28 CRAN (R 3.3.0)                     
 repr         0.7        2016-05-13 CRAN (R 3.3.0)                     
 scales       0.4.0      2016-02-26 CRAN (R 3.3.0)                     
 stringi      1.1.1      2016-05-27 CRAN (R 3.3.0)                     
 stringr      1.0.0      2015-04-30 CRAN (R 3.3.0)                     
 tibble       1.0        2016-03-23 CRAN (R 3.3.0)                     
 tidyr      * 0.5.1      2016-06-14 CRAN (R 3.3.0)                     
 uuid         0.1-2      2015-07-28 CRAN (R 3.3.0)                     
 withr        1.0.1      2016-02-04 CRAN (R 3.3.0)