<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.0.36">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Jeffrey Long">
<title>Introduction to Statistical Learning</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>
<script src="islr_files/libs/clipboard/clipboard.min.js"></script>
<script src="islr_files/libs/quarto-html/quarto.js"></script>
<script src="islr_files/libs/quarto-html/popper.min.js"></script>
<script src="islr_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="islr_files/libs/quarto-html/anchor.min.js"></script>
<link href="islr_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="islr_files/libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="islr_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="islr_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="islr_files/libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light">
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js" type="text/javascript"></script>
</head>
<body class="fullcontent">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default">
<div class="quarto-title">
<h1 class="title">Introduction to Statistical Learning</h1>
<p class="subtitle lead">with Applications in R (ISLR) abstracted to Biomarker Discovery and Prediction</p>
</div>
<div class="quarto-title-meta">
<div>
<div class="quarto-title-meta-heading">Author</div>
<div class="quarto-title-meta-contents">
<p>Jeffrey Long </p>
</div>
</div>
</div>
</header>
<section id="a-quarto-experiment" class="level2">
<h2 class="anchored" data-anchor-id="a-quarto-experiment">A Quarto Experiment</h2>
<p>Quarto enables you to weave together content and executable code into a finished document.</p>
<p>The second edition of ISLR is available from the authors as <a href="https://hastie.su.domains/ISLR2/ISLRv2_website.pdf">a free PDF</a>.</p>
<p>This is my biomarker experiment with Quarto and ISLR.</p>
</section>
<section id="history" class="level2">
<h2 class="anchored" data-anchor-id="history">History</h2>
<p>The Elements of Statistical Learning (ESL, by Hastie, Tibshirani, and Friedman) was published in 2001. The authors of ISLR2 (the resource for this experiment) are Gareth James, Daniela Witten, Trevor Hastie, and Robert Tibshirani.</p>
</section>
<section id="contents" class="level2">
<h2 class="anchored" data-anchor-id="contents">Contents</h2>
<ol type="1">
<li>Introduction</li>
<li>Statistical Learning</li>
<li>Linear Regression</li>
<li>Classification</li>
<li>Resampling Methods</li>
<li>Linear Model Selection and Regularization</li>
<li>Moving Beyond Linearity</li>
<li>Tree-Based Methods</li>
<li>Support Vector Machines</li>
<li>Deep Learning</li>
<li>Survival Analysis and Censored Data</li>
<li>Unsupervised Learning</li>
<li>Multiple Testing</li>
</ol>
</section>
<section id="introduction" class="level2">
<h2 class="anchored" data-anchor-id="introduction">Introduction</h2>
<p>Statistical learning refers to a vast set of tools for understanding data. These tools can be classified as supervised or unsupervised. Broadly speaking, supervised statistical learning involves building a statistical model for predicting, or estimating, an output based on one or more inputs. Problems of this nature occur in fields as diverse as business, medicine, astrophysics, and public policy. With unsupervised statistical learning, there are inputs but no supervising output; nevertheless we can learn relationships and structure from such data.</p>
<p><strong>The <code>Wage</code> data</strong> involves predicting a continuous or quantitative output value. This is often referred to as a regression problem. However, in certain cases we may instead wish to predict a non-numerical value—that is, a categorical or qualitative output.</p>
<p>An example of predicting a non-numerical value, such as up/down, is explored with <strong>the <code>Smarket</code> data</strong>, which contains daily movements of the Standard &amp; Poor’s 500 (S&amp;P) stock index over a 5-year period between 2001 and 2005.</p>
<p>Imagine the first steps of a search for biomarkers in gene expression data. We might have demographic information for a number of patients with an indication. We may wish to understand which types of patients are similar to each other by grouping individuals according to observed characteristics. This is a <em>clustering</em> problem. Unlike <code>Wage</code> and <code>Smarket</code> data, here we are not trying to predict an output variable. We will use <strong>the NCI60 data set</strong> to determine groups or clusters based on 6,830 gene expression measurements for 64 cell lines.</p>
<p>In this particular data set, it turns out that the cell lines correspond to 14 different types of cancer. There is clear evidence that cell lines with the same cancer type tend to be located near each other in clustering methods.</p>
<p>We will use <em>n</em> to represent the number of distinct data points, or observations, in our sample. We will let <em>p</em> denote the number of variables that are available for use in making predictions. For example, the <code>Wage</code> data set consists of 11 variables for 3,000 people, so we have <em>n</em> = 3,000 observations and <em>p</em> = 11 variables (such as year, age, race, and more). We indicate variable names using the font: <code>Variable Name</code>.</p>
<p>We will let <span class="math inline">\(x_{ij}\)</span> represent the value of the <em>j</em>th variable for the <em>i</em>th observation, where <em>i</em> = 1,2,…,<em>n</em> and <em>j</em> = 1,2,…,<em>p</em>. Throughout this experiment, <em>i</em> will be used to index the samples or observations (from 1 to <em>n</em>) and <em>j</em> will be used to index the variables (from 1 to <em>p</em>). We let <span class="math inline">\(\mathbf{X}\)</span> denote an <span class="math inline">\(n × p\)</span> matrix whose (<em>i</em>, <em>j</em>)th element is <span class="math inline">\(x_{ij}\)</span>.</p>
<p>The product of <strong>A</strong> and <strong>B</strong> is denoted <strong>AB</strong>. The (<em>i</em>,<em>j</em>)th element of <strong>AB</strong> is computed by multiplying each element of the <em>i</em>th row of <strong>A</strong> by the corresponding element of the <em>j</em>th column of <strong>B</strong>. That is, <span class="math inline">\((AB)_{ij} = \sum^d_{k=1} a_{ik}b_{kj}\)</span>. As an example,</p>
<p>consider <span class="math inline">\(A = \begin{pmatrix}1 & 2\\3 & 4\end{pmatrix}\)</span> and <span class="math inline">\(B = \begin{pmatrix}5 & 6\\7 & 8\end{pmatrix}\)</span> .</p>
<p><span class="math inline">\(AB = \begin{pmatrix}1 & 2\\3 & 4\end{pmatrix}\begin{pmatrix}5 & 6\\7 & 8\end{pmatrix} = \begin{pmatrix}1\cdot 5+2\cdot 7 & 1\cdot 6+2\cdot 8\\3\cdot 5+4\cdot 7 & 3\cdot 6+4\cdot 8\end{pmatrix} = \begin{pmatrix}19 & 22\\43 & 50\end{pmatrix}\)</span></p>
<p>If <strong>A</strong> is an <span class="math inline">\(r × d\)</span> matrix and <strong>B</strong> is a <span class="math inline">\(d × s\)</span> matrix, this operation produces an <span class="math inline">\(r × s\)</span> matrix. It is only possible to compute <strong>AB</strong> if the number of columns of <strong>A</strong> is the same as the number of rows of <strong>B</strong>.</p>
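<p>The element-wise formula above can be sanity-checked with a few lines of code. This minimal sketch (in Python rather than this document's R, purely for illustration) implements the sum over <em>k</em> directly:</p>

```python
# Minimal matrix multiplication, implementing (AB)_ij = sum_k a_ik * b_kj.
def matmul(A, B):
    r, d, s = len(A), len(B), len(B[0])
    assert all(len(row) == d for row in A), "cols of A must match rows of B"
    return [[sum(A[i][k] * B[k][j] for k in range(d)) for j in range(s)]
            for i in range(r)]

A = [[1, 2], [3, 4]]
B = [[5, 6], [7, 8]]
print(matmul(A, B))  # [[19, 22], [43, 50]]
```

<p>Running it on the 2 × 2 example reproduces the product computed above.</p>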
<div class="cell">
<div class="sourceCode cell-code" id="cb1"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># install.packages("ISLR2")</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ISLR2)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">packageDescription</span>(<span class="st">"ISLR2"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>Package: ISLR2
Version: 1.3-1
Date: 2022-01-10
Title: Introduction to Statistical Learning, Second Edition
Authors@R: c(person("Gareth", "James", role="aut"), person("Daniela",
"Witten", role="aut"), person("Trevor", "Hastie", role=c("aut",
"cre"), email = "hastie@stanford.edu"), person("Rob",
"Tibshirani", role="aut"), person("Balasubramanian",
"Narasimhan", role="ctb"))
Suggests: MASS
Description: We provide the collection of data-sets used in the book
'An Introduction to Statistical Learning with Applications in
R, Second Edition'. These include many data-sets that we used
in the first edition (some with minor changes), and some new
datasets.
Depends: R (>= 3.5.0)
License: GPL-2
LazyLoad: yes
LazyData: yes
URL: https://www.statlearning.com
NeedsCompilation: no
Packaged: 2022-01-10 16:06:23 UTC; hastie
Author: Gareth James [aut], Daniela Witten [aut], Trevor Hastie [aut,
cre], Rob Tibshirani [aut], Balasubramanian Narasimhan [ctb]
Maintainer: Trevor Hastie <hastie@stanford.edu>
Repository: CRAN
Date/Publication: 2022-01-10 16:22:42 UTC
Built: R 4.1.1; ; 2022-01-10 16:34:33 UTC; unix
-- File: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/library/ISLR2/Meta/package.rds </code></pre>
</div>
</div>
<p>The website for ISLR2 is <a href="https://www.statlearning.com/">www.statlearning.com</a>.</p>
</section>
<section id="statistical-learning" class="level2">
<h2 class="anchored" data-anchor-id="statistical-learning">Statistical Learning</h2>
<p>The input variables are typically denoted using the symbol <span class="math inline">\(X\)</span>, with a subscript to distinguish them. The inputs go by different names, such as predictors, independent variables, features, or sometimes just variables. The output variable is often called the response, or dependent variable, and is typically denoted using the symbol <span class="math inline">\(Y\)</span>.</p>
<p>Suppose that we observe a quantitative response <span class="math inline">\(Y\)</span> and <span class="math inline">\(p\)</span> different predictors, <span class="math inline">\(X_1, X_2, \ldots, X_p\)</span>. We assume that there is some relationship between <span class="math inline">\(Y\)</span> and <span class="math inline">\(X = (X_1, X_2, \ldots, X_p)\)</span>, which can be written in the very general form</p>
<p><span class="math inline">\(Y =f(X)+\epsilon\)</span>.</p>
<p>Here <span class="math inline">\(f\)</span> is some fixed but unknown function of <span class="math inline">\(X_1, \ldots, X_p\)</span>, and <span class="math inline">\(\epsilon\)</span> is a random error term, which is independent of <span class="math inline">\(X\)</span> and has mean zero. In this formulation, <span class="math inline">\(f\)</span> represents the systematic information that <span class="math inline">\(X\)</span> provides about <span class="math inline">\(Y\)</span>. However, the function <span class="math inline">\(f\)</span> that connects the input variables to the output variable is in general unknown. In this situation one must estimate <span class="math inline">\(f\)</span> based on the observed points. Overall, the errors have approximately mean zero. The function <span class="math inline">\(f\)</span> may involve more than one input variable.</p>
<section id="why-estimate-f" class="level3">
<h3 class="anchored" data-anchor-id="why-estimate-f">Why Estimate f?</h3>
<p>There are two main reasons that we may wish to estimate f: prediction and inference. We discuss each in turn.</p>
<section id="prediction" class="level4">
<h4 class="anchored" data-anchor-id="prediction">Prediction</h4>
<p>In many situations, a set of inputs <span class="math inline">\(X\)</span> are readily available, but the output <span class="math inline">\(Y\)</span> cannot be easily obtained. In this setting, since the error term averages to zero, we can predict <span class="math inline">\(Y\)</span> using</p>
<p><span class="math inline">\(\hat{Y} = \hat{f}(X)\)</span> ,</p>
<p>where <span class="math inline">\(\hat{f}\)</span> represents our estimate for <span class="math inline">\(f\)</span> , and <span class="math inline">\(\hat{Y}\)</span> represents the resulting prediction for <span class="math inline">\(Y\)</span> . In this setting, <span class="math inline">\(\hat{f}\)</span> is often treated as a <em>black box</em>, in the sense that one is not typically concerned with the exact form of <span class="math inline">\(\hat{f}\)</span> , provided that it yields accurate predictions for <span class="math inline">\(Y\)</span> .</p>
<p>The accuracy of <span class="math inline">\(\hat{Y}\)</span> as a prediction for <span class="math inline">\(Y\)</span> depends on two quantities, which we will call the <em>reducible error</em> and the <em>irreducible error</em>. In general, <span class="math inline">\(\hat{f}\)</span> will not be a perfect estimate for <span class="math inline">\(f\)</span>, and this inaccuracy will introduce some error. This error is reducible because we can potentially improve the accuracy of <span class="math inline">\(\hat{f}\)</span> by using the most appropriate statistical learning technique to estimate <span class="math inline">\(f\)</span> . However, even if it were possible to form a perfect estimate for <span class="math inline">\(f\)</span> , so that our estimated response took the form <span class="math inline">\(\hat{Y} = f(X)\)</span> , our prediction would still have some error in it! This is because <span class="math inline">\(Y\)</span> is also a function of <span class="math inline">\(\epsilon\)</span> , which, by definition, cannot be predicted using <span class="math inline">\(X\)</span> . Therefore, variability associated with <span class="math inline">\(\epsilon\)</span> also affects the accuracy of our predictions. This is known as the irreducible error, because no matter how well we estimate <span class="math inline">\(f\)</span> , we cannot reduce the error introduced by <span class="math inline">\(\epsilon\)</span> .</p>
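<p>A small simulation makes the irreducible error concrete: even if we are handed the true <span class="math inline">\(f\)</span> (a luxury we never have in practice), the average squared prediction error settles at the variance of <span class="math inline">\(\epsilon\)</span>, not at zero. A minimal sketch in Python (the toy <span class="math inline">\(f\)</span> and noise level are invented for illustration):</p>

```python
# Even a perfect estimate (f_hat = f exactly) leaves an error floor of
# Var(eps): the irreducible error. f and the noise below are invented.
import random

random.seed(0)
f = lambda x: 2.0 + 3.0 * x                        # true (normally unknown) f
n = 100_000
xs = [random.uniform(0, 1) for _ in range(n)]
ys = [f(x) + random.gauss(0.0, 1.0) for x in xs]   # Y = f(X) + eps, Var(eps) = 1

mse = sum((y - f(x)) ** 2 for x, y in zip(xs, ys)) / n
print(round(mse, 2))   # hovers near 1.0 = Var(eps), never near 0
```

<p>No choice of statistical learning method can push this quantity below the variance of <span class="math inline">\(\epsilon\)</span>.</p>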
</section>
<section id="inference" class="level4">
<h4 class="anchored" data-anchor-id="inference">Inference</h4>
<p>We are often interested in understanding the association between <span class="math inline">\(Y\)</span> and <span class="math inline">\(X_1,...,X_p\)</span> . In this situation we wish to estimate <span class="math inline">\(f\)</span> , but our goal is not necessarily to make predictions for <span class="math inline">\(Y\)</span> . Now <span class="math inline">\(\hat{f}\)</span> cannot be treated as a black box, because we need to know its exact form. In this setting, one may be interested in answering the following questions:</p>
<p>• Which predictors are associated with the response? It is often the case that only a small fraction of the available predictors are substantially associated with <span class="math inline">\(Y\)</span> . Identifying the few important predictors among a large set of possible variables can be extremely useful, depending on the application.</p>
<p>• What is the relationship between the response and each predictor? Some predictors may have a positive relationship with <span class="math inline">\(Y\)</span> , in the sense that larger values of the predictor are associated with larger values of <span class="math inline">\(Y\)</span> . Other predictors may have the opposite relationship. Depending on the complexity of <span class="math inline">\(f\)</span> , the relationship between the response and a given predictor may also depend on the values of the other predictors.</p>
<p>• Can the relationship between <span class="math inline">\(Y\)</span> and each predictor be adequately summarized using a linear equation, or is the relationship more complicated? Historically, most methods for estimating <span class="math inline">\(f\)</span> have taken a linear form. In some situations, such an assumption is reasonable or even desirable. But often the true relationship is more complicated, in which case a linear model may not provide an accurate representation of the relationship between the input and output variables.</p>
</section>
</section>
<section id="how-do-we-estimate-f" class="level3">
<h3 class="anchored" data-anchor-id="how-do-we-estimate-f">How Do We Estimate <span class="math inline">\(f\)</span> ?</h3>
<p>Our goal is to apply a statistical learning method to the training data in order to estimate the unknown function <span class="math inline">\(f\)</span> .</p>
<ul>
<li><p>training data - observations and responses</p></li>
<li><p>parametric methods - a model-based approach that makes an assumption about the functional form of <span class="math inline">\(f\)</span>, e.g. a <em>linear model</em>, and then fits or trains the model, e.g. by <em>least squares</em>. Beware of overfitting.</p></li>
<li><p>non-parametric methods - make no explicit assumptions about the functional form of <span class="math inline">\(f\)</span>. Instead they seek an estimate of <span class="math inline">\(f\)</span> that gets as close to the data points as possible without being too rough or wiggly. Such approaches can have a major advantage over parametric approaches: by avoiding the assumption of a particular functional form for <span class="math inline">\(f\)</span> , they have the potential to accurately fit a wider range of possible shapes for <span class="math inline">\(f\)</span> . Any parametric approach brings with it the possibility that the functional form used to estimate <span class="math inline">\(f\)</span> is very different from the true <span class="math inline">\(f\)</span> , in which case the resulting model will not fit the data well. In contrast, non-parametric approaches completely avoid this danger, since essentially no assumption about the form of <span class="math inline">\(f\)</span> is made. But non-parametric approaches do suffer from a major disadvantage: since they do not reduce the problem of estimating <span class="math inline">\(f\)</span> to a small number of parameters, a very large number of observations (far more than is typically needed for a parametric approach) is required in order to obtain an accurate estimate for <span class="math inline">\(f\)</span> .</p></li>
</ul>
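<p>The contrast can be sketched on toy data: the parametric route estimates just two numbers (an intercept and a slope), while the non-parametric route (here a simple k-nearest-neighbours average, one of many possibilities) assumes no functional form at all. Python is used for illustration and the data are invented:</p>

```python
# Parametric vs non-parametric on the same (invented) training data:
# a two-parameter least-squares line vs a k-nearest-neighbours average,
# which assumes no functional form for f at all.
x = [0.0, 1.0, 2.0, 3.0, 4.0]
y = [1.1, 2.9, 5.2, 6.8, 9.1]
n = len(x)

# Parametric: closed-form simple linear regression y = b0 + b1*x.
xbar, ybar = sum(x) / n, sum(y) / n
b1 = (sum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
      / sum((xi - xbar) ** 2 for xi in x))
b0 = ybar - b1 * xbar

# Non-parametric: average the k nearest training responses.
def knn_predict(x0, k=2):
    nearest = sorted(range(n), key=lambda i: abs(x[i] - x0))[:k]
    return sum(y[i] for i in nearest) / k

print(b0 + b1 * 2.5, knn_predict(2.5))  # both near 6 for this toy data
```

<p>On data this simple the two estimates nearly agree; the non-parametric one would only pay off if the true relationship were far from linear, and it needs many more observations to do so reliably.</p>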
</section>
<section id="the-trade-off-between-prediction-accuracy-and-model-interpretability" class="level3">
<h3 class="anchored" data-anchor-id="the-trade-off-between-prediction-accuracy-and-model-interpretability">The Trade-Off Between Prediction Accuracy and Model Interpretability</h3>
<p>Some methods such as linear regression are less flexible, or more restrictive, in the sense that they can produce just a relatively small range of shapes to estimate <span class="math inline">\(f\)</span>, such as a linear function. Other methods such as thin plate splines are considerably more flexible because they can generate a much wider range of possible shapes to estimate <span class="math inline">\(f\)</span>.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="interpretability_v_flexibility.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">A representation of the tradeoff between flexibility and interpretability, using different statistical learning methods. In general, as the flexibility of a method increases, its interpretability decreases.</figcaption><p></p>
</figure>
</div>
<p>However, if we are mainly interested in inference, then restrictive models are much more interpretable. For instance, when inference is the goal, the linear model may be a good choice since it will be quite easy to understand the relationship between <span class="math inline">\(Y\)</span> and <span class="math inline">\(X_1,X_2,…,X_p\)</span> .</p>
<p>Least squares linear regression is relatively inflexible but is quite interpretable. The <em>lasso</em> relies upon the linear model but uses an alternative fitting procedure for estimating the coefficients <span class="math inline">\(\beta_0, \beta_1, \ldots, \beta_p\)</span>. The lasso is more restrictive in estimating the coefficients, and sets a number of them to exactly zero. Hence in this sense the lasso is a less flexible approach than linear regression. It is also more interpretable than linear regression, because in the final model the response variable will only be related to a small subset of the predictors, namely those with nonzero coefficient estimates. Generalized additive models (GAMs) instead extend the linear model to allow for certain non-linear relationships. Consequently, GAMs are more flexible than linear regression. They are also somewhat less interpretable than linear regression, because the relationship between each predictor and the response is now modeled using a curve. Finally, fully non-linear methods such as bagging, boosting, support vector machines with non-linear kernels, and neural networks (deep learning), are highly flexible approaches that are harder to interpret.</p>
<p>When prediction is the goal, one may suspect the most flexible method is the best choice as it would give the most accurate prediction. However, overfitting often leads us to less flexible methods with improved performance.</p>
</section>
<section id="supervised-versus-unsupervised-learning" class="level3">
<h3 class="anchored" data-anchor-id="supervised-versus-unsupervised-learning">Supervised Versus Unsupervised Learning</h3>
<p>In supervised learning problems, for each observation of the predictor measurement(s) <span class="math inline">\(x_i, i = 1, \ldots, n\)</span> there is an associated response measurement <span class="math inline">\(y_i\)</span>. We wish to fit a model that relates the response to the predictors, with the aim of accurately predicting the response for future observations (prediction) or better understanding the relationship between the response and the predictors (inference). Many classical statistical learning methods such as linear regression and logistic regression, as well as more modern approaches such as GAMs, boosting, and support vector machines, operate in the supervised learning domain.</p>
<p>Unsupervised learning describes the somewhat more challenging situation in which for every observation <span class="math inline">\(i = 1,…,n\)</span> , we observe a vector of measurements <span class="math inline">\(x_i\)</span> but no associated response <span class="math inline">\(y_i\)</span> . It is not possible to fit a linear regression model, since there is no response variable to predict. In this setting, we are in some sense working blind; the situation is referred to as unsupervised because we lack a response variable that can supervise our analysis.</p>
<p>One statistical learning tool that we may use in this setting is cluster analysis, or clustering. The goal of cluster analysis is to ascertain, on the basis of <span class="math inline">\(x_1,…,x_n\)</span> , whether the observations fall into relatively distinct groups. Identifying such groups can be of interest because it might be that the groups differ with respect to some property of interest.</p>
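<p>A minimal illustration of clustering: a few iterations of Lloyd's k-means on invented one-dimensional measurements. Note that no response <span class="math inline">\(y_i\)</span> appears anywhere; the groups emerge from the <span class="math inline">\(x_i\)</span> alone. (Python for illustration; in R one would typically call <code>kmeans()</code>.)</p>

```python
# Lloyd's k-means with k = 2 on unlabelled 1-D measurements (invented data).
# No response is used; the two groups emerge from the x_i alone.
xs = [1.0, 1.2, 0.8, 9.8, 10.1, 10.4]
centers = [xs[0], xs[3]]                      # crude initialisation

for _ in range(10):                           # a few Lloyd iterations
    groups = [[], []]                         # assignment step: nearest centre
    for x in xs:
        groups[min((0, 1), key=lambda j: abs(x - centers[j]))].append(x)
    centers = [sum(g) / len(g) for g in groups]   # update step: group means
```

<p>The two centres settle near 1.0 and 10.1, recovering the visible groups; with gene expression data the same idea runs in <span class="math inline">\(p\)</span> dimensions.</p>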
</section>
<section id="regression-versus-classification-problems" class="level3">
<h3 class="anchored" data-anchor-id="regression-versus-classification-problems">Regression Versus Classification Problems</h3>
<p>Variables can be characterized as either quantitative or qualitative (also known as categorical). Quantitative variables take on numerical values. In contrast, qualitative variables take on values in one of K different classes, or categories. We tend to refer to problems with a quantitative response as regression problems, while those involving a qualitative response are often referred to as classification problems. Least squares linear regression is used with a quantitative response, whereas logistic regression is typically used with a qualitative (two-class, or binary) response. K-nearest neighbors and boosting can be used in the case of either quantitative or qualitative responses.</p>
<p>We tend to select statistical learning methods on the basis of whether the response is quantitative or qualitative. However, whether the predictors are qualitative or quantitative is generally considered less important.</p>
</section>
</section>
<section id="assessing-model-accuracy" class="level2">
<h2 class="anchored" data-anchor-id="assessing-model-accuracy">Assessing Model Accuracy</h2>
<p>No one method dominates all others over all possible data sets. Hence it is an important task to decide for any given set of data which method produces the best results. Selecting the best approach can be one of the most challenging parts of performing statistical learning in practice.</p>
<section id="measuring-quality-of-fit" class="level3">
<h3 class="anchored" data-anchor-id="measuring-quality-of-fit">Measuring Quality of Fit</h3>
<p>In order to evaluate the performance of a statistical learning method on a given data set, we need some way to measure how well its predictions actually match the observed data. That is, we need to quantify the extent to which the predicted response value for a given observation is close to the true response value for that observation. In the regression setting, the most commonly-used measure is the mean squared error (MSE), given by</p>
<p><span class="math inline">\(MSE = \frac{1}{n}\sum^n_{i=1}(y_i-\hat{f}(x_i))^2\)</span> ,</p>
<p>where <span class="math inline">\(\hat{f}(x_i)\)</span> is the prediction that <span class="math inline">\(\hat{f}\)</span> gives for the <span class="math inline">\(i\)</span>th observation.</p>
<p>The MSE will be small if the predicted responses are very close to the true responses, and will be large if for some of the observations, the predicted and true responses differ substantially.</p>
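<p>As a concrete illustration, the training MSE can be computed directly from its definition. The following is a minimal Python sketch; the data and the <code>mse</code> helper are hypothetical, for illustration only:</p>

```python
def mse(y, y_hat):
    """Mean squared error: (1/n) * sum of (y_i - f_hat(x_i))^2."""
    n = len(y)
    return sum((yi - yh) ** 2 for yi, yh in zip(y, y_hat)) / n

# Hypothetical observed responses and model predictions.
y_obs = [3.0, 5.0, 7.0, 9.0]
y_hat = [2.5, 5.5, 6.5, 9.5]
print(mse(y_obs, y_hat))  # each residual is 0.5, so MSE = 0.25
```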
<p>The MSE above is computed using the training data that was used to fit the model, and so should more accurately be referred to as the training MSE. But in general, we do not really care how well the method works on the training data. Rather, we are interested in the accuracy of the predictions that we obtain when we apply our method to previously unseen test data. Suppose that we have clinical measurements (e.g. weight, blood pressure, height, age, family history of disease) for a number of patients, as well as information about whether each patient has diabetes. We can use these patients to train a statistical learning method to predict risk of diabetes based on clinical measurements. In practice, we want this method to accurately predict diabetes risk for future patients based on their clinical measurements. We are not very interested in whether or not the method accurately predicts diabetes risk for patients used to train the model, since we already know which of those patients have diabetes.</p>
<p>We want to choose the method that gives the lowest test MSE, as opposed to the lowest training MSE. If we had a large number of test observations, we could compute</p>
<p><span class="math inline">\(Ave(y_0 - \hat{f}(x_0))^2\)</span> ,</p>
<p>the average squared prediction error for these test observations <span class="math inline">\((x_0,y_0)\)</span> . We’d like to select the model for which this quantity is as small as possible.</p>
<p>The degrees of freedom is a quantity that summarizes the flexibility of a curve. When a given method yields a small training MSE but a large test MSE, we are said to be overfitting the data. Regardless of whether or not overfitting has occurred, we almost always expect the training MSE to be smaller than the test MSE because most statistical learning methods either directly or indirectly seek to minimize the training MSE. Overfitting refers specifically to the case in which a less flexible model would have yielded a smaller test MSE.</p>
<p>Plotting training and test MSE against the degrees of freedom, as a measure of model flexibility, can guide model selection. Cross-validation can be used to estimate the minimum test MSE using only the training data.</p>
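<p>The gap between training and test MSE can be seen in a small simulation. The sketch below uses hypothetical data, with K-nearest-neighbor regression standing in for a method of tunable flexibility: the most flexible fit, K = 1, drives the training MSE to zero, while its test MSE stays well above zero.</p>

```python
import random

random.seed(0)

def gen(n):
    """Hypothetical data: y = x^2 plus Gaussian noise."""
    xs = [random.uniform(-1, 1) for _ in range(n)]
    ys = [x ** 2 + random.gauss(0, 0.1) for x in xs]
    return xs, ys

def knn_predict(x0, train_x, train_y, k):
    """Average the responses of the k training points nearest to x0."""
    nearest = sorted(range(len(train_x)),
                     key=lambda i: abs(train_x[i] - x0))[:k]
    return sum(train_y[i] for i in nearest) / k

def mse(xs, ys, train_x, train_y, k):
    """Mean squared prediction error on the pairs (xs, ys)."""
    return sum((y - knn_predict(x, train_x, train_y, k)) ** 2
               for x, y in zip(xs, ys)) / len(xs)

train_x, train_y = gen(50)
test_x, test_y = gen(200)
for k in (1, 5, 25):
    print(k,
          round(mse(train_x, train_y, train_x, train_y, k), 4),  # training MSE
          round(mse(test_x, test_y, train_x, train_y, k), 4))    # test MSE
```

<p>With K = 1 each training point is its own nearest neighbor, so the training MSE is exactly zero: a textbook case of overfitting.</p>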
</section>
<section id="the-bias-variance-trade-off" class="level3">
<h3 class="anchored" data-anchor-id="the-bias-variance-trade-off">The Bias-Variance Trade-Off</h3>
<p>It is possible to show that the <strong>expected test MSE</strong>, for a given value <span class="math inline">\(x_0\)</span> , can always be decomposed into the sum of three fundamental quantities: the variance of <span class="math inline">\(\hat{f}(x_0)\)</span> , the squared bias of <span class="math inline">\(\hat{f}(x_0)\)</span> and the variance of the error term <span class="math inline">\(\epsilon\)</span> .</p>
<p><span class="math inline">\(E(y_0-\hat{f}(x_0))^2 = Var(\hat{f}(x_0)) + [Bias(\hat{f}(x_0))]^2 + Var(\epsilon)\)</span> .</p>
<p>To minimize the expected test error, we need to select a statistical learning method that simultaneously achieves low variance and low bias. Note that variance is inherently a nonnegative quantity, and squared bias is also nonnegative. Hence, we see that the expected test MSE can never lie below <span class="math inline">\(Var(\epsilon)\)</span> , the irreducible error.</p>
<p>Variance refers to the amount by which <span class="math inline">\(\hat{f}\)</span> would change if we estimated it using a different training data set. In general, more flexible statistical methods have higher variance. Bias refers to the error that is introduced by approximating a real-life problem, which may be extremely complicated, by a much simpler model. Generally, more flexible methods result in less bias.</p>
<p>As a general rule, as we use more flexible methods, the variance will increase and the bias will decrease. The relative rate of change of these two quantities determines whether the test MSE increases or decreases. As we increase the flexibility of a class of methods, the bias tends to initially decrease faster than the variance increases. Consequently, the expected test MSE declines. However, at some point increasing flexibility has little impact on the bias but starts to significantly increase the variance. When this happens the test MSE increases.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="var_bias_error.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">Squared bias (blue curve), variance (orange curve), Var(ε) (dashed line), and test MSE (red curve) for the three data sets. The vertical dotted line indicates the flexibility level corresponding to the smallest test MSE.</figcaption><p></p>
</figure>
</div>
<p>In a real-life situation in which <span class="math inline">\(f\)</span> is unobserved, it is generally not possible to explicitly compute the test MSE, bias, or variance for a statistical learning method. Nevertheless, one should always keep the <strong>bias-variance trade-off</strong> in mind.</p>
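<p>In a simulation, however, the decomposition can be checked numerically. The sketch below uses a hypothetical setup: the true value of <span class="math inline">\(f(x_0)\)</span> is a known constant and <span class="math inline">\(\hat{f}(x_0)\)</span> is the mean of a small training sample. The Monte Carlo estimate of the expected test MSE closely matches the sum of variance, squared bias, and irreducible error.</p>

```python
import random
import statistics

random.seed(1)
TRUE_F, SIGMA, N_TRAIN, TRIALS = 2.0, 0.5, 5, 20000

fits, sq_errs = [], []
for _ in range(TRIALS):
    # Draw a fresh training set and estimate f(x0) by the sample mean.
    train = [TRUE_F + random.gauss(0, SIGMA) for _ in range(N_TRAIN)]
    f_hat = statistics.mean(train)
    # Draw an independent test observation y0 at x0.
    y0 = TRUE_F + random.gauss(0, SIGMA)
    fits.append(f_hat)
    sq_errs.append((y0 - f_hat) ** 2)

variance = statistics.pvariance(fits)
bias_sq = (statistics.mean(fits) - TRUE_F) ** 2
print(round(statistics.mean(sq_errs), 3))         # estimated E(y0 - f_hat)^2
print(round(variance + bias_sq + SIGMA ** 2, 3))  # Var + Bias^2 + Var(eps)
```

<p>Both printed quantities should be close to <span class="math inline">\(\sigma^2/n + \sigma^2 = 0.3\)</span> , since the sample mean is unbiased here and its variance is <span class="math inline">\(\sigma^2/n\)</span> .</p>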
</section>
<section id="the-classification-setting" class="level3">
<h3 class="anchored" data-anchor-id="the-classification-setting">The Classification Setting</h3>
<p>When the response variable <span class="math inline">\(Y\)</span> is qualitative, the most common approach for quantifying the accuracy of our estimate <span class="math inline">\(\hat{f}\)</span> is the training error rate, the proportion of mistakes that are made if we apply our estimate <span class="math inline">\(\hat{f}\)</span> to the training observations:</p>
<p><span class="math inline">\(\frac{1}{n}\sum^n_{i=1}I(y_i \neq \hat{y}_i)\)</span> .</p>
<p>Here <span class="math inline">\(\hat{y}_i\)</span> is the predicted class label for the <span class="math inline">\(i\)</span> th observation using <span class="math inline">\(\hat{f}\)</span> . And <span class="math inline">\(I(y_i \neq \hat{y}_i)\)</span> is an <em>indicator variable</em> that equals 1 if <span class="math inline">\(y_i \neq \hat{y}_i\)</span> and zero if <span class="math inline">\(y_i = \hat{y}_i\)</span> . If <span class="math inline">\(I(y_i \neq \hat{y}_i) = 0\)</span> then the <span class="math inline">\(i\)</span> th observation was classified correctly by our classification method; otherwise it was misclassified. Hence the training error rate computes the fraction of incorrect classifications.</p>
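<p>The training error rate is just the fraction of mismatched labels. A minimal Python sketch, with hypothetical labels:</p>

```python
def error_rate(y, y_hat):
    """Fraction of misclassifications: (1/n) * sum of I(y_i != yhat_i)."""
    return sum(yi != yh for yi, yh in zip(y, y_hat)) / len(y)

# One of four hypothetical predictions is wrong.
print(error_rate(["a", "b", "a", "a"], ["a", "b", "b", "a"]))  # 0.25
```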
<p>The test error rate associated with a set of test observations of the form <span class="math inline">\((x_0, y_0)\)</span> is given by</p>
<p><span class="math inline">\(Ave(I(y_0 \neq \hat{y}_0))\)</span> ,</p>
<p>where <span class="math inline">\(\hat{y}_0\)</span> is the predicted class label that results from applying the classifier to the test observation with predictor <span class="math inline">\(x_0\)</span>. A good classifier is one for which the test error is smallest.</p>
</section>
<section id="the-bayes-classifier" class="level3">
<h3 class="anchored" data-anchor-id="the-bayes-classifier">The Bayes Classifier</h3>
<p>The test error rate is minimized, on average, by a very simple classifier that assigns each observation to the most likely class, given its predictor values. In other words, we should simply assign a test observation with predictor vector <span class="math inline">\(x_0\)</span> to the class <span class="math inline">\(j\)</span> for which</p>
<p><span class="math inline">\(Pr(Y = j|X = x_0)\)</span></p>
<p>is largest. This conditional probability is the probability that <span class="math inline">\(Y = j\)</span> , given the observed predictor vector <span class="math inline">\(x_0\)</span> . This very simple classifier is called the <em>Bayes classifier</em>. In a two-class problem where there are only two possible response values, say class 1 or class 2, the Bayes classifier corresponds to predicting class one if <span class="math inline">\(Pr(Y = 1|X = x_0) > 0.5\)</span> , and class two otherwise.</p>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="bayes_classifier.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">A simulated data set consisting of 100 observations in each of two groups, indicated in blue and in orange. The purple dashed line represents the Bayes decision boundary. The orange background grid indicates the region in which a test observation will be assigned to the orange class, and the blue background grid indicates the region in which a test observation will be assigned to the blue class.</figcaption><p></p>
</figure>
</div>
<p>The Bayes classifier produces the lowest possible test error rate, called the Bayes error rate. Since the Bayes classifier will always choose the class for which <span class="math inline">\(Pr(Y = j|X = x_0)\)</span> is largest, the error rate at <span class="math inline">\(X = x_0\)</span> will be <span class="math inline">\(1 - \max_j Pr(Y = j|X = x_0)\)</span> . In general, the overall Bayes error rate is given by</p>
<p><span class="math inline">\(1 - E\left(\max_j Pr(Y = j|X)\right)\)</span> ,</p>
<p>where the expectation averages the probability over all possible values of <span class="math inline">\(X\)</span> . If the Bayes error rate is greater than zero, the classes overlap in the true population so <span class="math inline">\(max_j Pr(Y = j|X = x_0) < 1\)</span> for some values of <span class="math inline">\(x_0\)</span> . The Bayes error rate is analogous to the irreducible error.</p>
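<p>For a toy population in which the conditional probabilities are known exactly, both the Bayes classifier and its error rate can be computed directly. The sketch below assumes a hypothetical two-class problem in which <span class="math inline">\(X\)</span> takes three equally likely values:</p>

```python
# Hypothetical Pr(Y = 1 | X = x) for a two-class problem; Pr(X = x) uniform.
cond_p1 = {0: 0.9, 1: 0.6, 2: 0.3}

def bayes_classify(x):
    """Assign the most likely class: class 1 iff Pr(Y = 1 | X = x) > 0.5."""
    return 1 if cond_p1[x] > 0.5 else 2

# Bayes error rate: 1 - E(max_j Pr(Y = j | X)), averaging over X.
bayes_error = sum(1 - max(p, 1 - p) for p in cond_p1.values()) / len(cond_p1)
print(bayes_classify(0), bayes_classify(2))  # 1 2
print(round(bayes_error, 4))                 # (0.1 + 0.4 + 0.3) / 3 = 0.2667
```

<p>The Bayes error rate is positive here because the classes overlap at every value of <span class="math inline">\(x\)</span> : even the best possible classifier is sometimes wrong.</p>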
</section>
<section id="k-nearest-neighbors" class="level3">
<h3 class="anchored" data-anchor-id="k-nearest-neighbors">K-Nearest Neighbors</h3>
<p>In theory we would always like to predict qualitative responses using the Bayes classifier. But for real data, we do not know the conditional distribution of <span class="math inline">\(Y\)</span> given <span class="math inline">\(X\)</span> , and so computing the Bayes classifier is impossible. Therefore, the Bayes classifier serves as an unattainable gold standard against which to compare other methods. Many approaches attempt to estimate the conditional distribution of <span class="math inline">\(Y\)</span> given <span class="math inline">\(X\)</span> , and then classify a given observation to the class with highest estimated probability. One such method is the K-nearest neighbors (KNN) classifier. Given a positive integer <span class="math inline">\(K\)</span> and a test observation <span class="math inline">\(x_0\)</span> , the KNN classifier first identifies the <span class="math inline">\(K\)</span> points in the training data that are closest to <span class="math inline">\(x_0\)</span> , represented by <span class="math inline">\(N_0\)</span> . It then estimates the conditional probability for class <span class="math inline">\(j\)</span> as the fraction of points in <span class="math inline">\(N_0\)</span> whose response values equal <span class="math inline">\(j\)</span> :</p>
<p><span class="math inline">\(Pr(Y = j|X = x_0) = \frac{1}{K} \sum_{i \in N_0} I(y_i = j)\)</span> .</p>
<p>Finally, KNN classifies the test observation <span class="math inline">\(x_0\)</span> to the class with the largest probability.</p>
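<p>The KNN rule fits in a few lines. The following is a sketch on hypothetical one-dimensional data (the figures below use two-dimensional data, but the algorithm is the same: find the K nearest training points, then vote):</p>

```python
from collections import Counter

def knn_classify(x0, train_x, train_y, k):
    """Classify x0 by majority vote among its K nearest training points."""
    nearest = sorted(range(len(train_x)),
                     key=lambda i: abs(train_x[i] - x0))[:k]
    # votes[j] / k estimates Pr(Y = j | X = x0); pick the largest.
    votes = Counter(train_y[i] for i in nearest)
    return votes.most_common(1)[0][0]

# Hypothetical training data: two well-separated classes on the real line.
train_x = [1.0, 1.5, 2.0, 5.0, 5.5, 6.0]
train_y = ["blue", "blue", "blue", "orange", "orange", "orange"]
print(knn_classify(1.2, train_x, train_y, 3))  # blue
print(knn_classify(5.2, train_x, train_y, 3))  # orange
```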
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="KnnClassification.svg" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">Example of k-NN classification. The test sample (green dot) should be classified either to blue squares or to red triangles. If k = 3 (solid line circle) it is assigned to the red triangles because there are 2 triangles and only 1 square inside the inner circle. If k = 5 (dashed line circle) it is assigned to the blue squares (3 squares vs. 2 triangles inside the outer circle). -from Wikipedia.</figcaption><p></p>
</figure>
</div>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="knn_k10.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">The black curve indicates the KNN decision boundary on data using K = 10. The Bayes decision boundary is shown as a purple dashed line. The KNN and Bayes decision boundaries are very similar.</figcaption><p></p>
</figure>
</div>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="knn_k1_k100.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">A comparison of the KNN decision boundaries (solid black curves) obtained using K = 1 and K = 100 on data. With K = 1, the decision boundary is overly flexible, while with K = 100 it is not sufficiently flexible. The Bayes decision boundary is shown as a purple dashed line.</figcaption><p></p>
</figure>
</div>
<div class="quarto-figure quarto-figure-center">
<figure class="figure">
<p><img src="knn_training_error_rate.png" class="img-fluid figure-img"></p>
<p></p><figcaption class="figure-caption">The KNN training error rate (blue, 200 observations) and test error rate (orange, 5,000 observations) on the data, as the level of flexibility (assessed using 1/K on the log scale) increases, or equivalently as the number of neighbors K decreases. The black dashed line indicates the Bayes error rate. The jumpiness of the curves is due to the small size of the training data set.</figcaption><p></p>
</figure>
</div>
<p>The test error exhibits a characteristic U-shape, declining at first (with a minimum at approximately K = 10) before increasing again when the method becomes excessively flexible and overfits.</p>
</section>
</section>
<section id="lab-in-r" class="level2">
<h2 class="anchored" data-anchor-id="lab-in-r">Lab in R</h2>
<p>Posit (formerly known as RStudio) provides an R integrated development environment (IDE).</p>
<div class="cell">
<div class="sourceCode cell-code" id="cb3"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Basic Commands</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>, <span class="dv">2</span>, <span class="dv">5</span>)</span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>x <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">6</span>, <span class="dv">2</span>)</span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a>y <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">4</span>, <span class="dv">3</span>)</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(x)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 3</code></pre>
</div>
<div class="sourceCode cell-code" id="cb5"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 3</code></pre>
</div>
<div class="sourceCode cell-code" id="cb7"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>x<span class="sc">+</span>y</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 2 10 5</code></pre>
</div>
<div class="sourceCode cell-code" id="cb9"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ls</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] "x" "y"</code></pre>
</div>
<div class="sourceCode cell-code" id="cb11"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rm</span>(x, y)</span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a><span class="fu">ls</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>character(0)</code></pre>
</div>
<div class="sourceCode cell-code" id="cb13"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">6</span>, <span class="dv">2</span>)</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a>y <span class="ot">=</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">4</span>, <span class="dv">3</span>)</span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="fu">rm</span>(<span class="at">list =</span> <span class="fu">ls</span>())</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a>?matrix</span>
<span id="cb13-5"><a href="#cb13-5" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">matrix</span>(<span class="at">data =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">4</span>), <span class="at">nrow =</span> <span class="dv">2</span>, <span class="at">ncol =</span> <span class="dv">2</span>)</span>
<span id="cb13-6"><a href="#cb13-6" aria-hidden="true" tabindex="-1"></a>x</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2]
[1,] 1 3
[2,] 2 4</code></pre>
</div>
<div class="sourceCode cell-code" id="cb15"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="fu">matrix</span>(<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">3</span>, <span class="dv">4</span>), <span class="dv">2</span>, <span class="dv">2</span>, <span class="at">byrow =</span> <span class="cn">TRUE</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2]
[1,] 1 2
[2,] 3 4</code></pre>
</div>
<div class="sourceCode cell-code" id="cb17"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sqrt</span>(x)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2]
[1,] 1.000000 1.732051
[2,] 1.414214 2.000000</code></pre>
</div>
<div class="sourceCode cell-code" id="cb19"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">rnorm</span>(<span class="dv">50</span>)</span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a>y <span class="ot"><-</span> x <span class="sc">+</span> <span class="fu">rnorm</span>(<span class="dv">50</span>, <span class="at">mean =</span> <span class="dv">50</span>, <span class="at">sd =</span> .<span class="dv">1</span>)</span>
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="fu">cor</span>(x, y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.9949618</code></pre>
</div>
<div class="sourceCode cell-code" id="cb21"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">1303</span>)</span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a><span class="fu">rnorm</span>(<span class="dv">50</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] -1.1439763145 1.3421293656 2.1853904757 0.5363925179 0.0631929665
[6] 0.5022344825 -0.0004167247 0.5658198405 -0.5725226890 -1.1102250073
[11] -0.0486871234 -0.6956562176 0.8289174803 0.2066528551 -0.2356745091
[16] -0.5563104914 -0.3647543571 0.8623550343 -0.6307715354 0.3136021252
[21] -0.9314953177 0.8238676185 0.5233707021 0.7069214120 0.4202043256
[26] -0.2690521547 -1.5103172999 -0.6902124766 -0.1434719524 -1.0135274099
[31] 1.5732737361 0.0127465055 0.8726470499 0.4220661905 -0.0188157917
[36] 2.6157489689 -0.6931401748 -0.2663217810 -0.7206364412 1.3677342065
[41] 0.2640073322 0.6321868074 -1.3306509858 0.0268888182 1.0406363208
[46] 1.3120237985 -0.0300020767 -0.2500257125 0.0234144857 1.6598706557</code></pre>
</div>
<div class="sourceCode cell-code" id="cb23"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">3</span>)</span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>y <span class="ot"><-</span> <span class="fu">rnorm</span>(<span class="dv">100</span>)</span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a><span class="fu">mean</span>(y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.01103557</code></pre>
</div>
<div class="sourceCode cell-code" id="cb25"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="fu">var</span>(y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.7328675</code></pre>
</div>
<div class="sourceCode cell-code" id="cb27"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb27-1"><a href="#cb27-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sqrt</span>(<span class="fu">var</span>(y)) <span class="co"># SD</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.8560768</code></pre>
</div>
<div class="sourceCode cell-code" id="cb29"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a><span class="fu">sd</span>(y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 0.8560768</code></pre>
</div>
<div class="sourceCode cell-code" id="cb31"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Graphics</span></span>
<span id="cb31-2"><a href="#cb31-2" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">rnorm</span>(<span class="dv">100</span>)</span>
<span id="cb31-3"><a href="#cb31-3" aria-hidden="true" tabindex="-1"></a>y <span class="ot"><-</span> <span class="fu">rnorm</span>(<span class="dv">100</span>)</span>
<span id="cb31-4"><a href="#cb31-4" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(x, y)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-1.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb32"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(x, y, <span class="at">xlab =</span> <span class="st">"this is the x-axis"</span>,</span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a><span class="at">ylab =</span> <span class="st">"this is the y-axis"</span>, <span class="at">main =</span> <span class="st">"Plot of X vs Y"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-2.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb33"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pdf</span>(<span class="st">"Figure.pdf"</span>)</span>
<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(x, y, <span class="at">col =</span> <span class="st">"green"</span>)</span>
<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a><span class="fu">dev.off</span>()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>quartz_off_screen
2 </code></pre>
</div>
<div class="sourceCode cell-code" id="cb35"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb35-1"><a href="#cb35-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">seq</span>(<span class="dv">1</span>, <span class="dv">10</span>)</span>
<span id="cb35-2"><a href="#cb35-2" aria-hidden="true" tabindex="-1"></a>x</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] 1 2 3 4 5 6 7 8 9 10</code></pre>
</div>
<div class="sourceCode cell-code" id="cb37"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb37-1"><a href="#cb37-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">10</span></span>
<span id="cb37-2"><a href="#cb37-2" aria-hidden="true" tabindex="-1"></a>x</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] 1 2 3 4 5 6 7 8 9 10</code></pre>
</div>
<div class="sourceCode cell-code" id="cb39"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb39-1"><a href="#cb39-1" aria-hidden="true" tabindex="-1"></a>x <span class="ot"><-</span> <span class="fu">seq</span>(<span class="sc">-</span>pi, pi, <span class="at">length =</span> <span class="dv">50</span>)</span>
<span id="cb39-2"><a href="#cb39-2" aria-hidden="true" tabindex="-1"></a>x</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] -3.14159265 -3.01336438 -2.88513611 -2.75690784 -2.62867957 -2.50045130
[7] -2.37222302 -2.24399475 -2.11576648 -1.98753821 -1.85930994 -1.73108167
[13] -1.60285339 -1.47462512 -1.34639685 -1.21816858 -1.08994031 -0.96171204
[19] -0.83348377 -0.70525549 -0.57702722 -0.44879895 -0.32057068 -0.19234241
[25] -0.06411414 0.06411414 0.19234241 0.32057068 0.44879895 0.57702722
[31] 0.70525549 0.83348377 0.96171204 1.08994031 1.21816858 1.34639685
[37] 1.47462512 1.60285339 1.73108167 1.85930994 1.98753821 2.11576648
[43] 2.24399475 2.37222302 2.50045130 2.62867957 2.75690784 2.88513611
[49] 3.01336438 3.14159265</code></pre>
</div>
<div class="sourceCode cell-code" id="cb41"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a>y <span class="ot"><-</span> x</span>
<span id="cb41-2"><a href="#cb41-2" aria-hidden="true" tabindex="-1"></a>f <span class="ot"><-</span> <span class="fu">outer</span>(x, y, <span class="cf">function</span>(x, y) <span class="fu">cos</span>(y) <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">+</span> x<span class="sc">^</span><span class="dv">2</span>))</span>
<span id="cb41-3"><a href="#cb41-3" aria-hidden="true" tabindex="-1"></a><span class="fu">contour</span>(x, y, f)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-3.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb42"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a>fa <span class="ot"><-</span> (f <span class="sc">-</span> <span class="fu">t</span>(f)) <span class="sc">/</span> <span class="dv">2</span></span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a><span class="fu">contour</span>(x, y, fa, <span class="at">nlevels =</span> <span class="dv">15</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-4.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb43"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="fu">image</span>(x, y, fa)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-5.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb44"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="fu">persp</span>(x, y, fa)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-6.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb45"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a><span class="fu">persp</span>(x, y, fa, <span class="at">theta =</span> <span class="dv">30</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-7.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb46"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a><span class="fu">persp</span>(x, y, fa, <span class="at">theta =</span> <span class="dv">30</span>, <span class="at">phi =</span> <span class="dv">20</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-8.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb47"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb47-1"><a href="#cb47-1" aria-hidden="true" tabindex="-1"></a><span class="fu">persp</span>(x, y, fa, <span class="at">theta =</span> <span class="dv">30</span>, <span class="at">phi =</span> <span class="dv">70</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-9.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb48"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a><span class="fu">persp</span>(x, y, fa, <span class="at">theta =</span> <span class="dv">30</span>, <span class="at">phi =</span> <span class="dv">40</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-10.png" class="img-fluid" width="672"></p>
</div>
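<p>The <code>persp()</code> calls above differ only in the viewing angles <code>theta</code> (azimuth) and <code>phi</code> (colatitude); the surface itself never changes. As a sanity check on that surface, the grid can be rebuilt as in the earlier chunk; by construction <code>fa</code> is skew-symmetric, so its transpose equals its negative (a minimal sketch, assuming the lab's usual definitions of <code>x</code>, <code>y</code> and <code>f</code>):</p>

```r
# Rebuild the grid as in the earlier lab chunk (assumed definitions)
x <- seq(-pi, pi, length = 50)
y <- x
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
fa <- (f - t(f)) / 2
# Skew-symmetry: the transpose is the elementwise negative,
# and the diagonal is exactly zero
all(t(fa) == -fa)
all(diag(fa) == 0)
```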
<div class="sourceCode cell-code" id="cb49"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb49-1"><a href="#cb49-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Indexing Data</span></span>
<span id="cb49-2"><a href="#cb49-2" aria-hidden="true" tabindex="-1"></a>A <span class="ot"><-</span> <span class="fu">matrix</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">16</span>, <span class="dv">4</span>, <span class="dv">4</span>)</span>
<span id="cb49-3"><a href="#cb49-3" aria-hidden="true" tabindex="-1"></a>A</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16</code></pre>
</div>
<div class="sourceCode cell-code" id="cb51"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb51-1"><a href="#cb51-1" aria-hidden="true" tabindex="-1"></a>A[<span class="dv">2</span>,<span class="dv">3</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 10</code></pre>
</div>
<div class="sourceCode cell-code" id="cb53"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a>A[<span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>), <span class="fu">c</span>(<span class="dv">2</span>, <span class="dv">4</span>)]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2]
[1,] 5 13
[2,] 7 15</code></pre>
</div>
<div class="sourceCode cell-code" id="cb55"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb55-1"><a href="#cb55-1" aria-hidden="true" tabindex="-1"></a>A[<span class="dv">1</span><span class="sc">:</span><span class="dv">3</span>, <span class="dv">2</span><span class="sc">:</span><span class="dv">4</span>]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2] [,3]
[1,] 5 9 13
[2,] 6 10 14
[3,] 7 11 15</code></pre>
</div>
<div class="sourceCode cell-code" id="cb57"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb57-1"><a href="#cb57-1" aria-hidden="true" tabindex="-1"></a>A[<span class="dv">1</span><span class="sc">:</span><span class="dv">2</span>, ]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14</code></pre>
</div>
<div class="sourceCode cell-code" id="cb59"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb59-1"><a href="#cb59-1" aria-hidden="true" tabindex="-1"></a>A[<span class="sc">-</span><span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">3</span>), ]</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [,1] [,2] [,3] [,4]
[1,] 2 6 10 14
[2,] 4 8 12 16</code></pre>
</div>
<div class="sourceCode cell-code" id="cb61"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb61-1"><a href="#cb61-1" aria-hidden="true" tabindex="-1"></a><span class="fu">dim</span>(A)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] 4 4</code></pre>
</div>
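<p>Beyond the positional and negative indices shown above, a matrix can also be subset with a logical mask; a small sketch on the same <code>A</code>:</p>

```r
A <- matrix(1:16, 4, 4)
# Logical mask: elements greater than 10, returned as a vector
# in column-major order
A[A > 10]
# Keep only the rows whose first-column entry is even
A[A[, 1] %% 2 == 0, ]
```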
<div class="sourceCode cell-code" id="cb63"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb63-1"><a href="#cb63-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Loading Data</span></span>
<span id="cb63-2"><a href="#cb63-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ISLR2)</span>
<span id="cb63-3"><a href="#cb63-3" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(Auto)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> mpg cylinders displacement horsepower weight acceleration year origin
1 18 8 307 130 3504 12.0 70 1
2 15 8 350 165 3693 11.5 70 1
3 18 8 318 150 3436 11.0 70 1
4 16 8 304 150 3433 12.0 70 1
5 17 8 302 140 3449 10.5 70 1
6 15 8 429 198 4341 10.0 70 1
name
1 chevrolet chevelle malibu
2 buick skylark 320
3 plymouth satellite
4 amc rebel sst
5 ford torino
6 ford galaxie 500</code></pre>
</div>
<div class="sourceCode cell-code" id="cb65"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb65-1"><a href="#cb65-1" aria-hidden="true" tabindex="-1"></a><span class="fu">write.table</span>(Auto, <span class="st">"Auto.data"</span>)</span>
<span id="cb65-2"><a href="#cb65-2" aria-hidden="true" tabindex="-1"></a>Auto <span class="ot"><-</span> <span class="fu">read.table</span>(<span class="st">"Auto.data"</span>)</span>
<span id="cb65-3"><a href="#cb65-3" aria-hidden="true" tabindex="-1"></a>Auto <span class="ot"><-</span> <span class="fu">read.table</span>(<span class="st">"Auto.data"</span>, <span class="at">header =</span> T, <span class="at">na.strings =</span> <span class="st">"?"</span>, <span class="at">stringsAsFactors =</span> T)</span>
<span id="cb65-4"><a href="#cb65-4" aria-hidden="true" tabindex="-1"></a><span class="co"># Auto <- read.csv("Auto.csv", na.strings = "?", stringsAsFactors = T)</span></span>
<span id="cb65-5"><a href="#cb65-5" aria-hidden="true" tabindex="-1"></a>Auto <span class="ot"><-</span> <span class="fu">na.omit</span>(Auto)</span>
<span id="cb65-6"><a href="#cb65-6" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(Auto)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] "mpg" "cylinders" "displacement" "horsepower" "weight"
[6] "acceleration" "year" "origin" "name" </code></pre>
</div>
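<p>The <code>na.strings = "?"</code> argument is what turns the <code>?</code> placeholders in <code>Auto.data</code> into <code>NA</code>, which <code>na.omit()</code> then drops row-wise. A self-contained illustration on a tiny in-memory table (hypothetical data, not the Auto file):</p>

```r
# Two rows, one with a "?" placeholder in horsepower
txt <- "mpg horsepower\n18 130\n25 ?\n"
d <- read.table(text = txt, header = TRUE, na.strings = "?")
sum(is.na(d))     # one missing value after conversion
nrow(na.omit(d))  # one complete row survives
```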
<div class="sourceCode cell-code" id="cb67"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb67-1"><a href="#cb67-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Additional Graphical and Numerical Summaries</span></span>
<span id="cb67-2"><a href="#cb67-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(Auto<span class="sc">$</span>cylinders, Auto<span class="sc">$</span>mpg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-11.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb68"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" aria-hidden="true" tabindex="-1"></a><span class="fu">attach</span>(Auto)</span>
<span id="cb68-2"><a href="#cb68-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-12.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb69"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" aria-hidden="true" tabindex="-1"></a>cylinders <span class="ot"><-</span> <span class="fu">as.factor</span>(cylinders)</span>
<span id="cb69-2"><a href="#cb69-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-13.png" class="img-fluid" width="672"></p>
</div>
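<p>The jump from a scatterplot to side-by-side boxplots happens because <code>plot()</code> is generic and dispatches on the class of its first argument; a minimal check of that dispatch rule:</p>

```r
cyl <- c(4, 4, 6, 8)
class(cyl)             # numeric: plot() draws a scatterplot
class(as.factor(cyl))  # factor: plot() draws boxplots
```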
<div class="sourceCode cell-code" id="cb70"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb70-1"><a href="#cb70-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg, <span class="at">col =</span> <span class="st">"red"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-14.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb71"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb71-1"><a href="#cb71-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">varwidth =</span> T)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-15.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb72"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb72-1"><a href="#cb72-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">varwidth =</span> T, <span class="at">horizontal =</span> T)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-16.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb73"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb73-1"><a href="#cb73-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(cylinders, mpg, <span class="at">col =</span> <span class="st">"red"</span>, <span class="at">varwidth =</span> T, <span class="at">xlab =</span> <span class="st">"cylinders"</span>, <span class="at">ylab =</span> <span class="st">"MPG"</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-17.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb74"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb74-1"><a href="#cb74-1" aria-hidden="true" tabindex="-1"></a><span class="fu">hist</span>(mpg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-18.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb75"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb75-1"><a href="#cb75-1" aria-hidden="true" tabindex="-1"></a><span class="fu">hist</span>(mpg, <span class="at">col =</span> <span class="dv">2</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-19.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb76"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb76-1"><a href="#cb76-1" aria-hidden="true" tabindex="-1"></a><span class="fu">hist</span>(mpg, <span class="at">col =</span> <span class="dv">2</span>, <span class="at">breaks =</span> <span class="dv">15</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-20.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb77"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb77-1"><a href="#cb77-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(horsepower, mpg)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-21.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb78"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb78-1"><a href="#cb78-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pairs</span>(iris[<span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>], <span class="at">main =</span> <span class="st">"Anderson's Iris Data -- 3 species"</span>,</span>
<span id="cb78-2"><a href="#cb78-2" aria-hidden="true" tabindex="-1"></a> <span class="at">pch =</span> <span class="dv">21</span>, <span class="at">bg =</span> <span class="fu">c</span>(<span class="st">"red"</span>, <span class="st">"green3"</span>, <span class="st">"blue"</span>)[<span class="fu">unclass</span>(iris<span class="sc">$</span>Species)])</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-22.png" class="img-fluid" width="672"></p>
</div>
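<p><code>unclass()</code> strips the factor class from <code>iris$Species</code>, leaving its underlying integer codes 1–3, which then index the three-colour vector so each point is coloured by species; a quick check on the first flower of each species:</p>

```r
# Rows 1, 51 and 101 are the first setosa, versicolor and virginica
codes <- unclass(iris$Species)[c(1, 51, 101)]
c("red", "green3", "blue")[codes]
```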
<div class="sourceCode cell-code" id="cb79"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb79-1"><a href="#cb79-1" aria-hidden="true" tabindex="-1"></a><span class="fu">pairs</span>(iris, <span class="at">log =</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">4</span>, <span class="co"># log the first four</span></span>
<span id="cb79-2"><a href="#cb79-2" aria-hidden="true" tabindex="-1"></a> <span class="at">main =</span> <span class="st">"Lengths and Widths in [log]"</span>, <span class="at">line.main=</span><span class="fl">1.5</span>, <span class="at">oma=</span><span class="fu">c</span>(<span class="dv">2</span>,<span class="dv">2</span>,<span class="dv">3</span>,<span class="dv">2</span>))</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-23.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb80"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb80-1"><a href="#cb80-1" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(iris)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code>[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species" </code></pre>
</div>
<div class="sourceCode cell-code" id="cb82"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb82-1"><a href="#cb82-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(iris<span class="sc">$</span>Sepal.Length,iris<span class="sc">$</span>Petal.Length)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output-display">
<p><img src="islr_files/figure-html/unnamed-chunk-2-24.png" class="img-fluid" width="672"></p>
</div>
<div class="sourceCode cell-code" id="cb83"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb83-1"><a href="#cb83-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(iris)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> Sepal.Length Sepal.Width Petal.Length Petal.Width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
Species
setosa :50
versicolor:50
virginica :50
</code></pre>
</div>
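<p><code>summary()</code> is also generic: on the data frame above it summarises column by column, while on a single numeric vector it returns the named six-number summary; a small sketch:</p>

```r
s <- summary(iris$Petal.Length)
names(s)  # Min., 1st Qu., Median, Mean, 3rd Qu., Max.
as.numeric(s["Median"])
```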
<div class="sourceCode cell-code" id="cb85"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb85-1"><a href="#cb85-1" aria-hidden="true" tabindex="-1"></a>college <span class="ot"><-</span> College</span>
<span id="cb85-2"><a href="#cb85-2" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(college)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] "Private" "Apps" "Accept" "Enroll" "Top10perc"
[6] "Top25perc" "F.Undergrad" "P.Undergrad" "Outstate" "Room.Board"
[11] "Books" "Personal" "PhD" "Terminal" "S.F.Ratio"
[16] "perc.alumni" "Expend" "Grad.Rate" </code></pre>
</div>
<div class="sourceCode cell-code" id="cb87"><pre class="sourceCode r code-with-copy"><code class="sourceCode r"><span id="cb87-1"><a href="#cb87-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rownames</span>(college)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="cell-output cell-output-stdout">
<pre><code> [1] "Abilene Christian University"
[2] "Adelphi University"
[3] "Adrian College"
[4] "Agnes Scott College"
[5] "Alaska Pacific University"
[6] "Albertson College"
[7] "Albertus Magnus College"
[8] "Albion College"
[9] "Albright College"
[10] "Alderson-Broaddus College"
[11] "Alfred University"
[12] "Allegheny College"
[13] "Allentown Coll. of St. Francis de Sales"
[14] "Alma College"
[15] "Alverno College"
[16] "American International College"
[17] "Amherst College"
[18] "Anderson University"
[19] "Andrews University"
[20] "Angelo State University"
[21] "Antioch University"
[22] "Appalachian State University"
[23] "Aquinas College"
[24] "Arizona State University Main campus"
[25] "Arkansas College (Lyon College)"
[26] "Arkansas Tech University"
[27] "Assumption College"
[28] "Auburn University-Main Campus"
[29] "Augsburg College"
[30] "Augustana College IL"
[31] "Augustana College"
[32] "Austin College"
[33] "Averett College"
[34] "Baker University"
[35] "Baldwin-Wallace College"
[36] "Barat College"
[37] "Bard College"
[38] "Barnard College"
[39] "Barry University"
[40] "Baylor University"
[41] "Beaver College"
[42] "Bellarmine College"
[43] "Belmont Abbey College"
[44] "Belmont University"
[45] "Beloit College"
[46] "Bemidji State University"
[47] "Benedictine College"
[48] "Bennington College"
[49] "Bentley College"
[50] "Berry College"
[51] "Bethany College"
[52] "Bethel College KS"
[53] "Bethel College"
[54] "Bethune Cookman College"
[55] "Birmingham-Southern College"
[56] "Blackburn College"
[57] "Bloomsburg Univ. of Pennsylvania"
[58] "Bluefield College"
[59] "Bluffton College"
[60] "Boston University"
[61] "Bowdoin College"
[62] "Bowling Green State University"
[63] "Bradford College"
[64] "Bradley University"
[65] "Brandeis University"
[66] "Brenau University"
[67] "Brewton-Parker College"
[68] "Briar Cliff College"
[69] "Bridgewater College"
[70] "Brigham Young University at Provo"
[71] "Brown University"
[72] "Bryn Mawr College"
[73] "Bucknell University"
[74] "Buena Vista College"
[75] "Butler University"
[76] "Cabrini College"
[77] "Caldwell College"
[78] "California Lutheran University"
[79] "California Polytechnic-San Luis"
[80] "California State University at Fresno"
[81] "Calvin College"
[82] "Campbell University"
[83] "Campbellsville College"
[84] "Canisius College"
[85] "Capital University"
[86] "Capitol College"
[87] "Carleton College"
[88] "Carnegie Mellon University"
[89] "Carroll College"
[90] "Carson-Newman College"
[91] "Carthage College"
[92] "Case Western Reserve University"
[93] "Castleton State College"
[94] "Catawba College"
[95] "Catholic University of America"
[96] "Cazenovia College"
[97] "Cedar Crest College"
[98] "Cedarville College"
[99] "Centenary College"
[100] "Centenary College of Louisiana"
[101] "Center for Creative Studies"
[102] "Central College"
[103] "Central Connecticut State University"
[104] "Central Missouri State University"
[105] "Central Washington University"
[106] "Central Wesleyan College"
[107] "Centre College"
[108] "Chapman University"
[109] "Chatham College"
[110] "Chestnut Hill College"
[111] "Christendom College"
[112] "Christian Brothers University"
[113] "Christopher Newport University"
[114] "Claflin College"
[115] "Claremont McKenna College"
[116] "Clark University"
[117] "Clarke College"
[118] "Clarkson University"
[119] "Clemson University"
[120] "Clinch Valley Coll. of the Univ. of Virginia"
[121] "Coe College"
[122] "Coker College"
[123] "Colby College"
[124] "Colgate University"
[125] "College Misericordia"
[126] "College of Charleston"
[127] "College of Mount St. Joseph"
[128] "College of Mount St. Vincent"
[129] "College of Notre Dame"
[130] "College of Notre Dame of Maryland"
[131] "College of Saint Benedict"
[132] "College of Saint Catherine"
[133] "College of Saint Elizabeth"
[134] "College of Saint Rose"
[135] "College of Santa Fe"
[136] "College of St. Joseph"
[137] "College of St. Scholastica"
[138] "College of the Holy Cross"
[139] "College of William and Mary"
[140] "College of Wooster"
[141] "Colorado College"
[142] "Colorado State University"
[143] "Columbia College MO"
[144] "Columbia College"
[145] "Columbia University"
[146] "Concordia College at St. Paul"
[147] "Concordia Lutheran College"
[148] "Concordia University CA"
[149] "Concordia University"
[150] "Connecticut College"
[151] "Converse College"
[152] "Cornell College"
[153] "Creighton University"
[154] "Culver-Stockton College"
[155] "Cumberland College"
[156] "D'Youville College"
[157] "Dana College"
[158] "Daniel Webster College"
[159] "Dartmouth College"
[160] "Davidson College"
[161] "Defiance College"
[162] "Delta State University"
[163] "Denison University"
[164] "DePauw University"
[165] "Dickinson College"
[166] "Dickinson State University"
[167] "Dillard University"
[168] "Doane College"
[169] "Dominican College of Blauvelt"
[170] "Dordt College"
[171] "Dowling College"
[172] "Drake University"
[173] "Drew University"
[174] "Drury College"
[175] "Duke University"
[176] "Earlham College"
[177] "East Carolina University"
[178] "East Tennessee State University"
[179] "East Texas Baptist University"
[180] "Eastern College"
[181] "Eastern Connecticut State University"
[182] "Eastern Illinois University"
[183] "Eastern Mennonite College"
[184] "Eastern Nazarene College"
[185] "Eckerd College"
[186] "Elizabethtown College"
[187] "Elmira College"
[188] "Elms College"
[189] "Elon College"
[190] "Embry Riddle Aeronautical University"
[191] "Emory & Henry College"
[192] "Emory University"
[193] "Emporia State University"
[194] "Erskine College"
[195] "Eureka College"
[196] "Evergreen State College"
[197] "Fairfield University"
[198] "Fayetteville State University"
[199] "Ferrum College"
[200] "Flagler College"
[201] "Florida Institute of Technology"
[202] "Florida International University"
[203] "Florida Southern College"
[204] "Florida State University"
[205] "Fontbonne College"
[206] "Fordham University"
[207] "Fort Lewis College"
[208] "Francis Marion University"
[209] "Franciscan University of Steubenville"
[210] "Franklin College"
[211] "Franklin Pierce College"
[212] "Freed-Hardeman University"
[213] "Fresno Pacific College"
[214] "Furman University"
[215] "Gannon University"
[216] "Gardner Webb University"
[217] "Geneva College"
[218] "George Fox College"
[219] "George Mason University"
[220] "George Washington University"
[221] "Georgetown College"
[222] "Georgetown University"
[223] "Georgia Institute of Technology"
[224] "Georgia State University"
[225] "Georgian Court College"
[226] "Gettysburg College"
[227] "Goldey Beacom College"
[228] "Gonzaga University"
[229] "Gordon College"
[230] "Goshen College"
[231] "Goucher College"
[232] "Grace College and Seminary"
[233] "Graceland College"
[234] "Grand Valley State University"
[235] "Green Mountain College"
[236] "Greensboro College"
[237] "Greenville College"
[238] "Grinnell College"
[239] "Grove City College"
[240] "Guilford College"
[241] "Gustavus Adolphus College"
[242] "Gwynedd Mercy College"
[243] "Hamilton College"
[244] "Hamline University"
[245] "Hampden - Sydney College"
[246] "Hampton University"
[247] "Hanover College"
[248] "Hardin-Simmons University"
[249] "Harding University"
[250] "Hartwick College"
[251] "Harvard University"
[252] "Harvey Mudd College"
[253] "Hastings College"
[254] "Hendrix College"
[255] "Hillsdale College"
[256] "Hiram College"
[257] "Hobart and William Smith Colleges"
[258] "Hofstra University"
[259] "Hollins College"
[260] "Hood College"
[261] "Hope College"
[262] "Houghton College"
[263] "Huntingdon College"
[264] "Huntington College"
[265] "Huron University"
[266] "Husson College"
[267] "Illinois Benedictine College"
[268] "Illinois College"
[269] "Illinois Institute of Technology"
[270] "Illinois State University"
[271] "Illinois Wesleyan University"
[272] "Immaculata College"
[273] "Incarnate Word College"
[274] "Indiana State University"
[275] "Indiana University at Bloomington"
[276] "Indiana Wesleyan University"
[277] "Iona College"
[278] "Iowa State University"
[279] "Ithaca College"
[280] "James Madison University"
[281] "Jamestown College"
[282] "Jersey City State College"
[283] "John Brown University"
[284] "John Carroll University"
[285] "Johns Hopkins University"
[286] "Johnson State College"
[287] "Judson College"
[288] "Juniata College"
[289] "Kansas State University"
[290] "Kansas Wesleyan University"
[291] "Keene State College"
[292] "Kentucky Wesleyan College"
[293] "Kenyon College"
[294] "Keuka College"
[295] "King's College"
[296] "King College"
[297] "Knox College"
[298] "La Roche College"
[299] "La Salle University"
[300] "Lafayette College"
[301] "LaGrange College"
[302] "Lake Forest College"
[303] "Lakeland College"
[304] "Lamar University"
[305] "Lambuth University"
[306] "Lander University"
[307] "Lawrence University"
[308] "Le Moyne College"
[309] "Lebanon Valley College"
[310] "Lehigh University"
[311] "Lenoir-Rhyne College"
[312] "Lesley College"
[313] "LeTourneau University"
[314] "Lewis and Clark College"
[315] "Lewis University"
[316] "Lincoln Memorial University"