Differences

This shows you the differences between two versions of the page.

--- r:basics [2016/09/21 08:41] – hkimscil
+++ r:basics [2019/09/25 10:28] – [Sequence] hkimscil
@@ Line 156: / Line 156: @@
 [1] 11.49988
 </code>
+$$ r = \frac {\text{covariance (x, y)}}  {sd(x) * sd(y)} $$
+<code>
+> x <- c(0,1,1,2,3,5,8,13,21,34)
+> y <- log(x+1)
+> cor(x,y)
+[1] 0.9068053
+> cov(x,y)/(sd(x)*sd(y))
+[1] 0.9068053
+> cov(x,y)/sqrt(var(x)*var(y))
+[1] 0.9068053
+</code>
 <code>> x <- c(0,1,1,2,3,NA)
@@ Line 168: / Line 183: @@
 > sd(x, na.rm=TRUE)
 [1] 1.140175
+</code>
+<WRAP info> data
+<code>small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406)
+medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692)
+big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814)
+dframe <- data.frame(small, medium, big)
+</code>
+</WRAP>
+<code>
+> dlist <- list(small,medium,big)
+> dlist
+[[1]]
+ [1] 0.6739635 1.5524619 0.3250562 1.2143595 1.3107692 2.1739663
+ [7] 1.6187899 0.8872657 1.9170283 0.7767406
+[[2]]
+ [1] 10.526448  9.205156 11.427756  8.533180  9.763317  9.806662
+ [7]  9.150245 10.058465  9.182330  7.949692
+[[3]]
+ [1]  99.83624 100.70852  99.73202  98.53608 100.74444  98.58961
+ [7] 100.46707  99.88068 100.46724 100.49814
+> lapply (dlist,mean)
+[[1]]
+[1] 1.24504
+[[2]]
+[1] 9.560325
+[[3]]
+[1] 99.946
+> sapply(dlist, sd)
+[1] 0.5844025 0.9920282 0.8135503
 </code>
@@ Line 183: / Line 235: @@
   1.9170283  9.182330 100.46724
 0.7767406  7.949692 100.49814
-> mean(dframe)
+> mean(dframe)       # This does not work.
+> colMeans(dframe)   # This works. Note the function name: col+Means.
     small    medium       big
  1.245040  9.560325 99.946003
-> sd(dframe)
+> sd(dframe)         # This does not work.
+> sd(dframe$small)   # Instead, do separately.
+> sd(dframe$medium)
+> sd(dframe$big)
+> # OR . . . .
+> sapply(dframe, sd)
+    small    medium       big
+0.5844025 0.9920282 0.8135503
+# then . . .
+> sapply(dframe, mean)
     small    medium       big
-0.5844025 0.9920281 0.8135498
+ 1.245040  9.560325 99.946004
 </code>
@@ Line 226: / Line 288: @@
 </code>
+sequence (''seq'') 는 x축의 구성을 임의적으로 만들 때 유용. 예를 들면, normal distribution graph 등.
+<code>
+x <- seq(-4, 4, length=10000)
+y <- dnorm(x, mean=0, sd=1)
+plot(x, y, type="l", lwd=1)
+</code>
 ====== Comparing Vectors ======
 <code>> a <- 3
@@ Line 240: / Line 309: @@
 > a >= pi
 [1] FALSE
+</code>
+<code>
+> a <- var(dframe)
+> b <- cov(dframe)
+> a == b
+       small medium  big
+small   TRUE   TRUE TRUE
+medium  TRUE   TRUE TRUE
+big     TRUE   TRUE TRUE
+>
 </code>
@@ Line 374: / Line 454: @@
 [1] -1.2649111 -0.6324555  0.0000000  0.6324555  1.2649111
 </code>
+<WRAP box help>Get the variance of v without using the var() function.
+</WRAP>
 <code>> w
@@ Line 415: / Line 498: @@
 Returns TRUE if the left operand occurs in its right operand; FALSE otherwise
 </code>
+<code>classtaken = matrix(0,8,10)
+edge.list = matrix (
+    c(1,1,1,2,1,3,1,4,1,9,
+2,2,2,5,2,7,2,8,
+3,1,3,5,3,6,3,7,3,8,
+4,2,4,6,4,9,4,10,
+5,1,5,2,5,5,5,7,5,8,
+6,2,6,3,6,4,6,7,
+7,3,7,4,7,7,7,8,
+8,1,8,2,8,6,8,9,8,10), byrow=T, nrow=36,ncol=2)
+classtaken[edge.list] = 1
+rownames(classtaken) = c("a","b", "c", "d","e", "f", "g", "h")
+colnames(classtaken) = c("writer", "comtheo", "pr","adv",
+                      "broadc","internet","camshoot", "edit",
+                      "newmedia", "cmc")
+classtaken
+c = classtaken
+tc = t(classtaken)
+stu = c %*% tc
+class = tc %*% c
+stu
+class</code>
+<WRAP box help>
+Fill values less than 3 (i.e., 2 or lower) with zeros in the stu matrix.
+<code> stu[stu < 3] <- 0 </code>
+</WRAP>
 ====== Defining a Function ======
@@ Line 442: / Line 557: @@
+{{tag> statistics r "r basics"}}