# COMMunicationRESearch.NET

### Site Tools

b:r_cookbook:basics

# Print

> pi
[1] 3.141593
> sqrt(2)
[1] 1.414214

When you enter expressions like that, R evaluates the expression and then implicitly calls the print function. So the previous example is identical to this:

> print(pi)
[1] 3.141593
> print(sqrt(2))
[1] 1.414214

The print function has a significant limitation, however: it prints only one object at a time. Trying to print multiple items gives this mind-numbing error message:

> print("The zero occurs at", 2*pi, "radians.")
Error in print.default("The zero occurs at", 2 * pi, "radians.") :
unimplemented type 'character' in 'asLogical'

> cat("The zero occurs at", 2*pi, "radians.", "\n")
The zero occurs at 6.283185 radians.

Note: cat separates the items with a space automatically, but it does not append a line feed, so "\n" must be supplied explicitly.

A simple vector

> fib <- c(0,1,1,2,3,5,8,13,21,34)
> cat("The first few Fibonacci numbers are:", fib, "...\n")
The first few Fibonacci numbers are: 0 1 1 2 3 5 8 13 21 34 ...

A serious limitation of cat, however, is that it cannot print compound data structures such as matrices and lists.

# Variables

> variable_name <- 3

Variables do not need to be declared, and the same name can later hold a value of a different type:

> x <- 3
> print(x)
[1] 3
> x <- c("fee", "fie", "foe", "fum")
> print(x)
[1] "fee" "fie" "foe" "fum"

# Listing Variables

> ls()
character(0)
> x <- 10
> y <- 50
> z <- c("three", "blind", "mice")
> f <- function(n,p) sqrt(p*(1-p)/n)
> ls()
[1] "f" "x" "y" "z"
> ls.str()
f : function (n, p)
x :  num 10
y :  num 50
z :  chr [1:3] "three" "blind" "mice"

A variable whose name begins with a dot (“.”) is hidden: ls() does not show it unless all.names=TRUE is given.

> .hidvar <- 10
> ls()
[1] "f" "x" "y" "z"
> ls(all.names=TRUE)
[1] ".hidvar" "f"       "x"       "y"       "z"

# Deleting Variable

> x <- 2*pi
> x
[1] 6.283185
> rm(x)
> x
Error: object "x" not found

Note: there is no “undo”; once a variable is removed, it is gone.

Wipe out variables in a session:

> ls()
[1] "f" "x" "y" "z"
> rm(list=ls())
> ls()
character(0)

# Vector

> c(1,1,2,3,5,8,13,21)
[1]  1  1  2  3  5  8 13 21
> c(1*pi, 2*pi, 3*pi, 4*pi)
[1]  3.141593  6.283185  9.424778 12.566371
> c("Everyone", "loves", "stats.")
[1] "Everyone" "loves"    "stats."
> c(TRUE,TRUE,FALSE,TRUE)
[1]  TRUE  TRUE FALSE  TRUE

If the arguments to c(…) are themselves vectors, it flattens them and combines them into one single vector:

> v1 <- c(1,2,3)
> v2 <- c(4,5,6)
> c(v1,v2)
[1] 1 2 3 4 5 6
> v1 <- c(1,2,3)
> v3 <- c("A","B","C")
> c(v1,v3)
[1] "1" "2" "3" "A" "B" "C"
> c(3.1415, "foo")
[1] "3.1415" "foo"
> mode(c(3.1415, "foo"))
[1] "character"

# Basic (descriptive) Statistics

Functions for computing the mean, median, standard deviation, variance, correlation, and covariance:

mean(x)
median(x)
sd(x)
var(x)
cor(x, y)
cov(x, y)

The variables x and y should be numeric (see levels of measurement).

> x <- c(0,1,1,2,3,5,8,13,21,34)
> mean(x)
[1] 8.8
> median(x)
[1] 4
> sd(x)
[1] 11.03328
> var(x)
[1] 121.7333

> x <- c(0,1,1,2,3,5,8,13,21,34)
> y <- log(x+1)
> cor(x,y)
[1] 0.9068053
> cov(x,y)
[1] 11.49988

$$r = \frac {\text{covariance (x, y)}} {sd(x) * sd(y)}$$

> x <- c(0,1,1,2,3,5,8,13,21,34)
> y <- log(x+1)
> cor(x,y)
[1] 0.9068053
> cov(x,y)/(sd(x)*sd(y))
[1] 0.9068053
> cov(x,y)/sqrt(var(x)*var(y))
[1] 0.9068053

> x <- c(0,1,1,2,3,NA)
> mean(x)
[1] NA
> sd(x)
[1] NA

> x <- c(0,1,1,2,3,NA)
> mean(x, na.rm=TRUE)
[1] 1.4
> sd(x, na.rm=TRUE)
[1] 1.140175

data

small <- c(0.6739635, 1.5524619, 0.3250562, 1.2143595, 1.3107692, 2.1739663, 1.6187899, 0.8872657, 1.9170283, 0.7767406)
medium <- c(10.526448, 9.205156, 11.427756, 8.53318, 9.763317, 9.806662, 9.150245, 10.058465, 9.18233, 7.949692)
big <- c(99.83624, 100.70852, 99.73202, 98.53608, 100.74444, 98.58961, 100.46707, 99.88068, 100.46724, 100.49814)

dframe <- data.frame(small, medium, big)
> dlist <- list(small,medium,big)
> dlist
[[1]]
[1] 0.6739635 1.5524619 0.3250562 1.2143595 1.3107692 2.1739663
[7] 1.6187899 0.8872657 1.9170283 0.7767406

[[2]]
[1] 10.526448  9.205156 11.427756  8.533180  9.763317  9.806662
[7]  9.150245 10.058465  9.182330  7.949692

[[3]]
[1]  99.83624 100.70852  99.73202  98.53608 100.74444  98.58961
[7] 100.46707  99.88068 100.46724 100.49814
> lapply (dlist,mean)
[[1]]
[1] 1.24504

[[2]]
[1] 9.560325

[[3]]
[1] 99.946
> sapply(dlist, sd)
[1] 0.5844025 0.9920282 0.8135503

> print(dframe)
small    medium       big
1  0.6739635 10.526448  99.83624
2  1.5524619  9.205156 100.70852
3  0.3250562 11.427756  99.73202
4  1.2143595  8.533180  98.53608
5  1.3107692  9.763317 100.74444
6  2.1739663  9.806662  98.58961
7  1.6187899  9.150245 100.46707
8  0.8872657 10.058465  99.88068
9  1.9170283  9.182330 100.46724
10 0.7767406  7.949692 100.49814
> mean(dframe)       # This does not work.
> colMeans(dframe)   # This works. Note the function name: col+Means.
small    medium       big
1.245040  9.560325 99.946003
> sd(dframe)         # This does not work.
> sd(dframe$small)   # Instead, do separately.
> sd(dframe$medium)
> sd(dframe$big)
> # OR . . . .
> sapply(dframe, sd)
    small    medium       big
0.5844025 0.9920282 0.8135503
# then . . .
> sapply(dframe, mean)
    small    medium       big
 1.245040  9.560325 99.946004

> var(dframe)
             small      medium         big
small   0.34152627 -0.21516416 -0.04005275
medium -0.21516416  0.98411974 -0.09253855
big    -0.04005275 -0.09253855  0.66186326
> cor(dframe)
             small     medium         big
small   1.00000000 -0.3711367 -0.08424345
medium -0.37113670  1.0000000 -0.11466070
big    -0.08424345 -0.1146607  1.00000000
> cov(dframe)
             small      medium         big
small   0.34152627 -0.21516416 -0.04005275
medium -0.21516416  0.98411974 -0.09253855
big    -0.04005275 -0.09253855  0.66186326

# Sequence

> 1:5
[1] 1 2 3 4 5
> seq(from=1, to=5, by=2)
[1] 1 3 5
> rep(1, times=5)
[1] 1 1 1 1 1
> seq(from=0, to=20, length.out=5)
[1]  0  5 10 15 20
> seq(from=0, to=100, length.out=5)
[1]   0  25  50  75 100

sequence (seq) 는 x축의 구성을 임의적으로 만들 때 유용. 예를 들면, normal distribution graph 등.

x <- seq(-4, 4, length=10000)
y <- dnorm(x, mean=0, sd=1)
plot(x, y, type="l", lwd=1)

# Comparing Vectors

> a <- 3
> a == pi # Test for equality
[1] FALSE
> a != pi # Test for inequality
[1] TRUE
> a < pi
[1] TRUE
> a > pi
[1] FALSE
> a <= pi
[1] TRUE
> a >= pi
[1] FALSE

> a <- var(dframe)
> b <- cov(dframe)
> a == b
       small medium  big
small   TRUE   TRUE TRUE
medium  TRUE   TRUE TRUE
big     TRUE   TRUE TRUE

> v <- c( 3, pi, 4)
> w <- c(pi, pi, pi)
> v == w # Compare two 3-element vectors
[1] FALSE  TRUE FALSE # Result is a 3-element vector
> v != w
[1]  TRUE FALSE  TRUE
> v < w
[1]  TRUE FALSE FALSE
> v <= w
[1]  TRUE  TRUE FALSE
> v > w
[1] FALSE FALSE  TRUE
> v >= w
[1] FALSE  TRUE  TRUE

> v <- c(3, pi, 4)
> v == pi # Compare a 3-element vector against one number
[1] FALSE  TRUE FALSE
> v != pi
[1]  TRUE FALSE  TRUE
. . .

> v <- c(3, pi, 4)
> any(v == pi) # Return TRUE if any element of v equals pi
[1] TRUE
> all(v == 0) # Return TRUE if all elements of v are zero
[1] FALSE

# Selecting Vector Elements

> fib <- c(0,1,1,2,3,5,8,13,21,34)
> fib
[1]  0  1  1  2  3  5  8 13 21 34
> fib[1]
[1] 0
> fib[2]
[1] 1
> fib[3]
[1] 1
> fib[4]
[1] 2
> fib[5]
[1] 3
> fib[1:3] # Select elements 1 through 3
[1] 0 1 1
> fib[4:9] # Select elements 4 through 9
[1]  2  3  5  8 13 21
> fib[c(1,2,4,8)]
[1]  0  1  2 13
> fib[-1] # Ignore first element
[1]  1  1  2  3  5  8 13 21 34
> fib[1:3] # As before
[1] 0 1 1
> fib[-(1:3)] # Invert sign of index to exclude instead of select
[1]  2  3  5  8 13 21 34
> fib < 10 # This vector is TRUE wherever fib is less than 10
[1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
> fib[fib < 10] # Use that vector to select elements less than 10
[1] 0 1 1 2 3 5 8
> fib %% 2 == 0 # This vector is TRUE wherever fib is even
[1]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE
> fib[fib %% 2 == 0] # Use that vector to select the even elements
[1]  0  2  8 34

Select all elements greater than the median

v[ v > median(v) ]

Select all elements in the lower and upper 5%

v[ (v < quantile(v,0.05)) | (v > quantile(v,0.95)) ]

Select all elements that exceed ±2 standard deviations from the mean

v[ abs(v-mean(v)) > 2*sd(v) ]

Select all elements that are neither NA nor NULL

v[ !is.na(v) & !is.null(v) ]

> years <- c(1960, 1964, 1976, 1994)
> names(years) <- c("Kennedy", "Johnson", "Carter", "Clinton")
> years
Kennedy Johnson  Carter Clinton
   1960    1964    1976    1994
> years["Carter"]
Carter
  1976
> years["Clinton"]
Clinton
   1994
> years[c("Carter","Clinton")]
 Carter Clinton
   1976    1994

# Performing Vector Arithmetic

> v <- c(11,12,13,14,15)
> w <- c(1,2,3,4,5)
> v + w
[1] 12 14 16 18 20
> v - w
[1] 10 10 10 10 10
> v * w
[1] 11 24 39 56 75
> v / w
[1] 11.000000  6.000000  4.333333  3.500000  3.000000
> w ^ v
[1]           1        4096     1594323   268435456 30517578125

> w
[1] 1 2 3 4 5
> mean(w)
[1] 3
> w - mean(w)
[1] -2 -1  0  1  2

> w
[1] 1 2 3 4 5
> sd(w)
[1] 1.581139
> (w - mean(w)) / sd(w)
[1] -1.2649111 -0.6324555  0.0000000  0.6324555  1.2649111

Get the variance of v without using the var() function.

> w
[1] 1 2 3 4 5
> sqrt(w)
[1] 1.000000 1.414214 1.732051 2.000000 2.236068
> log(w)
[1] 0.0000000 0.6931472 1.0986123 1.3862944 1.6094379
> sin(w)
[1]  0.8414710  0.9092974  0.1411200 -0.7568025 -0.9589243

Operator Meaning See also
[ [[ Indexing Recipe 2.9
:: ::: Access variables in a name space
$ @ Component extraction, slot extraction
^ Exponentiation (right to left)
- + Unary minus and plus
: Sequence creation Recipe 2.7, Recipe 7.14
%any% Special operators Discussion
* / Multiplication, division Discussion
== != < > <= >= Comparison Recipe 2.8
! Logical negation
& && Logical “and”, short-circuit “and”
| || Logical “or”, short-circuit “or”
~ Formula Recipe 11.1
-> ->> Rightward assignment Recipe 2.2
= Assignment (right to left) Recipe 2.2
<- <<- Assignment (right to left) Recipe 2.2
? Help Recipe 1.7
%%
Modulo operator
%/%
Integer division
%*%
Matrix multiplication
%in%
Returns TRUE if the left operand occurs in its right operand; FALSE otherwise
# Build an 8 x 10 incidence matrix: classtaken[i, j] is 1 when the pair
# (i, j) is listed in edge.list and 0 otherwise.
# NOTE(review): presumably rows are students and columns are classes -- confirm.
classtaken <- matrix(0, 8, 10)

# The 72 values are read row by row into 36 (row, column) index pairs.
edge.list <- matrix(
  c(
    1, 1, 1, 2, 1, 3, 1, 4, 1, 9,
    2, 2, 2, 5, 2, 7, 2, 8,
    3, 1, 3, 5, 3, 6, 3, 7, 3, 8,
    4, 2, 4, 6, 4, 9, 4, 10,
    5, 1, 5, 2, 5, 5, 5, 7, 5, 8,
    6, 2, 6, 3, 6, 4, 6, 7,
    7, 3, 7, 4, 7, 7, 7, 8,
    8, 1, 8, 2, 8, 6, 8, 9, 8, 10
  ),
  byrow = TRUE, nrow = 36, ncol = 2
)

# Indexing a matrix with a two-column matrix addresses one cell per row of the
# index, so this sets exactly the listed (row, column) cells to 1.
classtaken[edge.list] <- 1
rownames(classtaken) <- c("a", "b", "c", "d", "e", "f", "g", "h")
# TODO(review): a statement was truncated at this point and only the tail of
# its argument list survives:   "newmedia", "cmc")
# It looks like the end of a colnames(classtaken) assignment whose first eight
# labels were lost -- restore it from the original material.
classtaken

# Transpose of the 8 x 10 incidence matrix, kept for both products below.
tc <- t(classtaken)

# classtaken is used directly: the former alias `c` masked base::c().
# stu   (8 x 8):   entry [i, j] counts the columns in which rows i and j of
#                  classtaken both hold a 1.
# class (10 x 10): entry [i, j] counts the rows in which columns i and j of
#                  classtaken both hold a 1.
# NOTE(review): `class` shares its name with base::class(); the name is kept
# because it is this script's result variable.
stu <- classtaken %*% tc
class <- tc %*% classtaken

stu
class

Fill values less than 3 with zeros in the stu matrix:

 # Set every entry of stu that is smaller than 3 to zero.
 stu[stu < 3] <- 0

# Defining a Function

function(param1, ..., paramN) {
expr1
.
.
.
exprM
}
> cv <- function(x) sd(x)/mean(x)
> cv(1:10)
[1] 0.5504819

> cv <- function(x) sd(x)/mean(x)
> lapply(lst, cv)

> gcd <- function(a,b) {
+     if (b == 0) return(a)
+     else return(gcd(b, a %% b))
+ }