This article collects my notes from an R tutorial.
Chapter 1 covers the basics of manipulating data in R.
1. Assigning Values
# (1) Assigning values to objects
# `=` can be used to assign at the top level ...
x = 11
print(x)
# ... and so can the arrow `<-`
y <- 9
print(y)
# show the names of all objects currently in the workspace
ls()
# delete y from the workspace, then create it again
rm(y)
y <- 9
# an object can hold a character string as well as a number
xx <- "marin"
# Arithmetic and common math functions
z <- x + y
y^2        # square
y^(1 / 2)  # square root written as a power
sqrt(y)    # square root
log(y)     # natural logarithm
log2(y)    # base-2 logarithm
abs(-14)   # absolute value
exp(y)     # e raised to the power y
2. Vectors and Matrices
# (2) Vectors and matrices
# c() combines values into a vector
x1 <- c(1, 3, 5, 7, 9)
gender <- c("male", "female")
x2 <- 2:6
# regular numeric sequence
seq(from = 1, to = 7, by = 1)
# repeat a single value or a whole vector
rep(1, times = 10)
rep("marin", times = 5)
rep(seq(from = 1, to = 5, by = 0.25), times = 2)
rep(c("m", "f"), times = 5)
# arithmetic with a single number is applied to every element
x1 + 10
x1 * 10
# vectors of the same length are combined element by element
# (the same holds for subtraction and multiplication)
x1 + x2
x1 / x2
# the 3rd element
x1[3]
# everything except the 3rd element
x1[-3]
# the 1st through the 3rd element
x1[1:3]
# the 3rd and the 5th element (the positions must be wrapped in c())
x1[c(3, 5)]
# everything except the 3rd and the 5th element
x1[-c(3, 5)]
# only the elements that satisfy a condition
x1[x1 < 3]
# define a matrix, filled row by row
mat <- matrix(
  c(1, 2, 3,
    4, 5, 6,
    7, 8, 9),
  nrow = 3, byrow = TRUE
)
# extract single elements or several rows of one column
mat[1, 2]
mat[c(1, 3), 2]
# leaving an index blank selects every row (or every column)
mat[2, ]
mat[, 1]
# arithmetic is applied to every entry
mat * 10
3. Importing data
# Import a comma-separated file (.csv); file.choose() lets you pick the file
# interactively instead of typing its path
data1 <- read.csv(file.choose(), header = TRUE)
# `header = TRUE` tells R that the first row holds the column names.
# It is written out in full rather than as `T`, because `T` is an ordinary
# variable that can be reassigned, whereas `TRUE` is a reserved word.
# Alternative: read.table() with the separator given explicitly
data1 <- read.table(file.choose(), header = TRUE, sep = ",")
# Import a tab-delimited text file (.txt)
data2 <- read.delim(file.choose(), header = TRUE)
# Alternative: read.table() with a tab as the separator
data2 <- read.table(file.choose(), header = TRUE, sep = "\t")
4. Subsetting Data with Square Brackets ([])
In this part, I use the data set LungCapData as the example.
You may download it from here:
https://www.statslectures.com/r-scripts-datasets
# (4) Subsetting data with square brackets and logical statements
# Import the tab-delimited data set (adjust the path to where you saved it).
# `header = TRUE` is written out in full because `T` is a reassignable
# variable. `stringsAsFactors = TRUE` reads the text columns as factors;
# since R 4.0.0 they would otherwise be kept as plain character vectors.
Data1 <- read.table(
  file = "C:/Users/user/Desktop/R tutorial/LungCapData.txt",
  header = TRUE, sep = "\t", stringsAsFactors = TRUE
)
# number of rows and columns
dim(Data1)
# first six rows
head(Data1)
# last six rows
tail(Data1)
# other row selections: by explicit positions, by a range, by exclusion
Data1[c(5, 6, 7, 8, 9), ]
Data1[5:9, ]
Data1[-(5:9), ]
# names of the variables (columns)
names(Data1)
# mean of the variable "Age" in Data1;
# the dollar sign `$` extracts a single variable from the data frame
mean(Data1$Age)
# attach() places the variables of Data1 on the search path, so that they
# can be referenced by their bare names
attach(Data1)
Age
# detach() removes Data1 from the search path again
detach(Data1)
# once detached, the bare name `Age` is no longer found, so every variable
# below is referenced through the data frame with `$`
Data1$Age
# check the type of a variable
class(Data1$Age)
# for categorical variables, see the levels
# (factor() makes this work whether Gender is stored as factor or character)
levels(factor(Data1$Gender))
# number of records
length(Data1$Age)
# numerical summary of the data
summary(Data1)
# A variable coded with 0 and 1 is numeric when first created;
# converting it to a factor makes R treat it as categorical
x <- rep(c(0, 1, 0), times = c(1, 4, 4))
class(x)
x <- factor(x)
class(x)
# for a factor, summary() reports how often each level occurs
summary(x)
# Let's deal with subsets!
# `==` tests for equality (it does not assign a value).
# Every variable is referenced as Data1$<name>, so these statements work
# without Data1 being attached to the search path.
# mean age of the women
mean(Data1$Age[Data1$Gender == "female"])
# save the subsets for women and men;
# leaving the column index blank keeps all variables
FemData <- Data1[Data1$Gender == "female", ]
MaleData <- Data1[Data1$Gender == "male", ]
MaleOver15 <- Data1[Data1$Gender == "male" & Data1$Age > 15, ]
# create a logical variable (TRUE where Age is above 15)
temp <- Data1$Age > 15
temp[1:5]
# the same indicator as numbers (1/0) and as a factor
temp2 <- as.numeric(Data1$Age > 15)
temp3 <- as.factor(temp2)
# indicator for women who smoke
FemSmoke <- Data1$Gender == "female" & Data1$Smoke == "yes"
# bind the indicator to the data as a new column
Data2 <- cbind(Data1, FemSmoke)
# bind data frames together by row
Data3 <- rbind(Data1, Data1)
# remove every object from the workspace
rm(list = ls())
5. Working Directories and R Scripts
Two commands are especially useful for saving and restoring workspace data: save.image() and load().
# show your home directory
path.expand("~")
# show the current working directory
getwd()
# change the working directory
setwd("C:/Users/user/Desktop/R tutorial")
# alternatively, keep the path in a variable and pass that to setwd()
projectWD <- "C:/Users/user/Desktop/R tutorial"
setwd(projectWD)
# save all objects in the workspace to a file
save.image("Project1.Rdata")
# empty the workspace and quit R
rm(list = ls())
q()
# in a later session: load the previously saved workspace
load("Project1.Rdata")
# alternatively, pick the saved file interactively
load(file.choose())
# [tip] the "Tab" key brings up completion hints while typing
# install a package (epiR is used as the example here)
install.packages("epiR")
# load the package (this has to be repeated in every new session)
library(epiR)
Thanks for reading!
Jasmine Qu ;)