1. Reading from MySQL
Step 1 - Install MySQL
Step 2 - Install RMySQL - install.packages("RMySQL")
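The code below also needs the package loaded first (loading RMySQL attaches its DBI dependency as well):
library(RMySQL)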
Connecting and listing databases
ucscDb<-dbConnect(MySQL(),user="genome",host="genome-mysql.cse.ucsc.edu")
# open a connection handle to the UCSC server
result<-dbGetQuery(ucscDb,"show databases;");
# run the query and store the output in result
dbDisconnect(ucscDb);
Connecting to hg19 and listing tables
hg19<-dbConnect(MySQL(),user="genome",db="hg19",host="genome-mysql.cse.ucsc.edu") # connect to the hg19 database
allTables<-dbListTables(hg19)
length(allTables) # how many tables the database contains
dbListFields(hg19,"affyU133Plus2") # list the columns of one table
dbGetQuery(hg19,"select count(*) from affyU133Plus2") # count the rows of that table
Read from the table
affyData<-dbReadTable(hg19,"affyU133Plus2") # reads the entire table into a data frame
head(affyData)
Select a specific subset
query<-dbSendQuery(hg19,"select * from affyU133Plus2 where misMatches between 1 and 3") # the query runs on the server
affyMis<-fetch(query); # pull the results into R
quantile(affyMis$misMatches)
affyMisSmall<-fetch(query,n=10); # n caps how many rows are fetched
dbClearResult(query); # stop the query and free the result on the server
Don't forget to close the connection!
dbDisconnect(hg19)
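One way to make the disconnect automatic (a minimal sketch, not from the lecture; queryHg19 is a made-up helper name) is to wrap the query in a function and use on.exit(), which runs even if the query errors:
queryHg19<-function(sql){
  con<-dbConnect(MySQL(),user="genome",db="hg19",host="genome-mysql.cse.ucsc.edu")
  on.exit(dbDisconnect(con)) # runs when the function exits, error or not
  dbGetQuery(con,sql)
}
queryHg19("select count(*) from affyU133Plus2")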
2. HDF5 (Hierarchical Data Format)
R HDF5 package
source("http://bioconductor.org/biocLite.R")
biocLite("rhdf5")
library(rhdf5)
created=h5createFile("example.h5")
created
Create groups
created=h5createGroup("example.h5","foo")
created=h5createGroup("example.h5","baa")
created=h5createGroup("example.h5","foo/foobaa")
h5ls("example.h5")
Write to groups
A=matrix(1:10,nrow=5,ncol=2)
h5write(A,"example.h5","foo/A")
B=array(seq(0.1,2.0,by=0.1),dim=c(5,2,2))
attr(B,"scale")<-"liter"
h5write(B,"example.h5","foo/foobaa/B")
h5ls("example.h5")
Write a data set
df=data.frame(1L:5L,seq(0,1,length.out=5), c("ab","cde","fghi","a","s"),stringsAsFactors=FALSE)
h5write(df,"example.h5","df")
h5ls("example.h5")
Reading data
readA=h5read("example.h5","foo/A")
readB=h5read("example.h5","foo/foobaa/B")
readdf=h5read("example.h5","df")
readA
Writing and reading chunks
h5write(c(12,13,14),"example.h5","foo/A",index=list(1:3,1))
h5read("example.h5","foo/A")
3. Web scraping (HTML)
Getting data off webpages - readLines()
con=url("http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en")
htmlCode=readLines(con)
close(con)
htmlCode
Parsing with XML
library(XML)
url<-"http://scholar.google.com/citations?user=HI-I6C0AAAAJ&hl=en"
html<-htmlTreeParse(url,useInternalNodes=TRUE)
xpathSApply(html,"//title",xmlValue)
xpathSApply(html,"//td[@id='col-citedby']",xmlValue)
GET from the httr package
library(httr);
html2=GET(url)
content2=content(html2,as="text")
parsedHtml=htmlParse(content2,asText=TRUE)
xpathSApply(parsedHtml,"//title",xmlValue)
Accessing websites with passwords
pg2=GET("http://httpbin.org/basic-auth/user/passwd", authenticate("user","passwd"))
pg2
Response [http://httpbin.org/basic-auth/user/passwd]
Status: 200
Content-type: application/json
{
"authenticated": true,
"user": "user"
}
Using handles
google=handle("http://google.com")
pg1=GET(handle=google,path="/")
pg2=GET(handle=google,path="search")
# the handle preserves cookies (e.g. authentication) across requests to the same site
R Bloggers has a number of examples of web scraping: http://www.r-bloggers.com/?s=Web+Scraping
4. APIs (Application Programming Interfaces)
Step 1. Creating an application
Step 2. Accessing Twitter from R
myapp=oauth_app("twitter",key="yourConsumerKeyHere",secret="yourConsumerSecretHere")
sig=sign_oauth1.0(myapp,token="yourTokenHere",token_secret="yourTokenSecretHere")
homeTL=GET("https://api.twitter.com/1.1/statuses/home_timeline.json",sig)
Converting the json object
json1=content(homeTL)
json2=jsonlite::fromJSON(jsonlite::toJSON(json1))
json2[1,1:4]
httr allows GET, POST, PUT, and DELETE requests if you are authorized, and it works well with Facebook, Google, Twitter, GitHub, etc.
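A quick POST sketch (httpbin.org simply echoes what it receives; the body fields are invented for illustration):
postResp=POST("http://httpbin.org/post",body=list(x="1",y="2"))
content(postResp) # the parsed echo of the request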
5. Reading from other sources
Stata, SPSS, SAS - the foreign package (read.dta, read.spss, read.xport; see the sketch below)
Images - jpeg, png, readbitmap, EBImage (Bioconductor)
GIS - rgdal, rgeos, raster
Music - tuneR, seewave
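A minimal sketch for the first of these (the file name survey.dta is hypothetical):
library(foreign)
dat<-read.dta("survey.dta") # Stata versions 5-12; use haven::read_dta for newer files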