# GEO数据下载,数据处理,基因与ID的转换
```
library(GEOquery) # install the package first if it is not available yet
## Load files that have already been downloaded to local disk
# Series matrix file parsed by getGEO(); note the variable name `get` shadows
# base::get as a variable (function calls to get() still resolve correctly).
get <- getGEO(filename = "./data/GSE13535_series_matrix.txt/GSE13535_series_matrix.txt")
# Platform annotation parsed from the local SOFT file.
GPL <- getGEO(filename = "./data/GPL/GPL1355.soft")
# Expression matrix extracted from the series object.
exprset <- exprs(get)
```
![转换前表达矩阵.png](https://upload-images.jianshu.io/upload_images/13573980-5aab3e42e7627c36.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
```
# Convert a probe-level expression matrix into a gene-symbol-level matrix.
# Inputs (created by the loading step): `get`, `GPL`, `exprset`.
# Result: `exprset` with one row per gene symbol, row names = symbols.

# Annotation table of the platform.
exgpl <- Table(GPL)
# Sample phenotype table. It must exist because it is written by save() below
# (it was commented out before, which made save() fail).
exgpl2 <- pData(get)
# log2-transform the expression values; +1 avoids log2(0).
# NOTE(review): assumes the matrix is not already on a log scale -- confirm.
exprset <- log2(exprset + 1)
# Keep only the probe ID and gene symbol columns and give them short names.
ids <- exgpl[, c("ID", "Gene Symbol")]
colnames(ids) <- c("probe_id", "symbol")
save(ids, exprset, exgpl2, file = "GSE13535input.Rdata")

# Quick look at how many probes map to each symbol.
length(unique(ids$symbol))
tail(sort(table(ids$symbol)))
table(sort(table(ids$symbol)))

##### Start of the probe -> symbol conversion
# How many probes of the matrix are present in the annotation?
table(rownames(exprset) %in% ids$probe_id)
dim(exprset)
# Drop probes without annotation. The result has to be assigned back to
# `exprset` (not a differently spelled variable), otherwise the steps below
# would run on the unfiltered matrix.
exprset <- exprset[rownames(exprset) %in% ids$probe_id, , drop = FALSE]
dim(exprset)
dim(ids)
# Reorder/subset the annotation so that row i of `ids` describes row i of
# `exprset`; by() below relies on this alignment.
ids <- ids[match(rownames(exprset), ids$probe_id), ]
dim(ids)
head(ids)
exprset[1:5, 1:5]
# For every symbol keep the probe with the highest mean expression.
# NOTE(review): probes whose symbol is an empty string form one group of
# their own here -- consider removing them beforehand if that is unwanted.
tmp <- by(
  exprset, ids$symbol,
  function(x) rownames(x)[which.max(rowMeans(x))]
)
probes <- as.character(tmp)
dim(exprset)
exprset <- exprset[rownames(exprset) %in% probes, , drop = FALSE] # filter
dim(exprset) # the original author reported 15249 22 at this point
# Replace probe IDs by the corresponding gene symbols.
rownames(exprset) <- ids$symbol[match(rownames(exprset), ids$probe_id)]
exprset[1:5, 1:5]
```
![转换后表达矩阵.png](https://upload-images.jianshu.io/upload_images/13573980-74126181898008c1.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
#### [参考链接1](http://www.bio-info-trainee.com/1399.html)
#### [参考链接2](https://github.com/jmzeng1314/5years/blob/master/learn-R/tasks/3-r-20-codes.R)