# Show the current values of the global graphics parameters that can be set
# (no.readonly = TRUE leaves out the read-only ones).
par(no.readonly = TRUE)
# RISmed package: journal mining and word-cloud plotting ----
library(RISmed)

# PubMed query: "Heart" in the title/abstract or as a MeSH term.
search_topic <- '("Heart"[Title/Abstract] OR "Heart"[MeSH Terms])'

# retmax caps the number of records retrieved; mindate and maxdate set the
# period that is searched.
search_query <- EUtilsSummary(
  search_topic,
  db = "pubmed",
  retmax = 100,
  datetype = "pdat",
  mindate = 2019,
  maxdate = 2020
)

# Inspect the search result and the PMIDs of the matched articles.
summary(search_query)
QueryId(search_query)

# EUtilsGet() fetches the article records for the query.
records <- EUtilsGet(search_query)
# Information mining; the example here finds the journals that published the
# most papers.
library(tidyverse)

# Abstract text of the first article (via the accessor, like the calls below,
# rather than reaching into the S4 slot directly).
AbstractText(records)[1]
# Publication type of the first article.
PublicationType(records)[1]

# Collect the fields of interest from the search result into a tibble.
pubmed <- tibble(
  Title = ArticleTitle(records),
  Year = YearPubmed(records),
  journal = ISOAbbreviation(records)
)

# Bar chart of the journals, to see which ones published the most papers.
library(ggplot2)
ggplot(pubmed, aes(fct_infreq(journal))) + # order the factor by frequency
  geom_bar() +
  coord_flip() +
  theme_classic()
# MeSH heading extraction and word-cloud plotting ----

# Extract the MeSH terms; the result has one element per article.
word <- Mesh(records)
# Drop the entries that are NA (presumably articles without MeSH terms).
word <- word[!is.na(word)]

# Remove duplicated MeSH headings: first shown for the first article only.
# The argument must be `.keep_all`; a plain `keep_all` would be treated as a
# new column to deduplicate on and the remaining columns would be dropped.
distinct(word[[1]], Heading, .keep_all = TRUE)
# Then apply the same deduplication to every article.
word <- lapply(word, distinct, Heading, .keep_all = TRUE)

# Pull the first column (the words) out of every article's table.
wordtable <- lapply(word, function(mesh) mesh[[1L]])

# Word frequencies across all articles.
wordcd <- table(unlist(wordtable))
# Word-cloud visualisation.
library(wordcloud2)
wordcloud2(wordcd) # interactive word cloud

library(wordcloud)
library(RColorBrewer)
# The static word cloud takes the words and their counts as separate vectors,
# so turn the frequency table into a data frame (columns Var1 and Freq).
wordcd <- as.data.frame(wordcd)
wordcloud(
  wordcd$Var1,
  wordcd$Freq,
  colors = rev(brewer.pal(7, "Set2")) # spelled out instead of partial `col`
)
# pubmed.mineR package: mining abstracts that were already downloaded ----
# 1. Download the abstracts from PubMed.
library(pubmed.mineR)

# Import the downloaded abstract file.
pubmed_abstracts <- readabs("abstract-Dimethylfu-set.txt")

# Journal information of the first ten records; head() avoids the NA padding
# that `[1:10]` produces when fewer than ten records were read.
head(pubmed_abstracts@Journal, 10)
# Abstract text of the first record.
pubmed_abstracts@Abstract[1]
# Locale setting used for tokenisation: switch every category to "C".
Sys.setlocale("LC_ALL", "C")

# Split the abstract text into words.
abswords <- word_atomizations(pubmed_abstracts)

# Dot chart of the first 50 rows (presumably the 50 most frequent words;
# assumes word_atomizations() returns them sorted by frequency; TODO confirm).
# head() is used instead of `[1:50, ]` so that a table with fewer than 50 rows
# does not get padded with all-NA rows.
library(ggpubr)
ggdotchart(
  head(abswords, 50),
  x = "words",
  y = "Freq",
  sorting = "descending",
  add = "segments",
  ggtheme = theme_pubr(),
  rotate = TRUE
)