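"""
Qiushibaike joke crawler.

1. Fetch jokes from Qiushibaike.
2. Filter out jokes that contain images.
3. On each press of the Enter key, show one joke's publish time, author,
   text and like count.
"""
# NOTE(review): the script below sits inside a string literal (opened here and
# closed by the matching delimiter after the script), so it is not executed.
'''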
import requests
from bs4 import BeautifulSoup
url = 'http://www.qiushibaike.com/8hr/page/1/'  # the trailing number is the page index
# First, define url_down, the helper that fetches the content behind a URL.
def url_down(url, timeout=10):
    """Download the page at *url* and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests would wait indefinitely on a stalled server.

    Returns:
        The response body as text, or ``None`` when the request fails
        (in which case an error message is printed).
    """
    # Identify as a desktop Chrome browser rather than the library default.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36'}
    try:
        # Only the network call is guarded, so unrelated bugs are not hidden.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Covers connection errors, timeouts and malformed URLs, but lets
        # KeyboardInterrupt and programming errors propagate.
        print('访问出错!')
        return None
    # Force UTF-8 instead of relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return response.text
# Download the page; url_down returns None when the request failed.
html = url_down(url)
# TODO: wrap the parsing below in a joke-extraction function that pulls out
# the publish time, author, joke text and like count:
# def get_content(html):
if html is not None:
    # Only parse when there is something to parse; BeautifulSoup cannot be
    # built from None.
    soup = BeautifulSoup(html, 'lxml')
    # Collect the <div> elements whose class attribute is exactly this string
    # (presumably one per joke -- verify against the live page markup).
    all_jokes = soup.find_all('div', {'class': 'article block untagged mb15'})
    for jokes in all_jokes:
        print(jokes)
'''