环境准备
- Python 2.7
- BeautifulSoup
BeautifulSoup 安装教程（可通过 `pip install beautifulsoup4` 安装）
实现代码
import urllib2
import types
from bs4 import BeautifulSoup
def parseHtml(startPageUrl):
    """Fetch one listing page, print its image URLs, and return the paging cursor.

    Every <a> tag whose ``class`` attribute is a list containing
    ``'layer-view'`` is treated as one entry: the ``src`` of its first
    <img> is printed with an ``"http:"`` prefix, and a cursor is cut out
    of its ``href``.

    Args:
        startPageUrl: URL of the page to download.

    Returns:
        The cursor taken from the last matching anchor on the page, or
        ``None`` when the page has no matching anchor, so callers can use
        the result directly as a "keep going" flag.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` on network
            failure.
    """
    req = urllib2.Request(startPageUrl)
    response = urllib2.urlopen(req)
    try:
        html = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # NOTE(review): no parser is named here, so bs4 chooses one itself;
    # naming one would pin behaviour but may change how this page parses.
    soup = BeautifulSoup(html)

    # Start from None so a page without matches returns a falsy value
    # instead of raising UnboundLocalError at the return statement.
    lastMax = None
    for tag in soup.findAll('a'):
        classes = tag.get('class')
        if not isinstance(classes, list) or 'layer-view' not in classes:
            continue

        href = tag.get('href')
        if href is None:
            # Without an href there is no cursor to extract.
            continue
        # Drops the first six characters and the final one; presumably
        # hrefs look like "/pins/<id>/" -- TODO confirm against the site.
        lastMax = href[6:-1]

        img = tag.find('img')
        src = img.get('src') if img is not None else None
        if src is not None:
            # Single-argument form prints identically on Python 2 and 3.
            print("http:" + src)
    return lastMax
# Crawl driver: request the first page with max=0, then keep requesting the
# next page using the cursor returned by parseHtml.
PAGE_URL_TEMPLATE = "https://huaban.com/explore/hua/?j450erym&max=%s&limit=20&wfl=1"

startPageUrl = PAGE_URL_TEMPLATE % ("0",)
lastMax = parseHtml(startPageUrl)
# The loop ends as soon as parseHtml hands back a falsy cursor.
while lastMax:
    print(lastMax)
    startPageUrl = PAGE_URL_TEMPLATE % (lastMax,)
    previousMax, lastMax = lastMax, parseHtml(startPageUrl)
    if lastMax == previousMax:
        # Same cursor again means the next request would be identical;
        # stop rather than loop forever.
        break