"""Batch-download images from sc.chinaz.com listing pages using XPath."""

import os
import urllib.parse
import urllib.request

from lxml import etree


def download_img(src_list, name_list, dir_path="./bf-girl", base_url=None):
    """Download each image in ``src_list`` into ``dir_path``.

    Every file is named after the matching entry of ``name_list`` plus the
    extension taken from the image URL. A failed download is reported and
    skipped so that one bad image does not stop the remaining ones.

    Args:
        src_list: Image URLs (strings).
        name_list: Base file names, paired positionally with ``src_list``.
        dir_path: Destination directory; created when it does not exist.
        base_url: Optional URL of the page the sources were scraped from.
            When given, relative or protocol-relative sources are resolved
            against it before downloading.
    """
    # urlretrieve does not create missing directories; without this every
    # download fails on a fresh run.
    os.makedirs(dir_path, exist_ok=True)

    # zip pairs the two lists and stops at the shorter one, so a mismatch in
    # the scraped results cannot raise IndexError.
    for src, name in zip(src_list, name_list):
        # File extension taken from the image URL.
        suffix = os.path.splitext(src)[-1]
        # The name is free-form scraped text; a path separator in it would
        # point into a directory that does not exist.
        safe_name = name.replace("/", "_").replace("\\", "_")
        # Full path of the image on disk.
        file_path = os.path.join(dir_path, safe_name + suffix)
        try:
            url = urllib.parse.urljoin(base_url, src) if base_url else src
            urllib.request.urlretrieve(url, file_path)
        except Exception as exc:
            # Deliberate best-effort boundary: report the reason and carry on
            # with the next image.
            print("%s---file missing! (%s)" % (file_path, exc))
        else:
            print("%s---download finsh" % file_path)


def get_data(req):
    """Fetch one listing page, extract image URLs and titles, download them.

    Args:
        req: ``urllib.request.Request`` (or URL string) of the listing page.
    """
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(req) as res:
        html = res.read().decode("utf-8")
        page_url = res.geturl()

    html_etree = etree.HTML(html)
    # The image URL is read from the "src2" attribute, not "src".
    # NOTE(review): presumably the site lazy-loads images and keeps the real
    # URL in src2 -- confirm against the live markup.
    src_list = html_etree.xpath("//div[@id='container']/div/div/a/img/@src2")
    name_list = html_etree.xpath("//div[@id='container']/div/div/a/img/@alt")
    download_img(src_list, name_list, base_url=page_url)


def build_req(url):
    """Return a ``urllib.request.Request`` for ``url`` with a browser User-Agent."""
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
    }
    return urllib.request.Request(url=url, headers=headers)


def main():
    """Prompt for a page range and download the images of every page in it."""
    start_page = int(input("请输入起始页码:"))
    end_page = int(input("请输入结束页码:"))
    # Base address of the listing, without page suffix or extension.
    tem_url = "http://sc.chinaz.com/tupian/rentixiezhen"
    for page in range(start_page, end_page + 1):
        # Page 1 has no numeric suffix; later pages are "<base>_<n>.html".
        if page != 1:
            url = tem_url + "_" + str(page) + ".html"
        else:
            url = tem_url + ".html"
        get_data(build_req(url))


if __name__ == "__main__":
    main()
XPath网上批量下载
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...