import re
from selenium import webdriver
import time
import urllib.request
# Script: open a Zhihu question page in Chrome, expand the answer list a
# bounded number of times, then download every image referenced by an
# `img src="..."` attribute in the rendered HTML into the current directory.
from selenium.common.exceptions import WebDriverException

# The argument is the path to the local chromedriver executable.
# NOTE(review): the positional driver path and find_element_by_css_selector
# are Selenium 3 style calls -- confirm against the installed Selenium version.
driver = webdriver.Chrome("/home/h/Downloads/chromedriver")
try:
    driver.maximize_window()
    driver.get("https://www.zhihu.com/question/29134042")

    # Scroll to the bottom and click the paging button, at most 10 times.
    # (presumably "button.QuestionMainAction" is the "load more" button --
    # verify against the current page markup)
    for i in range(10):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)  # wait for content loaded by the scroll
        try:
            driver.find_element_by_css_selector(
                "button.QuestionMainAction"
            ).click()
        except WebDriverException:
            # The button cannot be found or clicked any more: stop paging.
            break
        print("page"+str(i))
        time.sleep(1)

    # Snapshot the HTML before the browser is closed.
    result_raw = driver.page_source
finally:
    # Always shut the browser down, even if navigation or paging failed.
    driver.quit()

# Collect every image URL that appears as `img src="<url>" ` in the page.
content_list = re.findall(r'img src="(.+?)" ', result_raw)

for url in content_list:
    # The current timestamp doubles as the file name and the printed id.
    stamp = time.time()
    local = "%s.jpg" % stamp
    try:
        urllib.request.urlretrieve(url, local)
    except (OSError, ValueError) as err:
        # URLError/HTTPError are OSError subclasses; ValueError covers URLs
        # urllib cannot interpret. Skip this image and keep going.
        print("skipped %s: %s" % (url, err))
        continue
    print("编号:"+str(stamp))
需要注意的是下面这行代码中的参数:
driver = webdriver.Chrome("/home/h/Downloads/chromedriver")
该参数是本机 chromedriver 可执行文件的路径。使用前需要先到
https://sites.google.com/a/chromium.org/chromedriver/downloads
下载与本机 Chrome 版本对应的 chromedriver,解压后添加到路径中(并把上面的参数改为实际路径)即可使用。