在速度上,selenium 慢得真不是一点点,差了几何倍数,不想多说什么。
测试了这两部车的 UA,也都各有特色,也就是说这两辆车也是需要伪装的:
chrome UA
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/65.0.3325.181 Safari/537.36
phantomjs UA
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1
速度测试源码
import time
from threading import Thread

import requests
from selenium import webdriver
# Taobao market page that every benchmark below downloads.
url = (
    "https://www.taobao.com/markets/nanzhuang/jiushijiu"
    "?spm=a21bo.2017.201860.1.5af911d96sPN27"
    "&scm=20140637.1.61496.222272"
)
def re():
    """Time a plain ``requests`` download of the benchmark page.

    Fetches the module-level ``url``, decodes the response body to text and
    prints the elapsed time in seconds. Returns ``None``.

    NOTE: the name shadows the standard-library ``re`` module; it is kept
    because ``main`` refers to this function by that name.
    """
    # perf_counter() is monotonic and high-resolution, so the interval cannot
    # be distorted by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    # Reading .text keeps body decoding inside the measured interval; the
    # decoded text itself is not needed.
    _ = requests.get(url).text
    print('requests总共耗时%f秒' % (time.perf_counter() - start))
def phan():
    """Time a page load through the PhantomJS WebDriver.

    Starts a PhantomJS browser, loads the module-level ``url``, reads the
    rendered page source and prints the elapsed time in seconds. The measured
    interval includes browser start-up but not shutdown. Returns ``None``.

    The browser is always shut down afterwards, even if loading fails, so no
    PhantomJS process is left running.
    """
    # perf_counter() is monotonic, unlike time.time(), so it is the right
    # clock for measuring an interval.
    start = time.perf_counter()
    browser = webdriver.PhantomJS()
    try:
        browser.get(url)
        # Reading page_source forces serialisation of the rendered DOM so it
        # counts towards the timing; the value itself is not needed.
        _ = browser.page_source
        print('phantomjs总共耗时%f秒' % (time.perf_counter() - start))
    finally:
        # Release the driver and its browser process.
        browser.quit()
def chrome():
    """Time a page load through headless Chrome.

    Starts Chrome with the ``headless`` argument, loads the module-level
    ``url``, reads the rendered page source and prints the elapsed time in
    seconds. The measured interval includes browser start-up but not
    shutdown. Returns ``None``.

    The browser is always shut down afterwards, even if loading fails, so no
    chromedriver/Chrome process is left running.
    """
    # perf_counter() is monotonic, unlike time.time(), so it is the right
    # clock for measuring an interval.
    start = time.perf_counter()
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    # NOTE(review): newer Selenium releases spell this keyword `options=`;
    # `chrome_options=` is kept to match the Selenium version this script was
    # written for -- confirm before upgrading.
    browser = webdriver.Chrome(chrome_options=options)
    try:
        browser.get(url)
        # Reading page_source forces serialisation of the rendered DOM so it
        # counts towards the timing; the value itself is not needed.
        _ = browser.page_source
        print('chrome总共耗时%f秒' % (time.perf_counter() - start))
    finally:
        # Release the driver and its browser process.
        browser.quit()
def main():
    """Launch the three benchmarks concurrently, one thread per benchmark.

    Threads are started in the order PhantomJS, headless Chrome, requests.
    They are not joined here; each one prints its own timing when it
    finishes.
    """
    for benchmark in (phan, chrome, re):
        # Start this benchmark's thread right away.
        Thread(target=benchmark).start()
# Run the benchmarks only when executed as a script, not when imported.
if __name__ == '__main__':
    main()