1、工作需求
之前运营同事让我协助查询订单状态:渠道提供了几百个某护肤品牌官网的账号和密码,需要逐个登录进去,查看某个订单的订单状态并记录下来。
刚好最近研究爬虫时学习了Selenium的相关知识,于是写了一段脚本,实现了账户的自动登录以及订单状态的记录。
需求概述:通过账号密码查询某订单号的订单状态
2、解决思路
3、实现脚本以及解析
import time
from lxml import etree
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementNotInteractableException
def get_order_statu(user, password, order_number):
    """Log in with one account and look up the status of one order.

    Opens the shop site in a fresh Chrome session, dismisses the pop-up ad
    if one is shown, logs in, opens the "我的订单" (my orders) page and
    searches the order list for ``order_number``.

    Args:
        user: Login name typed into the phone/e-mail field.
        password: Password for ``user``.
        order_number: Order number to look for; compared as an exact string.

    Returns:
        ``[user, order_statu, order_price]`` for the first matching order,
        or ``None`` when the order list contains no such order number.

    Raises:
        selenium.common.exceptions.WebDriverException: If any page element
            needed for the login/navigation cannot be found or used.
    """
    # Local imports keep this block self-contained: the file's import
    # section for this script does not provide these two names.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    url = 'https://www.***.cn/'
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        # Fixed pause to let the page finish loading (an explicit or
        # implicit wait would also work here).
        time.sleep(5)
        try:
            # The landing page may show a pop-up ad; close it if present.
            close_but = browser.find_element(By.XPATH, "//div[@class='closeButton']")
            close_but.click()
        except WebDriverException:
            # Best effort only: no pop-up (or an unclickable one) is fine.
            pass

        # Open the login form, fill in the credentials and submit.
        login_button = browser.find_element(
            By.XPATH,
            "//div[@id='root']//span[@class='top-content-left top-logged-out']/a[2]")
        login_button.click()
        user_input = browser.find_element(By.XPATH, "//div/input[@placeholder='手机号/邮箱']")
        user_input.clear()
        user_input.send_keys(user)
        password_input = browser.find_element(By.XPATH, "//div/input[@placeholder='密码']")
        password_input.clear()
        password_input.send_keys(password)
        password_input.send_keys(Keys.ENTER)
        time.sleep(5)

        # Open the account menu, then the order page.
        my_own_button = browser.find_element(
            By.XPATH,
            "//div//span[@class='top-content-right']//a[@class='top-content-right-content']")
        my_own_button.click()
        my_order = browser.find_element(By.LINK_TEXT, '我的订单')
        my_order.click()
        time.sleep(5)

        # Switch to the tab that holds the order page.
        browser.switch_to.window(browser.window_handles[1])
        source = browser.page_source
    finally:
        # quit() ends the whole session (close() would only close the
        # current tab); run it even when a step above raised.
        browser.quit()

    # Parse the order page and pick the entry with the requested number.
    html = etree.HTML(source)
    for li in html.xpath("//ul[@class='OrderListDetail']/li"):
        numbers = li.xpath(".//div[@class='orderTitle']/p[2]/text()")
        if not numbers or numbers[0] != order_number:
            continue
        status = li.xpath(".//div[@class='statusTypeTable']/div/p/text()")
        price = li.xpath(".//div[@class='totalTypeTable']/div/p[1]/text()")
        return [user, status[0] if status else '', price[0] if price else '']
    return None


def main():
    """Check one order per account listed in ``abc.xlsx``.

    The first three columns of each spreadsheet row are used as user,
    password and order number.  Every order that was found is written to
    ``results.csv`` with the columns user / order_statu / order_price.
    """
    import logging

    from selenium.common.exceptions import WebDriverException

    # Read the accounts, passwords and order numbers from the Excel file.
    data = pd.read_excel('abc.xlsx')
    results = []
    for dl in data.values.tolist():
        user, password, order_number = str(dl[0]), str(dl[1]), str(dl[2])
        try:
            row = get_order_statu(user, password, order_number)
        except WebDriverException as exc:
            # One failing account must not abort the whole batch; report
            # it so the account can be checked by hand.
            logging.getLogger(__name__).warning('skipped %s: %s', user, exc)
            continue
        if row is not None:
            results.append(row)

    # Convert the collected rows to a DataFrame and store them as CSV.
    dt = pd.DataFrame(results, columns=['user', 'order_statu', 'order_price'])
    # utf_8_sig: the author's original note says the output was garbled
    # with the default encoding.
    dt.to_csv('results.csv', encoding='utf_8_sig')


if __name__ == '__main__':
    main()
4、新的需求
有的时候需要查询某账号当天是否下单以及下单情况,但无法提供订单号。想到的思路是:通过比对订单日期,找到当天下的订单以及相关的订单信息。
5、实现脚本以及解析
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time,csv
from lxml import etree
import pandas as pd
from selenium.common.exceptions import ElementNotInteractableException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import atexit

# Open the CSV file that collects the results.  It is opened in append
# mode, so rows from earlier runs are kept.
fp = open('result.csv', 'a', newline='', encoding='utf_8_sig')
# Flush buffered rows and release the handle when the interpreter exits.
atexit.register(fp.close)
writer = csv.writer(fp)
# Append mode starts at end-of-file, so offset 0 means the file is new or
# empty; only then write the header, otherwise every run would add another
# header line in the middle of the data.
if fp.tell() == 0:
    writer.writerow(('user', 'order_number', 'order_price', 'order_statu'))
def get_order_statu(user, password, order_date='2019-11-01'):
    """Log in with one account and record its orders placed on a given day.

    Opens the shop site in a fresh Chrome session, dismisses the pop-up ad
    if one is shown, logs in, opens the "我的订单" (my orders) page and
    writes one CSV row (via the module-level ``writer``) for every order
    whose date equals ``order_date``.

    Args:
        user: Login name typed into the phone/e-mail field.
        password: Password for ``user``.
        order_date: Date to match against the first 10 characters of each
            order's date text, i.e. ``YYYY-MM-DD``.  Defaults to the date
            that was previously hard-coded.

    Raises:
        selenium.common.exceptions.WebDriverException: If any page element
            needed for the login/navigation cannot be found or used.
    """
    from selenium.common.exceptions import WebDriverException

    url = 'https://www.***.cn/'
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        # Explicit wait: each wait.until() call polls for up to 10 seconds.
        wait = WebDriverWait(browser, 10)
        try:
            close_but = wait.until(EC.presence_of_element_located(
                (By.XPATH, "//div[@class='closeButton']")))
            close_but.click()
        except WebDriverException:
            # Best effort only: no pop-up (or an unclickable one) is fine.
            pass

        # Open the login form, fill in the credentials and submit.
        login_button = wait.until(EC.presence_of_element_located(
            (By.XPATH,
             "//div[@id='root']//span[@class='top-content-left top-logged-out']/a[2]")))
        login_button.click()
        user_input = wait.until(EC.presence_of_element_located(
            (By.XPATH, "//div/input[@placeholder='手机号/邮箱']")))
        user_input.clear()
        user_input.send_keys(user)
        password_input = browser.find_element(By.XPATH, "//div/input[@placeholder='密码']")
        password_input.clear()
        password_input.send_keys(password)
        password_input.send_keys(Keys.ENTER)
        time.sleep(5)

        # Open the account menu, then the order page.
        my_own_button = browser.find_element(
            By.XPATH,
            "//div//span[@class='top-content-right']//a[@class='top-content-right-content']")
        my_own_button.click()
        my_order = browser.find_element(By.LINK_TEXT, '我的订单')
        my_order.click()
        time.sleep(5)

        # Switch to the tab that holds the order page.  [-1] is the second
        # handle when a new tab was opened and the current one otherwise,
        # so a same-tab navigation no longer raises IndexError.
        browser.switch_to.window(browser.window_handles[-1])
        source = browser.page_source
    finally:
        # End the whole browser session even when a step above raised.
        browser.quit()

    # Parse the order page and keep the orders placed on order_date.
    html = etree.HTML(source)

    def first_text(node, path):
        """Return the first text node matched by *path*, or '' if none."""
        found = node.xpath(path)
        return found[0] if found else ''

    for li in html.xpath("//ul[@class='OrderListDetail']/li"):
        # Only the leading YYYY-MM-DD part of the date text is compared.
        if first_text(li, ".//div[@class='orderTitle']/p[3]/text()")[:10] != order_date:
            continue
        order_number = first_text(li, ".//div[@class='orderTitle']/p[2]/text()")
        order_price = first_text(li, ".//div[@class='totalTypeTable']/div/p[1]/text()")
        order_statu = first_text(li, ".//div[@class='statusTypeTable']/div/p/text()")
        # The leading apostrophe is kept from the original; presumably it
        # stops spreadsheet apps from reformatting long order numbers.
        result = [user, "'" + str(order_number), order_price, order_statu]
        # Write the row to the CSV file.
        writer.writerow(result)
def main():
    """Run the per-account order check for every row of ``abc.xlsx``.

    The first two columns of each spreadsheet row are used as user and
    password.  An account whose browser session fails is reported and
    skipped so that the remaining accounts are still processed.
    """
    import logging

    from selenium.common.exceptions import WebDriverException

    # Read the accounts and passwords from the Excel file.
    data = pd.read_excel('abc.xlsx')
    for dl in data.values.tolist():
        user, password = str(dl[0]), str(dl[1])
        try:
            get_order_statu(user, password)
        except WebDriverException as exc:
            # WebDriverException is the base class of the previously caught
            # ElementNotInteractableException; catching it also covers
            # missing elements and timeouts (e.g. after a failed login).
            logging.getLogger(__name__).warning('skipped %s: %s', user, exc)


if __name__ == '__main__':
    main()