1、安装 selenium 和 chrome 浏览器
# pip install selenium
# wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# dpkg -i google-chrome-stable_current_amd64.deb
## 可以看到 chrome 浏览器的版本是 126.0.6478.126-1
# dpkg -l | grep chrome
ii google-chrome-stable 126.0.6478.126-1 amd64 The web browser from Google
2、安装 chromedriver
chromedriver 的版本要和 chrome 浏览器对应,比如都要是 126.xxx.xxx.xxx
根据上面安装的 chrome 浏览器版本 126.0.6478.126-1,从 https://googlechromelabs.github.io/chrome-for-testing/#stable 下载对应的 chromedriver 版本,比如 https://storage.googleapis.com/chrome-for-testing-public/126.0.6478.126/linux64/chromedriver-linux64.zip
# cd /opt/
# wget https://storage.googleapis.com/chrome-for-testing-public/126.0.6478.126/linux64/chromedriver-linux64.zip
# unzip chromedriver-linux64.zip
# ls /opt/chromedriver-linux64/chromedriver
3、实现爬取的代码demo
# cat get_bn_listing_demo.py
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd
from selenium.webdriver.chrome.options import Options
import datetime as dtdt
def main():
    """Scrape Binance's new-listing announcements and print the "Will List" ones.

    Starts a headless Chrome session through the chromedriver binary at
    ``/opt/chromedriver-linux64/chromedriver``, loads the announcement page,
    collects the text of the elements matched by the title and date CSS
    classes, and prints the rows whose title contains ``Will List``.

    Returns:
        None. The filtered table and one "Title"/"Date" pair per row are
        written to stdout.
    """
    # Run Chrome in headless mode.
    # NOTE(review): --no-sandbox / --disable-dev-shm-usage are presumably
    # there for running as root or inside a container -- confirm.
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # Path of the chromedriver downloaded in step 2.
    chromedriver_path = "/opt/chromedriver-linux64/chromedriver"

    # Example page to scrape: time and title of the latest announcements.
    url = "https://www.binance.com/en/support/announcement/new-cryptocurrency-listing?c=48&navId=48&hl=en"

    driver = webdriver.Chrome(
        service=Service(executable_path=chromedriver_path), options=options
    )
    try:
        driver.get(url)
        # Fixed pause before querying the DOM; presumably gives the
        # client-side rendered list time to appear.
        time.sleep(5)
        # Element text must be read while the browser session is alive.
        titles = [
            element.text
            for element in driver.find_elements(By.CLASS_NAME, "css-1yxx6id")
        ]
        dts = [
            element.text
            for element in driver.find_elements(By.CLASS_NAME, "css-eoufru")
        ]
    finally:
        # Always shut the browser down, even if loading or scraping failed,
        # so no chrome/chromedriver process is left behind.
        driver.quit()

    # pd.DataFrame raises ValueError on columns of unequal length; pair the
    # values positionally and keep only the complete pairs instead.
    if len(titles) != len(dts):
        common = min(len(titles), len(dts))
        print(
            f"Warning: found {len(titles)} titles but {len(dts)} dates; "
            f"keeping the first {common} pairs"
        )
        titles = titles[:common]
        dts = dts[:common]

    df = pd.DataFrame({"title": titles, "datetime": dts})

    # Literal substring match; na=False keeps the mask strictly boolean.
    is_listing = df["title"].str.contains("Will List", regex=False, na=False)
    filtered_df = df[is_listing]
    print(filtered_df)

    for _, listing in filtered_df.iterrows():
        print(f"Title: {listing['title']}")
        print(f"Date: {listing['datetime']}")


if __name__ == "__main__":
    main()
4、运行结果
5、一些问题
如果在 xshell 中运行代码,可能会弹出 X11 转发请求的窗口,根据提示关闭就行