最近用某在线建站平台做了个网站，但是苦于“百度自动推送”功能模块需要额外付费，这就让人不爽了。图方便，建站付费可以接受，但仅仅为了自动推送还要再收钱，那是万万不行的，于是就自己用 Python 写了个百度自动推送脚本！
下面附上代码:
import requests
import re
import time
from bs4 import BeautifulSoup as Bs4
# Token used to authenticate against the Baidu push endpoint
# (replace the placeholder with your own token).
baidu_token = '你的百度收录 Token'
# Root URL of the site; the crawl starts from this address.
head_url = "http://www.你的网站.com"
# Regular expression, applied with re.match, that decides whether a URL
# belongs to this site. Written as a raw string so the "\." escape reaches
# the regex engine unchanged instead of triggering Python's
# invalid-escape-sequence warning.
self_url_reg = r'http.*你的网站\.com'
# HTTP headers sent when fetching pages (a desktop Chrome User-Agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"
}
def baidu_push(url, timeout=None):
    """Submit a single URL to the Baidu push endpoint.

    The request is a POST to ``http://data.zz.baidu.com/urls`` whose ``site``
    query parameter is the host part of *url* and whose ``token`` parameter
    is the module-level ``baidu_token``. The URL itself is the request body.

    Args:
        url: Absolute URL to submit, e.g. ``"http://www.example.com/page"``.
        timeout: Optional timeout in seconds forwarded to ``requests.post``.
            ``None`` (the default) means no timeout.

    Returns:
        The response body as text.

    Raises:
        ValueError: If *url* contains no host component.
    """
    # Imported here so the function does not depend on edits elsewhere.
    from urllib.parse import urlsplit

    # Parse the host instead of regex-matching up to ".com": that pattern
    # failed for every non-.com domain and over-matched whenever ".com"
    # appeared again in the path or query string.
    hostname = urlsplit(url).hostname
    if not hostname:
        raise ValueError('cannot determine host name of URL: %r' % (url,))
    print(hostname)
    res = requests.post(
        'http://data.zz.baidu.com/urls',
        # Let requests build and escape the query string.
        params={'site': hostname, 'token': baidu_token},
        # Send explicit UTF-8 bytes; a str body containing non-Latin-1
        # characters cannot be encoded by the HTTP layer.
        data=url.encode('utf-8'),
        timeout=timeout,
    )
    return res.text
def get_url(urls, all_url=None):
    """Crawl same-site pages level by level, pushing each one to Baidu.

    Every URL in *urls* that has not been visited yet and matches
    ``self_url_reg`` is fetched, submitted through ``baidu_push`` and
    appended to *all_url*. The ``<a href>`` links found on it are resolved
    to absolute URLs and queued for the next level; the crawl ends when a
    level yields no new same-site links.

    Args:
        urls: Iterable of absolute URLs to start from.
        all_url: Optional list of URLs that count as already visited. It is
            extended in place. When omitted, a fresh list is created for
            this call.

    Returns:
        The list of visited URLs (the same object as *all_url* when one was
        supplied), in visiting order.
    """
    # Imported here so the function does not depend on edits elsewhere.
    from urllib.parse import urldefrag, urljoin, urlsplit

    # A literal [] default would be created once and shared by every call,
    # leaking visited URLs from one crawl into the next.
    if all_url is None:
        all_url = []

    current_level = list(urls)
    # Iterative form of the original tail recursion: one pass per link depth.
    while current_level:
        next_level = []
        for url in current_level:
            # Skip pages that were already visited.
            if url in all_url:
                continue
            # Skip pages that belong to other sites.
            if not re.match(self_url_reg, url):
                continue
            response = requests.get(url, headers=headers)
            print('---->'+url, baidu_push(url))
            all_url.append(url)
            soup = Bs4(response.text, "lxml")
            for link in soup.select("a"):
                href = link.get("href")
                if href is None:
                    continue
                # Resolve against the page the link was found on, so that
                # relative and protocol-relative links end up correct, and
                # drop the fragment, which does not identify another page.
                target = urldefrag(urljoin(url, href)).url
                # Only web pages can be crawled; this excludes javascript:,
                # mailto:, tel: and similar pseudo-links.
                if urlsplit(target).scheme not in ('http', 'https'):
                    continue
                # Treat ".../page/" and ".../page" as the same address.
                if target.endswith('/'):
                    target = target[:-1]
                # Skip links that were already visited or already queued.
                if target in all_url or target in next_level:
                    continue
                # Skip links that point to other sites.
                if not re.match(self_url_reg, target):
                    continue
                next_level.append(target)
        current_level = next_level
    return all_url
if __name__ == "__main__":
    # Crawl the whole site starting from its root URL, then print every
    # visited URL, one per line.
    crawled = get_url([head_url])
    postData = ''.join(page + '\n' for page in crawled)
    print(postData)