#coding:utf-8
import os
import sys
import tld
import time
import chardet
import get_header
import random
import socket
import requests
import builtwith
import dns.resolver
import urllib2
import pymongo
import urlparse
import mongo
from BeautifulSoup import BeautifulSoup
#from Config import FileConfig
# Local-time date stamp (YYYY-MM-DD), computed once when the module is loaded.
add_time = time.strftime('%Y-%m-%d')
class Url_Check(object):
def __init__(self, url):
super(Url_Check, self).__init__()
self.cdninfo()
self.url = url
self.cnames = []
self.headers = []
def get_cnames(self):  # get all cname
    """Collect the CNAME chain of the URL's host into ``self.cnames``.

    On success ``self.cnames`` lists each CNAME target in the order it was
    resolved. If the first lookup fails for any reason, ``self.cnames`` is
    set to ``None``.
    """
    parts = urlparse.urlparse(self.url)
    # ``netloc`` can include credentials and a port ("user@host:8080"), which
    # is not a valid DNS name; ``hostname`` is the bare host.
    host = parts.hostname or parts.netloc
    # Start from a clean list so a repeated call neither accumulates
    # duplicates nor tries to append to the None left by an earlier failure.
    self.cnames = []
    try:
        answer = dns.resolver.query(host, 'CNAME')
    except Exception:
        # Best effort: no CNAME record, NXDOMAIN, timeout, malformed name ...
        self.cnames = None
    else:
        cname = answer[0].to_text()
        self.cnames.append(cname)
        # Keep following the chain from the first target.
        self.get_cname(cname)
def get_cname(self, cname):  # get cname
    """Follow the CNAME chain from ``cname``, appending each hop to ``self.cnames``.

    The walk ends quietly when a name has no further CNAME record, when a
    DNS error occurs on any hop, or when the chain points back at a name
    that has already been recorded.
    """
    current = cname
    while True:
        try:
            answer = dns.resolver.query(current, 'CNAME')
        except dns.exception.DNSException:
            # NoAnswer is the normal end of a chain. NXDOMAIN, Timeout and
            # NoNameservers on a later hop must not discard the names that
            # were already collected, so they end the walk as well.
            return
        target = answer[0].to_text()
        if target in self.cnames:
            # CNAME loop: without this guard the walk would never terminate.
            return
        self.cnames.append(target)
        current = target
#----------------------------------------------------------------------
def conn_url(self):
    """Open ``self.url`` using the headers from ``get_header.get_header()``.

    Returns the response object produced by ``urllib2.urlopen``. If anything
    raises, the URL and the error text are printed and ``None`` is returned.
    """
    try:
        request = urllib2.Request(self.url, headers=get_header.get_header())
        return urllib2.urlopen(request)
    except Exception as err:
        print('[-] self.url:' + self.url)
        print(str(err))
    return None
def get_headers(self): # get header
try:
resp = self.conn_url()
except Exception as e:
self.headers = None
# print "ERROR: %s" % e
else:
headers = str(resp.headers).lower()
self.headers = headers
#----------------------------------------------------------------------
def get_ip(self):
""""""
try:
domain_url = str(self.url.strip())[7:]
ip_url = socket.getaddrinfo(domain_url,'http')[0][4][0]
#ip_url = socket.gethostbyname(url)
return ip_url
except Exception,e:
pass
#----------------------------------------------------------------------
def get_title(self):
    """Download the page and return the text of its ``<title>`` element.

    Returns ``None`` when the page cannot be fetched or parsed (the error is
    printed) or when the document has no title.
    """
    try:
        resp = urllib2.urlopen(self.url)
        try:
            html = resp.read()
        finally:
            # Release the connection even if the read fails.
            resp.close()
        encoding = chardet.detect(html)['encoding']
        if encoding is not None and encoding.upper() == 'GB2312':
            # GB18030 is a superset of GB2312, so it also decodes pages that
            # are labelled GB2312 but use characters outside that set.
            encoding = 'GB18030'
        # chardet reports None when it cannot decide. Passing None through
        # lets BeautifulSoup do its own detection instead of receiving the
        # literal string 'None' as an encoding name.
        soup = BeautifulSoup(html, fromEncoding=encoding)
        title = soup.title
        return title.string if title is not None else None
    except Exception as e:
        print(str(e))
        return None
#----------------------------------------------------------------------
def get_cms_url(self):
    """Return the result of ``builtwith.parse`` for ``self.url``.

    Any exception is swallowed, in which case ``None`` is returned.
    """
    try:
        return builtwith.parse(self.url)
    except Exception:
        return None
#----------------------------------------------------------------------
def matched(self, context, *args):  # Matching string
    """Return the first of ``args`` that occurs as a substring of ``context``.

    ``context`` is converted with ``str()`` first when it is not already a
    string (for example a list of names). Returns ``False`` when none of the
    patterns is found.
    """
    haystack = context if isinstance(context, basestring) else str(context)
    return next((needle for needle in args if needle in haystack), False)
def check(self):
try:
flag = None
self.get_cnames()
self.get_headers()
if self.cnames:
# print self.cnames
flag = self.matched(self.cnames,*self.cdn['cname'])
if flag:
print '[+] ' + self.url + flag
return {'Status':True, 'CDN':self.cdn['cname'].get(flag)}
if not flag and self.headers:
flag = self.matched(self.headers,*self.cdn['headers'])
if flag:
return {'Status':True, 'CDN':'unknown'}
return {'Status':False, 'CNAME':self.cnames, 'Headers':self.headers}
except Exception,e:
pass
def cdninfo(self):
self.cdn = {
'headers': set([
#----------------------------------------------------------------------
def update_mongo(self):
    """Gather every piece of information about the URL and upsert it.

    Runs get_cms_url(), get_title(), get_ip() and check() in that order,
    then writes the results to ``mongo.ls_Info`` keyed by ``URL`` and prints
    a completion line.
    """
    technologies = self.get_cms_url()
    page_title = self.get_title()
    address = self.get_ip()
    cdn_result = self.check()
    fields = {
        'add_time': add_time,
        'title': page_title,
        'IP': address,
        'Info': technologies,
        'CDN': cdn_result,
    }
    # upsert=True: insert the document when this URL has not been seen yet.
    mongo.ls_Info.update({"URL": self.url}, {"$set": fields}, upsert=True)
    print(self.url + ' end')
if __name__ == '__main__':
    # Read one URL per line from test.txt and print the CDN verdict for each.
    with open('test.txt') as f:
        # strip() removes the complete line ending, whereas
        # strip('\r').strip('\n') leaves a trailing '\r' on CRLF files.
        # Blank lines are skipped instead of being checked as empty URLs.
        urls = [line.strip() for line in f if line.strip()]
    for url in urls:
        print(url)
        cdn = Url_Check(url)
        print(cdn.check())
python 网站CDN
最后编辑于 :
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 作为新人,对于如何学好Python也是一头雾水,也很想能得到别人的帮助。今天看到这篇文章,感觉学习起来有了一个方向...
- 目的:使用爬虫抓取网站异步加载数据 part1:什么是异步加载? 异步加载即网页上没有页码跳转按钮,鼠标往下滚即可...