Use the tld module to tell whether a site address is a bare IP (http://1.1.1.1) or a domain name (http://www.xx.com).
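A minimal sketch of that check, assuming the tld package's fail_silently flag (it makes get_tld return None instead of raising when the URL carries no registrable domain, e.g. a bare IP):

import tld

def is_domain_url(url):
    # None means tld found no registrable domain, i.e. the host is a bare IP
    return tld.get_tld(url, fail_silently=True) is not None

print is_domain_url('http://www.xx.com')  # True  -> domain
print is_domain_url('http://1.1.1.1')     # False -> bare IP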
Method 1: get an IP and geolocate it (note: ipgetter.myip() returns this machine's public IP; the freegeoip query works for any IP)
import ipgetter
import requests

IP = ipgetter.myip()                      # this machine's external IP
url = 'http://freegeoip.net/json/' + IP   # freegeoip returns geolocation as JSON
r = requests.get(url)
js = r.json()
print 'IP Address: ' + js['ip']
print 'Country Code: ' + js['country_code']
print 'Country Name: ' + js['country_name']
print 'Region Code: ' + js['region_code']
print 'Region Name: ' + js['region_name']
print 'City Name: ' + js['city']
print 'Zip Code: ' + js['zip_code']
print 'Time Zone: ' + js['time_zone']
print 'Latitude: ' + str(js['latitude'])
print 'Longitude: ' + str(js['longitude'])
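The same endpoint geolocates any IP, so resolving a site first covers the "website IP" case in the heading. A sketch (freegeoip.net has since shut down, so any service returning the same JSON shape would have to stand in):

import socket
import requests

site_ip = socket.gethostbyname('www.4399.com')   # resolve the site first
js = requests.get('http://freegeoip.net/json/' + site_ip).json()
print js['ip'] + ' / ' + js['country_name']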
Method 2: resolve the domain with socket.getaddrinfo
import socket
import tld

from Config import FileConfig        # project config: FileConfig.URLFILE is the URL list path
from url_list import get_url_list    # project helper: yields the URLs in that file

def get_ip(domain):
    """Resolve a domain to its first IP address."""
    ip_url = socket.getaddrinfo(domain, 'http')[0][4][0]
    return ip_url

for i in get_url_list(FileConfig.URLFILE):
    tt = 'www.' + tld.get_tld(i)     # e.g. 'http://a.xx.com/p' -> 'www.xx.com'
    print tt
    print get_ip(tt)
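getaddrinfo can return several records per host; a small variation collects every distinct address instead of just the first:

import socket

def get_all_ips(domain):
    # one record per (family, socktype) combination; dedupe the addresses
    infos = socket.getaddrinfo(domain, 'http')
    return sorted(set(info[4][0] for info in infos))

print get_all_ips('www.4399.com')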
Match list entries whose address is a bare IP (print them; write the domain entries to url.txt)
import re

#----------------------------------------------------------------------
def ceshi_url(url_file):
    """Print entries that contain a bare IP; write the rest to url.txt."""
    reip = re.compile(r"(?<![0-9.])((2[0-4][0-9]|25[0-5]|[01]?[0-9]{1,2})\.){3}(2[0-4][0-9]|25[0-5]|[01]?[0-9]{1,2})(?![0-9.])")
    with open(url_file) as w, open('url.txt', 'w+') as g:
        for i in w.readlines():
            i = i.strip('\n').strip('\r')
            if re.search(reip, i):
                print i            # bare IP entry
            else:
                g.writelines(i)    # domain entry, keep for later resolution
                g.writelines('\n')

ceshi_url('kehu.txt')
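A quick check of the pattern against a few made-up entries (the boundary lookarounds reject over-long octets such as 256):

import re

reip = re.compile(r"(?<![0-9.])((2[0-4][0-9]|25[0-5]|[01]?[0-9]{1,2})\.){3}(2[0-4][0-9]|25[0-5]|[01]?[0-9]{1,2})(?![0-9.])")
for line in ['http://1.1.1.1/admin', 'http://www.xx.com', '256.1.1.1']:
    print line, '->', bool(re.search(reip, line))
# http://1.1.1.1/admin -> True
# http://www.xx.com    -> False
# 256.1.1.1            -> False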
Detect custom 404 pages and WAF fingerprints, recording the findings in MongoDB

#coding:utf-8
import requests
import re
import pymongo

connection = pymongo.MongoClient('127.0.0.1', 27017)   # local MongoDB
db = connection.wangzuxian
collection = db.page_404                               # one document per probed URL
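Each probed URL maps to one document: the class below calls update(..., upsert=True), which inserts the document when it is missing and merges new fields otherwise. The call shape, with a hypothetical target URL:

collection.update({"URL": 'http://target.example'},   # match key
                  {"$set": {'404_page': 'True'}},     # fields to merge in
                  upsert = True)                      # insert when no match exists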
########################################################################
class page_404(object):
    """Probe a site with two fake paths to spot WAF pages and custom 404 pages."""

    #----------------------------------------------------------------------
    def __init__(self, url):
        """Constructor"""
        super(page_404, self).__init__()
        self.url = url
        # keywords (mostly Chinese) that WAF/interception pages tend to contain:
        # "illegal", "firewall", "SafeDog", "administrator", "cloud defense", etc.
        self.page_waf_name = ["非法","防火墙","安全狗","SafeDog","管理员","云防御","365","创宇盾"]
        # two paths that almost certainly do not exist on the target
        self.test_path1 = '/nikendingbuzhidaowoyaoganma.html'
        self.test_path2 = '/nikendingbuzhidaowoyaoganma.asp'

    #----------------------------------------------------------------------
    def page_content(self):
        """Fetch both fake paths and return the two response bodies."""
        r1 = requests.get(self.url + self.test_path1)
        r2 = requests.get(self.url + self.test_path2)
        return r1.content, r2.content

    #----------------------------------------------------------------------
    def keyWord_page(self):
        """Search the first response for WAF keywords and record any hits."""
        r1, r2 = self.page_content()
        for word in self.page_waf_name:
            m = re.findall(str(word), str(r1))
            if len(m) != 0:
                for hit in m:
                    print hit
                    collection.update({"URL": self.url},
                                      {"$set": {'WAF' + hit: hit}},
                                      upsert = True)

    #----------------------------------------------------------------------
    def cmp_page(self):
        """Identical bodies for the .html and .asp probes suggest one catch-all 404 page."""
        r1, r2 = self.page_content()
        try:
            if cmp(r1, r2) == 0:
                print 'custom 404 page'
                collection.update({"URL": self.url},
                                  {"$set": {'404_page': 'True'}},
                                  upsert = True)
        except Exception, e:
            print str(e)
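Hypothetical usage against one site (the URL is just a placeholder):

checker = page_404('http://www.4399.com')
checker.keyWord_page()   # record any WAF fingerprints found on the fake page
checker.cmp_page()       # record whether the site serves a custom 404 page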
Dump a site's response headers
import requests

resp = requests.get('http://www.4399.com')
for k in resp.headers.keys():
    print k + '----' + resp.headers[k]
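For fingerprinting, the interesting entries are usually Server and X-Powered-By; requests exposes headers as a case-insensitive dict, so a direct lookup works:

for name in ['Server', 'X-Powered-By']:
    if name in resp.headers:
        print name + '----' + resp.headers[name]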