python2 str.encode("base64")爬坑
通过shodan的http.favicon.hash可以查询到使用同一favicon的网站,在查询网站真实IP和CMS框架时可发挥作用。
查询语法如下
http.favicon.hash:-1507567067
如果事先能知道网站的真实ip,那么可以通过shodan页面上的raw data
获取到favicon的hash
如果不知道IP,手上只有favicon,那么就只能通过脚本计算来得到hash
参考http://github.com/Ridter/get_ip_by_ico/blob/master/get_ip_by_ico.py 计算favicon.ico的hash值。但该脚本是python2版本,而python2目前已经过时,所以需要改写成python3版本。
找了下python2里str.encode()的C源码,没看到完整的代码实现。
通过观察str.encode函数与base64.b64encode函数的执行结果,编写了如下内容:
import pymmh3
import requests
import base64
import re
def change_format(content):
    r"""Wrap base64 text the way Python 2's ``str.encode("base64")`` did.

    The text is cut into lines of at most 76 characters and every line,
    including the last one, is terminated with ``"\n"``.

    Args:
        content: Base64 text without line breaks.

    Returns:
        The wrapped text; an empty string when ``content`` is empty.
    """
    line_length = 76
    # Fixed-width slicing stays correct when the length is an exact multiple
    # of 76. A ``content[-remainder:]`` tail would be ``content[-0:]`` there,
    # i.e. the whole string, and the full input would be appended a second
    # time.
    return "".join(
        content[start:start + line_length] + "\n"
        for start in range(0, len(content), line_length)
    )
if __name__ == "__main__":
    # Demo: compute the Shodan favicon hash for one known favicon.
    # A timeout keeps the script from hanging forever on a silent server.
    response = requests.get('https://www.baidu.com/favicon.ico', timeout=10)
    # The header may be missing, carry parameters ("; charset=...") or use a
    # different letter case, so compare only the normalised media type.
    content_type = response.headers.get('Content-Type', '')
    media_type = content_type.split(';', 1)[0].strip().lower()
    # "image/vnd.microsoft.icon" is the registered media type for .ico files;
    # "image/x-icon" is the widespread unofficial one.
    if media_type in ("image/x-icon", "image/vnd.microsoft.icon"):
        favicon = base64.b64encode(response.content).decode('utf-8')
        # The hash is taken over the line-wrapped base64 text, not raw bytes.
        favicon_hash = pymmh3.hash(change_format(favicon))
        print(favicon_hash)
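上文github链接中使用的mmh3是C++扩展,安装时(Windows下)需要Microsoft Visual C++ 14.0进行编译,因此这里改用纯python实现的pymmh3。另外,该链接使用的encode('base64')写法在python3中已不可用,而且它的结果与一般的base64编码并不完全相同:函数执行后的值每隔76个字符就有一个\n,末尾也以\n结束。因此根据这个特征添加函数change_format,对base64.b64encode的结果进行处理后再计算favicon.ico的hash。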
贴一下修改后的python3版本的get_ip_by_ico.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pymmh3
import requests
import argparse
import re
import base64
from urllib.parse import urlparse
from shodan import Shodan
# Shared Shodan API client; replace the placeholder with a real API key
# before running the script.
api = Shodan('YOUR-SHODAN-API-KEY')
# ASCII-art banner printed once when the script is run directly.
LOGO = R"""
▄████ ▓█████▄▄▄█████▓ ██▓ ██▓███ ▄▄▄▄ ▓██ ██▓ ██▓ ▄████▄ ▒█████
██▒ ▀█▒▓█ ▀▓ ██▒ ▓▒▓██▒▓██░ ██▒▓█████▄▒██ ██▒▓██▒▒██▀ ▀█ ▒██▒ ██▒
▒██░▄▄▄░▒███ ▒ ▓██░ ▒░▒██▒▓██░ ██▓▒▒██▒ ▄██▒██ ██░▒██▒▒▓█ ▄ ▒██░ ██▒
░▓█ ██▓▒▓█ ▄░ ▓██▓ ░ ░██░▒██▄█▓▒ ▒▒██░█▀ ░ ▐██▓░░██░▒▓▓▄ ▄██▒▒██ ██░
░▒▓███▀▒░▒████▒ ▒██▒ ░ ░██░▒██▒ ░ ░░▓█ ▀█▓░ ██▒▓░░██░▒ ▓███▀ ░░ ████▓▒░
░▒ ▒ ░░ ▒░ ░ ▒ ░░ ░▓ ▒▓▒░ ░ ░░▒▓███▀▒ ██▒▒▒ ░▓ ░ ░▒ ▒ ░░ ▒░▒░▒░
░ ░ ░ ░ ░ ░ ▒ ░░▒ ░ ▒░▒ ░▓██ ░▒░ ▒ ░ ░ ▒ ░ ▒ ▒░
░ ░ ░ ░ ░ ▒ ░░░ ░ ░▒ ▒ ░░ ▒ ░░ ░ ░ ░ ▒
░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░ ░
░░ ░ ░
"""
def change_format(content):
    r"""Wrap base64 text the way Python 2's ``str.encode("base64")`` did.

    The text is cut into lines of at most 76 characters and every line,
    including the last one, is terminated with ``"\n"``.

    Args:
        content: Base64 text without line breaks.

    Returns:
        The wrapped text; an empty string when ``content`` is empty.
    """
    line_length = 76
    # Fixed-width slicing stays correct when the length is an exact multiple
    # of 76. A ``content[-remainder:]`` tail would be ``content[-0:]`` there,
    # i.e. the whole string, and the full input would be appended a second
    # time.
    return "".join(
        content[start:start + line_length] + "\n"
        for start in range(0, len(content), line_length)
    )
def getfaviconhash(url):
    """Download the icon at ``url`` and return its favicon hash.

    The response body is base64-encoded, line-wrapped with ``change_format``
    and hashed with ``pymmh3.hash``.

    Args:
        url: Full URL of the favicon to fetch.

    Returns:
        The value returned by ``pymmh3.hash``, or ``None`` when the request
        fails or the response is not served as an icon.
    """
    favicon_hash = None
    try:
        # A timeout keeps the lookup from hanging forever on a silent server.
        response = requests.get(url, timeout=10)
        # The header may be missing, carry parameters ("; charset=...") or
        # use a different letter case, so compare the normalised media type.
        content_type = response.headers.get('Content-Type', '')
        media_type = content_type.split(';', 1)[0].strip().lower()
        # "image/vnd.microsoft.icon" is the registered media type for .ico
        # files; "image/x-icon" is the widespread unofficial one.
        if media_type in ("image/x-icon", "image/vnd.microsoft.icon"):
            favicon = base64.b64encode(response.content).decode('utf-8')
            favicon_hash = pymmh3.hash(change_format(favicon))
    except Exception:
        # Best-effort by design: any failure is reported and mapped to None.
        print("[!] Request Error")
        favicon_hash = None
    return favicon_hash
def queryshodan(url):
    """Print the Shodan hosts that serve the same favicon as ``url``.

    When ``url`` has no path, or only ``/``, the icon is looked up at
    ``/favicon.ico`` on the same host; otherwise ``url`` is used as the icon
    URL unchanged. Results and errors are printed, nothing is returned.

    Args:
        url: Website URL or direct favicon URL.
    """
    parsed = urlparse(url)
    if len(parsed.path) < 2:
        # Replace the path instead of appending to the string, so a trailing
        # "/" does not produce "//favicon.ico" and the path is not glued
        # after a query string.
        url = parsed._replace(path="/favicon.ico").geturl()
    try:
        favicon_hash = getfaviconhash(url)
        # Compare against None explicitly: 0 is a valid hash value.
        if favicon_hash is None:
            print("[!] No icon find.")
            return
        query = "http.favicon.hash:{}".format(favicon_hash)
        count = api.count(query)['total']
        if count == 0:
            print("[-] No result")
            return
        print("[+] Try to get {} ip.".format(count))
        for host in api.search_cursor(query):
            print("[+] Get ip: " + host['ip_str'])
    except Exception as e:
        print("[!]{0}".format(str(e)))
    except KeyboardInterrupt:
        # Not a subclass of Exception, so it needs its own handler.
        print("[*] Shutting down...")
def main():
    """Parse the command line and run the Shodan lookup for the given URL."""
    arg_parser = argparse.ArgumentParser(
        description='Get ip list which using the same favicon.ico from shodan',
    )
    # The URL is the only option and it is mandatory.
    arg_parser.add_argument(
        "-u",
        "--url",
        required=True,
        metavar='url',
        help="the favicon.ico website url,example:http://www.baidu.com/",
    )
    queryshodan(arg_parser.parse_args().url)
# Script entry point: show the banner, then hand over to the CLI.
if __name__ == '__main__':
    print(LOGO)
    main()
参考链接: