python2 str.encode("base64")爬坑

通过shodan的http.favicon.hash可以查询到使用同一favicon的网站，在查询网站真实IP和CMS框架时可发挥作用。

查询语法如下

http.favicon.hash:-1507567067

如果事先能知道网站的真实ip，那么可以通过shodan页面上的raw data获取到favicon的hash

http.favicon.hash

如果不知道IP，手上只有favicon，那么就只能通过脚本计算来得到hash

参考http://github.com/Ridter/get_ip_by_ico/blob/master/get_ip_by_ico.py 计算favicon.ico的hash值，但该脚本是python2版本，而python2目前已经过时，因此需要把它改写成python3版本。
找了下python2里str.encode()的C源码，没看到完整的代码实现。
通过观察str.encode函数与base64.b64encode函数的执行结果，编写了如下内容：

import pymmh3
import requests
import base64
import re


def change_format(content):
    """Wrap base64 text the way Python 2's ``str.encode("base64")`` did.

    The legacy codec emitted its output in lines of at most 76 characters,
    with every line (the last one included) terminated by a newline.

    Args:
        content: Unwrapped base64 text, e.g. the decoded result of
            ``base64.b64encode``.

    Returns:
        ``content`` cut into 76-character lines, each followed by a newline
        character. An empty ``content`` yields an empty string.
    """
    line_length = 76
    # Slice by offset rather than by ``len(content) % 76``: when the length is
    # an exact multiple of 76 the remainder is 0, and ``content[-0:]`` is the
    # whole string instead of an empty tail, which would duplicate the data.
    return "".join(
        content[start:start + line_length] + "\n"
        for start in range(0, len(content), line_length)
    )


if __name__ == "__main__":
    # Demo: compute the favicon hash for a single, fixed icon URL.
    # A timeout keeps the script from hanging forever on a silent server.
    response = requests.get('https://www.baidu.com/favicon.ico', timeout=10)
    # The header may be absent, differently cased, or carry parameters
    # (e.g. "image/x-icon; charset=binary"), so compare the bare media type.
    media_type = response.headers.get('Content-Type', '')
    media_type = media_type.split(';')[0].strip().lower()
    # "image/vnd.microsoft.icon" is the IANA-registered name of the ICO type.
    if media_type in ("image/x-icon", "image/vnd.microsoft.icon"):
        favicon = base64.b64encode(response.content).decode('utf-8')
        # Named favicon_hash so the builtin hash() is not shadowed.
        favicon_hash = pymmh3.hash(change_format(favicon))
        print(favicon_hash)

上文github链接中的脚本使用的mmh3需要Microsoft Visual C++ 14.0才能编译（这里改用了pymmh3），且其使用的encode('base64')写法在python3中已不可用。另外，该写法的结果与一般的base64编码并不完全相同：输出中每76个字符后都会跟一个\n，最后一行末尾同样带有\n。因此根据这个特征添加了函数change_format进行处理，从而计算出favicon.ico的hash。

贴一下修改后的python3版本的get_ip_by_ico.py

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import pymmh3
import requests
import argparse
import re
import base64
from urllib.parse import urlparse
from shodan import Shodan


# Module-level Shodan client used by queryshodan() for its count and search
# calls. The string below is a placeholder for a real Shodan API key.
api = Shodan('YOUR-SHODAN-API-KEY')
LOGO = R"""
  ▄████ ▓█████▄▄▄█████▓ ██▓ ██▓███   ▄▄▄▄ ▓██   ██▓ ██▓ ▄████▄   ▒█████  
 ██▒ ▀█▒▓█   ▀▓  ██▒ ▓▒▓██▒▓██░  ██▒▓█████▄▒██  ██▒▓██▒▒██▀ ▀█  ▒██▒  ██▒
▒██░▄▄▄░▒███  ▒ ▓██░ ▒░▒██▒▓██░ ██▓▒▒██▒ ▄██▒██ ██░▒██▒▒▓█    ▄ ▒██░  ██▒
░▓█  ██▓▒▓█  ▄░ ▓██▓ ░ ░██░▒██▄█▓▒ ▒▒██░█▀  ░ ▐██▓░░██░▒▓▓▄ ▄██▒▒██   ██░
░▒▓███▀▒░▒████▒ ▒██▒ ░ ░██░▒██▒ ░  ░░▓█  ▀█▓░ ██▒▓░░██░▒ ▓███▀ ░░ ████▓▒░
 ░▒   ▒ ░░ ▒░ ░ ▒ ░░   ░▓  ▒▓▒░ ░  ░░▒▓███▀▒ ██▒▒▒ ░▓  ░ ░▒ ▒  ░░ ▒░▒░▒░ 
  ░   ░  ░ ░  ░   ░     ▒ ░░▒ ░     ▒░▒   ░▓██ ░▒░  ▒ ░  ░  ▒     ░ ▒ ▒░ 
░ ░   ░    ░    ░       ▒ ░░░        ░    ░▒ ▒ ░░   ▒ ░░        ░ ░ ░ ▒  
      ░    ░  ░         ░            ░     ░ ░      ░  ░ ░          ░ ░  
                                          ░░ ░         ░                                                          
"""


def change_format(content):
    """Wrap base64 text the way Python 2's ``str.encode("base64")`` did.

    The legacy codec emitted its output in lines of at most 76 characters,
    with every line (the last one included) terminated by a newline.

    Args:
        content: Unwrapped base64 text, e.g. the decoded result of
            ``base64.b64encode``.

    Returns:
        ``content`` cut into 76-character lines, each followed by a newline
        character. An empty ``content`` yields an empty string.
    """
    line_length = 76
    # Slice by offset rather than by ``len(content) % 76``: when the length is
    # an exact multiple of 76 the remainder is 0, and ``content[-0:]`` is the
    # whole string instead of an empty tail, which would duplicate the data.
    return "".join(
        content[start:start + line_length] + "\n"
        for start in range(0, len(content), line_length)
    )


def getfaviconhash(url):
    """Download the icon at *url* and return its favicon hash.

    The hash is ``pymmh3.hash`` applied to the base64 encoding of the
    response body after it has been line-wrapped by ``change_format``.

    Args:
        url: Address of the favicon file to fetch.

    Returns:
        The value returned by ``pymmh3.hash``, or ``None`` when the request
        fails or the response is not served with an ICO media type.
    """
    try:
        # A timeout keeps the lookup from hanging forever on a silent server.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print("[!] Request Error")
        return None

    # The header may be absent, differently cased, or carry parameters
    # (e.g. "image/x-icon; charset=binary"), so compare the bare media type.
    media_type = response.headers.get('Content-Type', '')
    media_type = media_type.split(';')[0].strip().lower()
    # "image/vnd.microsoft.icon" is the IANA-registered name of the ICO type.
    if media_type not in ("image/x-icon", "image/vnd.microsoft.icon"):
        return None

    favicon = base64.b64encode(response.content).decode('utf-8')
    return pymmh3.hash(change_format(favicon))


def queryshodan(url):
    """Print the IP of every host Shodan lists with the same favicon as *url*.

    When *url* has no path (or only ``/``) the icon is looked up at
    ``/favicon.ico`` on that site; otherwise *url* is fetched as given.
    Progress, results and errors are all written to standard output.

    Args:
        url: Site URL, or the direct URL of a favicon file.

    Returns:
        None.
    """
    parsed = urlparse(url)
    if len(parsed.path) < 2:
        # Replace the path component instead of appending to the raw string:
        # this avoids "//favicon.ico" for URLs ending in "/" and keeps any
        # query string after the path rather than before it.
        url = parsed._replace(path="/favicon.ico").geturl()
    try:
        favicon_hash = getfaviconhash(url)
        # Compare against None explicitly: 0 is a legitimate hash value.
        if favicon_hash is None:
            print("[!] No icon find.")
            return
        query = "http.favicon.hash:{}".format(favicon_hash)
        count = api.count(query)['total']
        if count == 0:
            print("[-] No result")
            return
        print("[+] Try to get {} ip.".format(count))
        for host in api.search_cursor(query):
            print("[+] Get ip: " + host['ip_str'])
    except Exception as e:
        print("[!]{0}".format(str(e)))
    except KeyboardInterrupt:
        # Not a subclass of Exception, so it needs its own handler.
        print("[*] Shutting down...")


def main():
    """Read the required ``-u/--url`` option and run the Shodan lookup on it."""
    cli = argparse.ArgumentParser(
        description='Get ip list which using the same favicon.ico from shodan',
    )
    url_option = dict(
        metavar='url',
        required=True,
        help="the favicon.ico website url,example:http://www.baidu.com/",
    )
    cli.add_argument("-u", "--url", **url_option)
    queryshodan(cli.parse_args().url)


# Script entry point: print the banner, then run the command-line interface.
if __name__ == '__main__':
    print(LOGO)
    main()

参考链接：

python2 str.encode("base64")爬坑

python2 str.encode("base64")爬坑

python2 str.encode("base64")爬坑

相关阅读更多精彩内容

友情链接更多精彩内容