背景
一些类似的项目会使用重复的代码,导致苹果机审期间被判断为马甲包,还没到人审就被苹果审核拒掉了。
为了让正常迭代出来的、功能相似的项目也能通过审核,需要对我们项目的代码进行深度混淆。
方案
1、准备四六级单词库(如果使用随机字符会被机审查出来);
2、使用clang过滤出类名和方法名;
3、从四六级单词库随机组成,映射对应的类名和方法名;
4、通过映射进行混淆操作。
四六级单词库
区分首字母小写和大写的txt
安装clang
pip install clang --user
类名混淆
提取类名
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=None):
    """Build a libclang translation unit from an in-memory source string.

    The source is passed to libclang as an unsaved file named ``t.<ext>``,
    where ``<ext>`` follows ``lang`` (``t.c``, ``t.cpp`` or ``t.m``).

    Args:
        source: Source code text to parse.
        lang: One of ``'c'`` (default), ``'cpp'`` or ``'objc'``.
        all_warnings: When true, ``-Wall -Wextra`` are added to the
            compiler arguments.
        flags: Optional iterable of extra compiler arguments. It is copied,
            never mutated.

    Returns:
        The result of ``TranslationUnit.from_source``.

    Raises:
        Exception: If ``lang`` is not one of the supported languages.
    """
    # Supported language -> (virtual file name, language-specific arguments).
    languages = {
        'c': ('t.c', []),
        'cpp': ('t.cpp', ['-std=c++11']),
        'objc': ('t.m', []),
    }
    if lang not in languages:
        raise Exception('Unknown language: %s' % lang)
    name, lang_args = languages[lang]
    # Caller flags come first, exactly as before; None means "no extra flags".
    args = list(flags or ()) + lang_args
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])
def generate_m_file(file_text, result_lines, ret_functions):
//略...
# Lines matching this pattern are commented out before the text is parsed.
_PREPROCESS_RE = re.compile(
    r'#import|#include|#ifdef|#ifndef|#define|#endif|@property')


def _collect_header_files(top_directory):
    """Return the paths of all ``.h`` files found below ``top_directory``."""
    return [
        os.path.join(path, file_name)
        for path, _dir_list, file_list in os.walk(top_directory)
        for file_name in file_list
        if file_name.endswith('.h')
    ]


def _preprocess_header(lines):
    """Prepare the lines of one header for parsing.

    Returns a ``(text, result_lines)`` pair. ``text`` is the input with
    every line matching ``_PREPROCESS_RE`` prefixed by ``// ``.
    ``result_lines`` holds one entry per input line: the original line for
    an ``#ifdef`` and for an ``#endif`` seen while an ``#ifdef`` is still
    open, and a bare newline for every other line.
    """
    text_parts = []
    result_lines = []
    open_ifdefs = 0
    for line in lines:
        if not _PREPROCESS_RE.search(line):
            text_parts.append(line)
            result_lines.append('\n')
            continue
        if '#ifdef' in line:
            open_ifdefs += 1
            result_lines.append(line)
        elif '#endif' in line and open_ifdefs > 0:
            open_ifdefs -= 1
            result_lines.append(line)
        else:
            result_lines.append('\n')
        # NOTE(review): the pasted source lost its indentation; commenting
        # out every matching line (not only the final branch) is the reading
        # that keeps text and result_lines aligned line by line - confirm.
        text_parts.append('// ' + line)
    return ''.join(text_parts), result_lines


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: %s <source_dir>' % sys.argv[0])
    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)
    source_dir = sys.argv[1]
    # Presumably filled in by generate_m_file with the names it finds.
    ret_functions = []
    # Only header files are scanned; previously every file in the tree
    # (images, binaries, ...) was opened and parsed as well.
    for f in _collect_header_files(source_dir):
        with open(f, 'r') as header:
            text, result_text_lines = _preprocess_header(header)
        generate_m_file(text, result_text_lines, ret_functions)
    # Names listed here are never printed, i.e. they are left out of the
    # obfuscation candidates.
    filter_array = ['xxxx', 'aaaa', 'dddd', 'AppDelegate',
                    'PrefixHeader', 'dddddf', 'aaaadxxx']
    # Sorted so that repeated runs produce the same, diffable output.
    for func_item in sorted(set(ret_functions)):
        if func_item in filter_array:
            continue
        # Call form works on both Python 2 and Python 3.
        print(func_item)
filter_array
为要筛选掉不做混淆的类名
对提取的类名做映射类名
#!/usr/bin/env bash
# Configuration for the class-name mapping step.

# Kept from the original script; TABLENAME is not referenced again here.
TABLENAME=symbols
SYMBOL_DB_FILE="symbols"
# Input file: the first word of each line is read as a class name.
STRING_SYMBOL_FILE=./process_class/t.txt
# Output file: receives one "<original> <replacement>" pair per line.
HEAD_FILE=./rename-class/rename_classes.txt
export LC_CTYPE=C

# Remove the output of any previous run.
rm -f "$SYMBOL_DB_FILE" "$HEAD_FILE"
# Print a pseudo-random integer in the inclusive range [$1, $2].
function rand(){
    local min=$1
    local span=$(($2 - min + 1))
    # $RANDOM alone only yields 0..32767, so adding a constant to it can
    # never reach more than 32768 distinct results. Two draws are combined
    # into a 30-bit value so that wider ranges are fully covered.
    local num=$(( (RANDOM << 15) | RANDOM ))
    echo $((num % span + min))
}
# Print line number N (N random in 10..4200) of one of the three word lists
# JAAA.txt, JBBB.txt or JCCC.txt, the list being chosen at random.
function pRnd2(){
    local word_file
    rnd=$(rand 10 4200)
    randint=$((RANDOM % 3))
    case $randint in
        0) word_file="JAAA.txt" ;;
        1) word_file="JBBB.txt" ;;
        *) word_file="JCCC.txt" ;;
    esac
    # Left unquoted as in the original, so word splitting still collapses
    # any whitespace around the selected line.
    echo $(cat "$word_file" | sed -n "${rnd}p")
}
# iOS-style name parts. The first one found inside a class name is appended
# to its replacement name.
my_arr=("Manager" "DataSource" "Helper" "Adapter" "Router" "Handler" "Handle" \
"Model" "Service" "Item" "Info" "Controller" "Cell" "Button" "View" "Window")

touch "$HEAD_FILE"

# Read the first word of every line as a class name. The "|| [[ -n ... ]]"
# part keeps a final line that has no trailing newline.
while read -r class_name _ || [[ -n "$class_name" ]]; do
    # Optional sampling: uncomment to map only about one class in three.
    # if [ $((RANDOM % 3)) -ne 0 ]; then
    #     continue
    # fi

    # Skip blank lines.
    [[ -z "$class_name" ]] && continue

    suffix=""
    for loop in "${my_arr[@]}"; do
        if [[ $class_name =~ $loop ]]; then
            suffix=$loop
            break
        fi
    done

    # Replacement = project prefix + two random words + detected name part.
    # NOTE(review): nothing here guarantees that two classes never receive
    # the same replacement - confirm whether a uniqueness check is needed.
    ramdom="CS$(pRnd2)$(pRnd2)${suffix}"
    echo "$class_name $ramdom"
    echo "$class_name $ramdom" >> "$HEAD_FILE"
done < "$STRING_SYMBOL_FILE"
ramdom
可以添加一些项目前缀,比如CS等。
my_arr
里面可以定义一些iOS特有的后缀。
对映射的类做混淆
#!/bin/bash
# Applies the class-name mapping in rename_classes.txt to a project: first
# rewrites the names inside project files, then renames the matching
# .h/.m/.xib files. Uses BSD sed/find syntax ([[:<:]], [[:>:]], sed -i "").

PROJECT_DIR=$(cat ../path.txt)
echo "$PROJECT_DIR"
RENAME_CLASSES=rename_classes.txt

# First, we substitute the text in all of the files. Every "Old New" line of
# the mapping becomes the sed command "s/[[:<:]]Old[[:>:]]/New/g;". Blank
# lines are dropped because they would yield an invalid sed command.
sed_cmd=$(sed -e '/^[[:space:]]*$/d' \
    -e 's@^@s/[[:<:]]@; s@[[:space:]]\{1,\}@[[:>:]]/@; s@$@/g;@' "${RENAME_CLASSES}")
find "${PROJECT_DIR}" -type f \
    \( -name "*.pbxproj" -or -name "*.pch" -or -name "*.h" -or -name "*.m" -or -name "*.xib" -or -name "*.storyboard" \) \
    -exec sed -i "" "${sed_cmd}" {} +

# Now, we rename the .h/.m files and the .xib files.
while read -r class_from class_to || [ -n "$class_from" ]; do
    # An empty class name would make the regex below match every file.
    if [ -z "$class_from" ] || [ -z "$class_to" ]; then
        continue
    fi
    # One pass for .h/.m and one for .xib, same handling for both.
    for ext_pattern in '[hm]' 'xib'; do
        find "${PROJECT_DIR}" -type f -regex ".*[[:<:]]${class_from}[[:>:]][^\/]*\.${ext_pattern}" -print | grep -v '\.bak$' | \
        while IFS= read -r file_from; do
            # Quoted throughout so paths containing spaces survive intact.
            file_to=$(printf '%s\n' "$file_from" | sed "s/\(.*\)[[:<:]]${class_from}[[:>:]]\([^\/]*\)/\1${class_to}\2/")
            echo mv "${file_from}" "${file_to}"
            mv "${file_from}" "${file_to}"
        done
    done
done < "${RENAME_CLASSES}"
rename_classes.txt
是保存的映射类,shell脚本对工程进行批量替换。
方法混淆
提取方法名
# encoding: utf-8
import sys
import os
import re
import clang
from clang.cindex import *
from optparse import OptionParser, OptionGroup
def get_tu(source, lang='c', all_warnings=False, flags=None):
    """Build a libclang translation unit from an in-memory source string.

    The source is passed to libclang as an unsaved file named ``t.<ext>``,
    where ``<ext>`` follows ``lang`` (``t.c``, ``t.cpp`` or ``t.m``).

    Args:
        source: Source code text to parse.
        lang: One of ``'c'`` (default), ``'cpp'`` or ``'objc'``.
        all_warnings: When true, ``-Wall -Wextra`` are added to the
            compiler arguments.
        flags: Optional iterable of extra compiler arguments. It is copied,
            never mutated.

    Returns:
        The result of ``TranslationUnit.from_source``.

    Raises:
        Exception: If ``lang`` is not one of the supported languages.
    """
    # Supported language -> (virtual file name, language-specific arguments).
    languages = {
        'c': ('t.c', []),
        'cpp': ('t.cpp', ['-std=c++11']),
        'objc': ('t.m', []),
    }
    if lang not in languages:
        raise Exception('Unknown language: %s' % lang)
    name, lang_args = languages[lang]
    # Caller flags come first, exactly as before; None means "no extra flags".
    args = list(flags or ()) + lang_args
    if all_warnings:
        args += ['-Wall', '-Wextra']
    return TranslationUnit.from_source(name, args,
                                       unsaved_files=[(name, source)])
def parse_method(node):
    """Return the method name found in ``node`` if it passes the filters.

    The name is taken to be the token that directly follows the first
    ``)`` token of ``node``.

    Args:
        node: Object whose ``get_tokens()`` yields tokens that expose a
            ``spelling`` string.

    Returns:
        The name when it is longer than 10 characters and does not start
        with one of the filtered prefixes; otherwise an empty string. This
        includes the case where there is no ``)`` or nothing follows it.
    """
    # Method-name prefixes that are filtered out. TODO: refine this list.
    filter_start_words = ('init', 'set', 'get', 'image', 'view', 'reload',
                          '_', 'will', 'did')
    spellings = [token.spelling for token in node.get_tokens()]
    function = ''
    if ')' in spellings:
        name_index = spellings.index(')') + 1
        # A ')' that is the very last token has no name after it; this used
        # to raise IndexError.
        if name_index < len(spellings):
            function = spellings[name_index]
    if len(function) > 10 and not function.startswith(filter_start_words):
        return function
    return ''
# extract_type = 0x00001: 普通方法
# extract_type = 0x00011: 普通方法 + 属性
def parse_symbols(cursor, ret_symbols, extract_type):
//略...
# extract_type = 0x01100: Category, Class
# extract_type = 0x10000: Protocol
def extract_symbols(file_text, ret_symbols, extract_type):
    """Parse Objective-C text and hand selected declarations to parse_symbols.

    Args:
        file_text: Objective-C source text, parsed via ``get_tu``.
        ret_symbols: Collection passed through to ``parse_symbols``.
        extract_type: Flag value. ``0x00100`` selects interface
            declarations, ``0x01000`` category declarations and ``0x10000``
            protocol declarations; the whole value is also forwarded to
            ``parse_symbols``.

    The process exits with status 2 if no translation unit is obtained.
    """
    # The original built an OptionParser and re-parsed sys.argv for every
    # file, which aborted the run on any dash-prefixed argument; it also
    # created an Index that was never used. Neither is needed to parse
    # file_text.
    tu = get_tu(file_text, lang='objc')
    if not tu:
        sys.stderr.write('unable to load input\n')
        sys.exit(2)
    # Declaration kind -> flag bit that enables it.
    kind_masks = (
        (CursorKind.OBJC_INTERFACE_DECL, 0x00100),
        (CursorKind.OBJC_CATEGORY_DECL, 0x01000),
        (CursorKind.OBJC_PROTOCOL_DECL, 0x10000),
    )
    for cursor in list(tu.cursor.get_children()):
        for kind, mask in kind_masks:
            if cursor.kind == kind:
                if extract_type & mask:
                    parse_symbols(cursor, ret_symbols, extract_type)
                break
# Extract the symbols of every .h and .m file below a directory.
def traverse_header_files(top_directory, extract_type):
    """Run ``extract_symbols`` over all ``.h``/``.m`` files in a directory tree.

    Before parsing, each line that matches one of the listed preprocessor
    directives or ``@class`` is commented out, and any text in front of an
    ``@interface`` keyword on the same line is dropped.

    Args:
        top_directory: Root directory, walked recursively.
        extract_type: Flag value forwarded unchanged to ``extract_symbols``.

    Returns:
        A new ``set`` with the collected symbols (empty when no matching
        file exists).
    """
    # Compiled once instead of being looked up again for every line.
    directive_re = re.compile(
        r'#import|#include|#ifdef|#ifndef|#define|#endif|#if|#else|@class')
    source_files = [
        os.path.join(path, file_name)
        for path, _dir_list, file_list in os.walk(top_directory)
        for file_name in file_list
        if file_name.endswith(('.h', '.m'))
    ]
    ret_symbols = []
    for source_path in source_files:
        parts = []
        with open(source_path, 'r') as source_file:
            for line in source_file:
                if directive_re.search(line):
                    line = '// ' + line
                else:
                    interface_idx = line.find('@interface')
                    # > 0: something precedes the keyword; cut it away.
                    if interface_idx > 0:
                        line = line[interface_idx:]
                parts.append(line)
        extract_symbols(''.join(parts), ret_symbols, extract_type)
    return set(ret_symbols)
if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: %s <source_dir> <pods_dir>' % sys.argv[0])
    libclangPath = '/Library/Developer/CommandLineTools/usr/lib/libclang.dylib'
    Config.set_library_file(libclangPath)
    source_dir = sys.argv[1]
    pods_dir = sys.argv[2]
    # Extract the method names from the source directory.
    source_dir_methods_set = traverse_header_files(source_dir, 0x00101)
    # Extract the properties of the source directory.
    filter_set_A = traverse_header_files(source_dir, 0x10110)
    # Extract the methods and properties of categories in the source
    # directory.
    filter_set_B = traverse_header_files(source_dir, 0x11011)
    # Extract the methods and properties of regular classes and categories
    # in the Pods directory.
    filter_set_C = traverse_header_files(pods_dir, 0x11111)
    # Set difference: candidates minus everything in the three filter sets.
    result_set = (source_dir_methods_set
                  - filter_set_A - filter_set_B - filter_set_C)
    # Sorted so that repeated runs produce the same, diffable output.
    for func_item in sorted(result_set):
        # Call form works on both Python 2 and Python 3.
        print(func_item)
source_dir为项目的代码目录,pods_dir为pods的代码目录。由于pods里面是第三方代码,不参与混淆,所以要把其中出现的方法名从项目代码提取出的方法名中排除掉(取差集),再对剩下的方法进行映射。
对方法映射并宏定义写入文件
#!/usr/bin/env bash
# Configuration for the method-name mapping step.

# Input file: the first word of each line is read as a method name.
STRING_SYMBOL_FILE=./process_method/method_list.txt
# Output file: the generated header of #define mappings.
HEAD_FILE=./methodDefine.h
export LC_CTYPE=C

# Remove the header left by any previous run.
rm -f "$HEAD_FILE"
# Print a pseudo-random integer in the inclusive range [$1, $2].
function rand(){
    local min=$1
    local span=$(($2 - min + 1))
    # $RANDOM alone only yields 0..32767, so adding a constant to it can
    # never reach more than 32768 distinct results - far fewer than the
    # 10..140000 range requested by the callers in this script. Two draws
    # are combined into a 30-bit value so the whole range is reachable.
    local num=$(( (RANDOM << 15) | RANDOM ))
    echo $((num % span + min))
}
# Print line number N (N random in 10..140000) of one of the three word
# lists a.txt, b.txt or c.txt, the list being chosen at random.
function pRnd1(){
    local word_file
    rnd=$(rand 10 140000)
    randt=$((RANDOM % 3))
    case $randt in
        0) word_file="a.txt" ;;
        1) word_file="b.txt" ;;
        *) word_file="c.txt" ;;
    esac
    # Left unquoted as in the original, so word splitting still collapses
    # any whitespace around the selected line.
    echo $(cat "$word_file" | sed -n "${rnd}p")
}
# Print line number N (N random in 10..140000) of one of the three word
# lists AAA.txt, BBB.txt or CCC.txt, the list being chosen at random.
function pRnd2(){
    local word_file
    rnd=$(rand 10 140000)
    randt=$((RANDOM % 3))
    case $randt in
        0) word_file="AAA.txt" ;;
        1) word_file="BBB.txt" ;;
        *) word_file="CCC.txt" ;;
    esac
    # Left unquoted as in the original, so word splitting still collapses
    # any whitespace around the selected line.
    echo $(cat "$word_file" | sed -n "${rnd}p")
}
# Write the generated header: an include guard, a timestamp comment, then one
# guarded #define per method name.
touch "$HEAD_FILE"
echo '#ifndef methodDefine_h
#define methodDefine_h' >> "$HEAD_FILE"
echo "//confuse string at `date`" >> "$HEAD_FILE"

# Read the first word of every line as a method name. The "|| [[ -n ... ]]"
# part keeps a final line that has no trailing newline.
while read -r method_name _ || [[ -n "$method_name" ]]; do
    # Optional sampling: uncomment to map only about one method in three.
    # if [ $((RANDOM % 3)) -ne 0 ]; then
    #     continue
    # fi

    # Skip blank lines.
    [[ -z "$method_name" ]] && continue

    # Replacement = one word from pRnd1 followed by one word from pRnd2.
    ramdom="$(pRnd1)$(pRnd2)"
    # An empty replacement would emit "#define name " and erase the method
    # name from the sources, so such an entry is skipped with a warning.
    if [[ -z "$ramdom" ]]; then
        echo "warning: no replacement generated for $method_name, skipped" >&2
        continue
    fi
    echo "$method_name $ramdom"
    echo "#ifndef $method_name
#define $method_name $ramdom
#endif" >> "$HEAD_FILE"
done < "$STRING_SYMBOL_FILE"
echo "#endif" >> "$HEAD_FILE"
导入文件
prefixHeader导入methodDefine.h文件,方法混淆完成