上文中介绍了基于google-breakpad的NativeCrash日志收集方法的全过程,但其有个缺陷就是生成的通用SO和dmp日志比较大,对sdk大小有严格要求的APP不是很方便,且不利于收集线上用户的NativeCrash数据(需要在wifi情况下收集)。本文介绍另外一种基于linux信号处理的轻量级方法。
实现原理
上图给出了基于linux信号异常处理的Native崩溃捕获的实现原理:当Native层代码发生崩溃时,系统会发送异常signal(主要有SIGILL/SIGSEGV/SIGBUS/SIGABRT/SIGSTKFLT等);进程捕获到异常signal后,回调事先注册的信号异常处理函数,在该函数中获取C层崩溃堆栈数据,再通过JNI回调至JAVA层,进行解析、上传等操作。
注册信号处理函数如下:
/*
 * Registers `act` as the disposition for signal `signum`; when `oldact` is
 * non-NULL the previous disposition is stored there.
 */
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact);

/* Describes how a signal is to be handled (layout as shown in sigaction(2)). */
struct sigaction {
    void (*sa_handler)(int);                        /* simple one-argument handler */
    void (*sa_sigaction)(int, siginfo_t *, void *); /* three-argument handler, used with SA_SIGINFO */
    sigset_t sa_mask;                               /* signals blocked while the handler runs */
    int sa_flags;                                   /* SA_* option bits */
    void (*sa_restorer)(void);                      /* not intended for application use */
};
此方案的难点在于如何在信号异常处理函数中获取native崩溃堆栈数据。网上提的比较多的方法是利用libcorkscrew.so来获取,但是libcorkscrew.so在Android4.4之后废弃了。当Android版本号大于4.4时,可以采用libbacktrace.so来获取崩溃数据。
如下给出信号异常处理函数的具体实现。
/**
 * Crash-signal callback: collects the native backtrace of the crashing thread
 * and hands it to the Java layer.
 *
 * When androidversion is below 21 the stack is unwound with libcorkscrew and
 * reported through the static Java method makeCrashReportMethod; otherwise the
 * work is delegated to captureNativeCrashForAndroid5().
 *
 * NOTE(review): this runs inside a signal handler, yet dlopen, snprintf and
 * JNI calls are not async-signal-safe. That is inherent to the approach and
 * is left unchanged here.
 *
 * @param reason      human-readable description of the crash
 * @param siginfo     signal information passed to the signal handler
 * @param sigcontext  machine context passed to the signal handler
 */
void _handleNativeCrash(const char *reason, struct siginfo *siginfo,
        void *sigcontext) {
    LOGD("_handleNativeCrash");

    JNIEnv *env = NULL;
    int result = javaVM->GetEnv((void **) &env, JNI_VERSION_1_6);
    if (result == JNI_EDETACHED) {
        LOGW("Native crash occured in a non jvm-attached thread");
        result = javaVM->AttachCurrentThread(&env, NULL);
    }
    if (result != JNI_OK) {
        LOGE("Could not attach thread to Java VM for crash reporting.\n Crash was: %s", reason);
        return;
    }

    if (androidversion >= 21) {
        if (captureNativeCrashForAndroid5(sigcontext, reason) == -1) {
            LOGI("--libbacktrace fail");
        }
        return;
    }

    /* androidversion < 21: unwind with libcorkscrew. */
    init();
    /* Every resolved entry point is called below, so all of them must be present. */
    if (unwind_backtrace_signal_arch == NULL || acquire_my_map_info_list == NULL
            || release_my_map_info_list == NULL || get_backtrace_symbols == NULL
            || free_backtrace_symbols == NULL || siginfo == NULL) {
        return;
    }

    LOGI("libcorkscrew.so start");
    char lines[MAX_BACKTRACE_LINES_LENGTH] = { 0, };
    size_t used = 0; /* bytes of `lines` already filled, excluding the NUL */

    map_info_t *map_info = acquire_my_map_info_list();
    backtrace_frame_t frames[256] = { 0, };
    backtrace_symbol_t symbols[256] = { 0, };
    ssize_t size = unwind_backtrace_signal_arch(siginfo, sigcontext, map_info,
            frames, 0, 255);
    if (size < 0) {
        /* Unwinding failed: report an empty trace instead of using a negative count. */
        size = 0;
    }
    get_backtrace_symbols(frames, size, symbols);

    /* printf precision arguments must be int, not size_t. */
    const int fieldWidth = (MAX_BACKTRACE_LINE_LENGTH - 80) / 2;

    for (ssize_t i = 0; i < size; ++i) {
        char line[MAX_BACKTRACE_LINE_LENGTH];

        const char *method = symbols[i].demangled_name;
        if (!method) {
            method = symbols[i].symbol_name;
        }
        const char *file = symbols[i].map_name;
        if (!file) {
            file = "<unknown>";
        }

        /* Cast to unsigned long so the conversion specifiers match the arguments. */
        const unsigned long pc = (unsigned long) symbols[i].relative_pc;
        if (method) {
            const unsigned long pc_offset = (unsigned long) (symbols[i].relative_pc
                    - symbols[i].relative_symbol_addr);
            if (pc_offset) {
                snprintf(line, sizeof(line), "#%02d pc %08lx %.*s (%.*s+%lu)",
                        (int) i, pc, fieldWidth, file, fieldWidth, method,
                        pc_offset);
            } else {
                snprintf(line, sizeof(line), "#%02d pc %08lx %.*s (%.*s)",
                        (int) i, pc, fieldWidth, file, fieldWidth, method);
            }
        } else {
            snprintf(line, sizeof(line), "#%02d pc %08lx %.*s", (int) i, pc,
                    fieldWidth, file);
        }

        /*
         * Append at the current end of `lines`. Passing `lines` as both the
         * destination and a "%s" source is undefined behaviour.
         */
        const int written = snprintf(lines + used, sizeof(lines) - used,
                " \n%s", line);
        if (written < 0 || (size_t) written >= sizeof(lines) - used) {
            break; /* output error or buffer full: keep what fits */
        }
        used += (size_t) written;
    }

    free_backtrace_symbols(symbols, size);
    release_my_map_info_list(map_info);

    LOGI("before call makeCrashReportMethod");
    env->CallStaticVoidMethod(applicationClass, makeCrashReportMethod,
            env->NewStringUTF(reason), env->NewStringUTF(lines), (jint) gettid());
    LOGI("after call makeCrashReportMethod");
}
/**
 * Resolves the libcorkscrew entry points used for stack unwinding.
 *
 * The lookup is all-or-nothing: if the library or any symbol is missing,
 * every function pointer is reset to NULL, so testing
 * unwind_backtrace_signal_arch alone tells the caller whether the whole set
 * is usable. Calling it again after a successful load is a no-op, which
 * avoids taking a new library handle on every call.
 */
void init() {
    if (unwind_backtrace_signal_arch != NULL) {
        return; /* already resolved by an earlier call */
    }

    void *libcorkscrew = dlopen("libcorkscrew.so", RTLD_LAZY | RTLD_LOCAL);
    if (!libcorkscrew) {
        return;
    }

    unwind_backtrace_signal_arch = (t_unwind_backtrace_signal_arch) dlsym(
            libcorkscrew, "unwind_backtrace_signal_arch");
    acquire_my_map_info_list = (t_acquire_my_map_info_list) dlsym(
            libcorkscrew, "acquire_my_map_info_list");
    release_my_map_info_list = (t_release_my_map_info_list) dlsym(
            libcorkscrew, "release_my_map_info_list");
    get_backtrace_symbols = (t_get_backtrace_symbols) dlsym(libcorkscrew,
            "get_backtrace_symbols");
    free_backtrace_symbols = (t_free_backtrace_symbols) dlsym(libcorkscrew,
            "free_backtrace_symbols");

    if (!unwind_backtrace_signal_arch || !acquire_my_map_info_list
            || !release_my_map_info_list || !get_backtrace_symbols
            || !free_backtrace_symbols) {
        /* Partial resolution is unusable: drop everything and release the handle. */
        unwind_backtrace_signal_arch = NULL;
        acquire_my_map_info_list = NULL;
        release_my_map_info_list = NULL;
        get_backtrace_symbols = NULL;
        free_backtrace_symbols = NULL;
        dlclose(libcorkscrew);
    }
}
/**
 * Unwinds the crashing thread with libbacktrace and passes the formatted
 * frames to dumpFile().
 *
 * @param ctx     machine context received by the signal handler
 * @param reason  human-readable description of the crash, forwarded to dumpFile()
 * @return 0 on success; -1 if libbacktrace cannot be loaded, the factory
 *         symbol is missing, no Backtrace object is created, or unwinding fails
 */
int captureNativeCrashForAndroid5(const void* ctx, const char* reason) {
    void *libbacktrace = dlopen("libbacktrace.so", RTLD_LAZY | RTLD_LOCAL);
    if (!libbacktrace) {
        return -1;
    }

    /* Mangled name of Backtrace::Create(int, int, BacktraceMap*). */
    Backtrace* (*create)(int, int, void*) = NULL;
    *(void**) &create = dlsym(libbacktrace,
            "_ZN9Backtrace6CreateEiiP12BacktraceMap");
    if (!create) {
        dlclose(libbacktrace); /* nothing from the library is in use yet */
        return -1;
    }

    /*
     * NOTE(review): `t` and the library handle are not released after this
     * point; presumably acceptable because this runs on the crash path.
     * Confirm before reusing this function elsewhere.
     */
    Backtrace *t = create(getpid(), 0, NULL);
    if (!t) {
        return -1;
    }
    if (!t->Unwind(0, (ucontext*) ctx)) {
        return -1;
    }

    char lines[MAX_BACKTRACE_LINES_LENGTH] = { 0, };
    size_t used = 0; /* bytes of `lines` already filled, excluding the NUL */

    /* Iterate over the frames actually unwound, not over a buffer-size constant. */
    const size_t count = t->NumFrames();
    for (size_t i = 0; i < count; i++) {
        const std::string line = t->FormatFrameData(i);
        if (line.empty()) {
            break;
        }
        /*
         * Append at the current end of `lines`. Passing `lines` as both the
         * destination and a "%s" source is undefined behaviour.
         */
        const int written = snprintf(lines + used, sizeof(lines) - used,
                " \n%s", line.c_str());
        if (written < 0 || (size_t) written >= sizeof(lines) - used) {
            break; /* output error or buffer full: keep what fits */
        }
        used += (size_t) written;
    }

    dumpFile(reason, lines);
    return 0;
}
Android.mk代码如下
# ndk-build module description for the native crash handler library.
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := nativecrash
LOCAL_SRC_FILES := NativeCrashHandler.cpp

# Optimise for size and hide symbols by default.
LOCAL_CPPFLAGS := -Os -fvisibility=hidden
LOCAL_CFLAGS := -Os -fvisibility=hidden
LOCAL_CFLAGS += -Wno-psabi
# Keep unwind tables and debug info.
LOCAL_CFLAGS += -funwind-tables -g

LOCAL_LDLIBS += -llog -ldl -landroid

# Without this include the module is only described, never built.
include $(BUILD_SHARED_LIBRARY)
Application.mk代码如下
# Application-wide ndk-build settings.

# Build configuration.
APP_OPTIM := release

# C++ runtime to link against.
APP_STL := stlport_static

# Extra flags applied to C++ sources.
APP_CPPFLAGS += -Wno-error=format-security

# Target ABIs.
APP_ABI := armeabi-v7a armeabi mips x86
至此两种NativeCrash收集的方法介绍完毕,两种方法各有优缺点,基于google-breakpad的方法体量较大,但获取的信息较全面。基于异常信号处理方法体量小,但信息不全面,只有崩溃线程的堆栈信息,不利于定位多线程崩溃的问题。