一 简述
android crash分为两类:一类是native crash,一类是java虚拟机的crash。
二 Native crash流程
1) native crash是指native代码运行异常时,进程收到内核发送的signal,进而进入crash处理的流程,常见的有abort(SIGABRT)等。信号处理函数的注册如下:
static void __attribute__((__unused__)) debuggerd_register_handlers(struct sigaction* action) {
char value[PROP_VALUE_MAX] = "";
bool enabled =
!(__system_property_get("ro.debuggable", value) > 0 && !strcmp(value, "1") &&
__system_property_get("debug.debuggerd.disable", value) > 0 && !strcmp(value, "1"));
if (enabled) {
sigaction(SIGABRT, action, nullptr);
sigaction(SIGBUS, action, nullptr);
sigaction(SIGFPE, action, nullptr);
sigaction(SIGILL, action, nullptr);
sigaction(SIGSEGV, action, nullptr);
sigaction(SIGSTKFLT, action, nullptr);
sigaction(SIGSYS, action, nullptr);
sigaction(SIGTRAP, action, nullptr);
}
sigaction(BIONIC_SIGNAL_DEBUGGER, action, nullptr);
}
// Stores the optional callbacks, sets up the stack used by the debuggerd
// pseudothread, and registers debuggerd_signal_handler for the crash signals.
//
// callbacks: may be null; when non-null its contents are copied into g_callbacks.
void debuggerd_init(debuggerd_callbacks_t* callbacks) {
  if (callbacks != nullptr) {
    g_callbacks = *callbacks;
  }

  // Reserve the usable stack pages plus one extra page on each side. The whole
  // range starts out PROT_NONE; only the middle pages are made accessible, so
  // the first and last page remain inaccessible.
  const size_t stack_page_count = 8;
  const size_t usable_bytes = PAGE_SIZE * stack_page_count;
  void* reservation = mmap(nullptr, PAGE_SIZE * (stack_page_count + 2), PROT_NONE,
                           MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  if (reservation == MAP_FAILED) {
    fatal_errno("failed to allocate debuggerd thread stack");
  }

  char* usable_base = static_cast<char*>(reservation) + PAGE_SIZE;
  if (mprotect(usable_base, usable_bytes, PROT_READ | PROT_WRITE) != 0) {
    fatal_errno("failed to mprotect debuggerd thread stack");
  }

  // The stack grows downwards: start from the last usable byte, then step back
  // a further 15 bytes so the initial stack pointer is 16-byte aligned.
  pseudothread_stack = usable_base + usable_bytes - 16;

  struct sigaction handler;
  memset(&handler, 0, sizeof(handler));
  sigfillset(&handler.sa_mask);
  handler.sa_sigaction = debuggerd_signal_handler;
  // SA_ONSTACK: run on the alternate signal stack when one is available, so
  // stack overflows can still be caught.
  handler.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
  debuggerd_register_handlers(&handler);
}
在linker中进行初始化
// Builds the linker's debuggerd callback table and hands it to debuggerd_init().
//
// The abort message and the GWP-ASan state/metadata are read from
// __libc_shared_globals() each time the corresponding callback is invoked.
void linker_debuggerd_init() {
  debuggerd_callbacks_t callbacks = {
    .get_abort_message = []() {
      return __libc_shared_globals()->abort_msg;
    },
    // Was garbled to "¬ify_gdb_of_libraries": the leading "&not" had been
    // decoded as the HTML entity for '¬'.
    .post_dump = &notify_gdb_of_libraries,
    .get_gwp_asan_state = []() {
      return __libc_shared_globals()->gwp_asan_state;
    },
    .get_gwp_asan_metadata = []() {
      return __libc_shared_globals()->gwp_asan_metadata;
    },
  };
  debuggerd_init(&callbacks);
}
在runtime运行时:
Runtime::InitPlatformSignalHandlers
// Installs `newact` as the three-argument handler for the runtime's crash
// signals and, optionally, for the dump-all timeout signal.
//
// newact:                handler to install (sa_sigaction form).
// oldact:                receives the previous action; the same pointer is passed
//                        to every sigaction() call, so it is overwritten each time.
// handle_timeout_signal: when true and GetTimeoutSignal() is not -1, that signal
//                        is hooked as well.
//
// Any sigaction() failure trips the CHECK at the end.
void InitPlatformSignalHandlersCommon(void (*newact)(int, siginfo_t*, void*),
                                      struct sigaction* oldact,
                                      bool handle_timeout_signal) {
  struct sigaction handler_config;
  memset(&handler_config, 0, sizeof(handler_config));
  sigemptyset(&handler_config.sa_mask);
  handler_config.sa_sigaction = newact;
  // SA_SIGINFO selects the three-argument handler; SA_ONSTACK runs it on the
  // alternate signal stack so stack overflows can be caught.
  handler_config.sa_flags |= SA_SIGINFO;
  handler_config.sa_flags |= SA_ONSTACK;

  const int handled_signals[] = {
      SIGABRT,
      SIGBUS,
      SIGFPE,
      SIGILL,
      SIGPIPE,
      SIGSEGV,
#if defined(SIGSTKFLT)
      SIGSTKFLT,
#endif
      SIGTRAP,
  };

  int rc = 0;
  for (int signo : handled_signals) {
    rc += sigaction(signo, &handler_config, oldact);
  }

  // Special dump-all timeout.
  if (handle_timeout_signal && GetTimeoutSignal() != -1) {
    rc += sigaction(GetTimeoutSignal(), &handler_config, oldact);
  }

  CHECK_EQ(rc, 0);
}
上述流程执行是在bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in)中初始化
上述两次注册的信号处理函数都会运行,如下:
// Signal handler used on Android: lets the runtime process the signal first,
// then forwards it to the handler stored in old_action.
void HandleUnexpectedSignalAndroid(int signal_number, siginfo_t* info, void* raw_context) {
  constexpr bool kHandleTimeoutSignal = false;
  constexpr bool kDumpOnStderr = false;

  HandleUnexpectedSignalCommon(
      signal_number, info, raw_context, kHandleTimeoutSignal, kDumpOnStderr);

  // Chain to the previously installed handler.
  old_action.sa_sigaction(signal_number, info, raw_context);
}
linker的action后执行,HandleUnexpectedSignalCommon先执行。
第一处打印主要是
libc : Fatal signal 6 (SIGABRT), code -1 (SI_QUEUE) in tid 21455 (RenderThread), pid 21455 (XXXX)
linker中注册的信号处理函数会运行二进制程序crash_dump64,对应日志如下:
I crash_dump64: obtaining output fd from tombstoned, type: kDebuggerdTombstone
I tombstoned: received crash request for pid 21475
I crash_dump64: performing dump of process 21453 (target tid = 21475)
同时将app crash信息通知AMS
static bool activity_manager_notify(pid_t pid, int signal, const std::string& amfd_data) {
三 java层面crash执行流程
Java层面的crash一般由未被try-catch捕获的异常引起,进而进入crash流程。
在RuntimeInit.java 设置default 异常处理
@UnsupportedAppUsage
protected static final void commonInit() {
if (DEBUG) Slog.d(TAG, "Entered RuntimeInit!");
/*
* set handlers; these apply to all threads in the VM. Apps can replace
* the default handler, but not the pre handler.
*/
LoggingHandler loggingHandler = new LoggingHandler();
RuntimeHooks.setUncaughtExceptionPreHandler(loggingHandler);
Thread.setDefaultUncaughtExceptionHandler(new KillApplicationHandler(loggingHandler));
在异常被捕获后,流程处理如下:
private static class KillApplicationHandler implements Thread.UncaughtExceptionHandler {
/**
 * Default handler for an uncaught exception: logs it, reports the crash to the
 * activity manager, and then terminates the current process.
 *
 * @param t the thread that threw
 * @param e the uncaught throwable
 */
@Override
public void uncaughtException(Thread t, Throwable e) {
    try {
        ensureLogging(t, e);

        // Guard against re-entry so that a crash inside crash reporting cannot
        // loop forever. The early return still runs the finally block below.
        if (mCrashing) {
            return;
        }
        mCrashing = true;

        // Stop any running profiler before the process is killed; otherwise its
        // in-memory buffer would be lost. Stopping flushes the buffer, which
        // keeps method-trace profiling useful for debugging crashes.
        if (ActivityThread.currentActivityThread() != null) {
            ActivityThread.currentActivityThread().stopProfiling();
        }

        // Report the crash: brings up the crash dialog and waits for it to be
        // dismissed.
        ActivityManager.getService().handleApplicationCrash(
                mApplicationObject, new ApplicationErrorReport.ParcelableCrashInfo(e));
    } catch (Throwable t2) {
        // A DeadObjectException means the system process is dead; there is
        // nothing to report in that case.
        if (!(t2 instanceof DeadObjectException)) {
            try {
                Clog_e(TAG, "Error reporting crash", t2);
            } catch (Throwable t3) {
                // Even logging failed; nothing more can be done.
            }
        }
    } finally {
        // Whatever happened above, make sure this process goes away.
        Process.killProcess(Process.myPid());
        System.exit(10);
    }
}
主要执行有两步: 一个是通知AMS,另外一个是进程退出