利用mprotect+backtrace定位故障
利用mprotect保护栈空间:
在操作系统中,进程的栈空间(X86_64)默认大小为8192KB,发生栈溢出时会产生段错误。但协程的栈是在用户态分配和管理的,操作系统不会为其提供保护,为保证数据安全,需要手动对栈尾进行保护。
利用mprotect保护栈的Bottom的 page,不允许读和写
安装SIGSEGV的信号处理函数,发生stack overflow时记录更多的信息
SIGSEGV handler中需要记录发生的:addr 及 backtrace
Boost中分配具有mprotect的栈
// Build the allocator used for coroutine stacks: fixed-size 512 KiB stacks
// obtained from Boost.Context's protected (mmap/mprotect-backed) allocator.
auto make_stack_allocator() {
    constexpr auto kCoroutineStackBytes = 512 * 1024;
    return boost::context::protected_fixedsize_stack{kCoroutineStackBytes};
}
- 安装信号处理函数
注意:此处通过sigaltstack为信号处理函数设置了独立的备用栈。备用栈是每个线程独立的,多线程环境下每个线程都需要分配自己的备用栈并调用sigaltstack;信号处理函数本身是进程级的,只需通过sigaction安装一次。
// Give the SIGSEGV handler a stack of its own: after a stack overflow the
// faulting stack has no room left to run a handler on.
stack_t segv_stack;
segv_stack.ss_sp = valloc(SEGV_STACK_SIZE);
segv_stack.ss_flags = 0;
segv_stack.ss_size = SEGV_STACK_SIZE;
sigaltstack(&segv_stack, NULL);

struct sigaction action;
bzero(&action, sizeof(action));
// Do not block any additional signals while the handler runs.
sigemptyset(&action.sa_mask);
// SA_ONSTACK   - run the handler on the alternate stack registered above
//                (SA_STACK is an obsolete, non-POSIX alias for this flag).
// SA_RESETHAND - restore the default disposition on handler entry, so that
//                when the handler returns and the faulting instruction is
//                retried the process terminates instead of re-entering the
//                handler forever.
action.sa_flags = SA_SIGINFO | SA_ONSTACK | SA_RESETHAND;
action.sa_sigaction = &sigsegv_handler;
sigaction(SIGSEGV, &action, NULL);
- 信号处理函数
// SA_SIGINFO-style handler for SIGSEGV.
//
// Prints a banner to standard output, then appends the faulting address
// (info->si_addr) and the current backtrace to ./sigsegv.bt.
//
//   signum - signal number (unused)
//   info   - kernel-supplied details; only si_addr is read
//   data   - ucontext pointer (unused)
static void sigsegv_handler(int signum, siginfo_t *info, void *data) {
    (void)signum;
    (void)data;

    // Use write(2) directly: it is async-signal-safe, iostream is not.
    static const char banner[] = " Segment Fault\n";
    write(STDOUT_FILENO, banner, sizeof(banner) - 1);

    void *addr = info->si_addr;

    // O_CREAT requires an explicit mode argument; without it the new file's
    // permission bits are taken from whatever happens to be on the stack.
    int fd = open("./sigsegv.bt", O_CREAT | O_RDWR | O_APPEND, 0644);
    if (fd < 0) {
        return;  // nowhere to log to
    }

    char buff[256];
    int len = snprintf(buff, sizeof(buff), "Addr: %p\n", addr);
    if (len > 0) {
        write(fd, buff, len);
    }

    // backtrace_symbols_fd writes straight to the descriptor, one frame per line.
    void *array[30];
    int size = backtrace(array, 30);
    backtrace_symbols_fd(array, size, fd);
    close(fd);
}
测试利用mprotect backtrace定位core dump
原理:
- 对可能产生内存越界或者内存无效访问的区域做mprotect保护
- 重新定义SIGSEGV信号的处理函数,对此保护区域的内存读写都会产生SIGSEGV信号,在此信号处理函数中记录调用栈
- mprotect需要页对齐,valloc分配的是页对齐的地址,对于栈空间可以使用:
// Round p down to the start of the page that contains it.
// ps is the page size (a power of two); it is widened to uintptr_t before
// building the mask so the upper address bits are never cleared.
char* buffer = reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(p) & ~(static_cast<uintptr_t>(ps) - 1));
此处buffer是不大于地址p的最近的页对齐地址(即p所在页的起始地址,向下取整)
- mprotect保护的区域,释放时需要恢复
// SA_RESETHAND restores the default disposition when the handler is entered.
// Without it, returning from the handler retries the faulting access and
// SIGSEGV is raised again without end (alternatively, the handler can repair
// the cause of the fault, in which case execution may continue).
struct sigaction action = {};
action.sa_sigaction = &sigsegv_handler;
action.sa_flags = SA_RESETHAND | SA_SIGINFO;
sigaction(SIGSEGV, &action, nullptr);
信号处理函数,利用backtrace跟踪程序的调用栈:
// SA_SIGINFO-style handler for SIGSEGV.
//
// Prints a banner to standard output, then appends the faulting address
// (info->si_addr) and the current backtrace to ./sigsegv.bt.
//
//   signum - signal number (unused)
//   info   - kernel-supplied details; only si_addr is read
//   data   - ucontext pointer (unused)
static void sigsegv_handler(int signum, siginfo_t *info, void *data) {
    (void)signum;
    (void)data;

    // Use write(2) directly: it is async-signal-safe, iostream is not.
    static const char banner[] = " Segment Fault\n";
    write(STDOUT_FILENO, banner, sizeof(banner) - 1);

    void *addr = info->si_addr;

    // O_CREAT requires an explicit mode argument; without it the new file's
    // permission bits are taken from whatever happens to be on the stack.
    int fd = open("./sigsegv.bt", O_CREAT | O_RDWR | O_APPEND, 0644);
    if (fd < 0) {
        return;  // nowhere to log to
    }

    char buff[256];
    int len = snprintf(buff, sizeof(buff), "Addr: %p\n", addr);
    if (len > 0) {
        write(fd, buff, len);
    }

    // backtrace_symbols_fd writes straight to the descriptor, one frame per line.
    void *array[30];
    int size = backtrace(array, 30);
    backtrace_symbols_fd(array, size, fd);
    close(fd);
}
测试代码:
#include <cstdlib>
#include <cstdio>
#include <unistd.h>
#include <sys/mman.h>
#include <execinfo.h>
#include <cstring>
#include <fcntl.h>
#include <signal.h>
#include <iostream>
// SA_SIGINFO-style handler for SIGSEGV.
//
// Prints a banner to standard output, then appends the faulting address
// (info->si_addr) and the current backtrace to ./sigsegv.bt.
//
//   signum - signal number (unused)
//   info   - kernel-supplied details; only si_addr is read
//   data   - ucontext pointer (unused)
static void sigsegv_handler(int signum, siginfo_t *info, void *data) {
    (void)signum;
    (void)data;

    // Use write(2) directly: it is async-signal-safe, iostream is not.
    static const char banner[] = " Segment Fault\n";
    write(STDOUT_FILENO, banner, sizeof(banner) - 1);

    void *addr = info->si_addr;

    // O_CREAT requires an explicit mode argument; without it the new file's
    // permission bits are taken from whatever happens to be on the stack.
    int fd = open("./sigsegv.bt", O_CREAT | O_RDWR | O_APPEND, 0644);
    if (fd < 0) {
        return;  // nowhere to log to
    }

    char buff[256];
    int len = snprintf(buff, sizeof(buff), "Addr: %p\n", addr);
    if (len > 0) {
        write(fd, buff, len);
    }

    // backtrace_symbols_fd writes straight to the descriptor, one frame per line.
    void *array[30];
    int size = backtrace(array, 30);
    backtrace_symbols_fd(array, size, fd);
    close(fd);
}
// Install sigsegv_handler as the SA_SIGINFO-style handler for `signum`.
// SA_RESETHAND puts the default disposition back as soon as the handler is
// entered, so the handler runs at most once per registration.
void register_signal(int signum){
    struct sigaction sa = {};  // every field starts out zeroed
    sigemptyset(&sa.sa_mask);
    sa.sa_sigaction = &sigsegv_handler;
    sa.sa_flags = SA_RESETHAND | SA_SIGINFO;
    sigaction(signum, &sa, nullptr);
}
// Allocate a page-aligned buffer of at least `stack_size` bytes and make its
// first page inaccessible (PROT_NONE), so any read or write there raises
// SIGSEGV.
//
// The request is rounded up to a whole number of pages (minimum one) so that
// the protected page lies entirely inside the allocation; protecting a page
// that is only partly owned would also lock out neighbouring heap data.
//
// Returns the start of the buffer, or nullptr if `stack_size` is negative or
// the allocation / protection fails. Release the buffer with destroy().
void* core_func(long stack_size){
    if (stack_size < 0) {
        return nullptr;
    }

    const size_t page = static_cast<size_t>(getpagesize());
    const size_t wanted = static_cast<size_t>(stack_size);
    size_t pages = wanted / page + (wanted % page != 0 ? 1 : 0);
    if (pages == 0) {
        pages = 1;  // always leave room for the guard page itself
    }

    void *stack = nullptr;
    if (posix_memalign(&stack, page, pages * page) != 0) {
        return nullptr;
    }
    if (mprotect(stack, page, PROT_NONE) != 0) {
        free(stack);
        return nullptr;
    }
    return stack;
}
// Release a buffer whose first page was made inaccessible with mprotect.
// Read/write access to that page is restored first, then the buffer is
// handed back to the allocator.
void destroy(void* stack){
    const int page_bytes = getpagesize();
    const int rw_access = PROT_READ | PROT_WRITE;
    mprotect(stack, page_bytes, rw_access);
    free(stack);
}
// Demo driver: installs the SIGSEGV handler, obtains a buffer whose first
// page is protected, then deliberately writes into that page to trigger the
// handler.
int main(int argc,char* argv[]){
    (void)argc;
    (void)argv;

    register_signal(SIGSEGV);
    void* stack = core_func(1024);
    if (stack == nullptr) {
        std::cerr<<"core_func failed"<<std::endl;
        return 1;
    }

    std::cout<<"Write"<<std::endl;
    // Intentional fault: byte 10 lies inside the protected first page.
    // Cast to char* before indexing; arithmetic on void* is not valid ISO C++.
    static_cast<char*>(stack)[10] = 'a';
    std::cout<<"Write Done"<<std::endl;
    destroy(stack);
    return 0;
}
运行结果:
# ./mp
Write
Segment Fault
段错误
# cat sigsegv.bt
Addr: 0x97e00a
./mp[0x400c4e]
/lib64/libc.so.6(+0x36400)[0x7f4f1f84f400]
./mp[0x400da4]
/lib64/libc.so.6(__libc_start_main+0xf5)[0x7f4f1f83b555]
./mp[0x400ae9]
利用addr2line定位:
# addr2line -afiCe mp 0x400d3a
0x0000000000400d3a
main
/home/working/cpp/test_mp.cc:48
利用valgrind定位:
# valgrind --leak-check=full ./mp
==9359== Memcheck, a memory error detector
==9359== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==9359== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==9359== Command: ./mp
==9359==
Write
Segment Fault
==9359==
==9359== Process terminating with default action of signal 11 (SIGSEGV)
==9359== Bad permissions for mapped region at address 0x5AB400A
==9359== at 0x400DA4: main (test_mp.cc:49)
==9359==
==9359== HEAP SUMMARY:
==9359== in use at exit: 1,024 bytes in 1 blocks
==9359== total heap usage: 3 allocs, 2 frees, 73,784 bytes allocated
==9359==
==9359== 1,024 bytes in 1 blocks are definitely lost in loss record 1 of 1
==9359== at 0x4C2C375: memalign (vg_replace_malloc.c:908)
==9359== by 0x4C2C40A: valloc (vg_replace_malloc.c:956)
==9359== by 0x400CFB: core_func(long) (test_mp.cc:34)
==9359== by 0x400D7B: main (test_mp.cc:47)
==9359==
==9359== LEAK SUMMARY:
==9359== definitely lost: 1,024 bytes in 1 blocks
==9359== indirectly lost: 0 bytes in 0 blocks
==9359== possibly lost: 0 bytes in 0 blocks
==9359== still reachable: 0 bytes in 0 blocks
==9359== suppressed: 0 bytes in 0 blocks
==9359==
==9359== For lists of detected and suppressed errors, rerun with: -s
==9359== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
段错误
利用gcc asan内存检测:
asan可以检测:
- 检查地址相关问题,包括释放后使用、重复释放、堆溢出、栈溢出等等问题
- 检查内存泄漏问题
- 注意:线程数据竞争和死锁问题不属于asan的检测范围,需要使用ThreadSanitizer(-fsanitize=thread)检测
# g++ --std=c++11 -g -o mp test_mp.cc -fsanitize=address
# ./mp
Write
ASAN:DEADLYSIGNAL
=================================================================
==10792==ERROR: AddressSanitizer: SEGV on unknown address 0x62500000100a (pc 0x00000040160b bp 0x7ffc4c685c60 sp 0x7ffc4c685c40 T0)
==10792==The signal is caused by a WRITE memory access.
#0 0x40160a in main /home/working/cpp/test_mp.cc:49
#1 0x7f1570d02554 in __libc_start_main (/lib64/libc.so.6+0x22554)
#2 0x401038 (/home/working/cpp/mp+0x401038)
AddressSanitizer can not provide additional info.
SUMMARY: AddressSanitizer: SEGV /home/working/cpp/test_mp.cc:49 in main
==10792==ABORTING
addr2line
可以定位出现core dump的代码位置。用例如下:
示例代码:向只读内存(字符串常量)写入数据,触发段错误。
/* Minimal crash example: writes one byte through a pointer to a string
 * literal. The fault is intentional; it produces the segfault that the
 * dmesg / addr2line steps are then used to locate. */
int main(void) {
char *str;
/* The string literal is stored in a read-only part of the data segment. */
str = "over flow";
/* Problem: this store tries to modify read-only memory. */
*(str + 1) = 'n';
return 0;
}
产生
编译:
g++ -ggdb -o test test_mprotect.cc
dmesg -C #清空缓存日志
./test # 产生core dump
查看dmesg信息:
# dmesg
[3381707.691606] test[19966]: segfault at 4005a5 ip 000000000040050b sp 00007ffd68b27aa0 error 7 in test[400000+1000]
可见出现core dump的位置位于: 000000000040050b
利用addr2line定位代码位置:
addr2line -afiCe test 000000000040050b
0x000000000040050b
main
/home/working/cpp/test_mprotect.cc:8