from How Debugger Works
核心是系统调用 ptrace。ptrace 可以让一个process窥探另一个process的内部,甚至控制另一个process。首先创建两个process,parent运行debugger,child运行target。target程序调用ptrace系统调用,参数PTRACE_TRACEME告诉内核可以让parent跟踪自己。
/*
 * Child side of the tracer: ask to be traced by the parent, then replace this
 * process's image with the program to debug.
 *
 * programname: path of the executable handed to execl(); also passed as
 *              argv[0].
 *
 * Returns only on failure (of ptrace or of execl), after reporting the error
 * with perror().
 */
void run_target(const char* programname)
{
    procmsg("target started. will run '%s'\n", programname);

    /* Allow tracing of this process */
    if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
        perror("ptrace");
        return;
    }

    /* Replace this process's image with the given program. The variadic
    ** argument list must be terminated by a null *pointer*; a bare 0 is an
    ** int and is not guaranteed to be passed as a pointer.
    */
    execl(programname, programname, (char*)NULL);

    /* execl() only returns if it failed */
    perror("execl");
}
Indicates that this process is to be traced by its parent. Any signal (except SIGKILL) delivered to this process will cause it to stop and its parent to be notified via wait(). Also, all subsequent calls to exec() by this process will cause a SIGTRAP to be sent to it, giving the parent a chance to gain control before the new program begins execution. A process probably shouldn't make this request if its parent isn't expecting to trace it. (pid, addr, and data are ignored.)
- 也就是说child调用execl之后、新程序开始执行之前,会收到SIGTRAP而停止运行,并通知parent。此时parent的wait()返回,并且用WIFSTOPPED判断child是否停止。如果是就调用ptrace,但是参数是PTRACE_SINGLESTEP,让child单步运行。单步运行意味着child每运行一个指令都会停下来并通知parent,这里的while循环就是这个意思,直到wait()返回的状态不再是"停止"(例如child已经退出)。注意这里的icounter就是指令计数器,每收到一次停止通知就计数一次。
/*
 * Parent side of the tracer: single-step the child for as long as wait()
 * reports it as stopped, counting one step per stop, then print the count.
 *
 * child_pid: pid of the traced child.
 *
 * Returns early (after perror()) if ptrace() or wait() fails.
 */
void run_debugger(pid_t child_pid)
{
    int wait_status = 0;
    unsigned icounter = 0;

    procmsg("debugger started\n");

    /* Wait for child to stop on its first instruction */
    if (wait(&wait_status) < 0) {
        /* wait_status was not written, so it must not be inspected */
        perror("wait");
        return;
    }

    while (WIFSTOPPED(wait_status)) {
        icounter++;

        /* Make the child execute another instruction */
        if (ptrace(PTRACE_SINGLESTEP, child_pid, 0, 0) < 0) {
            perror("ptrace");
            return;
        }

        /* Wait for child to stop on its next instruction */
        if (wait(&wait_status) < 0) {
            perror("wait");
            return;
        }
    }

    procmsg("the child executed %u instructions\n", icounter);
}
- 另外,还有其他的ptrace参数可以帮助获得child的各种信息,包括寄存器,指令等。
/* Fetch the child's registers, then read the word located at its current
** instruction pointer and print both.
*/
struct user_regs_struct regs;
ptrace(PTRACE_GETREGS, child_pid, 0, &regs);
unsigned instr = ptrace(PTRACE_PEEKTEXT, child_pid, regs.eip, 0);
procmsg("icounter = %u. EIP = 0x%08x. instr = 0x%08x\n", icounter, regs.eip, instr);
结果如下:
$ simple_tracer traced_helloworld
[5700] debugger started
[5701] target started. will run 'traced_helloworld'
[5700] icounter = 1. EIP = 0x08048080. instr = 0x00000eba
[5700] icounter = 2. EIP = 0x08048085. instr = 0x0490a0b9
[5700] icounter = 3. EIP = 0x0804808a. instr = 0x000001bb
[5700] icounter = 4. EIP = 0x0804808f. instr = 0x000004b8
[5700] icounter = 5. EIP = 0x08048094. instr = 0x01b880cd
Hello, world!
[5700] icounter = 6. EIP = 0x08048096. instr = 0x000001b8
[5700] icounter = 7. EIP = 0x0804809b. instr = 0x000080cd
[5700] the child executed 7 instructions
attach也并不难,只用给ptrace一个参数即可,PTRACE_ATTACH
问题是每次单步实在是太费劲了,动辄几千条指令,这个时候breakpoint就是一个好的选择了。
debugger的两个基石,一个是breakpoint,另一个是能够探测debugged process的memory。
breakpoint就是一个软中断,x86上面的3号软中断就是专门用来trap to debugger的
The INT 3 instruction generates a special one byte opcode (CC) that is intended for calling the debug exception handler. (This one byte form is valuable because it can be used to replace the first byte of any instruction with a breakpoint, including other one byte instructions, without over-writing other code).
Linux接收到int 3的时候会向process发送SIGTRAP。联系到之前的例子:child之所以会停止并让parent得到通知,就是因为它先收到了SIGTRAP。
假设有如下汇编代码,这个代码先打印hello,再打印world,我们的目的是在打印hello之后暂停进程。我们只需要将指令 mov edx, len2 替换成 int 3 就行了。
section .text
    ; The _start symbol must be declared for the linker (ld)
    global _start

_start:
    ; Prepare arguments for the sys_write system call:
    ; - eax: system call number (sys_write)
    ; - ebx: file descriptor (stdout)
    ; - ecx: pointer to string
    ; - edx: string length
    mov edx, len1
    mov ecx, msg1
    mov ebx, 1
    mov eax, 4

    ; Execute the sys_write system call
    int 0x80

    ; Now print the other message
    mov edx, len2       ; <---- replaced by int 3
    mov ecx, msg2
    mov ebx, 1
    mov eax, 4
    int 0x80

    ; Execute sys_exit
    mov eax, 1
    int 0x80

section .data
    ; Each message ends with a newline (0xa); lenN is the byte count of msgN
    msg1 db 'Hello,', 0xa
    len1 equ $ - msg1
    msg2 db 'world!', 0xa
    len2 equ $ - msg2
- 实际上debugger要做两件事:
- 记住原先的指令
- 将原先指令的第一个byte替换成int 3
- 当debugger让程序按照PTRACE_CONT运行,运行到int 3的时候收到信号,然后通知debugger child已经停止。debugger要做:
- 恢复原来的指令
- ip - 1
- 可以peek child的状态,比如寄存器,变量等
- 恢复breakpoint,因为用户没有要求delete breakpoint
/* Obtain and show child's instruction pointer */
ptrace(PTRACE_GETREGS, child_pid, 0, &regs);
procmsg("Child started. EIP = 0x%08x\n", regs.eip);

/* Look at the word at the address we're interested in */
unsigned addr = 0x8048096;
unsigned data = ptrace(PTRACE_PEEKTEXT, child_pid, (void*)addr, 0);
procmsg("Original data at 0x%08x: 0x%08x\n", addr, data);
[13028] Child started. EIP = 0x08048080
[13028] Original data at 0x08048096: 0x000007ba
/* Write the trap instruction 'int 3' into the address */
unsigned data_with_trap = (data & 0xFFFFFF00) | 0xCC;
ptrace(PTRACE_POKETEXT, child_pid, (void*)addr, (void*)data_with_trap)
;
/* See what's there again... */
unsigned readback_data = ptrace(PTRACE_PEEKTEXT, child_pid, (void*)addr, 0);
procmsg("After trap, data at 0x%08x: 0x%08x\n", addr, readback_data);
[13028] After trap, data at 0x08048096: 0x000007cc
/* Let the child run to the breakpoint and wait for it to
** reach it
*/
ptrace(PTRACE_CONT, child_pid, 0, 0);
wait(&wait_status);
if (WIFSTOPPED(wait_status)) {
    procmsg("Child got a signal: %s\n", strsignal(WSTOPSIG(wait_status)));
} else {
    perror("wait");
    return;
}

/* See where the child is now */
ptrace(PTRACE_GETREGS, child_pid, 0, &regs);
procmsg("Child stopped at EIP = 0x%08x\n", regs.eip);
This prints:
Hello,
[13028] Child got a signal: Trace/breakpoint trap
[13028] Child stopped at EIP = 0x08048097
/* Remove the breakpoint by restoring the previous data
** at the target address, and unwind the EIP back by 1 to
** let the CPU execute the original instruction that was
** there.
*/
ptrace(PTRACE_POKETEXT, child_pid, (void*)addr, (void*)data);
regs.eip -= 1;
ptrace(PTRACE_SETREGS, child_pid, 0, &regs);

/* The child can continue running now */
ptrace(PTRACE_CONT, child_pid, 0, 0);
int 3只占用一个字节是故意为之的,否则可能损毁后面的指令
假设有下面的代码,我想在do_stuff的入口break。首先可以先查找到入口指令的地址,这里因为是个循环,我们要使breakpoint能够在触发后恢复。
#include <stdio.h>
/* Prints "Hello, " (without a trailing newline) to stdout. */
void do_stuff(void)
{
    printf("Hello, ");
}
/* Calls do_stuff() four times, then prints "world!" followed by a newline. */
int main(void)
{
    for (int i = 0; i < 4; ++i) {
        do_stuff();
    }
    printf("world!\n");
    return 0;
}
080483e4 <do_stuff>:
80483e4: 55 push %ebp
80483e5: 89 e5 mov %esp,%ebp
80483e7: 83 ec 18 sub $0x18,%esp
80483ea: c7 04 24 f0 84 04 08 movl $0x80484f0,(%esp)
80483f1: e8 22 ff ff ff call 8048318 <puts@plt>
80483f6: c9 leave
80483f7: c3 ret
/*
 * Debugger loop for the breakpoint example: create a breakpoint at the fixed
 * address 0x080483e4, let the child run to it, and keep resuming from it
 * until resume_from_breakpoint() reports that the child exited (0) or
 * returns an unexpected code.
 *
 * child_pid: pid of the traced child.
 *
 * Unlike a bare wait(0), every wait status is inspected, so a child that
 * terminates before reaching the breakpoint is not treated as stopped.
 */
void run_debugger(pid_t child_pid)
{
    int wait_status = 0;

    procmsg("debugger started\n");

    /* Wait for child to stop on its first instruction */
    if (wait(&wait_status) < 0) {
        perror("wait");
        return;
    }
    if (!WIFSTOPPED(wait_status)) {
        procmsg("child exited\n");
        return;
    }

    procmsg("child now at EIP = 0x%08x\n", get_child_eip(child_pid));

    /* Create breakpoint and run to it*/
    debug_breakpoint* bp = create_breakpoint(child_pid, (void*)0x080483e4);
    procmsg("breakpoint created\n");

    if (ptrace(PTRACE_CONT, child_pid, 0, 0) < 0) {
        perror("ptrace");
        goto out;
    }
    if (wait(&wait_status) < 0) {
        perror("wait");
        goto out;
    }
    if (!WIFSTOPPED(wait_status)) {
        /* The child finished without ever stopping again */
        procmsg("child exited\n");
        goto out;
    }

    /* Loop as long as the child didn't exit */
    while (1) {
        /* The child is stopped at a breakpoint here. Resume its
        ** execution until it either exits or hits the
        ** breakpoint again.
        */
        procmsg("child stopped at breakpoint. EIP = 0x%08X\n", get_child_eip(child_pid));
        procmsg("resuming\n");

        int rc = resume_from_breakpoint(child_pid, bp);
        if (rc == 0) {
            procmsg("child exited\n");
            break;
        }
        else if (rc == 1) {
            continue;
        }
        else {
            procmsg("unexpected: %d\n", rc);
            break;
        }
    }

out:
    cleanup_breakpoint(bp);
}
/*
 * Step the child over the breakpoint it is currently stopped at, re-enable
 * the breakpoint, and let the child run until its next stop or its exit.
 *
 * pid: pid of the traced child.
 * bp:  breakpoint the child is stopped at; asserted via EIP == bp->addr + 1.
 *
 * Returns 0 if the child exited, 1 if it stopped again, and -1 if a
 * ptrace()/wait() call failed or the wait status was neither of those.
 */
int resume_from_breakpoint(pid_t pid, debug_breakpoint* bp)
{
    struct user_regs_struct regs;
    int wait_status = 0;

    if (ptrace(PTRACE_GETREGS, pid, 0, &regs) < 0) {
        /* regs was not filled in, so it must not be inspected */
        perror("ptrace");
        return -1;
    }

    /* Make sure we indeed are stopped at bp */
    assert(regs.eip == (long) bp->addr + 1);

    /* Now disable the breakpoint, rewind EIP back to the original instruction
    ** and single-step the process. This executes the original instruction that
    ** was replaced by the breakpoint.
    */
    regs.eip = (long) bp->addr;
    if (ptrace(PTRACE_SETREGS, pid, 0, &regs) < 0) {
        perror("ptrace");
        return -1;
    }
    disable_breakpoint(pid, bp);
    if (ptrace(PTRACE_SINGLESTEP, pid, 0, 0) < 0) {
        perror("ptrace");
        return -1;
    }
    if (wait(&wait_status) < 0) {
        perror("wait");
        return -1;
    }
    if (WIFEXITED(wait_status)) {
        return 0;
    }

    /* Re-enable the breakpoint and let the process run.
    */
    enable_breakpoint(pid, bp);
    if (ptrace(PTRACE_CONT, pid, 0, 0) < 0) {
        perror("ptrace");
        return -1;
    }
    if (wait(&wait_status) < 0) {
        perror("wait");
        return -1;
    }

    if (WIFEXITED(wait_status)) {
        return 0;
    }
    else if (WIFSTOPPED(wait_status)) {
        return 1;
    }
    else {
        return -1;
    }
}