Android ART虚拟机跳板函数分析

ART虚拟机在非解释模式下,ArtMethod::Invoke调用方法时会根据目标是否为静态方法进入不同的跳板函数(静态方法进入art_quick_invoke_static_stub,非静态方法进入art_quick_invoke_stub),在跳板函数中构造栈帧、处理参数,然后跳转到对应的函数去执行。

art_quick_invoke_static_stub

/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */

SAVE_TWO_REGS_INCREASE_FRAME先将sp向下移动frame_adjustment字节,然后把reg1和reg2保存到新的sp处(reg1位于[sp],reg2位于[sp+8])。

// SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
// Grows the stack by \frame_adjustment bytes and saves a register pair at the
// new stack pointer: \reg1 ends up at [sp] and \reg2 at [sp, #8].
// The CFI directives record the CFA adjustment and both save slots.
.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    // Pre-indexed store: sp is decremented first, then the pair is written at the new sp.
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm

// SAVE_TWO_REGS reg1, reg2, offset
// Saves the pair \reg1/\reg2 in the current frame at [sp, #\offset] and
// [sp, #\offset + 8] without moving sp, and records both slots in the CFI.
// Spelled out directly; this expands to exactly what SAVE_TWO_REGS_BASE
// produces when sp is passed as its base register.
.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

// SAVE_TWO_REGS_BASE base, reg1, reg2, offset
// Stores the pair \reg1/\reg2 at [\base, #\offset] and [\base, #\offset + 8].
// \base is not modified. The CFI directives record the two save slots using
// the same \offset values.
.macro SAVE_TWO_REGS_BASE base, reg1, reg2, offset
    stp \reg1, \reg2, [\base, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

(1)保存的栈
----------
x30
x29
x20
x19
x5[shorty]
x4[result]
---------- <-sp

(4) 保存当前栈顶到xFP[x29],后面函数调用完成后还原,保证栈顶平衡。
(2) sp向下移动 argsize + 指针大小(为ArtMethod*预留),并向上取整到16字节对齐

(3)拷贝参数(argsize为字节数,每个参数槽4字节)
--------
args[argsize/4-1]
.....
args[1]
args[0]
0 #ArtMethod*槽位,这里先置为null
------- <-sp

// INVOKE_STUB_CREATE_FRAME
// Shared prologue of the invoke stubs. Registers read on entry (per the stub prototype):
//   x1 = args, w2 = argsize (bytes, consumed 4 at a time), x3 = Thread* self,
//   x4 = result pointer, x5 = shorty.
// Steps:
//   (1) spill x4/x5, x19/x20 and FP/LR into a SAVE_SIZE-byte area,
//   (4) keep the resulting sp in xFP so the variable-sized part can be dropped later,
//   (2) reserve 16-byte aligned room for an ArtMethod* slot plus the arguments,
//   (3) copy the arguments to [sp, #8] upwards and store null in the slot at [sp].
// On exit: x9 = sp + 8 (start of the copied arguments), w2 = 0, xSELF = x3.
// x10 is clobbered; x0 is not touched.
.macro INVOKE_STUB_CREATE_FRAME
//(1) start
SAVE_SIZE=6*8   // x4, x5, x19, x20, FP, LR saved.
    // Layout after these three saves: [sp+0]=x4, [sp+8]=x5, [sp+16]=x19, [sp+24]=x20,
    // [sp+32]=FP, [sp+40]=LR.
    SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS xFP, xLR, 32
//(1) end
//(4)
    mov xFP, sp                            // Use xFP for frame pointer, as it's callee-saved.
    .cfi_def_cfa_register xFP
//(2)
    // NOTE(review): the full x2 is used here although argsize is declared as a 32-bit
    // value (w2) in the stub prototype; this relies on the upper half being zero - confirm.
    add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
    and x10, x10, # ~0xf                   // round up for 16-byte stack alignment.
    sub sp, sp, x10                        // Adjust SP for ArtMethod*, args and alignment padding.

    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.
  
//(3)
    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
    // The copy runs from the last 32-bit word down to the first: w2 is decremented by 4
    // and then used as the byte index into both source and destination.
1:
    cbz w2, 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]
    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

再将参数加载到寄存器中。注意:x0为ArtMethod*,所以静态方法的第一个参数args[0]放在x1/w1中,第一个浮点参数放在s0/d0中。


// art_quick_invoke_static_stub
// Stub that invokes a static method. Register arguments on entry:
//   x0 = ArtMethod* method, x1 = uint32_t* args, w2 = argsize,
//   x3 = Thread* self, x4 = JValue* result, x5 = shorty.
// After INVOKE_STUB_CREATE_FRAME has copied the arguments to the stack (x9 points at
// the copy), the stub walks the shorty, skipping the return-type character, and loads
// each argument into the next free register: w1-w7 / x1-x7 for integer-like values and
// s0-s7 / d0-d7 for floating point. It then finishes with INVOKE_STUB_CALL_AND_RETURN.
//
// Register loading is done by computed branch: x11-x14 hold the base addresses of four
// tables of LOADREG expansions, and x8 (integer) / x15 (floating point) hold the byte
// offset of the next unused table entry. Every LOADREG expansion is three instructions,
// i.e. 12 bytes, which is why the offsets advance by 12 and the "full" limits are N*12.
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
    // Invariant: x10 = next shorty character, x9 = next argument in the stack copy.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    // x17 is reused from here on: it held the shorty character, now the branch target.
    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

    // Registers are exhausted: the argument stays only in the stack copy, so just step
    // x9 past it (4 bytes for a single slot, 8 bytes for a long/double).
.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

    // The four tables below are branch targets only; control never falls into them.
    // The W and X tables share the counter x8, and the S and D tables share x15, so a
    // 32-bit and a 64-bit argument never land in the same register number.

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


    // All shorty characters consumed: call the method and store its result.
.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub

(1) ART_METHOD_QUICK_CODE_OFFSET_64为ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k64).Int32Value(),即ArtMethod中quick code入口地址字段的偏移。
(2) 准备好参数后跳转到要执行的方法。可以通过这个入口地址判断方法类型:比如jni方法,x9就是art_quick_generic_jni_trampoline;如果是jit转解释模式,x9就是art_quick_to_interpreter_bridge。
(3) 将sp还原为xFP中保存的值,弹出ArtMethod*槽位、参数和对齐填充
(4) 恢复之前保存的x19、x20、xFP、xLR、x4[result]、x5[shorty]寄存器
(5) 读取shorty[0]判断函数返回类型
(6) 如果是void,则直接返回
(7) 如果是浮点,返回值保存在s/d0寄存器中
(8) 其它类型,返回值保存在x/w0中

// INVOKE_STUB_CALL_AND_RETURN
// Shared tail of the invoke stubs. Expects x0 = ArtMethod*, the argument registers
// already loaded, and xFP holding the sp value captured by INVOKE_STUB_CREATE_FRAME.
// It calls the method's quick-code entry point, drops the argument area, restores the
// registers spilled by the prologue (which brings back x4 = result pointer and
// x5 = shorty), and stores the return value into the memory x4 points to according to
// the return-type character shorty[0]:
//   'V' -> nothing stored, 'D' -> d0, 'F' -> s0, anything else -> x0.
// The macro ends in ret on every path. w9 and w10 are clobbered.
.macro INVOKE_STUB_CALL_AND_RETURN

    // Defined outside this excerpt. NOTE(review): presumably reloads the GC marking
    // register before entering managed code - confirm against its definition.
    REFRESH_MARKING_REGISTER

    // load method-> METHOD_QUICK_CODE_OFFSET
//(1)
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
//(2)
    blr x9

//(3)
    // Pop the ArtMethod* (null), arguments and alignment padding from the stack.
    mov sp, xFP
    .cfi_def_cfa_register sp

//(4)
    // Restore saved registers including value address and shorty address.
    // The offsets mirror the saves in INVOKE_STUB_CREATE_FRAME; the last macro also
    // releases the SAVE_SIZE-byte spill area.
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS xFP, xLR, 32
    RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
//(5)
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
//(6)
    cmp w10, #'V'
    beq 1f

//(7)
    // Is it a double?
    cmp w10, #'D'
    beq 2f

    // Is it a float?
//(7)
    cmp w10, #'F'
    beq 3f
//(8)
    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

1:  // Finish up.
    ret

2:  // Store double.
    str d0, [x4]
    ret

3:  // Store float.
    str s0, [x4]
    ret

.endm

art_quick_invoke_stub

/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */

(1) 和静态方法不同,w1保存this
(2) x0保存ArtMethod*,w1保存this,所以第一个参数保存在w2/x2

// art_quick_invoke_stub
// Stub that invokes a non-static method. Register arguments on entry:
//   x0 = ArtMethod* method, x1 = uint32_t* args, w2 = argsize,
//   x3 = Thread* self, x4 = JValue* result, x5 = shorty.
// After INVOKE_STUB_CREATE_FRAME has copied the arguments to the stack (x9 points at
// the copy), the first 32-bit slot is loaded into w1 as "this". The remaining
// arguments are then assigned by walking the shorty (skipping the return-type
// character): w2-w7 / x2-x7 for integer-like values and s0-s7 / d0-d7 for floating
// point. The stub finishes with INVOKE_STUB_CALL_AND_RETURN.
//
// Register loading is done by computed branch: x11-x14 hold the base addresses of four
// tables of LOADREG expansions, and x8 (integer) / x15 (floating point) hold the byte
// offset of the next unused table entry. Every LOADREG expansion is 12 bytes.
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w2 to x/w7 and s/d0 to s/d7 with parameters (w1 receives "this"
    // separately, just before the loop).
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
//(1)
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
    // Invariant: x10 = next shorty character, x9 = next argument in the stack copy.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    // x17 is reused from here on: it held the shorty character, now the branch target.
    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    // Only six integer entries here (x2-x7), because w1 is taken by "this".
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

    // Registers are exhausted: the argument stays only in the stack copy, so just step
    // x9 past it (4 bytes for a single slot, 8 bytes for a long/double).
.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
//  return - the label to branch back to after the load.
// Loads from [x9] with post-increment by size, then advances counter by 12 so that it
// addresses the next table entry. The expansion is exactly three instructions; the
// dispatch code depends on that 12-byte stride.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm
//(2)
    // The four tables below are branch targets only; control never falls into them.
    // The W and X tables share the counter x8, and the S and D tables share x15.

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


    // All shorty characters consumed: call the method and store its result.
.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

调用trampoline时x0为ArtMethod,x1为参数1,依次类推。以下以art_quick_generic_jni_trampoline为例
(1) 去执行真正的jni注册的方法
(2) jni方法的返回值可能在x0中,也可能在d0中,需要把两者都传给artQuickGenericJniEndTrampoline处理,其返回值保存在x0中,返回前再用fmov复制一份到d0
(3) 调用artQuickGenericJniTrampoline返回native code的地址(失败时返回0)。该值位于ArtMethod->ptr_sized_fields_.data

// art_quick_generic_jni_trampoline
// Generic trampoline for calling a native (JNI) method. Going by the name of the frame
// setup macro, it is entered with the ArtMethod* in x0. Outline:
//   1. Build the save-refs-and-args frame, remember sp in x28 and xFP, and reserve
//      5120 bytes of scratch stack below it.
//   2. Call artQuickGenericJniTrampoline(Thread*, sp). It returns the native code
//      pointer in x0 (0 on error) and, in x1, the bottom of the scratch area it used.
//   3. Load x0-x7 and d0-d7 from that area, release those 128 bytes, call the native code.
//   4. Pass the integer result (x0) and the floating-point result bits (d0) to
//      artQuickGenericJniEndTrampoline; its return value arrives in x0.
//   5. If an exception is pending, deliver it; otherwise tear both frames down and
//      return with the result in x0 and the same bits copied into d0.
ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0

    // Save SP , so we can have static CFI info.
    mov x28, sp
    .cfi_def_cfa_register x28

    // This looks the same, but is different: this will be updated to point to the bottom
    // of the frame when the handle scope is inserted.
    mov xFP, sp

    // Reserve a fixed 5120-byte scratch area (the "alloca" referred to below).
    mov xIP0, #5120
    sub sp, sp, xIP0

    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    x0      x1   <= C calling convention
    //   xSELF    xFP  <= where they are

    mov x0, xSELF   // Thread*
    mov x1, xFP
//(3)
    bl artQuickGenericJniTrampoline  // (Thread*, sp) 

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // x0: pointer to native code, 0 on error.
    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    cbz x0, .Lexception_in_native

    // Release part of the alloca.
    mov sp, x1

    // Save the code pointer
    // (x0 is about to be overwritten with the first native argument.)
    mov xIP0, x0

    // Load parameters from frame into registers.
    // TODO Check with artQuickGenericJniTrampoline.
    //      Also, check again AAPCS64 - the stack arguments are interleaved.
    // Layout at sp: 8 general-purpose values (64 bytes) followed by 8 FP values (64 bytes).
    ldp x0, x1, [sp]
    ldp x2, x3, [sp, #16]
    ldp x4, x5, [sp, #32]
    ldp x6, x7, [sp, #48]

    ldp d0, d1, [sp, #64]
    ldp d2, d3, [sp, #80]
    ldp d4, d5, [sp, #96]
    ldp d6, d7, [sp, #112]

    // Drop the 128 bytes of register arguments that were just loaded.
    add sp, sp, #128

    blr xIP0        // native call.(1)

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*, result, result_f)
    //    x0       x1       x2        <= C calling convention
    mov x1, x0      // Result (from saved).
    mov x0, xSELF   // Thread register.
    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
  // (2)
    bl artQuickGenericJniEndTrampoline

    // Pending exceptions possible.
    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
    cbnz x2, .Lexception_in_native

    // Tear down the alloca.
    mov sp, x28
    .cfi_def_cfa_register sp

    // Tear down the callee-save frame.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER

    // store into fpr, for when it's a fpr return...
    // (x0 keeps the value as well, so the caller can read whichever register applies.)
    fmov d0, x0
    ret

    // Error path: reached when no native code pointer was returned or an exception is
    // pending after the call. sp is rebuilt from the thread's top quick frame value.
.Lexception_in_native:
    // Move to x1 then sp to please assembler.
    ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
    add sp, x1, #-1  // Remove the GenericJNI tag.
    .cfi_def_cfa_register sp
    # This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容