art虚拟机在非解释模式下,ArtMethod::Invoke调用方法会根据是否为静态方法进入跳板函数,构造栈帧,处理参数,然后跳转到对应的函数去执行。
art_quick_invoke_static_stub
/* extern"C"
* void art_quick_invoke_static_stub(ArtMethod *method, x0
* uint32_t *args, x1
* uint32_t argsize, w2
* Thread *self, x3
* JValue *result, x4
* char *shorty); x5
*/
SAVE_TWO_REGS_INCREASE_FRAME:先将sp向下移动frame_adjustment字节,再把reg1和reg2保存到新的栈顶([sp]和[sp+8])
// SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
//   Grows the stack by \frame_adjustment bytes and saves a register pair at the new sp
//   using a single pre-indexed store: \reg1 lands at [sp], \reg2 at [sp + 8].
//   The CFI directives record the CFA adjustment and the save slot of each register.
.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
// Pre-index form: sp is decremented first, then the pair is stored at the updated sp.
stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
.cfi_adjust_cfa_offset (\frame_adjustment)
.cfi_rel_offset \reg1, 0
.cfi_rel_offset \reg2, 8
.endm
// SAVE_TWO_REGS reg1, reg2, offset
//   Saves a register pair into the already-allocated frame at [sp + \offset]
//   (\reg1 at \offset, \reg2 at \offset + 8) and records both slots in the CFI.
//   sp itself is not modified.
.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm
// SAVE_TWO_REGS_BASE base, reg1, reg2, offset
//   Stores a register pair at [\base + \offset]: \reg1 at \offset, \reg2 at \offset + 8.
//   \base is left unchanged. The CFI directives record the two save slots.
.macro SAVE_TWO_REGS_BASE base, reg1, reg2, offset
stp \reg1, \reg2, [\base, #(\offset)]
.cfi_rel_offset \reg1, (\offset)
.cfi_rel_offset \reg2, (\offset) + 8
.endm
(1)保存的栈
----------
x30
x29
x20
x19
x5[shorty]
x4[result]
---------- <-sp
(4) 保存当前栈顶到xFP[x29],后面函数调用完成后还原,保证栈顶平衡。
(2) sp向下移动 argsize + sizeof(pointer) 字节,并向上取整到16字节对齐
(3)拷贝参数
--------
args[argsize-1]
.....
args[1]
args[0]
0 #先置为零,保存ArtMethod
------- <-sp
// INVOKE_STUB_CREATE_FRAME
//   Shared prologue of the invoke stubs. Builds the stub frame and copies the
//   packed argument array onto the stack.
//
//   In:   x1 = args (source array of 4-byte slots)
//         w2 = argsize in bytes (uint32_t; upper half of x2 is not trusted)
//         x3 = Thread* self
//         x4 = JValue* result, x5 = shorty (both spilled so they can be reloaded later)
//   Out:  xFP   = sp right after the register save area (used to unwind the frame)
//         sp    = xFP - round_up_16(argsize + sizeof(pointer))
//         [sp]  = null ArtMethod* slot, [sp + 8 ...] = copy of args
//         x9    = sp + 8 (address of the first copied argument)
//         xSELF = self
//   Clobbers: w2 (counted down to 0), x10, and defines the symbol SAVE_SIZE.
.macro INVOKE_STUB_CREATE_FRAME
//(1) start
SAVE_SIZE=6*8 // x4, x5, x19, x20, FP, LR saved.
    SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS xFP, xLR, 32
//(1) end

//(4)
    mov xFP, sp                              // Use xFP for frame pointer, as it's callee-saved.
    .cfi_def_cfa_register xFP

//(2)
    // argsize is a 32-bit argument: AAPCS64 leaves bits [63:32] of x2 unspecified, so
    // zero-extend it before x2 is used as a 64-bit size and as an index register below.
    mov w2, w2
    add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
    and x10, x10, # ~0xf                     // round up for 16-byte stack alignment.
    sub sp, sp, x10                          // Adjust SP for ArtMethod*, args and alignment padding.

    mov xSELF, x3                            // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                           // Destination address is bottom of stack + null.

//(3)
    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
    // The loop walks from the last slot down to slot 0: w2 is both the remaining byte
    // count and the byte offset of the slot being copied.
1:
    cbz w2, 2f
    sub w2, w2, #4                           // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]
    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm
再将参数保存到寄存器上,注意,第一个参数arg0->x/w1上,第一个浮点保存到s/d0
// art_quick_invoke_static_stub
//   Entry stub for invoking a static method. After INVOKE_STUB_CREATE_FRAME has copied
//   the arguments to the stack, the shorty is scanned character by character and each
//   argument is loaded from the stack copy into the next free register:
//     'F' -> s0..s7, 'D' -> d0..d7, 'J' -> x1..x7, anything else -> w1..w7.
//   Arguments that do not fit in registers are skipped (they stay in the stack copy).
//
//   Register roles inside the fill loop:
//     x9        cursor into the copied argument area (advanced by 4 or 8 per argument)
//     x10       cursor into the shorty (starts at shorty + 1, skipping the return type)
//     x8, x15   byte offsets into the integer / floating-point load tables
//     x11..x14  base addresses of the W, X, S and D load tables
//     x17       scratch: current shorty character, then the computed branch target
//   Each table entry is one LOADREG expansion (three instructions = 12 bytes), which is
//   why the offsets advance in steps of 12 and the "full" limits are N*12.
//   LOADREG is defined elsewhere in this file.
ENTRY art_quick_invoke_static_stub
// Spill registers as per AAPCS64 calling convention.
INVOKE_STUB_CREATE_FRAME
// Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
// Parse the passed shorty to determine which register to load.
// Load addresses for routines that load WXSD registers.
adr x11, .LstoreW1_2
adr x12, .LstoreX1_2
adr x13, .LstoreS0_2
adr x14, .LstoreD0_2
// Initialize routine offsets to 0 for integers and floats.
// x8 for integers, x15 for floating point.
mov x8, #0
mov x15, #0
// x5 may be overwritten by the loads below (it is one of the argument registers),
// so the shorty is walked through x10 instead.
add x10, x5, #1 // Load shorty address, plus one to skip return value.
// Loop to fill registers.
.LfillRegisters2:
ldrb w17, [x10], #1 // Load next character in signature, and increment.
cbz w17, .LcallFunction2 // Exit at end of signature. Shorty 0 terminated.
cmp w17, #'F' // is this a float?
bne .LisDouble2
// 8*12 = offset just past the eighth FP table entry, i.e. s0..s7 / d0..d7 are all used.
cmp x15, # 8*12 // Skip this load if all registers full.
beq .Ladvance4_2
add x17, x13, x15 // Calculate subroutine to jump to.
br x17
.LisDouble2:
cmp w17, #'D' // is this a double?
bne .LisLong2
cmp x15, # 8*12 // Skip this load if all registers full.
beq .Ladvance8_2
// S and D tables share the x15 offset, so a float and a double never use the same
// register index.
add x17, x14, x15 // Calculate subroutine to jump to.
br x17
.LisLong2:
cmp w17, #'J' // is this a long?
bne .LisOther2
// 7*12 = offset just past the seventh integer table entry (x1..x7 all used).
cmp x8, # 7*12 // Skip this load if all registers full.
beq .Ladvance8_2
add x17, x12, x8 // Calculate subroutine to jump to.
br x17
.LisOther2: // Everything else takes one vReg.
cmp x8, # 7*12 // Skip this load if all registers full.
beq .Ladvance4_2
add x17, x11, x8 // Calculate subroutine to jump to.
br x17
// Register file exhausted for this kind: just step over the argument in the stack copy.
.Ladvance4_2:
add x9, x9, #4
b .LfillRegisters2
.Ladvance8_2:
add x9, x9, #8
b .LfillRegisters2
// Load tables. Entry N of each table loads the N-th register of that kind from [x9],
// advances x9 and the table offset, and branches back to the loop.
// Store ints.
.LstoreW1_2:
LOADREG x8 4 w1 .LfillRegisters2
LOADREG x8 4 w2 .LfillRegisters2
LOADREG x8 4 w3 .LfillRegisters2
LOADREG x8 4 w4 .LfillRegisters2
LOADREG x8 4 w5 .LfillRegisters2
LOADREG x8 4 w6 .LfillRegisters2
LOADREG x8 4 w7 .LfillRegisters2
// Store longs.
.LstoreX1_2:
LOADREG x8 8 x1 .LfillRegisters2
LOADREG x8 8 x2 .LfillRegisters2
LOADREG x8 8 x3 .LfillRegisters2
LOADREG x8 8 x4 .LfillRegisters2
LOADREG x8 8 x5 .LfillRegisters2
LOADREG x8 8 x6 .LfillRegisters2
LOADREG x8 8 x7 .LfillRegisters2
// Store singles.
.LstoreS0_2:
LOADREG x15 4 s0 .LfillRegisters2
LOADREG x15 4 s1 .LfillRegisters2
LOADREG x15 4 s2 .LfillRegisters2
LOADREG x15 4 s3 .LfillRegisters2
LOADREG x15 4 s4 .LfillRegisters2
LOADREG x15 4 s5 .LfillRegisters2
LOADREG x15 4 s6 .LfillRegisters2
LOADREG x15 4 s7 .LfillRegisters2
// Store doubles.
.LstoreD0_2:
LOADREG x15 8 d0 .LfillRegisters2
LOADREG x15 8 d1 .LfillRegisters2
LOADREG x15 8 d2 .LfillRegisters2
LOADREG x15 8 d3 .LfillRegisters2
LOADREG x15 8 d4 .LfillRegisters2
LOADREG x15 8 d5 .LfillRegisters2
LOADREG x15 8 d6 .LfillRegisters2
LOADREG x15 8 d7 .LfillRegisters2
// All shorty characters consumed: call the method and store its result.
.LcallFunction2:
INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_static_stub
(1) ART_METHOD_QUICK_CODE_OFFSET_64为ArtMethod::EntryPointFromQuickCompiledCodeOffset(PointerSize::k64).Int32Value()
(2) 准备好参数后跳转到要执行的方法,可以通过这个方法地址判断是什么方法类型,比如jni方法,那x9就是art_quick_generic_jni_trampoline.如果是jit转解释模式,art_quick_to_interpreter_bridge
(3) 还原之前保存的栈
(4) 恢复保存的x19,x20,xFP,xLR,x4[result],x5[shorty]寄存器
(5) 读取shorty[0]判断函数返回类型
(6) 如果是void,则直接返回
(7) 如果是浮点,返回值在s/d0寄存器中,将其写入result[x4]
(8) 其它类型,返回值在x/w0中,将x0写入result[x4]
// INVOKE_STUB_CALL_AND_RETURN
//   Shared tail of the invoke stubs. Expects x0 = ArtMethod*, the argument registers
//   already filled, and the frame laid out by INVOKE_STUB_CREATE_FRAME (xFP pointing at
//   the register save area, SAVE_SIZE defined).
//   It calls the method's quick entry point, unwinds the argument area, restores the
//   spilled registers (including x4 = result pointer and x5 = shorty), then stores the
//   return value into *x4 according to shorty[0]:
//     'V' -> nothing stored, 'D' -> d0, 'F' -> s0, anything else -> x0.
//   REFRESH_MARKING_REGISTER and the RESTORE_* macros are defined elsewhere.
.macro INVOKE_STUB_CALL_AND_RETURN
REFRESH_MARKING_REGISTER
// load method-> METHOD_QUICK_CODE_OFFSET
//(1)
ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
// Branch to method.
//(2)
blr x9
//(3)
// Pop the ArtMethod* (null), arguments and alignment padding from the stack.
mov sp, xFP
.cfi_def_cfa_register sp
//(4)
// Restore saved registers including value address and shorty address.
RESTORE_TWO_REGS x19, x20, 16
RESTORE_TWO_REGS xFP, xLR, 32
RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE
// Store result (w0/x0/s0/d0) appropriately, depending on resultType.
//(5)
// shorty[0] is the return type character.
ldrb w10, [x5]
// Check the return type and store the correct register into the jvalue in memory.
// Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.
// Don't set anything for a void type.
//(6)
cmp w10, #'V'
beq 1f
//(7)
// Is it a double?
cmp w10, #'D'
beq 2f
// Is it a float?
//(7)
cmp w10, #'F'
beq 3f
//(8)
// Just store x0. Doesn't matter if it is 64 or 32 bits.
str x0, [x4]
1: // Finish up.
ret
2: // Store double.
str d0, [x4]
ret
3: // Store float.
str s0, [x4]
ret
.endm
art_quick_invoke_stub
/*
* extern"C" void art_quick_invoke_stub(ArtMethod *method, x0
* uint32_t *args, x1
* uint32_t argsize, w2
* Thread *self, x3
* JValue *result, x4
* char *shorty); x5
*/
(1) 和静态方法不同,w1保存this
(2) x0保存ArtMethod*,w1保存this,所以第一个参数保存在w/x2
// art_quick_invoke_stub
//   Entry stub for invoking a non-static method. Same structure as the static stub,
//   except that the first 4-byte slot of the copied arguments is loaded into w1 as
//   "this" before the shorty is scanned, so integer arguments start at x/w2 and only
//   six integer registers (x2..x7) are available:
//     'F' -> s0..s7, 'D' -> d0..d7, 'J' -> x2..x7, anything else -> w2..w7.
//
//   Register roles inside the fill loop:
//     x9        cursor into the copied argument area (advanced by 4 or 8 per argument)
//     x10       cursor into the shorty (starts at shorty + 1, skipping the return type)
//     x8, x15   byte offsets into the integer / floating-point load tables
//     x11..x14  base addresses of the W, X, S and D load tables
//     x17       scratch: current shorty character, then the computed branch target
ENTRY art_quick_invoke_stub
// Spill registers as per AAPCS64 calling convention.
INVOKE_STUB_CREATE_FRAME
// Fill registers x/w2 to x/w7 and s/d0 to s/d7 with parameters (w1 receives "this").
// Parse the passed shorty to determine which register to load.
// Load addresses for routines that load WXSD registers.
adr x11, .LstoreW2
adr x12, .LstoreX2
adr x13, .LstoreS0
adr x14, .LstoreD0
// Initialize routine offsets to 0 for integers and floats.
// x8 for integers, x15 for floating point.
mov x8, #0
mov x15, #0
// x5 may be overwritten by the loads below (it is one of the argument registers),
// so the shorty is walked through x10 instead.
add x10, x5, #1 // Load shorty address, plus one to skip return value.
//(1)
ldr w1, [x9],#4 // Load "this" parameter, and increment arg pointer.
// Loop to fill registers.
.LfillRegisters:
ldrb w17, [x10], #1 // Load next character in signature, and increment.
cbz w17, .LcallFunction // Exit at end of signature. Shorty 0 terminated.
cmp w17, #'F' // is this a float?
bne .LisDouble
// 8*12 = offset just past the eighth FP table entry, i.e. s0..s7 / d0..d7 are all used.
cmp x15, # 8*12 // Skip this load if all registers full.
beq .Ladvance4
add x17, x13, x15 // Calculate subroutine to jump to.
br x17
.LisDouble:
cmp w17, #'D' // is this a double?
bne .LisLong
cmp x15, # 8*12 // Skip this load if all registers full.
beq .Ladvance8
add x17, x14, x15 // Calculate subroutine to jump to.
br x17
.LisLong:
cmp w17, #'J' // is this a long?
bne .LisOther
// 6*12 = offset just past the sixth integer table entry (x2..x7 all used).
cmp x8, # 6*12 // Skip this load if all registers full.
beq .Ladvance8
add x17, x12, x8 // Calculate subroutine to jump to.
br x17
.LisOther: // Everything else takes one vReg.
cmp x8, # 6*12 // Skip this load if all registers full.
beq .Ladvance4
add x17, x11, x8 // Calculate subroutine to jump to.
br x17
// Register file exhausted for this kind: just step over the argument in the stack copy.
.Ladvance4:
add x9, x9, #4
b .LfillRegisters
.Ladvance8:
add x9, x9, #8
b .LfillRegisters
// Macro for loading a parameter into a register.
// counter - the register with offset into these tables
// size - the size of the register - 4 or 8 bytes.
// register - the name of the register to be loaded.
// return - label to branch to once the register has been loaded.
// Expands to exactly three instructions (12 bytes); the table offsets and the
// N*12 limits in the fill loops rely on that size.
.macro LOADREG counter size register return
ldr \register , [x9], #\size
add \counter, \counter, 12
b \return
.endm
//(2)
// Load tables. Entry N of each table loads the N-th register of that kind from [x9],
// advances x9 and the table offset, and branches back to the loop.
// Store ints.
.LstoreW2:
LOADREG x8 4 w2 .LfillRegisters
LOADREG x8 4 w3 .LfillRegisters
LOADREG x8 4 w4 .LfillRegisters
LOADREG x8 4 w5 .LfillRegisters
LOADREG x8 4 w6 .LfillRegisters
LOADREG x8 4 w7 .LfillRegisters
// Store longs.
.LstoreX2:
LOADREG x8 8 x2 .LfillRegisters
LOADREG x8 8 x3 .LfillRegisters
LOADREG x8 8 x4 .LfillRegisters
LOADREG x8 8 x5 .LfillRegisters
LOADREG x8 8 x6 .LfillRegisters
LOADREG x8 8 x7 .LfillRegisters
// Store singles.
.LstoreS0:
LOADREG x15 4 s0 .LfillRegisters
LOADREG x15 4 s1 .LfillRegisters
LOADREG x15 4 s2 .LfillRegisters
LOADREG x15 4 s3 .LfillRegisters
LOADREG x15 4 s4 .LfillRegisters
LOADREG x15 4 s5 .LfillRegisters
LOADREG x15 4 s6 .LfillRegisters
LOADREG x15 4 s7 .LfillRegisters
// Store doubles.
.LstoreD0:
LOADREG x15 8 d0 .LfillRegisters
LOADREG x15 8 d1 .LfillRegisters
LOADREG x15 8 d2 .LfillRegisters
LOADREG x15 8 d3 .LfillRegisters
LOADREG x15 8 d4 .LfillRegisters
LOADREG x15 8 d5 .LfillRegisters
LOADREG x15 8 d6 .LfillRegisters
LOADREG x15 8 d7 .LfillRegisters
// All shorty characters consumed: call the method and store its result.
.LcallFunction:
INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_stub
调用trampoline时x0为ArtMethod,x1为参数1,依次类推。以下以art_quick_generic_jni_trampoline为例
(1)去执行真正的jni注册的方法
(2) jni方法返回值可能是x0也可能是d0,这个需要调用artQuickGenericJniEndTrampoline判断,然后将返回值保存在x0中
(3) 调用artQuickGenericJniTrampoline返回native code。 该值位于ArtMethod->ptr_sized_fields_.data
// art_quick_generic_jni_trampoline
//   Trampoline that dispatches a call to a native (JNI) implementation.
//   Visible flow:
//     1. Set up the save-refs-and-args frame (x0 = ArtMethod* on entry) and remember
//        sp in x28 and xFP.
//     2. Reserve 5120 bytes of scratch stack and call
//        artQuickGenericJniTrampoline(Thread*, sp), which returns the native code
//        pointer in x0 (0 on error) and the new stack bottom in x1.
//     3. Load x0..x7 and d0..d7 from the 128-byte block at the new sp, pop that block,
//        and call the native code through xIP0.
//     4. Pass the integer and floating-point results to
//        artQuickGenericJniEndTrampoline(Thread*, result, result_f).
//     5. If an exception is pending, deliver it; otherwise tear down the frame and
//        return with the value of x0 also copied into d0.
ENTRY art_quick_generic_jni_trampoline
SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
// Save SP , so we can have static CFI info.
mov x28, sp
.cfi_def_cfa_register x28
// This looks the same, but is different: this will be updated to point to the bottom
// of the frame when the handle scope is inserted.
mov xFP, sp
// Reserve a fixed 5120-byte scratch area below the frame; the unused part is released
// after the C call returns (see "Release part of the alloca" below).
mov xIP0, #5120
sub sp, sp, xIP0
// prepare for artQuickGenericJniTrampoline call
// (Thread*, SP)
// x0 x1 <= C calling convention
// xSELF xFP <= where they are
mov x0, xSELF // Thread*
mov x1, xFP
//(3)
bl artQuickGenericJniTrampoline // (Thread*, sp)
// The C call will have registered the complete save-frame on success.
// The result of the call is:
// x0: pointer to native code, 0 on error.
// x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
// Check for error = 0.
cbz x0, .Lexception_in_native
// Release part of the alloca.
mov sp, x1
// Save the code pointer
// x0 is about to be overwritten with the first native argument.
mov xIP0, x0
// Load parameters from frame into registers.
// TODO Check with artQuickGenericJniTrampoline.
// Also, check again AAPCS64 - the stack arguments are interleaved.
// Layout read here: 8 integer registers at [sp, #0..63], 8 FP registers at [sp, #64..127].
ldp x0, x1, [sp]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #48]
ldp d0, d1, [sp, #64]
ldp d2, d3, [sp, #80]
ldp d4, d5, [sp, #96]
ldp d6, d7, [sp, #112]
// Pop the 128-byte register block just consumed.
add sp, sp, #128
blr xIP0 // native call.(1)
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
// (Thread*, result, result_f)
// x0 x1 x2 <= C calling convention
mov x1, x0 // Result (from saved).
mov x0, xSELF // Thread register.
fmov x2, d0 // d0 will contain floating point result, but needs to go into x2
// (2)
bl artQuickGenericJniEndTrampoline
// Pending exceptions possible.
ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
cbnz x2, .Lexception_in_native
// Tear down the alloca.
mov sp, x28
.cfi_def_cfa_register sp
// Tear down the callee-save frame.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
REFRESH_MARKING_REGISTER
// store into fpr, for when it's a fpr return...
// The same 64 bits are left in both x0 and d0, so the caller can read whichever
// register matches the method's return type.
fmov d0, x0
ret
// Error path: reached when no native code pointer was returned or an exception is pending.
.Lexception_in_native:
// Move to x1 then sp to please assembler.
ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
add sp, x1, #-1 // Remove the GenericJNI tag.
.cfi_def_cfa_register sp
# This will create a new save-all frame, required by the runtime.
DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline