智能合约编写好之后需要通过编译器编译后才能在虚拟机上运行,智能合约的编译结果称为字节码,字节码是一串十六进制数字编码的字节数组。字节码的解析是以一个字节为单位,每个字节都表示一个EVM指令或一个操作数据。我们通过一个简单的智能合约来分析智能合约字节码对应的汇编指令的操作。
智能合约的例子如下:
pragma solidity ^0.4.19;
// Minimal example contract: one state variable, a constructor and one setter.
contract C {
// The contract's only state variable.
uint256 a;
// Constructor (a function named after the contract in this compiler version); sets a to 1.
function C() public {
a = 1;
}
// Overwrites a with the caller-supplied value _in.
function Foo(uint256 _in) public {
a = _in;
}
}
对应的字节码如下:
606060405260043610603f576000357c0100000000000000000000000000000000000000000000000000000000900463ffffffff1680631176bd96146044575b600080fd5b3415604e57600080fd5b606260048080359060200190919050506064565b005b80600081905550505600a165627a7a72305820889b48be07282eb533ea34e9be8dc6c8e79bf4e758d05ebc9fb3c2544e9f55ae0029
EVM依次读入这个字节码的十六进制数字"60 60 60 40 52 ...",首先解析第一个字节"60",从汇编指令集中查询该值对应的指令,0x60对应的指令是"PUSH1",因为PUSH1指令后面需要紧跟1个字节的立即数作为参数,因此第二个字节"60"是该PUSH1指令的参数(即压入0x60),然后继续解析第三个字节"60",它同样是PUSH1指令,其参数是后面的"40",以此类推。上面提到的汇编指令集完整的列表如下:
opcodes = {
0x00: ['STOP', 0, 0, 0],
0x01: ['ADD', 2, 1, 3],
0x02: ['MUL', 2, 1, 5],
0x03: ['SUB', 2, 1, 3],
0x04: ['DIV', 2, 1, 5],
0x05: ['SDIV', 2, 1, 5],
0x06: ['MOD', 2, 1, 5],
0x07: ['SMOD', 2, 1, 5],
0x08: ['ADDMOD', 3, 1, 8],
0x09: ['MULMOD', 3, 1, 8],
0x0a: ['EXP', 2, 1, 10],
0x0b: ['SIGNEXTEND', 2, 1, 5],
0x10: ['LT', 2, 1, 3],
0x11: ['GT', 2, 1, 3],
0x12: ['SLT', 2, 1, 3],
0x13: ['SGT', 2, 1, 3],
0x14: ['EQ', 2, 1, 3],
0x15: ['ISZERO', 1, 1, 3],
0x16: ['AND', 2, 1, 3],
0x17: ['OR', 2, 1, 3],
0x18: ['XOR', 2, 1, 3],
0x19: ['NOT', 1, 1, 3],
0x1a: ['BYTE', 2, 1, 3],
0x20: ['SHA3', 2, 1, 30],
0x30: ['ADDRESS', 0, 1, 2],
0x31: ['BALANCE', 1, 1, 20], # now 400
0x32: ['ORIGIN', 0, 1, 2],
0x33: ['CALLER', 0, 1, 2],
0x34: ['CALLVALUE', 0, 1, 2],
0x35: ['CALLDATALOAD', 1, 1, 3],
0x36: ['CALLDATASIZE', 0, 1, 2],
0x37: ['CALLDATACOPY', 3, 0, 3],
0x38: ['CODESIZE', 0, 1, 2],
0x39: ['CODECOPY', 3, 0, 3],
0x3a: ['GASPRICE', 0, 1, 2],
0x3b: ['EXTCODESIZE', 1, 1, 20], # now 700
0x3c: ['EXTCODECOPY', 4, 0, 20], # now 700
0x3d: ['RETURNDATASIZE', 0, 1, 2],
0x3e: ['RETURNDATACOPY', 3, 0, 3],
0x40: ['BLOCKHASH', 1, 1, 20],
0x41: ['COINBASE', 0, 1, 2],
0x42: ['TIMESTAMP', 0, 1, 2],
0x43: ['NUMBER', 0, 1, 2],
0x44: ['DIFFICULTY', 0, 1, 2],
0x45: ['GASLIMIT', 0, 1, 2],
0x50: ['POP', 1, 0, 2],
0x51: ['MLOAD', 1, 1, 3],
0x52: ['MSTORE', 2, 0, 3],
0x53: ['MSTORE8', 2, 0, 3],
0x54: ['SLOAD', 1, 1, 50], # 200 now
# actual cost 5000-20000 depending on circumstance
0x55: ['SSTORE', 2, 0, 0],
0x56: ['JUMP', 1, 0, 8],
0x57: ['JUMPI', 2, 0, 10],
0x58: ['PC', 0, 1, 2],
0x59: ['MSIZE', 0, 1, 2],
0x5a: ['GAS', 0, 1, 2],
0x5b: ['JUMPDEST', 0, 0, 1],
0x60: ['PUSH1', 0, 1, 3],
......
0x7f: ['PUSH32', 0, 1, 3],
0x80: ['DUP1', 1, 2, 3],
......
0x8f: ['DUP16', 16, 17, 3],
0x90: ['SWAP1', 2, 2, 3],
......
0x9f: ['SWAP16', 17, 17, 3],
0xa0: ['LOG0', 2, 0, 375],
0xa1: ['LOG1', 3, 0, 750],
0xa2: ['LOG2', 4, 0, 1125],
0xa3: ['LOG3', 5, 0, 1500],
0xa4: ['LOG4', 6, 0, 1875],
# 0xe1: ['SLOADBYTES', 3, 0, 50], # to be discontinued
# 0xe2: ['SSTOREBYTES', 3, 0, 0], # to be discontinued
# 0xe3: ['SSIZE', 1, 1, 50], # to be discontinued
0xf0: ['CREATE', 3, 1, 32000],
0xf1: ['CALL', 7, 1, 40], # 700 now
0xf2: ['CALLCODE', 7, 1, 40], # 700 now
0xf3: ['RETURN', 2, 0, 0],
0xf4: ['DELEGATECALL', 6, 1, 40], # 700 now
0xf5: ['CALLBLACKBOX', 7, 1, 40],
0xfa: ['STATICCALL', 6, 1, 40],
0xfd: ['REVERT', 2, 0, 0],
0xff: ['SUICIDE', 1, 0, 0], # 5000 now
}
以操作码CALL为例, 说明其中的含义如下:
- 0xf1表示操作码对应的数值,
- 7表示输入参数的个数,即从栈中弹出的数据个数
- 1表示输出参数的个数, 压回栈中的数据个数
- 40表示执行该操作需要花费gas数量
根据该指令集,对于上面的字节码可以翻译出对应的汇编指令代码:
// 这一部分是创建合约时执行的汇编指令, 等合约部署完成后,不会再被执行
.code
PUSH 60 //压入0x60 stack=[0x60]
PUSH 40 //压入0x40 stack=[0x60,0x40].
MSTORE //在memory偏移0x40(第64字节)处写入值0x60, 即初始化"空闲内存指针": 表示可用的空闲内存从0x60开始 stack=[]
CALLVALUE //压入创建合约的这笔交易携带的eth数量,没有就压入0, stack=[value].
ISZERO //判断压入的value是否为0, 如果是0就压入1, 否则压入0: stack=[0x1/0x0].
PUSH [tag] 1 //压入tag1 stack=[0x1/0x0,tag1].
JUMPI //如果没有value,跳转到tag1, 否则,继续往下执行 stack=[]
PUSH 0 //stack=[0]
DUP1 //stack=[0,0]
REVERT //该指令是metropolis hardfork之后才支持的, 该指令返回memory地址=0,size=0中的数据 stack=[]
tag 1 //这里是没有携带value的部署合约交易的执行入口, 实际上就是构造函数的执行代码
JUMPDEST //没有含义, 表示跳转的目标位置
PUSH 1
PUSH 0
DUP2
SWAP1
SSTORE
POP //以上都是 构造函数中 执行a = 1;的汇编指令, 详细分析见下文.
//压入本合约字节码中.data段的长度, .data段是智能合约真正的字节码 //stack=[len]
PUSH #[$] 0000000000000000000000000000000000000000000000000000000000000000
DUP1 //stack=[len,len]
//压入.data段在本字节码中的偏移地址 stack=[len,len,offset]
PUSH [$] 0000000000000000000000000000000000000000000000000000000000000000
PUSH 0 //压入合约字节码复制到memory的起始地址,即0, stack=[len,len,offset,0]
//根据len和offset 复制下面.data开始的字节码到0地址开始的memory内存中
CODECOPY
PUSH 0 //stack=[len,0]
RETURN //将memory的空间扩展到len(如果之前不足的话) 并返回部署后的字节码. stack=[]
.data // 这一部分开始合约内容的汇编指令
0:
.code
PUSH 60
PUSH 40
MSTORE //在memory偏移0x40处写入0x60, 初始化空闲内存指针(空闲内存从0x60开始) stack=[]
PUSH 4 //stack=[4]
CALLDATASIZE //压入交易携带的calldata的数据长度 stack=[4,size]
LT //检查size是否小于4, 小于:压入1, 不小于: 压入0 stack=[0]
PUSH [tag] 1 //stack=[0, tag1]
JUMPI //size小于4, 跳转到tag1, 否则继续
PUSH 0 //stack=[0]
CALLDATALOAD //加载calldata中的前32个字节 stack=[calldata[0:32]]
PUSH 100000000000000000000000000000000000000000000000000000000
SWAP1 //stack=[1000000..., calldata[0:32]]
DIV //calldata[0:32]/1000000... 表示取calldata[0:32]的前4个字节
PUSH FFFFFFFF // stack=[calldata[0:4], FFFFFFFF]
AND //确保calldata[0:4]的前28(32-4)字节是0 stack=[calldata[0:4]]
DUP1 //stack=[calldata[0:4],calldata[0:4]]
//压入Foo(uint256)的hash数据
PUSH 1176BD96 //stack=[calldata[0:4],calldata[0:4],1176BD96]
EQ //比较是否相等, 相等:压入1; 不相等:压入0
PUSH [tag] 2 //stack=[calldata[0:4],1,tag2]
JUMPI //如果相等, 跳转到tag2, 否则继续 stack=[calldata[0:4]]
//[注意]: 如果合约定义了多个函数, 会重复上面的流程, 挨个比较hash值,直到找到hash
tag 1 // 合约函数调用时, calldata 长度小于4, 或者没有任何函数的hash与calldata[0:4]匹配(本合约没有定义fallback函数), 撤销操作
JUMPDEST
PUSH 0
DUP1
REVERT
tag 2 //函数Foo(uint256 _in)的入口: 先检查交易是否携带了eth(Foo不是payable函数)
JUMPDEST
CALLVALUE //stack=[calldata[0:4],value]
ISZERO //检查value是否是0, 是0就压入1, 否则压入0 stack=[calldata[0:4],0x1/0x0]
PUSH [tag] 3 //stack=[calldata[0:4],0x1/0x0, tag3]
JUMPI //如果value为0(栈上的条件值非0), 跳转到tag3 stack=[calldata[0:4]]
PUSH 0 //否则(携带了eth), 返回, 撤销操作 stack=[calldata[0:4],0]
DUP1 //stack=[calldata[0:4],0,0]
REVERT
tag 3 //函数Foo(uint256 _in)的代码实现, 中间的指令貌似有一些冗余
JUMPDEST
PUSH [tag] 4 //stack=[calldata[0:4],tag4]
PUSH 4 //stack=[calldata[0:4],tag4,4]
DUP1 //stack=[calldata[0:4],tag4,4,4]
DUP1 //stack=[calldata[0:4],tag4,4,4,4]
CALLDATALOAD //stack=[calldata[0:4],tag4,4,4,calldata[4:36]]
SWAP1 //stack=[calldata[0:4],tag4,4,calldata[4:36],4]
PUSH 20 //stack=[calldata[0:4],tag4,4,calldata[4:36],4,0x20]
ADD //4+0x20=0x24 stack=[calldata[0:4],tag4,4,calldata[4:36],0x24]
SWAP1 //stack=[calldata[0:4],tag4,4,0x24,calldata[4:36]]
SWAP2 //stack=[calldata[0:4],tag4,calldata[4:36],0x24,4]
SWAP1 //stack=[calldata[0:4],tag4,calldata[4:36],4,0x24]
POP //stack=[calldata[0:4],tag4,calldata[4:36],4]
POP //stack=[calldata[0:4],tag4,calldata[4:36]]
PUSH [tag] 5 //stack=[calldata[0:4],tag4,calldata[4:36],tag5]
JUMP //跳转到tag5 stack=[calldata[0:4],tag4,calldata[4:36]]
tag 4 //function Foo(uint256 _in) publ... 函数Foo执行完成后的返回位置
JUMPDEST //function Foo(uint256 _in) publ...
STOP //function Foo(uint256 _in) publ... 正常结束执行
tag 5 //函数Foo(uint256 _in) 最终执行代码
JUMPDEST
DUP1 //stack=[calldata[0:4],tag4,calldata[4:36],calldata[4:36]]
PUSH 0 //stack=[calldata[0:4],tag4,calldata[4:36],calldata[4:36],0]
DUP2 //stack=[calldata[0:4],tag4,calldata[4:36],calldata[4:36],0,calldata[4:36]]
SWAP1 //stack=[calldata[0:4],tag4,calldata[4:36],calldata[4:36],calldata[4:36],0]
SSTORE //将参数_in写入storage的0号位置(即状态变量a): stack=[calldata[0:4],tag4,calldata[4:36],calldata[4:36]]
POP //stack=[calldata[0:4],tag4,calldata[4:36]]
POP //stack=[calldata[0:4],tag4]
JUMP [out] //跳转到栈顶的tag4, 在tag4处执行STOP, 执行成功, 退出 stack=[calldata[0:4]]
JUMP [out] //执行成功, 退出
.data
为了方便理解, 将指令的执行过程以注释的方式写在代码中。根据注释,可以看出指令对数据的处理,包括栈,存储,内存中的数据的变化。