一、源码
一、PHP数组源码初步分析
1、基本类型定义(php7.3.9)
typedef union _zend_value {
zend_long lval; /* long value */
double dval; /* double value */
zend_refcounted *counted;
zend_string *str;
zend_array *arr;
zend_object *obj;
zend_resource *res;
zend_reference *ref;
zend_ast_ref *ast;
zval *zv;
void *ptr;
zend_class_entry *ce;
zend_function *func;
struct {
uint32_t w1;
uint32_t w2;
} ww;
} zend_value;
struct _zval_struct {
zend_value value; /* value */
union {
struct {
ZEND_ENDIAN_LOHI_3(
zend_uchar type, /* active type */
zend_uchar type_flags,
union {
uint16_t call_info; /* call info for EX(This) */
uint16_t extra; /* not further specified */
} u)
} v;
uint32_t type_info;
} u1;
union {
uint32_t next; /* hash collision chain */
uint32_t cache_slot; /* cache slot (for RECV_INIT) */
uint32_t opline_num; /* opline number (for FAST_CALL) */
uint32_t lineno; /* line number (for ast nodes) */
uint32_t num_args; /* arguments number for EX(This) */
uint32_t fe_pos; /* foreach position */
uint32_t fe_iter_idx; /* foreach iterator index */
uint32_t access_flags; /* class constant access flags */
uint32_t property_guard; /* single property guard */
uint32_t constant_flags; /* constant flags */
uint32_t extra; /* not further specified */
} u2;
};
/* regular data types */
#define IS_UNDEF 0
#define IS_NULL 1
#define IS_FALSE 2
#define IS_TRUE 3
#define IS_LONG 4
#define IS_DOUBLE 5
#define IS_STRING 6
#define IS_ARRAY 7
#define IS_OBJECT 8
#define IS_RESOURCE 9
#define IS_REFERENCE 10
/* constant expressions */
#define IS_CONSTANT_AST 11
/* internal types */
#define IS_INDIRECT 13
#define IS_PTR 14
#define _IS_ERROR 15
/* fake types used only for type hinting (Z_TYPE(zv) can not use them) */
#define _IS_BOOL 16
#define IS_CALLABLE 17
#define IS_ITERABLE 18
#define IS_VOID 19
#define _IS_NUMBER 20
typedef struct _Bucket {
zval val;
zend_ulong h; /* hash value (or numeric index) */
zend_string *key; /* string key or NULL for numerics */
} Bucket;
typedef struct _zend_array HashTable;
struct _zend_array {
zend_refcounted_h gc;
union {
struct {
ZEND_ENDIAN_LOHI_4(
zend_uchar flags,
zend_uchar _unused,
zend_uchar nIteratorsCount,
zend_uchar _unused2)
} v;
uint32_t flags;
} u;
uint32_t nTableMask;
Bucket *arData;
uint32_t nNumUsed;
uint32_t nNumOfElements;
uint32_t nTableSize;
uint32_t nInternalPointer;
zend_long nNextFreeElement;
dtor_func_t pDestructor;
};
2、常见宏
#define HT_MIN_MASK ((uint32_t) -2)
#define HT_MIN_SIZE 8
#define HT_HASH_EX(data, idx) ((uint32_t*)(data))[(int32_t)(idx)]
#define HT_SET_DATA_ADDR(ht, ptr) do { \
(ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \
} while (0)
3、几个GDB技巧
#根据Bucket的h值推算槽位 p (int32_t)($54.h|-8) ,$54为Bucket
p (int32_t)(9223372036854953484|-8)
-4
#根据槽位推算存储位置 p ((uint32_t*)($14.arData))[(int32_t)(-3)]
p ((uint32_t*)($14.arData))[(int32_t)(-1)]
1
#带符号数字转换
p (int32_t )4294967288
-8
#查看-8的二进制
p /t -8
11111111111111111111111111111000
4、HashTable的API
a、初始化
static zend_always_inline void _zend_hash_init_int(HashTable *ht, uint32_t nSize, dtor_func_t pDestructor, zend_bool persistent)
{
GC_SET_REFCOUNT(ht, 1);
GC_TYPE_INFO(ht) = IS_ARRAY | (persistent ? (GC_PERSISTENT << GC_FLAGS_SHIFT) : (GC_COLLECTABLE << GC_FLAGS_SHIFT));
HT_FLAGS(ht) = HASH_FLAG_STATIC_KEYS;
ht->nTableMask = HT_MIN_MASK;
HT_SET_DATA_ADDR(ht, &uninitialized_bucket);
ht->nNumUsed = 0;
ht->nNumOfElements = 0;
ht->nInternalPointer = 0;
ht->nNextFreeElement = 0;
ht->pDestructor = pDestructor;
ht->nTableSize = zend_hash_check_size(nSize);
}
注意宏 HT_SET_DATA_ADDR
HashTable结构体arData地址获取宏
#define HT_SET_DATA_ADDR(ht, ptr) do { \ /*ptr为申请内存地址 先转化为char* 再加nTableMask */
(ht)->arData = (Bucket*)(((char*)(ptr)) + HT_HASH_SIZE((ht)->nTableMask)); \
} while (0)
static const uint32_t uninitialized_bucket[-HT_MIN_MASK] =
{HT_INVALID_IDX, HT_INVALID_IDX};
c、查找 zend_hash_find_bucket
static zend_always_inline Bucket *zend_hash_find_bucket(const HashTable *ht, zend_string *key, zend_bool known_hash) {
zend_ulong h;
uint32_t nIndex;
uint32_t idx;
Bucket *p, *arData;
if (known_hash) {
h = ZSTR_H(key); //直接读取
} else {
h = zend_string_hash_val(key); //计算得到hash
}
arData = ht->arData;
nIndex = h | ht->nTableMask; //计算搜索的key应该存储到的nIndex
idx = HT_HASH_EX(arData, nIndex); //实际nIndex存储的idx
if (UNEXPECTED(idx == HT_INVALID_IDX)) { //如果idx为空 不存在 HT_INVALID_IDX=-1
return NULL;
}
p = HT_HASH_TO_BUCKET_EX(arData, idx);//获取idx存储位置对应的Bucket
if (EXPECTED(p->key == key)) { /* check for the same interned string 即同一个zend_string实例*/
return p;
}
while (1) {
if (p->h == ZSTR_H(key) && EXPECTED(p->key) && zend_string_equal_content(p->key, key)) {//hash及内容比较
return p;
}
idx = Z_NEXT(p->val);
if (idx == HT_INVALID_IDX) { //不存在拉链
return NULL;
}
p = HT_HASH_TO_BUCKET_EX(arData, idx);
if (p->key == key) { /* check for the same interned string */
return p;
}
}
}
d、扩容
static void ZEND_FASTCALL zend_hash_do_resize(HashTable *ht) {
IS_CONSISTENT(ht);
HT_ASSERT_RC1(ht);
if (ht->nNumUsed > ht->nNumOfElements + (ht->nNumOfElements >> 5)) { //已使用的Bucket > 有效Bucket+(有效Bucket /32)
/* additional term is there to amortize the cost of compaction */
zend_hash_rehash(ht);
} else if (ht->nTableSize < HT_MAX_SIZE) {// 小于最大数组限制
/* Let's double the table size */
void *new_data, *old_data = HT_GET_DATA_ADDR(ht);
uint32_t nSize = ht->nTableSize + ht->nTableSize;
Bucket *old_buckets = ht->arData;
ht->nTableSize = nSize;
new_data = pemalloc(HT_SIZE_EX(nSize, HT_SIZE_TO_MASK(nSize)), GC_FLAGS(ht) & IS_ARRAY_PERSISTENT);
ht->nTableMask = HT_SIZE_TO_MASK(ht->nTableSize);
HT_SET_DATA_ADDR(ht, new_data); //设置新arData地址
memcpy(ht->arData, old_buckets, sizeof(Bucket) * ht->nNumUsed); //直接内存拷贝
pefree(old_data, GC_FLAGS(ht) & IS_ARRAY_PERSISTENT); //释放原内存
zend_hash_rehash(ht); //rehash
} else {
zend_error_noreturn(E_ERROR, “Possible integer overflow in memory allocation (%u * %zu + %zu)”, ht->nTableSize * 2, \
sizeof(Bucket) + sizeof(uint32_t), sizeof(Bucket));
}
}
e、Hash冲突
static zend_always_inline void _zend_hash_append_ind(HashTable *ht, zend_string *key, zval *ptr) {
uint32_t idx = ht->nNumUsed++;
uint32_t nIndex;
Bucket *p = ht->arData + idx; //获取追加新位置
ZVAL_INDIRECT(&p->val, ptr);
if (!ZSTR_IS_INTERNED(key)) {
HT_FLAGS(ht) &= ~HASH_FLAG_STATIC_KEYS;
zend_string_addref(key);
zend_string_hash_val(key);
}
p->key = key;
p->h = ZSTR_H(key);
nIndex = (uint32_t)p->h | ht->nTableMask;
Z_NEXT(p->val) = HT_HASH(ht, nIndex); //将原存idx存储到p.u2.next
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(idx); //将新idx存储到nIndex
ht->nNumOfElements++;
}
#define Z_NEXT(zval) (zval).u2.next
#define HT_HASH_EX(data, idx) \
((uint32_t*)(data))[(int32_t)(idx)]
#define HT_HASH(ht, idx) \
HT_HASH_EX((ht)->arData, idx)
# define HT_IDX_TO_HASH(idx) \
(idx)
f、rehash
ZEND_API int ZEND_FASTCALL zend_hash_rehash(HashTable *ht)
{
Bucket *p;
uint32_t nIndex, i;
IS_CONSISTENT(ht);
if (UNEXPECTED(ht->nNumOfElements == 0)) {//空数组
if (HT_FLAGS(ht) & HASH_FLAG_INITIALIZED) {//如果已初始化
ht->nNumUsed = 0;
HT_HASH_RESET(ht);
}
return SUCCESS;
}
HT_HASH_RESET(ht);
i = 0;
p = ht->arData;
if (HT_IS_WITHOUT_HOLES(ht)) {//不存在删除元素
do {
nIndex = p->h | ht->nTableMask;//逐个计算索引位置
Z_NEXT(p->val) = HT_HASH(ht, nIndex);//原索引位置存储的槽位到当前Bucket的next
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);//将现Bucket的槽位存储到索引空间
p++;
} while (++i < ht->nNumUsed);
} else {//存在已删除情况
uint32_t old_num_used = ht->nNumUsed;
do {
if (UNEXPECTED(Z_TYPE(p->val) == IS_UNDEF)) {//已删除元素
uint32_t j = i;
Bucket *q = p;
if (EXPECTED(!HT_HAS_ITERATORS(ht))) {
while (++i < ht->nNumUsed) {
p++;//下一个Bucket
if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {//正常Bucket
ZVAL_COPY_VALUE(&q->val, &p->val);//拷贝
q->h = p->h;
nIndex = q->h | ht->nTableMask;
q->key = p->key;
Z_NEXT(q->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
if (UNEXPECTED(ht->nInternalPointer == i)) {
ht->nInternalPointer = j;
}
q++;
j++;
}
}
} else {
uint32_t iter_pos = zend_hash_iterators_lower_pos(ht, 0);
while (++i < ht->nNumUsed) {
p++;
if (EXPECTED(Z_TYPE_INFO(p->val) != IS_UNDEF)) {
ZVAL_COPY_VALUE(&q->val, &p->val);
q->h = p->h;
nIndex = q->h | ht->nTableMask;
q->key = p->key;
Z_NEXT(q->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(j);
if (UNEXPECTED(ht->nInternalPointer == i)) {
ht->nInternalPointer = j;
}
if (UNEXPECTED(i >= iter_pos)) {
do {
zend_hash_iterators_update(ht, iter_pos, j);
iter_pos = zend_hash_iterators_lower_pos(ht, iter_pos + 1);
} while (iter_pos < i);
}
q++;
j++;
}
}
}
ht->nNumUsed = j;
break;
}
nIndex = p->h | ht->nTableMask;
Z_NEXT(p->val) = HT_HASH(ht, nIndex);
HT_HASH(ht, nIndex) = HT_IDX_TO_HASH(i);
p++;
} while (++i < ht->nNumUsed);
/* Migrate pointer to one past the end of the array to the new one past the end, so that
* newly inserted elements are picked up correctly. */
if (UNEXPECTED(HT_HAS_ITERATORS(ht))) {
_zend_hash_iterators_update(ht, old_num_used, ht->nNumUsed);
}
}
return SUCCESS;
}
二、实战
1、脚本ht.php
<?php
$ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
var_export($ar);
unset($ar['q']);
var_export($ar);
$ar['g'] = 'glz';
var_export($ar);
$ar['h'] = 'hello';
var_export($ar);
2、GDB运行,执行如下
b zend_execute
r ht.php
p op_array
p *op_array
p $2.opcodes
p *$2.opcodes
p $3+1
p *$5
...
p $3+10
p *$23
3、获取opcode_handler如下,附猜测可能对应的php代码:
1、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER $ar = ['a'=>'abc','b'=>'bcd','c'=>'cde','d'=>'def','e'=>'fgh','q'=>'ghi','i'=>'gkl','y'=>'glz'];
2、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
3、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
4、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
5、ZEND_UNSET_DIM_SPEC_CV_CONST_HANDLER unset
6、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
7、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
8、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
9、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['g'] = 'glz';
10、ZEND_NULL_HANDLER
11、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
12、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
13、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
14、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER $ar['h'] = 'hello';
15、ZEND_NULL_HANDLER
16、ZEND_INIT_FCALL_SPEC_CONST_HANDLER var_export
17、ZEND_SEND_VAR_SPEC_CV_HANDLER $ar
18、ZEND_DO_ICALL_SPEC_RETVAL_UNUSED_HANDLER var_export
19、ZEND_RETURN_SPEC_CONST_HANDLER 1
4、打若干断点,以便查看不同阶段HashTable内容
(gdb) b ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER #value
(gdb) b ZEND_SEND_VAR_SPEC_CV_HANDLER #varptr
(gdb) b ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER # 执行到最后p object_ptr
5、ZEND_ASSIGN_SPEC_CV_CONST_RETVAL_UNUSED_HANDLER
39441 value = EX_CONSTANT(opline->op2);
n
p value
p *value
p $28.value.arr
p *$28.value.arr
#$30 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'},
# flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5ea20, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0,
# pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
p $30.arData
p *$30.arData
...
p $31+7
p *$43
#查看next的具体指
p (int32_t)4294967295 # -1
#查看key值
p *$32.key.val@1
#根据h查看槽位
p (int32_t)($32.h|-8)
#根据槽位查看arDataIdex
p ((uint32_t*)($30.arData))[(int32_t)(-1)]
##观察next和key
arDataIdx next key h h|-8
0 -1 a 9223372036854953478 -2
1 -1 b 9223372036854953479 -1
2 -1 c 9223372036854953480 -8
3 -1 d 9223372036854953481 -7
4 -1 e 9223372036854953482 -6
5 0 q 9223372036854953494 -2
6 5 i 9223372036854953486 -2
7 6 y 9223372036854953502 -2
##查看-2槽位
p ((uint32_t*)($30.arData))[(int32_t)(-2)] # 7
##观察个槽位存储情况
slot idx
-1 1
-2 7
-3 -1#p (int32_t)4294967295 -> -1
-4 -1
-5 -1
-6 4
-7 3
-8 2
#总结:拉链在-2槽位形成 7->6->5->0
6、ZEND_SEND_VAR_SPEC_CV_HANDLER
p varptr
p *varptr.value.arr
#$27 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'},
# flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 7, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0,
# pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
p $29+5
next = 0
p $29+6
next = 0
(gdb) p *$30.key.val@1
$35 = "q"
(gdb) p *$32.key.val@1
$36 = "i"
#总结 拉链:7->6->0, 5已被跳过
#注意5的内容:type=0 如下:
$31 = {val = {value = {lval = 140737314303296, dval = 6.9533472085220443e-310, counted = 0x7ffff5a02d40, str = 0x7ffff5a02d40, arr = 0x7ffff5a02d40, obj = 0x7ffff5a02d40, res = 0x7ffff5a02d40,
ref = 0x7ffff5a02d40, ast = 0x7ffff5a02d40, zv = 0x7ffff5a02d40, ptr = 0x7ffff5a02d40, ce = 0x7ffff5a02d40, func = 0x7ffff5a02d40, ww = {w1 = 4120915264, w2 = 32767}}, u1 = {v = {type = 0 '\000',
type_flags = 0 '\000', const_flags = 0 '\000', reserved = 0 '\000'}, type_info = 0}, u2 = {next = 0, cache_slot = 0, lineno = 0, num_args = 0, fe_pos = 0, fe_iter_idx = 0, access_flags = 0,
property_guard = 0}}, h = 9223372036854953494, key = 0x7ffff5a02d00}
7、ZEND_ASSIGN_DIM_SPEC_CV_CONST_OP_DATA_CONST_HANDLER
# value = zend_assign_to_variable(variable_ptr, value, IS_CONST);
p *object_ptr
p *$47.value.arr
#$48 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'},
# flags = 10}, nTableMask = 4294967288, arData = 0x7ffff5a5eb60, nNumUsed = 8, nNumOfElements = 8, nTableSize = 8, nInternalPointer = 0, nNextFreeElement = 0,
# pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
p $57+7
p *$64.key.val@1 # g
##总结:HashTable被rehash
8、最后一个 ZEND_SEND_VAR_SPEC_CV_HANDLER
p *varptr
#$11 = {gc = {refcount = 1, u = {v = {type = 7 '\a', flags = 0 '\000', gc_info = 0}, type_info = 7}}, u = {v = {flags = 10 '\n', nApplyCount = 0 '\000', nIteratorsCount = 0 '\000', consistency = 0 '\000'},
# flags = 10}, nTableMask = 4294967280, arData = 0x7ffff5a67540, nNumUsed = 9, nNumOfElements = 9, nTableSize = 16, nInternalPointer = 0, nNextFreeElement = 0,
# pDestructor = 0x84c8af <_zval_ptr_dtor_wrapper>}
# p (int32_t)4294967280 -> -16
#总结:数组进行了扩容