- Expansion
- Expansion size
- Progressive rehash
- When progressive rehash is triggered
Redis provides a fairly conventional hash table implementation, but extends its memory management with expansion and shrinking. Rehashing a table that holds a large number of entries in one go would be a blocking operation, so to keep things efficient Redis uses progressive rehash, which spreads the rehash work across subsequent operations on the hash.
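For context, the core structures from dict.h (Redis 6.x, shown roughly here for reference) are listed below: ht[0] is the active table, ht[1] is only populated while a rehash is in progress, and rehashidx records the next bucket of ht[0] waiting to be migrated (-1 when no rehash is running).

```c
typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;  /* chaining for hash collisions */
} dictEntry;

typedef struct dictht {
    dictEntry **table;       /* bucket array */
    unsigned long size;      /* number of buckets, always a power of 2 */
    unsigned long sizemask;  /* size - 1, maps a hash value to a bucket */
    unsigned long used;      /* number of stored entries */
} dictht;

typedef struct dict {
    dictType *type;          /* type-specific callbacks, omitted here */
    void *privdata;
    dictht ht[2];            /* ht[1] is only used during rehashing */
    long rehashidx;          /* -1 when no rehash is in progress */
    int16_t pauserehash;     /* >0 means rehashing is paused */
} dict;
```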
Expansion
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE); /* size 0: trigger the initial expansion */

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    /* i.e. expand when used >= size and resizing is allowed (it is disabled
     * while an RDB save or AOF rewrite is in progress), or when used/size
     * exceeds dict_force_resize_ratio (5) regardless of that setting. */
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio) &&
        dictTypeExpandAllowed(d))
    {
        return dictExpand(d, d->ht[0].used + 1);
    }
    return DICT_OK;
}
int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
{
    if (malloc_failed) *malloc_failed = 0;

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size); /* new array size: the smallest power of 2 >= size */

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    if (malloc_failed) {
        n.table = ztrycalloc(realsize*sizeof(dictEntry*));
        *malloc_failed = n.table == NULL;
        if (*malloc_failed)
            return DICT_ERR;
    } else
        n.table = zcalloc(realsize*sizeof(dictEntry*));

    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE; /* initial capacity: 4 */

    if (size >= LONG_MAX) return LONG_MAX + 1LU;
    while(1) {
        if (i >= size)
            return i;
        i *= 2;
    }
}
Expansion conditions
1. The number of entries in the hash table reaches the table size (used >= size) and resizing is currently allowed (no RDB save or AOF rewrite is in progress).
2. Or the number of entries exceeds 5 times the table size (dict_force_resize_ratio), in which case the expansion is forced even while an RDB save or AOF rewrite is running.
Expansion size
The new size is the smallest power of two that is at least used + 1, i.e. the first power of two greater than the current number of entries.
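As a minimal standalone sketch of the sizing (not Redis code; the helper next_power below simply mirrors _dictNextPower): a full table with 16 entries calls dictExpand(d, 16 + 1), and _dictNextPower(17) returns 32, so the table doubles.

```c
#include <stdio.h>

#define DICT_HT_INITIAL_SIZE 4

/* Standalone copy of _dictNextPower's loop, for illustration only. */
static unsigned long next_power(unsigned long size) {
    unsigned long i = DICT_HT_INITIAL_SIZE;
    while (i < size) i *= 2;
    return i;
}

int main(void) {
    /* A table with 16 buckets and 16 used entries triggers expansion:
     * dictExpand(d, used + 1) -> _dictNextPower(17) -> 32 buckets. */
    printf("%lu\n", next_power(16 + 1));   /* prints 32 */
    printf("%lu\n", next_power(100 + 1));  /* prints 128 */
    return 0;
}
```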
Progressive rehash
/* Perform a single step of rehashing, and only if rehashing has not been
 * paused (pauserehash > 0, e.g. while safe iterators are active). */
static void _dictRehashStep(dict *d) {
    if (d->pauserehash == 0) dictRehash(d,1);
}
int dictRehash(dict *d, int n) { /* n: number of buckets to migrate in this call */
    int empty_visits = n*10; /* max number of empty buckets to visit in this call */
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* rehashidx is the index of the next bucket of ht[0] to migrate. */
        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) { /* all entries migrated: free the old table and swap */
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}
When is progressive rehash triggered
- dictAddRaw(dict *d, void *key, dictEntry **existing)
- dictGenericDelete(dict *d, const void *key, int nofree)
- dictFind(dict *d, const void *key)
- dictGetRandomKey(dict *d)
- dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count)
In almost every data operation on the dict, before performing the operation itself the code first checks whether a rehash is in progress and, if so, performs one rehash step.
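As a concrete example, dictFind in Redis 6.x (reproduced roughly below) first advances the rehash by one step when one is in progress, and then has to probe both ht[0] and ht[1], because during a rehash an entry may live in either table:

```c
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    uint64_t h, idx, table;

    if (dictSize(d) == 0) return NULL; /* dict is empty */
    if (dictIsRehashing(d)) _dictRehashStep(d); /* migrate one more bucket */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (key == he->key || dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        /* If we are not rehashing, ht[1] is empty: no need to look there. */
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}
```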
Summary
Redis's hash table design itself is quite conventional, but it is extended with expansion and shrinking. To reduce the time cost of resizing, Redis takes a divide-and-conquer approach: the data migration required after an expansion is spread across subsequent operations on the hash, which avoids the high latency of migrating everything in a single pass. The trade-off is higher memory consumption than a one-shot migration, because during the rehash both bucket arrays exist at the same time.