Redis的字典结构,说起来和Java的HashMap有点相似。
主要是由dict.c
来实现,在dict.h
中进行了定义。
包含了dict
、dictType
、dictEntry
、dictht
四种结构。
字典的结构定义如下:
//字典结构
typedef struct dict {
//字典类型
dictType *type;
//私有数据
void *privdata;.
//哈希表2个,0号和1号
dictht ht[2];
//用来记录rehash进度的标记,如果没有在rehash,则值为-1
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
//当前正在迭代的迭代器数
unsigned long iterators; /* number of iterators currently running */
} dict;
字典结构包含了:
- 字典类型
dictType
- 私有数据
privdata
- 哈希表
ht
,有2个,一个正常使用,一个rehash的时候使用 - rehash进度
rehashidx
- 当前正在迭代的迭代器数
iterators
字典类型的定义如下:
//字典类型
typedef struct dictType {
//hash函数
uint64_t (*hashFunction)(const void *key);
//复制key
void *(*keyDup)(void *privdata, const void *key);
//复制value
void *(*valDup)(void *privdata, const void *obj);
//key比较器
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
//回收key
void (*keyDestructor)(void *privdata, void *key);
//回收value
void (*valDestructor)(void *privdata, void *obj);
} dictType;
字典类型结构包含了:
- hash函数
hashFunction
,传入key计算出hash值 - 复制key
keyDup
- 复制value
valueDup
- key的比较器
keyCompare
- 回收key的析构函数
keyDestructor
- 回收value的析构函数
valueDestructor
哈希表的结构如下:
/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
//哈希表结构
typedef struct dictht {
//哈希表数组,内部采用拉链法
dictEntry **table
//哈希表大小
unsigned long size;
//哈希表大小掩码
unsigned long sizemask;
//哈希表现有节点的数量
unsigned long used;
} dictht;
哈希表结构包含了:
- 哈希表数组
dictEntry **table
,哈希桶dictEntry内部则是链表结构。 - 哈希表大小
size
- 哈希表大小掩码
sizemask
- 哈希表现有节点数
used
哈希桶的定义如下:
//哈希桶,k-v,链表
typedef struct dictEntry {
//key
void *key;
//value 支持4种数据类型
union {
//自定义
void *val;
//无符号64位整型
uint64_t u64;
//64位整型
int64_t s64;
//浮点型
double d;
} v;
//next下一个节点
struct dictEntry *next;
} dictEntry;
哈希桶结构包含了:
- 键名
key
- 值
v
包含了4种可选类型 - 下一个节点指针
next
从整体上来看,dict
中包含了2个dictht
哈希表,每个哈希表中包含了一个dictEntry
哈希桶数组,以及哈希桶的统计属性、rehash
进度标记,而每个哈希桶内部是一个k-v结构,且包含一个next指针,多个哈希桶挂在一个链表上。
具体的用法如下:
1. 字典的创建
//创建一个字典
dict *dictCreate(dictType *type,
void *privDataPtr)
{
dict *d = zmalloc(sizeof(*d));
_dictInit(d,type,privDataPtr);
return d;
}
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
void *privDataPtr)
{
//0号哈希表
_dictReset(&d->ht[0]);
//1号哈希表,渐进式扩容的时候用到
_dictReset(&d->ht[1]);
//通过这个type指定数据类型,如hash、zset
d->type = type;
d->privdata = privDataPtr;
// -1 表示处于非rehash状态
d->rehashidx = -1;
d->iterators = 0;
return DICT_OK;
}
static void _dictReset(dictht *ht)
{
ht->table = NULL;
ht->size = 0;
ht->sizemask = 0;
ht->used = 0;
}
主要是进行了内存的分配及2个哈希表的初始化及一些属性的赋值操作。
type在Redis的数据结构中会用到。
2. 添加元素
/* Low level add or find:
* This function adds the entry but instead of setting a value returns the
* dictEntry structure to the user, that will make sure to fill the value
* field as he wishes.
*
* This function is also directly exposed to the user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey,NULL);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned, and "*existing" is populated
* with the existing entry if existing is not NULL.
*
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
long index;
dictEntry *entry;
dictht *ht;
//如果正在rehash,则执行rehash步骤
if (dictIsRehashing(d)) _dictRehashStep(d);
/* Get the index of the new element, or -1 if
* the element already exists. */
//查找当前key是否存在,利用hashFunction将key进行hash,如果存在,则返回NULL
if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)
return NULL;
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
* system it is more likely that recently added entries are accessed
* more frequently. */
//检查是否正在rehash,是的话,使用新的哈希表
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
//将新的entry插入链表头部
entry->next = ht->table[index];
ht->table[index] = entry;
//节点数+1
ht->used++;
/* Set the hash entry fields. */
//设置新节点的key
dictSetKey(d, entry, key);
return entry;
}
需要判断是否处于rehash状态及key是否存在。
key的寻找过程类似HashMap。
不同的是,存在着2个哈希表。
采用头插法,添加一个key。
3. 删除及断开元素
/* Remove an element, returning DICT_OK on success or DICT_ERR if the
* element was not found. */
int dictDelete(dict *ht, const void *key) {
return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
}
/* Remove an element from the table, but without actually releasing
* the key, value and dictionary entry. The dictionary entry is returned
* if the element was found (and unlinked from the table), and the user
* should later call `dictFreeUnlinkedEntry()` with it in order to release it.
* Otherwise if the key is not found, NULL is returned.
*
* This function is useful when we want to remove something from the hash
* table but want to use its value before actually deleting the entry.
* Without this function the pattern would require two lookups:
*
* entry = dictFind(...);
* // Do something with entry
* dictDelete(dictionary,entry);
*
* Thanks to this function it is possible to avoid this, and use
* instead:
*
* entry = dictUnlink(dictionary,entry);
* // Do something with entry
* dictFreeUnlinkedEntry(entry); // <- This does not need to lookup again.
*/
dictEntry *dictUnlink(dict *ht, const void *key) {
return dictGenericDelete(ht,key,1);
}
/* You need to call this function to really free the entry after a call
* to dictUnlink(). It's safe to call this function with 'he' = NULL. */
void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
if (he == NULL) return;
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
}
/* Search and remove an element. This is an helper function for
* dictDelete() and dictUnlink(), please check the top comment
* of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
uint64_t h, idx;
dictEntry *he, *prevHe;
int table;
//如果节点数量为0,则返回NULL
if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL;
//如果正在rehash,则执行rehash
if (dictIsRehashing(d)) _dictRehashStep(d);
//通过hash函数计算key的hash值
h = dictHashKey(d, key);
//遍历2个dictht
for (table = 0; table <= 1; table++) {
//计算key所在哈希桶数组的下标
idx = h & d->ht[table].sizemask;
//dictEntry
he = d->ht[table].table[idx];
prevHe = NULL;
//循环遍历链表查找key
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
/* Unlink the element from the list */
//将找到的节点存储在prevHe中,然后删除
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next;
//通过nofree参数控制是否要释放内存空间,delete的时候nofree=0,unlink的时候nofree=1
if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he);
}
//节点数-1
d->ht[table].used--;
return he;
}
prevHe = he;
//继续查找下一个节点
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return NULL; /* not found */
}
删除元素的时候,需要同时释放内存。
断开元素的时候,不需要同时释放内存。
对于小对象key,可以直接删除元素dictDelete
。
对于大对象key,一般是断开元素dictUnlink
,再慢慢回收内存dictFreeUnlinkedEntry
。
同样,都需要判断是否rehash状态,对key进行hash,再定位哈希桶数组下标,再遍历链表查找到最终的key。
4. 扩容
/* Resize the table to the minimal size that contains all the elements,
* but with the invariant of a USED/BUCKETS ratio near to <= 1 */
//扩容
int dictResize(dict *d)
{
int minimal;
//通过dictEnableResize、dictDisableResize修改dict_can_resize
//判断是否允许resize或者是否处于渐进式hash状态,如果不允许或者正在rehash,则返回错误。
if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
minimal = d->ht[0].used;
//最小容量
if (minimal < DICT_HT_INITIAL_SIZE)
minimal = DICT_HT_INITIAL_SIZE;
//扩容逻辑
return dictExpand(d, minimal);
}
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
//如果正在rehash或者新的size过小,则报错。
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;
dictht n; /* the new hash table */
//在未达到unsigned long阈值的情况下,2倍扩容。
unsigned long realsize = _dictNextPower(size);
/* Rehashing to the same table size is not useful. */
if (realsize == d->ht[0].size) return DICT_ERR;
/* Allocate the new hash table and initialize all pointers to NULL */
//调整参数值,分配内存
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;
/* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
//如果不是因为rehash而去扩容,则判定为初始化,那么直接赋值即可。
if (d->ht[0].table == NULL) {
d->ht[0] = n;
return DICT_OK;
}
/* Prepare a second hash table for incremental rehashing */
//将新建的哈希表指向1号哈希表,为rehash做准备。
d->ht[1] = n;
//修改进度为0,进度为-1的话则表示非rehash状态。
d->rehashidx = 0;
return DICT_OK;
}
5. 渐进式哈希rehash
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
*
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table, however
* since part of the hash table may be composed of empty spaces, it is not
* guaranteed that this function will rehash even a single bucket, since it
* will visit at max N*10 empty buckets in total, otherwise the amount of
* work it does would be unbound and the function may block for a long time. */
//渐进式哈希
int dictRehash(dict *d, int n) {
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
//n表示要移动的槽数
while(n-- && d->ht[0].used != 0) {
dictEntry *de, *nextde;
assert(d->ht[0].size > (unsigned long)d->rehashidx);
//如果还有元素,查找哈希槽
while(d->ht[0].table[d->rehashidx] == NULL) {
d->rehashidx++;
if (--empty_visits == 0) return 1;
}
//获取下标为rehashidx哈希桶链表
de = d->ht[0].table[d->rehashidx];
//遍历,移动元素到新的哈希表
while(de) {
uint64_t h;
//移动到下一个节点
nextde = de->next;
//获取数组下标 hash&sizemask
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
//0号哈希表元素个数-1
d->ht[0].used--;
//1号哈希表元素个数+1
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
//rehash进度+1
d->rehashidx++;
}
//校验元素是否为0,是否都迁移完毕了
if (d->ht[0].used == 0) {
//移动完所有的元素,就把原来的哈希表释放
zfree(d->ht[0].table);
//把1号哈希表指向0号哈希表
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
//rehash状态清除
d->rehashidx = -1;
return 0;
}
/* More to rehash... */
return 1;
}
//该方法在server.c启动的时候,启动 databasesCron() 定时任务
//调用incrementallyRehash,进而调用。
int dictRehashMilliseconds(dict *d, int ms) {
long long start = timeInMilliseconds();
int rehashes = 0;
//每次rehash 100 steps,rehash时间为ms毫秒
while(dictRehash(d,100)) {
rehashes += 100;
if (timeInMilliseconds()-start > ms) break;
}
return rehashes;
}