一: Hash 算法
1. hash算法
hash称为散列表,其实本质上还是一个数组。主要是通过
hash(key)
生成一个index
去从数组中取出值。但是如果产生hash(key1) == hash(key2)
就是hash冲突。
2. 冲突解决
为了解决冲突,有 开放寻址法(open addressing)和 链表法(chaining)
链表法(chaining)
这个比较简单,类似一个二维数组,就是一个
一维数组
,存储链表
的表头,链表根据hash函数
,记录key
,并一个一个链接下去。 这里就需要记录指针,导致了相对来说存储空间的增加。
如图,假设hash函数是对6 取余数,那么{0,6,7,2} 结果如下图:
开放寻址法(open addressing)
开放寻址方法,比如我们一般的hash方法,
p = Hash(key)
,假设得到的p值有冲突,那么就,p = Hash(p + di)
,di 称为增量序列
取值为【1,2,....】 ; 而且性能依赖于装填因子
α=N/M,α 超过0.72的时候,会严重影响性能。
1.线性探测
很容易产生数据集中的问题
//对m 取余数的,m一般是hash表的长度
Hash(key) {
return key%m
}
// 获取index值
p = Hash(key)
// 如果冲突 了,就 +di 往下探测,直到不冲突
p = Hash(Hash(key)+1)
p = Hash(Hash(key)+2)
p = Hash(Hash(key)+3)
p = Hash(Hash(key)+4)
...
2.二次探测
为了让数据分布均匀一点,让数据更加离散,就出现了二次探测,
p=Hash(Hash(key)+di)
di取值可能为1,-1,4,-4,9,-9,16,-16,...k2,-k2(k<=m/2)其中m为表长,di为增量序列
// 如果冲突 了,就 +/- di^2 往下探测,直到不冲突
p = Hash(Hash(key)+1)
p = Hash(Hash(key)-1)
p = Hash(Hash(key)+4)
p = Hash(Hash(key)-4)
...
二: NSDictory内部实现
NSDictionary是可以通过CFDictionary
toll-free bridged
过来的。而且我们是可以查看CFDictionary.
用CFDictionary研究和使用
1. 首先我们查看下一下,用CFDictionary 创建
CFStringRef key1 = CFStringCreateWithCString(CFAllocatorGetDefault(), "name", kCFStringEncodingUTF8);
CFStringRef key2 = CFStringCreateWithCString(CFAllocatorGetDefault(), "age", kCFStringEncodingUTF8);
CFStringRef keys[] = {key1, key2};
CFStringRef values[] = {CFSTR("leeDev"), CFSTR("27")};
int count = sizeof(values)/sizeof(values[0]);
CFDictionaryRef dic = CFDictionaryCreate(CFAllocatorGetDefault(), (const void**)keys, (const void**)values,count, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
NSLog(@"count = %ld,value = %@",(long)CFDictionaryGetCount(dic),CFDictionaryGetValue(dic, key1));
// 可变字典
CFMutableDictionaryRef mutDic = CFDictionaryCreateMutable(CFAllocatorGetDefault(), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
CFDictionaryAddValue(mutDic, keys[0], values[0]);
CFDictionarySetValue(mutDic, keys[0], CFSTR("lisi"));
NSLog(@"mutDic = %@",mutDic);
我们找到 CFDictionaryCreate
中 方法,传入4个值
- keys
- values
- count
- kCFTypeDictionaryKeyCallBacks:
- kCFTypeDictionaryValueCallBacks
2. 我们通过查看CF-855.17
中的创建源码,并删除一些异常判断的代码
CFHashRef CFDictionaryCreate(CFAllocatorRef allocator, const_any_pointer_t *klist, const_any_pointer_t *vlist, CFIndex numValues, const CFDictionaryKeyCallBacks *keyCallBacks, const CFDictionaryValueCallBacks *valueCallBacks) {
CFTypeID typeID = CFDictionaryGetTypeID();
//1.创建hash表
CFBasicHashRef ht = __CFDictionaryCreateGeneric(allocator, keyCallBacks, valueCallBacks, CFDictionary);
//2.给hash表设置容量
if (0 < numValues) CFBasicHashSetCapacity(ht, numValues);
//3. 给hash表设置 keys 和 values
for (CFIndex idx = 0; idx < numValues; idx++) {
CFBasicHashAddValue(ht, (uintptr_t)klist[idx], (uintptr_t)vlist[idx]);
}
//4.设置不可变的hash表
CFBasicHashMakeImmutable(ht);
_CFRuntimeSetInstanceTypeIDAndIsa(ht, typeID);
return (CFHashRef)ht;
}
2.1 在给hash表的添加值的时候有个方法是 CFBasicHashAddValue
static void __CFBasicHashAddValue(CFBasicHashRef ht, CFIndex bkt_idx, uintptr_t stack_key, uintptr_t stack_value) {
//1.记录操作数
ht->bits.mutations++;
//2. 如果我们的hash表的容量 不够,需要Rehash
if (CFBasicHashGetCapacity(ht) < ht->bits.used_buckets + 1) {
// 2.1.1 下面会有容量数组的介绍。
__CFBasicHashRehash(ht, 1);
//2.1.2 就是通过 开发地址方法来找hash的index
bkt_idx = __CFBasicHashFindBucket_NoCollision(ht, stack_key, 0);
} else if (__CFBasicHashIsDeleted(ht, bkt_idx)) {
ht->bits.deleted--;
}
//2.1.3 key_hash
uintptr_t key_hash = 0;
if (__CFBasicHashHasHashCache(ht)) {
key_hash = __CFBasicHashHashKey(ht, stack_key);
}
// 2.1.4 通过对value就行retain操作
stack_value = __CFBasicHashImportValue(ht, stack_value);
if (ht->bits.keys_offset) {
// 2.1.5 对key 进行retain操作
stack_key = __CFBasicHashImportKey(ht, stack_key);
}
//2.1.6 hash 表里面存入的是 CFBasicHashValue的对象
//就是把 stack_value 和 stack_key 存入到 CFBasicHashValue
__CFBasicHashSetValue(ht, bkt_idx, stack_value, false, false);
if (ht->bits.keys_offset) {
__CFBasicHashSetKey(ht, bkt_idx, stack_key, false, false);
}
if (ht->bits.counts_offset) {
__CFBasicHashIncSlotCount(ht, bkt_idx);
}
if (__CFBasicHashHasHashCache(ht)) {
__CFBasicHashGetHashes(ht)[bkt_idx] = key_hash;
}
ht->bits.used_buckets++;
}
2.1.1 容量数组
,用于字典容量的申请 ,就是说,__CFBasicHashRehash(ht, 1);
执行后,假设开始的容量是0
,那么Rehash后就是3
; 如果是3
,那么Rehash后就是6
static const uintptr_t __CFBasicHashTableCapacities[64] = {
0, 3, 6, 11, 19, 32, 52, 85, 118, 155, 237, 390, 672, 1065,
1732, 2795, 4543, 7391, 12019, 19302, 31324, 50629, 81956,
132580, 214215, 346784, 561026, 907847, 1468567, 2376414,
3844982, 6221390, 10066379, 16287773, 26354132, 42641916,
68996399, 111638327, 180634415, 292272755,
#if __LP64__
472907503UL, 765180257UL, 1238087439UL, 2003267722UL, 3241355160UL,
#if 0
5244622578UL, 8485977737UL, 13730600347UL, 22216578100UL,
35947178453UL, 58163756541UL, 94110935011UL, 152274691274UL,
246385626296UL, 398660317578UL, 645045943559UL, 1043706261135UL,
1688752204693UL, 2732458465840UL, 4421210670552UL,
7153669136706UL, 11574879807265UL, 18728548943682UL
#endif
#endif
};
2.1.2 通过开发地址
来处理,并返回当前的index
CF_INLINE CFIndex __CFBasicHashFindBucket_NoCollision(CFConstBasicHashRef ht, uintptr_t stack_key, uintptr_t key_hash) {
if (0 == ht->bits.num_buckets_idx) {
return kCFNotFound;
}
if (ht->bits.indirect_keys) {
switch (ht->bits.hash_style) {
case __kCFBasicHashLinearHashingValue: return ___CFBasicHashFindBucket_Linear_Indirect_NoCollision(ht, stack_key, key_hash);
case __kCFBasicHashDoubleHashingValue: return ___CFBasicHashFindBucket_Double_Indirect_NoCollision(ht, stack_key, key_hash);
case __kCFBasicHashExponentialHashingValue: return ___CFBasicHashFindBucket_Exponential_Indirect_NoCollision(ht, stack_key, key_hash);
}
} else {
switch (ht->bits.hash_style) {
case __kCFBasicHashLinearHashingValue: return ___CFBasicHashFindBucket_Linear_NoCollision(ht, stack_key, key_hash);
case __kCFBasicHashDoubleHashingValue: return ___CFBasicHashFindBucket_Double_NoCollision(ht, stack_key, key_hash);
case __kCFBasicHashExponentialHashingValue: return ___CFBasicHashFindBucket_Exponential_NoCollision(ht, stack_key, key_hash);
}
}
HALT;
return kCFNotFound;
}
2.2 CFBasicHashCallbacks 结构体是干嘛的?
首先 是一个
struct __CFBasicHashCallbacks
结构体,里面包含的是 函数指针,前面的kCFTypeDictionaryKeyCallBacks
和kCFTypeDictionaryValueCallBacks
其实主要是用来对key
或者value
来进行retain
等操作。
struct __CFBasicHashCallbacks {
uintptr_t (*retainValue)(CFAllocatorRef alloc, uintptr_t stack_value); // Return 2nd arg or new value
uintptr_t (*retainKey)(CFAllocatorRef alloc, uintptr_t stack_key); // Return 2nd arg or new key
void (*releaseValue)(CFAllocatorRef alloc, uintptr_t stack_value);
void (*releaseKey)(CFAllocatorRef alloc, uintptr_t stack_key);
Boolean (*equateValues)(uintptr_t coll_value1, uintptr_t stack_value2); // 1st arg is in-collection value, 2nd arg is probe parameter OR in-collection value for a second collection
Boolean (*equateKeys)(uintptr_t coll_key1, uintptr_t stack_key2); // 1st arg is in-collection key, 2nd arg is probe parameter
CFHashCode (*hashKey)(uintptr_t stack_key);
uintptr_t (*getIndirectKey)(uintptr_t coll_value); // Return key; 1st arg is in-collection value
CFStringRef (*copyValueDescription)(uintptr_t stack_value);
CFStringRef (*copyKeyDescription)(uintptr_t stack_key);
};