1.cache中存储的是什么?
上一篇博客分析了类的isa、superclass、bits
,这一篇主要分析cache
的缓存机制
1.cache_t 结构体部分代码,用到的成员变量和部分函数
struct cache_t {
private:
// explicit_atomic 显式原子性,目的是为了能够 保证 增删改查时 线程的安全性
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
//mask(高16位)+bucket(低48位) mask的本质是开辟可以存储bucket_t空间的下标
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _bucketsAndMaybeMask: 低48位是bucket_t pointer即指向bucket_t的指针,高16位是mask
union {
struct {// 可见bucket_t在cache里面是指针的形式存在的
explicit_atomic<mask_t> _maybeMask; // _maybeMask is unused, the mask is stored in the top 16 bits.
#if __LP64__ // _maybeMask不再使用
uint16_t _flags; //状态标识,用于计算某些值
#endif
uint16_t _occupied; //已经占用单位空间的数量
};
explicit_atomic<preopt_cache_t *> _originalPreoptCache;
};
// 不同架构下掩码的值
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED // MacOS、模拟器
// _bucketsAndMaybeMask is a buckets_t pointer
// _maybeMask is the buckets mask
static constexpr uintptr_t bucketsMask = ~0ul;
static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
static constexpr uintptr_t maskShift = 48;
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << maskShift) - 1;
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#if CONFIG_USE_PREOPT_CACHES
static constexpr uintptr_t preoptBucketsMarker = 1ul;
static constexpr uintptr_t preoptBucketsMask = bucketsMask & ~preoptBucketsMarker;
#endif
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 //64位真机
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.
// _bucketsAndMaybeMask 低48位是bucket指针,高16位是mask
// How much the mask is shifted by.
static constexpr uintptr_t maskShift = 48; //mask在64位中的高16位,要想获取到对应的值需要_bucketsAndMaybeMask里面的值左移48位
// Additional bits after the mask which must be zero. msgSend
// takes advantage of these additional bits to construct the value
// `mask << 4` from `_maskAndBuckets` in a single instruction.
static constexpr uintptr_t maskZeroBits = 4;
// The largest mask value we can store.
//可以存储mask的最大值
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1; // maxMask最大mask 2^(16-1)
// The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
// bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
// Ensure we have enough bits for the buckets pointer.
static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS,
"Bucket field doesn't have enough bits for arbitrary pointers.");
// 以下为private函数
bool isConstantEmptyCache() const; // 是否位空
mask_t mask() const; // 获取mask
void incrementOccupied(); //每插入一个bucket_t,occupied++ --> 标记已经占用几个位置了
void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask); // buckets和mask写入缓存
void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld); //开辟缓存空间
void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);//释放缓存空间
public:
// The following four fields are public for objcdt's use only.
// objcdt reaches into fields while the process is suspended
// hence doesn't care for locks and pesky little details like this
// and can safely use these.
// public函数
unsigned capacity() const;//获取capacity 开辟的缓存空间容量
struct bucket_t *buckets() const; //获取缓存空间的bucket指针
Class cls() const;
mask_t occupied() const;// 获取occupied已经占用了多少个缓存空间
void insert(SEL sel, IMP imp, id receiver);// 向缓存插入sel,imp转成bucket结构体的sel和imp进行存储
};
2.其他函数源码
// bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
buckets()
源码获取bucket
指针,第一个缓存的bucket
的地址
struct bucket_t *cache_t::buckets() const
{
uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
return (bucket_t *)(addr & bucketsMask);
}
occupied()
源码,获取占用数量,即已经插入多少个bucket
mask_t cache_t::occupied() const
{
return _occupied;
}
void incrementOccupied()
源码
void cache_t::incrementOccupied()
{
_occupied++;
}
capacity()
源码 // 获取容量,总共开辟了几个缓存bucket
的空间
unsigned cache_t::capacity() const
{
return mask() ? mask()+1 : 0;
}
3.cache_t 中重要的成员变量和函数说明
成员变量
1. _bucketsAndMaybeMask
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
源码本身注释
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.
_bucketsAndMaybeMask (64位)
= mask(数值)(高16位)
+ bucket指针(低48位)
存储的mask
用于mask()
函数取值,存储的bucket指针
是cache缓存bucket
的首地址
2. _maybeMask
explicit_atomic<mask_t> _maybeMask
64位真机里不再使用
3. _flags
uint16_t _flags;
状态标识,用于某些函数的计算
掩码
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.
1. maskShift
获取mask需要左移的位数
static constexpr uintptr_t maskShift = 48;
mask
在_bucketsAndMaybeMask64位
中的高16
位,要想获取到对应的值需要_bucketsAndMaybeMask
里面的值左移48位
2. maxMask
最大mask值,1 << (16-1) = 2 ^ 15
static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
3.bucketsMask
获取bucket
的掩码
// The mask applied to _maskAndBuckets
to retrieve the buckets pointer.
static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
函数
private
1. isConstantEmptyCache()
判断是否位空的cache
bool isConstantEmptyCache() const;
**2. mask()**
获取mask
mask_t mask() const;
3. incrementOccupied()
_occupied++
void incrementOccupied();
3. setBucketsAndMask()
calloc()
创建新的bucket指针
地址,开辟空间的长度是newCapacity * sizeof(bucket_t)
,sizeof(bucket_t) = 16
,比如newCapacity
长度为8
则创建8
个空的bucket_t
存储空间,将其首地址bucket指针
和mask = newCapacity - 1
写入_bucketsAndMaybeMask
缓存数据,同时_occupied = 0
,就是创建newCapacity
个可以装bucket_t
的空间,保存起来备用
void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);
3. reallocate()
开辟新的bucket_t存储空间,如果有旧的bucket_t存储空间,则进行释放
void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);
4. collect_free()
释放旧的bucket_t存储空,即cache里的_bucketsAndMaybeMask --> buckets()不再指向其开始的存储空间
void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);
public
1. capacity
获取容量 capacity
unsigned capacity() const;
2. buckets()
获取指向bucket存储空间的首地址指针
struct bucket_t *buckets() const;
3. occupied()
获取occupied,占用数量
mask_t occupied() const;
4. struct bucket_t部分源码,依然是展示成员变量和主要函数
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
#endif
public:
static inline size_t offsetOfSel() { return offsetof(bucket_t, _sel); }
inline SEL sel() const { return _sel.load(memory_order_relaxed); } // 获取sel
inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
uintptr_t imp = _imp.load(memory_order_relaxed); // 获取imp
if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
SEL sel = _sel.load(memory_order_relaxed);
return (IMP)
ptrauth_auth_and_resign((const void *)imp,
ptrauth_key_process_dependent_code,
modifierForSEL(base, sel, cls),
ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
}
template <Atomicity, IMPEncoding>
void set(bucket_t *base, SEL newSel, IMP newImp, Class cls); // 写入sel和imp
};
bucket_t成员变量
1._sel和_imp
sizeof(bucket_t) = 16,两个成员变量 8字节+8字节= 16字节
explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
explicit_atomic<SEL> _sel;
bucket_t函数
1.sel()
获取sel
inline SEL sel() const { return _sel.load(memory_order_relaxed); }
2. imp()
获取imp
inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {}
3. set()
向bucket写入sel和imp
void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);
2.cache缓存机制
通过上面的cache_t
结构体成员变量和方法解析,知道了cache_t
存储bucket_t
指针,bucket_t
里面有俩个成员变量_sel
和_imp
,用来保存方法method_t
的sel
和imp
,
而且cache_t
存储bucket_t
并不是像bits
存储类信息
method
---> method_list_t
、
property
---> property_list_t
、
protocol
---> protocol_list_t
、
ivar
---> ivar_list_t
、
一样使用数组的形式,而是指针的形式指向存储bucket_t
空间的首地址
,
即不会有bucket_list_t
这样的情况出现,这是cache和bits非常大的不同?为什么这么设计,下面通过LLDB
调试现象以及cache相关函数
的源码分析来解释这一现象
1.cache LLDB调试Xcode截图
从LLDB调试
来看,cache
存储bucket
有几点疑问?
第一,bucket会丢失
,可以看到并不是所有的方法
都缓存到了cache
里面
第二,bucket会乱序
,并不是按照我们调用方法
的顺序进行存储
第三,bucket不是连续存储
的,中间有空的bucket
存储空间
第四,capacity、mask、occupied
三者之间有什么关系,分别是用来做什么的?
为什么会出现上面的三种异常的,这就要看cache存储的底层机制是什么?从insert的源码中寻找答案
LLDB调试对应数据
KCObjcBuild was compiled with optimization - stepping may behave oddly; variables may not be available.
(lldb) p/x LGPerson.class
(Class) $0 = 0x00000001000086d8 LGPerson
(lldb) p (cache_t *) 0x00000001000086e8
(cache_t *) $1 = 0x00000001000086e8
(lldb) p $1->capacity()
(unsigned int) $2 = 8
(lldb) p $1->mask()
(mask_t) $3 = 7
(lldb) p $1->occupied()
(mask_t) $4 = 7
(lldb) p $1->buckets()
(bucket_t *) $5 = 0x0000000100a9bd50
(lldb) p $1->buckets()[0].sel()
(SEL) $6 = "sayWorld333"
(lldb) p $1->buckets()[0].imp(nil, LGPerson.class)
(IMP) $7 = 0x000000010000369c (KCObjcBuild`-[LGPerson sayWorld333] at main.m:100)
(lldb) p $1->buckets()[1].sel()
(SEL) $8 = "sayWorld888"
(lldb) p $1->buckets()[1].imp(nil, LGPerson.class)
(IMP) $9 = 0x0000000100003700 (KCObjcBuild`-[LGPerson sayWorld888] at main.m:106)
(lldb) p $1->buckets()[2].sel()
(SEL) $10 = "sayWorld666"
(lldb) p $1->buckets()[2].imp(nil, LGPerson.class)
(IMP) $11 = 0x00000001000036d8 (KCObjcBuild`-[LGPerson sayWorld666] at main.m:104)
(lldb) p $1->buckets()[3].sel()
(SEL) $12 = "sayWorld444"
(lldb) p $1->buckets()[3].imp(nil, LGPerson.class)
(IMP) $13 = 0x00000001000036b0 (KCObjcBuild`-[LGPerson sayWorld444] at main.m:101)
(lldb) p $1->buckets()[4].sel()
(SEL) $14 = "sayWorld222"
(lldb) p $1->buckets()[4].imp(nil, LGPerson.class)
(IMP) $15 = 0x0000000100003688 (KCObjcBuild`-[LGPerson sayWorld222] at main.m:99)
(lldb) p $1->buckets()[5].sel()
(SEL) $16 = (null)
(lldb) p $1->buckets()[5].imp(nil, LGPerson.class)
(IMP) $17 = 0x0000000000000000
(lldb) p $1->buckets()[6].sel()
(SEL) $18 = "sayWorld777"
(lldb) p $1->buckets()[6].imp(nil, LGPerson.class)
(IMP) $19 = 0x00000001000036ec (KCObjcBuild`-[LGPerson sayWorld777] at main.m:105)
(lldb) p $1->buckets()[7].sel()
(SEL) $20 = "sayWorld555"
(lldb) p $1->buckets()[7].imp(nil, LGPerson.class)
(IMP) $21 = 0x00000001000036c4 (KCObjcBuild`-[LGPerson sayWorld555] at main.m:102)
(lldb) p $1->buckets()[8].sel()
(SEL) $22 = ""
(lldb) p $1->buckets()[8].imp(nil, LGPerson.class)
(IMP) $23 = 0x0000000000000000
(lldb) p ($1 + 1)
(cache_t *) $24 = 0x00000001000086f8
(lldb) p ($1 + 2)
(cache_t *) $25 = 0x0000000100008708
(lldb) p ($1 + 3)
(cache_t *) $26 = 0x0000000100008718
(lldb) p ($1 + 4)
(cache_t *) $27 = 0x0000000100008728
(lldb) p ($1 + 5)
(cache_t *) $28 = 0x0000000100008738
(lldb) p ($1 + 6)
(cache_t *) $29 = 0x0000000100008748
(lldb) p ($1 + 7)
(cache_t *) $30 = 0x0000000100008758
(lldb) p ($1 + 8)
(cache_t *) $31 = 0x0000000100008768
(lldb) p ($1 + 9)
(cache_t *) $32 = 0x0000000100008778
(lldb) p ($1 + 10)
(cache_t *) $33 = 0x0000000100008788
(lldb)
2.insert()函数源码
insert()函数的几个关键点
1.初始化4个(arm64情况下)空的bucket存储空间,newBucket的指针和mask写入_bucketsAndMaybeMask,同时free掉旧的bucket存储空间
2.存储空间扩容,扩容的规则和算法是怎么实现的?
3.存储bucket的位置是怎么确定的?
4.如何将sel和imp写入到bucket里面去?
cache insert调用堆栈
insert()需要的一些枚举值
/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
// When we have a cache end marker it fills a bucket slot, so having a
// initial cache size of 2 buckets would not be efficient when one of the
// slots is always filled with the end marker. So start with a cache size
// 4 buckets.
INIT_CACHE_SIZE_LOG2 = 2,
#else
// Allow an initial bucket size of 2 buckets, since a large number of
// classes, especially metaclasses, have very few imps, and we support
// the ability to fill 100% of the cache before resizing.
INIT_CACHE_SIZE_LOG2 = 1,
#endif
INIT_CACHE_SIZE = (1 << INIT_CACHE_SIZE_LOG2),
MAX_CACHE_SIZE_LOG2 = 16,
MAX_CACHE_SIZE = (1 << MAX_CACHE_SIZE_LOG2),
FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};
cache_fill_ratio()
static inline mask_t cache_fill_ratio(mask_t capacity) {
return capacity * 7 / 8;
}
#define CACHE_END_MARKER 0
cache_hash()
// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
value ^= value >> 7;
#endif
return (mask_t)(value & mask);
}
cache_next()
#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
return (i+1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
return i ? i-1 : mask;
}
#else
#error unexpected configuration
#endif
insert()函数源码
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
runtimeLock.assertLocked();
// Never cache before +initialize is done
if (slowpath(!cls()->isInitialized())) {
return;
}
if (isConstantOptimizedCache()) {
_objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
cls()->nameForLogging());
}
#if DEBUG_TASK_THREADS
return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
mutex_locker_t lock(cacheUpdateLock);
#endif
ASSERT(sel != 0 && cls()->isInitialized());
// Use the cache as-is if until we exceed our expected fill ratio.
mask_t newOccupied = occupied() + 1;
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) { //初始化 capacity = 4
// Cache is read-only. Replace it. // capacity表示初始化多个bucket空间,容量
if (!capacity) capacity = INIT_CACHE_SIZE; // INIT_CACHE_SIZE =4 ---> 1 << 2 == 4
reallocate(oldCapacity, capacity, /* freeOld */false); // 开辟capacity 个bucket存储空间,
//bucket首地址并与 mask一起写入_bucketsAndMaybeMask
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
// Cache is less than 3/4 or 7/8 full. Use it as-is.
// newOccupied + 0 <= capacity * (7/8),如果newOccupied <= 7/8 * capacity则什么都不做,即不扩容
// capacity = 4,capacity * (7/8) = 3.5,capacity = 8,capacity * (7/8) = 7
}
#if CACHE_ALLOW_FULL_UTILIZATION
else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
// Allow 100% cache utilization for small buckets. Use it as-is.
// capacity <= 8 && newOccupied + 0 <= capacity,即newOccupied = 4的时候什么也不做
}
#endif
else {
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍,否则两倍扩容,
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true); // true ---> free 掉旧的bucket存储空间
}
bucket_t *b = buckets(); // 获取bucket指针,首地址
mask_t m = capacity - 1; // m ---> mask 与capacity的关系是 mask = capacity - 1
mask_t begin = cache_hash(sel, m); //哈希算法函数利用sel的唯一性大概率begin不会重复
mask_t i = begin; // begin保持最初的哈希值,即下标,i动态变化寻找空的bucket存储空间存入sel和imp
// 哈希算法 uintptr_t value = (uintptr_t)sel; ---> (value & mask)
// 由于m = capacity - 1,所以任何值按位与上都是0-7之间
//即 capacity = 4, begin = 0-->3,capacity = 8, began = 0-->7
// Scan for the first unused slot and insert there.
// There is guaranteed to be an empty slot.
do {
if (fastpath(b[i].sel() == 0)) { // == 0表示位空,则写入sel和imp
incrementOccupied(); // _occupied++
b[i].set<Atomic, Encoded>(b, sel, imp, cls());// bucket写入sel和imp
return; //写入后返回
}
if (b[i].sel() == sel) { //如果已经存储了sel则返回
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));
// 如果最初的哈希值begin对应的bucket[i]有值且bucket[i]不等于要写入的sel,称为哈希冲突,则进行二次哈希
// static inline mask_t cache_next(mask_t i, mask_t mask) {
// return i ? i-1 : mask;
//}
// 假设 i = 0的位置写入了a的sel,假设capacity = 4,则 m = 3, 后面又b,c,d的sel和imp需要写入
// { a_sel },{ },{ },{ }
// 此时如果b的哈希值 在 1-3之间,则走第一个if判断直接在对应的bucket[i] = bucket[begin]位置上写入sel和imp
// cache_next是第二次哈希,用于处理第一次哈希下标begin的位置上,bucket已经写入sel和imp了,假设b的哈希值begin = 0,
// 则i = cache_nex(i, m) = 3,3! = 0,满足while条件执行两个if分支,第一个if在b[3]上写入sel和imp
// { a_sel, a_imp},{ },{ },{ b_sel, b_imp}
// 此时c放来了,begin = 3,cache_next后 i = 2,第一个if在b[2]上写入sel和imp
// { a_sel, a_imp},{ },{ c_sel, c_imp },{ b_sel, b_imp}
// 此时c放来了,begin = 0,cache_next后 i = 3,再次cache_next后 i = 2,第一个if在b[2]上写入sel和imp
// { a_sel, a_imp},{ },{ c_sel, c_imp },{ b_sel, b_imp}
// 总之,如果没有哈希冲突b[i]位置直接写入sel和imp,如果有哈希冲突的话
// i = 1 ---> i = 0
// i = 0 ---> i = 3
// i = 3 ---> i = 2
// i = 2 ---> i = 1
// 由于容量capacity肯定是够存储的,加上cache_next 这种算法,一定可以找到空的bucket写入sel和imp
// i = 1 ---> i = 0
// i = 0 ---> i = m
// i = m ---> i = m - 1
// i = m - 1 ---> i = m - 2
bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
insert()函数主要流程
1.初始化
---> 2.扩容
---> 3.bucket写入准备
---> 4.do-while循环写入sel和imp到bucket
1.初始化
确定capacity
和开辟与capacity对应的新的空的bucket存储空间
,bucket首地址指针
和mask
写入_bucketsAndMaybeMask
,由于没有旧的bucket指针,所以,不会free旧的bucket存储空间
2.扩容
扩容条件达到后, 即newOccupied > capacity * (7/8)
扩容两倍capacity = capacity * 2
,开辟与capacity对应的新的空的bucket存储空间
,bucket首地址指针
和mask
写入_bucketsAndMaybeMask
,并且free
掉旧的bucket存储空间
3.bucket写入准备
do - while
写入之前准备数据
bucket_t *b = buckets();
拿到bucket
指针
mask_t m = capacity - 1;
确定下标范围 0 - m
mask_t begin = cache_hash(sel, m);
sel和m哈希后得到初始存储下标begin
,0-m之间随机的
mask_t i = begin;
i
变动下标赋初值
4.do-while循环写入sel和imp到bucket
do - while循环写入
b[i].sel() == 0
,没有写入sel和imp
,则直接写入
b[i].sel() == sel
,要写入的sel和imp
已经写入到了b[i]
的位置上,则什么都不做
b[i]
已经写入sel和imp
并且不是要写入的sel和imp
,则二次哈希
寻找其他可以写入的位置
由于cache_next()
的算法和capacity
足够大,一定会找到可以写入sel和imp的bucket
LLDB调试的几个疑点解答
第一,bucket
会丢失
,可以看到并不是所有的方法
都缓存到了cache
里面
原因是扩容后会free
掉旧的bucket
存储空间,只缓存触发扩容的这个方法,所以,bucket会丢失
第二,bucket
会乱序
,并不是按照我们调用方法
的顺序进行存储
因为b[i]
中的缓存下标是用sel和mask哈希
后的值,如果哈希冲突还要进行二次哈希
,所以是一个随机值,在0-mask
之间
第三,bucket不是连续存储
的,中间有空的bucket
存储空间
同样,b[i]
下标是sel和mask哈希
后或者二次哈希
的,存储顺序
并不是调用顺序
,有空的是因为reallocate()
函数,开辟capacity
个新的、空的
、bucket
存储空间,然后将bucket
指针和mask
写入到_bucketsAndMaybeMask
,存储是随机的所以会有空的bucket存在
第四,capacity、mask、occupied
三者之间有什么关系,分别是用来做什么的?
capacity
容量,即创建多少个空的bucket空间
mask
掩码,确定存储下标, 0-mask
之间,mask = capacity - 1
occupied
,已经占用多少个bucket
存储空间,即写入了多少次sel和imp
insert()内部关键函数reallocate
1. reallocate()
核心三点
第一,allocateBuckets() ---> calloc()
开辟ewCapacity
个bucket
存储空间
第二,setBucketsAndMask()
---> 新开辟的bucket
存储空间和newCapacity
写入_bucketsAndMaybeMask
第三,collect_free()
---> collect_free
掉旧的bucket
存储空间
第一和第二容易理解,开辟对应个数的bucket存储空间,保存起来
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();
bucket_t *newBuckets = allocateBuckets(newCapacity); //临时变量
// 开辟newCapacity个bucket存储空间,新的空的,newBuckets首地址指针
// Cache's old contents are not propagated.
// This is thought to save cache memory at the cost of extra cache fills.
// fixme re-measure this
ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
setBucketsAndMask(newBuckets, newCapacity - 1); //存储newBuckets,空格,空格,没有存储sel和imp
//newBuckets首地址指针和mask写入_bucketsAndMaybeMask
if (freeOld) {
collect_free(oldBuckets, oldCapacity); //旧的free掉,所以会有丢失
}
}
allocateBuckets()
calloc()
开辟newCapacity
个bucket
存储空间
bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
{
if (PrintCaches) recordNewCache(newCapacity);
return (bucket_t *)calloc(bytesForCapacity(newCapacity), 1); // calloc开辟内存,转换为bucket_t *
}
**```bytesForCapacity()```**
size_t cache_t::bytesForCapacity(uint32_t cap)
{
return sizeof(bucket_t) * cap;
}
setBucketsAndMask()
新的空的newBuckets首地址指针和mask存入_bucketsAndMaybeMask成员变量,同时_occupied = 0 清零
void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
uintptr_t buckets = (uintptr_t)newBuckets;
uintptr_t mask = (uintptr_t)newMask;
ASSERT(buckets <= bucketsMask);
ASSERT(mask <= maxMask);
// 新的bucket指针,newBuckets和mask写入_bucketsAndMaybeMask
// (uintptr_t)newMask << maskShift,mask左移48位存储在高16位
// newMask 和newBuckets 或 | 操作,各自存储相互不影响,
// newMask低48位是0,newBuckets高16位是0
_bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_relaxed);
_occupied = 0; // occupied 清零
}
mask()
获取mask
---> _bucketsAndMaybeMask
成员变量通过maskShift
掩码获取``mask```
mask_t cache_t::mask() const
{
uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
return maskAndBuckets >> maskShift; // 通过maskShift获取mask,高16位的值
}
2. collect_free()
触发节点
free旧的bucket存储空间的触发节点是扩容后,开辟新的free旧的,并不一定真的会free掉,只是cache的bucket指针不再指向该空间,实现了cache_::buckets() 逻辑上面的剥离
核心逻辑
1.malloc()
或者realloc()
标记这段地址空间,已经被使用
,系统开辟空间不会
占用到该段内存
2.每次扩容
后地址空间累计,一段一段
的累计,用一个二级
指针存储旧的buckets
指针,用这一次旧的bucket
缓存在二级指针的最后,,garbage_count++
标记二级指针的下标
也就是数组的最后,比如第一次8
个地址需要标记
为已经被使用
,则bucket指针指向二级指针的下标index = 7
的位置,第二次16
个,则指向index = 15
的位置,真正触发c函数free()
地址空间的时候,倒序
输出
由于有旧的bucket
指针做指向,所以,bucket垃圾回收空间
就是扩容之前的旧的bucket指向的空间
,征用被标记为已经被使用且没有赋初值
,所以依然保存原值
,即地址空间内的值
是真实的扩容之前
的cache 的buckets
指向的那段内存
3.符合条件触发真正的c语言
函数 free(dead);
free
掉指向bucket
的指针,这里的条件有两个
第一,是累计标记为被使用的空间 >= 32K
= 32 * 1024
,小于32K
,则return
返回
第二,objc_msgSend
或者其他的 cache reader
没有正在访问
该段内存且有可能会用到里面的buckets信息
简单理解就是超长垃圾回收存储空间的阈值,该段内存没有再继续被使用了
第一点的源码内英文注释
// Done if the garbage is not full
第二点的源码内英文注释
// Synchronize collection with objc_msgSend and other cache readers
// objc_msgSend (or other cache reader) is currently looking in
// the cache and might still be using some garbage.
只有两个条件同时满足
才会触发真正的c语言
函数free()
,其他情况都是标记为已经被使用
,系统开辟空间不会
占用到该段内存,cache
的buckets
不再指向,默默的备用状态
1.collect_free()
void cache_t::collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
if (PrintCaches) recordDeadCache(capacity);
// 旧的bucket存储空间被标记为已经使用,malloc(),realloc()
_garbage_make_room ();
// 垃圾回收空间累加器累计本次旧的buckets存储空间的长度,
// 这里是字节数,不是sizeof(bucket_t)的数值,即sizeof(bucket_t) * capacity
garbage_byte_size += cache_t::bytesForCapacity(capacity);
// 旧的bucket指针,指向最后面,用于collectNolock倒序free指针
garbage_refs[garbage_count++] = data;
// 有条件调用c语言函数 free(),真正释放掉旧的bucket指向的空间,不满足条件则只是标记为被使用
// 地址空间内的值,依然是旧的bucket存储的sel和imp,转为默默的备用状态
cache_t::collectNolock(false);
}
// capacity of initial garbage_refs
enum {
INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节
};
2.第一次扩容
arm64
情况下,第一次扩容是8
个字节,reallocate(oldCapacity, capacity, true);
最后一个传true
表示需要collect_free
---> /* freeOld */
else { // 满足扩容条件
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍
if (capacity > MAX_CACHE_SIZE) {
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true); //开辟新的buckets空间,collect_free旧的buckets空间
}
2.第一次扩容触发节点
// amount of memory represented by all refs in the garbage
static size_t garbage_byte_size = 0; // 垃圾回收空间累加器,
//标记为被使用地址空间的长度字节数
// do not empty the garbage until garbage_byte_size gets at least this big
static size_t garbage_threshold = 32*1024; // 最大的垃圾回收阈值,32k,小于32k则什么都不做
// table of refs to free
static bucket_t **garbage_refs = 0; // 二级指针,指向指针的指针
// current number of refs in garbage_refs
static size_t garbage_count = 0; //累计地址空间下标
// capacity of current garbage_refs
static size_t garbage_max = 0; //标记为被使用最大的字节数,这里不是sizeof(bucket_t)的数量
// capacity of initial garbage_refs
enum {
INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节,第次刚好8个字节
};
3._garbage_make_room()
旧的bucket指向空间标记为被使用
static void _garbage_make_room(void)
{
static int first = 1;
// Create the collection table the first time it is needed
if (first) //第一次,8个字节空间被标记为已经被使用
{ // INIT_GARBAGE_COUNT = 128, 128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节
first = 0;
garbage_refs = (bucket_t**)
malloc(INIT_GARBAGE_COUNT * sizeof(void *));
garbage_max = INIT_GARBAGE_COUNT;// garbage_max垃圾回收存储空间最大值赋初值
}
// Double the table if it is full
else if (garbage_count == garbage_max)
{
garbage_refs = (bucket_t**)
realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
garbage_max *= 2; // garbage_max垃圾回收存储空间最大值每次扩容两倍
// bucket_t *newBuckets = allocateBuckets(newCapacity);
// 刚好和cache bucket两倍扩容完全对应上,完美标记为被使用,指针地址安全
// 由于有旧的bucket指针做指向,所以,bucket垃圾回收空间就是扩容之前的
// 旧的bucket指向的空间,征用被标记为已经被使用且没有赋初值,所以依然保存原值
// 即地址空间内是真实的扩容之前的cache 的buckets指向的那段内存
}
}
4. collectNolock()
有条件触发真正的c语言函数free(),释放旧的buckets指向的空间,倒序释放
void cache_t::collectNolock(bool collectALot)
{
#if CONFIG_USE_CACHE_LOCK
cacheUpdateLock.assertLocked();
#else
runtimeLock.assertLocked();
#endif
// Done if the garbage is not full
if (garbage_byte_size < garbage_threshold && !collectALot) {
return; // collectALot = false,garbage_byte_size垃圾回收空间累加器
//小于垃圾回收阈值32k,则返回 ,什么都不做
}
// Synchronize collection with objc_msgSend and other cache readers
if (!collectALot) { // collectALot = false,指向分支
if (_collecting_in_critical ()) { // 是否处于objc_msgSend (or other cache reader)
// 正在查看或者有可能使用垃圾回收空间的数据的临界状态,如果是则返回,什么都不做
// objc_msgSend (or other cache reader) is currently looking in
// the cache and might still be using some garbage.
if (PrintCaches) {
_objc_inform ("CACHES: not collecting; "
"objc_msgSend in progress");
}
return;
}
}
else {
// No excuses.
while (_collecting_in_critical())
;
}
// No cache readers in progress - garbage is now deletable
// Log our progress
if (PrintCaches) {
cache_collections++;
_objc_inform ("CACHES: COLLECTING %zu bytes (%zu allocations, %zu collections)", garbage_byte_size, cache_allocations, cache_collections);
}
// 超过阈值32K并且没有被使用,则真正的会释放掉,调用c语言的free()函数
// Dispose all refs now in the garbage
// Erase each entry so debugging tools don't see stale pointers.
while (garbage_count--) { // 倒序输出
auto dead = garbage_refs[garbage_count]; // 去除指针
garbage_refs[garbage_count] = nil; // 指针= nil,避免野指针
free(dead); // 释放指向的空间
}
// Clear the garbage count and total size indicator
garbage_count = 0; //二级指针index下标清零
garbage_byte_size = 0; // 垃圾回收空间累加器清零
if (PrintCaches) {
size_t I;
size_t total_count = 0;
size_t total_size = 0;
for (i = 0; i < countof(cache_counts); i++) {
int count = cache_counts[I];
int slots = 1 << I;
size_t size = count * slots * sizeof(bucket_t);
if (!count) continue;
_objc_inform("CACHES: %4d slots: %4d caches, %6zu bytes",
slots, count, size);
total_count += count;
total_size += size;
}
_objc_inform("CACHES: total: %4zu caches, %6zu bytes",
total_count, total_size);
}
}
4.总结
cache insert 整体流程图