iOS-底层原理-类结构分析cache

1.cache中存储的是什么？

上一篇博客分析了类的isa、superclass、bits，这一篇主要分析cache的缓存机制

1.cache_t 结构体部分代码，用到的成员变量和部分函数

struct cache_t {
private:
    // explicit_atomic 显式原子性，目的是为了能够 保证 增删改查时 线程的安全性
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    //mask(高16位)+bucket(低48位) mask的本质是开辟可以存储bucket_t空间的下标
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits                                             
    //  _bucketsAndMaybeMask:  低48位是bucket_t pointer即指向bucket_t的指针，高16位是mask  
    union { 
        struct {// 可见bucket_t在cache里面是指针的形式存在的
            explicit_atomic<mask_t>    _maybeMask;  // _maybeMask is unused, the mask is stored in the top 16 bits.
#if __LP64__                                                              // _maybeMask不再使用
            uint16_t                   _flags; //状态标识，用于计算某些值
#endif
            uint16_t                   _occupied; //已经占用单位空间的数量
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

// 不同架构下掩码的值
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED // MacOS、模拟器
    // _bucketsAndMaybeMask is a buckets_t pointer
    // _maybeMask is the buckets mask

    static constexpr uintptr_t bucketsMask = ~0ul;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    static constexpr uintptr_t maskShift = 48;
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << maskShift) - 1;
    
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
    static constexpr uintptr_t preoptBucketsMask = bucketsMask & ~preoptBucketsMarker;
#endif
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 //64位真机
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
    // _maybeMask is unused, the mask is stored in the top 16 bits.
    // _bucketsAndMaybeMask 低48位是bucket指针，高16位是mask

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;  //mask在64位中的高16位，要想获取到对应的值需要_bucketsAndMaybeMask里面的值左移48位

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store.
    //可以存储mask的最大值
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1; // maxMask最大mask 2^(16-1)
    
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    // bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    
    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS,
            "Bucket field doesn't have enough bits for arbitrary pointers.");

     // 以下为private函数
    bool isConstantEmptyCache() const; // 是否位空
    mask_t mask() const; // 获取mask
    void incrementOccupied(); //每插入一个bucket_t，occupied++ --> 标记已经占用几个位置了
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask); // buckets和mask写入缓存
    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld); //开辟缓存空间
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);//释放缓存空间
public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.

    // public函数
    unsigned capacity() const;//获取capacity 开辟的缓存空间容量
    struct bucket_t *buckets() const; //获取缓存空间的bucket指针
    Class cls() const;
    mask_t occupied() const;// 获取occupied已经占用了多少个缓存空间
    void insert(SEL sel, IMP imp, id receiver);// 向缓存插入sel，imp转成bucket结构体的sel和imp进行存储
};

2.其他函数源码

// bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针

    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

buckets()源码获取bucket指针，第一个缓存的bucket的地址

struct bucket_t *cache_t::buckets() const
{
    uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
    return (bucket_t *)(addr & bucketsMask);
}

occupied()源码，获取占用数量，即已经插入多少个bucket

mask_t cache_t::occupied() const
{
    return _occupied;
}

void incrementOccupied()源码

void cache_t::incrementOccupied() 
{
    _occupied++;
}

capacity() 源码 // 获取容量，总共开辟了几个缓存bucket的空间

unsigned cache_t::capacity() const
{
    return mask() ? mask()+1 : 0; 
}

3.cache_t 中重要的成员变量和函数说明

成员变量

1. _bucketsAndMaybeMask

explicit_atomic<uintptr_t> _bucketsAndMaybeMask;

源码本身注释
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.
_bucketsAndMaybeMask (64位) = mask(数值)(高16位) + bucket指针(低48位)
存储的mask用于mask()函数取值，存储的bucket指针是cache缓存bucket的首地址

2. _maybeMask

explicit_atomic<mask_t>    _maybeMask

64位真机里不再使用

3. _flags

uint16_t  _flags;

状态标识，用于某些函数的计算

掩码
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.

1. maskShift
获取mask需要左移的位数

static constexpr uintptr_t maskShift = 48;

mask在_bucketsAndMaybeMask64位 中的高16位，要想获取到对应的值需要_bucketsAndMaybeMask里面的值左移48位

2. maxMask
最大mask值，1 << (16-1) = 2 ^ 15

static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

3.bucketsMask
获取bucket的掩码
// The mask applied to _maskAndBuckets to retrieve the buckets pointer.

    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

函数
private

1. isConstantEmptyCache()
判断是否位空的cache

bool isConstantEmptyCache() const;

**2. mask()**
获取mask

mask_t mask() const;

3. incrementOccupied()
_occupied++

void incrementOccupied();

3. setBucketsAndMask()
calloc() 创建新的bucket指针地址，开辟空间的长度是newCapacity * sizeof(bucket_t)，sizeof(bucket_t) = 16，比如newCapacity长度为8则创建8个空的bucket_t存储空间，将其首地址bucket指针和mask = newCapacity - 1 写入_bucketsAndMaybeMask缓存数据，同时_occupied = 0，就是创建newCapacity个可以装bucket_t的空间，保存起来备用

void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

3. reallocate()
开辟新的bucket_t存储空间，如果有旧的bucket_t存储空间，则进行释放

void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);

4. collect_free()
释放旧的bucket_t存储空，即cache里的_bucketsAndMaybeMask --> buckets()不再指向其开始的存储空间

 void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

public

1. capacity
获取容量 capacity

unsigned capacity() const;

2. buckets()
获取指向bucket存储空间的首地址指针

struct bucket_t *buckets() const;

3. occupied()
获取occupied，占用数量

mask_t occupied() const;

4. struct bucket_t部分源码，依然是展示成员变量和主要函数

struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

public:
    static inline size_t offsetOfSel() { return offsetof(bucket_t, _sel); }
    inline SEL sel() const { return _sel.load(memory_order_relaxed); } // 获取sel

    inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed); // 获取imp
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

    template <Atomicity, IMPEncoding>
    void set(bucket_t *base, SEL newSel, IMP newImp, Class cls); // 写入sel和imp
};

bucket_t成员变量

1._sel和_imp
sizeof(bucket_t) = 16，两个成员变量 8字节+8字节= 16字节

explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
explicit_atomic<SEL> _sel;

bucket_t函数

1.sel()
获取sel

inline SEL sel() const { return _sel.load(memory_order_relaxed); }

2. imp()
获取imp

  inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {}

3. set()
向bucket写入sel和imp

 void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);

2.cache缓存机制

通过上面的cache_t结构体成员变量和方法解析，知道了cache_t存储bucket_t指针，bucket_t里面有俩个成员变量_sel和_imp，用来保存方法method_t的sel和imp，

而且cache_t 存储bucket_t 并不是像bits存储类信息
method ---> method_list_t、
property---> property_list_t、
protocol ---> protocol_list_t、
ivar ---> ivar_list_t、
一样使用数组的形式，而是指针的形式指向存储bucket_t空间的首地址，

即不会有bucket_list_t这样的情况出现，这是cache和bits非常大的不同？为什么这么设计，下面通过LLDB调试现象以及cache相关函数的源码分析来解释这一现象

1.cache LLDB调试Xcode截图

从LLDB调试来看，cache存储bucket有几点疑问？

第一，bucket会丢失，可以看到并不是所有的方法都缓存到了cache里面
第二，bucket会乱序，并不是按照我们调用方法的顺序进行存储
第三，bucket不是连续存储的，中间有空的bucket存储空间
第四，capacity、mask、occupied三者之间有什么关系，分别是用来做什么的？

为什么会出现上面的三种异常的，这就要看cache存储的底层机制是什么？从insert的源码中寻找答案

LLDB调试-cache-bucket存储.jpeg

LLDB调试对应数据

KCObjcBuild was compiled with optimization - stepping may behave oddly; variables may not be available.
(lldb) p/x LGPerson.class
(Class) $0 = 0x00000001000086d8 LGPerson
(lldb) p (cache_t *) 0x00000001000086e8
(cache_t *) $1 = 0x00000001000086e8
(lldb) p $1->capacity()
(unsigned int) $2 = 8
(lldb) p $1->mask()
(mask_t) $3 = 7
(lldb) p $1->occupied()
(mask_t) $4 = 7
(lldb) p $1->buckets()
(bucket_t *) $5 = 0x0000000100a9bd50
(lldb) p $1->buckets()[0].sel() 
(SEL) $6 = "sayWorld333"
(lldb) p $1->buckets()[0].imp(nil, LGPerson.class) 
(IMP) $7 = 0x000000010000369c (KCObjcBuild`-[LGPerson sayWorld333] at main.m:100)
(lldb) p $1->buckets()[1].sel() 
(SEL) $8 = "sayWorld888"
(lldb) p $1->buckets()[1].imp(nil, LGPerson.class) 
(IMP) $9 = 0x0000000100003700 (KCObjcBuild`-[LGPerson sayWorld888] at main.m:106)
(lldb) p $1->buckets()[2].sel() 
(SEL) $10 = "sayWorld666"
(lldb) p $1->buckets()[2].imp(nil, LGPerson.class) 
(IMP) $11 = 0x00000001000036d8 (KCObjcBuild`-[LGPerson sayWorld666] at main.m:104)
(lldb) p $1->buckets()[3].sel() 
(SEL) $12 = "sayWorld444"
(lldb) p $1->buckets()[3].imp(nil, LGPerson.class) 
(IMP) $13 = 0x00000001000036b0 (KCObjcBuild`-[LGPerson sayWorld444] at main.m:101)
(lldb) p $1->buckets()[4].sel() 
(SEL) $14 = "sayWorld222"
(lldb) p $1->buckets()[4].imp(nil, LGPerson.class) 
(IMP) $15 = 0x0000000100003688 (KCObjcBuild`-[LGPerson sayWorld222] at main.m:99)
(lldb) p $1->buckets()[5].sel() 
(SEL) $16 = (null)
(lldb) p $1->buckets()[5].imp(nil, LGPerson.class) 
(IMP) $17 = 0x0000000000000000
(lldb) p $1->buckets()[6].sel() 
(SEL) $18 = "sayWorld777"
(lldb) p $1->buckets()[6].imp(nil, LGPerson.class) 
(IMP) $19 = 0x00000001000036ec (KCObjcBuild`-[LGPerson sayWorld777] at main.m:105)
(lldb) p $1->buckets()[7].sel() 
(SEL) $20 = "sayWorld555"
(lldb) p $1->buckets()[7].imp(nil, LGPerson.class) 
(IMP) $21 = 0x00000001000036c4 (KCObjcBuild`-[LGPerson sayWorld555] at main.m:102)
(lldb) p $1->buckets()[8].sel() 
(SEL) $22 = ""
(lldb) p $1->buckets()[8].imp(nil, LGPerson.class) 
(IMP) $23 = 0x0000000000000000
(lldb) p ($1 + 1)
(cache_t *) $24 = 0x00000001000086f8
(lldb) p ($1 + 2)
(cache_t *) $25 = 0x0000000100008708
(lldb) p ($1 + 3)
(cache_t *) $26 = 0x0000000100008718
(lldb) p ($1 + 4)
(cache_t *) $27 = 0x0000000100008728
(lldb) p ($1 + 5)
(cache_t *) $28 = 0x0000000100008738
(lldb) p ($1 + 6)
(cache_t *) $29 = 0x0000000100008748
(lldb) p ($1 + 7)
(cache_t *) $30 = 0x0000000100008758
(lldb) p ($1 + 8)
(cache_t *) $31 = 0x0000000100008768
(lldb) p ($1 + 9)
(cache_t *) $32 = 0x0000000100008778
(lldb) p ($1 + 10)
(cache_t *) $33 = 0x0000000100008788
(lldb)

2.insert()函数源码

insert()函数的几个关键点

1.初始化4个(arm64情况下)空的bucket存储空间，newBucket的指针和mask写入_bucketsAndMaybeMask，同时free掉旧的bucket存储空间

2.存储空间扩容，扩容的规则和算法是怎么实现的？

3.存储bucket的位置是怎么确定的？

4.如何将sel和imp写入到bucket里面去？

cache insert调用堆栈

cache insert调用堆栈.jpeg

insert()需要的一些枚举值

/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // When we have a cache end marker it fills a bucket slot, so having a
    // initial cache size of 2 buckets would not be efficient when one of the
    // slots is always filled with the end marker. So start with a cache size
    // 4 buckets.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Allow an initial bucket size of 2 buckets, since a large number of
    // classes, especially metaclasses, have very few imps, and we support
    // the ability to fill 100% of the cache before resizing.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};

cache_fill_ratio()

static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}

#define CACHE_END_MARKER 0

cache_hash()

// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

cache_next()

#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}
#else
#error unexpected configuration
#endif

insert()函数源码

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) { //初始化 capacity  = 4
        // Cache is read-only. Replace it.                    // capacity表示初始化多个bucket空间，容量
        if (!capacity) capacity = INIT_CACHE_SIZE; //  INIT_CACHE_SIZE =4   --->  1 << 2 == 4
        reallocate(oldCapacity, capacity, /* freeOld */false);  // 开辟capacity 个bucket存储空间，
        //bucket首地址并与  mask一起写入_bucketsAndMaybeMask
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is. 
        // newOccupied + 0 <= capacity * (7/8)，如果newOccupied <= 7/8 * capacity则什么都不做，即不扩容
        // capacity = 4，capacity * (7/8) = 3.5，capacity = 8，capacity * (7/8) = 7
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
        //  capacity <= 8 && newOccupied + 0 <= capacity，即newOccupied = 4的时候什么也不做
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍，否则两倍扩容，
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true); // true ---> free 掉旧的bucket存储空间
    }

    bucket_t *b = buckets(); // 获取bucket指针，首地址
    mask_t m = capacity - 1; // m ---> mask 与capacity的关系是 mask = capacity - 1
    mask_t begin = cache_hash(sel, m); //哈希算法函数利用sel的唯一性大概率begin不会重复
    mask_t i = begin;   // begin保持最初的哈希值，即下标，i动态变化寻找空的bucket存储空间存入sel和imp
// 哈希算法  uintptr_t value = (uintptr_t)sel;  ---> (value & mask)
// 由于m =  capacity - 1，所以任何值按位与上都是0-7之间
//即 capacity = 4， begin = 0-->3，capacity = 8， began = 0-->7


    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) { // == 0表示位空，则写入sel和imp
            incrementOccupied(); // _occupied++
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());// bucket写入sel和imp
            return; //写入后返回
        }
        if (b[i].sel() == sel) { //如果已经存储了sel则返回
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));
// 如果最初的哈希值begin对应的bucket[i]有值且bucket[i]不等于要写入的sel，称为哈希冲突，则进行二次哈希
// static inline mask_t cache_next(mask_t i, mask_t mask) {
//    return i ? i-1 : mask;
//}
// 假设 i = 0的位置写入了a的sel，假设capacity = 4，则 m = 3， 后面又b，c，d的sel和imp需要写入
// { a_sel }，{   }，{   }，{   }
// 此时如果b的哈希值 在 1-3之间，则走第一个if判断直接在对应的bucket[i] = bucket[begin]位置上写入sel和imp
// cache_next是第二次哈希，用于处理第一次哈希下标begin的位置上，bucket已经写入sel和imp了，假设b的哈希值begin = 0，
// 则i = cache_nex(i, m) = 3，3! = 0，满足while条件执行两个if分支，第一个if在b[3]上写入sel和imp
// { a_sel, a_imp}，{   }，{   }，{ b_sel, b_imp}
// 此时c放来了，begin = 3，cache_next后 i = 2，第一个if在b[2]上写入sel和imp
// { a_sel, a_imp}，{   }，{  c_sel, c_imp }，{ b_sel, b_imp}
// 此时c放来了，begin = 0，cache_next后 i = 3，再次cache_next后 i = 2，第一个if在b[2]上写入sel和imp
// { a_sel, a_imp}，{   }，{  c_sel, c_imp }，{ b_sel, b_imp}
// 总之，如果没有哈希冲突b[i]位置直接写入sel和imp，如果有哈希冲突的话
// i = 1   ---> i = 0
// i = 0   ---> i = 3
// i = 3   ---> i = 2
// i = 2   ---> i = 1
// 由于容量capacity肯定是够存储的，加上cache_next 这种算法，一定可以找到空的bucket写入sel和imp
// i = 1   ---> i = 0
// i = 0   ---> i = m
// i = m   ---> i = m - 1
// i = m - 1   ---> i = m - 2


    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

insert()函数主要流程

1.初始化 ---> 2.扩容 ---> 3.bucket写入准备 ---> 4.do-while循环写入sel和imp到bucket

1.初始化
确定capacity和开辟与capacity对应的新的空的bucket存储空间，bucket首地址指针和mask写入_bucketsAndMaybeMask，由于没有旧的bucket指针，所以，不会free旧的bucket存储空间

2.扩容
扩容条件达到后，即newOccupied > capacity * (7/8)
扩容两倍capacity = capacity * 2 ，开辟与capacity对应的新的空的bucket存储空间，bucket首地址指针和mask写入_bucketsAndMaybeMask，并且free掉旧的bucket存储空间

3.bucket写入准备
do - while写入之前准备数据
bucket_t *b = buckets(); 拿到bucket指针
mask_t m = capacity - 1; 确定下标范围 0 - m
mask_t begin = cache_hash(sel, m); sel和m哈希后得到初始存储下标begin，0-m之间随机的
mask_t i = begin; i变动下标赋初值

4.do-while循环写入sel和imp到bucket
do - while循环写入
b[i].sel() == 0，没有写入sel和imp，则直接写入
b[i].sel() == sel，要写入的sel和imp已经写入到了b[i]的位置上，则什么都不做
b[i]已经写入sel和imp并且不是要写入的sel和imp，则二次哈希寻找其他可以写入的位置
由于cache_next()的算法和capacity足够大，一定会找到可以写入sel和imp的bucket

LLDB调试的几个疑点解答

第一，bucket会丢失，可以看到并不是所有的方法都缓存到了cache里面
原因是扩容后会free掉旧的bucket存储空间，只缓存触发扩容的这个方法，所以，bucket会丢失

第二，bucket会乱序，并不是按照我们调用方法的顺序进行存储
因为b[i]中的缓存下标是用sel和mask哈希后的值，如果哈希冲突还要进行二次哈希，所以是一个随机值，在0-mask之间

第三，bucket不是连续存储的，中间有空的bucket存储空间
同样，b[i]下标是sel和mask哈希后或者二次哈希的，存储顺序并不是调用顺序，有空的是因为reallocate()函数，开辟capacity个新的、空的、bucket存储空间，然后将bucket指针和mask写入到_bucketsAndMaybeMask，存储是随机的所以会有空的bucket存在

第四，capacity、mask、occupied三者之间有什么关系，分别是用来做什么的？
capacity容量，即创建多少个空的bucket空间
mask掩码，确定存储下标， 0-mask之间，mask = capacity - 1
occupied，已经占用多少个bucket存储空间，即写入了多少次sel和imp

insert()内部关键函数reallocate

1. reallocate()
核心三点
第一，allocateBuckets() ---> calloc()开辟ewCapacity个bucket存储空间
第二，setBucketsAndMask() ---> 新开辟的bucket存储空间和newCapacity写入_bucketsAndMaybeMask
第三，collect_free() ---> collect_free掉旧的bucket存储空间

第一和第二容易理解，开辟对应个数的bucket存储空间，保存起来

void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity); //临时变量
    // 开辟newCapacity个bucket存储空间，新的空的，newBuckets首地址指针
    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1); //存储newBuckets，空格，空格，没有存储sel和imp
    //newBuckets首地址指针和mask写入_bucketsAndMaybeMask
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity); //旧的free掉，所以会有丢失
    }
}

allocateBuckets()
calloc()开辟newCapacity个bucket存储空间

bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
{
    if (PrintCaches) recordNewCache(newCapacity);

    return (bucket_t *)calloc(bytesForCapacity(newCapacity), 1); // calloc开辟内存，转换为bucket_t *
}

**```bytesForCapacity()```**
size_t cache_t::bytesForCapacity(uint32_t cap)
{
    return sizeof(bucket_t) * cap;
}

setBucketsAndMask()
新的空的newBuckets首地址指针和mask存入_bucketsAndMaybeMask成员变量，同时_occupied = 0 清零

void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;

    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);
   
// 新的bucket指针，newBuckets和mask写入_bucketsAndMaybeMask
// (uintptr_t)newMask << maskShift，mask左移48位存储在高16位
// newMask 和newBuckets 或 | 操作，各自存储相互不影响，
// newMask低48位是0，newBuckets高16位是0
    _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_relaxed);
    _occupied = 0; // occupied 清零
}

mask()
获取mask ---> _bucketsAndMaybeMask成员变量通过maskShift掩码获取``mask```

mask_t cache_t::mask() const
{
    uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
    return maskAndBuckets >> maskShift; // 通过maskShift获取mask，高16位的值 
}

2. collect_free()

触发节点

free旧的bucket存储空间的触发节点是扩容后，开辟新的free旧的，并不一定真的会free掉，只是cache的bucket指针不再指向该空间，实现了cache_::buckets() 逻辑上面的剥离

核心逻辑

1.malloc()或者realloc()标记这段地址空间，已经被使用，系统开辟空间不会占用到该段内存

2.每次扩容后地址空间累计，一段一段的累计，用一个二级指针存储旧的buckets指针，用这一次旧的bucket缓存在二级指针的最后，，garbage_count++标记二级指针的下标
也就是数组的最后，比如第一次8个地址需要标记为已经被使用，则bucket指针指向二级指针的下标index = 7的位置，第二次16个，则指向index = 15的位置，真正触发c函数free()地址空间的时候，倒序输出

由于有旧的bucket指针做指向，所以，bucket垃圾回收空间就是扩容之前的旧的bucket指向的空间，征用被标记为已经被使用且没有赋初值，所以依然保存原值，即地址空间内的值是真实的扩容之前的cache 的buckets指向的那段内存

3.符合条件触发真正的c语言函数 free(dead); free掉指向bucket的指针，这里的条件有两个
第一，是累计标记为被使用的空间 >= 32K = 32 * 1024，小于32K，则return返回
第二，objc_msgSend或者其他的 cache reader 没有正在访问该段内存且有可能会用到里面的buckets信息

简单理解就是超长垃圾回收存储空间的阈值，该段内存没有再继续被使用了

第一点的源码内英文注释
// Done if the garbage is not full

第二点的源码内英文注释
// Synchronize collection with objc_msgSend and other cache readers
// objc_msgSend (or other cache reader) is currently looking in
// the cache and might still be using some garbage.

只有两个条件同时满足才会触发真正的c语言函数free()，其他情况都是标记为已经被使用，系统开辟空间不会占用到该段内存，cache的buckets不再指向，默默的备用状态

1.collect_free()

void cache_t::collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    // 旧的bucket存储空间被标记为已经使用，malloc(),realloc()
    _garbage_make_room ();

    // 垃圾回收空间累加器累计本次旧的buckets存储空间的长度，
    // 这里是字节数，不是sizeof(bucket_t)的数值，即sizeof(bucket_t) * capacity
    garbage_byte_size += cache_t::bytesForCapacity(capacity);

    // 旧的bucket指针，指向最后面，用于collectNolock倒序free指针
    garbage_refs[garbage_count++] = data;

    // 有条件调用c语言函数 free()，真正释放掉旧的bucket指向的空间，不满足条件则只是标记为被使用
    // 地址空间内的值，依然是旧的bucket存储的sel和imp，转为默默的备用状态
    cache_t::collectNolock(false);
}

// capacity of initial garbage_refs
enum {
    INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8，刚好8个字节
};

2.第一次扩容
arm64情况下，第一次扩容是8个字节，reallocate(oldCapacity, capacity, true);最后一个传true表示需要collect_free ---> /* freeOld */

else { // 满足扩容条件
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
      }
      reallocate(oldCapacity, capacity, true); //开辟新的buckets空间，collect_free旧的buckets空间
}

2.第一次扩容触发节点

// amount of memory represented by all refs in the garbage
static size_t garbage_byte_size = 0; // 垃圾回收空间累加器，
//标记为被使用地址空间的长度字节数 

// do not empty the garbage until garbage_byte_size gets at least this big
static size_t garbage_threshold = 32*1024; // 最大的垃圾回收阈值，32k，小于32k则什么都不做

// table of refs to free
static bucket_t **garbage_refs = 0; // 二级指针，指向指针的指针

// current number of refs in garbage_refs
static size_t garbage_count = 0; //累计地址空间下标

// capacity of current garbage_refs
static size_t garbage_max = 0; //标记为被使用最大的字节数，这里不是sizeof(bucket_t)的数量

// capacity of initial garbage_refs
enum {
    INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8，刚好8个字节，第次刚好8个字节
};

3._garbage_make_room()
旧的bucket指向空间标记为被使用

static void _garbage_make_room(void)
{
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first) //第一次，8个字节空间被标记为已经被使用
    {            // INIT_GARBAGE_COUNT = 128,   128 / sizeof(bucket_t) = 128 / 16 = 8，刚好8个字节
        first = 0;
        garbage_refs = (bucket_t**)
            malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;// garbage_max垃圾回收存储空间最大值赋初值
    }

    // Double the table if it is full
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t**)
            realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2; // garbage_max垃圾回收存储空间最大值每次扩容两倍
                                          // bucket_t *newBuckets = allocateBuckets(newCapacity); 
                                          // 刚好和cache bucket两倍扩容完全对应上，完美标记为被使用，指针地址安全
                                          // 由于有旧的bucket指针做指向，所以，bucket垃圾回收空间就是扩容之前的
                                          // 旧的bucket指向的空间，征用被标记为已经被使用且没有赋初值，所以依然保存原值
                                          // 即地址空间内是真实的扩容之前的cache 的buckets指向的那段内存
    }
}

4. collectNolock()
有条件触发真正的c语言函数free()，释放旧的buckets指向的空间，倒序释放

void cache_t::collectNolock(bool collectALot)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    // Done if the garbage is not full
    if (garbage_byte_size < garbage_threshold  &&  !collectALot) {
        return; // collectALot = false，garbage_byte_size垃圾回收空间累加器
                    //小于垃圾回收阈值32k，则返回 ，什么都不做
    }

    // Synchronize collection with objc_msgSend and other cache readers
    if (!collectALot) { // collectALot = false，指向分支
        if (_collecting_in_critical ()) { // 是否处于objc_msgSend (or other cache reader)
// 正在查看或者有可能使用垃圾回收空间的数据的临界状态，如果是则返回，什么都不做
            // objc_msgSend (or other cache reader) is currently looking in
            // the cache and might still be using some garbage.
            if (PrintCaches) {
                _objc_inform ("CACHES: not collecting; "
                              "objc_msgSend in progress");
            }
            return;
        }
    } 
    else {
        // No excuses.
        while (_collecting_in_critical()) 
            ;
    }

    // No cache readers in progress - garbage is now deletable

    // Log our progress
    if (PrintCaches) {
        cache_collections++;
        _objc_inform ("CACHES: COLLECTING %zu bytes (%zu allocations, %zu collections)", garbage_byte_size, cache_allocations, cache_collections);
    }
    
// 超过阈值32K并且没有被使用，则真正的会释放掉，调用c语言的free()函数
    // Dispose all refs now in the garbage
    // Erase each entry so debugging tools don't see stale pointers.
    while (garbage_count--) { // 倒序输出
        auto dead = garbage_refs[garbage_count]; // 去除指针
        garbage_refs[garbage_count] = nil; // 指针= nil，避免野指针
        free(dead); // 释放指向的空间
    }
    
    // Clear the garbage count and total size indicator
    garbage_count = 0; //二级指针index下标清零
    garbage_byte_size = 0; // 垃圾回收空间累加器清零

    if (PrintCaches) {
        size_t I;
        size_t total_count = 0;
        size_t total_size = 0;

        for (i = 0; i < countof(cache_counts); i++) {
            int count = cache_counts[I];
            int slots = 1 << I;
            size_t size = count * slots * sizeof(bucket_t);

            if (!count) continue;

            _objc_inform("CACHES: %4d slots: %4d caches, %6zu bytes", 
                         slots, count, size);

            total_count += count;
            total_size += size;
        }

        _objc_inform("CACHES:      total: %4zu caches, %6zu bytes", 
                     total_count, total_size);
    }
}

4.总结

cache insert 整体流程图

cache--insert--sel&imp.jpg

最后编辑于：2022.05.27 12:50:33

人面猴
序言：七十年代末，一起剥皮案震惊了整个滨河市，随后出现的几起案子，更是在滨河造成了极大的恐慌，老刑警刘岩，带你破解...
沈念sama阅读 214,377评论 6赞 496
死咒
序言：滨河连续发生了三起死亡事件，死亡现场离奇诡异，居然都是意外死亡，警方通过查阅死者的电脑和手机，发现死者居然都...
沈念sama阅读 91,390评论 3赞 389
救了他两次的神仙让他今天三更去死
文/潘晓璐我一进店门，熙熙楼的掌柜王于贵愁眉苦脸地迎上来，“玉大人，你说我怎么就摊上这事。” “怎么了？”我有些...
开封第一讲书人阅读 159,967评论 0赞 349
道士缉凶录：失踪的卖姜人
文/不坏的土叔我叫张陵，是天一观的道长。经常有香客问我，道长，这世上最难降的妖魔是什么？我笑而不...
开封第一讲书人阅读 57,344评论 1赞 288
港岛之恋（遗憾婚礼）
正文为了忘掉前任，我火速办了婚礼，结果婚礼上，老公的妹妹穿的比我还像新娘。我一直安慰自己，他们只是感情好，可当我...
茶点故事阅读 66,441评论 6赞 386
恶毒庶女顶嫁案：这布局不是一般人想出来的
文/花漫我一把揭开白布。她就那样静静地躺着，像睡着了一般。火红的嫁衣衬着肌肤如雪。梳的纹丝不乱的头发上，一...
开封第一讲书人阅读 50,492评论 1赞 292
城市分裂传说
那天，我揣着相机与录音，去河边找鬼。笑死，一个胖子当着我的面吹牛，可吹牛的内容都是我干的。我是一名探鬼主播，决...
沈念sama阅读 39,497评论 3赞 412
双鸳鸯连环套：你想象不到人心有多黑
文/苍兰香墨我猛地睁开眼，长吁一口气：“原来是场噩梦啊……” “哼！你这毒妇竟也来了？” 一声冷哼从身侧响起，我...
开封第一讲书人阅读 38,274评论 0赞 269
万荣杀人案实录
序言：老挝万荣一对情侣失踪，失踪者是张志新（化名）和其女友刘颖，没想到半个月后，有当地人在树林里发现了一具尸体，经...
沈念sama阅读 44,732评论 1赞 307
护林员之死
正文独居荒郊野岭守林人离奇死亡，尸身上长有42处带血的脓包…… 初始之章·张勋以下内容为张勋视角年9月15日...
茶点故事阅读 37,008评论 2赞 328
白月光启示录
正文我和宋清朗相恋三年，在试婚纱的时候发现自己被绿了。大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
茶点故事阅读 39,184评论 1赞 342
活死人
序言：一个原本活蹦乱跳的男人离奇死亡，死状恐怖，灵堂内的尸体忽然破棺而出，到底是诈尸还是另有隐情，我是刑警宁泽，带...
沈念sama阅读 34,837评论 4赞 337
日本核电站爆炸内幕
正文年R本政府宣布，位于F岛的核电站，受9级特大地震影响，放射性物质发生泄漏。R本人自食恶果不足惜，却给世界环境...
茶点故事阅读 40,520评论 3赞 322
男人毒药：我在死后第九天来索命
文/蒙蒙一、第九天我趴在偏房一处隐蔽的房顶上张望。院中可真热闹，春花似锦、人声如沸。这庄子的主人今日做“春日...
开封第一讲书人阅读 31,156评论 0赞 21
一桩弑父案，背后竟有这般阴谋
文/苍兰香墨我抬头看了看天上的太阳。三九已至，却和暖如春，着一层夹袄步出监牢的瞬间，已是汗流浃背。一阵脚步声响...
开封第一讲书人阅读 32,407评论 1赞 268
情欲美人皮
我被黑心中介骗来泰国打工，没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留，地道东北人。一个月前我还...
沈念sama阅读 47,056评论 2赞 365
代替公主和亲
正文我出身青楼，却偏偏与公主长得像，于是被迫代替她去往敌国和亲。传闻我的和亲对象是个残疾皇子，可洞房花烛夜当晚...
茶点故事阅读 44,074评论 2赞 352

iOS-底层原理-类结构分析cache

1.cache中存储的是什么？

2.cache缓存机制

insert()内部关键函数reallocate

4.总结

推荐阅读更多精彩内容