iOS-底层原理-类结构分析cache

1.cache中存储的是什么?

上一篇博客分析了类的isa、superclass、bits,这一篇主要分析cache的缓存机制

1.cache_t 结构体部分代码,用到的成员变量和部分函数

struct cache_t {
private:
    // explicit_atomic 显式原子性,目的是为了能够 保证 增删改查时 线程的安全性
    explicit_atomic<uintptr_t> _bucketsAndMaybeMask;
    //mask(高16位)+bucket(低48位) mask的本质是开辟可以存储bucket_t空间的下标
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits                                             
    //  _bucketsAndMaybeMask:  低48位是bucket_t pointer即指向bucket_t的指针,高16位是mask  
    union { 
        struct {// 可见bucket_t在cache里面是指针的形式存在的
            explicit_atomic<mask_t>    _maybeMask;  // _maybeMask is unused, the mask is stored in the top 16 bits.
#if __LP64__                                                              // _maybeMask不再使用
            uint16_t                   _flags; //状态标识,用于计算某些值
#endif
            uint16_t                   _occupied; //已经占用单位空间的数量
        };
        explicit_atomic<preopt_cache_t *> _originalPreoptCache;
    };

// 不同架构下掩码的值
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_OUTLINED // MacOS、模拟器
    // _bucketsAndMaybeMask is a buckets_t pointer
    // _maybeMask is the buckets mask

    static constexpr uintptr_t bucketsMask = ~0ul;
    static_assert(!CONFIG_USE_PREOPT_CACHES, "preoptimized caches not supported");
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    static constexpr uintptr_t maskShift = 48;
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << maskShift) - 1;
    
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS, "Bucket field doesn't have enough bits for arbitrary pointers.");
#if CONFIG_USE_PREOPT_CACHES
    static constexpr uintptr_t preoptBucketsMarker = 1ul;
    static constexpr uintptr_t preoptBucketsMask = bucketsMask & ~preoptBucketsMarker;
#endif
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16 //64位真机
    // _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
    // _maybeMask is unused, the mask is stored in the top 16 bits.
    // _bucketsAndMaybeMask 低48位是bucket指针,高16位是mask

    // How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;  //mask在64位中的高16位,要想获取到对应的值需要_bucketsAndMaybeMask里面的值左移48位

    // Additional bits after the mask which must be zero. msgSend
    // takes advantage of these additional bits to construct the value
    // `mask << 4` from `_maskAndBuckets` in a single instruction.
    static constexpr uintptr_t maskZeroBits = 4;

    // The largest mask value we can store.
    //可以存储mask的最大值
    static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1; // maxMask最大mask 2^(16-1)
    
    // The mask applied to `_maskAndBuckets` to retrieve the buckets pointer.
    // bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针
    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;
    
    // Ensure we have enough bits for the buckets pointer.
    static_assert(bucketsMask >= MACH_VM_MAX_ADDRESS,
            "Bucket field doesn't have enough bits for arbitrary pointers.");

     // 以下为private函数
    bool isConstantEmptyCache() const; // 是否位空
    mask_t mask() const; // 获取mask
    void incrementOccupied(); //每插入一个bucket_t,occupied++ --> 标记已经占用几个位置了
    void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask); // buckets和mask写入缓存
    void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld); //开辟缓存空间
    void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);//释放缓存空间
public:
    // The following four fields are public for objcdt's use only.
    // objcdt reaches into fields while the process is suspended
    // hence doesn't care for locks and pesky little details like this
    // and can safely use these.

    // public函数
    unsigned capacity() const;//获取capacity 开辟的缓存空间容量
    struct bucket_t *buckets() const; //获取缓存空间的bucket指针
    Class cls() const;
    mask_t occupied() const;// 获取occupied已经占用了多少个缓存空间
    void insert(SEL sel, IMP imp, id receiver);// 向缓存插入sel,imp转成bucket结构体的sel和imp进行存储
};

2.其他函数源码

// bucketsMask用来在这_bucketsAndMaybeMask里面获取bucket指针

    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

buckets()源码获取bucket指针,第一个缓存的bucket的地址

struct bucket_t *cache_t::buckets() const
{
    uintptr_t addr = _bucketsAndMaybeMask.load(memory_order_relaxed);
    return (bucket_t *)(addr & bucketsMask);
}

occupied()源码,获取占用数量,即已经插入多少个bucket

mask_t cache_t::occupied() const
{
    return _occupied;
}

void incrementOccupied()源码

void cache_t::incrementOccupied() 
{
    _occupied++;
}

capacity() 源码 // 获取容量,总共开辟了几个缓存bucket的空间

unsigned cache_t::capacity() const
{
    return mask() ? mask()+1 : 0; 
}

3.cache_t 中重要的成员变量和函数说明

成员变量

1. _bucketsAndMaybeMask

explicit_atomic<uintptr_t> _bucketsAndMaybeMask;

源码本身注释
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.
_bucketsAndMaybeMask (64位) = mask(数值)(高16位) + bucket指针(低48位)
存储的mask用于mask()函数取值,存储的bucket指针cache缓存bucket首地址

2. _maybeMask

explicit_atomic<mask_t>    _maybeMask

64位真机里不再使用

3. _flags

uint16_t  _flags;

状态标识,用于某些函数的计算

掩码
// _bucketsAndMaybeMask is a buckets_t pointer in the low 48 bits
// _maybeMask is unused, the mask is stored in the top 16 bits.

1. maskShift
获取mask需要左移的位数

static constexpr uintptr_t maskShift = 48;  

mask_bucketsAndMaybeMask64位 中的高16位,要想获取到对应的值需要_bucketsAndMaybeMask里面的值左移48位

2. maxMask
最大mask值,1 << (16-1) = 2 ^ 15

static constexpr uintptr_t maxMask = ((uintptr_t)1 << (64 - maskShift)) - 1;

3.bucketsMask
获取bucket的掩码
// The mask applied to _maskAndBuckets to retrieve the buckets pointer.

    static constexpr uintptr_t bucketsMask = ((uintptr_t)1 << (maskShift - maskZeroBits)) - 1;

函数
private

1. isConstantEmptyCache()
判断是否位空的cache

bool isConstantEmptyCache() const;

**2. mask()**
获取mask

mask_t mask() const;

3. incrementOccupied()
_occupied++

void incrementOccupied();

3. setBucketsAndMask()
calloc() 创建新的bucket指针地址,开辟空间的长度是newCapacity * sizeof(bucket_t)sizeof(bucket_t) = 16,比如newCapacity长度为8则创建8个空的bucket_t存储空间,将其首地址bucket指针和mask = newCapacity - 1 写入_bucketsAndMaybeMask缓存数据,同时_occupied = 0,就是创建newCapacity个可以装bucket_t的空间,保存起来备用

void setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask);

3. reallocate()
开辟新的bucket_t存储空间,如果有旧的bucket_t存储空间,则进行释放

void reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld);

4. collect_free()
释放旧的bucket_t存储空,即cache里的_bucketsAndMaybeMask --> buckets()不再指向其开始的存储空间

 void collect_free(bucket_t *oldBuckets, mask_t oldCapacity);

public

1. capacity
获取容量 capacity

unsigned capacity() const;

2. buckets()
获取指向bucket存储空间的首地址指针

struct bucket_t *buckets() const;

3. occupied()
获取occupied,占用数量

mask_t occupied() const;

4. struct bucket_t部分源码,依然是展示成员变量和主要函数

struct bucket_t {
private:
    // IMP-first is better for arm64e ptrauth and no worse for arm64.
    // SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
    explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
    explicit_atomic<SEL> _sel;
#else
    explicit_atomic<SEL> _sel;
    explicit_atomic<uintptr_t> _imp;
#endif

public:
    static inline size_t offsetOfSel() { return offsetof(bucket_t, _sel); }
    inline SEL sel() const { return _sel.load(memory_order_relaxed); } // 获取sel

    inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {
        uintptr_t imp = _imp.load(memory_order_relaxed); // 获取imp
        if (!imp) return nil;
#if CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_PTRAUTH
        SEL sel = _sel.load(memory_order_relaxed);
        return (IMP)
            ptrauth_auth_and_resign((const void *)imp,
                                    ptrauth_key_process_dependent_code,
                                    modifierForSEL(base, sel, cls),
                                    ptrauth_key_function_pointer, 0);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_ISA_XOR
        return (IMP)(imp ^ (uintptr_t)cls);
#elif CACHE_IMP_ENCODING == CACHE_IMP_ENCODING_NONE
        return (IMP)imp;
#else
#error Unknown method cache IMP encoding.
#endif
    }

    template <Atomicity, IMPEncoding>
    void set(bucket_t *base, SEL newSel, IMP newImp, Class cls); // 写入sel和imp
};

bucket_t成员变量

1._sel和_imp
sizeof(bucket_t) = 16,两个成员变量 8字节+8字节= 16字节

explicit_atomic<uintptr_t> _imp; // 64位真机 _imp和_sel
explicit_atomic<SEL> _sel;

bucket_t函数

1.sel()
获取sel

inline SEL sel() const { return _sel.load(memory_order_relaxed); }

2. imp()
获取imp

  inline IMP imp(UNUSED_WITHOUT_PTRAUTH bucket_t *base, Class cls) const {}

3. set()
向bucket写入sel和imp

 void set(bucket_t *base, SEL newSel, IMP newImp, Class cls);

2.cache缓存机制

通过上面的cache_t结构体成员变量和方法解析,知道了cache_t存储bucket_t指针,bucket_t里面有俩个成员变量_sel_imp,用来保存方法method_tselimp

而且cache_t 存储bucket_t 并不是像bits存储类信息
method ---> method_list_t
property---> property_list_t
protocol ---> protocol_list_t
ivar ---> ivar_list_t
一样使用数组的形式,而是指针的形式指向存储bucket_t空间的首地址

即不会有bucket_list_t这样的情况出现,这是cache和bits非常大的不同?为什么这么设计,下面通过LLDB调试现象以及cache相关函数的源码分析来解释这一现象

1.cache LLDB调试Xcode截图

LLDB调试来看,cache存储bucket有几点疑问?

第一,bucket会丢失,可以看到并不是所有的方法都缓存到了cache里面
第二,bucket会乱序,并不是按照我们调用方法的顺序进行存储
第三,bucket不是连续存储的,中间有空的bucket存储空间
第四,capacity、mask、occupied三者之间有什么关系,分别是用来做什么的?

为什么会出现上面的三种异常的,这就要看cache存储的底层机制是什么?从insert的源码中寻找答案

LLDB调试-cache-bucket存储.jpeg

LLDB调试对应数据

KCObjcBuild was compiled with optimization - stepping may behave oddly; variables may not be available.
(lldb) p/x LGPerson.class
(Class) $0 = 0x00000001000086d8 LGPerson
(lldb) p (cache_t *) 0x00000001000086e8
(cache_t *) $1 = 0x00000001000086e8
(lldb) p $1->capacity()
(unsigned int) $2 = 8
(lldb) p $1->mask()
(mask_t) $3 = 7
(lldb) p $1->occupied()
(mask_t) $4 = 7
(lldb) p $1->buckets()
(bucket_t *) $5 = 0x0000000100a9bd50
(lldb) p $1->buckets()[0].sel() 
(SEL) $6 = "sayWorld333"
(lldb) p $1->buckets()[0].imp(nil, LGPerson.class) 
(IMP) $7 = 0x000000010000369c (KCObjcBuild`-[LGPerson sayWorld333] at main.m:100)
(lldb) p $1->buckets()[1].sel() 
(SEL) $8 = "sayWorld888"
(lldb) p $1->buckets()[1].imp(nil, LGPerson.class) 
(IMP) $9 = 0x0000000100003700 (KCObjcBuild`-[LGPerson sayWorld888] at main.m:106)
(lldb) p $1->buckets()[2].sel() 
(SEL) $10 = "sayWorld666"
(lldb) p $1->buckets()[2].imp(nil, LGPerson.class) 
(IMP) $11 = 0x00000001000036d8 (KCObjcBuild`-[LGPerson sayWorld666] at main.m:104)
(lldb) p $1->buckets()[3].sel() 
(SEL) $12 = "sayWorld444"
(lldb) p $1->buckets()[3].imp(nil, LGPerson.class) 
(IMP) $13 = 0x00000001000036b0 (KCObjcBuild`-[LGPerson sayWorld444] at main.m:101)
(lldb) p $1->buckets()[4].sel() 
(SEL) $14 = "sayWorld222"
(lldb) p $1->buckets()[4].imp(nil, LGPerson.class) 
(IMP) $15 = 0x0000000100003688 (KCObjcBuild`-[LGPerson sayWorld222] at main.m:99)
(lldb) p $1->buckets()[5].sel() 
(SEL) $16 = (null)
(lldb) p $1->buckets()[5].imp(nil, LGPerson.class) 
(IMP) $17 = 0x0000000000000000
(lldb) p $1->buckets()[6].sel() 
(SEL) $18 = "sayWorld777"
(lldb) p $1->buckets()[6].imp(nil, LGPerson.class) 
(IMP) $19 = 0x00000001000036ec (KCObjcBuild`-[LGPerson sayWorld777] at main.m:105)
(lldb) p $1->buckets()[7].sel() 
(SEL) $20 = "sayWorld555"
(lldb) p $1->buckets()[7].imp(nil, LGPerson.class) 
(IMP) $21 = 0x00000001000036c4 (KCObjcBuild`-[LGPerson sayWorld555] at main.m:102)
(lldb) p $1->buckets()[8].sel() 
(SEL) $22 = ""
(lldb) p $1->buckets()[8].imp(nil, LGPerson.class) 
(IMP) $23 = 0x0000000000000000
(lldb) p ($1 + 1)
(cache_t *) $24 = 0x00000001000086f8
(lldb) p ($1 + 2)
(cache_t *) $25 = 0x0000000100008708
(lldb) p ($1 + 3)
(cache_t *) $26 = 0x0000000100008718
(lldb) p ($1 + 4)
(cache_t *) $27 = 0x0000000100008728
(lldb) p ($1 + 5)
(cache_t *) $28 = 0x0000000100008738
(lldb) p ($1 + 6)
(cache_t *) $29 = 0x0000000100008748
(lldb) p ($1 + 7)
(cache_t *) $30 = 0x0000000100008758
(lldb) p ($1 + 8)
(cache_t *) $31 = 0x0000000100008768
(lldb) p ($1 + 9)
(cache_t *) $32 = 0x0000000100008778
(lldb) p ($1 + 10)
(cache_t *) $33 = 0x0000000100008788
(lldb) 

2.insert()函数源码

insert()函数的几个关键点

1.初始化4个(arm64情况下)空的bucket存储空间,newBucket的指针和mask写入_bucketsAndMaybeMask,同时free掉旧的bucket存储空间

2.存储空间扩容,扩容的规则和算法是怎么实现的?

3.存储bucket的位置是怎么确定的?

4.如何将sel和imp写入到bucket里面去?

cache insert调用堆栈


cache insert调用堆栈.jpeg

insert()需要的一些枚举值

/* Initial cache bucket count. INIT_CACHE_SIZE must be a power of two. */
enum {
#if CACHE_END_MARKER || (__arm64__ && !__LP64__)
    // When we have a cache end marker it fills a bucket slot, so having a
    // initial cache size of 2 buckets would not be efficient when one of the
    // slots is always filled with the end marker. So start with a cache size
    // 4 buckets.
    INIT_CACHE_SIZE_LOG2 = 2,
#else
    // Allow an initial bucket size of 2 buckets, since a large number of
    // classes, especially metaclasses, have very few imps, and we support
    // the ability to fill 100% of the cache before resizing.
    INIT_CACHE_SIZE_LOG2 = 1,
#endif
    INIT_CACHE_SIZE      = (1 << INIT_CACHE_SIZE_LOG2),
    MAX_CACHE_SIZE_LOG2  = 16,
    MAX_CACHE_SIZE       = (1 << MAX_CACHE_SIZE_LOG2),
    FULL_UTILIZATION_CACHE_SIZE_LOG2 = 3,
    FULL_UTILIZATION_CACHE_SIZE = (1 << FULL_UTILIZATION_CACHE_SIZE_LOG2),
};

cache_fill_ratio()

static inline mask_t cache_fill_ratio(mask_t capacity) {
    return capacity * 7 / 8;
}
#define CACHE_END_MARKER 0

cache_hash()

// Class points to cache. SEL is key. Cache buckets store SEL+IMP.
// Caches are never built in the dyld shared cache.

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

cache_next()

#if CACHE_END_MARKER
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return (i+1) & mask;
}
#elif __arm64__
static inline mask_t cache_next(mask_t i, mask_t mask) {
    return i ? i-1 : mask;
}
#else
#error unexpected configuration
#endif

insert()函数源码

void cache_t::insert(SEL sel, IMP imp, id receiver)
{
    runtimeLock.assertLocked();

    // Never cache before +initialize is done
    if (slowpath(!cls()->isInitialized())) {
        return;
    }

    if (isConstantOptimizedCache()) {
        _objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
                    cls()->nameForLogging());
    }

#if DEBUG_TASK_THREADS
    return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
    mutex_locker_t lock(cacheUpdateLock);
#endif

    ASSERT(sel != 0 && cls()->isInitialized());

    // Use the cache as-is if until we exceed our expected fill ratio.
    mask_t newOccupied = occupied() + 1;
    unsigned oldCapacity = capacity(), capacity = oldCapacity;
    if (slowpath(isConstantEmptyCache())) { //初始化 capacity  = 4
        // Cache is read-only. Replace it.                    // capacity表示初始化多个bucket空间,容量
        if (!capacity) capacity = INIT_CACHE_SIZE; //  INIT_CACHE_SIZE =4   --->  1 << 2 == 4
        reallocate(oldCapacity, capacity, /* freeOld */false);  // 开辟capacity 个bucket存储空间,
        //bucket首地址并与  mask一起写入_bucketsAndMaybeMask
    }
    else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {
        // Cache is less than 3/4 or 7/8 full. Use it as-is. 
        // newOccupied + 0 <= capacity * (7/8),如果newOccupied <= 7/8 * capacity则什么都不做,即不扩容
        // capacity = 4,capacity * (7/8) = 3.5,capacity = 8,capacity * (7/8) = 7
    }
#if CACHE_ALLOW_FULL_UTILIZATION
    else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
        // Allow 100% cache utilization for small buckets. Use it as-is.
        //  capacity <= 8 && newOccupied + 0 <= capacity,即newOccupied = 4的时候什么也不做
    }
#endif
    else {
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍,否则两倍扩容,
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
        }
        reallocate(oldCapacity, capacity, true); // true ---> free 掉旧的bucket存储空间
    }

    bucket_t *b = buckets(); // 获取bucket指针,首地址
    mask_t m = capacity - 1; // m ---> mask 与capacity的关系是 mask = capacity - 1
    mask_t begin = cache_hash(sel, m); //哈希算法函数利用sel的唯一性大概率begin不会重复
    mask_t i = begin;   // begin保持最初的哈希值,即下标,i动态变化寻找空的bucket存储空间存入sel和imp
// 哈希算法  uintptr_t value = (uintptr_t)sel;  ---> (value & mask)
// 由于m =  capacity - 1,所以任何值按位与上都是0-7之间
//即 capacity = 4, begin = 0-->3,capacity = 8, began = 0-->7


    // Scan for the first unused slot and insert there.
    // There is guaranteed to be an empty slot.
    do {
        if (fastpath(b[i].sel() == 0)) { // == 0表示位空,则写入sel和imp
            incrementOccupied(); // _occupied++
            b[i].set<Atomic, Encoded>(b, sel, imp, cls());// bucket写入sel和imp
            return; //写入后返回
        }
        if (b[i].sel() == sel) { //如果已经存储了sel则返回
            // The entry was added to the cache by some other thread
            // before we grabbed the cacheUpdateLock.
            return;
        }
    } while (fastpath((i = cache_next(i, m)) != begin));
// 如果最初的哈希值begin对应的bucket[i]有值且bucket[i]不等于要写入的sel,称为哈希冲突,则进行二次哈希
// static inline mask_t cache_next(mask_t i, mask_t mask) {
//    return i ? i-1 : mask;
//}
// 假设 i = 0的位置写入了a的sel,假设capacity = 4,则 m = 3, 后面又b,c,d的sel和imp需要写入
// { a_sel },{   },{   },{   }
// 此时如果b的哈希值 在 1-3之间,则走第一个if判断直接在对应的bucket[i] = bucket[begin]位置上写入sel和imp
// cache_next是第二次哈希,用于处理第一次哈希下标begin的位置上,bucket已经写入sel和imp了,假设b的哈希值begin = 0,
// 则i = cache_nex(i, m) = 3,3! = 0,满足while条件执行两个if分支,第一个if在b[3]上写入sel和imp
// { a_sel, a_imp},{   },{   },{ b_sel, b_imp}
// 此时c放来了,begin = 3,cache_next后 i = 2,第一个if在b[2]上写入sel和imp
// { a_sel, a_imp},{   },{  c_sel, c_imp },{ b_sel, b_imp}
// 此时c放来了,begin = 0,cache_next后 i = 3,再次cache_next后 i = 2,第一个if在b[2]上写入sel和imp
// { a_sel, a_imp},{   },{  c_sel, c_imp },{ b_sel, b_imp}
// 总之,如果没有哈希冲突b[i]位置直接写入sel和imp,如果有哈希冲突的话
// i = 1   ---> i = 0
// i = 0   ---> i = 3
// i = 3   ---> i = 2
// i = 2   ---> i = 1
// 由于容量capacity肯定是够存储的,加上cache_next 这种算法,一定可以找到空的bucket写入sel和imp
// i = 1   ---> i = 0
// i = 0   ---> i = m
// i = m   ---> i = m - 1
// i = m - 1   ---> i = m - 2


    bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}

insert()函数主要流程

1.初始化 ---> 2.扩容 ---> 3.bucket写入准备 ---> 4.do-while循环写入sel和imp到bucket

1.初始化
确定capacity和开辟与capacity对应的新的空的bucket存储空间bucket首地址指针mask写入_bucketsAndMaybeMask,由于没有旧的bucket指针,所以,不会free旧的bucket存储空间

2.扩容
扩容条件达到后, 即newOccupied > capacity * (7/8)
扩容两倍capacity = capacity * 2 ,开辟与capacity对应的新的空的bucket存储空间bucket首地址指针mask写入_bucketsAndMaybeMask,并且free旧的bucket存储空间

3.bucket写入准备
do - while写入之前准备数据
bucket_t *b = buckets(); 拿到bucket指针
mask_t m = capacity - 1; 确定下标范围 0 - m
mask_t begin = cache_hash(sel, m); sel和m哈希后得到初始存储下标begin,0-m之间随机的
mask_t i = begin; i变动下标赋初值

4.do-while循环写入sel和imp到bucket
do - while循环写入
b[i].sel() == 0,没有写入sel和imp,则直接写入
b[i].sel() == sel,要写入的sel和imp已经写入到了b[i]的位置上,则什么都不做
b[i]已经写入sel和imp并且不是要写入的sel和imp,则二次哈希寻找其他可以写入的位置
由于cache_next()的算法和capacity足够大,一定会找到可以写入sel和imp的bucket

LLDB调试的几个疑点解答

第一,bucket丢失,可以看到并不是所有的方法都缓存到了cache里面
原因是扩容后会free旧的bucket存储空间,只缓存触发扩容的这个方法,所以,bucket会丢失

第二,bucket乱序,并不是按照我们调用方法的顺序进行存储
因为b[i]中的缓存下标是用sel和mask哈希后的值,如果哈希冲突还要进行二次哈希,所以是一个随机值,在0-mask之间

第三,bucket不是连续存储的,中间有空的bucket存储空间
同样,b[i]下标是sel和mask哈希后或者二次哈希的,存储顺序并不是调用顺序,有空的是因为reallocate()函数,开辟capacity新的、空的bucket存储空间,然后将bucket指针和mask写入到_bucketsAndMaybeMask,存储是随机的所以会有空的bucket存在

第四,capacity、mask、occupied三者之间有什么关系,分别是用来做什么的?
capacity容量,即创建多少个空的bucket空间
mask掩码,确定存储下标, 0-mask之间,mask = capacity - 1
occupied,已经占用多少个bucket存储空间,即写入了多少次sel和imp

insert()内部关键函数reallocate

1. reallocate()
核心三点
第一,allocateBuckets() ---> calloc()开辟ewCapacitybucket存储空间
第二,setBucketsAndMask() ---> 新开辟的bucket存储空间和newCapacity写入_bucketsAndMaybeMask
第三,collect_free() ---> collect_free旧的bucket存储空间

第一和第二容易理解,开辟对应个数的bucket存储空间,保存起来

void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
    bucket_t *oldBuckets = buckets();
    bucket_t *newBuckets = allocateBuckets(newCapacity); //临时变量
    // 开辟newCapacity个bucket存储空间,新的空的,newBuckets首地址指针
    // Cache's old contents are not propagated. 
    // This is thought to save cache memory at the cost of extra cache fills.
    // fixme re-measure this

    ASSERT(newCapacity > 0);
    ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);

    setBucketsAndMask(newBuckets, newCapacity - 1); //存储newBuckets,空格,空格,没有存储sel和imp
    //newBuckets首地址指针和mask写入_bucketsAndMaybeMask
    if (freeOld) {
        collect_free(oldBuckets, oldCapacity); //旧的free掉,所以会有丢失
    }
}

allocateBuckets()
calloc()开辟newCapacitybucket存储空间

bucket_t *cache_t::allocateBuckets(mask_t newCapacity)
{
    if (PrintCaches) recordNewCache(newCapacity);

    return (bucket_t *)calloc(bytesForCapacity(newCapacity), 1); // calloc开辟内存,转换为bucket_t *
}

**```bytesForCapacity()```**
size_t cache_t::bytesForCapacity(uint32_t cap)
{
    return sizeof(bucket_t) * cap;
}

setBucketsAndMask()
新的空的newBuckets首地址指针和mask存入_bucketsAndMaybeMask成员变量,同时_occupied = 0 清零

void cache_t::setBucketsAndMask(struct bucket_t *newBuckets, mask_t newMask)
{
    uintptr_t buckets = (uintptr_t)newBuckets;
    uintptr_t mask = (uintptr_t)newMask;

    ASSERT(buckets <= bucketsMask);
    ASSERT(mask <= maxMask);
   
// 新的bucket指针,newBuckets和mask写入_bucketsAndMaybeMask
// (uintptr_t)newMask << maskShift,mask左移48位存储在高16位
// newMask 和newBuckets 或 | 操作,各自存储相互不影响,
// newMask低48位是0,newBuckets高16位是0
    _bucketsAndMaybeMask.store(((uintptr_t)newMask << maskShift) | (uintptr_t)newBuckets, memory_order_relaxed);
    _occupied = 0; // occupied 清零
}

mask()
获取mask ---> _bucketsAndMaybeMask成员变量通过maskShift掩码获取``mask```

mask_t cache_t::mask() const
{
    uintptr_t maskAndBuckets = _bucketsAndMaybeMask.load(memory_order_relaxed);
    return maskAndBuckets >> maskShift; // 通过maskShift获取mask,高16位的值 
}

2. collect_free()

触发节点

free旧的bucket存储空间的触发节点是扩容后,开辟新的free旧的,并不一定真的会free掉,只是cache的bucket指针不再指向该空间,实现了cache_::buckets() 逻辑上面的剥离

核心逻辑

1.malloc()或者realloc()标记这段地址空间,已经被使用,系统开辟空间不会占用到该段内存

2.每次扩容后地址空间累计,一段一段的累计,用一个二级指针存储旧的buckets指针,用这一次旧的bucket缓存在二级指针的最后,,garbage_count++标记二级指针的下标
也就是数组的最后,比如第一次8个地址需要标记已经被使用,则bucket指针指向二级指针的下标index = 7的位置,第二次16个,则指向index = 15的位置,真正触发c函数free()地址空间的时候,倒序输出

由于有旧的bucket指针做指向,所以,bucket垃圾回收空间就是扩容之前的旧的bucket指向的空间,征用被标记为已经被使用且没有赋初值,所以依然保存原值,即地址空间内的值是真实的扩容之前cache 的buckets指向的那段内存

3.符合条件触发真正的c语言函数 free(dead); free掉指向bucket的指针,这里的条件有两个
第一,是累计标记为被使用的空间 >= 32K = 32 * 1024,小于32K,则return返回
第二,objc_msgSend或者其他的 cache reader 没有正在访问该段内存且有可能会用到里面的buckets信息

简单理解就是超长垃圾回收存储空间的阈值,该段内存没有再继续被使用了

第一点的源码内英文注释
// Done if the garbage is not full

第二点的源码内英文注释
// Synchronize collection with objc_msgSend and other cache readers
// objc_msgSend (or other cache reader) is currently looking in
// the cache and might still be using some garbage.

只有两个条件同时满足才会触发真正的c语言函数free(),其他情况都是标记为已经被使用,系统开辟空间不会占用到该段内存,cachebuckets不再指向,默默的备用状态

1.collect_free()

void cache_t::collect_free(bucket_t *data, mask_t capacity)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    if (PrintCaches) recordDeadCache(capacity);

    // 旧的bucket存储空间被标记为已经使用,malloc(),realloc()
    _garbage_make_room ();

    // 垃圾回收空间累加器累计本次旧的buckets存储空间的长度,
    // 这里是字节数,不是sizeof(bucket_t)的数值,即sizeof(bucket_t) * capacity
    garbage_byte_size += cache_t::bytesForCapacity(capacity);

    // 旧的bucket指针,指向最后面,用于collectNolock倒序free指针
    garbage_refs[garbage_count++] = data;

    // 有条件调用c语言函数 free(),真正释放掉旧的bucket指向的空间,不满足条件则只是标记为被使用
    // 地址空间内的值,依然是旧的bucket存储的sel和imp,转为默默的备用状态
    cache_t::collectNolock(false);
}
// capacity of initial garbage_refs
enum {
    INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节
};

2.第一次扩容
arm64情况下,第一次扩容是8个字节,reallocate(oldCapacity, capacity, true);最后一个传true表示需要collect_free ---> /* freeOld */

else { // 满足扩容条件
        capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE; // 扩容2倍
        if (capacity > MAX_CACHE_SIZE) {
            capacity = MAX_CACHE_SIZE;
      }
      reallocate(oldCapacity, capacity, true); //开辟新的buckets空间,collect_free旧的buckets空间
}

2.第一次扩容触发节点

// amount of memory represented by all refs in the garbage
static size_t garbage_byte_size = 0; // 垃圾回收空间累加器,
//标记为被使用地址空间的长度字节数 

// do not empty the garbage until garbage_byte_size gets at least this big
static size_t garbage_threshold = 32*1024; // 最大的垃圾回收阈值,32k,小于32k则什么都不做

// table of refs to free
static bucket_t **garbage_refs = 0; // 二级指针,指向指针的指针

// current number of refs in garbage_refs
static size_t garbage_count = 0; //累计地址空间下标

// capacity of current garbage_refs
static size_t garbage_max = 0; //标记为被使用最大的字节数,这里不是sizeof(bucket_t)的数量

// capacity of initial garbage_refs
enum {
    INIT_GARBAGE_COUNT = 128 // 128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节,第次刚好8个字节
};

3._garbage_make_room()
旧的bucket指向空间标记为被使用

static void _garbage_make_room(void)
{
    static int first = 1;

    // Create the collection table the first time it is needed
    if (first) //第一次,8个字节空间被标记为已经被使用
    {            // INIT_GARBAGE_COUNT = 128,   128 / sizeof(bucket_t) = 128 / 16 = 8,刚好8个字节
        first = 0;
        garbage_refs = (bucket_t**)
            malloc(INIT_GARBAGE_COUNT * sizeof(void *));
        garbage_max = INIT_GARBAGE_COUNT;// garbage_max垃圾回收存储空间最大值赋初值
    }

    // Double the table if it is full
    else if (garbage_count == garbage_max)
    {
        garbage_refs = (bucket_t**)
            realloc(garbage_refs, garbage_max * 2 * sizeof(void *));
        garbage_max *= 2; // garbage_max垃圾回收存储空间最大值每次扩容两倍
                                          // bucket_t *newBuckets = allocateBuckets(newCapacity); 
                                          // 刚好和cache bucket两倍扩容完全对应上,完美标记为被使用,指针地址安全
                                          // 由于有旧的bucket指针做指向,所以,bucket垃圾回收空间就是扩容之前的
                                          // 旧的bucket指向的空间,征用被标记为已经被使用且没有赋初值,所以依然保存原值
                                          // 即地址空间内是真实的扩容之前的cache 的buckets指向的那段内存
    }
}

4. collectNolock()
有条件触发真正的c语言函数free(),释放旧的buckets指向的空间,倒序释放

void cache_t::collectNolock(bool collectALot)
{
#if CONFIG_USE_CACHE_LOCK
    cacheUpdateLock.assertLocked();
#else
    runtimeLock.assertLocked();
#endif

    // Done if the garbage is not full
    if (garbage_byte_size < garbage_threshold  &&  !collectALot) {
        return; // collectALot = false,garbage_byte_size垃圾回收空间累加器
                    //小于垃圾回收阈值32k,则返回 ,什么都不做
    }

    // Synchronize collection with objc_msgSend and other cache readers
    if (!collectALot) { // collectALot = false,指向分支
        if (_collecting_in_critical ()) { // 是否处于objc_msgSend (or other cache reader)
// 正在查看或者有可能使用垃圾回收空间的数据的临界状态,如果是则返回,什么都不做
            // objc_msgSend (or other cache reader) is currently looking in
            // the cache and might still be using some garbage.
            if (PrintCaches) {
                _objc_inform ("CACHES: not collecting; "
                              "objc_msgSend in progress");
            }
            return;
        }
    } 
    else {
        // No excuses.
        while (_collecting_in_critical()) 
            ;
    }

    // No cache readers in progress - garbage is now deletable

    // Log our progress
    if (PrintCaches) {
        cache_collections++;
        _objc_inform ("CACHES: COLLECTING %zu bytes (%zu allocations, %zu collections)", garbage_byte_size, cache_allocations, cache_collections);
    }
    
// 超过阈值32K并且没有被使用,则真正的会释放掉,调用c语言的free()函数
    // Dispose all refs now in the garbage
    // Erase each entry so debugging tools don't see stale pointers.
    while (garbage_count--) { // 倒序输出
        auto dead = garbage_refs[garbage_count]; // 去除指针
        garbage_refs[garbage_count] = nil; // 指针= nil,避免野指针
        free(dead); // 释放指向的空间
    }
    
    // Clear the garbage count and total size indicator
    garbage_count = 0; //二级指针index下标清零
    garbage_byte_size = 0; // 垃圾回收空间累加器清零

    if (PrintCaches) {
        size_t I;
        size_t total_count = 0;
        size_t total_size = 0;

        for (i = 0; i < countof(cache_counts); i++) {
            int count = cache_counts[I];
            int slots = 1 << I;
            size_t size = count * slots * sizeof(bucket_t);

            if (!count) continue;

            _objc_inform("CACHES: %4d slots: %4d caches, %6zu bytes", 
                         slots, count, size);

            total_count += count;
            total_size += size;
        }

        _objc_inform("CACHES:      total: %4zu caches, %6zu bytes", 
                     total_count, total_size);
    }
}

4.总结

cache insert 整体流程图


cache--insert--sel&imp.jpg
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 214,377评论 6 496
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,390评论 3 389
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 159,967评论 0 349
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,344评论 1 288
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,441评论 6 386
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,492评论 1 292
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,497评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,274评论 0 269
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,732评论 1 307
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,008评论 2 328
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,184评论 1 342
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,837评论 4 337
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,520评论 3 322
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,156评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,407评论 1 268
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,056评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,074评论 2 352

推荐阅读更多精彩内容