iOS 底层探索：类的加载下（分类）

iOS 底层探索：学习大纲 OC篇

前言

在上篇文章中，分析了类的懒加载和非懒加载的情况，在load_images时会调用loadAllCategories()方法加载分类，那么分类的加载是如何实现的呢？接下来我们就分析下分类的加载及其本质。
准备：调试代码下载

一、分类的本质

先写一个分类


// Category "LG" on LGPerson: declares two properties, three instance methods
// and one class method.
// NOTE: the instance methods are declared in the order 1, 3, 2 (not sorted).
@interface LGPerson (LG)

// Properties declared in the category; no instance variable is declared here.
@property (nonatomic, copy) NSString *cate_name;
@property (nonatomic, assign) int cate_age;

// Instance methods.
- (void)cate_instanceMethod1;
- (void)cate_instanceMethod3;
- (void)cate_instanceMethod2;
// Class method.
+ (void)cate_sayClassMethod;

@end

// Implementation of the LG category on LGPerson.
// Every method only logs its own name via __func__; the definition order
// (1, 3, 2, then the class method) mirrors the declaration order.
@implementation LGPerson (LG)

#pragma mark - Instance methods

- (void)cate_instanceMethod1
{
    NSLog(@"%s", __func__);
}

- (void)cate_instanceMethod3
{
    NSLog(@"%s", __func__);
}

- (void)cate_instanceMethod2
{
    NSLog(@"%s", __func__);
}

#pragma mark - Class methods

+ (void)cate_sayClassMethod
{
    NSLog(@"%s", __func__);
}

@end

之前我们通过Clang探索过类的本质，同样我们对分类进行Clang，步骤如下：打开终端，cd到文件目录下，执行clang -rewrite-objc main.m -o main.cpp，利用clang将main.m编译成 main.cpp，然后搜索LGPerson。

// clang -rewrite-objc output for the LG category.
// The original Objective-C declarations are kept as comments; each method body
// is emitted as a static C function taking (self, _cmd).
// @interface LGPerson (LG)

// @property (nonatomic, copy) NSString *cate_name;
// @property (nonatomic, assign) int cate_age;

// - (void)cate_instanceMethod1;
// - (void)cate_instanceMethod3;
// - (void)cate_instanceMethod2;
// + (void)cate_sayClassMethod;

/* @end */


// @implementation LGPerson (LG)


// Instance method cate_instanceMethod1 (prefix _I_, self is LGPerson *).
static void _I_LGPerson_LG_cate_instanceMethod1(LGPerson * self, SEL _cmd) {
    NSLog((NSString *)&__NSConstantStringImpl__var_folders_hr_l_56yp8j4y11491njzqx6f880000gn_T_main_f129fc_mi_0,__func__);
}


// Instance method cate_instanceMethod3.
static void _I_LGPerson_LG_cate_instanceMethod3(LGPerson * self, SEL _cmd) {
    NSLog((NSString *)&__NSConstantStringImpl__var_folders_hr_l_56yp8j4y11491njzqx6f880000gn_T_main_f129fc_mi_1,__func__);
}


// Instance method cate_instanceMethod2.
static void _I_LGPerson_LG_cate_instanceMethod2(LGPerson * self, SEL _cmd) {
    NSLog((NSString *)&__NSConstantStringImpl__var_folders_hr_l_56yp8j4y11491njzqx6f880000gn_T_main_f129fc_mi_2,__func__);
}


// Class method cate_sayClassMethod (prefix _C_, self is Class).
static void _C_LGPerson_LG_cate_sayClassMethod(Class self, SEL _cmd) {
    NSLog((NSString *)&__NSConstantStringImpl__var_folders_hr_l_56yp8j4y11491njzqx6f880000gn_T_main_f129fc_mi_3,__func__);
}
// @end

可以看出：分类的属性没有像类的属性一样被编译成结构体NSObject_IMPL ，可参考 iOS 底层探索：isa与类关联的原理 ,继续搜索LGPerson

extern "C" __declspec(dllimport) struct _class_t OBJC_CLASS_$_LGPerson;

// Category descriptor emitted for LGPerson (LG). The initializers follow the
// field order of struct _category_t: name, cls, instance_methods,
// class_methods, protocols, properties.
static struct _category_t _OBJC_$_CATEGORY_LGPerson_$_LG __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
    // name
    "LGPerson",
    // cls: emitted as 0 here and assigned in OBJC_CATEGORY_SETUP_$_LGPerson_$_LG below
    0, // &OBJC_CLASS_$_LGPerson,
    // instance_methods
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_INSTANCE_METHODS_LGPerson_$_LG,
    // class_methods
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_LGPerson_$_LG,
    // protocols: 0, the category declares none
    0,
    // properties
    (const struct _prop_list_t *)&_OBJC_$_PROP_LIST_LGPerson_$_LG,

};
// Sets the descriptor's cls field to the address of LGPerson's class structure.
static void OBJC_CATEGORY_SETUP_$_LGPerson_$_LG(void ) {
    _OBJC_$_CATEGORY_LGPerson_$_LG.cls = &OBJC_CLASS_$_LGPerson;
}

可以看到_category_t 这个结构体：


// Category descriptor struct in the clang-rewritten output
// (original note: category methods -> attachToClass).
// There is no field for an instance-variable list.
struct _category_t {
    const char *name;
    struct _class_t *cls;
    const struct _method_list_t *instance_methods; // instance methods
    const struct _method_list_t *class_methods;// class methods
    const struct _protocol_list_t *protocols;// protocols
    const struct _prop_list_t *properties;// properties
};

在objc源码中查看：

// Category descriptor as declared in the objc runtime source.
// It carries method, protocol and property lists; there is no
// instance-variable list among its fields.
struct category_t {
    const char *name;
    classref_t cls;
    struct method_list_t *instanceMethods;
    struct method_list_t *classMethods;
    struct protocol_list_t *protocols;
    struct property_list_t *instanceProperties;
    // Fields below this point are not always present on disk.
    struct property_list_t *_classProperties;

    // Returns classMethods when isMeta is true, instanceMethods otherwise.
    method_list_t *methodsForMeta(bool isMeta) {
        if (isMeta) return classMethods;
        else return instanceMethods;
    }

    // Declared only; the definition is not part of this excerpt.
    property_list_t *propertiesForMeta(bool isMeta, struct header_info *hi);
    
    // Returns nullptr when isMeta is true, the protocols list otherwise.
    protocol_list_t *protocolsForMeta(bool isMeta) {
        if (isMeta) return nullptr;
        else return protocols;
    }
};

从源码基本可以看出我们平时使用category的方式，对象方法，类方法，协议，和属性都可以找到对应的存储方式。并且我们发现分类结构体中是不存在成员变量的，因此分类中是不允许添加成员变量的。

我们再看看分类的方法列表里有没有属性的get、set方法：


// Instance-method list emitted for LGPerson (LG).
// It holds exactly the three methods implemented in the category; no accessor
// entries for cate_name / cate_age appear in it.
static struct /*_method_list_t*/ { // method list
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[3];
} _OBJC_$_CATEGORY_INSTANCE_METHODS_LGPerson_$_LG __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    3,
    // each entry: selector + type signature string + implementation address
    {{(struct objc_selector *)"cate_instanceMethod1", "v16@0:8", (void *)_I_LGPerson_LG_cate_instanceMethod1},
    {(struct objc_selector *)"cate_instanceMethod3", "v16@0:8", (void *)_I_LGPerson_LG_cate_instanceMethod3},
    {(struct objc_selector *)"cate_instanceMethod2", "v16@0:8", (void *)_I_LGPerson_LG_cate_instanceMethod2}}
};

// Class-method list emitted for LGPerson (LG): a single entry,
// cate_sayClassMethod, pointing at the _C_-prefixed function.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_$_CATEGORY_CLASS_METHODS_LGPerson_$_LG __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    // entry: selector + type signature string + implementation address
    {{(struct objc_selector *)"cate_sayClassMethod", "v16@0:8", (void *)_C_LGPerson_LG_cate_sayClassMethod}}
};


extern "C" __declspec(dllimport) struct _class_t OBJC_CLASS_$_LGPerson;

// Category descriptor for LGPerson (LG), tying together the method lists and
// the property list of the category.
static struct _category_t _OBJC_$_CATEGORY_LGPerson_$_LG __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
    // name
    "LGPerson",
    // cls: emitted as 0; the intended value is shown in the trailing comment
    0, // &OBJC_CLASS_$_LGPerson,
    // instance method list
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_INSTANCE_METHODS_LGPerson_$_LG,
    // class method list
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_LGPerson_$_LG,
    // protocol list: 0, the category declares none
    0,
    // property list
    (const struct _prop_list_t *)&_OBJC_$_PROP_LIST_LGPerson_$_LG,

};

// Property list emitted for LGPerson (LG): two entries, cate_name and cate_age.
// Each entry is a {name, attribute string} pair.
// NOTE(review): the attribute strings presumably encode type (T...), copy (C)
// and nonatomic (N), matching the @property declarations; confirm against the
// Objective-C runtime documentation on declared properties.
static struct /*_prop_list_t*/ {
    unsigned int entsize;  // sizeof(struct _prop_t)
    unsigned int count_of_properties;
    struct _prop_t prop_list[2];
} _OBJC_$_PROP_LIST_LGPerson_$_LG __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_prop_t),
    2,
    {{"cate_name","T@\"NSString\",C,N"},
    {"cate_age","Ti,N"}}
};

分类中添加的属性并不会帮助我们自动生成成员变量，只会生成get set方法的声明并不实现set get方法。所以我们开发过程中一般不能在分类中使用属性，通过验证可知，分类中的属性编译可以通过，但是在运行时调用，就会出现crash，这个就不验证了。如何让它不crash，我们下一节专门讲解关联属性。

分类总结：分类的本质是一个_category_t类型的结构体，

分类是用于给原有类添加方法的,因为分类的结构体中，没有成员变量列表，只有方法列表、协议列表和属性列表 ;

分类中可以写@property, 但只会生成setter/getter方法的声明, 不会生成方法的实现以及带下划线的私有成员变量（编译时会报警告）;
有两个属性：name（类的名称）和 cls（类对象）

有两个 method_list_t类型的方法列表，表示分类中实现的实例方法+类方法

一个protocol_list_t类型的协议列表，表示分类中实现的协议

一个prop_list_t类型的属性列表，表示分类中定义的属性，一般在分类中添加的属性都是通过关联对象来实现

本类属性和分类属性的区别：

本类属性：在clang编译环节，会自动生成并实现对应的set和get方法

分类属性：会存在set、get方法，但是没有实现（需要runtime设置关联属性）。

分类在编译的时候将分类的信息存在struct _category_t中，那么在程序运行的时候它是怎么加载到内存中的呢？接下来通过runtime的源码分析分类的加载

二、分类的加载

1. 分类的加载时机。

在类的加载上提到过methodizeClass ，我们再次看看methodizeClass，源码如下：

// Installs the class's own methods, properties and protocols (taken from ro),
// then attaches any categories registered for the class.
// cls        - the class being methodized
// previously - when non-null, categories registered under this class are
//              attached to cls as well
// The runtime lock must be held (asserted on entry).
// NOTE: this is an excerpt; the tail of the function is elided ("......").
static void methodizeClass(Class cls, Class previously)
{
    runtimeLock.assertLocked();

    bool isMeta = cls->isMetaClass();
    auto rw = cls->data();
    auto ro = rw->ro();
    auto rwe = rw->ext();

    // Methodizing for the first time
    if (PrintConnecting) {
        _objc_inform("CLASS: methodizing class '%s' %s", 
                     cls->nameForLogging(), isMeta ? "(meta)" : "");
    }

    // Install methods and properties that the class implements itself.
    // Base methods from ro: prepared, then attached to rwe only when rwe exists.
    method_list_t *list = ro->baseMethods();
    if (list) {
// Prepare the method list before attaching it.
        prepareMethodLists(cls, &list, 1, YES, isBundleClass(cls));
        if (rwe) rwe->methods.attachLists(&list, 1);
    }
    // Base properties from ro: attached to rwe when both exist.
    property_list_t *proplist = ro->baseProperties;
    if (rwe && proplist) {
        rwe->properties.attachLists(&proplist, 1);
    }
    // Base protocols from ro: attached to rwe when both exist.
    protocol_list_t *protolist = ro->baseProtocols;
    if (rwe && protolist) {
        rwe->protocols.attachLists(&protolist, 1);
    }

    // Root classes get bonus method implementations if they don't have
    // them already. These apply before category replacements.
    if (cls->isRootMetaclass()) {
        // root metaclass
        addMethod(cls, @selector(initialize), (IMP)&objc_noop_imp, "", NO);
    }
    // Attach categories.
    if (previously) {
        if (isMeta) {
            objc::unattachedCategories.attachToClass(cls, previously,
                                                     ATTACH_METACLASS);
        } else {
            // When a class relocates, categories with class methods
            // may be registered on the class itself rather than on
            // the metaclass. Tell attachToClass to look for those.
            objc::unattachedCategories.attachToClass(cls, previously,
                                                     ATTACH_CLASS_AND_METACLASS);
        }
    }
    // Attach the categories registered under cls itself.
    objc::unattachedCategories.attachToClass(cls, cls,
                                             isMeta ? ATTACH_METACLASS : ATTACH_CLASS);
    ......
}

可以发现类的数据和分类的数据是分开处理的，主要是因为在编译阶段，就已经确定好了方法的归属位置（即实例方法存储在类中，类方法存储在元类中），而分类是后面才加进来的.

其中分类需要通过attachToClass添加到类，然后才能在外界进行使用，在此过程，我们已经知道了分类加载三步骤的后面两个步骤，分类的加载主要分为3步：
- 分类数据加载时机：根据类和分类是否实现load方法来区分不同的时机
- attachCategories 准备分类数据
- attachLists 将分类数据添加到主类中

2. 分类与本类的加载与方法调用。

关于多个分类方法的加载在类的加载上提到attachLists 。

基本上方法、协议、属性都是通过 attachLists 函数附加到对应的列表上的
// Adds addedCount lists (addedLists) to this container.
// Three cases, chosen by the current storage:
//   - an array of lists already exists: grow it and shift the old entries back;
//   - nothing stored and exactly one list is added: store that pointer directly;
//   - otherwise (at most one list stored): allocate an array and fill it.
// In both array cases the added lists occupy the front of the array and the
// previously stored lists follow them.
void attachLists(List* const * addedLists, uint32_t addedCount) {
    if (addedCount == 0) return;

    if (hasArray()) {
        // many lists -> many lists
        // Number of lists currently stored in the array.
        uint32_t oldCount = array()->count;
        // New count = old count + added count.
        uint32_t newCount = oldCount + addedCount;
        // Resize the existing array_t with realloc so it can hold newCount entries.
        setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
        // Record the new count.
        array()->count = newCount;
        // Shift the old lists back by addedCount slots; memmove is used because
        // the source and destination ranges lie in the same array.
        memmove(array()->lists + addedCount, array()->lists, 
                oldCount * sizeof(array()->lists[0]));
        // Copy the added lists into the front of the array.
        memcpy(
               array()->lists, addedLists, 
               addedCount * sizeof(array()->lists[0]));
    }
    else if (!list  &&  addedCount == 1) {
        // 0 lists -> 1 list
        list = addedLists[0];// store the single list pointer directly; no array_t is allocated
    } 
    else {

//       -----------  path discussed for categories in this article   ---------------

        // 1 list -> many lists
        // NOTE(review): the original note likened this to LRU; the code itself
        // only shows that the added lists are placed ahead of the old one.
        // Keep the currently stored single list, if any.
        List* oldList = list;
        uint32_t oldCount = oldList ? 1 : 0;
        // New count = number of old lists (0 or 1) + added count.
        uint32_t newCount = oldCount + addedCount;
        // Allocate a fresh array_t sized for newCount entries.
        setArray((array_t *)malloc(array_t::byteSize(newCount)));
        // Record the count.
        array()->count = newCount;
        // If there was an old list (it may be null), put it right after the
        // added lists, i.e. at index addedCount.
        if (oldList) array()->lists[addedCount] = oldList;
        // memcpy(destination, source, size): copy the added lists to the start
        // of the array; array()->lists is the address of the first slot.
        memcpy(array()->lists, addedLists, 
               addedCount * sizeof(array()->lists[0]));
    }
}

我们分析过load_images 类的加载顺序：父类 - > 本类 - > 分类
通过attachLists的分析可知方法列表的存放顺序：先处理父类，再处理本类，最后才是分类；有多个分类时，会按编译顺序依次附加，而后附加的方法列表会被放在列表数组的最前面，所以我们可以得出结论：

普通的方法中, 分类同名方法会“覆盖”主类的方法（并不是真正替换：主类的方法列表仍然保留在数组后面，只是分类的方法列表排在前面）；

多个分类中的同名方法会只执行一个,即后编译的分类里面的方法会覆盖所有前面的同名方法(可以通过调换编译顺序来获得这个结论)。

3. 分类加载的验证

首先创建一个类和两个分类 ,

我们知道类的加载会分为懒加载类和非懒加载类；所以在分类加载的验证中我们可以大致分为四种加载情况如下：
即根据类和分类是否实现+load方法，可以大致分为以下4种情况：

【情况1】非懒加载类 + 非懒加载分类
【情况2】懒加载类 + 懒加载分类
【情况3】非懒加载类 + 懒加载分类
【情况4】懒加载类 + 非懒加载分类

【 3.1】全部为非懒加载类的加载的情况下：分类和本类都实现 load方法

在methodizeClass 中进入attachToClass ，可以发现attachCategories方法，就会进行分类数据的加载

断点调试如下，查看堆栈

image.png

可以看出分类加载正常的流程的路径为：realizeClassWithoutSwift ->methodizeClass -> attachToClass ->attachCategories

我们通过堆栈去找到 attachCategories ，发现在attachToClass方法中调用了attachCategories，又在这个类UnattachedCategories中声明实现了attachToClass 我们断点看看会不会走attachCategories呢？

// Map from a class to its list of categories that have not been attached yet
// (derives from ExplicitInitDenseMap<Class, category_list>).
// NOTE: this is an excerpt; other members are elided ("...").
class UnattachedCategories : public ExplicitInitDenseMap<Class, category_list>
{
public:
    ...................

    // Looks up the categories registered under `previously`; if an entry is
    // found, attaches them to `cls` via attachCategories and erases the entry.
    // With ATTACH_CLASS_AND_METACLASS the list is attached to both cls
    // (as ATTACH_CLASS) and cls->ISA() (as ATTACH_METACLASS).
    // The runtime lock must be held (asserted on entry).
    void attachToClass(Class cls, Class previously, int flags)
    {
        runtimeLock.assertLocked();
        ASSERT((flags & ATTACH_CLASS) ||
               (flags & ATTACH_METACLASS) ||
               (flags & ATTACH_CLASS_AND_METACLASS));

        
        // --- Debug instrumentation added for this investigation ---
        // Logs when cls is LGPerson. The kc_* locals are not used by the code
        // below; presumably they are kept for inspection in the debugger.
        const char *mangledName  = cls->mangledName();
        const char *LGPersonName = "LGPerson";

        if (strcmp(mangledName, LGPersonName) == 0) {
            bool kc_isMeta = cls->isMetaClass();
            auto kc_rw = cls->data();
            auto kc_ro = kc_rw->ro();
            printf("%s: 这个是我要研究的 %s \n",__func__,LGPersonName);

//            if (!kc_isMeta) {
//                printf("%s: 这个是我要研究的 %s \n",__func__,LGPersonName);
//            }
        }
        
//  End of the LGPerson debug print.
        
        auto &map = get();
        auto it = map.find(previously);
        if (it != map.end()) { // per the author's breakpoint debugging this branch is not taken in this scenario; reportedly it is only taken when several categories implement +load (verify by debugging)
            category_list &list = it->second;
            if (flags & ATTACH_CLASS_AND_METACLASS) {
                int otherFlags = flags & ~ATTACH_CLASS_AND_METACLASS;
                attachCategories(cls, list.array(), list.count(), otherFlags | ATTACH_CLASS);
                attachCategories(cls->ISA(), list.array(), list.count(), otherFlags | ATTACH_METACLASS);
            } else {
                attachCategories(cls, list.array(), list.count(), flags);
            }
            map.erase(it);
        }
    }

  ...................
};

断点调试发现，这里断点并不会走。只有在实现多个分类的load方法后才会走这里可以调试验证

所以我们全局搜索看看还有谁调用了attachCategories,如下：



// Excerpt of load_categories_nolock: parts of the function are elided
// ("..."), so `cls`, `cat`, `lc` and `hasClassProperties` are defined in code
// that is not shown here.
// Visible logic: for each processed category, instance-side content is attached
// to cls (or queued if cls is not realized), and class-side content is attached
// to cls->ISA() (or queued if the metaclass is not realized).
static void load_categories_nolock(header_info *hi) {

..........

            // Debug instrumentation added for this investigation: log when the
            // category's target class is LGPerson.
            const char *mangledName  = cls->mangledName();
            const char *LGPersonName = "LGPerson";

            if (strcmp(mangledName, LGPersonName) == 0) {
            printf("load_categories_nolock:%s: 这个是我要研究的 %s \n",__func__,LGPersonName);
            }
            // Process this category.
            if (cls->isStubClass()) {
               ......
            } else {
                // First, register the category with its target class.
                // Then, rebuild the class's method lists (etc) if
                // the class is realized.
                // Instance side: instance methods, protocols or instance properties.
                if (cat->instanceMethods ||  cat->protocols
                    ||  cat->instanceProperties)
                {
                    if (cls->isRealized()) {
                        attachCategories(cls, &lc, 1, ATTACH_EXISTING);
                    } else {
                        objc::unattachedCategories.addForClass(lc, cls);
                    }
                 }

                // Class side: class methods, protocols or class properties;
                // the target is the metaclass, cls->ISA().
                if (cat->classMethods  ||  cat->protocols
                    ||  (hasClassProperties && cat->_classProperties))
                {
                    if (cls->ISA()->isRealized()) {
                        attachCategories(cls->ISA(), &lc, 1, ATTACH_EXISTING | ATTACH_METACLASS);
                    } else {
                        objc::unattachedCategories.addForClass(lc, cls->ISA());
                    }
                }
            }
        }
    };

..........
}

通过打印也可以验证：

经过堆栈就可以发现attachCategories的调用顺序：load_images -> loadAllCategories -> load_categories_nolock -> attachCategories

分析流程如图：

【 3.2】全部为懒加载类的加载的情况下：分类和本类都不实现 load方法

参考：类的加载中，我们知道懒加载类的加载的情况下会走lookUpImpOrForward -> realizeClassMaybeSwiftMaybeRelock-> realizeClassWithoutSwift的情况。
所以我们在realizeClassWithoutSwift下断点调试如下：

通过堆栈信息我们可以验证懒加载类的加载是在调用alloc -> objc_alloc方法的时候开始加载的。

不加任何断点，运行程序，获取打印日志

在readClass断点调试如下,然后读取kc_ro，即读取整个data

此时的baseMethodList的count还是16，说明也是从data中读取出来的，所以不需要经过一层缓慢的load_images加载进来，所以说明：懒加载类与懒加载分类的数据加载是在消息第一次调用时加载的。

【 3.3.0】懒加载类与非懒加载分类情况下：分类实现 load方法本类不实现

在readClass断点调试如下：

我们看到 readClass中的cls 中只读取到mach-o中本类的三个方法。

继续运行代码，进入了attachCategories处，在attachLists加入三个断点，继续运行，发现attachLists中0->1加载了LGPerson本类函数

继续运行代码，发现进入了attachLists中的 1->多（有分类的时候进入）分支：

此时addedCount为2，表示当前需要添加的列表有2个元素。并不是只有LGB分类。我们打印 addedLists[0] 和addedLists[1]，就找到了LGA和LGB两个分类

分析：为什么本类没有+load方法，只实现分类+load方法，也在app启动前加载出来了呢？

我们查看左边堆栈，load_images调用了prepare_load_methods：

而prepare_load_methods中会检查有没有非懒加载的分类，如果有就执行下面的循环。
循环中在add_category_to_loadable_list加载分类前，会执行realizeClassWithoutSwift先检查本类是否实现。

【 3.3.1】懒加载类与非懒加载分类情况下：分类A实现 load方法本类和分类B不实现

此时没有进入分类的加载方法attachCategories，我们去readClass 打印看看。

发现ro中加载好了本类和2个分类的所有数据(15个函数）读取mach-o了所有的方法。

本类无，分类B+load ，分类A无的结果与这个一样的。

【 3.4】非懒加载类与懒加载分类情况下：本类实现 load方法分类不实现

同3.3 打印查看 readClass

可以看出：ro函数列表：此时ro读取的是macho中的值，ro中已包含本类和所有函数信息（14个）。函数排序：后加载的分类函数排序在先加载的分类和本类前面。

继续运行，发现没有进入attachCategories内部。

【 3.5】总结：

分析方法：
- 1. +load方法会使类进入非懒加载，使类提前加载；
- 2. readClass中断点分析，抓取当前类LGPerson，通过读取ro内的数据判断类的信息是编译期写入还是运行时加载；
- 3. attachCategories中抓取当前类LGPerson，进入attachLists，断点分析分类的加载细节（多个分类加载时的方法排序分析）；
- 4. 查看堆栈信息，可观察核心方法的调用链，查看分析调用时机
本类和分类的+load区别和各种情况分析处理：比较如下：
- 1. 非懒加载类 + 非懒加载分类 ：启动时，load_images ->...... ->attachCategories中加载所有分类的数据信息；
- 2. 懒加载类 + 非懒加载分类 ：启动时，load_images ->...... ->attachCategories中加载所有分类的数据信息；
- 3. 懒加载类 + 懒加载分类 ：编译期，mach-o中已包含数据；
- 4. 非懒加载类 + 懒加载分类 ：编译期，mach-o中已包含数据；
- 5. 非懒加载类 + 懒加载分类 + 非懒加载分类 ：编译期，mach-o中已包含数据；
汇总如下图：

三、 Category分类与Extension拓展的区别

3.1 Category：类别，分类

专门用来给类添加新的方法

不能给类添加成员属性，添加了也取不到。

分类中用@property定义的变量，只会生成变量的 getter和setter方法，不能生成方法实现和带下划线的成员变量。

3.2 Extension：类拓展

可以说成是特殊的分类，也称作匿名分类

可以给类添加成员属性、属性、方法，但都是私有的

拓展必须添加在@interface声明和@implementation实现之间：

Extension拓展与@interface声明是一样的作用，但是Extension拓展中的成员变量、属性、方法都是私有的。

可以通过clang，查看编译结果进行验证。Extension类拓展的下划线成员变量、函数等，都直接加入了本类的相关位置，完成相应实现。

①类别中原则上只能增加方法（能添加属性的原因只是通过runtime（关联对象的方式）解决无setter/getter的问题而已）；
②类扩展不仅可以增加方法，还可以增加实例变量（或者属性），只是该实例变量默认是@private类型的（作用范围只能在自身类，而不是子类或其他地方）；
③类扩展中声明的方法没被实现，编译器会报警，但是类别中的方法没被实现编译器是不会有任何警告的。这是因为类扩展是在编译阶段被添加到类中，而类别是在运行时添加到类中。
④类扩展不能像类别那样拥有独立的实现部分（@implementation部分），也就是说，类扩展所声明的方法必须依托对应类的实现部分来实现。
⑤定义在 .m 文件中的类扩展方法为私有的，定义在 .h 文件（头文件）中的类扩展方法为公有的。类扩展是在 .m 文件中声明私有方法的非常好的方式。