iOS底层原理总结 - Category本质

首先同学们思考一个问题：
Category的实现原理，以及Category为什么只能加方法不能加属性？

Category的本质

@interface HDPerson : NSObject
{
    int _age; // 年龄
}

@property (nonatomic,assign) int height; // 身高

// 跑方法
-(void)run;

@end

#import "HDPerson.h"

@implementation HDPerson

-(void)run{
    NSLog(@"HDPerson - run");
}

@end

@interface HDPerson (Student)<NSCopying,NSCoding>

@property (nonatomic,assign) int weight; // 体重

//对象方法 学习
-(void)study;

//对象方法 喝水
-(void)drink;

//类方法 玩
+(void)play;

@implementation HDPerson (Student)

-(void)study{
    NSLog(@"HDPerson (Student) - study");
}

-(void)drink{
    NSLog(@"HDPerson (Student) - drink");
}

+(void)play{
    NSLog(@"HDPerson (Student) + play");
}

@end
@interface HDPerson (Teacher)


// 对象方法 工作
-(void)work;

// 类方法 教学
+(void)teach;

@end

#import "HDPerson+Teacher.h"

@implementation HDPerson (Teacher)

-(void)work{
    NSLog(@"HDPerson (Teacher) - work");
}

+(void)teach{
    NSLog(@"HDPerson (Teacher) - teach");
}

@end

上面👆新建HDPerson类然后新建了两个分类，后续的分析依赖于上面的代码，并且还会有部分修改。

分析

我们之前讲到过实例对象的isa指针指向类对象，类对象的isa指针指向元类对象，当person调用run方法时，通过实例对象的isa指针找到类对象，然后在类对象中查找对象方法，如果没有找到，就通过类对象的superclass指针找到父类对象，接着去寻找run方法。
上面这段话没有问题，但是还不全面，在类对象中查找对象方法，怎样查找？查找的顺序是什么？

那么当调用分类的方法时，步骤是否和调用对象方法一样呢？

结论

分类中的对象方法依然是存储在类对象中的，同本类对象方法在同一个地方，调用步骤也同调用对象方法一样。如果是类方法的话，也同样是存储在元类对象中。
Category编译之后的底层结构是struct category_t，里面存储着分类的对象方法、类方法、属性、协议信息
在程序运行的时候，runtime会将Category的数据，合并到类信息中（类对象、元类对象中）

分类的底层结构

首先我们通过命令行将HDPerson+Student.m文件转化为c++文件，查看其中的编译过程。

xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc HDPerson+Student.m

通过查看分类的源码我们可以找到category_t 结构体

struct _category_t {
    const char *name;
    struct _class_t *cls;
    const struct _method_list_t *instance_methods; // 对象方法数组
    const struct _method_list_t *class_methods; // 类方法数组
    const struct _protocol_list_t *protocols; // 协议列表数组
    const struct _prop_list_t *properties; // 属性列表数组
};

从源码基本可以看出我们平时使用categroy的方式，对象方法，类方法，协议，和属性都可以找到对应的存储方式。并且我们发现分类结构体中是不存在成员变量的，因此分类中是不允许添加成员变量的。分类中添加的属性并不会帮助我们自动生成成员变量，只会生成get set方法的声明，需要我们自己去实现。

紧接着，我们可以看到_method_list_t类型的结构体，如下图所示

static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[2];
} _OBJC_$_CATEGORY_INSTANCE_METHODS_HDPerson_$_Student __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    2,
    {{(struct objc_selector *)"study", "v16@0:8", (void *)_I_HDPerson_Student_study},
    {(struct objc_selector *)"drink", "v16@0:8", (void *)_I_HDPerson_Student_drink}}
};

上图中我们发现这个结构体OBJC_$_CATEGORY_INSTANCE_METHODS_HDPerson_$_Student从名称可以看出是INSTANCE_METHODS对象方法，并且一一对应为上面结构体内赋值。我们可以看到结构体中存储了方法占用的内存，方法数量，以及方法列表。并且从上图中找到分类中我们实现对应的对象方法，study, drink方法

接下来我们发现同样的_method_list_t类型的类方法结构体，如下图所示

static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_$_CATEGORY_CLASS_METHODS_HDPerson_$_Student __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    {{(struct objc_selector *)"play", "v16@0:8", (void *)_C_HDPerson_Student_play}}
};

同上面对象方法列表一样，这个我们可以看出是类方法列表结构体_OBJC_$_CATEGORY_CLASS_METHODS_HDPerson_$_Student同对象方法结构体相同，同样可以看到我们实现的类方法，play。

接下来是协议方法列表

static const char *_OBJC_PROTOCOL_METHOD_TYPES_NSCopying [] __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
    "@24@0:8^{_NSZone=}16"
};

static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_PROTOCOL_INSTANCE_METHODS_NSCopying __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    {{(struct objc_selector *)"copyWithZone:", "@24@0:8^{_NSZone=}16", 0}}
};

struct _protocol_t _OBJC_PROTOCOL_NSCopying __attribute__ ((used)) = {
    0,
    "NSCopying",
    0,
    (const struct method_list_t *)&_OBJC_PROTOCOL_INSTANCE_METHODS_NSCopying,
    0,
    0,
    0,
    0,
    sizeof(_protocol_t),
    0,
    (const char **)&_OBJC_PROTOCOL_METHOD_TYPES_NSCopying
};
struct _protocol_t *_OBJC_LABEL_PROTOCOL_$_NSCopying = &_OBJC_PROTOCOL_NSCopying;

static const char *_OBJC_PROTOCOL_METHOD_TYPES_NSCoding [] __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
    "v24@0:8@\"NSCoder\"16",
    "@24@0:8@\"NSCoder\"16"
};

static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[2];
} _OBJC_PROTOCOL_INSTANCE_METHODS_NSCoding __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    2,
    {{(struct objc_selector *)"encodeWithCoder:", "v24@0:8@16", 0},
    {(struct objc_selector *)"initWithCoder:", "@24@0:8@16", 0}}
};

struct _protocol_t _OBJC_PROTOCOL_NSCoding __attribute__ ((used)) = {
    0,
    "NSCoding",
    0,
    (const struct method_list_t *)&_OBJC_PROTOCOL_INSTANCE_METHODS_NSCoding,
    0,
    0,
    0,
    0,
    sizeof(_protocol_t),
    0,
    (const char **)&_OBJC_PROTOCOL_METHOD_TYPES_NSCoding
};
struct _protocol_t *_OBJC_LABEL_PROTOCOL_$_NSCoding = &_OBJC_PROTOCOL_NSCoding;

static struct /*_protocol_list_t*/ {
    long protocol_count;  // Note, this is 32/64 bit
    struct _protocol_t *super_protocols[2];
} _OBJC_CATEGORY_PROTOCOLS_$_HDPerson_$_Student __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    2,
    &_OBJC_PROTOCOL_NSCopying,
    &_OBJC_PROTOCOL_NSCoding
};

通过上述源码可以看到先将协议方法通过_method_list_t结构体存储，之后通过_protocol_t结构体存储。分别为protocol_count 协议数量以及存储了协议方法的_protocol_t结构体。

最后我们可以看到属性列表

static struct /*_prop_list_t*/ {
    unsigned int entsize;  // sizeof(struct _prop_t)
    unsigned int count_of_properties;
    struct _prop_t prop_list[1];
} _OBJC_$_PROP_LIST_HDPerson_$_Student __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_prop_t),
    1,
    {{"weight","Ti,N"}}
};

属性列表结构体_OBJC_$_PROP_LIST_HDPerson_$_Student同_prop_list_t结构体对应，存储属性的占用空间，属性属性数量，以及属性列表，从上图中可以看到我们自己写的weight属性。

结论

通过以上分析我们发现。分类源码中确实是在代码编译完成后将我们定义的对象方法，类方法，属性，协议等都存放在catagory_t结构体中。接下来我们在回到runtime源码查看catagory_t存储的方法，属性，协议等是如何存储在类对象中的。

分析

首先来到runtime初始化函数

/***********************************************************************
* _objc_init
* Bootstrap initialization. Registers our image notifier with dyld.
* Called by libSystem BEFORE library initialization time
**********************************************************************/

void _objc_init(void)
{
    static bool initialized = false;
    if (initialized) return;
    initialized = true;
    
    // fixme defer initialization until an objc-using image is found?
    environ_init();
    tls_init();
    static_init();
    lock_init();
    exception_init();

    _dyld_objc_notify_register(&map_images, load_images, unmap_image);
}

接着我们来到 &map_images读取模块（images这里代表模块），来到map_images_nolock函数中找到_read_images函数，在_read_images函数中我们找到分类相关代码

    for (EACH_HEADER) {
        category_t **catlist = 
            _getObjc2CategoryList(hi, &count);
        bool hasClassProperties = hi->info()->hasCategoryClassProperties();

        for (i = 0; i < count; i++) {
            category_t *cat = catlist[i];
            Class cls = remapClass(cat->cls);

            if (!cls) {
                // Category's target class is missing (probably weak-linked).
                // Disavow any knowledge of this category.
                catlist[i] = nil;
                if (PrintConnecting) {
                    _objc_inform("CLASS: IGNORING category \?\?\?(%s) %p with "
                                 "missing weak-linked target class", 
                                 cat->name, cat);
                }
                continue;
            }

            // Process this category. 
            // First, register the category with its target class. 
            // Then, rebuild the class's method lists (etc) if 
            // the class is realized. 
            bool classExists = NO;
            if (cat->instanceMethods ||  cat->protocols  
                ||  cat->instanceProperties) 
            {
                addUnattachedCategoryForClass(cat, cls, hi);
                if (cls->isRealized()) {
                    remethodizeClass(cls);
                    classExists = YES;
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category -%s(%s) %s", 
                                 cls->nameForLogging(), cat->name, 
                                 classExists ? "on existing class" : "");
                }
            }

            if (cat->classMethods  ||  cat->protocols  
                ||  (hasClassProperties && cat->_classProperties)) 
            {
                addUnattachedCategoryForClass(cat, cls->ISA(), hi);
                if (cls->ISA()->isRealized()) {
                    remethodizeClass(cls->ISA());
                }
                if (PrintConnecting) {
                    _objc_inform("CLASS: found category +%s(%s)", 
                                 cls->nameForLogging(), cat->name);
                }
            }
        }
    }

从上述代码中我们可以知道这段代码是用来查找有没有分类的。通过_getObjc2CategoryList函数获取到分类列表之后，进行遍历，获取其中的方法，协议，属性等。可以看到最终都调用了remethodizeClass(cls);函数。我们来到remethodizeClass(cls);函数内部查看。

static void remethodizeClass(Class cls)
{
    category_list *cats;
    bool isMeta;

    runtimeLock.assertWriting();

    isMeta = cls->isMetaClass();

    // Re-methodizing: check for more categories
    if ((cats = unattachedCategoriesForClass(cls, false/*not realizing*/))) {
        if (PrintConnecting) {
            _objc_inform("CLASS: attaching categories to class '%s' %s", 
                         cls->nameForLogging(), isMeta ? "(meta)" : "");
        }
        
        attachCategories(cls, cats, true /*flush caches*/);        
        free(cats);
    }
}

通过上述代码我们发现attachCategories函数接收了类对象cls和分类数组cats，如我们一开始写的代码所示，一个类可以有多个分类。之前我们说到分类信息存储在category_t结构体中，那么多个分类则保存在category_list中。

static void 
attachCategories(Class cls, category_list *cats, bool flush_caches)
{
    if (!cats) return;
    if (PrintReplacedMethods) printReplacements(cls, cats);

    bool isMeta = cls->isMetaClass();

    // fixme rearrange to remove these intermediate allocations
    method_list_t **mlists = (method_list_t **)
        malloc(cats->count * sizeof(*mlists));
    property_list_t **proplists = (property_list_t **)
        malloc(cats->count * sizeof(*proplists));
    protocol_list_t **protolists = (protocol_list_t **)
        malloc(cats->count * sizeof(*protolists));

    // Count backwards through cats to get newest categories first
    int mcount = 0;
    int propcount = 0;
    int protocount = 0;
    int i = cats->count;
    bool fromBundle = NO;
    while (i--) {
        auto& entry = cats->list[i];

        method_list_t *mlist = entry.cat->methodsForMeta(isMeta);
        if (mlist) {
            mlists[mcount++] = mlist;
            fromBundle |= entry.hi->isBundle();
        }

        property_list_t *proplist = 
            entry.cat->propertiesForMeta(isMeta, entry.hi);
        if (proplist) {
            proplists[propcount++] = proplist;
        }

        protocol_list_t *protolist = entry.cat->protocols;
        if (protolist) {
            protolists[protocount++] = protolist;
        }
    }

    auto rw = cls->data();

    prepareMethodLists(cls, mlists, mcount, NO, fromBundle);
    rw->methods.attachLists(mlists, mcount);
    free(mlists);
    if (flush_caches  &&  mcount > 0) flushCaches(cls);

    rw->properties.attachLists(proplists, propcount);
    free(proplists);

    rw->protocols.attachLists(protolists, protocount);
    free(protolists);
}

上述源码中可以看出，首先根据方法列表，属性列表，协议列表，malloc分配内存，根据多少个分类以及每一块方法需要多少内存来分配相应的内存地址。之后从分类数组里面往三个数组里面存放分类数组里面存放的分类方法，属性以及协议放入对应mlist、proplists、protolosts数组中，这三个数组放着所有分类的方法，属性和协议。
之后通过类对象的data()方法，拿到类对象的class_rw_t结构体rw，在class结构中我们介绍过，class_rw_t中存放着类对象的方法，属性和协议等数据，rw结构体通过类对象的data方法获取，所以rw里面存放这类对象里面的数据。
之后分别通过rw调用方法列表、属性列表、协议列表的attachList函数，将所有的分类的方法、属性、协议列表数组传进去，我们大致可以猜想到在attachList方法内部将分类和本类相应的对象方法，属性，和协议进行了合并。
我们来看一下attachLists函数内部。

void attachLists(List* const * addedLists, uint32_t addedCount) {
        if (addedCount == 0) return;

        if (hasArray()) {
            // many lists -> many lists
            uint32_t oldCount = array()->count;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
            array()->count = newCount;
            memmove(array()->lists + addedCount, array()->lists, 
                    oldCount * sizeof(array()->lists[0]));
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
        else if (!list  &&  addedCount == 1) {
            // 0 lists -> 1 list
            list = addedLists[0];
        } 
        else {
            // 1 list -> many lists
            List* oldList = list;
            uint32_t oldCount = oldList ? 1 : 0;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)malloc(array_t::byteSize(newCount)));
            array()->count = newCount;
            if (oldList) array()->lists[addedCount] = oldList;
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
    }

上述源代码中有两个重要的数组
array()->lists：类对象原来的方法列表，属性列表，协议列表。
addedLists：传入所有分类的方法列表，属性列表，协议列表。
attachLists函数中最重要的两个方法为memmove内存移动和memcpy内存拷贝。
具体这两个方法的区别已经实现，大家可自行去百度一下。这里我说一下结论：
经过memmove和memcpy方法之后，分类的方法，属性，协议列表被放在了类对象中原本存储的方法，属性，协议列表前面。

那么为什么要将分类方法的列表追加到本来的对象方法前面呢，这样做的目的是为了保证分类方法优先调用，我们知道当分类重写本类的方法时，会覆盖本类的方法。
其实经过上面的分析我们知道本质上并不是覆盖，而是优先调用。本类的方法依然在内存中的。

总结加载过程
1、通过Runtime加载某个类的所有Category数据。
2、把所有Category的方法、属性、协议数据，合并到一个大数组中（后面参与编译的Category数据，会在数组的前面）
3、将合并后的分类数据（方法、属性、协议），插入到类原来数据的前面

结论

Category编译之后的底层结构是struct category_t，里面存储着分类的对象方法、类方法、属性、协议信息
在程序运行的时候，runtime会将Category的数据，合并到类信息中（类对象、元类对象中）
当父类和分类中有相同的方法的时候，优先调用分类中的方法。
当同一个类的多个分类中有相同的方法的时候，看编译顺序：调用最后编译的分类中的方法。