前言
前面我们分析了对象&类&元类的层次结构,以及他们之间的关系。
显然,类模版cls是我们整个OC面向对象设计里面的核心数据结构。它保存了对象创建所需的信息(大小,成员,成员访问路径等),也保存了方法/属性/协议/成员列表/缓存等共享内容。
让我们一起通过LLDB和源码一步步探索、认识类结构的底层实现。
objc_class
objc4-818.2的源码
// Legacy (pre-ObjC2) declaration of objc_class.
// Only `isa` is declared unconditionally; every other member is compiled only
// when __OBJC2__ is not defined and is tagged OBJC2_UNAVAILABLE, as is the
// struct itself.
struct objc_class {
// isa pointer; the only member present regardless of __OBJC2__.
Class _Nonnull isa OBJC_ISA_AVAILABILITY;
#if !__OBJC2__
// The members below exist only in the pre-ObjC2 layout.
Class _Nullable super_class OBJC2_UNAVAILABLE;
const char * _Nonnull name OBJC2_UNAVAILABLE;
long version OBJC2_UNAVAILABLE;
long info OBJC2_UNAVAILABLE;
long instance_size OBJC2_UNAVAILABLE;
struct objc_ivar_list * _Nullable ivars OBJC2_UNAVAILABLE;
// Note the double indirection: a pointer to (a list of) method-list pointers.
struct objc_method_list * _Nullable * _Nullable methodLists OBJC2_UNAVAILABLE;
struct objc_cache * _Nonnull cache OBJC2_UNAVAILABLE;
struct objc_protocol_list * _Nullable protocols OBJC2_UNAVAILABLE;
#endif
} OBJC2_UNAVAILABLE;
上面是objc2之前的版本中objc_class的定义,显然不是我们研究的重点。
在objc-runtime-new.h中,找到了objc2的定义如下:
// ObjC2 definition of objc_class (excerpt). It derives from objc_object, so the
// isa member comes from the base struct instead of being declared here (see the
// commented-out `Class ISA` line below).
struct objc_class : objc_object {
// Copy and move construction/assignment are all deleted.
objc_class(const objc_class&) = delete;
objc_class(objc_class&&) = delete;
void operator=(const objc_class&) = delete;
void operator=(objc_class&&) = delete;
// Class ISA;
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
/**
The many method definitions and the platform / CPU-architecture specific
handling are omitted from this excerpt; see the objc4 source for the full text.
*/
};
新版的objc_class定义继承自objc_object,因此继承了isa指针成员。
当前结构体有四个成员:isa、superclass、cache、bits。
isa指向元类
superclass指向父类
cache是缓存,跟运行时相关,我们后面单独一个篇章来探索这个缓存机制。
那么bits可能就是存储方法列表,属性列表等信息的地方。
class_data_bits_t
// Wrapper around a single uintptr_t word. data() extracts a class_rw_t pointer
// from the word through FAST_DATA_MASK; the other accessors test, set and clear
// individual flag bits stored in the same word.
struct class_data_bits_t {
// Grants objc_class access to this struct's private members.
friend objc_class;
// Values are the FAST_ flags above.
uintptr_t bits;
private:
// Returns true when any bit selected by `bit` is set in `bits`.
bool getBit(uintptr_t bit) const
{
return bits & bit;
}
// Atomically set the bits in `set` and clear the bits in `clear`.
// set and clear must not overlap.
void setAndClearBits(uintptr_t set, uintptr_t clear)
{
ASSERT((set & clear) == 0);
uintptr_t newBits, oldBits = LoadExclusive(&bits);
// Recompute and retry until the exclusive store reports success.
do {
newBits = (oldBits | set) & ~clear;
} while (slowpath(!StoreReleaseExclusive(&bits, &oldBits, newBits)));
}
// Atomically ORs `set` into bits using relaxed ordering.
void setBits(uintptr_t set) {
__c11_atomic_fetch_or((_Atomic(uintptr_t) *)&bits, set, __ATOMIC_RELAXED);
}
// Atomically clears the bits in `clear` (AND with ~clear) using relaxed ordering.
void clearBits(uintptr_t clear) {
__c11_atomic_fetch_and((_Atomic(uintptr_t) *)&bits, ~clear, __ATOMIC_RELAXED);
}
public:
// Returns the class_rw_t pointer held in the FAST_DATA_MASK portion of bits.
class_rw_t* data() const {
return (class_rw_t *)(bits & FAST_DATA_MASK);
}
// Replaces the pointer portion of bits with newData, keeping the bits outside
// FAST_DATA_MASK. Asserts that either no data is set yet or newData carries
// RW_REALIZING or RW_FUTURE.
void setData(class_rw_t *newData)
{
ASSERT(!data() || (newData->flags & (RW_REALIZING | RW_FUTURE)));
// Set during realization or construction only. No locking needed.
// Use a store-release fence because there may be concurrent
// readers of data and data's contents.
uintptr_t newBits = (bits & ~FAST_DATA_MASK) | (uintptr_t)newData;
atomic_thread_fence(memory_order_release);
bits = newBits;
}
// Get the class's ro data, even in the presence of concurrent realization.
// fixme this isn't really safe without a compiler barrier at least
// and probably a memory barrier when realizeClass changes the data field
const class_ro_t *safe_ro() const {
// The pointer is read as class_rw_t first; RW_REALIZED in its flags word
// decides whether it really is one.
class_rw_t *maybe_rw = data();
if (maybe_rw->flags & RW_REALIZED) {
// maybe_rw is rw
return maybe_rw->ro();
} else {
// maybe_rw is actually ro
return (class_ro_t *)maybe_rw;
}
}
#if SUPPORT_INDEXED_ISA
// Stores Idx in the class_rw_t's index field; Idx must be non-zero.
void setClassArrayIndex(unsigned Idx) {
// 0 is unused as then we can rely on zero-initialisation from calloc.
ASSERT(Idx > 0);
data()->index = Idx;
}
#else
// No-op when SUPPORT_INDEXED_ISA is off.
void setClassArrayIndex(__unused unsigned Idx) {
}
#endif
// Returns the stored index, or 0 when SUPPORT_INDEXED_ISA is off.
unsigned classArrayIndex() {
#if SUPPORT_INDEXED_ISA
return data()->index;
#else
return 0;
#endif
}
// True when either Swift flag bit (stable or legacy) is set.
bool isAnySwift() {
return isSwiftStable() || isSwiftLegacy();
}
bool isSwiftStable() {
return getBit(FAST_IS_SWIFT_STABLE);
}
// Sets the stable bit and clears the legacy bit in one atomic update.
void setIsSwiftStable() {
setAndClearBits(FAST_IS_SWIFT_STABLE, FAST_IS_SWIFT_LEGACY);
}
bool isSwiftLegacy() {
return getBit(FAST_IS_SWIFT_LEGACY);
}
// Sets the legacy bit and clears the stable bit in one atomic update.
void setIsSwiftLegacy() {
setAndClearBits(FAST_IS_SWIFT_LEGACY, FAST_IS_SWIFT_STABLE);
}
// fixme remove this once the Swift runtime uses the stable bits
bool isSwiftStable_ButAllowLegacyForNow() {
return isAnySwift();
}
// Forwards to class_ro_t::swiftMetadataInitializer() on the result of safe_ro().
_objc_swiftMetadataInitializer swiftMetadataInitializer() {
// This function is called on un-realized classes without
// holding any locks.
// Beware of races with other realizers.
return safe_ro()->swiftMetadataInitializer();
}
};
class_data_bits_t结构体的成员非常简单,只有一个成员uintptr_t bits(一个与指针等宽的无符号整数,用来存放指针和标志位)。
既然只有一个指针成员,为什么还要进行一次结构体包装?
经过class_data_bits_t结构体包装,扩展了一系列的数据的访问方法。
bits指针不仅存储了class_rw_t结构体的指针地址(可以通过data()函数获得,也是我们类数据的主要存储结构),通过扩展方法还可以对bits的一些位进行设置,来存储一些额外信息,比如:hasCustomRR、一些swift信息。这里看起来类似nonpointer_isa,对指针的空余位进行了利用,来存储更多的信息,体现了苹果对内存的优化。
class_data_bits_t更主要的作用是通过bits成员存储了class_rw_t *指针。
class_rw_t
// Per-class data reached through class_data_bits_t::data(). Besides the flags
// and subclass/sibling links it holds one tagged pointer word, ro_or_rw_ext,
// that refers either to a class_ro_t or to a class_rw_ext_t; the accessors
// below hide which of the two is currently stored.
struct class_rw_t {
// Be warned that Symbolication knows the layout of this structure.
uint32_t flags;
uint16_t witness;
#if SUPPORT_INDEXED_ISA
uint16_t index;
#endif
// Raw storage of the tagged pointer; decoded through ro_or_rw_ext_t below.
explicit_atomic<uintptr_t> ro_or_rw_ext;
Class firstSubclass;
Class nextSiblingClass;
private:
// Pointer union over `const class_ro_t` and `class_rw_ext_t`.
using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t, class_rw_ext_t, PTRAUTH_STR("class_ro_t"), PTRAUTH_STR("class_rw_ext_t")>;
// Decodes the current value of ro_or_rw_ext into a pointer union.
const ro_or_rw_ext_t get_ro_or_rwe() const {
return ro_or_rw_ext_t{ro_or_rw_ext};
}
// Stores a class_ro_t pointer into ro_or_rw_ext with relaxed ordering.
void set_ro_or_rwe(const class_ro_t *ro) {
ro_or_rw_ext_t{ro, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_relaxed);
}
// Records `ro` inside `rwe`, then publishes `rwe` with release ordering.
void set_ro_or_rwe(class_rw_ext_t *rwe, const class_ro_t *ro) {
// the release barrier is so that the class_rw_ext_t::ro initialization
// is visible to lockless readers
rwe->ro = ro;
ro_or_rw_ext_t{rwe, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_release);
}
// Declared here only; the definition is not part of this excerpt.
class_rw_ext_t *extAlloc(const class_ro_t *ro, bool deep = false);
public:
// Atomically ORs `set` into flags using relaxed ordering.
void setFlags(uint32_t set)
{
__c11_atomic_fetch_or((_Atomic(uint32_t) *)&flags, set, __ATOMIC_RELAXED);
}
// Atomically clears the bits in `clear` from flags using relaxed ordering.
void clearFlags(uint32_t clear)
{
__c11_atomic_fetch_and((_Atomic(uint32_t) *)&flags, ~clear, __ATOMIC_RELAXED);
}
// set and clear must not overlap
void changeFlags(uint32_t set, uint32_t clear)
{
ASSERT((set & clear) == 0);
uint32_t oldf, newf;
// Compare-and-swap loop: re-read flags and retry until the swap succeeds.
do {
oldf = flags;
newf = (oldf | set) & ~clear;
} while (!OSAtomicCompareAndSwap32Barrier(oldf, newf, (volatile int32_t *)&flags));
}
// Returns the class_rw_ext_t if one is stored; otherwise dyn_cast yields a
// value-initialized (null) pointer.
class_rw_ext_t *ext() const {
return get_ro_or_rwe().dyn_cast<class_rw_ext_t *>(&ro_or_rw_ext);
}
// Returns the stored class_rw_ext_t, or calls extAlloc with the stored
// class_ro_t when none is stored yet.
class_rw_ext_t *extAllocIfNeeded() {
auto v = get_ro_or_rwe();
if (fastpath(v.is<class_rw_ext_t *>())) {
return v.get<class_rw_ext_t *>(&ro_or_rw_ext);
} else {
return extAlloc(v.get<const class_ro_t *>(&ro_or_rw_ext));
}
}
// Calls extAlloc with deep = true.
class_rw_ext_t *deepCopy(const class_ro_t *ro) {
return extAlloc(ro, true);
}
// Returns the class_ro_t: read from the class_rw_ext_t's `ro` member when an
// ext is stored, otherwise the directly stored pointer.
const class_ro_t *ro() const {
auto v = get_ro_or_rwe();
if (slowpath(v.is<class_rw_ext_t *>())) {
return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->ro;
}
return v.get<const class_ro_t *>(&ro_or_rw_ext);
}
// Updates the class_ro_t pointer in whichever place it currently lives.
void set_ro(const class_ro_t *ro) {
auto v = get_ro_or_rwe();
if (v.is<class_rw_ext_t *>()) {
v.get<class_rw_ext_t *>(&ro_or_rw_ext)->ro = ro;
} else {
set_ro_or_rwe(ro);
}
}
// Method lists: the ext's `methods` when an ext is stored, otherwise a
// method_array_t built from the class_ro_t's baseMethods().
const method_array_t methods() const {
auto v = get_ro_or_rwe();
if (v.is<class_rw_ext_t *>()) {
return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->methods;
} else {
return method_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseMethods()};
}
}
// Property lists: the ext's `properties`, otherwise built from baseProperties.
const property_array_t properties() const {
auto v = get_ro_or_rwe();
if (v.is<class_rw_ext_t *>()) {
return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->properties;
} else {
return property_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseProperties};
}
}
// Protocol lists: the ext's `protocols`, otherwise built from baseProtocols.
const protocol_array_t protocols() const {
auto v = get_ro_or_rwe();
if (v.is<class_rw_ext_t *>()) {
return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->protocols;
} else {
return protocol_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseProtocols};
}
}
};
class_rw_t结构体有以下几个成员:
- uint32_t flags;
标记位,通过掩码运算,来存取一些bool信息,包括:
allowsPreoptCaches、allowsPreoptInlinedSels、instancesHaveAssociatedObjects、isInitializing、isInitialized、isRealized、isMetaClass、isMetaClassMaybeUnrealized等
- uint16_t witness;
如果一个类已经被实现(realized),witness会记录在dataSegmentsRanges中查找到该类所在range的下标(index):
objc::dataSegmentsRanges.find((uintptr_t)cls, index)
witness = index
- explicit_atomic<uintptr_t> ro_or_rw_ext;
进行原子性包装的指针
- Class firstSubclass;
第一个子类
- Class nextSiblingClass;
兄弟类
cls->data()->nextSiblingClass = _firstRealizedClass;
subcls->data()->nextSiblingClass = supercls->data()->firstSubclass;
单独看这几个成员,我们仍然没有找到方法、属性、协议等信息的影子。但是我们发现class_rw_t有methods()、properties()、protocols()三个方法,它们通过ro_or_rw_ext指针来获取方法、属性、协议等信息。
ro_or_rw_ext
这个指针成员的实现也非常巧妙,它的存取都要经过一次PointerUnion的转换。ro_or_rw_ext可以是class_ro_t *,也可以是class_rw_ext_t *,通过指针的最低一位(tag位)进行区分:0表示class_ro_t *,1表示class_rw_ext_t *。
// Private accessors of class_rw_t for the ro_or_rw_ext word (class-interior excerpt).
private:
// Pointer union over `const class_ro_t` and `class_rw_ext_t`.
using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t, class_rw_ext_t, PTRAUTH_STR("class_ro_t"), PTRAUTH_STR("class_rw_ext_t")>;
// Decodes the current value of ro_or_rw_ext into a pointer union.
const ro_or_rw_ext_t get_ro_or_rwe() const {
return ro_or_rw_ext_t{ro_or_rw_ext};
}
// Stores a class_ro_t pointer into ro_or_rw_ext with relaxed ordering.
void set_ro_or_rwe(const class_ro_t *ro) {
ro_or_rw_ext_t{ro, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_relaxed);
}
// Records `ro` inside `rwe`, then publishes `rwe` with release ordering.
void set_ro_or_rwe(class_rw_ext_t *rwe, const class_ro_t *ro) {
// the release barrier is so that the class_rw_ext_t::ro initialization
// is visible to lockless readers
rwe->ro = ro;
ro_or_rw_ext_t{rwe, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_release);
}
// Declared here only; the definition is not part of this excerpt.
class_rw_ext_t *extAlloc(const class_ro_t *ro, bool deep = false);
****************************************************************
// Tagged pointer holding either a T1* or a T2* in one uintptr_t. The lowest bit
// is the tag (0 = T1, 1 = T2); the static_asserts require alignment >= 2 so
// that bit is free.
// NOTE(review): the template header declaring T1, T2, Auth1 and Auth2 is not
// part of this excerpt; confirm against the original source.
class PointerUnion {
uintptr_t _value;
static_assert(alignof(T1) >= 2, "alignment requirement");
static_assert(alignof(T2) >= 2, "alignment requirement");
// Tag value for a stored T1*.
struct IsPT1 {
static const uintptr_t Num = 0;
};
// Tag value for a stored T2*.
struct IsPT2 {
static const uintptr_t Num = 1;
};
// Fallback used by the type selector for a T that is neither T1* nor T2*.
template <typename T> struct UNION_DOESNT_CONTAIN_TYPE {};
// The stored value with the tag bit cleared.
uintptr_t getPointer() const {
return _value & ~1;
}
// The tag bit alone (0 or 1).
uintptr_t getTag() const {
return _value & 1;
}
public:
// Loads the raw word from `raw` with relaxed ordering.
explicit PointerUnion(const std::atomic<uintptr_t> &raw)
: _value(raw.load(std::memory_order_relaxed))
{ }
// Stores a T1*: signed through Auth1 with `address`, tag bit left at 0.
PointerUnion(T1 *t, const void *address) {
_value = (uintptr_t)Auth1::sign(t, address);
}
// Stores a T2*: signed through Auth2 with `address`, tag bit set to 1.
PointerUnion(T2 *t, const void *address) {
_value = (uintptr_t)Auth2::sign(t, address) | 1;
}
// Writes the encoded value into `raw` with the requested memory order.
void storeAt(std::atomic<uintptr_t> &raw, std::memory_order order) const {
raw.store(_value, order);
}
// True when the stored tag matches the tag associated with pointer type T.
template <typename T>
bool is() const {
using Ty = typename PointerUnionTypeSelector<T1 *, T, IsPT1,
PointerUnionTypeSelector<T2 *, T, IsPT2,
UNION_DOESNT_CONTAIN_TYPE<T>>>::Return;
return getTag() == Ty::Num;
}
// Returns the stored pointer as T, authenticated through the matching Auth
// type; asserts that the union currently holds a T.
template <typename T> T get(const void *address) const {
ASSERT(is<T>() && "Invalid accessor called");
using AuthT = typename PointerUnionTypeSelector<T1 *, T, Auth1,
PointerUnionTypeSelector<T2 *, T, Auth2,
UNION_DOESNT_CONTAIN_TYPE<T>>>::Return;
return AuthT::auth((T)getPointer(), address);
}
// Like get(), but returns a value-initialized T when the union holds the
// other type instead of asserting.
template <typename T> T dyn_cast(const void *address) const {
if (is<T>())
return get<T>(address);
return T();
}
};
class_rw_ext_t
// Extension record that ro_or_rw_ext can point to in place of a bare
// class_ro_t. It keeps the class_ro_t pointer in `ro` and carries its own
// method, property and protocol arrays.
struct class_rw_ext_t {
DECLARE_AUTHED_PTR_TEMPLATE(class_ro_t)
// The class_ro_t pointer; read by class_rw_t::ro() when an ext is stored.
class_ro_t_authed_ptr<const class_ro_t> ro;
method_array_t methods;
property_array_t properties;
protocol_array_t protocols;
char *demangledName;
uint32_t version;
};
千呼万唤始出来,在class_rw_ext_t中终于看到了methods、properties、protocols这些内容,以及一个指向class_ro_t的成员ro。那么class_ro_t又是什么呢?从下面的定义可以看到,它包含baseMethodList、baseProtocols、ivars、weakIvarLayout、baseProperties这些内容。
class_ro_t
// Class data record holding the flags, instance start/size, name, and the base
// method / protocol / ivar / property lists. class_rw_t reaches it through
// ro_or_rw_ext, either directly or through class_rw_ext_t::ro.
struct class_ro_t {
uint32_t flags;
uint32_t instanceStart;
uint32_t instanceSize;
#ifdef __LP64__
// Present only on LP64 targets.
uint32_t reserved;
#endif
// Shared storage: read as nonMetaclass when RO_META is set (see
// getNonMetaclass) and as ivarLayout otherwise (see getIvarLayout).
union {
const uint8_t * ivarLayout;
Class nonMetaclass;
};
explicit_atomic<const char *> name;
// With ptrauth, this is signed if it points to a small list, but
// may be unsigned if it points to a big list.
void *baseMethodList;
protocol_list_t * baseProtocols;
const ivar_list_t * ivars;
const uint8_t * weakIvarLayout;
property_list_t *baseProperties;
// This field exists only when RO_HAS_SWIFT_INITIALIZER is set.
_objc_swiftMetadataInitializer __ptrauth_objc_method_list_imp _swiftMetadataInitializer_NEVER_USE[0];
// Returns the trailing initializer slot when RO_HAS_SWIFT_INITIALIZER is set,
// otherwise nil.
_objc_swiftMetadataInitializer swiftMetadataInitializer() const {
if (flags & RO_HAS_SWIFT_INITIALIZER) {
return _swiftMetadataInitializer_NEVER_USE[0];
} else {
return nil;
}
}
// Loads the name pointer with acquire ordering.
const char *getName() const {
return name.load(std::memory_order_acquire);
}
// Constant blended with &baseMethodList to form the ptrauth discriminator.
static const uint16_t methodListPointerDiscriminator = 0xC310;
#if 0 // FIXME: enable this when we get a non-empty definition of __ptrauth_objc_method_list_pointer from ptrauth.h.
static_assert(std::is_same<
void * __ptrauth_objc_method_list_pointer *,
void * __ptrauth(ptrauth_key_method_list_pointer, 1, methodListPointerDiscriminator) *>::value,
"Method list pointer signing discriminator must match ptrauth.h");
#endif
// Returns baseMethodList as a method_list_t*. Without ptrauth this is a plain
// cast; with ptrauth the pointer is stripped first and then authenticated
// only when it is a small list that is not covered by the shared-cache
// exemption below.
method_list_t *baseMethods() const {
#if __has_feature(ptrauth_calls)
method_list_t *ptr = ptrauth_strip((method_list_t *)baseMethodList, ptrauth_key_method_list_pointer);
if (ptr == nullptr)
return nullptr;
// Don't auth if the class_ro and the method list are both in the shared cache.
// This is secure since they'll be read-only, and this allows the shared cache
// to cut down on the number of signed pointers it has.
bool roInSharedCache = objc::inSharedCache((uintptr_t)this);
bool listInSharedCache = objc::inSharedCache((uintptr_t)ptr);
if (roInSharedCache && listInSharedCache)
return ptr;
// Auth all other small lists.
if (ptr->isSmallList())
ptr = ptrauth_auth_data((method_list_t *)baseMethodList,
ptrauth_key_method_list_pointer,
ptrauth_blend_discriminator(&baseMethodList,
methodListPointerDiscriminator));
return ptr;
#else
return (method_list_t *)baseMethodList;
#endif
}
// Discriminator for this object's baseMethodList: the field's own address
// blended with methodListPointerDiscriminator.
uintptr_t baseMethodListPtrauthData() const {
return ptrauth_blend_discriminator(&baseMethodList,
methodListPointerDiscriminator);
}
// Returns a copy of this object made with memdup, including the trailing
// Swift initializer slot when RO_HAS_SWIFT_INITIALIZER is set. With ptrauth,
// the copy's baseMethodList is re-signed for the copy's address.
class_ro_t *duplicate() const {
bool hasSwiftInitializer = flags & RO_HAS_SWIFT_INITIALIZER;
size_t size = sizeof(*this);
// The zero-length trailing array is not counted by sizeof, so add one slot.
if (hasSwiftInitializer)
size += sizeof(_swiftMetadataInitializer_NEVER_USE[0]);
class_ro_t *ro = (class_ro_t *)memdup(this, size);
if (hasSwiftInitializer)
ro->_swiftMetadataInitializer_NEVER_USE[0] = this->_swiftMetadataInitializer_NEVER_USE[0];
#if __has_feature(ptrauth_calls)
// Re-sign the method list pointer if it was signed.
// NOTE: It is possible for a signed pointer to have a signature
// that is all zeroes. This is indistinguishable from a raw pointer.
// This code will treat such a pointer as signed and re-sign it. A
// false positive is safe: method list pointers are either authed or
// stripped, so if baseMethods() doesn't expect it to be signed, it
// will ignore the signature.
void *strippedBaseMethodList = ptrauth_strip(baseMethodList, ptrauth_key_method_list_pointer);
void *signedBaseMethodList = ptrauth_sign_unauthenticated(strippedBaseMethodList,
ptrauth_key_method_list_pointer,
baseMethodListPtrauthData());
// The stored value equals a freshly signed one, so it is treated as signed
// and moved from this object's discriminator to the copy's.
if (baseMethodList == signedBaseMethodList) {
ro->baseMethodList = ptrauth_auth_and_resign(baseMethodList,
ptrauth_key_method_list_pointer,
baseMethodListPtrauthData(),
ptrauth_key_method_list_pointer,
ro->baseMethodListPtrauthData());
} else {
// Special case: a class_ro_t in the shared cache pointing to a
// method list in the shared cache will not have a signed pointer,
// but the duplicate will be expected to have a signed pointer since
// it's not in the shared cache. Detect that and sign it.
bool roInSharedCache = objc::inSharedCache((uintptr_t)this);
bool listInSharedCache = objc::inSharedCache((uintptr_t)strippedBaseMethodList);
if (roInSharedCache && listInSharedCache)
ro->baseMethodList = ptrauth_sign_unauthenticated(strippedBaseMethodList,
ptrauth_key_method_list_pointer,
ro->baseMethodListPtrauthData());
}
#endif
return ro;
}
// Returns the nonMetaclass union member; asserts that RO_META is set.
Class getNonMetaclass() const {
ASSERT(flags & RO_META);
return nonMetaclass;
}
// Returns nullptr when RO_META is set (the union then holds nonMetaclass),
// otherwise the ivarLayout union member.
const uint8_t *getIvarLayout() const {
if (flags & RO_META)
return nullptr;
return ivarLayout;
}
};
总结:
写到这里,篇幅有些长了。我们发现objc2中类的数据结构的相比较之前的版本更为复杂,层级也比较深。
- objc_class嵌套了class_data_bits_t类型的结构体bits
- 结构体bits只有一个uintptr_t类型的成员bits,但作为C++结构体,它扩展了一些存取方法,在成员bits有限的bit位中,存储了指向class_rw_t结构体的指针,以及一些其他的信息,比如:hasCustomRR、swift相关标记。
- class_rw_t包含了flags、witness、firstSubclass、nextSiblingClass等成员,以及最重要的一个共用体指针ro_or_rw_ext。
- ro_or_rw_ext可以是class_ro_t *类型,也可以是class_rw_ext_t *类型。
- class_rw_ext_t包含ro、方法列表,属性列表、协议列表等
- class_ro_t不仅包含方法列表、属性列表、协议列表,还有一些其他的类相关内容。
那么class_ro_t和class_rw_ext_t有什么区别?相对以前的版本,为什么苹果给类的数据结构设计了这么多的层级呢?rw、rw_ext、ro的作用分别是什么?且听下回分解吧。