Swift字典用来存储无序的相同类型数据的集合,字典会强制检测元素的类型,如果类型不同则会报错。Swift字典每个值(value)都关联唯一的键(key),键作为字典中的这个值数据的标识符。- 和数组中的元素不同,字典中的元素并没有具体顺序,需要通过
key访问到元素。- 字典的
key没有类型限制,可以是Int或String,但必须是唯一的。- 如果创建一个字典,并赋值给一个变量,则创建的字典是可以修改的。这意味着在创建字典后,可以通过添加、删除、修改的方式改变字典里的元素。
- 如果将一个字典赋值给常量,字典是不可修改的,并且字典的大小和内容都不可以修改。
基本定义
Dictionary类型的键值对必须遵循Hashable协议,因为它使用的就是哈希表
哈希表
哈希表(
Hash table)也叫散列表,是根据关键字(Key value)⽽直接访问在内存存储位置的数据结构。也就是说,它通过计算⼀个关于键值的函数,将所需查询的数据映射到表中⼀个位置来访问记录,这加快了查找速度。这个映射函数称做散列函数,存放记录的数组称做散列表。
散列函数
散列函数又称散列算法、哈希函数。它的目标是计算
key在数组中的下标。几种常用的散列函数构造方法:
- 直接寻址法
- 数字分析法
- 平⽅取中法
- 折叠法
- 随机数法
- 除留余数法
哈希冲突
再优秀的哈希算法永远无法避免出现哈希冲突。哈希冲突指的是两个不同的
key经过哈希计算后得到的数组下标是相同的。哈希冲突几种常用的解决方法:
- 开放定址法
- 拉链法
负载因⼦
负载因子 = 填⼊表中的元素个数 / 散列表的⻓度一个非空散列表的
负载因子是填⼊表中的元素个数 / 散列表的⻓度。这是面临再次散列或扩容时的决策参数,有助于确定散列函数的效率。也就是说,该参数指出了散列函数是否将关键字均匀分布。
内存布局
Dictionary是否是连续的内存地址空间?如果不是,当前元素和元素之前的关系如何确定?如何计算⼀个元素的地址?
通过字⾯量方式创建⼀个字典:
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]
源码分析
上述代码,通过字⾯量方式创建
Dictionary,在源码中可以直接搜索literal
打开
Dictionary.swift文件,找到init(dictionaryLiteral elements:)的定义:
@inlinable
@_effects(readonly)
@_semantics("optimize.sil.specialize.generic.size.never")
public init(dictionaryLiteral elements: (Key, Value)...) {
let native = _NativeDictionary<Key, Value>(capacity: elements.count)
for (key, value) in elements {
let (bucket, found) = native.find(key)
_precondition(!found, "Dictionary literal contains duplicate keys")
native._insert(at: bucket, key: key, value: value)
}
self.init(_native: native)
}
- 创建了一个
_NativeDictionary的实例对象- 遍历
elements,通过实例对象的find方法,找到key值对应的bucketbucket相当于要插入的位置- 将
key、value循环插入到bucket中- 最后调用
init方法found:布尔类型,如果字典初始化时,字面量里包含重复key值,运行时报错Dictionary literal contains duplicate keys
打开
NativeDictionary.swift文件,找到_NativeDictionary的定义:
@usableFromInline
@frozen
internal struct _NativeDictionary<Key: Hashable, Value> {
@usableFromInline
internal typealias Element = (key: Key, value: Value)
/// See this comments on __RawDictionaryStorage and its subclasses to
/// understand why we store an untyped storage here.
@usableFromInline
internal var _storage: __RawDictionaryStorage
/// Constructs an instance from the empty singleton.
@inlinable
internal init() {
self._storage = __RawDictionaryStorage.empty
}
/// Constructs a dictionary adopting the given storage.
@inlinable
internal init(_ storage: __owned __RawDictionaryStorage) {
self._storage = storage
}
@inlinable
internal init(capacity: Int) {
if capacity == 0 {
self._storage = __RawDictionaryStorage.empty
} else {
self._storage = _DictionaryStorage<Key, Value>.allocate(capacity: capacity)
}
}
#if _runtime(_ObjC)
@inlinable
internal init(_ cocoa: __owned __CocoaDictionary) {
self.init(cocoa, capacity: cocoa.count)
}
@inlinable
internal init(_ cocoa: __owned __CocoaDictionary, capacity: Int) {
if capacity == 0 {
self._storage = __RawDictionaryStorage.empty
} else {
_internalInvariant(cocoa.count <= capacity)
self._storage =
_DictionaryStorage<Key, Value>.convert(cocoa, capacity: capacity)
for (key, value) in cocoa {
insertNew(
key: _forceBridgeFromObjectiveC(key, Key.self),
value: _forceBridgeFromObjectiveC(value, Value.self))
}
}
}
#endif
}
_NativeDictionary是一个结构体- 使用
init(capacity:)方法初始化,如果capacity容量等于0,使用__RawDictionaryStorage.empty。否则使用_DictionaryStorage,通过allocate方法,按容量大小分配内存空间
打开
DictionaryStorage.swift文件,找到_DictionaryStorage的定义:
@usableFromInline
final internal class _DictionaryStorage<Key: Hashable, Value>
: __RawDictionaryStorage, _NSDictionaryCore {
// This type is made with allocWithTailElems, so no init is ever called.
// But we still need to have an init to satisfy the compiler.
@nonobjc
override internal init(_doNotCallMe: ()) {
_internalInvariantFailure("This class cannot be directly initialized")
}
deinit {
guard _count > 0 else { return }
if !_isPOD(Key.self) {
let keys = self._keys
for bucket in _hashTable {
(keys + bucket.offset).deinitialize(count: 1)
}
}
if !_isPOD(Value.self) {
let values = self._values
for bucket in _hashTable {
(values + bucket.offset).deinitialize(count: 1)
}
}
_count = 0
_fixLifetime(self)
}
@inlinable
final internal var _keys: UnsafeMutablePointer<Key> {
@inline(__always)
get {
return self._rawKeys.assumingMemoryBound(to: Key.self)
}
}
@inlinable
final internal var _values: UnsafeMutablePointer<Value> {
@inline(__always)
get {
return self._rawValues.assumingMemoryBound(to: Value.self)
}
}
internal var asNative: _NativeDictionary<Key, Value> {
return _NativeDictionary(self)
}
#if _runtime(_ObjC)
@objc
internal required init(
objects: UnsafePointer<AnyObject?>,
forKeys: UnsafeRawPointer,
count: Int
) {
_internalInvariantFailure("This class cannot be directly initialized")
}
@objc(copyWithZone:)
internal func copy(with zone: _SwiftNSZone?) -> AnyObject {
return self
}
@objc
internal var count: Int {
return _count
}
@objc(keyEnumerator)
internal func keyEnumerator() -> _NSEnumerator {
return _SwiftDictionaryNSEnumerator<Key, Value>(asNative)
}
@objc(countByEnumeratingWithState:objects:count:)
internal func countByEnumerating(
with state: UnsafeMutablePointer<_SwiftNSFastEnumerationState>,
objects: UnsafeMutablePointer<AnyObject>?, count: Int
) -> Int {
defer { _fixLifetime(self) }
let hashTable = _hashTable
var theState = state.pointee
if theState.state == 0 {
theState.state = 1 // Arbitrary non-zero value.
theState.itemsPtr = AutoreleasingUnsafeMutablePointer(objects)
theState.mutationsPtr = _fastEnumerationStorageMutationsPtr
theState.extra.0 = CUnsignedLong(hashTable.startBucket.offset)
}
// Test 'objects' rather than 'count' because (a) this is very rare anyway,
// and (b) the optimizer should then be able to optimize away the
// unwrapping check below.
if _slowPath(objects == nil) {
return 0
}
let unmanagedObjects = _UnmanagedAnyObjectArray(objects!)
var bucket = _HashTable.Bucket(offset: Int(theState.extra.0))
let endBucket = hashTable.endBucket
_precondition(bucket == endBucket || hashTable.isOccupied(bucket),
"Invalid fast enumeration state")
var stored = 0
for i in 0..<count {
if bucket == endBucket { break }
let key = _keys[bucket.offset]
unmanagedObjects[i] = _bridgeAnythingToObjectiveC(key)
stored += 1
bucket = hashTable.occupiedBucket(after: bucket)
}
theState.extra.0 = CUnsignedLong(bucket.offset)
state.pointee = theState
return stored
}
@objc(objectForKey:)
internal func object(forKey aKey: AnyObject) -> AnyObject? {
guard let nativeKey = _conditionallyBridgeFromObjectiveC(aKey, Key.self)
else { return nil }
let (bucket, found) = asNative.find(nativeKey)
guard found else { return nil }
let value = asNative.uncheckedValue(at: bucket)
return _bridgeAnythingToObjectiveC(value)
}
@objc(getObjects:andKeys:count:)
internal func getObjects(
_ objects: UnsafeMutablePointer<AnyObject>?,
andKeys keys: UnsafeMutablePointer<AnyObject>?,
count: Int) {
_precondition(count >= 0, "Invalid count")
guard count > 0 else { return }
var i = 0 // Current position in the output buffers
switch (_UnmanagedAnyObjectArray(keys), _UnmanagedAnyObjectArray(objects)) {
case (let unmanagedKeys?, let unmanagedObjects?):
for (key, value) in asNative {
unmanagedObjects[i] = _bridgeAnythingToObjectiveC(value)
unmanagedKeys[i] = _bridgeAnythingToObjectiveC(key)
i += 1
guard i < count else { break }
}
case (let unmanagedKeys?, nil):
for (key, _) in asNative {
unmanagedKeys[i] = _bridgeAnythingToObjectiveC(key)
i += 1
guard i < count else { break }
}
case (nil, let unmanagedObjects?):
for (_, value) in asNative {
unmanagedObjects[i] = _bridgeAnythingToObjectiveC(value)
i += 1
guard i < count else { break }
}
case (nil, nil):
// Do nothing.
break
}
}
#endif
}
_DictionaryStorage是一个类,继承自__RawDictionaryStorage,遵循_NSDictionaryCore协议_DictionaryStorage使用final关键字修饰,子类不可重写
找到
__RawDictionaryStorage的定义:
@_fixed_layout
@usableFromInline
@_objc_non_lazy_realization
internal class __RawDictionaryStorage: __SwiftNativeNSDictionary {
// NOTE: The precise layout of this type is relied on in the runtime to
// provide a statically allocated empty singleton. See
// stdlib/public/stubs/GlobalObjects.cpp for details.
/// The current number of occupied entries in this dictionary.
@usableFromInline
@nonobjc
internal final var _count: Int
/// The maximum number of elements that can be inserted into this set without
/// exceeding the hash table's maximum load factor.
@usableFromInline
@nonobjc
internal final var _capacity: Int
/// The scale of this dictionary. The number of buckets is 2 raised to the
/// power of `scale`.
@usableFromInline
@nonobjc
internal final var _scale: Int8
/// The scale corresponding to the highest `reserveCapacity(_:)` call so far,
/// or 0 if there were none. This may be used later to allow removals to
/// resize storage.
///
/// FIXME: <rdar://problem/18114559> Shrink storage on deletion
@usableFromInline
@nonobjc
internal final var _reservedScale: Int8
// Currently unused, set to zero.
@nonobjc
internal final var _extra: Int16
/// A mutation count, enabling stricter index validation.
@usableFromInline
@nonobjc
internal final var _age: Int32
/// The hash seed used to hash elements in this dictionary instance.
@usableFromInline
internal final var _seed: Int
/// A raw pointer to the start of the tail-allocated hash buffer holding keys.
@usableFromInline
@nonobjc
internal final var _rawKeys: UnsafeMutableRawPointer
/// A raw pointer to the start of the tail-allocated hash buffer holding
/// values.
@usableFromInline
@nonobjc
internal final var _rawValues: UnsafeMutableRawPointer
// This type is made with allocWithTailElems, so no init is ever called.
// But we still need to have an init to satisfy the compiler.
@nonobjc
internal init(_doNotCallMe: ()) {
_internalInvariantFailure("This class cannot be directly initialized")
}
@inlinable
@nonobjc
internal final var _bucketCount: Int {
@inline(__always) get { return 1 &<< _scale }
}
@inlinable
@nonobjc
internal final var _metadata: UnsafeMutablePointer<_HashTable.Word> {
@inline(__always) get {
let address = Builtin.projectTailElems(self, _HashTable.Word.self)
return UnsafeMutablePointer(address)
}
}
// The _HashTable struct contains pointers into tail-allocated storage, so
// this is unsafe and needs `_fixLifetime` calls in the caller.
@inlinable
@nonobjc
internal final var _hashTable: _HashTable {
@inline(__always) get {
return _HashTable(words: _metadata, bucketCount: _bucketCount)
}
}
}
__RawDictionaryStorage类,定义基础数据结构,例如_count、_capacity- 其中
_scale为2的n次方数,参与计算_bucketCount_seed为哈希种子
找到
allocate的定义:
@usableFromInline
@_effects(releasenone)
static internal func allocate(capacity: Int) -> _DictionaryStorage {
let scale = _HashTable.scale(forCapacity: capacity)
return allocate(scale: scale, age: nil, seed: nil)
}
- 调用
_HashTable的scale方法,计算scale- 调用
allocate方法,传入scale
打开
HashTable.swift文件,找到_HashTable的定义:
@usableFromInline
@frozen
internal struct _HashTable {
@usableFromInline
internal typealias Word = _UnsafeBitset.Word
@usableFromInline
internal var words: UnsafeMutablePointer<Word>
@usableFromInline
internal let bucketMask: Int
@inlinable
@inline(__always)
internal init(words: UnsafeMutablePointer<Word>, bucketCount: Int) {
_internalInvariant(bucketCount > 0 && bucketCount & (bucketCount - 1) == 0,
"bucketCount must be a power of two")
self.words = words
// The bucket count is a power of two, so subtracting 1 will never overflow
// and get us a nice mask.
self.bucketMask = bucketCount &- 1
}
@inlinable
internal var bucketCount: Int {
@inline(__always) get {
return bucketMask &+ 1
}
}
@inlinable
internal var wordCount: Int {
@inline(__always) get {
return _UnsafeBitset.wordCount(forCapacity: bucketCount)
}
}
}
words可以理解为二进制位,记录当前位置是否插入元素bucketMask等于bucketCount-1,而bucketCount是2的n次方数,所以bucketMask相当于掩码_HashTable并不直接存储数据,而是存储二进制位
回到
DictionaryStorage.swift文件,找到allocate的定义:
static internal func allocate(
scale: Int8,
age: Int32?,
seed: Int?
) -> _DictionaryStorage {
// The entry count must be representable by an Int value; hence the scale's
// peculiar upper bound.
_internalInvariant(scale >= 0 && scale < Int.bitWidth - 1)
let bucketCount = (1 as Int) &<< scale
let wordCount = _UnsafeBitset.wordCount(forCapacity: bucketCount)
let storage = Builtin.allocWithTailElems_3(
_DictionaryStorage<Key, Value>.self,
wordCount._builtinWordValue, _HashTable.Word.self,
bucketCount._builtinWordValue, Key.self,
bucketCount._builtinWordValue, Value.self)
let metadataAddr = Builtin.projectTailElems(storage, _HashTable.Word.self)
let keysAddr = Builtin.getTailAddr_Word(
metadataAddr, wordCount._builtinWordValue, _HashTable.Word.self,
Key.self)
let valuesAddr = Builtin.getTailAddr_Word(
keysAddr, bucketCount._builtinWordValue, Key.self,
Value.self)
storage._count = 0
storage._capacity = _HashTable.capacity(forScale: scale)
storage._scale = scale
storage._reservedScale = 0
storage._extra = 0
if let age = age {
storage._age = age
} else {
// The default mutation count is simply a scrambled version of the storage
// address.
storage._age = Int32(
truncatingIfNeeded: ObjectIdentifier(storage).hashValue)
}
storage._seed = seed ?? _HashTable.hashSeed(for: storage, scale: scale)
storage._rawKeys = UnsafeMutableRawPointer(keysAddr)
storage._rawValues = UnsafeMutableRawPointer(valuesAddr)
// Initialize hash table metadata.
storage._hashTable.clear()
return storage
}
- 通过
&运算符计算bucketCountwordCount通过bucketCount计算要记录的二进制位allocWithTailElems_3在类的后面分配连续的内存空间,分别存储Key和Value
Dictionary内存布局
Dictionary包含DictionaryStorage类DictionaryStorage类包含metaData、refCount、_count、_capacity- 中间
64位存储_scale、_reservedScale、_extra、_age- 接下来存储的
_seed、_rawKeys、_rawValues、metaAddr- 其中
_rawKeys、_rawValues存储的是数组地址
通过
LLDB调试来验证⼀下:通过字⾯量方式创建⼀个字典:
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]通过
withUnsafePointer打印dic内存地址
通过
x/8g查看dic的内存地址,里面包含了一个堆上的地址0x000000010067aed0
通过
x/8g查看内存地址0x000000010067aed0
0x00007fff99540800:metaData元类型0x0000000000000002:refCoutn引用计数0x0000000000000004:_count0x0000000000000006:_capacity0xcf1ba4a800000003:64位连续内存,包含_scale、_reservedScale、_extra、_age0x000000010067aed0:_seed种子,以对象创建的地址作为随机数的开始0x000000010067af18:_rawKeys0x000000010067af98:_rawValues通过
x/16g查看_rawKeys内存地址
- 连续内存中并不是连续存储的,
0x0000000000000032转为十进制是50,对应的是key值2的ASCII码- 后面的
0xe100000000000000表示key值2的字符串长度- 同理后面的
0x0000000000000033、0x0000000000000031、0x0000000000000034分别对应的key值是3、1、4通过
x/16g查看_rawValues内存地址
- 和
_rawKeys同理,前面的0x000000006b6e6148、0x00000069636f6f43、0x0000000079646f4b、0x0000000000746143表示value值- 后面的
0xe400000000000000、0xe500000000000000、0xe400000000000000、0xe300000000000000表示value值的字符串长度
Dictionary插⼊⼀个值
var dic = ["1": "Kody", "2": "Hank", "3": "Cooci", "4" : "Cat"]
dic["5"] = "Swift"
打开
Dictionary.swift文件,找到subscript的定义:
@inlinable
public subscript(
key: Key, default defaultValue: @autoclosure () -> Value
) -> Value {
@inline(__always)
get {
return _variant.lookup(key) ?? defaultValue()
}
@inline(__always)
_modify {
let (bucket, found) = _variant.mutatingFind(key)
let native = _variant.asNative
if !found {
let value = defaultValue()
native._insert(at: bucket, key: key, value: value)
}
let address = native._values + bucket.offset
defer { _fixLifetime(self) }
yield &address.pointee
}
}
get方法里,_variant是一个关联值的枚举类型- 通过
_variant的lookup方法,找到key值是否存在
打开
NativeDictionary.swift文件,找到lookup的定义:
@inlinable
@inline(__always)
func lookup(_ key: Key) -> Value? {
if count == 0 {
// Fast path that avoids computing the hash of the key.
return nil
}
let (bucket, found) = self.find(key)
guard found else { return nil }
return self.uncheckedValue(at: bucket)
}
- 通过
find方法,传入key,找到bucket和found
找到
find的定义:
@inlinable
@inline(__always)
internal func find(_ key: Key) -> (bucket: Bucket, found: Bool) {
return _storage.find(key)
}
调用
_storage的find方法,传入key值
打开
DictionaryStorage.swift文件,找到find的定义:
@_alwaysEmitIntoClient
@inline(never)
internal final func find<Key: Hashable>(_ key: Key) -> (bucket: _HashTable.Bucket, found: Bool) {
return find(key, hashValue: key._rawHashValue(seed: _seed))
}
- 调用
key的_rawHashValue方法,传入_seed种子- 本质上就是通过
key值得到hashValue- 调用
find方法,传入key和hashValue
找到
find(_ key:, hashValue:)的定义:
@_alwaysEmitIntoClient
@inline(never)
internal final func find<Key: Hashable>(_ key: Key, hashValue: Int) -> (bucket: _HashTable.Bucket, found: Bool) {
let hashTable = _hashTable
var bucket = hashTable.idealBucket(forHashValue: hashValue)
while hashTable._isOccupied(bucket) {
if uncheckedKey(at: bucket) == key {
return (bucket, true)
}
bucket = hashTable.bucket(wrappedAfter: bucket)
}
return (bucket, false)
}
}
- 调用
hashTable的idealBucket方法,传入hashValue,返回bucket- 通过
_isOccupied方法判断bucket是否被占用
打开
HashTable.swift文件,找到idealBucket的定义:
@inlinable
@inline(__always)
internal func idealBucket(forHashValue hashValue: Int) -> Bucket {
return Bucket(offset: hashValue & bucketMask)
}
hashValue和bucketMask进行&运算,计算index保存到bucket中
找到
_isOccupied的定义:
@inlinable
@inline(__always)
internal func _isOccupied(_ bucket: Bucket) -> Bool {
_internalInvariant(isValid(bucket))
return words[bucket.word].uncheckedContains(bucket.bit)
}
- 与原有的二进制位进行计算,使用
uncheckedContains返回0或1,查看是否包含当前bucket
找到
bucket(wrappedAfter bucket:)的定义:
@inlinable
@inline(__always)
internal func bucket(wrappedAfter bucket: Bucket) -> Bucket {
// The bucket is less than bucketCount, which is power of two less than
// Int.max. Therefore adding 1 does not overflow.
return Bucket(offset: (bucket.offset &+ 1) & bucketMask)
}
bucket.offset进行+1操作,再和bucketMask进行&运算,然后重新构建bucket
打开
NativeDictionary.swift文件,找到uncheckedValue的定义:
@inlinable
@inline(__always)
internal func uncheckedValue(at bucket: Bucket) -> Value {
defer { _fixLifetime(self) }
_internalInvariant(hashTable.isOccupied(bucket))
return _values[bucket.offset]
}
_values是连续存储的数组,直接通过bucket.offset作为index获取指定位置的元素
找到
setValue的定义:
@inlinable
internal mutating func setValue(
_ value: __owned Value,
forKey key: Key,
isUnique: Bool
) {
let (bucket, found) = mutatingFind(key, isUnique: isUnique)
if found {
(_values + bucket.offset).pointee = value
} else {
_insert(at: bucket, key: key, value: value)
}
}
- 通过
mutatingFind找到bucket和found- 如果存在,使用
_values加上bucket.offset,将value覆盖- 如果不存在,使用
_insert方法,在当前bucket位置插入key和value
Dictionary插⼊⼀个值的过程:
- 通过
key找到key. hashValue- 通过
key. hashValue和bucketMask进行&运算,计算出index- 通过
index查找是否有对应key值,如果有采用开放定址法查找下一个bucket是否为空,如果为空返回对应元素- 对于
setValue也是一样,通过key值先查找,再赋值
使用Swift代码实现简单的HashTable
struct HashTable<Key: Hashable, Value> {
typealias Element = (key: Key, value: Value)
typealias Bucket = [Element]
var buckets: [Bucket]
var count = 0
init(capacity: Int){
buckets = Array<Bucket>(repeating: [Element](), count: capacity)
}
func index(key: Key) -> Int {
return abs(key.hashValue) % buckets.count
}
subscript(key: Key) -> Value? {
get {
return getValue(key: key)
}
set {
if let value = newValue {
updateValue(value, key)
}
else{
removeValue(key)
}
}
}
func getValue(key: Key) -> Value? {
let index = self.index(key: key)
for element in buckets[index] {
if(element.key == key){
return element.value
}
}
return nil
}
mutating func updateValue(_ value: Value, _ key: Key) -> Value? {
let index = self.index(key: key)
for (i, element) in buckets[index].enumerated() {
if(element.key == key){
let originValue = element.value
buckets[index][i].value = value
return originValue
}
}
buckets[index].append((key: key, value: value))
count += 1
return nil
}
mutating func removeValue(_ key: Key) {
let index = self.index(key: key)
for (i, element) in buckets[index].enumerated() {
if(element.key == key){
buckets[index].remove(at: i)
count -= 1
}
}
}
}
var ht = HashTable<String, String>(capacity: 5)
ht["1"] = "Kody"
print(ht["1"])
//输出以下内容:
//Optional("Kody")






