HashMap

我们通常使用的dict、map基本都是hashmap,表现形式就是key-value键值对的形式，如下图：

我们可以使用链表或者数组的方式实现这个功能，但是他们各自有各自的的优缺点
数组：寻址容易，但是增加删除太麻烦，每次都要整体移动很多数据
链表：增减删除简单，只需要改变next指针的指向，但是寻址麻烦

所以hashmap采用的是数组+链表的方式：

HashMap.png

实现原理：

通过开辟一定数量的数组空间bucket，数组里的指针指向一个链表，当输入key时，通过计算hashCode,然后对数组空间数量取模，来获取数组bucket的index:即index = key.hash % capacity; 其中capacity就是数组的数量（如上图的数量是10）。为什么bucket里的指针指向的是一个链表呢?是因为两个key的hash值在取模之后有可能是一样的。这种情况就将bucketIndex一样的key，放在同一个链表上。

talk is cheap ,show me the code

struct Node{
    char * key; 
    void * value;
    struct Node * next; //指向下一个Node地址或者null
};

struct HashMap{
    //容量 (是容量，指的是bucketArr数组的的数量，不是key-value的数量)
    int capacity;
    //bucket array
    struct Node ** bucketArr;
    //添加key-value
    void(*add)(struct Node ** bucketArr,int capacity, char * key,void * value);
    //移除key-value
    void(*remove)(struct Node ** bucketArr,int capacity,char * key);
    //根据key获取value的值
    void *(*get)(struct Node ** bucketArr,int capacity,char * key);
};

//根据key,通过hash，然后计算出index
int getBucketIndex(char * key,int capacity){
    unsigned long hashCode = [NSString stringWithFormat:@"%s",key].hash;
    return hashCode%capacity;
}

//添加bucket的index指向的链表中
void addKeyValue(struct Node ** bucketArr,int capacity, char * key,void * value){
    if(!key)return;
    int index = getBucketIndex(key, capacity);
    struct Node * head = bucketArr[index];
   //循环遍历Node
    while (head!=NULL) {
        //如果链表中的Node存在相同key,直接替换value
        if(!strcmp(head->key, key)){
            head->next = value;
            return;
        }
        head = head->next;
    }
 
    //生成新node
    struct Node * newNode = malloc(sizeof(struct Node));
    newNode->key = key;
    newNode->value = value;
    newNode->next = NULL;
    //链表为空时
    if(head==NULL){
        bucketArr[index] = newNode;
    }else{
        //链表不为空时，将新的node的next指向原来的head,然后再将指向新node 地址的指针保存到bucket中。
        newNode->next = bucketArr[index];;
        bucketArr[index] = newNode;
    }
    
    //扩容(根据实际情况自定义，当keyvalue数量过多，增加capaity)
    //TODO 暂时就不实现了
}

//移除bucket的index指向的链表中的特定key
void removeKey(struct Node ** bucketArr,int capacity,char * key){
    if(!key)return;
    int index = getBucketIndex(key, capacity);
    struct Node * head = bucketArr[index];
    struct Node * pre = NULL;
    struct Node * removeNode = NULL;
    //查找bucketIndex指向的链表中是否有对应的key
     while (head!=NULL) {
         if(!strcmp(head->key, key)){
             removeNode = head;
             break;
         }
         //往后查找
         pre = head;
         head = head->next;
     }
    
    //说明链表中没有对应的要删除的Node
    if(removeNode==NULL){
        return;
    }
    
    //说明不是链表头结点
    if(pre!=NULL){
        pre->next = head->next;
    }
    //链表头结点
    else{
        bucketArr[index] = head->next;
    }
}

//根据key获取value的值
void * getKeyValue(struct Node ** bucketArr,int capacity,char * key){
    if(!key) return NULL;
    int index = getBucketIndex(key, capacity);
    struct Node * head = bucketArr[index];
    while (head!=NULL) {
        if(!strcmp(head->key, key)){
            return head->value;
            break;
        }
        head = head->next;
    }
    return NULL;
}


int main(int argc,char *argv[]){
    struct HashMap * hashMap = malloc(sizeof(struct HashMap));
    hashMap->capacity = 10;
    struct Node * nodeArr[10] = {0};
    hashMap->bucketArr = nodeArr;
    hashMap->add = addKeyValue;
    hashMap->remove = removeKey;
    hashMap->get = getKeyValue;
    //验证
    char * key1 = "姓名1";
    char * key2 = "姓名2";
    char * key3 = "姓名3";
    hashMap->add(hashMap->bucketArr, hashMap->capacity, key1, @"王二牛");
    hashMap->add(hashMap->bucketArr, hashMap->capacity, key2, @"王大牛");
    hashMap->add(hashMap->bucketArr, hashMap->capacity, key3, @"王老牛");
    
    char * keys[3] = {key1,key2,key3};
    for(int i=0;i<3;i++){
        NSString * value = (__bridge NSString *)hashMap->get(hashMap->bucketArr,hashMap->capacity,keys[i]);
        NSLog(@"%s:%@",keys[i],value);
    }
/*
2020-09-14 08:32:07.696646+0800 Test[11948:327436] 姓名1:王二牛
2020-09-14 08:32:07.698029+0800 Test[11948:327436] 姓名2:王大牛
2020-09-14 08:32:07.698129+0800 Test[11948:327436] 姓名3:王老牛
能验证get set功能
*/    

    hashMap->remove(hashMap->bucketArr, hashMap->capacity, key1);
    for(int i=0;i<3;i++){
        NSString * value = (__bridge NSString *)hashMap->get(hashMap->bucketArr,hashMap->capacity,keys[i]);
        NSLog(@"%s:%@",keys[i],value);
    }
    
/*
2020-09-14 08:32:07.698210+0800 Test[11948:327436] 姓名1:(null)
2020-09-14 08:32:07.698289+0800 Test[11948:327436] 姓名2:王大牛
2020-09-14 08:32:07.698364+0800 Test[11948:327436] 姓名3:王老牛
能验证remove set功能
*/

//TODO: free object 释放堆上创建的对象

    return 1;
}

这里说一下小插曲，add方法中的struct Node * newNode = malloc(sizeof(struct Node))新生成一个对象这个地方，我调用两次add方法，每次返回的Node的地址都是一样，导致后面add的key-value会将前面的值覆盖掉。应该是方法栈执行完后，栈对象被回收，导致每次调用alloc返回的都是同一个地址。后来改用malloc在堆上开辟空间，这样，就得手动调用free来释放对象。

总结：

HashMap是通过数组+链表的方式实现快速的增加、删除、获取等方法
增加：通过key的hash值对bucket数量取模，来获取index,进而获取单向链表，判断当链表中没有Node的key与新增的key相同，如果有则直接替换value。如果没有，则创建一个新的Node,新Node的next指向原来链表的Head，并将bucket的index指向新的Node的地址。
删除：通过key的hash值对bucket数量取模，来获取index,进而获取单向链表。然后遍历链表，遍历Node没有相同key，则直接退出，如果有Node的key相等，若是头结点则直接直接将bucketIndex指向head->next，若不是头结点，则将pre->next指向head->next来实现删除node结点。
获取：找到bucketIndex指向的链表，然后遍历链表，根据key找到对应的Node,然后返回value值。找不到返回null。
通过上面的实现，我们自然而然的也就理解了为什么hashmap是无序的了。