C++基础 - 实现 Native 层的 ArrayList

ArrayList 我们在 Java 中再熟悉不过了，记得自己在学习 Collection 体系的时候，用得最多的也就是 ArrayList，很少用到 Stack 和 LinkedList，反正只要能用就行，所以刚开始并未过多去了解。但是当我们真正了解了其内部实现算法后，在写代码的时候就会根据业务逻辑，有意识地做一些思考了。还有就是在面试的时候，我们也经常会碰到类似的问题。

一.ArrayList 源码分析

    // Default initial size of the backing array
    private static final int DEFAULT_CAPACITY = 10;

    // Shared empty array; the constructor below assigns it when initialCapacity == 0
    private static final Object[] EMPTY_ELEMENTDATA = {};

    // Shared empty array; ensureCapacityInternal compares elementData against it
    // to decide whether the requested capacity is raised to DEFAULT_CAPACITY
    private static final Object[] DEFAULTCAPACITY_EMPTY_ELEMENTDATA = {};

    // Backing array holding the elements
    transient Object[] elementData;

    // Number of elements currently stored
    private int size;

    // Creates a list whose backing array has the given initial capacity.
    // Throws IllegalArgumentException when initialCapacity is negative.
    public ArrayList(int initialCapacity) {
        if (initialCapacity > 0) {
            // Allocate the backing array
            this.elementData = new Object[initialCapacity];
        } else if (initialCapacity == 0) {
            this.elementData = EMPTY_ELEMENTDATA;
        } else {
            throw new IllegalArgumentException("Illegal Capacity: "+
                                               initialCapacity);
        }
    }

    // Appends e after the last stored element; always returns true
    public boolean add(E e) {
        // Check whether the backing array must grow to hold one more element
        ensureCapacityInternal(size + 1);  // Increments modCount!!
        elementData[size++] = e;
        return true;
    }

    // Adjusts the requested capacity for a still-empty default list, then
    // delegates to ensureExplicitCapacity
    private void ensureCapacityInternal(int minCapacity) {
        if (elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA) {
            // The backing array is still the shared empty one: request at least DEFAULT_CAPACITY
            minCapacity = Math.max(DEFAULT_CAPACITY, minCapacity);
        }
        
        ensureExplicitCapacity(minCapacity);
    }

    // Increments modCount and grows the backing array when minCapacity exceeds its length
    private void ensureExplicitCapacity(int minCapacity) {
        modCount++;

        // The requested capacity exceeds the current array length, so grow
        if (minCapacity - elementData.length > 0)
            grow(minCapacity);
    }

    // Replaces the backing array with a larger one that can hold at least minCapacity elements
    private void grow(int minCapacity) {
        // overflow-conscious code
        // Length of the current backing array
        int oldCapacity = elementData.length;
        // By default grow by half of the old capacity (i.e. to 1.5x the old size)
        int newCapacity = oldCapacity + (oldCapacity >> 1);
        if (newCapacity - minCapacity < 0)
            newCapacity = minCapacity;
        // NOTE(review): MAX_ARRAY_SIZE and hugeCapacity are defined outside this excerpt
        if (newCapacity - MAX_ARRAY_SIZE > 0)
            newCapacity = hugeCapacity(minCapacity);
        // minCapacity is usually close to size, so this is a win:
        // Create a new array and copy the contents of the old one into it
        elementData = Arrays.copyOf(elementData, newCapacity);
    }

    // Convenience overload: forwards to the three-argument copyOf, passing the
    // runtime class of original as the type of the new array
    public static <T> T[] copyOf(T[] original, int newLength) {
        return (T[]) copyOf(original, newLength, original.getClass());
    }
    
    // Creates an array of length newLength and of type newType, then copies the
    // first min(original.length, newLength) elements of original into it
    public static <T,U> T[] copyOf(U[] original, int newLength, Class<? extends T[]> newType) {
        @SuppressWarnings("unchecked")
        // Object[] is allocated directly; any other array type is created reflectively
        T[] copy = ((Object)newType == (Object)Object[].class)
            ? (T[]) new Object[newLength]
            : (T[]) Array.newInstance(newType.getComponentType(), newLength);
        System.arraycopy(original, 0, copy, 0,
                         Math.min(original.length, newLength));
        return copy;
    }

    // Removes the element at index, shifts the elements behind it one slot
    // towards the front, and returns the removed element
    public E remove(int index) {
        // Bounds check (only the upper bound is tested here)
        if (index >= size)
            throw new IndexOutOfBoundsException(outOfBoundsMsg(index));

        modCount++;
        // Remember the value stored at this position so it can be returned
        E oldValue = (E) elementData[index];
        // Unless it is the last element, everything behind it has to move forward
        int numMoved = size - index - 1;
        if (numMoved > 0)
            System.arraycopy(elementData, index+1, elementData, index,
                             numMoved);
        // Clear the now-unused last slot so the array no longer references that object
        elementData[--size] = null; 

        return oldValue;
    }
    
    // Array copy implemented in the native layer
    // src: the source array
    // srcPos: starting position in the source array
    // dest: the destination array
    // destPos: starting position in the destination array
    // length: number of elements to copy
    public static native void arraycopy(Object src,  int  srcPos,
                                        Object dest, int destPos,
                                        int length);

通过上面的代码来分析，ArrayList 内部的实现方式其实就是数组。如果没有指定数组的大小，那么在第一次添加数据的时候，数组的初始大小是 10；每次不够用的时候，默认会在原来容量的基础上再扩充 1/2（即变为原来的 1.5 倍），每次扩容都会涉及到创建新数组和数据的拷贝复制。而数组的拷贝和挪动都是由 native 层代码实现的，可是为什么不直接用 java 代码写呢？接下来我们去看下 native 层的实现。

二.实现 Native 层的 ArrayList

#ifndef MYAPPLICATION_ARRAYLIST_H
#define MYAPPLICATION_ARRAYLIST_H

#include <malloc.h>

#include <climits>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <new>
#include <stdexcept>
#include <utility>
//------------------类的定义-------------------//

/**
 * A minimal growable array modelled on java.util.ArrayList.
 *
 * Storage is raw memory obtained from malloc(). Elements are created in it with
 * placement new and destroyed explicitly, so E does not have to be trivially
 * copyable (std::string works, for example). Slots in [index, len) are
 * uninitialised memory and must not be read.
 *
 * Errors are reported with exceptions: std::out_of_range for an invalid index,
 * std::bad_alloc when memory cannot be obtained and std::length_error when the
 * list cannot grow any further.
 */
template<class E>
class ArrayList {
public:
    // Pointer to the first slot of the backing storage (nullptr while nothing is allocated)
    E *array = nullptr;
    // Capacity: number of slots allocated in `array`
    int len = 0;
    // Number of stored elements; also the slot the next add() writes to
    int index = 0;
public:
    ArrayList();

    ArrayList(int len);

    ~ArrayList();

    ArrayList(const ArrayList &list);

    ArrayList(ArrayList &&list) noexcept;

    // Takes its argument by value, so it serves as both copy and move assignment
    ArrayList &operator=(ArrayList list) noexcept;

public:
    bool add(E e);

    int size() const;

    E get(int index) const;

    E remove(int index);

private:
    // Capacity allocated by the first add() on a list that has no storage yet
    static const int DEFAULT_CAPACITY = 10;

    void ensureCapacityInternal(int i);

    void grow(int capacity);

    // Returns uninitialised storage for `count` elements or throws std::bad_alloc
    static E *allocate(int count);

    // Throws std::out_of_range unless 0 <= index < size()
    void checkIndex(int index) const;

    // Exchanges storage, capacity and size with `other`
    void swap(ArrayList &other) noexcept;
};

//------------------ Implementation -------------------//

/** Creates an empty list; no storage is allocated until the first add(). */
template<class E>
ArrayList<E>::ArrayList() {

}

/**
 * Creates an empty list with room for `len` elements.
 * A zero or negative `len` yields a list without storage, like ArrayList().
 */
template<class E>
ArrayList<E>::ArrayList(int len) {
    if (len <= 0) {
        return;
    }
    this->array = allocate(len);
    this->len = len;
}

/** Destroys the stored elements and releases the backing storage. */
template<class E>
ArrayList<E>::~ArrayList() {
    if (this->array) {
        for (int i = 0; i < this->index; ++i) {
            this->array[i].~E();
        }
        std::free(this->array);
        this->array = nullptr;
    }
}

/**
 * Deep copy: allocates storage of the same capacity as `list` and
 * copy-constructs its stored elements. Only the first list.index slots are
 * copied, because the remaining slots of `list` are uninitialised.
 */
template<class E>
ArrayList<E>::ArrayList(const ArrayList &list) {
    if (list.array == nullptr || list.len <= 0) {
        return; // nothing to copy; stay in the default (empty) state
    }
    E *copy = allocate(list.len);
    int built = 0;
    try {
        for (; built < list.index; ++built) {
            ::new (static_cast<void *>(copy + built)) E(list.array[built]);
        }
    } catch (...) {
        // Undo the partially built copy so that nothing leaks
        while (built > 0) {
            copy[--built].~E();
        }
        std::free(copy);
        throw;
    }
    this->array = copy;
    this->len = list.len;
    this->index = list.index;
}

/** Takes over the storage of `list`, leaving it empty. */
template<class E>
ArrayList<E>::ArrayList(ArrayList &&list) noexcept
        : array(list.array), len(list.len), index(list.index) {
    list.array = nullptr;
    list.len = 0;
    list.index = 0;
}

/**
 * Copy-and-swap assignment. The argument is already a private copy (or a
 * moved-from temporary), so swapping with it is enough; the previous contents
 * are released when the argument is destroyed.
 */
template<class E>
ArrayList<E> &ArrayList<E>::operator=(ArrayList list) noexcept {
    swap(list);
    return *this;
}

/**
 * Returns a copy of the element at `index`.
 * Throws std::out_of_range unless 0 <= index < size().
 */
template<class E>
E ArrayList<E>::get(int index) const {
    checkIndex(index);
    return this->array[index];
}

/** Returns the number of stored elements. */
template<class E>
int ArrayList<E>::size() const {
    return this->index;
}

/**
 * Removes the element at `index`, shifts the elements behind it one slot
 * towards the front and returns the removed element.
 * Throws std::out_of_range unless 0 <= index < size().
 */
template<class E>
E ArrayList<E>::remove(int index) {
    checkIndex(index);
    E old_value = std::move(this->array[index]);
    const int last = this->index - 1;

    // Shift the tail forward, starting from the removed position
    for (int i = index; i < last; ++i) {
        this->array[i] = std::move(this->array[i + 1]);
    }

    // The last slot is now unused: end the lifetime of the element in it
    this->array[last].~E();
    this->index = last;
    return old_value;
}

/**
 * Appends `e` to the end of the list, growing the storage when it is full.
 * Always returns true. Throws std::length_error when the list already holds
 * INT_MAX elements and std::bad_alloc when growing fails.
 */
template<class E>
bool ArrayList<E>::add(E e) {
    if (index == INT_MAX) {
        throw std::length_error("ArrayList cannot hold more elements");
    }
    ensureCapacityInternal(index + 1);
    ::new (static_cast<void *>(array + index)) E(std::move(e));
    ++index; // only counted once the element has been constructed
    return true;
}

// Grows the storage when it cannot hold minCapacity elements
template<class E>
void ArrayList<E>::ensureCapacityInternal(int minCapacity) {
    // First allocation: start with at least the default capacity
    if (this->array == nullptr && minCapacity < DEFAULT_CAPACITY) {
        minCapacity = DEFAULT_CAPACITY;
    }

    if (minCapacity > len) {
        grow(minCapacity);
    }
}

// Moves the elements into new storage of at least `capacity` slots
template<class E>
void ArrayList<E>::grow(int capacity) {
    // Default growth is 1.5x the current capacity, clamped so the sum cannot overflow int
    const int half = len >> 1;
    int new_len = (len > 0 && len > INT_MAX - half) ? INT_MAX : len + half;

    if (new_len < capacity) {
        new_len = capacity;
    }

    E *new_arr = allocate(new_len);

    // Transfer the stored elements; copies are used instead of moves when E's
    // move constructor may throw
    int moved = 0;
    try {
        for (; moved < index; ++moved) {
            ::new (static_cast<void *>(new_arr + moved)) E(std::move_if_noexcept(array[moved]));
        }
    } catch (...) {
        while (moved > 0) {
            new_arr[--moved].~E();
        }
        std::free(new_arr);
        throw;
    }

    // Release the old elements and storage (free(nullptr) is a no-op)
    for (int i = 0; i < index; ++i) {
        array[i].~E();
    }
    std::free(array);

    array = new_arr;
    len = new_len;
}

template<class E>
E *ArrayList<E>::allocate(int count) {
    const std::size_t slots = static_cast<std::size_t>(count);
    // Reject non-positive counts and byte sizes that would overflow size_t
    if (count <= 0 || slots > static_cast<std::size_t>(-1) / sizeof(E)) {
        throw std::bad_alloc();
    }
    void *raw = std::malloc(slots * sizeof(E));
    if (raw == nullptr) {
        throw std::bad_alloc();
    }
    return static_cast<E *>(raw);
}

template<class E>
void ArrayList<E>::checkIndex(int index) const {
    if (index < 0 || index >= this->index) {
        throw std::out_of_range("ArrayList index out of range");
    }
}

template<class E>
void ArrayList<E>::swap(ArrayList &other) noexcept {
    std::swap(array, other.array);
    std::swap(len, other.len);
    std::swap(index, other.index);
}

#endif //MYAPPLICATION_ARRAYLIST_H

三.System.arraycopy 源代码分析

java 中 ArrayList 数组的拷贝是通过 native 层去实现的，我看的是 jdk 1.8 的源码，如果想进一步了解其 native 层的实现，我们需要下载 jdk 1.8 的源码。

打开openjdk\hotspot\src\share\vm\prims\jvm.cpp可以看到一个方法JVM_ArrayCopy，但是该方法没有真正实现复制的代码，而是简单的检测源数组和目的数组是否为空，排除一些异常情况，方法都比较简单，我们只要顺着往下走就行了。

/*
 * Native entry point behind java.lang.System.arraycopy.
 * Rejects null arrays, resolves the JNI handles and delegates the copy to the
 * klass of the source array.
 */
JVM_ENTRY(void, JVM_ArrayCopy(JNIEnv *env, jclass ignored, jobject src, jint src_pos,  
                               jobject dst, jint dst_pos, jint length))  

  // Neither the source nor the destination array may be null
  if (src == NULL || dst == NULL) {  
    THROW(vmSymbols::java_lang_NullPointerException());  
  }  
   // Resolve the JNI handles into array oops
  arrayOop s = arrayOop(JNIHandles::resolve_non_null(src));  
  arrayOop d = arrayOop(JNIHandles::resolve_non_null(dst));  
  assert(s->is_oop(), "JVM_ArrayCopy: src not an oop");  
  assert(d->is_oop(), "JVM_ArrayCopy: dst not an oop");  

  // The call that actually performs the copy
  // NOTE(review): `thread` is not declared in this excerpt; presumably it is provided by JVM_ENTRY
  s->klass()->copy_array(s, src_pos, d, dst_pos, length, thread);  
}

/*
 * Implementation of java.lang.System.arraycopy for object arrays.
 * Validates the destination type, the offsets and the ranges, then hands the
 * element addresses to do_copy.
 */
void ObjArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d,  
                               int dst_pos, int length, TRAPS) {  
  // s must be an object array
  assert(s->is_objArray(), "must be obj array");  
  
  // Throw ArrayStoreException when the destination is not an object array
  if (!d->is_objArray()) {  
    THROW(vmSymbols::java_lang_ArrayStoreException());  
  }  
  
  // Check is all offsets and lengths are non negative  
  if (src_pos < 0 || dst_pos < 0 || length < 0) {  
    THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());  
  }  
  // Check if the ranges are valid  
  // (position + length must not run past the end of either array)
  if  ( (((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length())  
     || (((unsigned int) length + (unsigned int) dst_pos) > (unsigned int) d->length()) ) {  
    THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());  
  }  
  
  // Special case. Boundary cases must be checked first  
  // This allows the following call: copy_array(s, s.length(), d.length(), 0).  
  // This is correct, since the position is supposed to be an 'in between point', i.e., s.length(),  
  // points to the right of the last element.  
  // Nothing to copy when length == 0
  if (length==0) {  
    return;  
  }  
  // UseCompressedOops selects between the narrowOop and oop element types;
  // how the two differ is not examined here.
  // Call do_copy to perform the copy
  if (UseCompressedOops) {  
    narrowOop* const src = objArrayOop(s)->obj_at_addr<narrowOop>(src_pos);  
    narrowOop* const dst = objArrayOop(d)->obj_at_addr<narrowOop>(dst_pos);  
    do_copy<narrowOop>(s, src, d, dst, length, CHECK);  
  } else {  
    oop* const src = objArrayOop(s)->obj_at_addr<oop>(src_pos);  
    oop* const dst = objArrayOop(d)->obj_at_addr<oop>(dst_pos);  
    do_copy<oop> (s, src, d, dst, length, CHECK);  
  }  
}  

// Either oop or narrowOop depending on UseCompressedOops.  
// Copies `length` elements from src to dst. A bulk copy is used when source and
// destination are the same array or the source element type is a subtype of the
// destination element type; otherwise every element is type-checked individually.
template <class T> void ObjArrayKlass::do_copy(arrayOop s, T* src,  
                               arrayOop d, T* dst, int length, TRAPS) {  
  
  BarrierSet* bs = Universe::heap()->barrier_set();  
  // For performance reasons, we assume we are that the write barrier we  
  // are using has optimized modes for arrays of references.  At least one  
  // of the asserts below will fail if this is not the case.  
  assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");  
  assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");  
  
  if (s == d) {  
    // since source and destination are equal we do not need conversion checks.  
    assert(length > 0, "sanity check");  
    bs->write_ref_array_pre(dst, length);  
    // The function that performs the copy
    Copy::conjoint_oops_atomic(src, dst, length);  
  } else {  
    // We have to make sure all elements conform to the destination array  
    Klass* bound = ObjArrayKlass::cast(d->klass())->element_klass();  
    Klass* stype = ObjArrayKlass::cast(s->klass())->element_klass();  
    if (stype == bound || stype->is_subtype_of(bound)) {  
      // elements are guaranteed to be subtypes, so no check necessary  
      // stype is bound itself, a subclass of bound, or implements the bound interface
      bs->write_ref_array_pre(dst, length);  
      Copy::conjoint_oops_atomic(src, dst, length);  
    } else {  
      // slow case: need individual subtype checks  
      // note: don't use obj_at_put below because it includes a redundant store check  
      T* from = src;  
      T* end = from + length;  
      for (T* p = dst; from < end; from++, p++) {  
        // XXX this is going to be slow.  
        T element = *from;  
        // even slower now  
        bool element_is_null = oopDesc::is_null(element);  
        oop new_val = element_is_null ? oop(NULL)  
                                      : oopDesc::decode_heap_oop_not_null(element);  
        if (element_is_null ||  
            (new_val->klass())->is_subtype_of(bound)) {  
          bs->write_ref_field_pre(p, new_val);  
          *p = element;  
        } else {  
          // We must do a barrier to cover the partial copy.  
          const size_t pd = pointer_delta(p, dst, (size_t)heapOopSize);  
          // pointer delta is scaled to number of elements (length field in  
          // objArrayOop) which we assume is 32 bit.  
          assert(pd == (size_t)(int)pd, "length field overflow");  
          bs->write_ref_array((HeapWord*)dst, pd);  
          THROW(vmSymbols::java_lang_ArrayStoreException());  
          return;  
        }  
      }  
    }  
  }  
  bs->write_ref_array((HeapWord*)dst, length);  
}  

// oops, conjoint, atomic on each oop  
// Checks the parameters and forwards to pd_conjoint_oops_atomic.
static void conjoint_oops_atomic(oop* from, oop* to, size_t count) {  
  assert_params_ok(from, to, LogBytesPerHeapOop);  
  pd_conjoint_oops_atomic(from, to, count);  
}

// Checks whether this klass is a subclass of k or implements the interface k
bool is_subtype_of(Klass* k) const {  
  juint    off = k->super_check_offset();  
  // Read the Klass* stored at byte offset `off` inside this object
  Klass* sup = *(Klass**)( (address)this + off );  
  const juint secondary_offset = in_bytes(secondary_super_cache_offset());  
  if (sup == k) {  
    return true;  
  } else if (off != secondary_offset) {  
    return false;  
  } else {  
    // The offset refers to the secondary super cache: fall back to searching the secondary supers
    return search_secondary_supers(k);  
  }  
}  

// Copies `count` oops from `from` to `to`, one element at a time. The copy
// direction depends on the relative position of the two pointers, so
// overlapping ranges are copied correctly (memmove-style).
static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {  
  // Do better than this: inline memmove body  NEEDS CLEANUP  
  if (from > to) {  
    while (count-- > 0) {  
      // Copy forwards  
      *to++ = *from++;  
    }  
  } else {  
    // Start at the last element of each range
    from += count - 1;  
    to   += count - 1;  
    while (count-- > 0) {  
      // Copy backwards  
      *to-- = *from--;  
    }  
  }  
}

视频地址：https://pan.baidu.com/s/1A-1pG6IwrtR8WrxpZ75gyw
视频密码：acw5