ArrayList 我们在 Java 中再熟悉不过了,记得自己在学习 Collection 体系的时候,用得最多的也就是 ArrayList,几乎很少用到 Stack 和 LinkedList,反正只要能用就行,所以刚开始并未过多去了解。但是当我们真正了解了其内部实现算法后,在写代码的时候就会根据业务逻辑,有意识地做一些思考了。另外,在面试的时候,我们也经常会碰到类似的问题。
一.ArrayList 源码分析
// Default initial capacity, applied when the list was created without an explicit capacity
private static final int DEFAULT_CAPACITY = 10;
// Shared empty array assigned by the constructor when initialCapacity == 0
private static final Object[] EMPTY_ELEMENTDATA = {};
// A second shared empty array; ensureCapacityInternal compares elementData against
// this instance to decide whether DEFAULT_CAPACITY should be applied on first growth
private static final Object[] DEFAULTCAPACITY_EMPTY_ELEMENTDATA = {};
// Backing array that holds the list's elements
transient Object[] elementData;
// Number of elements currently stored (not the length of elementData)
private int size;
// Creates a list whose backing array has the given initial capacity.
// Throws IllegalArgumentException when initialCapacity is negative.
public ArrayList(int initialCapacity) {
if (initialCapacity > 0) {
// Allocate a backing array of exactly the requested length
this.elementData = new Object[initialCapacity];
} else if (initialCapacity == 0) {
// Zero capacity: reuse the shared empty array instead of allocating
this.elementData = EMPTY_ELEMENTDATA;
} else {
throw new IllegalArgumentException("Illegal Capacity: "+
initialCapacity);
}
}
// Appends e at position size and always returns true.
public boolean add(E e) {
// Make sure the backing array has room for one more element, growing it if needed
ensureCapacityInternal(size + 1); // Increments modCount!!
// Store the element in the first free slot, then advance size
elementData[size++] = e;
return true;
}
// Normalises the requested capacity, then delegates to ensureExplicitCapacity.
private void ensureCapacityInternal(int minCapacity) {
if (elementData == DEFAULTCAPACITY_EMPTY_ELEMENTDATA) {
// Backing array is still the default-capacity placeholder:
// request at least DEFAULT_CAPACITY slots
minCapacity = Math.max(DEFAULT_CAPACITY, minCapacity);
}
ensureExplicitCapacity(minCapacity);
}
// Increments modCount and grows the backing array when minCapacity exceeds its length.
private void ensureExplicitCapacity(int minCapacity) {
modCount++;
// The request exceeds the current array length, so the array must grow
if (minCapacity - elementData.length > 0)
grow(minCapacity);
}
// Replaces the backing array with a larger one that can hold at least minCapacity elements.
private void grow(int minCapacity) {
// overflow-conscious code
// Length of the current backing array
int oldCapacity = elementData.length;
// By default grow BY half of the old capacity (new = old + old / 2, i.e. about 1.5x)
int newCapacity = oldCapacity + (oldCapacity >> 1);
// If that is still smaller than the request, use the requested minimum instead
if (newCapacity - minCapacity < 0)
newCapacity = minCapacity;
// Beyond MAX_ARRAY_SIZE the size is delegated to hugeCapacity
// (neither MAX_ARRAY_SIZE nor hugeCapacity is shown in this excerpt)
if (newCapacity - MAX_ARRAY_SIZE > 0)
newCapacity = hugeCapacity(minCapacity);
// minCapacity is usually close to size, so this is a win:
// Allocate a new array of newCapacity and copy the existing contents into it
elementData = Arrays.copyOf(elementData, newCapacity);
}
// Returns a copy of original with length newLength and the same runtime array class,
// by delegating to the three-argument overload.
public static <T> T[] copyOf(T[] original, int newLength) {
return (T[]) copyOf(original, newLength, original.getClass());
}
// Creates a new array of class newType and length newLength, then copies
// min(original.length, newLength) elements from the start of original into it.
public static <T,U> T[] copyOf(U[] original, int newLength, Class<? extends T[]> newType) {
// Object[] is allocated directly; any other array class goes through Array.newInstance
@SuppressWarnings("unchecked")
T[] copy = ((Object)newType == (Object)Object[].class)
? (T[]) new Object[newLength]
: (T[]) Array.newInstance(newType.getComponentType(), newLength);
// Only as many elements as fit in both arrays are copied
System.arraycopy(original, 0, copy, 0,
Math.min(original.length, newLength));
return copy;
}
// Removes the element at index, shifts the following elements one slot to the left
// and returns the removed element.
// Throws IndexOutOfBoundsException when index >= size.
public E remove(int index) {
// Upper-bound check (only index >= size is tested here)
if (index >= size)
throw new IndexOutOfBoundsException(outOfBoundsMsg(index));
modCount++;
// Remember the element currently stored at this position so it can be returned
E oldValue = (E) elementData[index];
// Unless the last element is being removed, the elements after it must move forward
int numMoved = size - index - 1;
if (numMoved > 0)
System.arraycopy(elementData, index+1, elementData, index,
numMoved);
// Null out the now-unused last slot so the array no longer references that object
elementData[--size] = null;
return oldValue;
}
// Copies array data; declared native, so the implementation lives outside Java code.
// src:     the source array
// srcPos:  starting position in the source array
// dest:    the destination array
// destPos: starting position in the destination array
// length:  number of elements to copy
public static native void arraycopy(Object src, int srcPos,
Object dest, int destPos,
int length);
通过上面的代码来分析,ArrayList 内部的实现方式其实就是数组:如果没有指定数组的大小,那么在第一次添加数据的时候,数组的初始大小是 10;每次容量不够用的时候,默认会在原来数组大小的基础上再扩充 1/2(即扩充为原来的 1.5 倍);每次扩充数组大小都会涉及到创建新数组和数据的拷贝复制。而数组的拷贝和挪动都是由 native 层代码实现的,可是为什么不直接用 Java 代码写呢?接下来我们去看下 native 层的实现。
二.实现 Native 层的 ArrayList
#ifndef MYAPPLICATION_ARRAYLIST_H
#define MYAPPLICATION_ARRAYLIST_H
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include <new>
#include <stdexcept>
//------------------类的定义-------------------//
/**
 * A minimal growable array modelled on java.util.ArrayList.
 *
 * Storage is raw memory obtained from malloc(); elements are copy-constructed
 * into it with placement new and destroyed explicitly, so E may be any
 * copy-constructible and copy-assignable type, not only plain-old-data.
 *
 * Error reporting:
 *  - std::invalid_argument for a negative initial capacity,
 *  - std::out_of_range for an invalid index in get()/remove(),
 *  - std::bad_alloc when malloc() fails.
 */
template<class E>
class ArrayList {
public:
    // Pointer to the first slot of the backing storage (NULL until storage exists).
    // Only the first `index` slots hold constructed elements.
    E *array = NULL;
    // Capacity: number of slots in the backing storage.
    int len = 0;
    // Number of stored elements; also the position of the next insertion.
    int index = 0;
public:
    ArrayList();

    ArrayList(int len);

    ~ArrayList();

    ArrayList(const ArrayList &list);

    // Required because the class owns `array`: the implicit assignment would
    // copy the pointer and both objects would free the same storage.
    ArrayList &operator=(const ArrayList &list);

public:
    bool add(E e);

    int size() const;

    E get(int index) const;

    E remove(int index);

private:
    void ensureCapacityInternal(int i);

    void grow(int capacity);

    // Allocates `capacity` raw slots and copy-constructs src[0..count) into them.
    static E *cloneStorage(const E *src, int count, int capacity);

    // Runs the destructor of first[0..count) without releasing the memory.
    static void destroyRange(E *first, int count);
};

//------------------ implementation -------------------//

/** Creates an empty list; storage is allocated lazily by the first add(). */
template<class E>
ArrayList<E>::ArrayList() {
}

/**
 * Creates an empty list with room for `len` elements.
 * @param len initial capacity; 0 defers allocation to the first add()
 * @throws std::invalid_argument if len is negative
 * @throws std::bad_alloc if the storage cannot be allocated
 */
template<class E>
ArrayList<E>::ArrayList(int len) {
    if (len < 0) {
        throw std::invalid_argument("Illegal Capacity");
    }
    if (len == 0) {
        return;
    }
    this->array = cloneStorage(NULL, 0, len);
    this->len = len;
}

/** Destroys the stored elements and releases the backing storage. */
template<class E>
ArrayList<E>::~ArrayList() {
    if (this->array) {
        destroyRange(this->array, this->index);
        free(this->array);
        this->array = NULL;
    }
}

/**
 * Deep copy: allocates storage of the same capacity and copy-constructs the
 * source's live elements. An empty, unallocated source yields an empty list.
 */
template<class E>
ArrayList<E>::ArrayList(const ArrayList &list) {
    if (list.array == NULL || list.len <= 0) {
        return;
    }
    this->array = cloneStorage(list.array, list.index, list.len);
    this->len = list.len;
    this->index = list.index;
}

/**
 * Copy assignment via copy-and-swap: the copy is built first, so *this is left
 * untouched if copying throws, and the old storage is released by the
 * temporary's destructor.
 */
template<class E>
ArrayList<E> &ArrayList<E>::operator=(const ArrayList &list) {
    if (this != &list) {
        ArrayList copy(list);

        E *old_array = this->array;
        this->array = copy.array;
        copy.array = old_array;

        const int old_len = this->len;
        this->len = copy.len;
        copy.len = old_len;

        const int old_index = this->index;
        this->index = copy.index;
        copy.index = old_index;
    }
    return *this;
}

/**
 * Returns a copy of the element at `index`.
 * @throws std::out_of_range unless 0 <= index < size()
 */
template<class E>
E ArrayList<E>::get(int index) const {
    if (index < 0 || index >= this->index) {
        throw std::out_of_range("ArrayList::get: index out of range");
    }
    return this->array[index];
}

/** Returns the number of stored elements. */
template<class E>
int ArrayList<E>::size() const {
    return this->index;
}

/**
 * Removes the element at `index`, shifting later elements one slot forward.
 * @return the removed element
 * @throws std::out_of_range unless 0 <= index < size()
 */
template<class E>
E ArrayList<E>::remove(int index) {
    if (index < 0 || index >= this->index) {
        throw std::out_of_range("ArrayList::remove: index out of range");
    }
    E old_value = this->array[index];
    const int last = this->index - 1;
    // Close the gap by moving every following element one position forward.
    for (int i = index; i < last; ++i) {
        this->array[i] = this->array[i + 1];
    }
    // The last slot is now a duplicate (or the removed element itself): destroy it.
    this->array[last].~E();
    this->index = last;
    return old_value;
}

/**
 * Appends `e` to the end of the list, growing the storage if necessary.
 * @return always true
 */
template<class E>
bool ArrayList<E>::add(E e) {
    ensureCapacityInternal(index + 1);
    // The target slot is raw memory, so construct in place instead of assigning.
    new(static_cast<void *>(this->array + index)) E(e);
    ++index;
    return true;
}

/** Grows the storage when fewer than `minCapacity` slots are available. */
template<class E>
void ArrayList<E>::ensureCapacityInternal(int minCapacity) {
    // First allocation: start with at least 10 slots, but never less than requested.
    if (this->array == NULL && minCapacity < 10) {
        minCapacity = 10;
    }
    if (minCapacity > len) {
        grow(minCapacity);
    }
}

/**
 * Moves the elements into new storage of max(1.5 * len, capacity) slots.
 * The old storage is released only after the copy has succeeded.
 */
template<class E>
void ArrayList<E>::grow(int capacity) {
    // Grow by half of the current capacity unless the request is larger.
    int new_len = len + (len >> 1);
    if (new_len < capacity) {
        new_len = capacity;
    }
    E *new_arr = cloneStorage(this->array, index, new_len);
    if (this->array) {
        destroyRange(this->array, index);
        free(this->array);
    }
    array = new_arr;
    len = new_len;
}

template<class E>
E *ArrayList<E>::cloneStorage(const E *src, int count, int capacity) {
    E *storage = static_cast<E *>(malloc(sizeof(E) * static_cast<size_t>(capacity)));
    if (storage == NULL) {
        throw std::bad_alloc();
    }
    int done = 0;
    try {
        for (; done < count; ++done) {
            new(static_cast<void *>(storage + done)) E(src[done]);
        }
    } catch (...) {
        // A copy constructor threw: undo the partial copy and release the storage.
        destroyRange(storage, done);
        free(storage);
        throw;
    }
    return storage;
}

template<class E>
void ArrayList<E>::destroyRange(E *first, int count) {
    for (int i = 0; i < count; ++i) {
        first[i].~E();
    }
}
#endif //MYAPPLICATION_ARRAYLIST_H
三.System.arraycopy 源代码分析
java 中 ArrayList 数组的拷贝是通过 native 层去实现的,我看的是 jdk 1.8 的源码,如果想进一步了解其 native 层的实现,我们需要下载 jdk 1.8 的源码。
打开 openjdk\hotspot\src\share\vm\prims\jvm.cpp 可以看到一个方法 JVM_ArrayCopy,但是该方法并没有真正实现复制的代码,而只是简单地检测源数组和目的数组是否为空,排除一些异常情况。这些方法都比较简单,我们只要顺着调用链往下走就行了。
/*
The arraycopy method of java.lang.System: VM entry point.
It only validates the two array handles and then delegates the actual copy
to the source array's klass.
*/
JVM_ENTRY(void, JVM_ArrayCopy(JNIEnv *env, jclass ignored, jobject src, jint src_pos,
jobject dst, jint dst_pos, jint length))
// The source and destination arrays must both be non-null
if (src == NULL || dst == NULL) {
THROW(vmSymbols::java_lang_NullPointerException());
}
// Resolve the JNI handles into array oops
arrayOop s = arrayOop(JNIHandles::resolve_non_null(src));
arrayOop d = arrayOop(JNIHandles::resolve_non_null(dst));
assert(s->is_oop(), "JVM_ArrayCopy: src not an oop");
assert(d->is_oop(), "JVM_ArrayCopy: dst not an oop");
// The call that actually performs the copy
s->klass()->copy_array(s, src_pos, d, dst_pos, length, thread);
}
/*
Implementation behind java.lang.System.arraycopy for object arrays:
validates the destination type, the offsets and the ranges, then hands the
element copy over to do_copy.
*/
void ObjArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d,
int dst_pos, int length, TRAPS) {
// s is expected to be an object array
assert(s->is_objArray(), "must be obj array");
// Throw ArrayStoreException when the destination is not an object array
if (!d->is_objArray()) {
THROW(vmSymbols::java_lang_ArrayStoreException());
}
// Check is all offsets and lengths are non negative
// (positions and length must all be >= 0)
if (src_pos < 0 || dst_pos < 0 || length < 0) {
THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());
}
// Check if the ranges are valid
// (position + length must not run past the end of either array)
if ( (((unsigned int) length + (unsigned int) src_pos) > (unsigned int) s->length())
|| (((unsigned int) length + (unsigned int) dst_pos) > (unsigned int) d->length()) ) {
THROW(vmSymbols::java_lang_ArrayIndexOutOfBoundsException());
}
// Special case. Boundary cases must be checked first
// This allows the following call: copy_array(s, s.length(), d.length(), 0).
// This is correct, since the position is supposed to be an 'in between point', i.e., s.length(),
// points to the right of the last element.
// Nothing to copy when length == 0
if (length==0) {
return;
}
// UseCompressedOops selects whether elements are addressed as narrowOop or as oop.
// NOTE(review): the difference between the two representations is not covered by this excerpt.
// Either way the copy itself is performed by do_copy
if (UseCompressedOops) {
narrowOop* const src = objArrayOop(s)->obj_at_addr<narrowOop>(src_pos);
narrowOop* const dst = objArrayOop(d)->obj_at_addr<narrowOop>(dst_pos);
do_copy<narrowOop>(s, src, d, dst, length, CHECK);
} else {
oop* const src = objArrayOop(s)->obj_at_addr<oop>(src_pos);
oop* const dst = objArrayOop(d)->obj_at_addr<oop>(dst_pos);
do_copy<oop> (s, src, d, dst, length, CHECK);
}
}
// Copies `length` elements from src to dst.
// T is either oop or narrowOop depending on UseCompressedOops.
// Three paths: same array (no type checks), statically compatible element types
// (bulk copy), and the slow path with a subtype check per element.
template <class T> void ObjArrayKlass::do_copy(arrayOop s, T* src,
arrayOop d, T* dst, int length, TRAPS) {
BarrierSet* bs = Universe::heap()->barrier_set();
// For performance reasons, we assume we are that the write barrier we
// are using has optimized modes for arrays of references. At least one
// of the asserts below will fail if this is not the case.
assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");
if (s == d) {
// since source and destination are equal we do not need conversion checks.
assert(length > 0, "sanity check");
bs->write_ref_array_pre(dst, length);
// The function that performs the copy
Copy::conjoint_oops_atomic(src, dst, length);
} else {
// We have to make sure all elements conform to the destination array
Klass* bound = ObjArrayKlass::cast(d->klass())->element_klass();
Klass* stype = ObjArrayKlass::cast(s->klass())->element_klass();
if (stype == bound || stype->is_subtype_of(bound)) {
// elements are guaranteed to be subtypes, so no check necessary
// (stype is bound itself, or passes the is_subtype_of check against bound)
bs->write_ref_array_pre(dst, length);
Copy::conjoint_oops_atomic(src, dst, length);
} else {
// slow case: need individual subtype checks
// note: don't use obj_at_put below because it includes a redundant store check
T* from = src;
T* end = from + length;
for (T* p = dst; from < end; from++, p++) {
// XXX this is going to be slow.
T element = *from;
// even slower now
bool element_is_null = oopDesc::is_null(element);
oop new_val = element_is_null ? oop(NULL)
: oopDesc::decode_heap_oop_not_null(element);
if (element_is_null ||
(new_val->klass())->is_subtype_of(bound)) {
bs->write_ref_field_pre(p, new_val);
*p = element;
} else {
// We must do a barrier to cover the partial copy.
const size_t pd = pointer_delta(p, dst, (size_t)heapOopSize);
// pointer delta is scaled to number of elements (length field in
// objArrayOop) which we assume is 32 bit.
assert(pd == (size_t)(int)pd, "length field overflow");
bs->write_ref_array((HeapWord*)dst, pd);
THROW(vmSymbols::java_lang_ArrayStoreException());
return;
}
}
}
}
// Post-barrier covering the whole copied range
bs->write_ref_array((HeapWord*)dst, length);
}
// oops, conjoint, atomic on each oop
// Checks the parameters, then forwards to pd_conjoint_oops_atomic.
static void conjoint_oops_atomic(oop* from, oop* to, size_t count) {
assert_params_ok(from, to, LogBytesPerHeapOop);
pd_conjoint_oops_atomic(from, to, count);
}
// Tests whether this klass is a subtype of k (a subclass of k, or an implementer of interface k).
bool is_subtype_of(Klass* k) const {
// Read the word of this klass located at k's super-check offset
juint off = k->super_check_offset();
Klass* sup = *(Klass**)( (address)this + off );
const juint secondary_offset = in_bytes(secondary_super_cache_offset());
if (sup == k) {
// The slot holds k: subtype confirmed
return true;
} else if (off != secondary_offset) {
// The slot was not the secondary-super cache, so the miss is definitive
return false;
} else {
// Cache miss on the secondary-super slot: fall back to searching the secondary supers
return search_secondary_supers(k);
}
}
// Copies count oops from `from` to `to`, one element at a time.
// The copy direction depends on the relative position of the two pointers,
// so that overlapping ranges are copied without overwriting unread source elements.
static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
// Do better than this: inline memmove body NEEDS CLEANUP
if (from > to) {
while (count-- > 0) {
// Copy forwards
*to++ = *from++;
}
} else {
// Start at the last element of each range
from += count - 1;
to += count - 1;
while (count-- > 0) {
// Copy backwards
*to-- = *from--;
}
}
}
视频地址:https://pan.baidu.com/s/1A-1pG6IwrtR8WrxpZ75gyw
视频密码:acw5