RSet和卡表
试想一下,当在ygc时,我们需要扫描整个新生代。当新生代的对象被老年代引用,则此对象就不能回收。那么怎么判断这点呢,总不能扫描老年代吧。所以这里需要使用空间换时间的数据结构:RSet和卡表。
卡表:一种points-out结构,每个card记载有没有引用别的heap分区,它是全局表,对于一些热点card,会放入Hot card cache,它也是全局表;
RSet:一种points-in结构,在卡表基础上实现,每个Heap一个,用来记录别的Heap指向自己的指针,并标记这些指针在卡表哪些范围内。
在程序运行中,如果对象的引用发生变化,就必须更新RSet。而point in结构有个缺点就是,对象被引用的次数不确定。为了提高效率,G1使用了动态化的存储方式,主要有以下三种粒度:
- 稀疏PRT(Per region Table):hash表方式存储
- 细粒度PRT:PRT指针数组
- 粗粒度:位图,记录引用者对象对应的分区
具体我们看下RSet如何管理引用的代码
// 添加引用者对象对应的卡表地址到RSet中
void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
uint cur_hrm_ind = hr()->hrm_index();
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
from,
UseCompressedOops
? (void *)oopDesc::load_decode_heap_oop((narrowOop*)from)
: (void *)oopDesc::load_decode_heap_oop((oop*)from));
}
int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = "INT32_FORMAT")",
hr()->bottom(), from_card,
FromCardCache::at((uint)tid, cur_hrm_ind));
}
// 提高处理效率,使用卡表缓存,缓存中有表示已处理
if (FromCardCache::contains_or_replace((uint)tid, cur_hrm_ind, from_card)) {
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr(" from-card cache hit.");
}
assert(contains_reference(from), "We just added it!");
return;
}
// Note that this may be a continued H region.
HeapRegion* from_hr = _g1h->heap_region_containing_raw(from);
RegionIdx_t from_hrm_ind = (RegionIdx_t) from_hr->hrm_index();
// If the region is already coarsened, return.
//如果Rset中已经是粗粒度关系,也就是说RSet中记录是对应的分区,直接返回
if (_coarse_map.at(from_hrm_ind)) {
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr(" coarse map hit.");
}
assert(contains_reference(from), "We just added it!");
return;
}
// Otherwise find a per-region table to add it to.
// 添加RPT引用关系到RSet中
size_t ind = from_hrm_ind & _mod_max_fine_entries_mask;
PerRegionTable* prt = find_region_table(ind, from_hr);
if (prt == NULL) {
//加锁,多线程访问
MutexLockerEx x(_m, Mutex::_no_safepoint_check_flag);
// Confirm that it's really not there...
prt = find_region_table(ind, from_hr);
if (prt == NULL) {
//使用稀疏矩阵存储
uintptr_t from_hr_bot_card_index =
uintptr_t(from_hr->bottom())
>> CardTableModRefBS::card_shift;
CardIdx_t card_index = from_card - from_hr_bot_card_index;
assert(0 <= card_index && (size_t)card_index < HeapRegion::CardsPerRegion,
"Must be in range.");
if (G1HRRSUseSparseTable &&
_sparse_table.add_card(from_hrm_ind, card_index)) {
if (G1RecordHRRSOops) {
HeapRegionRemSet::record(hr(), from);
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print(" Added card " PTR_FORMAT " to region "
"[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
align_size_down(uintptr_t(from),
CardTableModRefBS::card_size),
hr()->bottom(), from);
}
}
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr(" added card to sparse table.");
}
assert(contains_reference_locked(from), "We just added it!");
return;
} else {
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print_cr(" [tid %d] sparse table entry "
"overflow(f: %d, t: %u)",
tid, from_hrm_ind, cur_hrm_ind);
}
}
// 细粒度卡表满了,删除PRT并放入粗粒度卡表
if (_n_fine_entries == _max_fine_entries) {
prt = delete_region_table();
// There is no need to clear the links to the 'all' list here:
// prt will be reused immediately, i.e. remain in the 'all' list.
prt->init(from_hr, false /* clear_links_to_all_list */);
} else {
//稀疏矩阵满了,需要再分配一个细粒度
prt = PerRegionTable::alloc(from_hr);
link_to_all(prt);
}
//把稀疏矩阵的数据迁移到细粒度卡表中,成功后删除稀疏矩阵
PerRegionTable* first_prt = _fine_grain_regions[ind];
prt->set_collision_list_next(first_prt);
_fine_grain_regions[ind] = prt;
_n_fine_entries++;
if (G1HRRSUseSparseTable) {
// Transfer from sparse to fine-grain.
SparsePRTEntry *sprt_entry = _sparse_table.get_entry(from_hrm_ind);
assert(sprt_entry != NULL, "There should have been an entry");
for (int i = 0; i < SparsePRTEntry::cards_num(); i++) {
CardIdx_t c = sprt_entry->card(i);
if (c != SparsePRTEntry::NullEntry) {
prt->add_card(c);
}
}
// Now we can delete the sparse entry.
bool res = _sparse_table.delete_entry(from_hrm_ind);
assert(res, "It should have been there.");
}
}
assert(prt != NULL && prt->hr() == from_hr, "consequence");
}
//添加引用
prt->add_reference(from);
if (G1RecordHRRSOops) {
HeapRegionRemSet::record(hr(), from);
if (G1TraceHeapRegionRememberedSet) {
gclog_or_tty->print("Added card " PTR_FORMAT " to region "
"[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
align_size_down(uintptr_t(from),
CardTableModRefBS::card_size),
hr()->bottom(), from);
}
}
assert(contains_reference(from), "We just added it!");
}
Refine线程
Refine线程是G1新引入的并发线程池,主要有2个功能:
- 用于处理新生代分区的抽样:用来设置YGC的分区数目,以便满足垃圾回收的预测停顿时间,线程池中的最后一个线程就是抽样线程。
- 管理RSet:G1会将所有的引用关系放到队列DCQ中,然后使用线程来消费队列。除了Refine线程外,有些情况GC或Mutator线程也会更新RSet。DCQ通过DCQS(Dirty card queue set)来管理。
这里我们主要看如何管理RSet。DCQS是全局对象,用来存放DCQ。每个Mutator线程在初始化的时候都关联DCQS,每个Mutator有私有的队列,当私有队列满的时候,会将存放的引用关系放入DCQ中。
当DCQS满的时候,就不继续添加了,说明Refine线程处理不过来了。这个时候Mutator线程会暂停其他代码处理,转去更新RSet。
Refine线程工作原理
Refine线程处理RSet关系,但如果引用关系变更少,Refine线程就浪费了,所以需要动态激活和冻结线程。JVM通过wait和notify机制来实现。
设计思想是:当前一个Refine线程太忙,激活后一个。后一个线程发现自己太空闲,则冻结自己。那么第一个Refine线程怎么激活呢?答案是Java线程(Mutator),因为Java线程会将引用关系放入DCQ,当发现第一个Refine线程还没激活,则发送notify激活。
我们先看下ConcurrentG1RefineThread线程的run方法
void ConcurrentG1RefineThread::run() {
//初始化线程信息
initialize_in_thread();
wait_for_universe_init();
//最后一个线程用于处理YHR的数目
if (_worker_id >= cg1r()->worker_thread_num()) {
run_young_rs_sampling();
terminate();
return;
}
_vtime_start = os::elapsedVTime();
while (!_should_terminate) {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
// 前一线程通知后一个线程实现
wait_for_completed_buffers();
if (_should_terminate) {
break;
}
{
SuspendibleThreadSetJoiner sts;
do {
int curr_buffer_num = (int)dcqs.completed_buffers_num();
// If the number of the buffers falls down into the yellow zone,
// that means that the transition period after the evacuation pause has ended.
if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
dcqs.set_completed_queue_padding(0);
}
if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
// 根据负载判断是否需要停止当前的Refine线程
deactivate();
break;
}
// 判断是否需要通知新的refine线程
if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
_next->activate();
}
} while (dcqs.apply_closure_to_completed_buffer(_refine_closure, _worker_id + _worker_id_offset, cg1r()->green_zone()));
// We can exit the loop above while being active if there was a yield request.
// 有yield请求时退出循环,目的是进入安全点
if (is_active()) {
deactivate();
}
}
if (os::supports_vtime()) {
_vtime_accum = (os::elapsedVTime() - _vtime_start);
} else {
_vtime_accum = 0.0;
}
}
terminate();
}
最终处理queue逻辑
bool G1RemSet::refine_card(jbyte* card_ptr, uint worker_i,
bool check_for_refs_into_cset) {
// 卡表指针对应的值不是dirty,说明已处理过.
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
return false;
}
// 找到卡表指针对应的分区.
HeapWord* start = _ct_bs->addr_for(card_ptr);
// And find the region containing it.
HeapRegion* r = _g1->heap_region_containing(start);
// 引用者是新生代或在CSet中则不需要更新,因为引用者也要被扫描
if (r->is_young()) {
return false;
}
if (r->in_collection_set()) {
return false;
}
// 如果在hot card中,则待后续批量处理
G1HotCardCache* hot_card_cache = _cg1r->hot_card_cache();
if (hot_card_cache->use_cache()) {
card_ptr = hot_card_cache->insert(card_ptr);
if (card_ptr == NULL) {
// There was no eviction. Nothing to do.
return false;
}
start = _ct_bs->addr_for(card_ptr);
r = _g1->heap_region_containing(start);
}
.
HeapWord* end = start + CardTableModRefBS::card_size_in_words;
MemRegion dirtyRegion(start, end);
//定义处理的对象
OopsInHeapRegionClosure* oops_in_heap_closure = NULL;
if (check_for_refs_into_cset) {
oops_in_heap_closure = _cset_rs_update_cl[worker_i];
}
G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
_g1->g1_rem_set(),
oops_in_heap_closure,
check_for_refs_into_cset,
worker_i);
update_rs_oop_cl.set_from(r);
G1TriggerClosure trigger_cl;
FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl);
G1InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl);
G1Mux2Closure mux(&invoke_cl, &update_rs_oop_cl);
FilterOutOfRegionClosure filter_then_update_rs_oop_cl(r,
(check_for_refs_into_cset ?
(OopClosure*)&mux :
(OopClosure*)&update_rs_oop_cl));
// 具体的处理
HeapWord* stop_point =
r->oops_on_card_seq_iterate_careful(dirtyRegion,
&filter_then_update_rs_oop_cl,
filter_young,
card_ptr);
// 如果处理中出现问题,将该引用放入公共的DCQ,等待后续处理
if (*card_ptr != CardTableModRefBS::dirty_card_val()) {
*card_ptr = CardTableModRefBS::dirty_card_val();
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
DirtyCardQueue* sdcq =
JavaThread::dirty_card_queue_set().shared_dirty_card_queue();
sdcq->enqueue(card_ptr);
}
} else {
_conc_refine_cards++;
}
// This gets set to true if the card being refined has
// references that point into the collection set.
bool has_refs_into_cset = trigger_cl.triggered();
// We should only be detecting that the card contains references
// that point into the collection set if the current thread is
// a GC worker thread.
assert(!has_refs_into_cset || SafepointSynchronize::is_at_safepoint(),
"invalid result at non safepoint");
return has_refs_into_cset;
}
以上找到的是512B的内存区域,最终要找到此内存区域的第一个对象,作为引用者处理。
HeapWord*
HeapRegion::
oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
bool filter_young,
jbyte* card_ptr) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
if (g1h->is_gc_active()) {
mr = mr.intersection(used_region_at_save_marks());
} else {
mr = mr.intersection(used_region());
}
if (mr.is_empty()) return NULL;
if (is_young() && filter_young) {
return NULL;
}
//把卡表改为clean状态,表示在处理
if (card_ptr != NULL) {
*card_ptr = CardTableModRefBS::clean_card_val();
OrderAccess::storeload();
}
// Cache the boundaries of the memory region in some const locals
HeapWord* const start = mr.start();
HeapWord* const end = mr.end();
HeapWord* cur = block_start(start);
assert(cur <= start, "Postcondition");
oop obj;
//跳过不在处理区域的对象
HeapWord* next = cur;
while (next <= start) {
cur = next;
obj = oop(cur);
if (obj->klass_or_null() == NULL) {
// Ran into an unparseable point.
return cur;
}
// Otherwise...
next = cur + block_size(cur);
}
if (!g1h->is_obj_dead(obj)) {
obj->oop_iterate(cl, mr);
}
while (cur < end) {
obj = oop(cur);
if (obj->klass_or_null() == NULL) {
// Ran into an unparseable point.
return cur;
};
// Otherwise:
next = cur + block_size(cur);
if (!g1h->is_obj_dead(obj)) {
//遍历对象
if (next < end || !obj->is_objArray()) {
obj->oop_iterate(cl);
} else {
obj->oop_iterate(cl, mr);
}
}
cur = next;
}
return NULL;
}
遍历到的对象会使用G1UpdateRSOrPushRefOopClosure来更新RSet
inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
if (obj == NULL) {
return;
}
HeapRegion* to = _g1->heap_region_containing(obj);
//只处理不同分区的
if (_from == to) {
return;
}
if (_record_refs_into_cset && to->in_collection_set()) {
//Evac会进入这里,只需复制对象到栈中继续处理
if (!self_forwarded(obj)) {
//对于成功转移的对象放入G1ParScanThreadState队列处理
_push_ref_cl->do_oop(p);
}
} else {
// 更新RSet
to->rem_set()->add_reference(p, _worker_i);
}
}
Refinement Zone
据资料显示,RSet在很多情况要浪费1%-20%的内存空间,是G1调优的重要部分之一。当DCQS过慢时,Mutator线程也会帮忙处理,会影响应用处理的性能。通常我们可以在不同的负载下启用不同的线程进行DCQ处理。这个工作负载通过Refinement Zone控制,G1将Queue Set分为四个区:白、绿、黄、红。
白区[0,green):GC线程处理
绿[green,yellow): 根据queue set启动不同数量的refine
黄[yellow,red):refine线程全部启动
红[red,+oo): 除refine启动处理,mutator线程也处理
三个参数分别对应:G1ConcRefinementGreenZone、G1ConcRefinementYellowZone、G1ConcRefinementRedZone (默认为0,G1自动推导)
RSet写屏障
写屏障:在进行特定内存值改变时额外进行一些操作,如在G1中将老年代对象的引用改为新生代对象就会触发写屏障:插入额外的代码,将引用关系放到DCQ中,等待线程更新到RSet中。