参数详情
rgw gc max objs:垃圾回收(GC)数据的分片数,也可以理解为 GC worker 数;默认值 32
rgw gc obj min wait:对象被删除后,至少要等待多长时间才能被垃圾回收器处理;默认值 7200s
rgw gc processor max time:每个 GC worker 单次执行的最长时间;默认值 3600s
rgw gc processor period:垃圾回收进程的运行周期;默认值 3600s
需要注意:如果 GC 队列中还有待处理的任务,此时强行减小 rgw gc max objs 的值,结果是 pool 里面会有数据残留。
查看系统中的待gc的任务:
[root@rgw-test ~]# radosgw-admin gc list --include-all
[
{
"tag": "54b4a9a0-ee4b-4cb7-8115-ace2272cb565.175310.41\u0000",//有待研究
"time": "2018-11-08 17:41:48.0.010174s",//object 可以被删除的时间,由对象被 rm 操作的时间加上 rgw gc obj min wait 决定
"objs": [
{
"pool": "default.rgw.buckets.data",
"oid": "54b4a9a0-ee4b-4cb7-8115-ace2272cb565.175310.3__shadow_.xWHsehe8d0SY27B0wWK7Ueea0yW6xsQ_1",//object id 有待研究
"key": "",
"instance": ""
}
]
}
]
查看用于gc的pool和对象:
rados listomapkeys gc.$i --namespace gc -p default.rgw.log
rados listomapvals gc.$i --namespace gc -p default.rgw.log
其中 $i 为 GC 分片的编号;执行后就能看到对应的分片以及该分片里面的内容
使用场景:
1.用于删除 abortmultipart遗留下的文件。
核心代码:
Int abort_multipart_upload(RGWRados *store, CephContext *cct, RGWObjectCtx *obj_ctx, RGWBucketInfo& bucket_info, RGWMPObj& mp_obj)
{
rgw_obj meta_obj;
meta_obj.init_ns(bucket_info.bucket, mp_obj.get_meta(), RGW_OBJ_NS_MULTIPART);
meta_obj.set_in_extra_data(true);
meta_obj.index_hash_source = mp_obj.get_key();
cls_rgw_obj_chain chain;
list<rgw_obj_index_key> remove_objs;
map<uint32_t, RGWUploadPartInfo> obj_parts;
bool truncated;
int marker = 0;
int ret;
do {
ret = list_multipart_parts(store, bucket_info, cct, mp_obj.get_upload_id(), mp_obj.get_meta(), 1000,
marker, obj_parts, &marker, &truncated);
if (ret < 0)
return ret;
for (auto obj_iter = obj_parts.begin(); obj_iter != obj_parts.end(); ++obj_iter) {
RGWUploadPartInfo& obj_part = obj_iter->second;
rgw_obj obj;
if (obj_part.manifest.empty()) {
string oid = mp_obj.get_part(obj_iter->second.num);
obj.init_ns(bucket_info.bucket, oid, RGW_OBJ_NS_MULTIPART);
obj.index_hash_source = mp_obj.get_key();
ret = store->delete_obj(*obj_ctx, bucket_info, obj, 0);
if (ret < 0 && ret != -ENOENT)
return ret;
} else {
store->update_gc_chain(meta_obj, obj_part.manifest, &chain);//更新gc队列
RGWObjManifest::obj_iterator oiter = obj_part.manifest.obj_begin();
if (oiter != obj_part.manifest.obj_end()) {
rgw_obj head;
rgw_raw_obj raw_head = oiter.get_location().get_raw_obj(store);
rgw_raw_obj_to_obj(bucket_info.bucket, raw_head, &head);
rgw_obj_index_key key;
head.key.get_index_key(&key);
remove_objs.push_back(key);
}
}
}
} while (truncated);
/* use upload id as tag */
ret = store->send_chain_to_gc(chain, mp_obj.get_upload_id() , false); // do it async//把mp_obj发送给gc队列
if (ret < 0) {
ldout(cct, 5) << "gc->send_chain() returned " << ret << dendl;
return ret;
}
RGWRados::Object del_target(store, bucket_info, *obj_ctx, meta_obj);
RGWRados::Object::Delete del_op(&del_target);
del_op.params.bucket_owner = bucket_info.owner;
del_op.params.versioning_status = 0;
if (!remove_objs.empty()) {
del_op.params.remove_objs = &remove_objs;
}
// and also remove the metadata obj
ret = del_op.delete_obj();
return ret == -ENOENT?-ERR_NO_SUCH_UPLOAD:ret;
}
2.用于http请求的abortmultipart
根据 uploadId 删除尚未上传完成的分片
/**
 * Handle an abort-multipart-upload request.
 *
 * Reads the "uploadId" request argument, looks up the upload's multipart
 * info and then delegates to abort_multipart_upload(). The outcome is left
 * in op_ret: -EINVAL when the upload id or the object name is missing,
 * otherwise the result of the last call that ran.
 */
void RGWAbortMultipart::execute()
{
  // Stays in place if the request is rejected by the checks below.
  op_ret = -EINVAL;

  string upload_id = s->info.args.get("uploadId");
  string meta_oid;
  map<string, bufferlist> attrs;
  rgw_obj meta_obj;  // not referenced below; kept as in the original
  RGWMPObj mp;

  if (upload_id.empty()) {
    return;
  }
  if (s->object.empty()) {
    return;
  }

  mp.init(s->object.name, upload_id);
  meta_oid = mp.get_meta();

  op_ret = get_multipart_info(store, s, meta_oid, NULL, attrs);
  if (op_ret < 0) {
    return;
  }

  auto *obj_ctx = static_cast<RGWObjectCtx *>(s->obj_ctx);
  // The actual abort work is done by this function.
  op_ret = abort_multipart_upload(store, s->cct, obj_ctx, s->bucket_info, mp);
}
3.使用客户端删除
// Delete the target object (excerpt; lines made only of dots mark code that
// was left out of this excerpt).
//
// Versioned path (BUCKET_VERSIONED set in params.versioning_status, or an
// explicit marker version id was supplied): either records a delete marker via
// set_olh() or unlinks one specific instance, then optionally writes a data
// log entry and returns 0 without touching the GC.
//
// Otherwise: resolves the head object and its state and, after the omitted
// code, calls complete_atomic_modification().
//
// Returns 0 on the paths shown, or the negative error code of the call that failed.
int RGWRados::Object::Delete::delete_obj()
{
RGWRados *store = target->get_store();
rgw_obj& src_obj = target->get_obj();
const string& instance = src_obj.key.instance;
rgw_obj obj = src_obj;
// An instance id of "null" is cleared on the working copy.
if (instance == "null") {
obj.key.instance.clear();
}
bool explicit_marker_version = (!params.marker_version_id.empty());
if (params.versioning_status & BUCKET_VERSIONED || explicit_marker_version) {
if (instance.empty() || explicit_marker_version) {
// No specific instance requested (or an explicit marker version given):
// build the delete marker and report it through result.
rgw_obj marker = obj;
if (!params.marker_version_id.empty()) {
if (params.marker_version_id != "null") {
marker.key.set_instance(params.marker_version_id);
}
} else if ((params.versioning_status & BUCKET_VERSIONS_SUSPENDED) == 0) {
// Versioning is not suspended: give the marker a random instance name.
store->gen_rand_obj_instance_name(&marker);
}
result.version_id = marker.key.instance;
result.delete_marker = true;
struct rgw_bucket_dir_entry_meta meta;
meta.owner = params.obj_owner.get_id().to_str();
meta.owner_display_name = params.obj_owner.get_display_name();
// Use the caller-supplied mtime when set, otherwise the current time.
if (real_clock::is_zero(params.mtime)) {
meta.mtime = real_clock::now();
} else {
meta.mtime = params.mtime;
}
// NOTE(review): r is never checked in this excerpt; the error check may have
// been dropped when the code was pasted. Confirm against the upstream source.
int r = store->set_olh(target->get_ctx(), target->get_bucket_info(), marker, true, &meta, params.olh_epoch, params.unmod_since, params.high_precision_time, params.zones_trace);
} else {
// A specific instance was requested: look up its index entry and unlink it.
rgw_bucket_dir_entry dirent;
int r = store->bi_get_instance(target->get_bucket_info(), obj, &dirent);
if (r < 0) {
return r;
}
result.delete_marker = dirent.is_delete_marker();
r = store->unlink_obj_instance(target->get_ctx(), target->get_bucket_info(), obj, params.olh_epoch, params.zones_trace);
if (r < 0) {
return r;
}
result.version_id = instance;
}
BucketShard *bs;
int r = target->get_bucket_shard(&bs);
if (r < 0) {
ldout(store->ctx(), 5) << "failed to get BucketShard object: r=" << r << dendl;
return r;
}
// Write a data log entry only when the bucket has the datasync flag enabled.
if (target->bucket_info.datasync_flag_enabled()) {
r = store->data_log->add_entry(bs->bucket, bs->shard_id);
if (r < 0) {
lderr(store->ctx()) << "ERROR: failed writing data log" << dendl;
return r;
}
}
return 0;
}
// Path taken when the versioned branch above was not entered.
rgw_rados_ref ref;
int r = store->get_obj_head_ref(target->get_bucket_info(), obj, &ref);
if (r < 0) {
return r;
}
RGWObjState *state;
r = target->get_state(&state, false);
if (r < 0)
return r;
ObjectWriteOperation op;
.....................
int ret = target->complete_atomic_modification();// hand the object over to the GC; a failure is only logged below
if (ret < 0) {
ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned ret=" << ret << dendl;
}
.......
return 0;
}
int RGWRados::Object::complete_atomic_modification()
{
if (!state->has_manifest || state->keep_tail)
return 0;
cls_rgw_obj_chain chain;
store->update_gc_chain(obj, state->manifest, &chain);
if (chain.empty()) {
return 0;
}
string tag = (state->tail_tag.length() > 0 ? state->tail_tag.to_str() : state->obj_tag.to_str());
return store->gc->send_chain(chain, tag, false); // do it async
}
4.覆盖写(put一个已经存在的对象)时,旧的对象片段需要gc掉。
// Write the object's metadata (mtime, xattrs, optional full data and manifest)
// to the head object in a single ObjectWriteOperation, then complete the
// bucket index update.
//
// This is a truncated excerpt: lines made only of dots mark omitted code, the
// closing brace and the done_cancel label are not shown, and reset_obj, ptag
// and is_olh are used without a visible declaration.
//
// Returns 0 early when the write operation is empty, a negative error code
// when one of the checked calls fails, otherwise the result of index_op->complete().
int RGWRados::Object::Write::_do_write_meta(uint64_t size, uint64_t accounted_size,
map<string, bufferlist>& attrs,
bool assume_noent, bool modify_tail,
void *_index_op)
{
RGWRados::Bucket::UpdateIndex *index_op = static_cast<RGWRados::Bucket::UpdateIndex *>(_index_op);
RGWRados *store = target->get_store();
ObjectWriteOperation op;
RGWObjState *state;
int r = target->get_state(&state, false, assume_noent);
if (r < 0)
return r;
rgw_obj& obj = target->get_obj();
rgw_rados_ref ref;
// NOTE(review): the result of get_obj_head_ref() is overwritten by the next
// call without being checked; the check may have been dropped from this
// excerpt. Confirm against the upstream source.
r = store->get_obj_head_ref(target->get_bucket_info(), obj, &ref);
r = target->prepare_atomic_modification(op, reset_obj, ptag, meta.if_match, meta.if_nomatch, false, modify_tail);
if (r < 0)
return r;
// Default the mtime to the current time when the caller did not set one.
if (real_clock::is_zero(meta.set_mtime)) {
meta.set_mtime = real_clock::now();
}
if (state->is_olh) {
op.setxattr(RGW_ATTR_OLH_ID_TAG, state->olh_tag);
}
struct timespec mtime_ts = real_clock::to_timespec(meta.set_mtime);
op.mtime2(&mtime_ts);
if (meta.data) {
/* if we want to overwrite the data, we also want to overwrite the
xattrs, so just remove the object */
op.write_full(*meta.data);
}
string etag;
string content_type;
bufferlist acl_bl;
map<string, bufferlist>::iterator iter;
// Queue removal of every xattr named in meta.rmattrs.
if (meta.rmattrs) {
for (iter = meta.rmattrs->begin(); iter != meta.rmattrs->end(); ++iter) {
const string& name = iter->first;
op.rmxattr(name.c_str());
}
}
if (meta.manifest) {
/* remove existing manifest attr */
iter = attrs.find(RGW_ATTR_MANIFEST);
if (iter != attrs.end())
attrs.erase(iter);
bufferlist bl;
::encode(*meta.manifest, bl);
op.setxattr(RGW_ATTR_MANIFEST, bl);
}
// Nothing was queued on the operation: there is nothing to write.
if (!op.size())
return 0;
uint64_t epoch;
int64_t poolid;
bool orig_exists;
uint64_t orig_size;
if (!reset_obj) { //Multipart upload, it has immutable head.
orig_exists = false;
orig_size = 0;
} else {
orig_exists = state->exists;
orig_size = state->accounted_size;
}
bool versioned_target = (meta.olh_epoch > 0 || !obj.key.instance.empty());
bool versioned_op = (target->versioning_enabled() || is_olh || versioned_target);
if (versioned_op) {
index_op->set_bilog_flags(RGW_BILOG_FLAG_VERSIONED_OP);
}
// Prepare the index operation here unless that has already been done.
if (!index_op->is_prepared()) {
r = index_op->prepare(CLS_RGW_OP_ADD, &state->write_tag);
if (r < 0)
return r;
}
r = ref.ioctx.operate(ref.oid, &op);
if (r < 0) { /* we can expect to get -ECANCELED if object was replaced under,
or -ENOENT if was removed, or -EEXIST if it did not exist
before and now it does */
if (r == -EEXIST && assume_noent) {
target->invalidate_state();
return r;
}
goto done_cancel;
}
epoch = ref.ioctx.get_last_version();
poolid = ref.ioctx.get_id();
...........
r = target->complete_atomic_modification();// hand the object over to the GC queue; a failure is only logged
if (r < 0) {
ldout(store->ctx(), 0) << "ERROR: complete_atomic_modification returned r=" << r << dendl;
}
r = index_op->complete(poolid, epoch, size, accounted_size,
meta.set_mtime, etag, content_type, &acl_bl,
meta.category, meta.remove_objs, meta.user_data);
............
return r;
上述代码基于 L 版本(Luminous);GC 在 M 版本(Mimic)中进行了修改,改用 aio 的做法,性能能提升多少,有待研究。https://github.com/ceph/ceph/pull/20546
相关文档链接
http://www.voidcn.com/article/p-phgisild-kd.html