由前文已知,创建目录操作使用CEPH_MDS_OP_MKDIR消息,该消息被分发至void Server::handle_client_mkdir(MDRequestRef& mdr)函数处理。
源码文件:src/mds/Server.cc
// MKDIR
//
// Handles a client mkdir request (CEPH_MDS_OP_MKDIR).
//
// Outline of what the code below does:
//   1. Rejects paths whose last component is "." or ".." with -EEXIST.
//   2. Resolves the path and collects/acquires the required locks.
//   3. Rejects requests made inside a snapshot (-EROFS), then checks write
//      access on the parent inode and fragment space in the parent dirfrag.
//   4. Prepares a new directory inode, projects it onto the dentry and opens
//      its first dirfrag.
//   5. Builds an EUpdate("mkdir") journal entry, issues caps on the new inode,
//      and hands everything to journal_and_reply() with a
//      C_MDS_mknod_finish completion.
//   6. Asks the balancer whether the parent dirfrag should be fragmented.
//
// Every early "return" after a failed helper call sends no reply here; the
// helper is presumably responsible for replying or requeueing the request --
// NOTE(review): confirm against the helpers' implementations.
/* This function takes responsibility for the passed mdr*/
void Server::handle_client_mkdir(MDRequestRef& mdr)
{
MClientRequest *req = mdr->client_request;
// A mkdir whose path ends in "." or ".." is answered directly with "already exists".
if (req->get_filepath().is_last_dot_or_dotdot()) {
respond_to_request(mdr, -EEXIST);
return;
}
// Traverse the path and collect the locks to take. Per the article's author,
// rdlock_path_xlock_dentry can be read as: read-lock the dentries along the path
// starting at the parent directory, and exclusively lock the dentry to be created.
set<SimpleLock*> rdlocks, wrlocks, xlocks;
// Note the last three arguments are all false. Per the article's author this means
// the target is not required to exist already, and a null dentry is created for a
// directory that does not exist yet -- NOTE(review): confirm the parameter meanings
// against rdlock_path_xlock_dentry's declaration.
CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, rdlocks, wrlocks, xlocks, false, false, false);
if (!dn) return;
// Operations inside a snapshot directory are not allowed.
if (mdr->snapid != CEPH_NOSNAP) {
respond_to_request(mdr, -EROFS);
return;
}
// Add the parent directory inode's authlock to the set of read locks.
CDir *dir = dn->get_dir();
CInode *diri = dir->get_inode();
rdlocks.insert(&diri->authlock);
// Acquire the collected locks (analysed in detail earlier in the article).
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
// mkdir check access
if (!check_access(mdr, diri, MAY_WRITE))
return;
// Check whether the dirfrag has reached its limit on child entries.
if (!check_fragment_space(mdr, dir))
return;
// The dentry obtained above is a null dentry, not yet linked to any inode,
// so a new inode has to be created for it.
// Before creating it, look up the current snaprealm and its newest sequence.
SnapRealm *realm = dn->get_dir()->inode->find_snaprealm();
snapid_t follows = realm->get_newest_seq();
// Take the mode from the request, clear its file-type bits and force S_IFDIR.
unsigned mode = req->head.args.mkdir.mode;
mode &= ~S_IFMT;
mode |= S_IFDIR;
CInode *newi = prepare_new_inode(mdr, dn->get_dir(), inodeno_t(req->head.ino), mode);
assert(newi);
// Link the new inode to the dentry, for now only as a projected linkage.
// As covered in the article's mdlog analysis, once the mdlog flush completes
// this inode is formally installed as the dentry's linkage.
dn->push_projected_linkage(newi);
// Fill in the new inode's fields.
newi->inode.version = dn->pre_dirty();
newi->inode.rstat.rsubdirs = 1;
newi->inode.update_backtrace();
// The dentry's first snapid is expected to be exactly one past the realm's newest seq.
assert(dn->first == follows + 1);
newi->first = dn->first;
// Create the new directory's dirfrag (the default frag_t()).
CDir *newdir = newi->get_or_open_dirfrag(mdcache, frag_t());
newdir->state_set(CDir::STATE_CREATING);
newdir->mark_complete();
newdir->fnode.version = newdir->pre_dirty();
// Prepare the mdlog entry.
mdr->ls = mdlog->get_current_segment();
EUpdate *le = new EUpdate(mdlog, "mkdir");
mdlog->start_entry(le);
le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid());
journal_allocated_inos(mdr, &le->metablob);
mdcache->predirty_journal_parents(mdr, &le->metablob, newi, dn->get_dir(), PREDIRTY_PRIMARY|PREDIRTY_DIR, 1);
le->metablob.add_primary_dentry(dn, newi, true, true);
le->metablob.add_new_dir(newdir); // dirty AND complete AND new
// Issue read/write caps on the newly created directory to the requesting session.
int cmode = CEPH_FILE_MODE_RDWR;
Capability *cap = mds->locker->issue_new_caps(newi, cmode, mdr->session, realm, req->is_replay());
if (cap) {
cap->set_wanted(0);
// put locks in excl mode
newi->filelock.set_state(LOCK_EXCL);
newi->authlock.set_state(LOCK_EXCL);
newi->xattrlock.set_state(LOCK_EXCL);
}
// make sure this inode gets into the journal
le->metablob.add_opened_ino(newi->ino());
LogSegment *ls = mds->mdlog->get_current_segment();
ls->open_files.push_back(&newi->item_open_file);
// Submit the mdlog entry with a completion callback. Per the article's author, the
// callback adds the newly created dentry etc. to the mdcache and marks them dirty.
journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
// Check whether the parent dirfrag now needs to be split.
mds->balancer->maybe_fragment(dir, false);
mdcache中,dir对象会在以下几种场景下被回刷保存到rados集群:
- CInode::flush,会在admin socket的flush_path命令中被调用,显式回刷某个path,会导致其父目录的commit操作
- MDCache::create_mydir_hierarchy,创建root目录时
- MDCache::create_empty_hierarchy,创建mds的MDS DIR时
- LogSegment::try_to_expire,mdlog的LogSegment过期清理时,会将标记为dirty的dir做commit操作