link_path_walk()
do_filp_open()里将pathname保存到nameidata里,pathname是open file的完整路径名,调用path_openat,此时是flags是带了LOOKUP_RCU flag的
struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op) { struct nameidata nd; int flags = op->lookup_flags; struct file *filp; set_nameidata(&nd, dfd, pathname); filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) filp = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); return filp; }
path_openat()里主要call了3个函数path_init()、link_path_walk()、do_last()
调用path_init()设置nameidata的path结构体,对于常规文件来说,比如/data/test/test.txt,设置path结构体‘指向’根目录/,即设置path.mnt/path.dentry‘指向’根目录/,为后续的目录解析做准备。path_init()的返回值s即指向open file的完整路径名字串开头。
static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { struct file *file; int error; file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(nd, flags, op, file); } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); } else { const char *s = path_init(nd, flags); while (!(error = link_path_walk(s, nd)) && (error = do_last(nd, file, op)) > 0) { nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); s = trailing_symlink(nd); } terminate_walk(nd); } if (likely(!error)) { if (likely(file->f_mode & FMODE_OPENED)) return file; WARN_ON(1); error = -EINVAL; } fput(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; else error = -ESTALE; } return ERR_PTR(error); }
static int link_path_walk(const char *name, struct nameidata *nd) { int err; if (IS_ERR(name)) return PTR_ERR(name); while (*name=='/') name++; if (!*name) return 0; /* At this point we know we have a real path component. */ for(;;) { u64 hash_len; int type; err = may_lookup(nd); if (err) return err; hash_len = hash_name(nd->path.dentry, name); //计算当前目录的hash_len,这个变量高4 byte是当前目录name字串长度,低4byte是当前目录(路径)的hash值,hash值的计算是基于当前目录的父目录dentry(nd->path.dentry)来计算的,所以它跟其目录(路径)dentry是关联的 type = LAST_NORM; if (name[0] == '.') switch (hashlen_len(hash_len)) { case 2: if (name[1] == '.') { type = LAST_DOTDOT; nd->flags |= LOOKUP_JUMPED; } break; case 1: type = LAST_DOT; } if (likely(type == LAST_NORM)) { struct dentry *parent = nd->path.dentry; nd->flags &= ~LOOKUP_JUMPED; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { //to do struct qstr this = { { .hash_len = hash_len }, .name = name }; err = parent->d_op->d_hash(parent, &this); if (err < 0) return err; hash_len = this.hash_len; name = this.name; } } nd->last.hash_len = hash_len; //更新nameidata last结构体,last.hash_len,name nd->last.name = name; nd->last_type = type; name += hashlen_len(hash_len); //name指向下一级目录 if (!*name) goto OK; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. */ do { name++; } while (unlikely(*name == '/')); if (unlikely(!*name)) { //假设open file文件名路径上没有任何symlink,则如果这个条件满足,整个路径都解析完了,还剩最后的filename留给do_last()解析,此函数将从下面的!nd->depth条件处返回。 OK: /* pathname body, done */ if (!nd->depth) //如果open file完整路径上没有任何symlink,nd->depth等于0, return 0; name = nd->stack[nd->depth - 1].name; /* trailing symlink, done */ if (!name) return 0; /* last component of nested symlink */ err = walk_component(nd, WALK_FOLLOW); //symlink case } else { /* not the last component */ err = walk_component(nd, WALK_FOLLOW | WALK_MORE); //常规目录case,即非symlink case } if (err < 0) return err; if (err) { const char *s = get_link(nd); if (IS_ERR(s)) return PTR_ERR(s); err = 0; if (unlikely(!s)) { /* jumped */ put_link(nd); } else { nd->stack[nd->depth - 1].name = name; name = s; continue; } } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { if (unlazy_walk(nd)) return -ECHILD; } return -ENOTDIR; } } }
walk_component()首先会调用lookup_fast()在dcache里查找,如果没有找到,则直接返回0,然后进入lookup_slow()的slow path。另外lookup_fast()如果在dcache里没有找到当前目录对应的dentry,然后调用unlazy_walk()后返回了-ECHILD后会终结当前的path_openat(),重新调用path_openat(),此时调用path_openat()将不会带有LOOKUP_RCU,这样后续的flow在dcache里查找时将会使用到rcu锁和dentry的spinlock,这个会降低在dcache里查找的效率;而之前的path_openat带有LOOKUP_RCU,所以在dcache里lookup时将不需要去操作这些锁,效率会提高。
无论是在dcache里找到了当前目录对应的dentry(fast path)或者没有找到(slow path),这两个path都会去设置path结构体里的dentry、mnt成员,将当前路径更新到path结构体。
path结构体更新后,最后调用step_info/path_to_nameidata将path结构体更新到nd.path,这样nd里的path就指向了当前目录了,至此完成一级目录的解析查找,返回link_path_walk()解析下一级目录
static int walk_component(struct nameidata *nd, int flags) { struct path path; struct inode *inode; unsigned seq; int err; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ if (unlikely(nd->last_type != LAST_NORM)) { err = handle_dots(nd, nd->last_type); if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return err; } err = lookup_fast(nd, &path, &inode, &seq); if (unlikely(err <= 0)) { //上述look_fast()如果在dcache里没有找到当前目录(路径)的dentry,则会执行如下的lookup_slow() if (err < 0) return err; path.dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags); if (IS_ERR(path.dentry)) return PTR_ERR(path.dentry); path.mnt = nd->path.mnt; err = follow_managed(&path, nd); if (unlikely(err < 0)) return err; if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; } seq = 0; /* we are already out of RCU mode */ inode = d_backing_inode(path.dentry); } return step_into(nd, &path, flags, inode, seq); }
__lookup_slow()首先调用d_alloc_parallel给当前路径分配一个新的dentry,然后调用inode->i_op->lookup(),注意这里的inode是当前路径的父路径dentry的d_inode成员。inode->i_op是具体的文件系统inode operations函数集了,当前以ext4 fs来举例的话,就是ext4 fs的inode operations函数集,即ext4_dir_inode_operations,其lookup函数是ext4_lookup(),这个具体文件系统lookup的过程将单独在一篇文件里描述。
具体文件系统的lookup完成后,此时的dentry已经有绑定的inode了,即已经设置了其d_inode成员了,然后调用d_lookup_done()将此dentry从lookup hash链表上移除:
static struct dentry *__lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { //这个不考虑并发的情况下这个条件不成立 if (!(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); dput(dentry); goto again; } dput(dentry); dentry = ERR_PTR(error); } } } else { old = inode->i_op->lookup(inode, dentry, flags); d_lookup_done(dentry); if (unlikely(old)) { //如果dentry是一个目录的dentry,则有可能old是有效的;否则如果dentry是文件的dentry则old是null dput(dentry); dentry = old; } } return dentry; }
struct dentry *d_alloc_parallel(struct dentry *parent, const struct qstr *name, wait_queue_head_t *wq) { unsigned int hash = name->hash; struct hlist_bl_head *b = in_lookup_hash(parent, hash); //lookup hash链表 struct hlist_bl_node *node; struct dentry *new = d_alloc(parent, name); //分配一个dentry struct dentry *dentry; unsigned seq, r_seq, d_seq; if (unlikely(!new)) return ERR_PTR(-ENOMEM); retry: rcu_read_lock(); seq = smp_load_acquire(&parent->d_inode->i_dir_seq); r_seq = read_seqbegin(&rename_lock); dentry = __d_lookup_rcu(parent, name, &d_seq); //再次到dcache里查找当前目录是否有对应的dentry,在不考虑并发的条件下,将查找不到 if (unlikely(dentry)) { if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } if (read_seqcount_retry(&dentry->d_seq, d_seq)) { rcu_read_unlock(); dput(dentry); goto retry; } rcu_read_unlock(); dput(new); return dentry; } if (unlikely(read_seqretry(&rename_lock, r_seq))) { rcu_read_unlock(); goto retry; } if (unlikely(seq & 1)) { rcu_read_unlock(); goto retry; } hlist_bl_lock(b); if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { hlist_bl_unlock(b); rcu_read_unlock(); goto retry; } /* * No changes for the parent since the beginning of d_lookup(). * Since all removals from the chain happen with hlist_bl_lock(), * any potential in-lookup matches are going to stay here until * we unlock the chain. All fields are stable in everything * we encounter. */ hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { //从lookup hash链表上查找是否有当前路径对应的dentry,如果有找到,put上面alloc的dentry然后就直接返回找到的这个dentry了,这里有点奇怪,为什么不先查找,如果没有查找到再alloc dentry.. if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) continue; if (!d_same_name(dentry, parent, name)) continue; hlist_bl_unlock(b); /* now we can try to grab a reference */ if (!lockref_get_not_dead(&dentry->d_lockref)) { rcu_read_unlock(); goto retry; } rcu_read_unlock(); /* * somebody is likely to be still doing lookup for it; * wait for them to finish */ spin_lock(&dentry->d_lock); d_wait_lookup(dentry); /* * it's not in-lookup anymore; in principle we should repeat * everything from dcache lookup, but it's likely to be what * d_lookup() would've found anyway. If it is, just return it; * otherwise we really have to repeat the whole thing. */ if (unlikely(dentry->d_name.hash != hash)) goto mismatch; if (unlikely(dentry->d_parent != parent)) goto mismatch; if (unlikely(d_unhashed(dentry))) goto mismatch; if (unlikely(!d_same_name(dentry, parent, name))) goto mismatch; /* OK, it *is* a hashed match; return it */ spin_unlock(&dentry->d_lock); dput(new); return dentry; } rcu_read_unlock(); /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; //置上这个flag,后续将很快会将这个新alloc的dentry从lookup hash链表上移除 new->d_wait = wq; hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); //将此新alloc的dentry插入lookup hash链表 hlist_bl_unlock(b); return new; mismatch: spin_unlock(&dentry->d_lock); dput(dentry); goto retry; } EXPORT_SYMBOL(d_alloc_parallel);