内核中文件路径是如何解析的？

Last updated on 9 months ago

前言

在文件描述符文章中已经介绍 fd是如何生成的；原本想接下来说下 fd 和文件是怎么关联的，但是粗略的看了来整个过程，实在是太复杂，有很多前置的知识点，很难一下子展开讲，要讲明白的话不是一两篇文章能解决的；于是打算围绕这个问题，再细分下来，逐个击破。

本章的内容就先简单的介绍文件路径再linux 中怎么解析的

在文件描述符介绍的文章中也有讲过 open 函数，这里就不重复叙述怎么进入 do_sys_open 函数

先介绍几个关键的结构体

nameidata

nameidata结构体会一直伴随着路径解析的过程

struct nameidata {
    struct path path;              // 当前正在解析的路径
    struct qstr last;              // 路径中最后一个组件的名称
    struct path root;              // 路径解析的根路径
    struct inode *inode;           // 当前正在解析的路径对应的 inode 节点
    unsigned int flags;            // 路径解析的标志位
    unsigned seq, m_seq;           // 路径解析的序列号
    int last_type;                 // 路径中最后一个组件的类型
    unsigned depth;                // 路径解析的深度
    int total_link_count;          // 路径解析过程中遇到的符号链接的总数
    struct saved {                 // 路径解析过程中的状态栈
        struct path link;          // 符号链接的路径
        struct delayed_call done;  // 延迟调用
        const char *name;          // 符号链接的名称
        unsigned seq;              // 符号链接的序列号
    } *stack, internal[EMBEDDED_LEVELS];
    struct filename *name;         // 路径解析的路径字符串
    struct nameidata *saved;       // 保存的 nameidata 结构体指针
    struct inode *link_inode;      // 符号链接目标的 inode 节点
} __randomize_layout;

file

一个file代表一个打开的文件对象，通过 fd 可以找到这个 file ，

struct file {
    union {
        struct llist_node fu_llist;    // 文件链表节点，用于管理打开的文件列表
        struct rcu_head fu_rcuhead;    // RCU 头节点，用于进行内存回收
    } f_u;
    struct path f_path;                // 文件对应的路径信息
    struct inode *f_inode;             // 文件对应的 inode 节点，缓存值，提高访问效率
    const struct file_operations *f_op;    // 文件操作函数集指针
//.....
    fmode_t f_mode;                    // 文件打开模式，如读、写、追加等
    struct mutex f_pos_lock;           // 文件位置锁，用于保护文件位置信息
    loff_t f_pos;                      // 文件当前读写位置
    struct fown_struct f_owner;        // 文件拥有者信息
    const struct cred *f_cred;         // 文件所属用户的凭证信息
    struct file_ra_state f_ra;         // 文件读取预取状态

#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head f_ep_links;       // 事件链接列表头，用于事件驱动 I/O
    struct list_head f_tfile_llink;    // 与 epoll 相关的文件链接列表头
} __randomize_layout;

dentry

dentry （directory entry）记录了父目录以及子目录项

struct dentry {
	/* RCU 查找涉及的字段 */
	unsigned int d_flags;		/* 由 d_lock 保护 */
	seqcount_t d_seq;		/* 每个目录项的序列锁 */
	struct hlist_bl_node d_hash;	/* 查找哈希列表 */
	struct dentry *d_parent;	/* 父目录 */
	struct qstr d_name;        /* 目录项名称 */
	struct inode *d_inode;	    /* 名称所属的位置 - 为 NULL 表示负 */
	unsigned char d_iname[DNAME_INLINE_LEN];	/* 短名称 */
//..

	union {
		struct list_head d_lru;		/* LRU 列表 */
		wait_queue_head_t *d_wait;	/* 仅用于查找中的等待队列 */
	};
	struct list_head d_child;	/* 父列表中的子项 */
	struct list_head d_subdirs;	/* 我们的子目录 */
	/*
	 * d_alias 和 d_rcu 可以共享内存
	 */
	union {
		struct hlist_node d_alias;	/* inode 别名列表 */
		struct hlist_bl_node d_in_lookup_hash;	/* 仅用于查找中的列表 */
		struct rcu_head d_rcu;
	} d_u;
} __randomize_layout; // 目录项结构

主要关注do_sys_open 中的 do_filp_open 函数

long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	int fd = build_open_flags(flags, mode, &op);
	struct filename *tmp;
	if (fd)
		return fd;
	tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);
	fd = get_unused_fd_flags(flags);
	//fd 申请成功
    if (fd >= 0) {
    //获取 file    
		struct file *f = do_filp_open(dfd, tmp, &op);
		if (IS_ERR(f)) {
			put_unused_fd(fd);
			fd = PTR_ERR(f);
		} else {
    // 建立 file 和 fd 的联系        
			fsnotify_open(f);
			fd_install(fd, f);
		}
	}
	putname(tmp);
	return fd;
}

do_filp_open

fd申请成功的情况下，才会开始路径解析，该函数先 nameidata 初始化了路径名字（先假设文件路径绝对路径，为 /home/hrp/test），主要关注 path_openat 的实现

struct file *do_filp_open(int dfd, struct filename *pathname,
		const struct open_flags *op)
{
	struct nameidata nd;
	int flags = op->lookup_flags;
	struct file *filp;
	
	set_nameidata(&nd, dfd, pathname);
	filp = path_openat(&nd, op, flags | LOOKUP_RCU);
	if (unlikely(filp == ERR_PTR(-ECHILD)))
		filp = path_openat(&nd, op, flags);
	if (unlikely(filp == ERR_PTR(-ESTALE)))
		filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
	restore_nameidata();
	return filp;
}

path_openat

主要工作是，根据open 带的flag来分配一个 file ，接下来根据这些flag 来判断进入不同流程
这里有O_PATH 的标志位，这个标志位只是申请了文件描述符，没有开打文件；
我们关注以下这三个函数

**path_init **
link_path_walk
do_last （放在下篇文章讲）

static struct file *path_openat(struct nameidata *nd,
			const struct open_flags *op, unsigned flags)
{
	struct file *file;
	int error;
//先分配  一个file 
	file = alloc_empty_file(op->open_flag, current_cred());
	if (IS_ERR(file))
		return file;

	if (unlikely(file->f_flags & __O_TMPFILE)) {
		error = do_tmpfile(nd, flags, op, file);
	} else if (unlikely(file->f_flags & O_PATH)) {
		error = do_o_path(nd, flags, file);
	} else {
     //重点，这几个函数下面会有介绍
		const char *s = path_init(nd, flags);
		while (!(error = link_path_walk(s, nd)) &&
			(error = do_last(nd, file, op)) > 0) {
			nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
			s = trailing_symlink(nd);
		}
		terminate_walk(nd);
	}
	if (likely(!error)) {
		if (likely(file->f_mode & FMODE_OPENED))
			return file;
		WARN_ON(1);
		error = -EINVAL;
	}
	fput(file);
//....
}

path_init

前面说到的 nameidata 也只是初始化路径名，而在path_init 中会进一步初始化 nameidata

static const char *path_init(struct nameidata *nd, unsigned flags)
{
    //获取 路径，s 指向路径名的第一个字符    
	const char *s = nd->name->name;
....

	nd->last_type = LAST_ROOT; /* if there are only slashes... */
	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
	nd->depth = 0;
    //....
	nd->root.mnt = NULL;
	nd->path.mnt = NULL;
	nd->path.dentry = NULL;
    if (flags & LOOKUP_ROOT) {
        struct dentry *root = nd->root.dentry;
        struct inode *inode = root->d_inode;
        //最开始的时候 nd->path 就是为root ，root 在下面会初始化
        nd->path = nd->root;
        nd->inode = inode;
//....
        return s;
    }

	//....
    // paht 为 /home/hrp/abc  所以 *s 为 /
    if (*s == '/') {
    // 获取根目录的 root path ， 从 current->fs 中获取 
        set_root(nd);
		if (likely(!nd_jump_root(nd)))
			return s;
		return ERR_PTR(-ECHILD);
	} else if (nd->dfd == AT_FDCWD) {
        //当前目录
		if (flags & LOOKUP_RCU) {
			struct fs_struct *fs = current->fs;
			unsigned seq;
			do {
				seq = read_seqcount_begin(&fs->seq);
				nd->path = fs->pwd;
				nd->inode = nd->path.dentry->d_inode;
				nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
			} while (read_seqcount_retry(&fs->seq, seq));
		} else {
			get_fs_pwd(current->fs, &nd->path);
			nd->inode = nd->path.dentry->d_inode;
		}
		return s;
	} else {
	....
		return s;
	}
}

link_path_walk

nameidata 初始化了 root ，root 里面包含了目录” / “ 的dentry，接下来看 link_path_walk 的实现，该函数也是路径解析的关键函数

假设路径为  /home/hrp/abc
static int link_path_walk(const char *name, struct nameidata *nd)
{
    // 如果传入的路径名为错误指针，则直接返回错误码
    // 跳过 / 找到为找到第一个路径名字做准备
    while (*name == '/')
        name++;
    if (!*name)
        return 0;

    // 开始解析路径名中的每个部分,此时  pathname  
    //(gdb) p name
    // "home/hrp/abc"
    for (;;) {
        u64 hash_len;
        int type;

        // 检查当前进程是否有权限查找给定路径中的目录项
        err = may_lookup(nd);

        ///根据 路径名字和当前目录的 dentry的hash_len，
        //高4 byte是当前目录name字串长度，
        //低4 byte是当前目录（路径）的hash值，
        // hash值的计算是基于当前目录的父目录dentry（nd->path.dentry）来计算的，所以它跟其目录（路径）dentry是关联的
        hash_len = hash_name(nd->path.dentry, name);
        type = LAST_NORM; //正常解析模式   
//判断 是不是 . 开头的路径, 
        if (name[0] == '.') 
            switch (hashlen_len(hash_len)) {
                case 2:
                    // 为 ..
                    if (name[1] == '.') {
                        type = LAST_DOTDOT;
                        nd->flags |= LOOKUP_JUMPED;
                    }
                    break;
                    //为 .
                case 1:
                    type = LAST_DOT;
            }
        // 最后一个？ 
        if (likely(type == LAST_NORM)) {
            //获取 父目录的目录项 
            struct dentry *parent = nd->path.dentry;
            nd->flags &= ~LOOKUP_JUMPED;
            //检查是否需要重新计算哈希值??
            if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
                struct qstr this = { { .hash_len = hash_len }, .name = name };
                err = parent->d_op->d_hash(parent, &this);
                if (err < 0)
                    return err;
                hash_len = this.hash_len;
                name = this.name;
            }
        }

        // 更新 nameidata 
        // 此时name 为   "home/hrp/abc"
        nd->last.hash_len = hash_len;
        nd->last.name = name;
        nd->last_type = type;
       //hash_len里面前32位是当前目录名字的长度
	   //hashlen_len 函数可以就是获取hash_len的 前32位
        name += hashlen_len(hash_len);
        if (!*name)
            goto OK;
        //此时name 为  /hrp/abc"
        // 跳过多余的斜杠，继续处理下一个部分
        do {
            name++;
        } while (unlikely(*name == '/'));
        //此时name 为  hrp/abc"

        if (unlikely(!*name)) {
OK:
            // 路径名解析完成，返回0表示成功
            if (!nd->depth)
                return 0;
            name = nd->stack[nd->depth - 1].name;
            // 如果存在符号链接，继续跟随链接解析
            if (!name)
                return 0;
            // 最后一个组件的嵌套符号链接
            err = walk_component(nd, WALK_FOLLOW);
        } else {
            // 还需要，继续处理，walk_component 这里会跟新 nd里面的dentry
            //从当前 dentry（/）找到 home目录的dentry ，并更新 
            err = walk_component(nd, WALK_FOLLOW | WALK_MORE);
        }
        if (err < 0)
            return err;
        //....
        // 检查目录项是否可查找
        if (unlikely(!d_can_lookup(nd->path.dentry))) {
            if (nd->flags & LOOKUP_RCU) {
                if (unlazy_walk(nd))
                    return -ECHILD;
            }
            return -ENOTDIR;
        }
    }
}

walk_component

这里再分析下 walk_component ， walk_component

static int walk_component(struct nameidata *nd, int flags)
{
	struct path path;
	struct inode *inode;
	unsigned seq;
	int err;
    //不是正常模式，而是相对路径  ../ ./ 之类的
	if (unlikely(nd->last_type != LAST_NORM)) {
		err = handle_dots(nd, nd->last_type);
		if (!(flags & WALK_MORE) && nd->depth)
			put_link(nd);
		return err;
	}
    // 获取新的 新路径的path ，  
	err = lookup_fast(nd, &path, &inode, &seq);
    //....
	return step_into(nd, &path, flags, inode, seq);
}

lookup_fast

static int lookup_fast(struct nameidata *nd,
		       struct path *path, struct inode **inode,
		       unsigned *seqp)
{
    //每个path 都包含了当前 dentry， 和所在挂载点（理解为父目录） 
    //
	struct vfsmount *mnt = nd->path.mnt;
	struct dentry *dentry, *parent = nd->path.dentry;
	int status = 1;
	int err;

	if (nd->flags & LOOKUP_RCU) {
		unsigned seq;
		bool negative;
        //通过 nd-last （执行lookup_fast前已经更新了） 从当前的dentry 中找到新的dentry
        //此时是在内存中查找，所以很快
		dentry = __d_lookup_rcu(parent, &nd->last, &seq);
		/*
		 * This sequence count validates that the inode matches
		 * the dentry name information from lookup.
		 */
        //从新的dentry中后去inode
		*inode = d_backing_inode(dentry);
		//....
    }
    //...
    //更新 nameidata 为下次  link_path_walk 做准备
	path->mnt = mnt;
	path->dentry = dentry;
	err = follow_managed(path, nd);
	if (likely(err > 0))
		*inode = d_backing_inode(path->dentry);
	return err;
}

总结

从 home/hrp/abc 得到hash 值，在 **’/‘ ** 中的dentry找到 **’ home ‘**的 dentry ，更新 nameidata，

以此类推直到找到目录hrp所在的dentry 为止

下一篇文章介绍解析到 hrp目录后，是怎么找到abc文件的

TODO：

是怎么通过 drenty hash到的
软连接，特殊字符（./ ../）怎么处理的
drentry 不在内存中，这种情况怎么处理？

Linux

#Linux内核

文件打洞和稀疏文件 Previous

do_last Next