文件描述符是？

Last updated on a year ago

`fd` 是什么？

文件描述符是一个与输入/输出资源相关的整数，也常被称为文件句柄(file handle)、文件指针(file pointer)或文件引用(file reference)。简单来说，它是操作系统为了管理 I/O 操作而为进程维护的一张表中的索引，相当于系统中已打开文件的一个“门牌号”。在 Linux 世界中一切皆文件，因此文件描述符非常关键。

文件描述符的应用

文件操作：
open() 函数打开一个文件并获取文件描述符。通过 read() 和 write() 函数可以读写文件，使用 lseek() 函数可以移动文件读写指针，fcntl() 函数用于控制文件的属性等。
进程控制：
进程之间的通信需要使用进程间通信机制(IPC)，管道(pipe)可以用于进程间的无名管道通信，socketpair() 可以创建一对已连接的 socket，以便进程间可以进行通信等。
- 网络编程：
  
  每个套接字也是由一个fd管理

文件描述符就只是单纯的数字吗？

写一个简单的demo，打开一个文件，返回一个fd，并打印fd值

#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
/*
 * Demo: open (creating it if necessary) the file "abc" for writing, write
 * the three bytes "dd\n" into it, print the descriptor number, and then
 * sleep so the process can be inspected under /proc/<pid>/fd.
 *
 * Returns 0 on success and 1 if open() or write() fails.
 */
int main(void)
{
	/*
	 * O_CREAT requires the third (mode) argument; without it the
	 * permission bits of a newly created file are indeterminate.
	 */
	int fd = open("abc", O_WRONLY | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Write the buffer contents ("dd\n", 3 bytes) into the file "abc". */
	if (write(fd, "dd\n", 3) < 0) {
		perror("write");
		close(fd);
		return 1;
	}

	printf("fd:  %d\n", fd);

	/* Keep the process alive long enough to look at /proc/<pid>/fd. */
	sleep(100);

	close(fd);
	return 0;
}
### 编译并执行 fd 为3
root@ubuntu:/home# gcc -o main main.c -static
root@ubuntu:/home# ./main 
fd:  3

每个进程都有个 pid，在 /proc 目录中可以找到对应的pid目录，该目录包含了进程本身相关信息的文件，其中就有 fd信息

#后台执行这个程序
root@ubuntu:/home# ./main &
[1] 2626
root@ubuntu:/home# fd:  3

#2626是 pid
root@ubuntu:/home# cd /proc/2626/
root@ubuntu:/proc/2626# ll
-r--r--r--   1 root root 0 Feb  2 18:51 arch_status
-r--------   1 root root 0 Feb  2 18:51 environ
lrwxrwxrwx   1 root root 0 Feb  2 18:51 exe -> /home/main*
dr-x------   2 root root 0 Feb  2 18:51 fd/
....
-rw-r--r--   1 root root 0 Feb  2 18:51 uid_map
-r--r--r--   1 root root 0 Feb  2 18:51 wchan
root@ubuntu:/proc/2626# cd fd
root@ubuntu:/proc/2626/fd# ls
0  1  2  3
root@ubuntu:/proc/2626/fd#

可以看到 fd 目录里面有 0、1、2、3；3 是刚才打开文件时返回的。demo 里面我们把 dd 字符写入了 abc 文件，那我们是不是可以用 echo 命令把一些内容重定向到文件描述符 3 里面？

root@ubuntu:/proc/2626/fd# cat /home/abc
dd
root@ubuntu:/proc/2626/fd# echo "1234" > 3 
root@ubuntu:/proc/2626/fd# cat /home/abc
1234
root@ubuntu:/proc/2626/fd#

是可以的！我们再改下 demo：假如一个文件被打开多次，fd 还会是同一个吗？


root@ubuntu:/home# ./main &
[1] 2770
root@ubuntu:/home# 
#新增一个fd 
root@ubuntu:/proc/2770/fd# ls
0  1  2  3  4
root@ubuntu:/proc/2770/fd# 

# echo 追加到不同的fd，效果一样！ 
root@ubuntu:/proc/2770/fd# echo "4 add" >> 4
root@ubuntu:/proc/2770/fd# cat /home/abc
dd
4 add
root@ubuntu:/proc/2770/fd# echo "3 add" >> 3
root@ubuntu:/proc/2770/fd# cat /home/abc
dd
4 add
3 add
root@ubuntu:/proc/2770/fd#

fd 只能指向一个文件
一个文件可以被多个 fd 指向，
每个进程的fd 是隔离的，fd 只是个数字，对于不同进程指向的内容是不同的

我们也可以用 lsof 命令看一个文件被多少个 fd 占用

#都是 pid 为 2770 的进程，也验证了我们的看法
root@ubuntu:/proc/2770/fd# lsof /home/abc
COMMAND  PID USER   FD   TYPE DEVICE SIZE/OFF    NODE NAME
main    2770 root    3w   REG    8,1       15 1099779 /home/abc
main    2770 root    4w   REG    8,1       15 1099779 /home/abc
root@ubuntu:/proc/2770/fd#

从用户的使用来看， fd 像是一个连接用户与文件桥梁，不只是文件，还有很多，我更觉得像 handle

文件描述符 0、1、2 是什么？

发现每个进程的文件描述符都是从3开始的，因为0,1,2被占用了，那这三个fd 的作用？

在Linux和unix系统中，文件描述符0,1,2是系统预留的，每个程序在运行后，都会至少打开三个文件描述符，分别是0、1、2，它们的意义分别有如下对应关系：

0 stdin （标准输入）

1 stdout （标准输出）

2 stderr （标准错误）

比如我们经常把一些结果过滤后重定向到一个文件

#ls 只提取前五行，重定向到 /tmp/ll
hrp@ubuntu:~$ ll | head -n 5 > /tmp/ll
hrp@ubuntu:~$ cat /tmp/ll
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout
hrp@ubuntu:~$

当然我们也可以直接输出到终端，终端上的输出就是通过标准输出 fd 1 输出的；我们把文件描述符 1 重定向到文件，效果也一样

hrp@ubuntu:~$ ll | head -n 5    ###（标准输出）
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout


hrp@ubuntu:~$ echo  > /tmp/ll
hrp@ubuntu:~$ ll | head -n 5 1>/tmp/ll  ## 标准输出 重定向到/tmp/ll
hrp@ubuntu:~$ cat /tmp/ll
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout
hrp@ubuntu:~$

文件描述符 2 是标准错误，比如 ls 一个不存在的文件时，可以把文件描述符 2 重定向到 /tmp/log

标准错误
hrp@ubuntu:~$ ls dd
ls: cannot access 'dd': No such file or directory
hrp@ubuntu:~$ ls dd 2>/tmp/log
hrp@ubuntu:~$ cat /tmp/log
ls: cannot access 'dd': No such file or directory
hrp@ubuntu:~$

文件描述符0 ，则是输入，会读取键盘的输入，文件的输入等等…

fd在内核中是怎么构造的？

就以 open 函数来分析，在系统调用章节，我们可以推出 open 系统调用在内核代码中对应的函数

/*
 * Kernel-side implementation behind the open system call.
 *
 * Returns the new file descriptor on success, or the (non-zero) result of
 * build_open_flags(), PTR_ERR() of a failed getname()/do_filp_open(), or a
 * negative value from get_unused_fd_flags() on failure.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;
	struct filename *name;
	struct file *filp;
	int fd;

	/* Fill in op from the flag bits; any non-zero result is returned as-is. */
	fd = build_open_flags(flags, mode, &op);
	if (fd != 0)
		return fd;

	/*
	 * 1. Copy the path string from user space into the kernel.
	 * 2. Build the struct filename that wraps it.
	 */
	name = getname(filename);
	if (IS_ERR(name))
		return PTR_ERR(name);

	/* Reserve an unused fd - this is the key step analysed below. */
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out;

	/* Resolve the path to its inode and create the struct file. */
	filp = do_filp_open(dfd, name, &op);
	if (IS_ERR(filp)) {
		/* Opening failed: hand the reserved fd back. */
		put_unused_fd(fd);
		fd = PTR_ERR(filp);
		goto out;
	}

	fsnotify_open(filp);
	/* Bind the reserved fd to the newly created struct file. */
	fd_install(fd, filp);

out:
	putname(name);
	return fd;
}

主要分析 get_unused_fd_flags，看看 fd 是如何分配的

/*
 * Reserve an unused descriptor for the current task.
 *
 * Simply delegates to __alloc_fd() with the current task's file table,
 * a search start of 0, and rlimit(RLIMIT_NOFILE) as the exclusive upper
 * bound; @flags is passed through unchanged.
 */
int get_unused_fd_flags(unsigned flags)
{
	struct files_struct *files = current->files;

	return __alloc_fd(files, 0, rlimit(RLIMIT_NOFILE), flags);
}

current->files 是什么？

Linux内核通过一个被称为进程描述符的**task_struct**结构体来管理进程，这个结构体包含了一个进程所需的所有信息，#define current get_current() 宏就是获取当前的进程的task_struct，current->files 则是获取当前进程的file table structure files_struct


/*
 * Excerpt of the process descriptor; "...." marks fields omitted from this
 * listing. Only the member relevant to file descriptors is shown.
 */
struct task_struct {
    ....
    	/* Open file information: */
	/* Pointer to this task's files_struct (what current->files yields). */
	struct files_struct		*files;
    ....
}

files_struct 是什么？

/*
 * Open-file table of a process (reached through task_struct->files).
 */
struct files_struct {
  /*
   * read mostly part
   */
	atomic_t count;
	/* Set while expand_files() is growing the table. */
	bool resize_in_progress;
	/* Waiters sleep here until resize_in_progress is cleared. */
	wait_queue_head_t resize_wait;

	struct fdtable __rcu *fdt;
	struct fdtable fdtab;  /* file descriptor table */
  /*
   * written part on a separate cache line in SMP
   */
	/* Taken by __alloc_fd() around the fd search and bookkeeping. */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	/*
	 * Hint for the next fd to try (last allocated fd + 1). It is not
	 * guaranteed to be free, so it still has to be verified.
	 */
	unsigned int next_fd;
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};

回到 __alloc_fd


/*
 * allocate a file descriptor, mark it busy.
 *
 * @files: file table to allocate from
 * @start: lowest fd number the caller will accept
 * @end:   exclusive upper bound; -EMFILE is returned if the candidate
 *         fd is >= end
 * @flags: only O_CLOEXEC is examined here
 *
 * Returns the allocated fd (>= 0) or a negative error code. The whole
 * search runs under files->file_lock.
 */
int __alloc_fd(struct files_struct *files,
	       unsigned start, unsigned end, unsigned flags)
{
	unsigned int fd;
	int error;
/*
 * For reference, the table type used below:
 *
 *    struct fdtable {
 *	unsigned int max_fds;
 *	struct file __rcu **fd;
 *	unsigned long *close_on_exec;
 *	unsigned long *open_fds;
 *	unsigned long *full_fds_bits;
 *	struct rcu_head rcu;
 *    };
 */
    struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
    /* Fetch the current fd table (it holds the fd bitmaps). */
	fdt = files_fdtable(files);
    /* Begin at the caller's lower bound (0 when called from get_unused_fd_flags). */
	fd = start;
    /* ...but never below the next_fd hint. */
	if (fd < files->next_fd)
		fd = files->next_fd;
    /* Search for a free fd, only if the starting point lies inside the table. */
	if (fd < fdt->max_fds)
		fd = find_next_fd(fdt, fd);

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
    /* Default result is -EMFILE; it is returned when fd reaches the limit `end`. */
	error = -EMFILE;
	if (fd >= end)
		goto out;
    /*
     * Make sure the table can hold `fd`. Per expand_files(): <0 is an
     * error, 0 means nothing was done, 1 means the table was expanded.
     */
	error = expand_files(files, fd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fs array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;
    
    /* Advance the next_fd hint, but only if the search began at or below it. */
	if (start <= files->next_fd)
		files->next_fd = fd + 1;
    
    /* Mark fd as in use (presumably sets its bit in the open_fds bitmap). */
	__set_open_fd(fd, fdt);
    /* Record or clear close-on-exec for fd according to O_CLOEXEC. */
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	error = fd;


out:
	spin_unlock(&files->file_lock);
	return error;
}

有几个问题:

fdtable 是什么？

file 文件描述符表，用了位图方式记录已经打开的fd，可用的fd

/*
 * File descriptor table: tracks, using bitmaps, which fds are open and
 * which are still available.
 */
struct fdtable {
    /* Current capacity of this table; fds must be < max_fds to fit. */
	unsigned int max_fds;   
	struct file __rcu **fd;      /* current fd array */
    
    /*
     * The three unsigned long pointers below are used as bitmaps
     * (64 bits per word on a 64-bit build).
     * close_on_exec: one bit per fd; __alloc_fd() sets or clears it
     * depending on O_CLOEXEC.
     */
	unsigned long *close_on_exec;   
    
	/*
	 * One bit per file descriptor: 0 = fd not in use, 1 = fd in use.
	 * E.g. bit 35 set means file descriptor 35 is already taken.
	 */
    unsigned long *open_fds;
    
    /*
     * One bit per word of open_fds, i.e. per group of 64 fds.
     * Bit 0 set means fds 0..63 are all in use; bit 0 clear means at
     * least one fd in 0..63 is still free.
     */
	unsigned long *full_fds_bits;
	struct rcu_head rcu;
};

find_next_fd 函数是怎么找的呢？

/*
 * Find a candidate free fd at or after @start in @fdt.
 *
 * Two-level search: full_fds_bits is consulted first to skip whole groups
 * of BITS_PER_LONG descriptors, then open_fds is scanned for a zero bit.
 * Returns fdt->max_fds when the group-level result lies beyond the table.
 * (find_next_zero_bit() is not shown here; presumably it returns the index
 * of the first zero bit at or after the given offset.)
 */
static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
{
	/* Upper bound of the current table. */
	const unsigned int maxfd = fdt->max_fds;
	/* Number of BITS_PER_LONG-sized groups covered by the table. */
	const unsigned int nwords = maxfd / BITS_PER_LONG;
	unsigned int first;

	/*
	 * Starting from the group that contains @start, locate the first
	 * group whose "full" bit is zero, then convert the group index back
	 * into an fd number.
	 */
	first = find_next_zero_bit(fdt->full_fds_bits, nwords,
				   start / BITS_PER_LONG) * BITS_PER_LONG;

	/* Past the end of the table: report maxfd to the caller. */
	if (first > maxfd)
		return maxfd;

	/* Never search below @start; scan open_fds for the free bit. */
	return find_next_zero_bit(fdt->open_fds, maxfd,
				  first > start ? first : start);
}

从 find_next_fd看到， bit 是可能超过 maxfd的，那怎么处理呢？

从函数来看，如果超过了就直接返回 maxfd；其实后面还是有处理的，就在 expand_files 里面

/*
 * Expand files.
 * This function will expand the file structures, if the requested size exceeds
 * the current capacity and there is room for expansion.
 * Return <0 error code on error; 0 when nothing done; 1 when files were
 * expanded and execution may have blocked.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
 static int expand_files(struct files_struct *files, unsigned int nr)
	__releases(files->file_lock)
	__acquires(files->file_lock)
{
	struct fdtable *fdt;
	int expanded = 0;

repeat:
    /* As in __alloc_fd(): fetch the current fd table first. */
	fdt = files_fdtable(files);

	/* Do we need to expand? */
    /* nr already fits in the table: return (0, or 1 if we had to wait below). */
	if (nr < fdt->max_fds)
		return expanded;

	/* Can we expand? */
    /*
     * nr is at or above sysctl_nr_open: refuse with -EMFILE.
     * NOTE(review): sysctl_nr_open is presumably the system-wide
     * fs.nr_open sysctl rather than the `ulimit` value; the RLIMIT_NOFILE
     * check is done by __alloc_fd() via its `end` argument - confirm.
     */
	if (nr >= sysctl_nr_open)
		return -EMFILE;
    /*
     * A resize is already in progress elsewhere: drop the spinlock, sleep
     * until resize_in_progress is cleared, retake the lock and re-check
     * from the top. expanded is set to 1 because we may have blocked.
     */
	if (unlikely(files->resize_in_progress)) {
		spin_unlock(&files->file_lock);
		expanded = 1;
		wait_event(files->resize_wait, !files->resize_in_progress);
		spin_lock(&files->file_lock);
		goto repeat;
	}

	/* All good, so we try */
	files->resize_in_progress = true;
   /*
    * Grow the fd table. expand_fdtable() is not shown here; presumably it
    * allocates a larger table sized for nr and copies the old contents
    * into the new fdtable.
    */
	expanded = expand_fdtable(files, nr);
	files->resize_in_progress = false;

	/* Wake every task that was waiting for the resize to finish. */
	wake_up_all(&files->resize_wait);
	return expanded;
}

next_fd 作用是？

每次获取到新的 fd，基于这个 fd+1 得到 next_fd，好像是为了准备下一个 fd；可用的 fd 最终是在 **__set_open_fd** 函数里更新到 open_fds 位图中的，但是我们发现 full_fds_bits_init 位图是没有实质上的更新的，只是单凭起始位置 start 和 maxfd 来判断，而 start 正是 next_fd，由此发现 next_fd 的作用是为了定位 full_fds_bits_init 的当前位置

结语

到现在为止，fd 是怎么生成的、在哪里有记录，都介绍了；open 的实现先讲到这里。本来是要讲 open 的，但继续挖下去，感觉应该先把 inode 这些原理讲清楚，这样才方便展开讲

Linux

#Linux内核

asan内存检测 Previous

系统调用 Next