文件描述符是？

!image-20240206231423073

fd 是什么？

文件描述符是一个与输入/输出资源相关的非负整数，也常被称为文件句柄(file handle)，有时也被泛称为文件指针(file pointer)或文件引用(file reference)（注意它与 C 标准库中的 `FILE *` 并不是一回事）。简单来说，它是操作系统为了管理 I/O 操作而为每个进程维护的一张表中的索引，代表着系统中已打开文件的一个“门牌号”；在 Linux 世界中一切皆文件，因此文件描述符非常关键。

文件描述符的应用

文件操作：

open() 函数打开一个文件并获取文件描述符。通过 read() 和 write() 函数可以读写文件，使用 lseek() 函数可以移动文件读写指针，fcntl() 函数用于控制文件的属性等。

进程控制：

进程之间的通信需要使用进程间通信机制(IPC)：pipe() 创建的无名管道可以用于进程间通信，socketpair() 可以创建一对已连接的 socket 供进程间通信等，它们返回给用户的都是 fd。

网络编程：

每个套接字也是由一个fd管理

文件描述符就只是单纯的数字吗？

写一个简单的demo，打开一个文件，返回一个fd，并打印fd值


#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
/*
 * Demo: open (creating it if necessary) the file "abc" in the current
 * directory, write "dd\n" into it, print the descriptor number, then sleep
 * so that /proc/<pid>/fd can be inspected from another shell.
 *
 * Returns 0 on success, 1 if the file could not be opened or written.
 */
int main(void)
{
	/*
	 * O_CREAT requires the third (mode) argument; without it the new
	 * file's permission bits are taken from an indeterminate value.
	 */
	int fd = open("abc", O_WRONLY | O_CREAT, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* write() copies the buffer contents into the file "abc" */
	if (write(fd, "dd\n", 3) != 3) {
		perror("write");
		close(fd);
		return 1;
	}
	printf("fd:  %d\n", fd);

	/* keep the process (and its open fd) alive for inspection */
	sleep(100);

	close(fd);
	return 0;
}
### 编译并执行 fd 为3
root@ubuntu:/home# gcc -o main main.c -static
root@ubuntu:/home# ./main
fd:  3

每个进程都有个 pid，在 /proc 目录中可以找到对应的pid目录，该目录包含了进程本身相关信息的文件，其中就有 fd信息


#后台执行这个程序
root@ubuntu:/home# ./main &
[1] 2626
root@ubuntu:/home# fd:  3

#2626是 pid
root@ubuntu:/home# cd /proc/2626/
root@ubuntu:/proc/2626# ll
-r--r--r--   1 root root 0 Feb  2 18:51 arch_status
-r--------   1 root root 0 Feb  2 18:51 environ
lrwxrwxrwx   1 root root 0 Feb  2 18:51 exe -> /home/main*
dr-x------   2 root root 0 Feb  2 18:51 fd/
....
-rw-r--r--   1 root root 0 Feb  2 18:51 uid_map
-r--r--r--   1 root root 0 Feb  2 18:51 wchan
root@ubuntu:/proc/2626# cd fd
root@ubuntu:/proc/2626/fd# ls
0  1  2  3
root@ubuntu:/proc/2626/fd#

可以看到 fd 里面有 0、1、2、3；3 是刚打开一个文件返回的。demo 里面我们是写入了 dd 字符到 abc 文件里面，那我们是不是可以用 echo 命令重定向一些内容到文件描述符 3 里面？


root@ubuntu:/proc/2626/fd# cat /home/abc
dd
root@ubuntu:/proc/2626/fd# echo "1234" > 3
root@ubuntu:/proc/2626/fd# cat /home/abc
1234
root@ubuntu:/proc/2626/fd#

是可以的！我们再改下 demo：假如一个文件被打开多次，fd 还会是同一个吗？


root@ubuntu:/home# ./main &
[1] 2770
root@ubuntu:/home#
#新增一个fd
root@ubuntu:/proc/2770/fd# ls
0  1  2  3  4
root@ubuntu:/proc/2770/fd#

# echo 追加到不同的fd，效果一样！
root@ubuntu:/proc/2770/fd# echo "4 add" >> 4
root@ubuntu:/proc/2770/fd# cat /home/abc
dd
4 add
root@ubuntu:/proc/2770/fd# echo "3 add" >> 3
root@ubuntu:/proc/2770/fd# cat /home/abc
dd
4 add
3 add
root@ubuntu:/proc/2770/fd#

fd 只能指向一个文件

一个文件可以被多个 fd 指向

每个进程的fd 是隔离的，fd 只是个数字，对于不同进程指向的内容是不同的

我们也可以用 lsof 命令看一个文件被多少个 fd 占用


#都是 pid 为 2770 的进程，也验证了我们的看法
root@ubuntu:/proc/2770/fd# lsof /home/abc
COMMAND  PID USER   FD   TYPE DEVICE SIZE/OFF    NODE NAME
main    2770 root    3w   REG    8,1       15 1099779 /home/abc
main    2770 root    4w   REG    8,1       15 1099779 /home/abc
root@ubuntu:/proc/2770/fd#

从用户的使用来看，fd 像是一个连接用户与文件的桥梁；而且不只是文件，还有很多其他资源，所以我更觉得它像 handle

文件描述符 0,1,2 是什么？

发现每个进程的文件描述符都是从3开始的，因为0,1,2被占用了，那这三个fd 的作用？

在Linux和unix系统中，文件描述符0,1,2是系统预留的，每个程序在运行后，都会至少打开三个文件描述符，分别是0、1、2，它们的意义分别有如下对应关系：

0 stdin （标准输入）

1 stdout （标准输出）

2 stderr （标准错误）

比如我们经常把一些结果过滤后重定向到一个文件


#ls 只提取前五行，重定向到 /tmp/ll
hrp@ubuntu:~$ ll | head -n 5 > /tmp/ll
hrp@ubuntu:~$ cat /tmp/ll
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout
hrp@ubuntu:~$

当然我们也可以直接输出到终端，这个终端输出就是通过标准输出 fd 1 输出的；我们把文件描述符 1 重定向到文件，效果也一样


hrp@ubuntu:~$ ll | head -n 5    ###（标准输出）
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout

hrp@ubuntu:~$ echo  > /tmp/ll
hrp@ubuntu:~$ ll | head -n 5 1>/tmp/ll  ## 标准输出 重定向到/tmp/ll
hrp@ubuntu:~$ cat /tmp/ll
total 130320
drwxr-xr-x 17 hrp  hrp       4096 Feb  2 18:42 ./
drwxr-xr-x  4 root root      4096 Feb  2 19:32 ../
-rw-------  1 hrp  hrp       6287 Feb  2 19:44 .bash_history
-rw-r--r--  1 hrp  hrp        220 Jan 26 17:50 .bash_logout
hrp@ubuntu:~$

文件描述符 2 是这个终端的标准错误，比如 ls 一个不存在的文件，并把文件描述符 2 重定向到 /tmp/log


标准错误
hrp@ubuntu:~$ ls dd
ls: cannot access 'dd': No such file or directory
hrp@ubuntu:~$ ls dd 2>/tmp/log
hrp@ubuntu:~$ cat /tmp/log
ls: cannot access 'dd': No such file or directory
hrp@ubuntu:~$

文件描述符0 ，则是输入，会读取键盘的输入，文件的输入等等…

fd在内核中是怎么构造的？

就以 open 函数来分析，在系统调用章节，我们可以推出 open 系统调用在内核代码中对应的函数


/*
 * Kernel-side implementation of open(): validates the flags, copies in the
 * path, reserves a descriptor number, opens the file and binds the two.
 * Returns the new fd (>= 0) on success or a negative error code.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_flags op;

	/* fill in `op` from the flag bits; a non-zero result is returned as-is below */
	int fd = build_open_flags(flags, mode, &op);
	struct filename *tmp;

	if (fd)
		return fd;
	/*
	 * `filename` is a user-space pointer (__user); getname() presumably
	 * 1. copies the path string from user space into the kernel, and
	 * 2. builds a struct filename around it.
	 */
    tmp = getname(filename);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);
	/* reserve an unused fd number -- the key step, analysed next in the article */
	fd = get_unused_fd_flags(flags);
	if (fd >= 0) {
		/* resolve the path and create the struct file for it (presumably via its inode) */
		struct file *f = do_filp_open(dfd, tmp, &op);
		if (IS_ERR(f)) {
			/* open failed: give the reserved number back and report the error */
			put_unused_fd(fd);
			fd = PTR_ERR(f);
		} else {
			fsnotify_open(f);
			/* bind the fd number to the struct file */
			fd_install(fd, f);
		}
	}
	putname(tmp);
	return fd;
}

主要分析 get\_unused\_fd\_flags，看看 fd 是如何分配的


/*
 * Thin wrapper around __alloc_fd(): allocates from the calling task's file
 * table (current->files), searching from fd 0 up to the RLIMIT_NOFILE limit,
 * and forwards `flags` unchanged.
 */
int get_unused_fd_flags(unsigned flags)
{
	return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
}

current->files 是什么？

Linux内核通过一个被称为进程描述符的 task_struct 结构体来管理进程，这个结构体包含了一个进程所需的所有信息， #define current get_current() 宏就是获取当前的进程的 task\_struct，current->files 则是获取当前进程的file table structure files\_struct


/* Excerpt of the process descriptor; all unrelated fields are omitted. */
struct task_struct {
	/* ... */

	/* Open file information: */
	struct files_struct		*files;

	/* ... */
};

files\_struct 是什么？


/*
 * Per-process open file table, reached through task_struct->files.
 */
struct files_struct {
  /*
   * read mostly part
   */
	atomic_t count;
	/* set while expand_files() is growing the table; other callers wait on resize_wait */
	bool resize_in_progress;
	wait_queue_head_t resize_wait;

	/* presumably the table currently in use (what files_fdtable() reads) -- confirm */
	struct fdtable __rcu *fdt;
	struct fdtable fdtab;  /* file descriptor table */
  /*
   * written part on a separate cache line in SMP
   */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	/*
	 * Where the next fd search starts: __alloc_fd() sets it to the last
	 * allocated fd + 1. It is only a hint -- the slot is not guaranteed to
	 * be free and is verified during allocation.
	 */
	unsigned int next_fd;
	/* presumably the initial backing storage for the fdtable bitmaps and fd array -- confirm */
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};

回到 \_\_alloc\_fd


/*
 * allocate a file descriptor, mark it busy.
 *
 * Searches `files` for the lowest free descriptor in [start, end), marks it
 * in the open_fds bitmap and sets or clears its close-on-exec bit according
 * to O_CLOEXEC in `flags`.
 * Returns the descriptor on success, -EMFILE if none is available below
 * `end`, or the negative error returned by expand_files().
 * Takes and releases files->file_lock internally.
 */
int __alloc_fd(struct files_struct *files,
	       unsigned start, unsigned end, unsigned flags)
{
	unsigned int fd;
	int error;
/*
* For reference, the table type used below:
*
*    struct fdtable {
*	unsigned int max_fds;
*	struct file __rcu **fd;
*	unsigned long *close_on_exec;
*	unsigned long *open_fds;
*	unsigned long *full_fds_bits;
*	struct rcu_head rcu;
*};
*/
    struct fdtable *fdt;

	spin_lock(&files->file_lock);
repeat:
	/* fetch the current fd table (re-read after every expansion, hence inside `repeat`) */
	fdt = files_fdtable(files);
	/* begin at the caller's lower bound (0 when called from get_unused_fd_flags) */
	fd = start;
	/* ...but never below the next_fd hint */
	if (fd < files->next_fd)
		fd = files->next_fd;
	/* look for a free fd inside the current table */
	if (fd < fdt->max_fds)
		fd = find_next_fd(fdt, fd);

	/*
	 * N.B. For clone tasks sharing a files structure, this test
	 * will limit the total number of files that can be opened.
	 */
	/* -EMFILE is an error code ("too many open files"), not a count */
	error = -EMFILE;
	if (fd >= end)
		goto out;
	/* make sure the table can hold `fd`; expand_files() returns 0 when it is already big enough */
	error = expand_files(files, fd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fs array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	/* advance the next_fd hint past the fd we are handing out */
	if (start <= files->next_fd)
		files->next_fd = fd + 1;

	/* record the fd as in use in the open_fds bitmap */
	__set_open_fd(fd, fdt);
	if (flags & O_CLOEXEC)
		__set_close_on_exec(fd, fdt);
	else
		__clear_close_on_exec(fd, fdt);
	error = fd;

out:
	spin_unlock(&files->file_lock);
	return error;
}

有几个问题:

fdtable 是什么？

file 文件描述符表，用了位图方式记录已经打开的fd，可用的fd


/*
 * File descriptor table: an array of struct file pointers plus bitmaps that
 * track which descriptors are in use.
 */
struct fdtable {
      /* current capacity: fds below this value fit without expanding the table */
  	unsigned int max_fds;
  	struct file __rcu **fd;      /* current fd array */

      /*
       * The three unsigned long pointers below are used as bitmaps
       * (an unsigned long holds 64 bits on 64-bit builds).
       * In open_fds, 0 means the fd is unused and 1 means it is in use.
       */
      /* one bit per fd; set or cleared by __alloc_fd() according to O_CLOEXEC */
  	unsigned long *close_on_exec;

  	/* one bit per file descriptor: */
      /* e.g. bit 35 set means file descriptor 35 is already in use */
      unsigned long *open_fds;

      /*
       * One bit per BITS_PER_LONG-sized word of open_fds.
       * With 64-bit longs, bit 0 set means fds 0~63 are all in use;
       * bit 0 clear means at least one of fds 0~63 is still free.
       */
  	unsigned long *full_fds_bits;
  	struct rcu_head rcu;
};

find\_next\_fd 函数是怎么找的呢？


/*
 * Find the lowest free fd at or above `start` in `fdt`.
 * Returns fdt->max_fds when the first non-full word lies beyond the table;
 * otherwise returns the result of the bit search over open_fds.
 */
static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
{
      /* current capacity of the table */
  	unsigned int maxfd = fdt->max_fds;
      /*
       * Number of BITS_PER_LONG-sized groups covered by the table, used as
       * the search limit in full_fds_bits (e.g. max_fds 67 with 64-bit
       * longs gives maxbit 1).
       */
  	unsigned int maxbit = maxfd / BITS_PER_LONG;
      /* index of the group that contains `start` (start is at least next_fd) */
  	unsigned int bitbit = start / BITS_PER_LONG;
      /*
       * First pass: find the first group at or after bitbit whose
       * full_fds_bits bit is 0 (i.e. not marked full), then multiply by
       * BITS_PER_LONG to turn the group index back into an fd number.
       */
  	bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
      /* no candidate group inside the table: report "table full" */
  	if (bitbit > maxfd)
  		return maxfd;
      /* skip ahead only if the non-full group begins after `start` */
  	if (bitbit > start)
  		start = bitbit;
      /* second pass: find the actual free fd in open_fds */
  	return find_next_zero_bit(fdt->open_fds, maxfd, start);
}

从 find\_next\_fd 看到，bitbit 是可能超过 maxfd 的，那怎么处理呢？

从函数来看，如果超过了就直接返回 maxfd；其实后面还是有处理的，就在 expand\_files 里面


/*
* Expand files.
* This function will expand the file structures, if the requested size exceeds
* the current capacity and there is room for expansion.
* Return <0 error code on error; 0 when nothing done; 1 when files were
* expanded and execution may have blocked.
* The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_files(struct files_struct *files, unsigned int nr)
  	__releases(files->file_lock)
  	__acquires(files->file_lock)
{
  	struct fdtable *fdt;
  	int expanded = 0;

repeat:
      /* as in __alloc_fd(): fetch the current fd table first */
  	fdt = files_fdtable(files);

  	/* Do we need to expand? */
      /* nr already fits in the current table: nothing to do */
  	if (nr < fdt->max_fds)
  		return expanded;

  	/* Can we expand? */
      /*
       * Hard ceiling on the table size. NOTE(review): sysctl_nr_open is
       * presumably the fs.nr_open sysctl, not the per-process `ulimit -n`
       * value -- that limit is passed to __alloc_fd() as `end` -- confirm.
       */
  	if (nr >= sysctl_nr_open)
  		return -EMFILE;
      /*
       * Someone else is already resizing the table: drop the lock, sleep
       * until they finish, re-take the lock and re-evaluate from scratch.
       * `expanded = 1` tells the caller that we may have blocked.
       */
  	if (unlikely(files->resize_in_progress)) {
  		spin_unlock(&files->file_lock);
  		expanded = 1;
  		wait_event(files->resize_wait, !files->resize_in_progress);
  		spin_lock(&files->file_lock);
  		goto repeat;
  	}

  	/* All good, so we try */
  	files->resize_in_progress = true;
     /*
      * Grow the table. Roughly (per the article; expand_fdtable() is not
      * shown here): allocate a new, larger fdtable sized from nr and copy
      * the old contents into it.
      */
  	expanded = expand_fdtable(files, nr);
  	files->resize_in_progress = false;

  	wake_up_all(&files->resize_wait);
  	return expanded;
}

next\_fd 作用是？

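每次获取到新的 fd，都会基于这个 fd+1 得到 next\_fd，好像是为了准备下一个 fd；可用的 fd 最终是在 \_\_set\_open\_fd 函数中更新到 open\_fds 位图里的。但是从上面贴出的代码来看，full\_fds\_bits\_init 位图并没有看到实质上的更新，查找时只是凭借起始位置 start 和 maxfd 来判断，而 start 正是 next\_fd，由此推测 next\_fd 的作用是为了定位 full\_fds\_bits\_init 的当前位置，让查找不必每次都从 0 开始。（补充：\_\_set\_open\_fd 的实现本文没有贴出；在内核源码中，当某一组 BITS\_PER\_LONG 个 fd 全部被占用时，它也会把 full\_fds\_bits 中对应的 bit 置位，建议对照 fs/file.c 确认。）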

结语

到现在为止，fd 是怎么生成的、在哪里有记录，都介绍了；open 的实现就讲到这里。本来是要讲 open 的，但继续挖下去，感觉应该先把 inode 这些原理讲清楚，这样才方便展开讲

菜单

分享

文件描述符是？

文件描述符是？

fd 是什么？ `fd` 是什么？

文件描述符的应用

文件描述符就只是单纯的数字吗？

文件描述符 0,1,2 什么？

fd在内核中是怎么构造的？

结语

评论

A2A 初理解：让 AI Agent 真正“互相协作”的通用协议

slow op的排查手段（更新中）

asan内存检测

模型即芯片：AI 推理新分叉

rclone拷贝桶对象失败定位过程

训练初了解：把大模型看成一个复杂函数（通俗版）

vector扩容

智能指针是线程安全的？

ceph中 RBD 使用

cas 无锁编程

分享

文件描述符是？

文件描述符是？

fd 是什么？ fd 是什么？

文件描述符的应用

文件描述符就只是单纯的数字吗？

文件描述符 0,1,2 什么 ？

fd在内核中是怎么构造的？

结语

评论

fd 是什么？ `fd` 是什么？

文件描述符 0,1,2 什么？