2021SC@SDUSC
上一篇博文已经讲过,除了自身的Ext2之外,Linux还支持多种文件系统(Ext4、FAT等),用户可以通过统一的文件系统界面(系统调用)来操作不同的文件系统。从用户的角度看,我们并不需要关注文件系统的类型以及具体操作是如何完成的,提供这一层抽象的就是虚拟文件系统(VFS)。VFS为用户提供了一组系统调用,如读写文件的read()、write()以及移动文件指针的lseek()等。
那么,内核与不同的文件系统的接口是怎么实现的呢?
这就要提到include/linux目录下fs.h文件中的file_operations数据结构。
/*
 * struct file_operations - table of per-file operations.
 *
 * Almost every member is a function pointer. Each filesystem supplies its
 * own instance of this table; for example, ->write points at that
 * filesystem's routine for writing a file.
 *
 * NOTE(review): the precise contract of each hook (locking, return values,
 * which hooks may be left NULL) is not visible in this declaration; confirm
 * against the kernel's VFS documentation before relying on a member.
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate) (struct file *, struct dir_context *);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	/* The only plain data member besides owner; not a callback. */
	unsigned long mmap_supported_flags;
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
	/* This hook exists only in builds where CONFIG_MMU is not defined. */
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
			loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
};	/* the terminating ';' is required for a valid struct declaration */
不难发现,该结构体内几乎全都是函数指针,具体到每一个文件系统,均需实现自己的file_operations。比如,write就指向该文件系统的用来实现写文件操作的函数。
每个进程通过open()与具体的文件建立起一个读写联系。在代表进程的task_struct数据结构中有两个指针,分别指向代表文件系统信息的fs_struct和代表已打开文件信息的files_struct。
在fs_struct结构体中:
/*
 * struct fs_struct - filesystem information of a process.
 *
 * Holds the three directories a process refers to, plus the mount each of
 * them belongs to. The directories need not live on the same filesystem,
 * which is why every dentry pointer is paired with its own vfsmount pointer.
 */
struct fs_struct {
	atomic_t count;
	rwlock_t lock;
	int umask;

	/* Directory entries: process root, current directory, alternate root. */
	struct dentry *root;
	struct dentry *pwd;
	struct dentry *altroot;

	/* Mounts corresponding to root, pwd and altroot respectively. */
	struct vfsmount *rootmnt;
	struct vfsmount *pwdmnt;
	struct vfsmount *altrootmnt;
};
root,pwd,altroot这三个指针指向结构体dentry,而dentry记录的是目录项,所以root代表的是进程的根目录,pwd代表的是进程当前的目录, altroot代表的是替换根目录。实际上,这三个目录不一定安装在同一个文件系统中,所以用rootmnt,pwdmnt和altrootmnt这三个指针指向对应的vfsmount数据结构。
在files_struct结构体中:
/*
 * struct files_struct - information about the files a process has open.
 *
 * The core of the structure is fd_array, an array of pointers to struct
 * file. Members are grouped into a read-mostly part and a written part; the
 * written part starts on its own cache line on SMP builds, so member order
 * matters and must not be rearranged.
 */
struct files_struct {
/*
* read mostly part
*/
atomic_t count;
bool resize_in_progress;
wait_queue_head_t resize_wait;
/*
 * fdt is an RCU-annotated pointer to a struct fdtable; fdtab is a struct
 * fdtable embedded in this structure.
 * NOTE(review): presumably fdt points at fdtab until the table is
 * resized - confirm against the code that initialises this structure.
 */
struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
unsigned int next_fd;
/* Single-word bitmaps embedded in the structure. */
unsigned long close_on_exec_init[1];
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
/* Embedded array of NR_OPEN_DEFAULT RCU-annotated struct file pointers. */
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
files_struct结构体的主要内容就是file结构体指针数组fd_array,其中每一个指针所指向的file结构体保存的是进程已经打开的文件的相关信息。
在file结构体中:
/*
 * struct file - information about one file a process has opened.
 *
 * f_op points at the file_operations table of the filesystem the file
 * belongs to, so the operations available on the file can be reached
 * through this structure.
 *
 * NOTE(review): the locking rules stated below are the only ones visible
 * here; rules for the remaining members must be confirmed elsewhere.
 */
struct file {
	union {
		struct llist_node fu_llist;
		struct rcu_head fu_rcuhead;
	} f_u;
	struct path f_path;
	struct inode *f_inode; /* cached value */
	const struct file_operations *f_op;

	/*
	 * Protects f_ep, f_flags.
	 * Must not be taken from IRQ context.
	 */
	spinlock_t f_lock;
	enum rw_hint f_write_hint;
	atomic_long_t f_count;
	unsigned int f_flags;
	fmode_t f_mode;
	struct mutex f_pos_lock;
	loff_t f_pos;
	struct fown_struct f_owner;
	const struct cred *f_cred;
	struct file_ra_state f_ra;

	u64 f_version;
	/* Present only when CONFIG_SECURITY is defined. */
#ifdef CONFIG_SECURITY
	void *f_security;
#endif
	/* needed for tty driver, and maybe others */
	void *private_data;

	/* Present only when CONFIG_EPOLL is defined. */
#ifdef CONFIG_EPOLL
	/* Used by fs/eventpoll.c to link all the hooks to this file */
	struct hlist_head *f_ep;
#endif /* #ifdef CONFIG_EPOLL */
	struct address_space *f_mapping;
	errseq_t f_wb_err;
	errseq_t f_sb_err; /* for syncfs */
};	/* the terminating ';' is required for a valid struct declaration */
f_op指针指向该文件所属文件系统的file_operations结构体,通过file可以知道如何对该文件进行操作。f_path中的dentry指针(即f_path.dentry,早期内核中为f_dentry字段)指向该文件的dentry数据结构,记录了该文件的目录项。
总结
Linux通过VFS提供一组约定的数据结构(dentry、inode等),它们在进程通过open()与某一个文件建立联系之后被初始化。虽然不同文件系统的实现细节不同,但是它们都会有实现类似功能的具体数据结构。