1.4.1 Linux块设备驱动程序原理(1)
顾名思义,块设备就是支持以块为单位进行读写的设备,块设备驱动程序则是驱动这类设备的程序。块设备和字符设备最大的区别在于读写数据的基本单元不同。块设备读写数据的基本单元为块,例如磁盘通常为一个扇区(sector),而字符设备的基本单元为字节。从实现角度来看,字符设备的实现比较简单,内核例程和用户态API一一对应,这种映射关系由字符设备的file_operations维护。块设备接口则相对复杂,读写API并不直接到达块设备层,而是先到达文件系统层,然后再由文件系统层向块设备发起读写请求。
block_device结构代表了内核中的一个块设备。它可以表示整个磁盘或一个特定的分区。当这个结构代表一个分区时,它的bd_contains成员指向包含这个分区的设备,bd_part成员指向设备的分区结构。当这个结构代表一个块设备时,bd_disk成员指向设备的gendisk结构。
- struct block_device { /* Kernel object for one block device: either a whole disk or one specific partition. */
- dev_t bd_dev;
- struct inode * bd_inode; /* partition inode */
- int bd_openers;
- struct semaphore bd_sem; /* open/close lock */
- struct semaphore bd_mount_sem; /* mount mutex */
- struct list_head bd_inodes;
- void * bd_holder;
- int bd_holders;
- struct block_device * bd_contains; /* for a partition: the device that contains this partition */
- unsigned bd_block_size;// partition block size
- struct hd_struct * bd_part; /* for a partition: the partition structure of the device */
- unsigned bd_part_count;// open count
- int bd_invalidated;
- struct gendisk * bd_disk; /* gendisk structure of the device */
- struct list_head bd_list;
- struct backing_dev_info *bd_inode_backing_dev_info;
- unsigned long bd_private;
- };
gendisk是一个单独的磁盘驱动器的内核表示。内核还使用gendisk来表示分区。
- struct gendisk { /* Kernel representation of a single disk drive; the kernel also uses it to represent partitions. */
- int major; // major device number
- int first_minor;
- int minors; // maximum number of minor numbers; 1 if the device cannot be partitioned
- char disk_name[32]; // name of the disk (main device name)
- struct hd_struct **part; // partition information, minors entries
- struct block_device_operations *fops;// device operations
- struct request_queue *queue; // request queue managing the device's I/O requests
- void *private_data;
- sector_t capacity;
- int flags;
- char devfs_name[64];
- int number;
- struct device *driverfs_dev;
- struct kobject kobj;
- struct timer_rand_state *random;
- int policy;
- atomic_t sync_io;
- unsigned long stamp, stamp_idle;
- int in_flight;
- #ifdef CONFIG_SMP
- struct disk_stats *dkstats;
- #else
- struct disk_stats dkstats;
- #endif
- };
gendisk结构的操作函数包括以下几个:
- struct gendisk *alloc_disk(int minors); // allocate a disk
- void add_disk(struct gendisk *disk); // add the disk information
- void unlink_gendisk(struct gendisk *disk); // remove the disk information
- void delete_partition(struct gendisk *disk, int part); // delete a partition
- void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags);// add a partition
-
1.4.1 Linux块设备驱动程序原理(2)
block_device_operations结构是块设备对应的操作接口,是连接抽象的块设备操作与具体块设备操作之间的枢纽。
- struct block_device_operations { /* Operations interface of a block device: the link between abstract block-device operations and a concrete device's implementation. */
- int (*open) (struct inode *, struct file *);
- int (*release) (struct inode *, struct file *);
- int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
- long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
- long (*compat_ioctl) (struct file *, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t, unsigned long *);
- int (*media_changed) (struct gendisk *);
- int (*revalidate_disk) (struct gendisk *);
- int (*getgeo)(struct block_device *, struct hd_geometry *);
- struct module *owner; /* NOTE(review): presumably the module implementing these operations (the example sets it to THIS_MODULE) */
- };
block_device_operations并不能完全提供文件操作全部的API,实际上只提供了open、release等函数,其他的文件操作依赖于def_blk_fops:
- const struct file_operations def_blk_fops = { /* Default file operations for block devices: covers the file-level calls that block_device_operations itself does not provide. */
- .open = blkdev_open,
- .release = blkdev_close,
- .llseek = block_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write= generic_file_aio_write_nolock,
- .mmap = generic_file_mmap,
- .fsync = block_fsync,
- .unlocked_ioctl = block_ioctl,
- #ifdef CONFIG_COMPAT
- .compat_ioctl = compat_blkdev_ioctl,
- #endif
- .splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
- };
系统对块设备进行读写操作时,通过块设备通用的读写操作函数将一个请求保存在该设备的操作请求队列(request queue)中,然后调用这个块设备的底层处理函数,对请求队列中的操作请求进行逐一执行。request_queue结构描述了块设备的请求队列,该结构定义如下:
- struct request_queue /* Describes the request queue of a block device. */
- {
- struct list_head queue_head;
- struct request *last_merge;
- elevator_t elevator;
- /* request list of the queue */
- struct request_list rq;
- request_fn_proc *request_fn;
- merge_request_fn *back_merge_fn;
- merge_request_fn *front_merge_fn;
- merge_requests_fn *merge_requests_fn;
- make_request_fn *make_request_fn;
- prep_rq_fn *prep_rq_fn;
- unplug_fn *unplug_fn;
- merge_bvec_fn *merge_bvec_fn;
- activity_fn *activity_fn;
- /* auto-unplug state */
- struct timer_list unplug_timer;
- int unplug_thresh;
- unsigned long unplug_delay; /* auto-unplug delay */
- struct work_struct unplug_work;
- struct backing_dev_info backing_dev_info;
- void *queuedata;
- void *activity_data;
- unsigned long bounce_pfn;
- int bounce_gfp;
- unsigned long queue_flags;// various queue flags
- /* protects the queue structures against reentrancy */
- spinlock_t *queue_lock;
- /* kobject of the queue */
- struct kobject kobj;
- /* queue settings */
- unsigned long nr_requests; /* maximum number of requests */
- unsigned int nr_congestion_on;
- unsigned int nr_congestion_off;
- unsigned short max_sectors;
- unsigned short max_phys_segments;
- unsigned short max_hw_segments;
- unsigned short hardsect_size;
- unsigned int max_segment_size;
- unsigned long seg_boundary_mask;
- unsigned int dma_alignment;
- struct blk_queue_tag *queue_tags;
- atomic_t refcnt;
- unsigned int in_flight;
- /* sg parameter settings */
- unsigned int sg_timeout;
- unsigned int sg_reserved_size;
- };
请求队列相关的处理函数包括:
- // Create a request queue; a spinlock is supplied at creation time.
- request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock);
- // Get the first not-yet-completed request in the queue.
- struct request *elv_next_request(request_queue_t *q);
- void end_request(struct request *req, int uptodate);// complete a request
- void blk_stop_queue(request_queue_t *queue); // stop request processing
- void blk_start_queue(request_queue_t *queue); // start request processing
- void blk_cleanup_queue(request_queue_t *);// clean up the request queue
1.4.2 简单的块设备驱动程序实例
向内核注册和注销一个块设备可使用如下函数:
- int register_blkdev(unsigned int major, const char *name); // register a block device with the kernel
- int unregister_blkdev(unsigned int major, const char *name); // unregister a block device from the kernel
例1.10 简单的块设备驱动程序实例
代码见光盘\src\1drivermodel\1-10block。核心代码如下所示:
- static struct request_queue *Queue; // request queue of the device, created in simpleblockinit
- // Custom block device structure
- static struct simpleblockdevice
- {
- unsigned long size; // device size in bytes (set to nsectors*hardsect_size)
- spinlock_t lock; // spinlock handed to blk_init_queue for the request queue
- u8 *data; // vmalloc'ed buffer that holds the device contents
- struct gendisk *gd; // gendisk allocated for this device
- } Device;
- /*
-  * Handle one I/O transfer against the in-memory device buffer.
-  *
-  * dev:    device whose data buffer is read or written
-  * sector: first sector of the transfer, in units of hardsect_size bytes
-  * nsect:  number of sectors to transfer
-  * buffer: request buffer that is the source (write) or destination (read)
-  * write:  non-zero copies buffer into the device, zero copies out of it
-  *
-  * A transfer that does not fit inside dev->size is logged and dropped
-  * without copying anything.
-  */
- static void simpleblocktransfer(struct simpleblockdevice *dev, unsigned long sector,
-                                 unsigned long nsect, char *buffer, int write)
- {
-     unsigned long offset = sector*hardsect_size;
-     unsigned long nbytes = nsect*hardsect_size;
-     /*
-      * Range check written without "offset + nbytes" so that a huge
-      * request cannot wrap around and slip past the test.
-      */
-     if (offset > dev->size || nbytes > dev->size - offset)
-     {
-         /* both values are unsigned long, hence %lu; report the real direction */
-         printk (KERN_NOTICE "sbd: Beyond-end %s (%lu %lu)\n",
-                 write ? "write" : "read", offset, nbytes);
-         return;
-     }
-     if (write)
-         memcpy(dev->data + offset, buffer, nbytes);
-     else
-         memcpy(buffer, dev->data + offset, nbytes);
- }
- /*
-  * Request function of the queue: drains every request currently queued on q.
-  * File-system requests are served from the in-memory device and completed
-  * successfully; any other request is logged and completed with failure.
-  */
- static void simpleblockrequest(struct request_queue *q)
- {
-     struct request *rq;
-     /* fetch requests one at a time until the queue has none left */
-     for (rq = elv_next_request(q); rq != NULL; rq = elv_next_request(q))
-     {
-         if (blk_fs_request(rq))
-         {
-             simpleblocktransfer(&Device, rq->sector, rq->current_nr_sectors,
-                                 rq->buffer, rq_data_dir(rq));
-             end_request(rq, 1);
-         }
-         else
-         {
-             printk (KERN_NOTICE "Skip non-CMD request\n");
-             end_request(rq, 0);
-         }
-     }
- }
- /*
-  * ioctl handler of the simple block device.
-  *
-  * Only HDIO_GETGEO is supported: it reports a made-up geometry of 4 heads
-  * and 16 sectors per track (64 sectors per cylinder) to user space.
-  *
-  * Returns 0 on success, -EFAULT if the result cannot be copied to the user
-  * buffer at arg, and -ENOTTY for any other command.
-  */
- int simpleblockioctl (struct inode *inode, struct file *filp,unsigned int cmd, unsigned long arg)
- {
-     unsigned long size;
-     struct hd_geometry geo;
-     switch(cmd)
-     {
-     /* report the disk geometry */
-     case HDIO_GETGEO:
-         /* zero the whole struct so no uninitialized padding bytes reach user space */
-         memset(&geo, 0, sizeof(geo));
-         /*
-          * Device.size is a byte count (nsectors*hardsect_size), so the
-          * number of kernel sectors is the byte count divided by
-          * KERNEL_SECTOR_SIZE, matching the capacity set in simpleblockinit.
-          */
-         size = Device.size / KERNEL_SECTOR_SIZE;
-         /* 4 heads * 16 sectors = 64 sectors per cylinder */
-         geo.cylinders = (size & ~0x3f) >> 6;
-         geo.heads = 4;
-         geo.sectors = 16;
-         geo.start = 4;
-         if (copy_to_user((void *) arg, &geo, sizeof(geo)))
-             return -EFAULT;
-         return 0;
-     default:
-         break;
-     }
-     return -ENOTTY; /* unknown command */
- }
- // Operations table of the device: only an ioctl handler is supplied.
- static struct block_device_operations simpleblockops = {
-     .ioctl = simpleblockioctl,
-     .owner = THIS_MODULE,
- };
- /*
-  * Module initialisation: allocate the in-memory disk, create its request
-  * queue, register the block device major and publish the gendisk.
-  *
-  * Returns 0 on success or a negative errno. On failure everything acquired
-  * so far is released in reverse order, including the request queue.
-  */
- static int __init simpleblockinit(void)
- {
-     int ret = -ENOMEM;
-     Device.size = nsectors*hardsect_size;
-     spin_lock_init(&Device.lock);
-     Device.data = vmalloc(Device.size);
-     if (Device.data == NULL)
-         return -ENOMEM;
-     /* create the request queue; simpleblockrequest is its request function */
-     Queue = blk_init_queue(simpleblockrequest, &Device.lock);
-     if (Queue == NULL)
-         goto out_free;
-     blk_queue_hardsect_size(Queue, hardsect_size);
-     /*
-      * Register the block device. register_blkdev() returns a negative errno
-      * on failure; on success it returns the allocated major when major_num
-      * is 0 and 0 when a specific major was requested.
-      */
-     ret = register_blkdev(major_num, "sbd");
-     if (ret < 0) {
-         printk(KERN_WARNING "sbd: unable to get major number\n");
-         goto out_queue;
-     }
-     if (major_num == 0)
-         major_num = ret;
-     Device.gd = alloc_disk(16);
-     if (! Device.gd) {
-         ret = -ENOMEM;
-         goto out_unregister;
-     }
-     Device.gd->major = major_num;
-     Device.gd->first_minor = 0;
-     Device.gd->fops = &simpleblockops;
-     Device.gd->private_data = &Device;
-     strcpy (Device.gd->disk_name, "sbd0");
-     /* set the capacity, expressed in kernel sectors */
-     set_capacity(Device.gd, nsectors*(hardsect_size/KERNEL_SECTOR_SIZE));
-     Device.gd->queue = Queue;
-     add_disk(Device.gd);
-     return 0;
- out_unregister:
-     unregister_blkdev(major_num, "sbd");
- out_queue:
-     /* the queue was created above and must not be leaked on failure */
-     blk_cleanup_queue(Queue);
- out_free:
-     vfree(Device.data);
-     return ret;
- }
- static void __exit simpleblockexit(void) /* Module exit: releases what simpleblockinit set up. */
- {
- del_gendisk(Device.gd); /* NOTE(review): presumably the counterpart of add_disk() in simpleblockinit */
- put_disk(Device.gd); /* NOTE(review): presumably releases the gendisk obtained from alloc_disk() */
- unregister_blkdev(major_num, "sbd"); /* unregister the block device registered under the name "sbd" */
- blk_cleanup_queue(Queue); /* clean up the request queue created by blk_init_queue() */
- vfree(Device.data); /* free the buffer allocated with vmalloc() */
- }
- module_init(simpleblockinit); /* simpleblockinit is the module's init function */
- module_exit(simpleblockexit); /* simpleblockexit is the module's exit function */
运行结果如下:
- [root@/home]#cat /proc/filesystems
- nodev sysfs
- nodev rootfs
- nodev bdev
- nodev proc
- nodev binfmt_misc
- nodev debugfs
- nodev securityfs
- nodev sockfs
- nodev usbfs
- nodev pipefs
- nodev anon_inodefs
- nodev futexfs
- nodev tmpfs
- nodev inotifyfs
- ext3
- cramfs
- nodev ramfs
- msdos
- vfat
- iso9660
- nodev nfs
- nodev nfs4
- nodev mqueue
- nodev rpc_pipefs
- [root@/home]#insmod demo.ko
- sbd0: unknown partition table
- [root@/home]#mknod /dev/sbd b 253 0
- [root@/home]#./mkfs.ext3 /dev/sbd
- mke2fs 1.40.9 (27-Apr-2008)
- Filesystem label=
- OS type: Linux
- Block size=1024 (log=0)
- Fragment size=1024 (log=0)
- 1280 inodes, 5120 blocks
- 256 blocks (5.00%) reserved for the super user
- First data block=1
- Maximum filesystem blocks=5242880
- 1 block group
- 8192 blocks per group, 8192 fragments per group
- 1280 inodes per group
- Writing inode tables: done
- Creating journal (1024 blocks): done
- Writing superblocks and filesystem accounting information: done
- This filesystem will be automatically checked every 39 mounts or
- 180 days, whichever comes first. Use tune2fs -c or -i to override.
- [root@/home]#mount -t ext3 /dev/sbd /mnt/u
- kjournald starting. Commit interval 5 seconds
- EXT3 FS on sbd0, internal journal
- EXT3-fs: mounted filesystem with ordered data mode.
- [root@/home]#df
- Filesystem 1k-blocks Used Available Use% Mounted on
- rootfs 2063504 1191136 767548 61% /
- /dev/root 2063504 1191136 767548 61% /
- /dev/sbd 4955 1063 3636 23% /mnt/u
- [root@/home]#cd /mnt/u
- [root@/mnt/u]#ls
- lost+found