首先来看scsi_prep_fn
/*
 * scsi_prep_fn - default request-preparation hook.
 * @q:   request queue; its queuedata field is read as the scsi_device
 * @req: request to prepare
 *
 * Only REQ_TYPE_BLOCK_PC requests are set up (through
 * scsi_setup_blk_pc_cmnd()); every other request type is given the
 * BLKPREP_KILL verdict.  The verdict is always passed through
 * scsi_prep_return(), whose result is returned to the caller.
 */
int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;
	int ret;

	if (req->cmd_type != REQ_TYPE_BLOCK_PC)
		ret = BLKPREP_KILL;
	else
		ret = scsi_setup_blk_pc_cmnd(sdev, req);

	return scsi_prep_return(q, req, ret);
}
scsi_prep_fn只能处理来自SCSI公共层的命令,在scsi_setup_blk_pc_cmnd函数返回后,根据返回值调用scsi_prep_return进行相应的处理
/*
 * scsi_setup_blk_pc_cmnd - prepare a REQ_TYPE_BLOCK_PC request.
 * @sdev: target SCSI device
 * @req:  request to prepare
 *
 * Returns BLKPREP_OK on success, BLKPREP_DEFER when no scsi_cmnd could be
 * obtained, or whatever non-OK value scsi_prep_state_check() or
 * scsi_init_io() reported.
 */
int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	int ret;

	/*
	 * Preliminary check based on the request flags and the state of the
	 * SCSI device; bail out unless it says the request may proceed.
	 */
	ret = scsi_prep_state_check(sdev, req);
	if (ret != BLKPREP_OK)
		return ret;

	/*
	 * Obtain the scsi_cmnd for this request.  NOTE(review): according to
	 * the accompanying write-up the helper allocates a new descriptor and
	 * records it in req->special, or reuses the one already stored there;
	 * the helper itself is defined elsewhere.
	 */
	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * BLOCK_PC requests may transfer data, in which case they must have
	 * a bio attached to them.  Or they might contain a SCSI command that
	 * does not transfer data, in which case they may optionally submit a
	 * request without an attached bio.
	 *
	 * With a bio present, scsi_init_io() sets up the command's data
	 * buffer from it; without one, the command's data buffer descriptor
	 * is simply zeroed.
	 */
	if (req->bio) {
		BUG_ON(!req->nr_phys_segments);

		ret = scsi_init_io(cmd, GFP_ATOMIC);
		if (unlikely(ret))
			return ret;
	} else {
		BUG_ON(blk_rq_bytes(req));

		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
		req->buffer = NULL;
	}

	cmd->cmd_len = req->cmd_len;

	/* Derive the DMA direction from the payload size and request direction. */
	if (!blk_rq_bytes(req))
		cmd->sc_data_direction = DMA_NONE;
	else if (rq_data_dir(req) == WRITE)
		cmd->sc_data_direction = DMA_TO_DEVICE;
	else
		cmd->sc_data_direction = DMA_FROM_DEVICE;

	cmd->transfersize = blk_rq_bytes(req);
	cmd->allowed = req->retries;
	return BLKPREP_OK;
}
sd_prep_fn函数从request结构中的信息构造SCSI(读或写)命令,将结果保存在request的special域,sd_prep_fn只能处理来自上层的请求(REQ_TYPE_FS),以及来自SCSI层的(REQ_TYPE_BLOCK_PC)请求。有一种请求比较特殊,即所谓的DISCARD请求。这个请求来自上层,但需要被转换成SCSI请求来处理
/**
* sd_prep_fn - build a scsi (read or write) command from
* information in the request structure.
* @SCpnt: pointer to mid-level's per scsi command structure that
* contains request and into which the scsi command is written
*
* Returns 1 if successful and 0 if error (or cannot be done now).
**/
static int sd_prep_fn(struct request_queue *q, struct request *rq)
{
struct scsi_cmnd *SCpnt;
struct scsi_device *sdp = q->queuedata;
struct gendisk *disk = rq->rq_disk;
struct scsi_disk *sdkp;
sector_t block = blk_rq_pos(rq);
sector_t threshold;
unsigned int this_count = blk_rq_sectors(rq);
int ret, host_dif;
unsigned char protect; /*
* Discard request come in as REQ_TYPE_FS but we turn them into
* block PC requests to make life easier.
*/
if (rq->cmd_flags & REQ_DISCARD) {
ret = sd_setup_discard_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_WRITE_SAME) {
ret = sd_setup_write_same_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_FLUSH) {
ret = scsi_setup_flush_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
ret = scsi_setup_blk_pc_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type != REQ_TYPE_FS) {
ret = BLKPREP_KILL;
goto out;
}
ret = scsi_setup_fs_cmnd(sdp, rq);
if (ret != BLKPREP_OK)
goto out;
SCpnt = rq->special;
sdkp = scsi_disk(disk); /* from here on until we're complete, any goto out
* is used for a killable error condition */
ret = BLKPREP_KILL; SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"sd_prep_fn: block=%llu, "
"count=%d\n",
(unsigned long long)block,
this_count)); /*
* 以下几种情况直接结束命令:
* 1.SCSI不在线
* 2.请求数据超出了设备容量
* 3.磁盘介质发生了变化
*/
if (!sdp || !scsi_device_online(sdp) ||
block + blk_rq_sectors(rq) > get_capacity(disk)) {
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Finishing %u sectors\n",
blk_rq_sectors(rq)));
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Retry with 0x%p\n", SCpnt));
goto out;
} if (sdp->changed) {
/*
* quietly refuse to do anything to a changed disc until
* the changed bit has been reset
*/
/* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
goto out;
} /*
* Some SD card readers can't handle multi-sector accesses which touch the last one or two hardware sectors. Split accesses as needed.
* 某些设备(如SD卡)不能多扇区访问最后的部分扇区,需分割访问
*/
threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
(sdp->sector_size / ); if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
if (block < threshold) {
/* Access up to the threshold but not beyond */
this_count = threshold - block;
} else {
/* Access only a single hardware sector */
this_count = sdp->sector_size / ;
}
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
(unsigned long long)block)); /*
* If we have a 1K hardware sectorsize, prevent access to single 512 byte sectors.
* In theory we could handle this - in fact the scsi cdrom driver must be able to handle this because
* we typically use 1K blocksizes, and cdroms typically have 2K hardware sectorsizes.
* Of course, things are simpler with the cdrom, since it is read-only. For performance reasons,
* the filesystems should be able to handle this and not force the scsi disk driver to use bounce buffers for this.
* 磁盘的硬件扇区长度可能不是512,而是1024/2048或4096
*/
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (rq_data_dir(rq) == WRITE) {
if (!sdp->writeable) {
goto out;
}
SCpnt->cmnd[] = WRITE_6;
SCpnt->sc_data_direction = DMA_TO_DEVICE; if (blk_integrity_rq(rq))
sd_dif_prepare(rq, block, sdp->sector_size); } else if (rq_data_dir(rq) == READ) {
SCpnt->cmnd[] = READ_6;
SCpnt->sc_data_direction = DMA_FROM_DEVICE;
} else {
scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags);
goto out;
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"%s %d/%u 512 byte blocks.\n",
(rq_data_dir(rq) == WRITE) ?
"writing" : "reading", this_count,
blk_rq_sectors(rq))); /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
if (host_dif)
protect = << ;
else
protect = ; if (host_dif == SD_DIF_TYPE2_PROTECTION) {
SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); if (unlikely(SCpnt->cmnd == NULL)) {
ret = BLKPREP_DEFER;
goto out;
} SCpnt->cmd_len = SD_EXT_CDB_SIZE;
memset(SCpnt->cmnd, , SCpnt->cmd_len);
SCpnt->cmnd[] = VARIABLE_LENGTH_CMD;
SCpnt->cmnd[] = 0x18;
SCpnt->cmnd[] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : ); /* LBA */
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Expected Indirect LBA */
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Transfer length */
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else if (sdp->use_16_for_rw) {
SCpnt->cmnd[] += READ_16 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
} else if ((this_count > 0xff) || (block > 0x1fffff) ||
scsi_device_protection(SCpnt->device) ||
SCpnt->device->use_10_for_rw) {
if (this_count > 0xffff)
this_count = 0xffff; SCpnt->cmnd[] += READ_10 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else {
if (unlikely(rq->cmd_flags & REQ_FUA)) {
/*
* This happens only if this drive failed
* 10byte rw command with ILLEGAL_REQUEST
* during operation and thus turned off
* use_10_for_rw.
*/
scmd_printk(KERN_ERR, SCpnt,
"FUA write on READ/WRITE(6) drive\n");
goto out;
} SCpnt->cmnd[] |= (unsigned char) ((block >> ) & 0x1f);
SCpnt->cmnd[] = (unsigned char) ((block >> ) & 0xff);
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count;
SCpnt->cmnd[] = ;
}
SCpnt->sdb.length = this_count * sdp->sector_size; /* If DIF or DIX is enabled, tell HBA how to handle request */
if (host_dif || scsi_prot_sg_count(SCpnt))
sd_prot_op(SCpnt, host_dif); /*
* We shouldn't disconnect in the middle of a sector, so with a dumb
* host adapter, it's safe to assume that we can at least transfer
* this many bytes between each connect / disconnect.
*/
SCpnt->transfersize = sdp->sector_size;
SCpnt->underflow = this_count << ;
SCpnt->allowed = SD_MAX_RETRIES; /*
* This indicates that the command is ready from our end to be
* queued.
*/
ret = BLKPREP_OK;
out:
return scsi_prep_return(q, rq, ret);
}
scsi_setup_fs_cmnd函数的前半部分与前面分析过的scsi_setup_blk_pc_cmnd基本相同。来自文件系统的请求必须传输数据,因此需要为bio描述符中的数据分配SCSI数据缓冲区,这一工作由scsi_init_io完成
/*
 * Setup a REQ_TYPE_FS command.  These are simple read/write request
 * from filesystems that still need to be translated to SCSI CDBs from
 * the ULD.
 *
 * @sdev: target SCSI device
 * @req:  filesystem request to prepare
 *
 * Returns the non-OK value reported by scsi_prep_state_check() or by the
 * device handler's prep_fn, BLKPREP_DEFER when no scsi_cmnd could be
 * obtained, and otherwise the result of scsi_init_io().
 */
int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	int ret = scsi_prep_state_check(sdev, req);

	if (ret != BLKPREP_OK)
		return ret;

	/* Give an attached device handler the chance to veto the request. */
	if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
			 && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
		ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
		if (ret != BLKPREP_OK)
			return ret;
	}

	/*
	 * Filesystem requests must transfer data.
	 */
	BUG_ON(!req->nr_phys_segments);

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * Unlike BLOCK_PC requests, a filesystem request carries all of its
	 * information in the bio, so the SCSI command still has to be built
	 * for it.  The result will live in cmd->cmnd, hence the command
	 * buffer is zeroed first.
	 */
	memset(cmd->cmnd, 0, BLK_MAX_CDB);
	return scsi_init_io(cmd, GFP_ATOMIC);
}