Change iscsi_scsi_command_async() to use iovectors for writes.

Change iscsi_scsi_command_async() to write data-out using iovectors
attached to the scsi task structure instead of copying the data into
the buffer holding the header.
Still allow passing the data via an argument to the function so that the
ABI does not change, but then just convert the data to an iovector.

Update the write_to_socket functions to know about the iovectors and write them
as part of the pdu.

Convert write10_task to use iovectors.

This will allow 'zero-copy' writes through libiscsi.
However, since 'zero-copy' writes mean that we make more send() calls into
the kernel, this may degrade performance for very small I/O.

A SCSI write will now take at least 2 send() calls.
One send call for the iscsi header structure and a second send call for the
payload data.
This will be more expensive than the old memcpy() of payload data plus one send() call since the send() will be a lot more expensive than memcpy() of a small amount of data.
This commit is contained in:
Ronnie Sahlberg
2012-11-25 18:17:51 -08:00
parent beed0809a5
commit 3ac9fdcbff
4 changed files with 121 additions and 91 deletions

View File

@@ -211,7 +211,11 @@ struct iscsi_pdu {
void *private_data;
int written;
struct iscsi_data outdata; /* Header and Immediate Data */
struct iscsi_data outdata; /* Header for PDU to send */
uint32_t out_offset; /* Offset into data-out iovector */
uint32_t out_len; /* Amount of data to sent */
struct iscsi_data indata;
struct iscsi_data nidata; /* Non-Immediate Data */
@@ -296,7 +300,8 @@ void iscsi_set_error(struct iscsi_context *iscsi, const char *error_string,
...) __attribute__((format(printf, 2, 3)));
unsigned char *iscsi_get_user_in_buffer(struct iscsi_context *iscsi, struct iscsi_in_pdu *in, uint32_t pos, ssize_t *count);
unsigned char *scsi_task_get_data_in_buffer(struct scsi_task *task, uint32_t pos, ssize_t *count);
unsigned char *iscsi_get_user_out_buffer(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, uint32_t pos, ssize_t *count);
inline void* iscsi_malloc(struct iscsi_context *iscsi, size_t size);
inline void* iscsi_zmalloc(struct iscsi_context *iscsi, size_t size);

View File

@@ -690,6 +690,9 @@ EXTERN int scsi_datain_getfullsize(struct scsi_task *task);
EXTERN void *scsi_datain_unmarshall(struct scsi_task *task);
EXTERN void *scsi_cdb_unmarshall(struct scsi_task *task, enum scsi_opcode opcode);
unsigned char *scsi_task_get_data_in_buffer(struct scsi_task *task, uint32_t pos, ssize_t *count);
unsigned char *scsi_task_get_data_out_buffer(struct scsi_task *task, uint32_t pos, ssize_t *count);
EXTERN struct scsi_task *scsi_cdb_read6(uint32_t lba, uint32_t xferlen, int blocksize);
EXTERN struct scsi_task *scsi_cdb_read10(uint32_t lba, uint32_t xferlen, int blocksize, int rdprotect, int dpo, int fua, int fua_nv, int group_number);
EXTERN struct scsi_task *scsi_cdb_read12(uint32_t lba, uint32_t xferlen, int blocksize, int rdprotect, int dpo, int fua, int fua_nv, int group_number);

View File

@@ -74,7 +74,7 @@ iscsi_scsi_response_cb(struct iscsi_context *iscsi, int status,
static int
iscsi_send_data_out(struct iscsi_context *iscsi, struct iscsi_pdu *cmd_pdu,
uint32_t ttt, uint32_t offset, uint32_t tot_len)
uint32_t ttt, uint32_t offset, uint32_t tot_len)
{
while (tot_len > 0) {
uint32_t len = tot_len;
@@ -85,11 +85,10 @@ iscsi_send_data_out(struct iscsi_context *iscsi, struct iscsi_pdu *cmd_pdu,
len = iscsi->target_max_recv_data_segment_length;
}
pdu = iscsi_allocate_pdu_with_itt_flags_size(iscsi, ISCSI_PDU_DATA_OUT,
pdu = iscsi_allocate_pdu_with_itt_flags(iscsi, ISCSI_PDU_DATA_OUT,
ISCSI_PDU_NO_PDU,
cmd_pdu->itt,
ISCSI_PDU_DELETE_WHEN_SENT|ISCSI_PDU_NO_CALLBACK,
len);
ISCSI_PDU_DELETE_WHEN_SENT|ISCSI_PDU_NO_CALLBACK);
if (pdu == NULL) {
iscsi_set_error(iscsi, "Out-of-memory, Failed to allocate "
"scsi data out pdu.");
@@ -126,18 +125,11 @@ iscsi_send_data_out(struct iscsi_context *iscsi, struct iscsi_pdu *cmd_pdu,
/* buffer offset */
iscsi_pdu_set_bufferoffset(pdu, offset);
if (iscsi_pdu_add_data(iscsi, pdu, cmd_pdu->nidata.data + offset, len)
!= 0) {
iscsi_set_error(iscsi, "Out-of-memory: Failed to "
"add outdata to the pdu.");
SLIST_REMOVE(&iscsi->outqueue, cmd_pdu);
SLIST_REMOVE(&iscsi->waitpdu, cmd_pdu);
cmd_pdu->callback(iscsi, SCSI_STATUS_ERROR, NULL,
cmd_pdu->private_data);
iscsi_free_pdu(iscsi, cmd_pdu);
iscsi_free_pdu(iscsi, pdu);
return -1;
}
pdu->out_offset = offset;
pdu->out_len = len;
/* update data segment length */
scsi_set_uint32(&pdu->outdata.data[4], pdu->out_len);
pdu->callback = cmd_pdu->callback;
pdu->private_data = cmd_pdu->private_data;
@@ -160,21 +152,19 @@ iscsi_send_data_out(struct iscsi_context *iscsi, struct iscsi_pdu *cmd_pdu,
return 0;
}
/* Using 'struct iscsi_data *d' for data-out is deprecated.
* Instead the task should have a data-out iovector attached to it.
* See iscsi_write10_task for an example.
*/
int
iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
struct scsi_task *task, iscsi_command_cb cb,
struct iscsi_data *d, void *private_data)
struct scsi_task *task, iscsi_command_cb cb,
struct iscsi_data *d, void *private_data)
{
struct iscsi_pdu *pdu;
struct iscsi_scsi_cbdata *scsi_cbdata;
struct iscsi_data data;
uint32_t offset = 0;
int flags;
data.data = (d != NULL) ? d->data : NULL;
data.size = (d != NULL) ? d->size : 0;
if (iscsi->session_type != ISCSI_SESSION_NORMAL) {
iscsi_set_error(iscsi, "Trying to send command on "
"discovery session.");
@@ -187,6 +177,21 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
return -1;
}
/* Convert old-style callers to the new 'iovector assigned to the task structure'
* model.
*/
if (d != NULL && d->data != NULL) {
struct scsi_iovec *iov;
iov = scsi_malloc(task, sizeof(struct scsi_iovec));
if (iov == NULL) {
return -1;
}
iov->iov_base = d->data;
iov->iov_len = d->size;
scsi_task_set_iov_out(task, iov, 1);
}
scsi_cbdata = iscsi_zmalloc(iscsi, sizeof(struct iscsi_scsi_cbdata));
if (scsi_cbdata == NULL) {
iscsi_set_error(iscsi, "Out-of-memory: failed to allocate "
@@ -200,8 +205,8 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
scsi_set_task_private_ptr(task, scsi_cbdata);
pdu = iscsi_allocate_pdu_size(iscsi, ISCSI_PDU_SCSI_REQUEST,
ISCSI_PDU_SCSI_RESPONSE, data.size);
pdu = iscsi_allocate_pdu(iscsi, ISCSI_PDU_SCSI_REQUEST,
ISCSI_PDU_SCSI_RESPONSE);
if (pdu == NULL) {
iscsi_set_error(iscsi, "Out-of-memory, Failed to allocate "
"scsi pdu.");
@@ -220,51 +225,21 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
break;
case SCSI_XFER_WRITE:
flags |= ISCSI_PDU_SCSI_WRITE;
if (data.size == 0) {
iscsi_set_error(iscsi, "DATA-OUT command but data "
"== NULL.");
iscsi_free_pdu(iscsi, pdu);
return -1;
}
if (data.size != task->expxferlen) {
iscsi_set_error(iscsi, "Data size:%d is not same as "
"expected data transfer "
"length:%d.", data.size,
task->expxferlen);
iscsi_free_pdu(iscsi, pdu);
return -1;
}
/* Assume all data is non-immediate data */
pdu->nidata.data = data.data;
pdu->nidata.size = data.size;
/* Are we allowed to send immediate data ? */
if (iscsi->use_immediate_data == ISCSI_IMMEDIATE_DATA_YES) {
uint32_t len = data.size;
uint32_t len = task->expxferlen;
if (len > iscsi->first_burst_length) {
len = iscsi->first_burst_length;
}
if (iscsi_pdu_add_data(iscsi, pdu, data.data, len)
!= 0) {
iscsi_set_error(iscsi, "Out-of-memory: Failed to "
"add outdata to the pdu.");
iscsi_free_pdu(iscsi, pdu);
return -1;
}
offset = len;
pdu->out_offset = 0;
pdu->out_len = len;
if (len == (uint32_t)data.size) {
/* We managed to send it all as immediate data, so there is no non-immediate data left */
pdu->nidata.data = NULL;
pdu->nidata.size = 0;
}
}
if (pdu->nidata.size > 0 && iscsi->use_initial_r2t == ISCSI_INITIAL_R2T_NO) {
/* update data segment length */
scsi_set_uint32(&pdu->outdata.data[4], pdu->out_len);
} else if (iscsi->use_initial_r2t == ISCSI_INITIAL_R2T_NO) {
/* We have more data to send, and we are allowed to send
* unsolicited data, so dont flag this PDU as final.
*/
@@ -303,13 +278,14 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
}
/* Can we send some unsolicited data ? */
if (pdu->nidata.size != 0 && iscsi->use_initial_r2t == ISCSI_INITIAL_R2T_NO && iscsi->use_immediate_data == ISCSI_IMMEDIATE_DATA_NO) {
uint32_t len = pdu->nidata.size - offset;
if (pdu->out_len != 0 && iscsi->use_initial_r2t == ISCSI_INITIAL_R2T_NO && iscsi->use_immediate_data == ISCSI_IMMEDIATE_DATA_NO) {
uint32_t len = task->expxferlen - pdu->out_len;
if (len > iscsi->first_burst_length) {
len = iscsi->first_burst_length;
}
iscsi_send_data_out(iscsi, pdu, 0xffffffff, offset, len);
iscsi_send_data_out(iscsi, pdu, 0xffffffff,
pdu->out_offset, len);
}
/* remember cmdsn and itt so we can use task management */
@@ -320,7 +296,6 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
return 0;
}
int
iscsi_process_scsi_reply(struct iscsi_context *iscsi, struct iscsi_pdu *pdu,
struct iscsi_in_pdu *in)
@@ -804,7 +779,6 @@ iscsi_write10_task(struct iscsi_context *iscsi, int lun, uint32_t lba,
iscsi_command_cb cb, void *private_data)
{
struct scsi_task *task;
struct iscsi_data outdata;
if (datalen % blocksize != 0) {
iscsi_set_error(iscsi, "Datalen:%d is not a multiple of the "
@@ -819,16 +793,25 @@ iscsi_write10_task(struct iscsi_context *iscsi, int lun, uint32_t lba,
"write10 cdb.");
return NULL;
}
if (data != NULL) {
struct scsi_iovec *iov;
outdata.data = data;
outdata.size = datalen;
iov = scsi_malloc(task, sizeof(struct scsi_iovec));
if (iov == NULL) {
scsi_free_scsi_task(task);
return NULL;
}
iov->iov_base = data;
iov->iov_len = datalen;
scsi_task_set_iov_out(task, iov, 1);
}
if (iscsi_scsi_command_async(iscsi, lun, task, cb, &outdata,
private_data) != 0) {
if (iscsi_scsi_command_async(iscsi, lun, task, cb,
NULL, private_data) != 0) {
scsi_free_scsi_task(task);
return NULL;
}
return task;
}
@@ -1641,3 +1624,10 @@ iscsi_scsi_cancel_all_tasks(struct iscsi_context *iscsi)
iscsi_free_pdu(iscsi, pdu);
}
}
/*
 * Fetch a pointer into the data-out buffer belonging to the SCSI task
 * that is attached to this PDU (via pdu->scsi_cbdata->task).
 *
 * This is a thin forwarder: the lookup itself is done by
 * scsi_task_get_data_out_buffer(), and its return value is passed back
 * unchanged.
 *
 * iscsi  - not used by this function.
 * pdu    - PDU whose scsi_cbdata->task is queried; both pdu->scsi_cbdata
 *          and its task are dereferenced without a NULL check.
 * pos    - forwarded as-is; presumably a byte offset into the task's
 *          data-out data -- TODO confirm against
 *          scsi_task_get_data_out_buffer().
 * count  - forwarded as-is; presumably in/out byte count -- TODO confirm.
 */
unsigned char *
iscsi_get_user_out_buffer(struct iscsi_context *iscsi _U_, struct iscsi_pdu *pdu, uint32_t pos, ssize_t *count)
{
	struct scsi_task *task = pdu->scsi_cbdata->task;

	return scsi_task_get_data_out_buffer(task, pos, count);
}

View File

@@ -427,41 +427,73 @@ static int
iscsi_write_to_socket(struct iscsi_context *iscsi)
{
ssize_t count;
struct iscsi_pdu *pdu;
if (iscsi->fd == -1) {
iscsi_set_error(iscsi, "trying to write but not connected");
return -1;
}
while (iscsi->outqueue) {
while ((pdu = iscsi->outqueue) != NULL) {
ssize_t total;
if (iscsi->outqueue->cmdsn > iscsi->maxcmdsn) {
if (pdu->cmdsn > iscsi->maxcmdsn) {
/* stop sending. maxcmdsn is reached */
return 0;
}
total = iscsi->outqueue->outdata.size;
total = pdu->outdata.size;
total = (total + 3) & 0xfffffffc;
count = send(iscsi->fd,
iscsi->outqueue->outdata.data
+ iscsi->outqueue->written,
total - iscsi->outqueue->written,
0);
if (count == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return 0;
/* Write header and any immediate data */
if (pdu->written < total) {
count = send(iscsi->fd,
pdu->outdata.data + pdu->written,
total - pdu->written,
0);
if (count == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return 0;
}
iscsi_set_error(iscsi, "Error when writing to "
"socket :%d", errno);
return -1;
}
iscsi_set_error(iscsi, "Error when writing to "
"socket :%d", errno);
return -1;
pdu->written += count;
}
/* if we havent written the full header yet. */
if (pdu->written != total) {
return 0;
}
iscsi->outqueue->written += count;
if (iscsi->outqueue->written == total) {
struct iscsi_pdu *pdu = iscsi->outqueue;
/* Write any iovectors that might have been passed to us */
while (pdu->out_len > 0) {
unsigned char *buf;
count = pdu->out_len;
buf = iscsi_get_user_out_buffer(iscsi, pdu, pdu->out_offset, &count);
if (buf == NULL) {
iscsi_set_error(iscsi, "Can't find iovector data for DATA-OUT");
return -1;
}
count = send(iscsi->fd,
buf,
count,
0);
if (count == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return 0;
}
iscsi_set_error(iscsi, "Error when writing to "
"socket :%d", errno);
return -1;
}
pdu->out_offset += count;
pdu->out_len -= count;
}
if (pdu->written == total) {
SLIST_REMOVE(&iscsi->outqueue, pdu);
if (pdu->flags & ISCSI_PDU_DELETE_WHEN_SENT) {
iscsi_free_pdu(iscsi, pdu);