From cd09c0f17dc0907fd27bdc3cf8f9a53121ca1b0b Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Wed, 28 Nov 2012 10:37:28 +0100 Subject: [PATCH 1/5] PDU use serial32 arithmetic for cmdsn, maxcmdsn and expcmdsn. RFC3720 says that cmdsn comparison must be done using serial32 arithmetic. This will definetly avoid a deadlock if cmdsn wraps from 2^32-1 to 0. Signed-off-by: Peter Lieven --- include/iscsi-private.h | 3 +++ lib/iscsi-command.c | 6 +++--- lib/login.c | 6 +++--- lib/pdu.c | 18 ++++++++++++++++++ lib/socket.c | 4 ++-- lib/task_mgmt.c | 2 +- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/include/iscsi-private.h b/include/iscsi-private.h index 93335f4..2928a36 100644 --- a/include/iscsi-private.h +++ b/include/iscsi-private.h @@ -325,6 +325,9 @@ iscsi_log_message(struct iscsi_context *iscsi, int level, const char *format, .. void iscsi_add_to_outqueue(struct iscsi_context *iscsi, struct iscsi_pdu *pdu); +int +iscsi_serial32_compare(u_int32_t s1, u_int32_t s2); + #ifdef __cplusplus } #endif diff --git a/lib/iscsi-command.c b/lib/iscsi-command.c index 310e656..c6b6fe4 100644 --- a/lib/iscsi-command.c +++ b/lib/iscsi-command.c @@ -335,7 +335,7 @@ iscsi_process_scsi_reply(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, } maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } @@ -452,7 +452,7 @@ iscsi_process_scsi_data_in(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, } maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } @@ -542,7 +542,7 @@ iscsi_process_r2t(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, len = scsi_get_uint32(&in->hdr[44]); maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } diff --git a/lib/login.c b/lib/login.c index bf85b73..c918747 100644 --- a/lib/login.c +++ b/lib/login.c @@ -950,10 +950,10 @@ iscsi_process_login_reply(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, iscsi->statsn = scsi_get_uint16(&in->hdr[24]); maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } - + /* XXX here we should parse the data returned in case the target * renegotiated some some parameters. * we should also do proper handshaking if the target is not yet @@ -1136,7 +1136,7 @@ struct iscsi_in_pdu *in) uint32_t maxcmdsn; maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } diff --git a/lib/pdu.c b/lib/pdu.c index 947c0fc..c28cd4f 100644 --- a/lib/pdu.c +++ b/lib/pdu.c @@ -30,6 +30,24 @@ #include "scsi-lowlevel.h" #include "slist.h" +/* This adds 32-bit serial comparision as defined in RFC1982. + * It returns 0 for equality, 1 if s1 is greater than s2 and + * -1 if s1 is less than s2. According to RFC1982 section 3.2 + * there are rare cases where the result of the comparision is + * undefined e.g. when s1 = 0 and s2=2^31. This cases should + * not happen in iSCSI protocol. + */ +int +iscsi_serial32_compare(u_int32_t s1, u_int32_t s2) { + if (s1 == s2) return 0; + if (s1 < s2 && s2-s1 < (u_int32_t)1<<31) return -1; + if (s1 > s2 && s1-s2 < (u_int32_t)1<<31) return 1; + if (s1 > s2 && s1-s2 > (u_int32_t)1<<31) return -1; + if (s1 < s2 && s2-s1 > (u_int32_t)1<<31) return 1; + /* undefined result */ + return -1; +} + struct iscsi_pdu * iscsi_allocate_pdu_with_itt_flags_size(struct iscsi_context *iscsi, enum iscsi_opcode opcode, enum iscsi_opcode response_opcode, uint32_t itt, uint32_t flags, size_t payload_size) diff --git a/lib/socket.c b/lib/socket.c index e1a417a..c14e590 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -302,7 +302,7 @@ iscsi_which_events(struct iscsi_context *iscsi) { int events = iscsi->is_connected ? POLLIN : POLLOUT; - if (iscsi->outqueue && iscsi->outqueue->cmdsn <= iscsi->maxcmdsn) { + if (iscsi->outqueue && iscsi_serial32_compare(iscsi->outqueue->cmdsn,iscsi->maxcmdsn) <= 0) { events |= POLLOUT; } return events; @@ -442,7 +442,7 @@ iscsi_write_to_socket(struct iscsi_context *iscsi) while (iscsi->outqueue) { ssize_t total; - if (iscsi->outqueue->cmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(iscsi->outqueue->cmdsn,iscsi->maxcmdsn) > 0) { /* stop sending. maxcmdsn is reached */ return 0; } diff --git a/lib/task_mgmt.c b/lib/task_mgmt.c index 9585bd1..784fe6c 100644 --- a/lib/task_mgmt.c +++ b/lib/task_mgmt.c @@ -88,7 +88,7 @@ iscsi_process_task_mgmt_reply(struct iscsi_context *iscsi, struct iscsi_pdu *pdu response = in->hdr[2]; maxcmdsn = scsi_get_uint32(&in->hdr[32]); - if (maxcmdsn > iscsi->maxcmdsn) { + if (iscsi_serial32_compare(maxcmdsn,iscsi->maxcmdsn) > 0) { iscsi->maxcmdsn = maxcmdsn; } From 562dd468338dabe6efc56af92ce5a7eff62f4e61 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Wed, 28 Nov 2012 10:58:33 +0100 Subject: [PATCH 2/5] PDU avoid incrementing itt to 0xffffffff This patch avoid incrementing itt to 0xffffffff which is a reserved value for immediate pdus. Avoid incrementing it to 0xfffffff to avoid unexpected behaviour. Signed-off-by: Peter Lieven --- include/iscsi-private.h | 3 +++ lib/connect.c | 4 ++-- lib/login.c | 2 +- lib/pdu.c | 16 ++++++++++++++-- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/include/iscsi-private.h b/include/iscsi-private.h index 2928a36..a35eb4c 100644 --- a/include/iscsi-private.h +++ b/include/iscsi-private.h @@ -328,6 +328,9 @@ iscsi_add_to_outqueue(struct iscsi_context *iscsi, struct iscsi_pdu *pdu); int iscsi_serial32_compare(u_int32_t s1, u_int32_t s2); +u_int32_t +iscsi_itt_post_increment(struct iscsi_context *iscsi); + #ifdef __cplusplus } #endif diff --git a/lib/connect.c b/lib/connect.c index 4a7f878..d52baad 100644 --- a/lib/connect.c +++ b/lib/connect.c @@ -279,7 +279,7 @@ try_again: continue; } - pdu->itt = iscsi->itt++; + pdu->itt = iscsi_itt_post_increment(iscsi); iscsi_pdu_set_itt(pdu, pdu->itt); pdu->cmdsn = iscsi->cmdsn++; @@ -309,7 +309,7 @@ try_again: continue; } - pdu->itt = iscsi->itt++; + pdu->itt = iscsi_itt_post_increment(iscsi); iscsi_pdu_set_itt(pdu, pdu->itt); pdu->cmdsn = iscsi->cmdsn++; diff --git a/lib/login.c b/lib/login.c index c918747..66e2146 100644 --- a/lib/login.c +++ b/lib/login.c @@ -1067,7 +1067,7 @@ iscsi_process_login_reply(struct iscsi_context *iscsi, struct iscsi_pdu *pdu, if ((in->hdr[1] & ISCSI_PDU_LOGIN_TRANSIT) && (in->hdr[1] & ISCSI_PDU_LOGIN_NSG_FF) == ISCSI_PDU_LOGIN_NSG_FF) { iscsi->is_loggedin = 1; - iscsi->itt++; + iscsi_itt_post_increment(iscsi); iscsi->header_digest = iscsi->want_header_digest; ISCSI_LOG(iscsi, 2, "login successful"); pdu->callback(iscsi, SCSI_STATUS_GOOD, NULL, pdu->private_data); diff --git a/lib/pdu.c b/lib/pdu.c index c28cd4f..a7b6022 100644 --- a/lib/pdu.c +++ b/lib/pdu.c @@ -48,6 +48,18 @@ iscsi_serial32_compare(u_int32_t s1, u_int32_t s2) { return -1; } +u_int32_t +iscsi_itt_post_increment(struct iscsi_context *iscsi) { + u_int32_t old_itt = iscsi->itt; + iscsi->itt++; + /* 0xffffffff is a reserved value */ + if (iscsi->itt == 0xffffffff) { + iscsi->itt = 0; + } + return old_itt; +} + + struct iscsi_pdu * iscsi_allocate_pdu_with_itt_flags_size(struct iscsi_context *iscsi, enum iscsi_opcode opcode, enum iscsi_opcode response_opcode, uint32_t itt, uint32_t flags, size_t payload_size) @@ -107,14 +119,14 @@ struct iscsi_pdu * iscsi_allocate_pdu(struct iscsi_context *iscsi, enum iscsi_opcode opcode, enum iscsi_opcode response_opcode) { - return iscsi_allocate_pdu_with_itt_flags(iscsi, opcode, response_opcode, iscsi->itt++, 0); + return iscsi_allocate_pdu_with_itt_flags(iscsi, opcode, response_opcode, iscsi_itt_post_increment(iscsi), 0); } struct iscsi_pdu * iscsi_allocate_pdu_size(struct iscsi_context *iscsi, enum iscsi_opcode opcode, enum iscsi_opcode response_opcode, size_t payload_size) { - return iscsi_allocate_pdu_with_itt_flags_size(iscsi, opcode, response_opcode, iscsi->itt++, 0, payload_size); + return iscsi_allocate_pdu_with_itt_flags_size(iscsi, opcode, response_opcode, iscsi_itt_post_increment(iscsi), 0, payload_size); } From c09ee1dab8ba44aa9db2cb3d70c1407df905b4a4 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Wed, 28 Nov 2012 11:37:23 +0100 Subject: [PATCH 3/5] Fix test 1040 to really trigger cmdsn deadlock Signed-off-by: Peter Lieven --- test-tool/1040_saturate_maxcmdsn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test-tool/1040_saturate_maxcmdsn.c b/test-tool/1040_saturate_maxcmdsn.c index 958b6be..e2cf5fe 100644 --- a/test-tool/1040_saturate_maxcmdsn.c +++ b/test-tool/1040_saturate_maxcmdsn.c @@ -93,8 +93,7 @@ int T1040_saturate_maxcmdsn(const char *initiator, const char *url, int data_los ret = 0; - iscsi->use_immediate_data = ISCSI_IMMEDIATE_DATA_NO; - iscsi->target_max_recv_data_segment_length = block_size; + iscsi->first_burst_length = block_size; printf("Send 1024 Writes each needing a R2T so that we saturate the maxcmdsn queue ... "); /* we dont want autoreconnect since some targets will drop the From 722eb013c89b21789d0c09ae7e1584a716246c95 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Wed, 28 Nov 2012 13:55:55 +0100 Subject: [PATCH 4/5] TESTS really fix T1040 This finally makes T1040 trigger the cmdsn overflow deadlock. Its a bad idea to mangle on negotiated parameters as this leads to protocol errors. We also need to avoid overlapping writes as this leads also to protocol errors. Additionally we test with and with (if available) immediate data. Signed-off-by: Peter Lieven --- lib/socket.c | 1 + test-tool/1040_saturate_maxcmdsn.c | 88 +++++++++++++++++++++--------- 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/lib/socket.c b/lib/socket.c index c14e590..113fc58 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -52,6 +52,7 @@ void iscsi_add_to_outqueue(struct iscsi_context *iscsi, struct iscsi_pdu *pdu) { SLIST_ADD_END(&iscsi->outqueue, pdu); + return; } void iscsi_decrement_iface_rr() { diff --git a/test-tool/1040_saturate_maxcmdsn.c b/test-tool/1040_saturate_maxcmdsn.c index e2cf5fe..a0ef25c 100644 --- a/test-tool/1040_saturate_maxcmdsn.c +++ b/test-tool/1040_saturate_maxcmdsn.c @@ -21,19 +21,25 @@ #include "iscsi-private.h" #include "scsi-lowlevel.h" #include "iscsi-test.h" +#include static int num_cmds_in_flight; -static void test_cb(struct iscsi_context *iscsi _U_, int status _U_, +static void test_cb(struct iscsi_context *iscsi _U_, int status, void *command_data _U_, void *private_data) { struct iscsi_async_state *state = private_data; + if (status != SCSI_STATUS_GOOD) { + state->status = status; + } + if (--num_cmds_in_flight == 0) { state->finished = 1; } } +#define T1040_NO_OF_WRITES (1024) int T1040_saturate_maxcmdsn(const char *initiator, const char *url, int data_loss, int show_info) { @@ -42,7 +48,7 @@ int T1040_saturate_maxcmdsn(const char *initiator, const char *url, int data_los struct scsi_readcapacity16 *rc16; int i, ret, lun; uint32_t block_size; - unsigned char data[4096 * 2]; + unsigned char *data; struct iscsi_async_state test_state; printf("1040_saturate_maxcmdsn:\n"); @@ -81,6 +87,13 @@ int T1040_saturate_maxcmdsn(const char *initiator, const char *url, int data_los goto finished; } block_size = rc16->block_length; + + if (T1040_NO_OF_WRITES*2*iscsi->first_burst_length > rc16->block_length*(rc16->returned_lba +1)) { + printf("target is too small for this test. at least %u bytes are required\n",T1040_NO_OF_WRITES*2*iscsi->first_burst_length); + ret = -1; + scsi_free_scsi_task(task); + goto finished; + } scsi_free_scsi_task(task); @@ -93,40 +106,63 @@ int T1040_saturate_maxcmdsn(const char *initiator, const char *url, int data_los ret = 0; - iscsi->first_burst_length = block_size; - - printf("Send 1024 Writes each needing a R2T so that we saturate the maxcmdsn queue ... "); /* we dont want autoreconnect since some targets will drop the * on this condition. */ iscsi_set_noautoreconnect(iscsi, 1); - for (i = 0; i < 1024; i++) { - num_cmds_in_flight++; - task = iscsi_write10_task(iscsi, lun, 0, data, 2 * block_size, block_size, - 0, 0, 0, 0, 0, - test_cb, &test_state); - if (task == NULL) { - printf("[FAILED]\n"); - printf("Failed to send WRITE10 command: %s\n", iscsi_get_error(iscsi)); + data = malloc(2*iscsi->first_burst_length); + if (data == NULL) { + printf("failed to malloc data buffer\n"); + ret = -1; + goto finished; + } + + int run=0; + + do { + if (run || iscsi->use_immediate_data == ISCSI_IMMEDIATE_DATA_NO) { + iscsi->use_immediate_data = ISCSI_IMMEDIATE_DATA_NO; + printf("Send %d Writes w/ ISCSI_IMMEDIATE_DATA_NO each needing a R2T so that we saturate the maxcmdsn queue ... ",T1040_NO_OF_WRITES); + } else { + printf("Send %d Writes w/ ISCSI_IMMEDIATE_DATA_YES each needing a R2T so that we saturate the maxcmdsn queue ... ",T1040_NO_OF_WRITES); + } + + for (i = 0; i < T1040_NO_OF_WRITES; i++) { + num_cmds_in_flight++; + task = iscsi_write10_task(iscsi, lun, 2 * iscsi->first_burst_length * i / block_size, data, 2 * iscsi->first_burst_length, block_size, + 0, 0, 0, 0, 0, + test_cb, &test_state); + if (task == NULL) { + printf("[FAILED]\n"); + printf("Failed to send WRITE10 command: %s\n", iscsi_get_error(iscsi)); + ret++; + goto test2; + } + } + + test_state.task = task; + test_state.finished = 0; + test_state.status = 0; + wait_until_test_finished(iscsi, &test_state); + if (num_cmds_in_flight != 0) { + printf("[FAILED]\n"); + printf("Did not complete all I/O before deadline.\n"); + ret++; + goto test2; + } else if (test_state.status != 0) { + printf("[FAILED]\n"); + printf("Not all I/O commands succeeded.\n"); ret++; goto test2; } - } - test_state.task = task; - test_state.finished = 0; - test_state.status = 0; - wait_until_test_finished(iscsi, &test_state); - if (num_cmds_in_flight != 0) { - printf("[FAILED]\n"); - printf("Did not complete all I/O before deadline.\n"); - ret++; - goto test2; - } - printf("[OK]\n"); - + printf("[OK]\n"); + run++; + } while (iscsi->use_immediate_data == ISCSI_IMMEDIATE_DATA_YES); + test2: + free(data); finished: iscsi_destroy_context(iscsi); From 1f4a66abc85279f67c93d0f8b7e8ef226fc444b2 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Wed, 28 Nov 2012 14:04:14 +0100 Subject: [PATCH 5/5] PDU queue out PDUs in order of itt. This patch fixes a deadlock case where all available cmdsns have been used for command PDUs which need additional Data-OUT PDUs to succeed (e.g. a write16 which is larger than first_burst_len). In this case the target will never increase the maxcmdsn leading to a deadlock in iscsi_write_to_socket(). If we receive the R2T for such a write request we will queue the Data-OUT PDUs but at the end of queue. As a result they will never be sent as the outqueue is already stuck. We fix this by sorting the outqueue by ascending itt. Signed-off-by: Peter Lieven --- lib/socket.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/lib/socket.c b/lib/socket.c index 113fc58..44bde4e 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -51,8 +51,37 @@ static uint32_t iface_rr = 0; void iscsi_add_to_outqueue(struct iscsi_context *iscsi, struct iscsi_pdu *pdu) { - SLIST_ADD_END(&iscsi->outqueue, pdu); - return; + if (iscsi->outqueue == NULL) { + iscsi->outqueue = pdu; + pdu->next = NULL; + return; + } + + struct iscsi_pdu *current = iscsi->outqueue; + struct iscsi_pdu *last = NULL; + + /* queue pdus in ascending order of itt. + * ensure that pakets with the same itt are kept in order. + * queue pdus with itt = 0xffffffff (SNACK / DataACK) in order but at head of queue. + */ + do { + if (current->written == 0 && (iscsi_serial32_compare(pdu->itt, current->itt) < 0 + || (pdu->itt == 0xffffffff && current->itt != 0xffffffff))) { + /* insert PDU before the current */ + if (last != NULL) { + last->next=pdu; + } else { + iscsi->outqueue=pdu; + } + pdu->next = current; + return; + } + last=current; + current=current->next; + } while (current != NULL); + + last->next = pdu; + pdu->next = NULL; } void iscsi_decrement_iface_rr() {