Merge pull request #151 from plieven/reconnect_async

Reconnect async
This commit is contained in:
Ronnie Sahlberg
2015-04-10 06:26:59 -07:00
12 changed files with 242 additions and 109 deletions

View File

@@ -154,8 +154,10 @@ struct iscsi_context {
int smalloc_free;
size_t smalloc_size;
time_t last_reconnect;
time_t next_reconnect;
int scsi_timeout;
struct iscsi_context *old_iscsi;
int retry_cnt;
};
#define ISCSI_PDU_IMMEDIATE 0x40

View File

@@ -400,7 +400,10 @@ EXTERN int iscsi_full_connect_sync(struct iscsi_context *iscsi, const char *port
EXTERN int iscsi_disconnect(struct iscsi_context *iscsi);
/*
* Disconnect a connection to a target and try to reconnect.
* Disconnect a connection to a target and try to reconnect (async version).
* This call returns immediately and the reconnect is processed in the
* background. Commands sent to this connection will be queued and not
* processed until we have successfully reconnected.
*
* Returns:
* 0 reconnect was started, deferred, or is already pending/in progress
@@ -408,6 +411,16 @@ EXTERN int iscsi_disconnect(struct iscsi_context *iscsi);
*/
EXTERN int iscsi_reconnect(struct iscsi_context *iscsi);
/*
* Disconnect a connection to a target and try to reconnect (sync version).
* This call will block until the connection is reestablished.
*
* Returns:
* 0 reconnect was successful
* <0 error
*/
EXTERN int iscsi_reconnect_sync(struct iscsi_context *iscsi);
/*
* Asynchronous call to perform an ISCSI login.
*

View File

@@ -130,14 +130,16 @@ iscsi_login_cb(struct iscsi_context *iscsi, int status, void *command_data _U_,
return;
}
if (ct->lun != -1) {
if (ct->lun != -1 && !iscsi->old_iscsi) {
if (iscsi_testunitready_task(iscsi, ct->lun,
iscsi_testunitready_cb, ct) == NULL) {
iscsi_set_error(iscsi, "iscsi_testunitready_async failed.");
ct->cb(iscsi, SCSI_STATUS_ERROR, NULL, ct->private_data);
iscsi_free(iscsi, ct);
}
} else {
ct->cb(iscsi, SCSI_STATUS_GOOD, NULL, ct->private_data);
iscsi_free(iscsi, ct);
}
}
@@ -171,8 +173,9 @@ iscsi_full_connect_async(struct iscsi_context *iscsi, const char *portal,
struct connect_task *ct;
iscsi->lun = lun;
if (iscsi->portal != portal)
strncpy(iscsi->portal,portal,MAX_STRING_SIZE);
if (iscsi->portal != portal) {
strncpy(iscsi->portal, portal, MAX_STRING_SIZE);
}
ct = iscsi_malloc(iscsi, sizeof(struct connect_task));
if (ct == NULL) {
@@ -246,76 +249,12 @@ void iscsi_defer_reconnect(struct iscsi_context *iscsi)
}
}
int iscsi_reconnect(struct iscsi_context *old_iscsi)
static void iscsi_reconnect_cb(struct iscsi_context *iscsi _U_, int status,
void *command_data _U_, void *private_data _U_)
{
struct iscsi_context *iscsi;
int retry = 0, i;
/* if there is already a deferred reconnect do not try again */
if (old_iscsi->reconnect_deferred) {
ISCSI_LOG(old_iscsi, 2, "reconnect initiated, but reconnect is already deferred");
return -1;
}
ISCSI_LOG(old_iscsi, 2, "reconnect initiated");
/* This is mainly for tests, where we do not want to automatically
reconnect but rather want the commands to fail with an error
if the target drops the session.
*/
if (old_iscsi->no_auto_reconnect) {
iscsi_defer_reconnect(old_iscsi);
return 0;
}
if (old_iscsi->last_reconnect) {
if (time(NULL) - old_iscsi->last_reconnect < 5) sleep(5);
}
try_again:
iscsi = iscsi_create_context(old_iscsi->initiator_name);
if (iscsi == NULL) {
ISCSI_LOG(old_iscsi, 2, "failed to create new context for reconnection");
return -1;
}
iscsi->is_reconnecting = 1;
iscsi_set_targetname(iscsi, old_iscsi->target_name);
iscsi_set_header_digest(iscsi, old_iscsi->want_header_digest);
iscsi_set_initiator_username_pwd(iscsi, old_iscsi->user, old_iscsi->passwd);
iscsi_set_target_username_pwd(iscsi, old_iscsi->target_user, old_iscsi->target_passwd);
iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL);
iscsi->lun = old_iscsi->lun;
strncpy(iscsi->portal,old_iscsi->portal,MAX_STRING_SIZE);
strncpy(iscsi->bind_interfaces,old_iscsi->bind_interfaces,MAX_STRING_SIZE);
iscsi->bind_interfaces_cnt = old_iscsi->bind_interfaces_cnt;
iscsi->log_level = old_iscsi->log_level;
iscsi->log_fn = old_iscsi->log_fn;
iscsi->tcp_user_timeout = old_iscsi->tcp_user_timeout;
iscsi->tcp_keepidle = old_iscsi->tcp_keepidle;
iscsi->tcp_keepcnt = old_iscsi->tcp_keepcnt;
iscsi->tcp_keepintvl = old_iscsi->tcp_keepintvl;
iscsi->tcp_syncnt = old_iscsi->tcp_syncnt;
iscsi->reconnect_max_retries = old_iscsi->reconnect_max_retries;
if (iscsi_full_connect_sync(iscsi, iscsi->portal, iscsi->lun) != 0) {
int backoff = retry;
if (iscsi->reconnect_max_retries != -1 && retry >= iscsi->reconnect_max_retries) {
iscsi_defer_reconnect(old_iscsi);
iscsi_destroy_context(iscsi);
return -1;
}
int i;
if (status != SCSI_STATUS_GOOD) {
int backoff = ++iscsi->old_iscsi->retry_cnt;
if (backoff > 10) {
backoff += rand() % 10;
backoff -= 5;
@@ -323,13 +262,20 @@ try_again:
if (backoff > 30) {
backoff = 30;
}
ISCSI_LOG(old_iscsi, 1, "reconnect try %d failed, waiting %d seconds", retry, backoff);
iscsi_destroy_context(iscsi);
sleep(backoff);
retry++;
goto try_again;
if (iscsi->reconnect_max_retries != -1 &&
iscsi->old_iscsi->retry_cnt >= iscsi->reconnect_max_retries) {
/* we will exit iscsi_service with -1 the next time we enter it. */
backoff = 0;
}
ISCSI_LOG(iscsi, 1, "reconnect try %d failed, waiting %d seconds", iscsi->old_iscsi->retry_cnt, backoff);
iscsi->next_reconnect = time(NULL) + backoff;
iscsi->pending_reconnect = 1;
return;
}
struct iscsi_context *old_iscsi = iscsi->old_iscsi;
iscsi->old_iscsi = NULL;
while (old_iscsi->outqueue) {
struct iscsi_pdu *pdu = old_iscsi->outqueue;
ISCSI_LIST_REMOVE(&old_iscsi->outqueue, pdu);
@@ -372,11 +318,6 @@ try_again:
iscsi_free_pdu(old_iscsi, pdu);
}
if (dup2(iscsi->fd, old_iscsi->fd) == -1) {
iscsi_destroy_context(iscsi);
goto try_again;
}
if (old_iscsi->incoming != NULL) {
iscsi_free_iscsi_in_pdu(old_iscsi, old_iscsi->incoming);
}
@@ -388,23 +329,108 @@ try_again:
iscsi_free_pdu(old_iscsi, old_iscsi->outqueue_current);
}
close(iscsi->fd);
iscsi->fd = old_iscsi->fd;
for (i = 0; i < old_iscsi->smalloc_free; i++) {
iscsi_free(old_iscsi, old_iscsi->smalloc_ptrs[i]);
}
iscsi->mallocs+=old_iscsi->mallocs;
iscsi->frees+=old_iscsi->frees;
iscsi->mallocs += old_iscsi->mallocs;
iscsi->frees += old_iscsi->frees;
free(old_iscsi);
/* avoid a reconnect faster than 3 seconds */
iscsi->next_reconnect = time(NULL) + 3;
ISCSI_LOG(iscsi, 2, "reconnect was successful");
iscsi->pending_reconnect = 0;
iscsi->is_reconnecting = 0;
}
int iscsi_reconnect(struct iscsi_context *old_iscsi)
{
struct iscsi_context *iscsi;
/* if there is already a deferred reconnect do not try again */
if (old_iscsi->reconnect_deferred) {
ISCSI_LOG(old_iscsi, 2, "reconnect initiated, but reconnect is already deferred");
return -1;
}
/* This is mainly for tests, where we do not want to automatically
reconnect but rather want the commands to fail with an error
if the target drops the session.
*/
if (old_iscsi->no_auto_reconnect) {
iscsi_defer_reconnect(old_iscsi);
return 0;
}
if (old_iscsi->is_reconnecting && !old_iscsi->pending_reconnect) {
return 0;
}
if (time(NULL) < old_iscsi->next_reconnect) {
old_iscsi->pending_reconnect = 1;
return 0;
}
if (old_iscsi->reconnect_max_retries != -1 && old_iscsi->old_iscsi &&
old_iscsi->old_iscsi->retry_cnt >= old_iscsi->reconnect_max_retries) {
iscsi_defer_reconnect(old_iscsi);
return -1;
}
iscsi = iscsi_create_context(old_iscsi->initiator_name);
if (iscsi == NULL) {
ISCSI_LOG(old_iscsi, 2, "failed to create new context for reconnection");
return -1;
}
ISCSI_LOG(old_iscsi, 2, "reconnect initiated");
old_iscsi->is_reconnecting = 1;
iscsi->is_reconnecting = 1;
iscsi_set_targetname(iscsi, old_iscsi->target_name);
iscsi_set_header_digest(iscsi, old_iscsi->want_header_digest);
iscsi_set_initiator_username_pwd(iscsi, old_iscsi->user, old_iscsi->passwd);
iscsi_set_target_username_pwd(iscsi, old_iscsi->target_user, old_iscsi->target_passwd);
iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL);
iscsi->lun = old_iscsi->lun;
strncpy(iscsi->portal,old_iscsi->portal,MAX_STRING_SIZE);
strncpy(iscsi->bind_interfaces,old_iscsi->bind_interfaces,MAX_STRING_SIZE);
iscsi->bind_interfaces_cnt = old_iscsi->bind_interfaces_cnt;
iscsi->log_level = old_iscsi->log_level;
iscsi->log_fn = old_iscsi->log_fn;
iscsi->tcp_user_timeout = old_iscsi->tcp_user_timeout;
iscsi->tcp_keepidle = old_iscsi->tcp_keepidle;
iscsi->tcp_keepcnt = old_iscsi->tcp_keepcnt;
iscsi->tcp_keepintvl = old_iscsi->tcp_keepintvl;
iscsi->tcp_syncnt = old_iscsi->tcp_syncnt;
iscsi->reconnect_max_retries = old_iscsi->reconnect_max_retries;
if (old_iscsi->old_iscsi) {
int i;
for (i = 0; i < old_iscsi->smalloc_free; i++) {
iscsi_free(old_iscsi, old_iscsi->smalloc_ptrs[i]);
}
iscsi->old_iscsi = old_iscsi->old_iscsi;
} else {
iscsi->old_iscsi = malloc(sizeof(struct iscsi_context));
memcpy(iscsi->old_iscsi, old_iscsi, sizeof(struct iscsi_context));
}
memcpy(old_iscsi, iscsi, sizeof(struct iscsi_context));
free(iscsi);
ISCSI_LOG(old_iscsi, 2, "reconnect was successful");
old_iscsi->is_reconnecting = 0;
old_iscsi->last_reconnect = time(NULL);
return 0;
return iscsi_full_connect_async(old_iscsi, old_iscsi->portal,
old_iscsi->lun, iscsi_reconnect_cb, NULL);
}

View File

@@ -336,7 +336,9 @@ iscsi_destroy_context(struct iscsi_context *iscsi)
} else {
ISCSI_LOG(iscsi,5,"memory is clean at iscsi_destroy_context() after %d mallocs, %d realloc(s), %d free(s) and %d reused small allocations",iscsi->mallocs,iscsi->reallocs,iscsi->frees,iscsi->smallocs);
}
iscsi_destroy_context(iscsi->old_iscsi);
memset(iscsi, 0, sizeof(struct iscsi_context));
free(iscsi);

View File

@@ -206,6 +206,11 @@ iscsi_scsi_command_async(struct iscsi_context *iscsi, int lun,
struct iscsi_pdu *pdu;
int flags;
if (iscsi->old_iscsi) {
iscsi = iscsi->old_iscsi;
ISCSI_LOG(iscsi, 2, "iscsi_scsi_command_async: queuing cmd to old_iscsi while reconnecting");
}
if (iscsi->session_type != ISCSI_SESSION_NORMAL) {
iscsi_set_error(iscsi, "Trying to send command on "
"discovery session.");

View File

@@ -2,6 +2,7 @@ LIBRARY libiscsi
EXPORTS
iscsi_connect_async
iscsi_connect_sync
iscsi_reconnect_sync
iscsi_create_context
iscsi_destroy_context
iscsi_destroy_url

View File

@@ -1,5 +1,6 @@
iscsi_connect_async
iscsi_connect_sync
iscsi_reconnect_sync
iscsi_create_context
iscsi_destroy_context
iscsi_destroy_url

View File

@@ -30,6 +30,13 @@ iscsi_nop_out_async(struct iscsi_context *iscsi, iscsi_command_cb cb,
{
struct iscsi_pdu *pdu;
if (iscsi->old_iscsi || iscsi->pending_reconnect) {
ISCSI_LOG(iscsi, (iscsi->nops_in_flight > 1) ? 1 : 6,
"NOP Out Send NOT SEND while reconnecting (nops_in_flight: %d, iscsi->maxcmdsn %08x, iscsi->expcmdsn %08x)",
iscsi->nops_in_flight, iscsi->maxcmdsn, iscsi->expcmdsn);
return 0;
}
if (iscsi->is_loggedin == 0) {
iscsi_set_error(iscsi, "trying to send nop-out while not "
"logged in");

View File

@@ -452,11 +452,11 @@ iscsi_process_pdu(struct iscsi_context *iscsi, struct iscsi_in_pdu *in)
return 0;
case 0x2:
ISCSI_LOG(iscsi, 2, "target will drop this connection. Time2Wait is %u seconds", param2);
iscsi->last_reconnect = time(NULL) + param2;
iscsi->next_reconnect = time(NULL) + param2;
return 0;
case 0x3:
ISCSI_LOG(iscsi, 2, "target will drop all connections of this session. Time2Wait is %u seconds", param2);
iscsi->last_reconnect = time(NULL) + param2;
iscsi->next_reconnect = time(NULL) + param2;
return 0;
case 0x4:
ISCSI_LOG(iscsi, 2, "target requests parameter renogitiation.");

View File

@@ -291,6 +291,14 @@ iscsi_connect_async(struct iscsi_context *iscsi, const char *portal,
}
if (iscsi->old_iscsi && iscsi->fd != iscsi->old_iscsi->fd) {
if (dup2(iscsi->fd, iscsi->old_iscsi->fd) == -1) {
return -1;
}
close(iscsi->fd);
iscsi->fd = iscsi->old_iscsi->fd;
}
iscsi->socket_status_cb = cb;
iscsi->connect_data = private_data;
@@ -383,6 +391,9 @@ iscsi_disconnect(struct iscsi_context *iscsi)
int
iscsi_get_fd(struct iscsi_context *iscsi)
{
if (iscsi->old_iscsi) {
return iscsi->old_iscsi->fd;
}
return iscsi->fd;
}
@@ -391,6 +402,11 @@ iscsi_which_events(struct iscsi_context *iscsi)
{
int events = iscsi->is_connected ? POLLIN : POLLOUT;
if (iscsi->pending_reconnect && iscsi->is_reconnecting &&
time(NULL) < iscsi->next_reconnect) {
return 0;
}
if (iscsi->outqueue_current != NULL ||
(iscsi->outqueue != NULL && !iscsi->is_corked &&
(iscsi_serial32_compare(iscsi->outqueue->cmdsn, iscsi->maxcmdsn) <= 0 ||
@@ -763,6 +779,9 @@ iscsi_service_reconnect_if_loggedin(struct iscsi_context *iscsi)
return 0;
}
}
if (iscsi->is_reconnecting) {
return 0;
}
return -1;
}
@@ -774,7 +793,13 @@ iscsi_service(struct iscsi_context *iscsi, int revents)
}
if (iscsi->pending_reconnect) {
iscsi_reconnect(iscsi);
if (time(NULL) >= iscsi->next_reconnect) {
return iscsi_reconnect(iscsi);
} else {
if (iscsi->is_reconnecting) {
return 0;
}
}
}
if (revents & POLLERR) {

View File

@@ -167,6 +167,48 @@ int iscsi_logout_sync(struct iscsi_context *iscsi)
return state.status;
}
/*
 * Drive the context until iscsi->is_reconnecting is cleared.
 *
 * Each round polls the descriptor from iscsi_get_fd() for the events
 * reported by iscsi_which_events() and hands the result to
 * iscsi_service(). poll() uses a 1000 ms timeout, so iscsi_service() is
 * still invoked when the socket stays idle.
 *
 * On return state->status is 0 if the loop ended because is_reconnecting
 * became zero, or -1 if poll() or iscsi_service() failed (the error
 * string of the context is set in that case).
 */
static void
reconnect_event_loop(struct iscsi_context *iscsi, struct iscsi_sync_state *state)
{
	struct pollfd pfd;
	int result = 0;

	while (result == 0 && iscsi->is_reconnecting) {
		pfd.fd = iscsi_get_fd(iscsi);
		pfd.events = iscsi_which_events(iscsi);

		if (poll(&pfd, 1, 1000) < 0) {
			iscsi_set_error(iscsi, "Poll failed");
			result = -1;
		} else if (iscsi_service(iscsi, pfd.revents) < 0) {
			iscsi_set_error(iscsi,
					"iscsi_service failed with : %s",
					iscsi_get_error(iscsi));
			result = -1;
		}
	}

	state->status = result;
}
int iscsi_reconnect_sync(struct iscsi_context *iscsi)
{
struct iscsi_sync_state state;
memset(&state, 0, sizeof(state));
if (iscsi_reconnect(iscsi) != 0) {
iscsi_set_error(iscsi, "Failed to reconnect. %s", iscsi_get_error(iscsi));
return -1;
}
reconnect_event_loop(iscsi, &state);
return state.status;
}
static void
iscsi_task_mgmt_sync_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *private_data)

View File

@@ -65,6 +65,7 @@ struct client {
uint64_t last_bytes;
int ignore_errors;
int max_reconnects;
int busy_cnt;
int err_cnt;
int retry_cnt;
@@ -110,7 +111,7 @@ void progress(struct client *client) {
uint64_t mbps = 1000000000UL * (client->bytes - client->last_bytes) / (now - client->last_ns);
printf ("%02" PRIu64 ":%02" PRIu64 ":%02" PRIu64 " - ", _runtime / 3600, (_runtime % 3600) / 60, _runtime % 60);
printf ("lba %" PRIu64 ", iops current %" PRIu64 " (%" PRIu64 " MB/s), ", client->pos, iops, mbps >> 20);
printf ("iops average %" PRIu64 " (%" PRIu64 " MB/s), in_flight %d, busy %d ", aiops, ambps >> 20, client->in_flight, client->busy_cnt);
printf ("iops average %" PRIu64 " (%" PRIu64 " MB/s), in_flight %d, busy %d ", aiops, ambps >> 20, client->in_flight, client->busy_cnt);
}
fflush(stdout);
client->last_ns = now;
@@ -215,7 +216,7 @@ void fill_read_queue(struct client *client)
}
void usage(void) {
fprintf(stderr,"Usage: iscsi-perf [-i <initiator-name>] [-m <max_requests>] [-b blocks_per_request] [-t timeout] [-r|--random] [-n|--ignore-errors] <LUN>\n");
fprintf(stderr,"Usage: iscsi-perf [-i <initiator-name>] [-m <max_requests>] [-b blocks_per_request] [-t timeout] [-r|--random] [-n|--ignore-errors] [-x <max_reconnects>] <LUN>\n");
exit(1);
}
@@ -228,7 +229,7 @@ void sig_handler (int signum ) {
proc_alarm = 1;
alarm(NOP_INTERVAL);
} else {
finished = 1;
finished++;
}
}
@@ -255,12 +256,13 @@ int main(int argc, char *argv[])
int option_index;
memset(&client, 0, sizeof(client));
client.max_reconnects = -1;
srand(time(NULL));
printf("iscsi-perf version %s - (c) 2014-2015 by Peter Lieven <pl@ĸamp.de>\n\n", VERSION);
while ((c = getopt_long(argc, argv, "i:m:b:t:nrR", long_options,
while ((c = getopt_long(argc, argv, "i:m:b:t:nrRx:", long_options,
&option_index)) != -1) {
switch (c) {
case 'i':
@@ -284,6 +286,9 @@ int main(int argc, char *argv[])
case 'R':
client.random_blocks = 1;
break;
case 'x':
client.max_reconnects = atoi(optarg);
break;
default:
fprintf(stderr, "Unrecognized option '%c'\n\n", c);
usage();
@@ -381,20 +386,24 @@ int main(int argc, char *argv[])
client.first_ns = client.last_ns = get_clock_ns();
iscsi_set_reconnect_max_retries(client.iscsi, client.max_reconnects);
fill_read_queue(&client);
alarm(NOP_INTERVAL);
while (client.in_flight && !client.err_cnt) {
while (client.in_flight && !client.err_cnt && finished < 2) {
pfd[0].fd = iscsi_get_fd(client.iscsi);
pfd[0].events = iscsi_which_events(client.iscsi);
if (proc_alarm) {
if (iscsi_get_nops_in_flight(client.iscsi) > MAX_NOP_FAILURES) {
fprintf(stderr, "\n\nABORT: NOP timeout.\n");
exit(10);
iscsi_reconnect(client.iscsi);
} else {
iscsi_nop_out_async(client.iscsi, NULL, NULL, 0, NULL);
}
if (!iscsi_get_nops_in_flight(client.iscsi)) {
finished = 0;
}
iscsi_nop_out_async(client.iscsi, NULL, NULL, 0, NULL);
proc_alarm = 0;
}
@@ -411,7 +420,7 @@ int main(int argc, char *argv[])
progress(&client);
if (!client.err_cnt) {
if (!client.err_cnt && finished < 2) {
printf ("\n\nfinished.\n");
iscsi_logout_sync(client.iscsi);
} else {