diff --git a/README b/README index 758dcd0..26b9b9f 100644 --- a/README +++ b/README @@ -39,6 +39,8 @@ Arguments: Username and password for bidirectional CHAP authentication: target_user= target_password= +Transport: +iser Example: diff --git a/examples/Makefile.am b/examples/Makefile.am index 42ad818..0de0b1f 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,6 +1,6 @@ AM_CPPFLAGS=-I. -I${srcdir}/../include "-D_U_=__attribute__((unused))" \ "-D_R_(A,B)=__attribute__((format(printf,A,B)))" -AM_CFLAGS=$(WARN_CFLAGS) +AM_CFLAGS=$(WARN_CFLAGS) -I/usr/include LDADD = ../lib/libiscsi.la noinst_PROGRAMS = iscsiclient iscsi-dd diff --git a/include/iscsi-private.h b/include/iscsi-private.h index d2d2cf0..c836b0e 100644 --- a/include/iscsi-private.h +++ b/include/iscsi-private.h @@ -389,6 +389,8 @@ void iscsi_init_tcp_transport(struct iscsi_context *iscsi); void iscsi_tcp_free_pdu(struct iscsi_context *iscsi, struct iscsi_pdu *pdu); +int iscsi_service_reconnect_if_loggedin(struct iscsi_context *iscsi); + struct iscsi_transport { int (*connect)(struct iscsi_context *iscsi, union socket_address *sa, int ai_family); int (*queue_pdu)(struct iscsi_context *iscsi, struct iscsi_pdu *pdu); diff --git a/include/iscsi.h b/include/iscsi.h index f993d19..47f162f 100644 --- a/include/iscsi.h +++ b/include/iscsi.h @@ -53,7 +53,8 @@ struct scsi_iovec; "[:]\"" enum iscsi_transport_type { - TCP_TRANSPORT = 0 + TCP_TRANSPORT = 0, + ISER_TRANSPORT = 1 }; EXTERN void iscsi_set_cache_allocations(struct iscsi_context *iscsi, int ca); @@ -150,6 +151,7 @@ struct iscsi_url { char target_passwd[MAX_STRING_SIZE + 1]; int lun; struct iscsi_context *iscsi; + enum iscsi_transport_type transport; }; /* @@ -270,6 +272,7 @@ EXTERN int iscsi_init_transport(struct iscsi_context *iscsi, */ EXTERN int iscsi_set_alias(struct iscsi_context *iscsi, const char *alias); + /* * Set the iqn name of the taqget to login to. * The target name must be set before a normal-login can be initiated. 
diff --git a/include/iser-private.h b/include/iser-private.h
new file mode 100644
index 0000000..164ba2b
--- /dev/null
+++ b/include/iser-private.h
@@ -0,0 +1,216 @@
+/*
+   Copyright (c) 2014-2016, Mellanox Technologies, Ltd. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see .
+*/
+#ifndef __iser_private_h__
+#define __iser_private_h__
+
+#include
+#include
+
+#include "iscsi-private.h"
+#include "scsi-lowlevel.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __linux
+
+#include
+#include
+#include
+
+/* Branch hint: tells the compiler the condition is expected to be false. */
+#define unlikely(x) __builtin_expect (!!(x), 0)
+
+/* Bits of the flags byte in struct iser_hdr. */
+#define ISER_VER 0x10 /* written into every outgoing iSER header */
+#define ISER_WSV 0x08 /* set when write_stag/write_va are filled in */
+#define ISER_RSV 0x04 /* set when read_stag/read_va are filled in */
+
+/* Number of tx descriptors (and data buffers) pre-registered by iser_reg_mr(). */
+#define NUM_MRS 0x100
+/* Size in bytes of the data buffer attached to each tx descriptor. */
+#define DATA_BUFFER_SIZE 0x40000
+
+/* Bytes covered by the header sge: iSER header followed by the raw iSCSI header. */
+#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + ISCSI_RAW_HEADER_SIZE)
+
+/* Data segment bytes carried inline in each struct iser_rx_desc. */
+#define ISER_RECV_DATA_SEG_LEN 128
+/* Length posted for a regular receive: both headers plus the inline data. */
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+
+/* Length posted for the login response receive buffer. */
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* Also the size of the login request region at the start of login_buf. */
+#define ISCSI_DEF_MAX_RECV_SEG_LEN 8192
+
+/* Read/write bits tested against byte 1 of the iSCSI header of a command. */
+#define BHSSC_FLAGS_R 0x40
+#define BHSSC_FLAGS_W 0x20
+
+/* Number of entries requested when the completion queue is created. */
+#define ISER_MAX_CQ_LEN 1024
+
+/* Flags of struct iser_cm_hdr, sent as private data with rdma_connect(). */
+#define ISER_ZBVA_NOT_SUPPORTED 0x80
+#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
+
+/* Kind of PDU a tx descriptor carries; set by the send control/command paths. */
+enum desc_type {
+	ISCSI_CONTROL = 0,
+	ISCSI_COMMAND};
+
+/* State of an iSER connection, stored in struct iser_conn. */
+enum conn_state{
+	CONN_ERROR = 0,
+	CONN_DISCONNECTED,
+	CONN_ESTABLISHED};
+
+/* Direction of the data transfer attached to a command tx descriptor. */
+enum data_dir{
+	DATA_WRITE = 0,
+	DATA_READ};
+
+#define SHIFT_4K 12
+#define SIZE_4K (1ULL << SHIFT_4K)
+#define MASK_4K (~(SIZE_4K-1))
+
+#define ISER_DEF_XMIT_CMDS_MAX 512
+#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
+#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
+
+
+/* Pads the packed struct iser_rx_desc to a total of 256 bytes. */
+#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
+		sizeof(struct ibv_mr*) + sizeof(struct ibv_sge)))
+
+/**
+ * struct iser_hdr - iSER header
+ *
+ * @flags: flags support (zbva, remote_inv)
+ * @rsvd: reserved
+ * @write_stag: write rkey
+ * @write_va: write virtual address
+ * @read_stag: read rkey
+ * @read_va: read virtual address
+ */
+
+struct iser_hdr {
+	uint8_t flags;
+	uint8_t rsvd[3];
+	uint32_t write_stag;
+	uint64_t write_va;
+	uint32_t read_stag;
+	uint64_t read_va;
+} __attribute__((packed));
+
+/**
+ * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header
+ * @data: received data segment
+ * @rx_sg: ibv_sge of receive buffer
+ * @hdr_mr: memory region registered over the first
+ *          ISER_RX_PAYLOAD_SIZE bytes of this descriptor
+ * @pad: padding
+ */
+
+
+struct iser_rx_desc {
+	struct iser_hdr iser_header;
+	char iscsi_header[ISCSI_RAW_HEADER_SIZE];
+	char data[ISER_RECV_DATA_SEG_LEN];
+	struct ibv_sge rx_sg;
+	struct ibv_mr *hdr_mr;
+	char pad[ISER_RX_PAD_SIZE];
+} __attribute__((packed));
+
+
+/**
+ * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ *
+ * @iser_header: iser header
+ * @iscsi_header: iscsi header (bhs)
+ * @tx_sg: sg[0] points to iser/iscsi headers
+ *         sg[1] optionally points to either of immediate data
+ *         unsolicited data-out or control
+ * @num_sge: number sges used on this TX task
+ * @hdr_mr: iser/iscsi headers mr
+ * @data_buff: DATA_BUFFER_SIZE bytes buffer advertised to the
+ *             target for both read and write commands
+ * @data_mr: mr registered over data_buff
+ * @type: control or command descriptor
+ * @data_dir: direction of the transfer using data_buff
+ * @next: next descriptor on the list
+ */
+
+struct iser_tx_desc {
+	struct iser_hdr iser_header;
+	unsigned char iscsi_header[ISCSI_RAW_HEADER_SIZE];
+	struct ibv_sge tx_sg[2];
+	int num_sge;
+	struct ibv_mr *hdr_mr;
+	char *data_buff;
+	struct ibv_mr *data_mr;
+	enum desc_type type;
+	enum data_dir data_dir;
+	struct iser_tx_desc *next;
+};
+
+/**
+ * struct iser_cm_hdr - private data sent with the rdma_cm connect request
+ *
+ * @flags: ISER_ZBVA_NOT_SUPPORTED / ISER_SEND_W_INV_NOT_SUPPORTED
+ * @rsvd: reserved
+ *
+ * Packed with the same attribute as struct iser_hdr. A bare "__packed"
+ * after the closing brace is not an attribute in user space: it would
+ * declare a global variable of that name in every file including this
+ * header.
+ */
+struct iser_cm_hdr {
+	uint8_t flags;
+	uint8_t rsvd[3];
+} __attribute__((packed));
+
+/**
+ * struct iser_pdu - iSCSI pdu extended with its iSER send descriptor
+ *
+ * @iscsi_pdu: generic pdu; must stay the first member because the
+ *             transport converts between the two with container_of()
+ * @desc: tx descriptor taken from the connection pool for this pdu
+ */
+struct iser_pdu {
+	struct iscsi_pdu iscsi_pdu;
+	struct iser_tx_desc *desc;
+};
+
+/**
+ * struct iser_conn - per connection RDMA state
+ *
+ * @cma_id: rdma_cm id; its context pointer is the iscsi context
+ * @pd: protection domain all memory regions are registered in
+ * @cq: completion queue shared by send and receive
+ * @qp: queue pair created on @cma_id
+ * @comp_channel: completion channel whose fd is handed to the application
+ * @rx_wr: work requests reused for every batch of posted receives
+ * @login_resp_mr: mr over the login response region of @login_buf
+ * @login_req_mr: mr over the login request region of @login_buf
+ * @login_buf: one allocation holding the request region followed by
+ *             the response region
+ * @login_req_buf: start of the request region (same as @login_buf)
+ * @login_resp_buf: start of the response region
+ * @rx_descs: array of receive descriptors
+ * @num_rx_descs: number of entries in @rx_descs
+ * @rx_desc_head: index of the next receive descriptor to post
+ * @post_recv_buf_count: number of receive buffers currently posted
+ * @qp_max_recv_dtos: ring size used when walking @rx_descs
+ * @tx_desc: list of free, pre-registered tx descriptors
+ */
+struct iser_conn {
+	struct rdma_cm_id *cma_id;
+	struct rdma_event_channel *cma_channel;
+	struct rdma_cm_event *cma_event;
+
+	struct ibv_pd *pd;
+	struct ibv_cq *cq;
+	struct ibv_qp *qp;
+	struct ibv_comp_channel *comp_channel;
+
+	struct ibv_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+
+	sem_t sem_connect;
+
+	struct ibv_mr *login_resp_mr;
+	struct ibv_mr *login_req_mr;
+	unsigned char *login_buf;
+	unsigned char *login_req_buf;
+	unsigned char *login_resp_buf;
+
+	pthread_t cmthread;
+
+	struct iser_rx_desc *rx_descs;
+	uint32_t num_rx_descs;
+	unsigned int rx_desc_head;
+
+	int post_recv_buf_count;
+	int qp_max_recv_dtos;
+	int min_posted_rx;
+	uint16_t max_cmds;
+
+	enum conn_state conn_state;
+
+	struct iser_tx_desc *tx_desc;
+};
+
+
+/*
+ * Transport object stored in iscsi->t; the generic part is recovered
+ * into this struct with container_of().
+ */
+struct iser_transport { /* struct iser_conn */
+
+	struct iscsi_transport t;
+	struct iser_conn iser_conn;
+};
+
+void iscsi_init_iser_transport(struct iscsi_context *iscsi);
+
+#endif /* __linux */
+
+#endif /* __iser_private_h__ */
diff --git a/lib/Makefile.am b/lib/Makefile.am index 806a987..44bfbcf 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -10,6 +10,10 @@ if !HAVE_LIBGCRYPT libiscsi_la_SOURCES += md5.c endif +if HAVE_LINUX_ISER +libiscsi_la_SOURCES += iser.c +endif + SOCURRENT=7 SOREVISON=2 SOAGE=0 @@ -17,11 +21,15 @@ libiscsi_la_LDFLAGS = \ -version-info $(SOCURRENT):$(SOREVISON):$(SOAGE) -bindir $(bindir) \ -no-undefined -export-symbols ${srcdir}/libiscsi.syms +if HAVE_LINUX_ISER +libiscsi_la_LDFLAGS += -libverbs -lrdmacm +endif + libiscsi_la_CPPFLAGS = -I${srcdir}/../include -I$(srcdir)/include \ "-D_U_=__attribute__((unused))" \ "-D_R_(A,B)=__attribute__((format(printf,A,B)))" -AM_CFLAGS=$(WARN_CFLAGS) +AM_CFLAGS=$(WARN_CFLAGS) -I/usr/include dist_noinst_DATA = libiscsi.syms libiscsi.def diff --git a/lib/init.c b/lib/init.c index 1349695..19c2f7b 100644 --- 
a/lib/init.c +++ b/lib/init.c @@ -15,6 +15,10 @@ You should have received a copy of the GNU Lesser General Public License along with this program; if not, see . */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #define _GNU_SOURCE #if defined(WIN32) @@ -32,6 +36,9 @@ #include #include "iscsi.h" #include "iscsi-private.h" +#ifdef HAVE_LINUX_ISER +#include "iser-private.h" +#endif #include "slist.h" @@ -42,6 +49,9 @@ int iscsi_init_transport(struct iscsi_context *iscsi, enum iscsi_transport_type transport) { struct tcp_transport *tcp_transport; +#ifdef HAVE_LINUX_ISER + struct iser_transport *iser_transport; +#endif if (iscsi->t) { iscsi_free(iscsi, iscsi->t); @@ -59,6 +69,17 @@ int iscsi_init_transport(struct iscsi_context *iscsi, iscsi->t = &tcp_transport->t; iscsi_init_tcp_transport(iscsi); break; +#ifdef HAVE_LINUX_ISER + case ISER_TRANSPORT: + iser_transport = iscsi_malloc(iscsi, sizeof(struct iser_transport)); + if (iser_transport == NULL) { + iscsi_set_error(iscsi, "Couldn't allocate memory for transport\n"); + return -1; + } + iscsi->t = &iser_transport->t; + iscsi_init_iser_transport(iscsi); + break; +#endif default: iscsi_set_error(iscsi, "Unfamiliar transport type"); return -1; @@ -514,6 +535,9 @@ iscsi_parse_url(struct iscsi_context *iscsi, const char *url, int full) char *lun; char *tmp; int l = 0; +#ifdef HAVE_LINUX_ISER + int is_iser = 0; +#endif if (strncmp(url, "iscsi://", 8)) { if (full) { @@ -554,6 +578,10 @@ iscsi_parse_url(struct iscsi_context *iscsi, const char *url, int full) target_user = value; } else if (!strcmp(key, "target_password")) { target_passwd = value; +#ifdef HAVE_LINUX_ISER + } else if (!strcmp(key, "iser")) { + is_iser = 1; +#endif } tmp = next; } @@ -647,6 +675,16 @@ iscsi_parse_url(struct iscsi_context *iscsi, const char *url, int full) } } +#ifdef HAVE_LINUX_ISER + if (iscsi) { + if (is_iser) { + if (iscsi_init_transport(iscsi, ISER_TRANSPORT)) + iscsi_set_error(iscsi, "Cannot set transport to iSER"); + } + } + 
iscsi_url->transport = is_iser; +#endif + if (full) { strncpy(iscsi_url->target, target, MAX_STRING_SIZE); iscsi_url->lun = l;
diff --git a/lib/iser.c b/lib/iser.c
new file mode 100644
index 0000000..eaffbd4
--- /dev/null
+++ b/lib/iser.c
@@ -0,0 +1,1443 @@
+/*
+   Copyright (c) 2014-2016, Mellanox Technologies, Ltd. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program; if not, see .
+*/
+
+#include
+#include
+#include
+#include
+#include "slist.h"
+#include
+#include
+#include
+#include "iscsi.h"
+#include "iser-private.h"
+#include "iscsi-private.h"
+#include "scsi-lowlevel.h"
+#include
+#include
+#include
+
+#ifdef __linux
+
+/* Forward declaration; called from iscsi_iser_service(). */
+int cq_handle(struct iser_conn *iser_conn);
+
+/*
+ * iscsi_iser_get_fd() - Return completion queue
+ *                       event channel file descriptor.
+ * @iscsi: iscsi context; iscsi->t must be the 't' member of a
+ *         struct iser_transport
+ *
+ * Reads comp_channel->fd without checking comp_channel, so the
+ * completion channel has to exist when this is called.
+ */
+int
+iscsi_iser_get_fd(struct iscsi_context *iscsi)
+{
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+
+	/* recover the enclosing transport from the generic transport pointer */
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+
+	iser_conn = &iser_transport->iser_conn;
+
+	return iser_conn->comp_channel->fd;
+}
+
+/*
+ * iscsi_iser_which_events() - Which events to wait for on file descriptor
+ * @iscsi_context: iscsi_context Unused
+ *
+ * Notes:
+ * CQ can only create POLLIN events, hence this function
+ * will return same value of 1 each time.
+ * Being used in QEMU iscsi block so we need compatibility with TCP
+ */
+int
+iscsi_iser_which_events(struct iscsi_context *iscsi _U_)
+{
+	/* iSER only waits for CQ events, and those are always POLLIN */
+	return POLLIN;
+}
+
+/*
+ * iscsi_iser_service() - Processing CQ events
+ * @iscsi: iscsi context
+ * @revents: which events to handle; must be exactly POLLIN
+ *
+ * Returns 0 on success. Returns -1 with the error string set when
+ * @revents is anything but POLLIN or when cq_handle() fails.
+ *
+ * Notes:
+ * CQ can only create POLLIN events, hence this function
+ * will poll the cq for completion until boundary or emptiness.
+ */
+int
+iscsi_iser_service(struct iscsi_context *iscsi, int revents)
+{
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+
+	if (revents != POLLIN) {
+		iscsi_set_error(iscsi, "revents is not POLLIN");
+		return -1;
+	}
+
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	iser_conn = &iser_transport->iser_conn;
+
+	if (cq_handle(iser_conn)) {
+		iscsi_set_error(iscsi, "CQ handle Failed");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * iser_free_rx_descriptors() - freeing descriptors memory
+ * @iser_conn: ib connection context
+ *
+ * Deregisters the header mr of every receive descriptor and frees the
+ * descriptor array. A failed deregistration is reported through the
+ * error string but does not stop the loop.
+ */
+void
+iser_free_rx_descriptors(struct iser_conn *iser_conn)
+{
+	struct iser_rx_desc *rx_desc;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	int i;
+
+	rx_desc = iser_conn->rx_descs;
+	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
+		if (ibv_dereg_mr(rx_desc->hdr_mr))
+			iscsi_set_error(iscsi, "Failed to deregister hdr mr");
+	}
+	iscsi_free(iscsi, iser_conn->rx_descs);
+
+	iser_conn->rx_descs = NULL;
+
+	return;
+}
+
+/*
+ * iser_free_login_buf() - freeing login buffer
+ * @iser_conn: ib connection context
+ *
+ * login_req_buf and login_resp_buf point into login_buf, so they are
+ * cleared together with it instead of being left dangling.
+ */
+void
+iser_free_login_buf(struct iser_conn *iser_conn)
+{
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	iscsi_free(iscsi, iser_conn->login_buf);
+	iser_conn->login_buf = NULL;
+	iser_conn->login_req_buf = NULL;
+	iser_conn->login_resp_buf = NULL;
+
+	return;
+}
+
+void
+iser_free_reg_mr(struct iser_conn *iser_conn)
+{
+	struct iser_tx_desc *tx_desc = iser_conn->tx_desc;
+	struct iscsi_context *iscsi =
iser_conn->cma_id->context;
+	struct iser_tx_desc *temp_tx_desc;
+
+	/* walk the free list, releasing both mrs and the data buffer of each entry */
+	while (tx_desc) {
+		ibv_dereg_mr(tx_desc->hdr_mr);
+		ibv_dereg_mr(tx_desc->data_mr);
+
+		if (tx_desc->data_buff)
+			iscsi_free(iscsi, tx_desc->data_buff);
+
+		/* read the link before the node is freed */
+		temp_tx_desc = tx_desc;
+		tx_desc = tx_desc->next;
+		iscsi_free(iscsi, temp_tx_desc);
+	}
+	iser_conn->tx_desc = NULL;
+
+	return;
+}
+
+/*
+ * iser_free_iser_conn_res() - freeing ib context resources
+ * @iser_conn: ib connection context
+ * @destroy: when false only the qp is destroyed; when true the cm
+ *           thread is cancelled and every verbs resource is released
+ *
+ * Every handle is cleared once it has been released, so calling this
+ * function again does not free the same object twice.
+ */
+void
+iser_free_iser_conn_res(struct iser_conn *iser_conn, bool destroy)
+{
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	if (iser_conn->qp) {
+		rdma_destroy_qp(iser_conn->cma_id);
+		iser_conn->qp = NULL;
+	}
+
+	if (destroy) {
+
+		if (iser_conn->cmthread) {
+			pthread_cancel(iser_conn->cmthread);
+			iser_conn->cmthread = 0;
+		}
+
+		iser_free_reg_mr(iser_conn);
+
+		if (iser_conn->login_buf)
+			iser_free_login_buf(iser_conn);
+
+		if (iser_conn->rx_descs) {
+			iser_free_rx_descriptors(iser_conn);
+			iser_conn->rx_descs = NULL;
+		}
+
+		if (iser_conn->login_resp_mr) {
+			ret = ibv_dereg_mr(iser_conn->login_resp_mr);
+			if (ret)
+				iscsi_set_error(iscsi, "Failed to deregister login response mr");
+			iser_conn->login_resp_mr = NULL;
+		}
+
+		if (iser_conn->cq) {
+			ret = ibv_destroy_cq(iser_conn->cq);
+			if (ret)
+				iscsi_set_error(iscsi, "Failed to destroy cq");
+			iser_conn->cq = NULL;
+		}
+
+		if (iser_conn->comp_channel) {
+			ret = ibv_destroy_comp_channel(iser_conn->comp_channel);
+			if (ret)
+				iscsi_set_error(iscsi, "Failed to destroy completion channel");
+			iser_conn->comp_channel = NULL;
+		}
+
+		if (iser_conn->pd) {
+			ret = ibv_dealloc_pd(iser_conn->pd);
+			if (ret)
+				iscsi_set_error(iscsi, "Failed to deallocate pd");
+			iser_conn->pd = NULL;
+		}
+	}
+
+	return;
+}
+
+/*
+ * iser_conn_release() - releasing ib resources
+ *                       and destroying cm id
+ * @iser_conn: ib connection context
+ */
+void
+iser_conn_release(struct iser_conn *iser_conn)
+{
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	iser_free_iser_conn_res(iser_conn,true);
+
+	if (iser_conn->cma_id) {
+		ret =
rdma_destroy_id(iser_conn->cma_id);
+		if (ret)
+			iscsi_set_error(iscsi, "Failed destroying cm id");
+
+		iser_conn->cma_id = NULL;
+	}
+
+	return;
+}
+
+/*
+ * iser_conn_terminate() - disconnecting rdma_cm
+ * @iser_conn: ib connection context
+ *
+ * Does nothing when the connection has no cm id. The iscsi context is
+ * only read from the cm id after that check, so the check really
+ * protects the dereference.
+ */
+void
+iser_conn_terminate(struct iser_conn *iser_conn)
+{
+	struct iscsi_context *iscsi;
+	int ret;
+
+	if (iser_conn->cma_id == NULL)
+		return;
+
+	iscsi = iser_conn->cma_id->context;
+
+	ret = rdma_disconnect(iser_conn->cma_id);
+	if (ret)
+		iscsi_set_error(iscsi, "Failed to disconnect, conn: 0x%p, err %d\n",
+				iser_conn, ret);
+
+	return;
+}
+
+/*
+ * iscsi_iser_disconnect() - disconnecting iSER and
+ *                           freeing resources
+ * @iscsi: iscsi context whose transport is the iSER transport
+ *
+ * Always returns 0; the context is marked as not connected.
+ */
+int
+iscsi_iser_disconnect(struct iscsi_context *iscsi) {
+
+	struct iser_transport *iser_transport;
+
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+
+	iser_conn_terminate(&iser_transport->iser_conn);
+
+	iser_conn_release(&iser_transport->iser_conn);
+
+	iscsi->fd = -1;
+	iscsi->is_connected = 0;
+	iscsi->is_corked = 0;
+
+	return 0;
+}
+
+/*
+ * iscsi_iser_new_pdu() - allocate a zeroed iser pdu
+ * @iscsi: iscsi context used for the allocation
+ * @size: unused; the allocation is always sizeof(struct iser_pdu)
+ *
+ * Returns the embedded iscsi pdu, or NULL with the error string set
+ * when the allocation fails.
+ */
+struct iscsi_pdu*
+iscsi_iser_new_pdu(struct iscsi_context *iscsi, __attribute__((unused))size_t size) {
+
+	struct iscsi_pdu *pdu;
+	struct iser_pdu *iser_pdu;
+
+	iser_pdu = iscsi_zmalloc(iscsi, sizeof(*iser_pdu));
+	if (iser_pdu == NULL) {
+		iscsi_set_error(iscsi, "Out-Of-Memory, failed to allocate iser pdu");
+		return NULL;
+	}
+
+	pdu = &iser_pdu->iscsi_pdu;
+	pdu->indata.data = NULL;
+
+	return pdu;
+}
+
+void
+iscsi_iser_free_pdu(struct iscsi_context *iscsi, struct iscsi_pdu *pdu)
+{
+	struct iser_pdu *iser_pdu;
+
+	if (pdu == NULL) {
+		iscsi_set_error(iscsi, "trying to free NULL pdu");
+		return;
+	}
+
+	iser_pdu = container_of(pdu, struct iser_pdu, iscsi_pdu);
+
+	if (pdu->outdata.size <= iscsi->smalloc_size) {
+		iscsi_sfree(iscsi, pdu->outdata.data);
+	} else {
+		iscsi_free(iscsi, pdu->outdata.data);
+	}
+	pdu->outdata.data = NULL;
+
+	if (pdu->indata.size <= iscsi->smalloc_size) {
+		iscsi_sfree(iscsi, pdu->indata.data);
+	} else {
+		iscsi_free(iscsi, pdu->indata.data);
+	}
+
+	pdu->indata.data = NULL;
+
+	if
(iscsi->outqueue_current == pdu) {
+		iscsi->outqueue_current = NULL;
+	}
+
+	iscsi_free(iscsi, iser_pdu);
+}
+
+/**
+ ** iser_create_send_desc() - fill the headers of a send descriptor
+ **
+ ** @iser_pdu: iser pdu including iscsi pdu inside it
+ **
+ ** Zeroes the iSER header, stamps the version flag, resets the sge
+ ** count to one and copies the raw iSCSI header out of the pdu.
+ **/
+void iser_create_send_desc(struct iser_pdu *iser_pdu) {
+
+	struct iser_tx_desc *desc = iser_pdu->desc;
+	struct iser_hdr *iser_hdr = &desc->iser_header;
+
+	memset(iser_hdr, 0, sizeof(*iser_hdr));
+	iser_hdr->flags = ISER_VER;
+	desc->num_sge = 1;
+	memcpy(desc->iscsi_header, iser_pdu->iscsi_pdu.outdata.data, ISCSI_RAW_HEADER_SIZE);
+}
+
+/*
+ * iser_post_recvl() - posting login buffer receive request
+ *                     on receive queue
+ * @iser_conn: ib connection context
+ *
+ * Returns 0 on success, -1 with the error string set otherwise. The
+ * posted buffer counter is bumped before posting and rolled back when
+ * the post fails.
+ */
+int iser_post_recvl(struct iser_conn *iser_conn) {
+
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	struct ibv_recv_wr *bad_wr;
+	struct ibv_sge login_sge = {
+		.addr = (uintptr_t)iser_conn->login_resp_buf,
+		.length = ISER_RX_LOGIN_SIZE,
+		.lkey = iser_conn->login_resp_mr->lkey,
+	};
+	struct ibv_recv_wr login_wr = {
+		.wr_id = (uintptr_t)iser_conn->login_resp_buf,
+		.next = NULL,
+		.sg_list = &login_sge,
+		.num_sge = 1,
+	};
+
+	iser_conn->post_recv_buf_count++;
+	if (ibv_post_recv(iser_conn->qp, &login_wr, &bad_wr) != 0) {
+		iscsi_set_error(iscsi, "Failed to post recv login response\n");
+		iser_conn->post_recv_buf_count--;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * iser_post_send() - posting send requests
+ *                    on send queue
+ * @iser_conn: ib connection context
+ * @iser_tx_desc: send descriptor
+ * @signal: signal completion or not
+ *
+ * Notes:
+ * Need to handle signal better
+ */
+int iser_post_send(struct iser_conn *iser_conn, struct iser_tx_desc *tx_desc, bool signal) {
+
+	int ret;
+	struct ibv_send_wr send_wr;
+	struct ibv_send_wr *send_wr_failed;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	memset(&send_wr, 0, sizeof(send_wr));
+	send_wr.next = NULL;
+	send_wr.wr_id = (uintptr_t)tx_desc;
+	send_wr.sg_list = tx_desc->tx_sg;
+	send_wr.num_sge = tx_desc->num_sge;
+	send_wr.opcode = IBV_WR_SEND;
+	send_wr.send_flags = signal ? IBV_SEND_SIGNALED : 0;
+
+	ret = ibv_post_send(iser_conn->qp, &send_wr, &send_wr_failed);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed to post send\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * iser_send_control() - sending iscsi pdu of type CONTROL
+ *
+ * @iser_transport: iser connection context
+ * @iser_pdu: iser pdu to send
+ *
+ * The data segment, if any, is copied into the login request buffer
+ * and sent as a second sge. A pdu shorter than the iSCSI header, or
+ * one whose data segment is larger than the request region of the
+ * login buffer (ISCSI_DEF_MAX_RECV_SEG_LEN bytes), is rejected.
+ *
+ * Returns 0 on success, -1 with the error string set otherwise.
+ */
+int iser_send_control(struct iser_transport *iser_transport, struct iser_pdu *iser_pdu) {
+
+	struct iser_tx_desc *tx_desc;
+	struct iser_conn *iser_conn = &iser_transport->iser_conn;
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	size_t datalen;
+
+	if (iser_pdu == NULL) {
+		iscsi_set_error(iscsi, "Failed in iser_pdu");
+		return -1;
+	}
+
+	/* without this check the subtraction below wraps around */
+	if (iser_pdu->iscsi_pdu.outdata.size < ISCSI_RAW_HEADER_SIZE) {
+		iscsi_set_error(iscsi, "Control pdu is shorter than the iscsi header");
+		return -1;
+	}
+	datalen = iser_pdu->iscsi_pdu.outdata.size - ISCSI_RAW_HEADER_SIZE;
+
+	/* the response region starts right after the request region */
+	if (datalen > ISCSI_DEF_MAX_RECV_SEG_LEN) {
+		iscsi_set_error(iscsi, "Control pdu data does not fit the login request buffer");
+		return -1;
+	}
+
+	tx_desc = iser_pdu->desc;
+	tx_desc->type = ISCSI_CONTROL;
+
+	iser_create_send_desc(iser_pdu);
+
+	if (datalen > 0) {
+		char* data = (char*)&iser_pdu->iscsi_pdu.outdata.data[ISCSI_RAW_HEADER_SIZE];
+		struct ibv_sge *tx_dsg = &tx_desc->tx_sg[1];
+
+		/*
+		 * NOTE(review): a new mr is registered for every control
+		 * pdu carrying data and the previous handle is overwritten;
+		 * confirm that the earlier mr is deregistered elsewhere.
+		 */
+		iser_conn->login_req_mr = ibv_reg_mr(iser_conn->pd, iser_conn->login_req_buf,
+						     datalen , IBV_ACCESS_LOCAL_WRITE);
+		if (iser_conn->login_req_mr == NULL) {
+			iscsi_set_error(iscsi, "Failed Reg iser_conn->login_req_mr");
+			return -1;
+		}
+
+		memcpy(iser_conn->login_req_buf, data, datalen);
+
+		tx_dsg->addr = (uintptr_t)iser_conn->login_req_buf;
+		tx_dsg->length = datalen;
+		tx_dsg->lkey = iser_conn->login_req_mr->lkey;
+		tx_desc->num_sge = 2;
+	}
+
+	/* the answer to these pdus arrives in the login response buffer */
+	if (iser_pdu->iscsi_pdu.response_opcode == ISCSI_PDU_LOGIN_RESPONSE ||
+	    iscsi->session_type == ISCSI_SESSION_DISCOVERY) {
+		ret = iser_post_recvl(iser_conn);
+		if (ret) {
+			iscsi_set_error(iscsi, "Failed Post Recv login");
+			return -1;
+		}
+	}
+
+	ret = iser_post_send(iser_conn, tx_desc, true);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed to post send");
+		return -1;
+	}
+
+	return 0;
+
+}
+
+/*
+ * iser_initialize_headers() - Initialize task headers
+ *
+ * @iser_pdu: iser pdu
+ * @iser_conn: iser_connection context
+ *
+ * Takes the first free tx descriptor off the connection list, attaches
+ * it to the pdu and points sg[0] at the descriptor headers.
+ *
+ * Returns 0 on success, or -1 when no free descriptor is left.
+ */
+int
+iser_initialize_headers(struct iser_pdu *iser_pdu, struct iser_conn *iser_conn)
+{
+	struct iser_tx_desc *tx_desc;
+
+	tx_desc = iser_conn->tx_desc;
+	if (tx_desc == NULL) {
+		/* pool exhausted; the caller reports the error */
+		return -1;
+	}
+	ISCSI_LIST_REMOVE(&iser_conn->tx_desc, tx_desc);
+
+	iser_pdu->desc = tx_desc;
+
+	/* the iser and iscsi headers sit at the start of the descriptor */
+	tx_desc->tx_sg[0].addr = (uintptr_t)tx_desc;
+	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
+	tx_desc->tx_sg[0].lkey = tx_desc->hdr_mr->lkey;
+
+	return 0;
+}
+
+/*
+ * iser_prepare_read_cmd() - preparing read command
+ *
+ * @iser_conn: ib connection context
+ * @iser_pdu: iser_pdu
+ *
+ * Advertises the descriptor data buffer to the target through the
+ * read_va/read_stag fields of the iSER header.
+ *
+ * Returns 0 on success, -1 when the task expects no data or the
+ * input buffer cannot be allocated.
+ *
+ * Notes:
+ * In case there isn't buffer from app
+ * we create buffer.
+ */
+static int
+iser_prepare_read_cmd(struct iser_conn *iser_conn,struct iser_pdu *iser_pdu)
+{
+	struct iser_hdr *hdr = &iser_pdu->desc->iser_header;
+	struct iser_tx_desc *tx_desc = iser_pdu->desc;
+	struct scsi_task *task = iser_pdu->iscsi_pdu.scsi_cbdata.task;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	size_t data_size = task->expxferlen;
+
+	if (data_size == 0) {
+		iscsi_set_error(iscsi, "Read command with no expected transfer length");
+		return -1;
+	}
+
+	if (task->iovector_in.iov == NULL) {
+		iser_pdu->iscsi_pdu.indata.data = iscsi_malloc(iscsi, data_size);
+		if (iser_pdu->iscsi_pdu.indata.data == NULL) {
+			iscsi_set_error(iscsi, "Failed to allocate data buffer");
+			return -1;
+		}
+		iser_pdu->iscsi_pdu.indata.size = data_size;
+	}
+
+	tx_desc->data_dir = DATA_READ;
+	/* go through uintptr_t so the pointer to integer conversion is well defined */
+	hdr->read_va = htobe64((uint64_t)(uintptr_t)tx_desc->data_buff);
+	hdr->read_stag = htobe32((uint32_t)tx_desc->data_mr->rkey);
+	hdr->flags |= ISER_RSV;
+
+	return 0;
+}
+
+/*
+ * iser_prepare_write_cmd() - preparing write command
+ *
+ * @iser_conn: ib connection context
+ * @iser_pdu: iser pdu
+ */
+static int
+iser_prepare_write_cmd(struct iser_conn *iser_conn, struct iser_pdu *iser_pdu)
+{
+	struct iser_hdr *hdr = &iser_pdu->desc->iser_header;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	struct iser_tx_desc *tx_desc = iser_pdu->desc;
+	struct scsi_iovector *iovector = iscsi_get_scsi_task_iovector_out(iscsi, &iser_pdu->iscsi_pdu);
+	size_t offset = 0;
+	int i;
+
+	if (iovector == NULL) {
+		iscsi_set_error(iscsi, "Can't find iovector data for DATA-OUT (RDMA)");
+		return -1;
+	}
+
+	tx_desc->data_dir = DATA_WRITE;
+
+	/* gather the application iovecs into the registered data buffer */
+	for(i = 0 ; i < iovector->niov ; i++) {
+		size_t len = iovector->iov[i].iov_len;
+
+		/* data_buff holds DATA_BUFFER_SIZE bytes; never write past it */
+		if (len > DATA_BUFFER_SIZE - offset) {
+			iscsi_set_error(iscsi, "DATA-OUT iovector does not fit the iSER data buffer");
+			return -1;
+		}
+		memcpy(&tx_desc->data_buff[offset], iovector->iov[i].iov_base, len);
+		offset += len;
+	}
+
+	/* advertise the buffer to the target */
+	hdr->flags |= ISER_WSV;
+	hdr->write_stag = htobe32((uint32_t)(tx_desc->data_mr->rkey));
+	hdr->write_va = htobe64((uint64_t)(uintptr_t)(tx_desc->data_buff));
+
+	return 0;
+}
+
+/*
+ * is_control_opcode() - check if iscsi opcode is of type CONTROL
+ *
+ * @opcode: first byte of the iscsi header
+ *
+ * Returns true for NOP-Out, login, logout and text requests and false
+ * for everything else, including SCSI commands.
+ *
+ * NOTE(review): ISCSI_PDU_REJECT is used as the opcode mask here;
+ * presumably its value covers all opcode bits - confirm.
+ */
+static bool
+is_control_opcode(uint8_t opcode)
+{
+	switch (opcode & ISCSI_PDU_REJECT) {
+	case ISCSI_PDU_NOP_OUT:
+	case ISCSI_PDU_LOGIN_REQUEST:
+	case ISCSI_PDU_LOGOUT_REQUEST:
+	case ISCSI_PDU_TEXT_REQUEST:
+		return true;
+	case ISCSI_PDU_SCSI_REQUEST:
+	default:
+		return false;
+	}
+}
+
+/*
+ * overflow_data_size() - check the transfer length of a command
+ *
+ * @iser_pdu: iser pdu carrying a scsi task
+ *
+ * Returns non zero when the expected transfer length of the task is
+ * larger than DATA_BUFFER_SIZE.
+ */
+int
+overflow_data_size(struct iser_pdu *iser_pdu)
+{
+	int data_size;
+
+	data_size = iser_pdu->iscsi_pdu.scsi_cbdata.task->expxferlen;
+
+	return (data_size > DATA_BUFFER_SIZE);
+}
+
+/*
+ * iser_send_command() - sending iscsi pdu of type COMMAND
+ *
+ * @iser_transport: iser connection context
+ * @iser_pdu: iser pdu to send
+ *
+ * Notes:
+ * Need to fix if failed preparation return -1
+ */
+int
+iser_send_command(struct iser_transport *iser_transport,
+		  struct iser_pdu *iser_pdu)
+{
+	struct iser_tx_desc *tx_desc = iser_pdu->desc;
+	struct iser_conn *iser_conn = &iser_transport->iser_conn;
+	int err = 0;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	tx_desc->type =
ISCSI_COMMAND;
+
+	iser_create_send_desc(iser_pdu);
+
+	/* DATA_BUFFER_SIZE is 0x40000 bytes, i.e. 256k */
+	if (overflow_data_size(iser_pdu)) {
+		iscsi_set_error(iscsi, "Libiscsi-iSER supports messages of at most 256k\n");
+		return -1;
+	}
+
+	if (iser_pdu->desc->iscsi_header[1] & BHSSC_FLAGS_R) {
+		err = iser_prepare_read_cmd(iser_conn, iser_pdu);
+		if (err) {
+			iscsi_set_error(iscsi, "error in prepare read cmd\n");
+			return -1;
+		}
+	} else if (iser_pdu->desc->iscsi_header[1] & BHSSC_FLAGS_W) {
+		err = iser_prepare_write_cmd(iser_conn, iser_pdu);
+		if (err) {
+			iscsi_set_error(iscsi, "error in prepare write cmd\n");
+			return -1;
+		}
+	}
+
+	err = iser_post_send(iser_conn, tx_desc, true);
+	if (err)
+		return -1;
+	return 0;
+}
+
+
+/*
+ * iscsi_iser_queue_pdu() - sending iscsi pdu
+ *
+ * @iscsi: iscsi context
+ * @pdu: iscsi pdu, embedded in a struct iser_pdu
+ *
+ * Returns 0 when the pdu was posted. Returns -1 with the error string
+ * set otherwise; in that case the pdu is taken off the wait list again
+ * and its tx descriptor, if one was attached, goes back to the free
+ * list, so nothing keeps pointing at a pdu the caller may free.
+ *
+ * Notes:
+ * Need to be compatible to TCP which has real queue,
+ * in iSER every queue pdu already sends all pdu (post_send)
+ */
+int iscsi_iser_queue_pdu(struct iscsi_context *iscsi, struct iscsi_pdu *pdu) {
+
+	struct iser_pdu *iser_pdu;
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+	uint8_t opcode;
+
+	if (pdu == NULL) {
+		iscsi_set_error(iscsi, "trying to queue NULL pdu");
+		return -1;
+	}
+
+	iser_pdu = container_of(pdu, struct iser_pdu, iscsi_pdu);
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	iser_conn = &iser_transport->iser_conn;
+	opcode = pdu->outdata.data[0];
+
+	iscsi_pdu_set_expstatsn(pdu, iscsi->statsn + 1);
+	ISCSI_LIST_ADD_END(&iscsi->waitpdu, pdu);
+
+	if (iser_initialize_headers(iser_pdu, iser_conn)) {
+		iscsi_set_error(iscsi, "initialize headers Failed\n");
+		goto fail;
+	}
+
+	if (unlikely(is_control_opcode(opcode))) {
+		if (iser_send_control(iser_transport, iser_pdu)) {
+			iscsi_set_error(iscsi, "iser_send_control Failed\n");
+			goto fail;
+		}
+	} else {
+		if (iser_send_command(iser_transport, iser_pdu)) {
+			iscsi_set_error(iscsi, "iser_send_command Failed\n");
+			goto fail;
+		}
+	}
+
+	return 0;
+
+fail:
+	/* nothing was posted: undo the bookkeeping done above */
+	if (iser_pdu->desc != NULL) {
+		ISCSI_LIST_ADD_END(&iser_conn->tx_desc, iser_pdu->desc);
+		iser_pdu->desc = NULL;
+	}
+	ISCSI_LIST_REMOVE(&iscsi->waitpdu, pdu);
+
+	return -1;
+}
+
+/*
+ * iser_create_iser_conn_res() - creating ib connections resources
+ *
+ * @iser_conn: ib connection context
+ *
+ * Creates a reliable connected qp on the cm id, using the connection
+ * cq for both directions. Returns 0 on success, -1 with the error
+ * string set otherwise.
+ */
+static int iser_create_iser_conn_res(struct iser_conn *iser_conn) {
+
+	struct ibv_qp_init_attr init_attr;
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	memset(&init_attr, 0, sizeof(struct ibv_qp_init_attr));
+	init_attr.qp_context = (void *)iser_conn->cma_id->context;
+	init_attr.send_cq = iser_conn->cq;
+	init_attr.recv_cq = iser_conn->cq;
+	init_attr.cap.max_send_wr = ISER_QP_MAX_RECV_DTOS;
+	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
+	init_attr.cap.max_send_sge = 2;
+	init_attr.cap.max_recv_sge = 1;
+	init_attr.qp_type = IBV_QPT_RC;
+
+	ret = rdma_create_qp(iser_conn->cma_id, iser_conn->pd, &init_attr);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed to create qp\n");
+		return -1;
+	}
+	iser_conn->qp = iser_conn->cma_id->qp;
+
+	return ret;
+}
+
+/*
+ * iser_addr_handler() - handles RDMA_CM_EVENT_ADDR_RESOLVED
+ *                       event in rdma_cm
+ *
+ * @cma_id: connection manager id
+ *
+ * Starts route resolution, then allocates the pd, the non blocking
+ * completion channel, the cq and the login buffer with its response
+ * mr. On failure everything allocated here is released again and the
+ * matching handles are cleared, so a later teardown does not free
+ * them a second time.
+ *
+ * Returns 0 on success, -1 with the error string set otherwise.
+ */
+static int iser_addr_handler(struct rdma_cm_id *cma_id) {
+
+	struct iscsi_context *iscsi = cma_id->context;
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+	int ret, flags;
+
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	iser_conn = &iser_transport->iser_conn;
+
+	ret = rdma_resolve_route(cma_id, 1000);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed resolving route\n");
+		return -1;
+	}
+
+	iser_conn->pd = ibv_alloc_pd(cma_id->verbs);
+	if (!iser_conn->pd) {
+		iscsi_set_error(iscsi, "Failed to alloc pd\n");
+		return -1;
+	}
+	iser_conn->comp_channel = ibv_create_comp_channel(cma_id->verbs);
+	if (!iser_conn->comp_channel) {
+		iscsi_set_error(iscsi, "Failed to create comp channel");
+		goto pd_error;
+	}
+
+	/* the application polls this fd, so it must never block */
+	flags = fcntl(iser_conn->comp_channel->fd, F_GETFL);
+	if (flags == -1) {
+		iscsi_set_error(iscsi, "Failed to set channel to nonblocking");
+		goto channel_error;
+	}
+	ret = fcntl(iser_conn->comp_channel->fd, F_SETFL, flags | O_NONBLOCK);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed to set channel to nonblocking");
+		goto channel_error;
+	}
+
+	iser_conn->cq = ibv_create_cq(cma_id->verbs,
+				      ISER_MAX_CQ_LEN,
+				      iser_transport,
+				      iser_conn->comp_channel,
+				      0);
+	if (!iser_conn->cq) {
+		iscsi_set_error(iscsi, "Failed to create cq\n");
+		goto channel_error;
+	}
+
+	if (ibv_req_notify_cq(iser_conn->cq, 0)) {
+		iscsi_set_error(iscsi, "ibv_req_notify_cq failed\n");
+		goto cq_error;
+	}
+
+	/* request region first, response region right behind it */
+	iser_conn->login_buf = iscsi_malloc(iscsi, ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE);
+	if (!iser_conn->login_buf) {
+		iscsi_set_error(iscsi, "Failed to allocate memory for login_buf\n");
+		goto cq_error;
+	}
+
+	iser_conn->login_req_buf = iser_conn->login_buf;
+	iser_conn->login_resp_buf = iser_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
+	iser_conn->login_resp_mr = ibv_reg_mr(iser_conn->pd, iser_conn->login_resp_buf,
+					      ISER_RX_LOGIN_SIZE, IBV_ACCESS_LOCAL_WRITE);
+	if(!iser_conn->login_resp_mr) {
+		iscsi_set_error(iscsi, "Failed to reg login_resp_mr\n");
+		goto buf_error;
+	}
+
+	return 0;
+
+buf_error:
+	iscsi_free(iscsi, iser_conn->login_buf);
+	iser_conn->login_buf = NULL;
+	iser_conn->login_req_buf = NULL;
+	iser_conn->login_resp_buf = NULL;
+
+cq_error:
+	ibv_destroy_cq(iser_conn->cq);
+	iser_conn->cq = NULL;
+
+channel_error:
+	ibv_destroy_comp_channel(iser_conn->comp_channel);
+	iser_conn->comp_channel = NULL;
+
+pd_error:
+	ibv_dealloc_pd(iser_conn->pd);
+	iser_conn->pd = NULL;
+
+	return -1;
+}
+/*
+ * iser_route_handler() - handles RDMA_CM_EVENT_ROUTE_RESOLVED
+ *                        event in rdma_cm
+ *
+ * @cma_id: connection manager id
+ */
+static int iser_route_handler(struct rdma_cm_id *cma_id) {
+
+	struct rdma_conn_param conn_param;
+	struct iser_cm_hdr req_hdr;
+	struct iscsi_context* iscsi = cma_id->context;
+	int ret;
+
+	struct iser_transport *iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	struct iser_conn *iser_conn = &iser_transport->iser_conn;
+
+	ret = iser_create_iser_conn_res(iser_conn);
+	if (ret) {
+		iscsi_set_error(iscsi, "Failed to create ib conn res\n");
+		goto login_mr_error;
+	}
+
+	memset(&conn_param, 0, sizeof(struct rdma_conn_param));
+	conn_param.responder_resources = 4;
+	conn_param.retry_count = 7;
+	conn_param.rnr_retry_count = 6;
+
+	memset(&req_hdr, 0,
sizeof(req_hdr));
+	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
+			 ISER_SEND_W_INV_NOT_SUPPORTED);
+	conn_param.private_data = (void *)&req_hdr;
+	conn_param.private_data_len = sizeof(struct iser_cm_hdr);
+
+	ret = rdma_connect(cma_id, &conn_param);
+	if (ret) {
+		iscsi_set_error(iscsi, "conn %p failure connecting: %d", iser_transport, ret);
+		return -1;
+	}
+	return ret;
+
+login_mr_error:
+	/* clear every handle released here so teardown does not free it again */
+	ibv_dereg_mr(iser_conn->login_resp_mr);
+	iser_conn->login_resp_mr = NULL;
+
+	ibv_destroy_cq(iser_conn->cq);
+	iser_conn->cq = NULL;
+
+	ibv_dealloc_pd(iser_conn->pd);
+	iser_conn->pd = NULL;
+
+	return -1;
+}
+
+/*
+ * iser_alloc_rx_descriptors() - allocation receive descriptors
+ *
+ * @iser_conn: ib connection context
+ * @cmds_max: maximum in flight commands; must be positive
+ *
+ * Allocates @cmds_max receive descriptors and registers one mr over
+ * the payload part of each. On failure the mrs registered so far are
+ * released, the array is freed and rx_descs is reset to NULL.
+ *
+ * Returns 0 on success, -1 with the error string set otherwise.
+ */
+int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
+{
+	int i,j;
+	struct iser_rx_desc *rx_desc;
+	struct ibv_sge *rx_sg;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	/* the ring size is later used as a modulus, so it cannot be zero */
+	if (cmds_max <= 0) {
+		iscsi_set_error(iscsi, "Invalid number of rx descriptors (%d)\n", cmds_max);
+		return -1;
+	}
+
+	iser_conn->qp_max_recv_dtos = cmds_max;
+	iser_conn->min_posted_rx = ISER_MIN_POSTED_RX;
+
+	iser_conn->num_rx_descs = cmds_max;
+
+	iser_conn->rx_descs = iscsi_malloc(iscsi, iser_conn->num_rx_descs * sizeof(*rx_desc));
+	if (!iser_conn->rx_descs) {
+		iscsi_set_error(iscsi, "Failed to allocate rx descriptors\n");
+		return -1;
+	}
+
+	rx_desc = iser_conn->rx_descs;
+
+	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
+		rx_desc->hdr_mr = ibv_reg_mr(iser_conn->pd, rx_desc, ISER_RX_PAYLOAD_SIZE, IBV_ACCESS_LOCAL_WRITE);
+
+		if (rx_desc->hdr_mr == NULL) {
+			iscsi_set_error(iscsi, "Failed to register (%i) reg_mr\n", i);
+			goto fail_alloc_mrs;
+		}
+
+		rx_sg = &rx_desc->rx_sg;
+		rx_sg->addr = (uintptr_t)rx_desc;
+		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
+		rx_sg->lkey = rx_desc->hdr_mr->lkey;
+	}
+
+	iser_conn->rx_desc_head = 0;
+
+	return 0;
+
+fail_alloc_mrs:
+
+	/* only the first i descriptors own a registered mr */
+	rx_desc = iser_conn->rx_descs;
+	for (j = 0; j < i ; j++, rx_desc++)
+		ibv_dereg_mr(rx_desc->hdr_mr);
+
+	iscsi_free(iscsi, iser_conn->rx_descs);
+	iser_conn->rx_descs = NULL;
+
+	return -1;
+}
+
+/*
+ * iser_post_recvm() - posting receive requests
+ *
+ * 
@iser_conn: ib connection context
+ * @count: amount of receive requests to post on receive queue
+ *
+ * Chains count work requests from iser_conn->rx_wr, one per receive
+ * descriptor starting at rx_desc_head (wrapping at qp_max_recv_dtos),
+ * and posts the chain with a single ibv_post_recv() call.
+ *
+ * Returns 0 on success, or when count is not positive and nothing is
+ * posted; otherwise the non-zero value returned by ibv_post_recv(), in
+ * which case post_recv_buf_count and rx_desc_head are left unchanged.
+ */
+int
+iser_post_recvm(struct iser_conn *iser_conn, int count)
+{
+	struct ibv_recv_wr *rx_wr, *rx_wr_failed;
+	int i, ret;
+	unsigned int my_rx_head = iser_conn->rx_desc_head;
+	struct iser_rx_desc *rx_desc;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	/* with an empty chain the terminator below would be written in
+	 * front of the rx_wr array, so there is nothing to post */
+	if (count <= 0)
+		return 0;
+
+	for (rx_wr = iser_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+		rx_desc = &iser_conn->rx_descs[my_rx_head];
+		rx_wr->wr_id = (uintptr_t)rx_desc;
+		rx_wr->sg_list = &rx_desc->rx_sg;
+		rx_wr->num_sge = 1;
+		rx_wr->next = rx_wr + 1;
+		my_rx_head = (my_rx_head + 1) % iser_conn->qp_max_recv_dtos;
+	}
+
+	rx_wr--;
+	rx_wr->next = NULL; /* mark end of work requests list */
+
+	/* account for the buffers up front, roll back if posting fails */
+	iser_conn->post_recv_buf_count += count;
+	ret = ibv_post_recv(iser_conn->qp, iser_conn->rx_wr, &rx_wr_failed);
+	if (ret) {
+		iscsi_set_error(iscsi, "ib_post_recv failed ret=%d", ret);
+		iser_conn->post_recv_buf_count -= count;
+	} else
+		iser_conn->rx_desc_head = my_rx_head;
+
+	return ret;
+}
+
+/*
+ * iser_reg_mr() - pre-allocate and register transmit descriptors
+ *
+ * @iser_conn: ib connection context
+ *
+ * Allocates NUM_MRS transmit descriptors. For each one the header area
+ * (ISER_HEADERS_LEN bytes) and a data buffer of DATA_BUFFER_SIZE bytes
+ * are registered as memory regions, then the descriptor is appended to
+ * the iser_conn->tx_desc list.
+ *
+ * Returns 0 on success and -1 on the first failed allocation or
+ * registration. Descriptors already appended to the list are not
+ * released here.
+ */
+int
+iser_reg_mr(struct iser_conn *iser_conn)
+{
+	int i;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+	struct iser_tx_desc *tx_desc;
+
+	for (i = 0 ; i < NUM_MRS ; i++) {
+
+		tx_desc = iscsi_malloc(iscsi, sizeof(*tx_desc));
+		if (tx_desc == NULL) {
+			iscsi_set_error(iscsi, "Out-Of-Memory, failed to allocate data buffer");
+			return -1;
+		}
+
+		tx_desc->hdr_mr = ibv_reg_mr(iser_conn->pd, tx_desc, ISER_HEADERS_LEN, IBV_ACCESS_LOCAL_WRITE);
+		if (tx_desc->hdr_mr == NULL) {
+			iscsi_set_error(iscsi, "Failed to register data mr");
+			/* the descriptor is not on the list yet, release it here */
+			iscsi_free(iscsi, tx_desc);
+			return -1;
+		}
+
+		tx_desc->data_buff = iscsi_malloc(iscsi, DATA_BUFFER_SIZE);
+		if (tx_desc->data_buff == NULL) {
+			iscsi_set_error(iscsi, "Out-Of-Memory, failed to allocate data buffer");
+			/* undo the header registration made for this descriptor */
+			ibv_dereg_mr(tx_desc->hdr_mr);
+			iscsi_free(iscsi, tx_desc);
+			return -1;
+		}
+
+		tx_desc->data_mr = ibv_reg_mr(iser_conn->pd, tx_desc->data_buff, DATA_BUFFER_SIZE,
+					       IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE);
+		if (tx_desc->data_mr == 
NULL) {
+			iscsi_set_error(iscsi, "Failed to register data mr");
+			/* undo this iteration's allocations, the descriptor
+			 * has not been added to the list yet */
+			iscsi_free(iscsi, tx_desc->data_buff);
+			ibv_dereg_mr(tx_desc->hdr_mr);
+			iscsi_free(iscsi, tx_desc);
+			return -1;
+		}
+
+		ISCSI_LIST_ADD_END(&iser_conn->tx_desc, tx_desc);
+	}
+
+	return 0;
+}
+
+/**
+ * iser_rcv_completion() - handling and processing receive completion
+ *
+ * @rx_desc: receive descriptor
+ * @iser_conn: ib connection context
+ *
+ * Wraps the received header and data of rx_desc in a temporary
+ * iscsi_in_pdu. When the completion is for the login response buffer of
+ * a normal session, the receive descriptors are allocated and the first
+ * ISER_MIN_POSTED_RX of them are posted. For a PDU that matches an
+ * outstanding request, read data is copied from the pre-registered
+ * buffer to the application buffers and the tx descriptor is put back
+ * on the free list. Finally the PDU is passed to iscsi_process_pdu().
+ *
+ * Returns the result of iscsi_process_pdu(), or -1 on allocation or
+ * posting failure.
+ *
+ * Notes:
+ * After changing mrs to in-advanced mrs need to add
+ * comment about memcpy of data from iSER buffer to
+ * App buffer
+ */
+int
+iser_rcv_completion(struct iser_rx_desc *rx_desc,
+		    struct iser_conn *iser_conn)
+{
+	struct iscsi_in_pdu *in = NULL;
+	int outstanding, count = 0, err;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	in = iscsi_malloc(iscsi, sizeof(*in));
+	if (in == NULL) {
+		iscsi_set_error(iscsi, "Out-Of-Memory, failed to allocate in pdu");
+		return -1;
+	}
+
+	if ((unsigned char *)rx_desc == iser_conn->login_resp_buf)
+		if (iscsi->session_type == ISCSI_SESSION_NORMAL) {
+			if(iser_alloc_rx_descriptors(iser_conn,255)) {
+				iscsi_set_error(iscsi, "iser_alloc_rx_descriptors Failed\n");
+				iscsi_free(iscsi, in);
+				return -1;
+			}
+			err = iser_post_recvm(iser_conn, ISER_MIN_POSTED_RX);
+			if (err) {
+				/* report the amount that was requested, count is still 0 here */
+				iscsi_set_error(iscsi, "posting %d rx bufs err %d", (int)ISER_MIN_POSTED_RX, err);
+				iscsi_free(iscsi, in);
+				return -1;
+			}
+		}
+	/* hdr and data point into rx_desc, they are not owned by "in" */
+	in->hdr = (unsigned char*)rx_desc->iscsi_header;
+	in->data_pos = iscsi_get_pdu_data_size(&in->hdr[0]);
+	in->data = (unsigned char*)rx_desc->data;
+
+	enum iscsi_opcode opcode = in->hdr[0] & 0x3f;
+	uint32_t itt = scsi_get_uint32(&in->hdr[16]);
+
+	if (opcode == ISCSI_PDU_NOP_IN && itt == 0xffffffff)
+		goto nop_target;
+
+	struct iscsi_pdu *iscsi_pdu;
+	struct iser_pdu *iser_pdu;
+	for (iscsi_pdu = iscsi->waitpdu ; iscsi_pdu ; iscsi_pdu = iscsi_pdu->next) {
+		if(iscsi_pdu->itt == itt)
+			break;
+	}
+
+	/* no waiting PDU carries this ITT: there is no tx descriptor to
+	 * copy from or to recycle, so skip straight to the common path
+	 * instead of dereferencing container_of(NULL) */
+	if (iscsi_pdu == NULL)
+		goto nop_target;
+
+	iser_pdu = container_of(iscsi_pdu, struct iser_pdu, iscsi_pdu);
+
+	/* in case of read completion we need to copy data *
+	 * from pre-allocated buffers into application buffers */
+
+	if (iser_pdu->desc->type == ISCSI_COMMAND &&
+	    iser_pdu->desc->data_dir == DATA_READ) {
+
+		int i, offset = 0;
+		struct scsi_task *task = iser_pdu->iscsi_pdu.scsi_cbdata.task;
+		struct 
scsi_iovector *iovector_in = &task->iovector_in;
+
+		/* NOTE(review): neither copy below is bounded by the size of
+		 * data_buff; confirm the requested transfer can never exceed it */
+		if (iovector_in->iov == NULL) {
+			memcpy(iser_pdu->iscsi_pdu.indata.data, &iser_pdu->desc->data_buff[offset],
+			       iser_pdu->iscsi_pdu.indata.size);
+		} else {
+			for (i = 0 ; i < iovector_in->niov ; i++) {
+				memcpy(iovector_in->iov[i].iov_base, &iser_pdu->desc->data_buff[offset],
+				       iovector_in->iov[i].iov_len);
+				offset += iovector_in->iov[i].iov_len;
+			}
+		}
+	}
+
+	/* hand the tx descriptor back to the list of available descriptors */
+	ISCSI_LIST_ADD_END(&iser_conn->tx_desc, iser_pdu->desc);
+
+nop_target:
+
+	/* decrementing conn->post_recv_buf_count only --after-- freeing the *
+	 * task eliminates the need to worry on tasks which are completed in *
+	 * parallel to the execution of iser_conn_term. So the code that waits *
+	 * for the posted rx bufs refcount to become zero handles everything */
+	iser_conn->post_recv_buf_count--;
+
+	/* the login response buffer is not part of the rx descriptor ring */
+	if ((unsigned char *)rx_desc == iser_conn->login_resp_buf)
+		goto receive;
+
+	/* top the receive queue up once at least min_posted_rx slots are free */
+	outstanding = iser_conn->post_recv_buf_count;
+	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
+		if(iser_conn->qp_max_recv_dtos - outstanding > iser_conn->min_posted_rx)
+			count = iser_conn->min_posted_rx;
+		else
+			count = iser_conn->qp_max_recv_dtos - outstanding;
+		err = iser_post_recvm(iser_conn, count);
+		if (err) {
+			iscsi_set_error(iscsi, "posting %d rx bufs err %d", count, err);
+			/* "in" is not handed to anyone on this path, release it */
+			iscsi_free(iscsi, in);
+			return -1;
+		}
+	}
+
+receive:
+
+	err = iscsi_process_pdu(iscsi, in);
+	iscsi_free(iscsi, in);
+
+	return err;
+}
+
+/**
+ * iser_snd_completion() - handling send completion
+ *
+ * @tx_desc: send descriptor (currently unused)
+ * @iser_conn: ib connection context (currently unused)
+ *
+ * Placeholder: nothing is done for send completions yet.
+ * Always returns 0.
+ */
+int
+iser_snd_completion(struct iser_tx_desc *tx_desc _U_,
+		    struct iser_conn *iser_conn _U_)
+{
+	return 0;
+}
+
+/**
+ * iser_handle_wc() - handling work completion
+ *
+ * @wc: work completion
+ * @iser_conn: ib connection context
+ *
+ */
+static int iser_handle_wc(struct ibv_wc *wc,struct iser_conn *iser_conn)
+{
+	struct iser_tx_desc *tx_desc;
+	struct iser_rx_desc *rx_desc;
+	struct iscsi_context *iscsi = 
iser_conn->cma_id->context;
+
+	if (wc->status == IBV_WC_SUCCESS) {
+		/* wr_id carries the descriptor pointer stored when the
+		 * work request was posted */
+		if (wc->opcode == IBV_WC_RECV) {
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
+
+			return iser_rcv_completion(rx_desc, iser_conn);
+		} else if (wc->opcode == IBV_WC_SEND) {
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+
+			return iser_snd_completion(tx_desc, iser_conn);
+		} else {
+			iscsi_set_error(iscsi, "Unknown wc opcode %d\n", wc->opcode);
+
+			return -1;
+		}
+	} else {
+		/* wr_id is a 64-bit value: print it through unsigned long long
+		 * so the format matches on every platform */
+		if (wc->status != IBV_WC_WR_FLUSH_ERR) {
+			ISCSI_LOG(iscsi, 3, "wr id %llx status %d vend_err %x\n",
+				  (unsigned long long)wc->wr_id, wc->status, wc->vendor_err);
+			return iscsi_service_reconnect_if_loggedin(iscsi);
+		} else {
+			iscsi_set_error(iscsi, "flush error: wr id %llx\n",
+					(unsigned long long)wc->wr_id);
+
+			return 0;
+		}
+	}
+}
+
+/**
+ * cq_event_handler() - polling and handling completions
+ *
+ * @iser_conn: ib connection context
+ *
+ * Polls the completion queue in batches of up to 16 entries and passes
+ * every work completion to iser_handle_wc(). Polling stops when the
+ * queue is empty or once at least 512 completions have been handled.
+ *
+ * Returns 0 on success, -1 if polling fails or a completion handler
+ * reports an error.
+ *
+ * Notes:
+ * Need to decide about how much completion to poll
+ * each time.
+ */
+static int cq_event_handler(struct iser_conn *iser_conn)
+{
+	struct ibv_wc wc[16];
+	int i;
+	/* signed: ibv_poll_cq() returns a negative value on failure, which
+	 * an unsigned counter would turn into a huge completion count */
+	int n;
+	unsigned int completed = 0;
+
+	while ((n = ibv_poll_cq(iser_conn->cq, 16, wc)) > 0) {
+		for (i = 0; i < n; i++)
+			if (iser_handle_wc(&wc[i], iser_conn))
+				return -1;
+
+		completed += n;
+		if (completed >= 512)
+			break;
+	}
+
+	if (n < 0)
+		return -1;
+
+	return 0;
+}
+
+/**
+ * cq_handle() - handling completion queue event
+ *
+ * @iser_conn: ib connection context
+ *
+ * Notes:
+ * Need to check if it is possible
+ * to get cq event except POLLIN. 
+ */
+int cq_handle(struct iser_conn *iser_conn)
+{
+	void *ev_ctx = NULL;
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	/* without a retrieved event there is nothing to ack or to poll for */
+	if (ibv_get_cq_event(iser_conn->comp_channel, &iser_conn->cq, &ev_ctx)) {
+		iscsi_set_error(iscsi, "failed to get CQ event");
+		return -1;
+	}
+	/* re-arm the notification before draining the queue */
+	ret = ibv_req_notify_cq(iser_conn->cq, 0);
+
+	/* FIXME: aggregate ack cq event for efficiency */
+	ibv_ack_cq_events(iser_conn->cq, 1);
+	if (ret) {
+		iscsi_set_error(iscsi, "failed notify or ack CQ");
+		return -1;
+	}
+
+	ret = cq_event_handler(iser_conn);
+	if (ret) {
+		iscsi_set_error(iscsi, "failed CQ handler");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * iser_connected_handler() - handles RDMA_CM_EVENT_ESTABLISHED
+ * event in rdma_cm
+ *
+ * @cma_id: connection manager id
+ *
+ * Resets the posted receive buffer counter and pre-registers the
+ * transmit descriptors through iser_reg_mr(), whose result is returned.
+ */
+static int iser_connected_handler(struct rdma_cm_id *cma_id) {
+
+	struct iscsi_context *iscsi = cma_id->context;
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	if (iser_transport == NULL) {
+		iscsi_set_error(iscsi, "Failed init of transport\n");
+		return -1;
+	}
+
+	iser_conn = &iser_transport->iser_conn;
+
+	iser_conn->post_recv_buf_count = 0;
+
+	return iser_reg_mr(iser_conn);
+
+}
+
+/*
+ * iser_cma_handler() - handles rdma connection manager events
+ *
+ * @iser_conn: ib connection context
+ * @cma_id: connection manager id
+ * @event: rdma cm event
+ *
+ * Every outcome that ends the connection attempt records the resulting
+ * state in iser_conn->conn_state and posts sem_connect, on which
+ * iscsi_iser_connect() waits. Returns 0 when the event was handled and
+ * a non-zero value otherwise.
+ */
+int iser_cma_handler(struct iser_conn *iser_conn,struct rdma_cm_id *cma_id, struct rdma_cm_event *event) {
+
+	int ret = 0;
+
+	switch(event->event) {
+
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		ret = iser_addr_handler(cma_id);
+		if (ret) {
+			/* the cm thread exits on a failed handler, so no later
+			 * event would wake the thread waiting in connect */
+			iser_conn->conn_state = CONN_ERROR;
+			sem_post(&iser_conn->sem_connect);
+		}
+		break;
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		ret = iser_route_handler(cma_id);
+		if (ret) {
+			/* same as above: report the failure to the waiter */
+			iser_conn->conn_state = CONN_ERROR;
+			sem_post(&iser_conn->sem_connect);
+		}
+		break;
+	case RDMA_CM_EVENT_ESTABLISHED:
+		ret = iser_connected_handler(cma_id);
+		if(ret)
+			iser_conn->conn_state = CONN_ERROR;
+		else
+			iser_conn->conn_state = CONN_ESTABLISHED;
+		sem_post(&iser_conn->sem_connect);
+		break;
+	case RDMA_CM_EVENT_ADDR_ERROR:
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+	case 
RDMA_CM_EVENT_CONNECT_ERROR:
+	case RDMA_CM_EVENT_UNREACHABLE:
+	case RDMA_CM_EVENT_REJECTED:
+		iser_conn->conn_state = CONN_ERROR;
+		ret = -1;
+		sem_post(&iser_conn->sem_connect);
+		break;
+	case RDMA_CM_EVENT_DISCONNECTED:
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		iser_conn->conn_state = CONN_DISCONNECTED;
+		ret = -1;
+		sem_post(&iser_conn->sem_connect);
+		break;
+	default:
+		iser_conn->conn_state = CONN_ERROR;
+		ret = -1;
+		sem_post(&iser_conn->sem_connect);
+		break;
+	}
+	return ret;
+}
+
+/*
+ * cm_thread() - thread to catch rdma cm events
+ *
+ * @arg: ib connection context
+ *
+ * Loops on rdma_get_cm_event(), passes a copy of each event to
+ * iser_cma_handler() and acknowledges the event afterwards. The thread
+ * exits when an event cannot be retrieved or the handler reports a
+ * failure.
+ *
+ * Notes:
+ * Need to check if copying event is necessary
+ */
+static void *cm_thread(void *arg)
+{
+	struct iser_conn *iser_conn = arg;
+	struct rdma_cm_event event_copy;
+	int ret;
+	struct iscsi_context *iscsi = iser_conn->cma_id->context;
+
+	while (1) {
+		ret = rdma_get_cm_event(iser_conn->cma_channel, &iser_conn->cma_event);
+		if (ret) {
+			iscsi_set_error(iscsi, "Failed to get RDMA-CM Event\n");
+			/* no further event will arrive from this thread: flag
+			 * the error and wake a connect call that may still be
+			 * blocked on the semaphore */
+			iser_conn->conn_state = CONN_ERROR;
+			sem_post(&iser_conn->sem_connect);
+			pthread_exit(NULL);
+		}
+		memcpy(&event_copy, iser_conn->cma_event, sizeof(struct rdma_cm_event));
+		ret = iser_cma_handler(iser_conn, iser_conn->cma_id, &event_copy);
+		rdma_ack_cm_event(iser_conn->cma_event);
+		if (ret) {
+			iscsi_set_error(iscsi, "Failed to handle event\n");
+			pthread_exit(NULL);
+		}
+	}
+}
+
+/*
+ * iscsi_iser_connect() - creating rdma connection manager
+ * and connecting it to target.
+ *
+ * @iscsi: iscsi context
+ * @sa: socket address for rdma cm connect
+ * @ai_family: unused
+ *
+ * Notes:
+ * Need to move iser_reg_mr(headers) to iser_connected_handler. 
+ */
+int iscsi_iser_connect(struct iscsi_context *iscsi, union socket_address *sa,__attribute__((unused)) int ai_family) {
+
+	struct iser_transport *iser_transport;
+	struct iser_conn *iser_conn;
+	int ret;
+
+	iser_transport = container_of(iscsi->t, struct iser_transport, t);
+	if (iser_transport == NULL) {
+		iscsi_set_error(iscsi, "Failed init of transport\n");
+		return -1;
+	}
+	iser_conn = &iser_transport->iser_conn;
+	/* the semaphore is posted by the cm event handling once the outcome
+	 * of the connection attempt is known */
+	if (sem_init(&iser_conn->sem_connect, 0, 0)) {
+		iscsi_set_error(iscsi, "Failed to initialize connect semaphore");
+		return -1;
+	}
+
+	iser_conn->cma_channel = rdma_create_event_channel();
+
+	if (iser_conn->cma_channel == NULL) {
+		iscsi_set_error(iscsi, "Failed creating Event Channel\n");
+		return -1;
+	}
+
+	/* NOTE(review): the error returns below leave the event channel, the
+	 * cm id and the cm thread in place; confirm that the disconnect path
+	 * releases them after a failed connect */
+	if (rdma_create_id(iser_conn->cma_channel, &iser_conn->cma_id, (void *)iscsi, RDMA_PS_TCP)) {
+		iscsi_set_error(iscsi, "Failed create channel_id");
+		return -1;
+	}
+
+	ret = pthread_create(&iser_conn->cmthread, NULL, cm_thread, iser_conn);
+	if(ret) {
+		iscsi_set_error(iscsi, "Failed create Connection Manager Thread");
+		return -1;
+	}
+
+	if(rdma_resolve_addr(iser_conn->cma_id, NULL, &sa->sa, 2000)) {
+		iscsi_set_error(iscsi, "Failed resolve address");
+		return -1;
+	}
+
+	/* block until the cm thread reports how the attempt ended */
+	sem_wait(&iser_conn->sem_connect);
+
+	switch(iser_conn->conn_state) {
+
+	case CONN_ERROR:
+		iscsi_set_error(iscsi, "Conn Error event");
+		return -1;
+
+	case CONN_DISCONNECTED:
+		iscsi_set_error(iscsi, "Conn disconnected event");
+		return -1;
+
+	case CONN_ESTABLISHED:
+		break;
+
+	default:
+		iscsi_set_error(iscsi, "Unknown State of connection");
+		return -1;
+
+	}
+
+
+	iscsi->is_connected = 1;
+	/* the callback is one-shot: invoke it if one is set, then clear it */
+	if (iscsi->socket_status_cb) {
+		iscsi->socket_status_cb(iscsi, SCSI_STATUS_GOOD, NULL, iscsi->connect_data);
+		iscsi->socket_status_cb = NULL;
+	}
+
+	return 0;
+}
+
+void iscsi_init_iser_transport(struct iscsi_context *iscsi)
+{
+	iscsi->t->connect = iscsi_iser_connect;
+	iscsi->t->queue_pdu = iscsi_iser_queue_pdu;
+	iscsi->t->new_pdu = iscsi_iser_new_pdu;
+	iscsi->t->disconnect = iscsi_iser_disconnect;
+	iscsi->t->free_pdu = iscsi_iser_free_pdu;
+	iscsi->t->service = iscsi_iser_service;
+	
iscsi->t->get_fd = iscsi_iser_get_fd; + iscsi->t->which_events = iscsi_iser_which_events; + + return; +} + +#endif diff --git a/lib/socket.c b/lib/socket.c index d95da9a..7429478 100644 --- a/lib/socket.c +++ b/lib/socket.c @@ -816,7 +816,7 @@ iscsi_write_to_socket(struct iscsi_context *iscsi) return 0; } -static int +int iscsi_service_reconnect_if_loggedin(struct iscsi_context *iscsi) { if (iscsi->is_loggedin) { diff --git a/tests/Makefile.am b/tests/Makefile.am index 4378320..471e04e 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,6 +1,6 @@ AM_CPPFLAGS = -I../include "-D_U_=__attribute__((unused))" \ "-D_R_(A,B)=__attribute__((format(printf,A,B)))" -AM_CFLAGS = $(WARN_CFLAGS) +AM_CFLAGS = $(WARN_CFLAGS) -I/usr/include LDADD = ../lib/libiscsi.la noinst_PROGRAMS = prog_reconnect prog_reconnect_timeout prog_noop_reply \ diff --git a/utils/Makefile.am b/utils/Makefile.am index bde1c81..657ca9d 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -1,6 +1,6 @@ AM_CPPFLAGS = -I${srcdir}/../include "-D_U_=__attribute__((unused))" \ "-D_R_(A,B)=__attribute__((format(printf,A,B)))" -AM_CFLAGS = $(WARN_CFLAGS) +AM_CFLAGS = $(WARN_CFLAGS) -I/usr/include LDADD = ../lib/libiscsi.la bin_PROGRAMS = iscsi-inq iscsi-ls iscsi-perf iscsi-readcapacity16 \