Skip to content

Commit 189d628

Browse files
committed
Fixed a really old bug in Infiniband code: QP's max_send_sge was limited to 1
... while we use up to 2 sges (one for transport header and one for payload) per work request in InfUd and up to InfRcTransport::MAX_TX_SGE_COUNT in InfRc. Mellanox drivers/cards never seem to complain about this and the bug was discovered when porting RAMCloud to an Intel OmniPath HPC cluster.
1 parent e45e150 commit 189d628

File tree

4 files changed

+29
-19
lines changed

4 files changed

+29
-19
lines changed

src/InfRcTransport.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -750,7 +750,8 @@ InfRcTransport::clientTrySetupQueuePair(IpAddress& address)
750750
ibPhysicalPort, clientSrq,
751751
commonTxCq, clientRxCq,
752752
MAX_TX_QUEUE_DEPTH,
753-
MAX_SHARED_RX_QUEUE_DEPTH);
753+
MAX_SHARED_RX_QUEUE_DEPTH,
754+
MAX_TX_SGE_COUNT);
754755
uint64_t nonce = generateRandom();
755756
LOG(DEBUG, "starting to connect to %s via local port %d, nonce 0x%lx",
756757
inet_ntoa(sin->sin_addr), clientPort, nonce);
@@ -845,7 +846,8 @@ InfRcTransport::ServerConnectHandler::handleFileEvent(int events)
845846
transport->commonTxCq,
846847
transport->serverRxCq,
847848
MAX_TX_QUEUE_DEPTH,
848-
MAX_SHARED_RX_QUEUE_DEPTH);
849+
MAX_SHARED_RX_QUEUE_DEPTH,
850+
MAX_TX_SGE_COUNT);
849851
qp->plumb(&incomingQpt);
850852
qp->setPeerName(incomingQpt.getPeerName());
851853
LOG(DEBUG, "New queue pair for %s:%u, nonce 0x%lx (total creates "

src/InfUdDriver.cc

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -186,12 +186,10 @@ InfUdDriver::InfUdDriver(Context* context, const ServiceLocator *sl)
186186
throw DriverException(HERE, errno);
187187
}
188188

189-
qp = infiniband->createQueuePair(localMac ? IBV_QPT_RAW_PACKET
190-
: IBV_QPT_UD,
191-
ibPhysicalPort, NULL,
192-
txcq, rxcq, MAX_TX_QUEUE_DEPTH,
193-
MAX_RX_QUEUE_DEPTH,
194-
QKEY);
189+
qp = infiniband->createQueuePair(
190+
localMac ? IBV_QPT_RAW_PACKET : IBV_QPT_UD,
191+
ibPhysicalPort, NULL, txcq, rxcq, MAX_TX_QUEUE_DEPTH,
192+
MAX_RX_QUEUE_DEPTH, 2, QKEY);
195193

196194
// Cache these for easier access.
197195
lid = infiniband->getLid(ibPhysicalPort);

src/Infiniband.cc

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,11 @@ Infiniband::dumpStats()
6969
Infiniband::QueuePair*
7070
Infiniband::createQueuePair(ibv_qp_type type, int ibPhysicalPort, ibv_srq *srq,
7171
ibv_cq *txcq, ibv_cq *rxcq, uint32_t maxSendWr,
72-
uint32_t maxRecvWr, uint32_t QKey)
72+
uint32_t maxRecvWr, uint32_t maxSendSges,
73+
uint32_t QKey)
7374
{
7475
return new QueuePair(*this, type, ibPhysicalPort, srq, txcq, rxcq,
75-
maxSendWr, maxRecvWr, QKey);
76+
maxSendWr, maxRecvWr, maxSendSges, QKey);
7677
}
7778

7879
/**
@@ -508,12 +509,15 @@ Infiniband::pollCompletionQueue(ibv_cq *cq, int numEntries, ibv_wc *retWcArray)
508509
* \param maxRecvWr
509510
* Maximum number of outstanding receive work requests allowed on
510511
* this QueuePair.
512+
* \param maxSendSges
513+
* Maximum number of scatter-gather entries per work request allowed on
514+
* this QueuePair.
511515
* \param QKey
512516
* UD Queue Pairs only. The QKey for this pair.
513517
*/
514518
Infiniband::QueuePair::QueuePair(Infiniband& infiniband, ibv_qp_type type,
515519
int ibPhysicalPort, ibv_srq *srq, ibv_cq *txcq, ibv_cq *rxcq,
516-
uint32_t maxSendWr, uint32_t maxRecvWr, uint32_t QKey)
520+
uint32_t maxSendWr, uint32_t maxRecvWr, uint32_t maxSendSges, uint32_t QKey)
517521
: infiniband(infiniband),
518522
type(type),
519523
ctxt(infiniband.device.ctxt),
@@ -538,7 +542,7 @@ Infiniband::QueuePair::QueuePair(Infiniband& infiniband, ibv_qp_type type,
538542
qpia.srq = srq; // use the same shared receive queue
539543
qpia.cap.max_send_wr = maxSendWr; // max outstanding send requests
540544
qpia.cap.max_recv_wr = maxRecvWr; // max outstanding recv requests
541-
qpia.cap.max_send_sge = 1; // max send scatter-gather elements
545+
qpia.cap.max_send_sge = maxSendSges; // max send scatter-gather elements
542546
qpia.cap.max_recv_sge = 1; // max recv scatter-gather elements
543547
qpia.cap.max_inline_data = // max bytes of immediate data on send q
544548
MAX_INLINE_DATA;
@@ -993,13 +997,17 @@ Infiniband::Address::getHandle() const
993997
return ah;
994998
}
995999

996-
// Must allocate a new address handle.
997-
ibv_ah_attr attr;
998-
attr.dlid = lid;
999-
attr.src_path_bits = 0;
1000-
attr.is_global = 0;
1001-
attr.sl = 0;
1002-
attr.port_num = downCast<uint8_t>(physicalPort);
1000+
// Must allocate a new address handle. See also:
1001+
// https://www.rdmamojo.com/2012/09/22/ibv_create_ah/
1002+
ibv_ah_attr attr = {
1003+
.grh = {},
1004+
.dlid = lid,
1005+
.sl = 0,
1006+
.src_path_bits = 0,
1007+
.static_rate = 0,
1008+
.is_global = 0,
1009+
.port_num = downCast<uint8_t>(physicalPort)
1010+
};
10031011
infiniband.totalAddressHandleAllocCalls += 1;
10041012
uint64_t start = Cycles::rdtsc();
10051013
ah = ibv_create_ah(infiniband.pd.pd, &attr);

src/Infiniband.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ class Infiniband {
186186
ibv_cq *rxcq,
187187
uint32_t maxSendWr,
188188
uint32_t maxRecvWr,
189+
uint32_t maxSendSges,
189190
uint32_t QKey = 0);
190191
// exists solely as superclass constructor for MockQueuePair derivative
191192
explicit QueuePair(Infiniband& infiniband)
@@ -460,6 +461,7 @@ class Infiniband {
460461
ibv_cq *rxcq,
461462
uint32_t maxSendWr,
462463
uint32_t maxRecvWr,
464+
uint32_t maxSendSges,
463465
uint32_t QKey = 0);
464466

465467
int getLid(int port);

0 commit comments

Comments
 (0)