From bd42c77c457146bede32333558b4e0414b30683e Mon Sep 17 00:00:00 2001
From: Yongseok Koh <yskoh@mellanox.com>
Date: Fri, 24 Aug 2018 16:46:49 -0700
Subject: [PATCH] net/mlx5: support externally allocated mempool

When the MLX PMD registers memory for DMA, it accesses DPDK's global memseg
list to maximize the range of each registration so that the LKey search can
be more efficient. The granularity of MR registration is per page.

Externally allocated memory shouldn't normally be used for DMA because it
can't be found in the memseg list and its free events can't be tracked by
DPDK. However, if the external memory is static (allocated on startup and
never freed), such memory can also be registered with a little tweak in the
code.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
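Note: for context, the kind of mempool this patch targets is one whose
backing store is provided by the application rather than by DPDK hugepage
memsegs. Below is a minimal sketch of building such a pool; it is not part
of this patch, and make_ext_pktmbuf_pool(), ext_buf, ext_iova and ext_len
are hypothetical names for application-provided pieces. The buffer is
assumed to be IOVA-contiguous, DMA-able, and never freed, as the commit
log requires.

#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Wrap an application-owned, static buffer in a pktmbuf pool. */
static struct rte_mempool *
make_ext_pktmbuf_pool(void *ext_buf, rte_iova_t ext_iova, size_t ext_len,
		      unsigned int n_mbufs, int socket_id)
{
	struct rte_mempool *mp;

	mp = rte_mempool_create_empty("ext_mp", n_mbufs,
				      sizeof(struct rte_mbuf) +
				      RTE_MBUF_DEFAULT_BUF_SIZE,
				      256,
				      sizeof(struct rte_pktmbuf_pool_private),
				      socket_id, 0);
	if (mp == NULL)
		return NULL;
	if (rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL) != 0)
		goto fail;
	rte_pktmbuf_pool_init(mp, NULL);
	/* Populate from the external chunk instead of DPDK memsegs. */
	if (rte_mempool_populate_iova(mp, ext_buf, ext_iova, ext_len,
				      NULL, NULL) < 0)
		goto fail;
	rte_mempool_obj_iter(mp, rte_pktmbuf_init, NULL);
	return mp;
fail:
	rte_mempool_free(mp);
	return NULL;
}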
 drivers/net/mlx5/mlx5_mr.c   | 155 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h |  35 +++++++++-
 2 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 08105a443..876622e91 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -277,6 +277,23 @@ mr_find_next_chunk(struct mlx5_mr *mr, struct mlx5_mr_cache *entry,
 	uintptr_t end = 0;
 	uint32_t idx = 0;
 
+	/* MR for external memory doesn't have memseg list. */
+	if (mr->msl == NULL) {
+		struct ibv_mr *ibv_mr = mr->ibv_mr;
+
+		assert(mr->ms_bmp_n == 1);
+		assert(mr->ms_n == 1);
+		assert(base_idx == 0);
+		/*
+		 * Can't search it from memseg list but get it directly from
+		 * verbs MR as there's only one chunk.
+		 */
+		entry->start = (uintptr_t)ibv_mr->addr;
+		entry->end = (uintptr_t)ibv_mr->addr + ibv_mr->length;
+		entry->lkey = rte_cpu_to_be_32(ibv_mr->lkey);
+		/* Returning 1 ends iteration. */
+		return 1;
+	}
 	for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
 		if (rte_bitmap_get(mr->ms_bmp, idx)) {
 			const struct rte_memseg_list *msl;
@@ -818,6 +835,7 @@ mlx5_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
 		mr = mr_lookup_dev_list(dev, &entry, start);
 		if (mr == NULL)
 			continue;
+		assert(mr->msl); /* Can't be external memory. */
 		ms = rte_mem_virt2memseg((void *)start, msl);
 		assert(ms != NULL);
 		assert(msl->page_sz == ms->hugepage_sz);
@@ -1070,6 +1088,139 @@ mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
 		(void *)mr_ctrl, mr_ctrl->cur_gen);
 }
 
+/**
+ * Called during rte_mempool_mem_iter() by mlx5_mr_update_ext_mp().
+ *
+ * Registers an externally allocated chunk and creates an MR for it. The MR
+ * object is added to the global list. If the memseg list of an MR object
+ * (mr->msl) is null, the MR object can be regarded as externally allocated
+ * memory.
+ *
+ * Once external memory is registered, it should be static. If the memory is
+ * freed and the virtual address range is later mapped to different physical
+ * memory, the device may crash due to a stale translation entry. The PMD
+ * can't track free events of external memory for now.
+ */
+static void
+mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
+			 struct rte_mempool_memhdr *memhdr,
+			 unsigned mem_idx __rte_unused)
+{
+	struct mr_update_mp_data *data = opaque;
+	struct rte_eth_dev *dev = data->dev;
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_mr_ctrl *mr_ctrl = data->mr_ctrl;
+	struct mlx5_mr *mr = NULL;
+	uintptr_t addr = (uintptr_t)memhdr->addr;
+	size_t len = memhdr->len;
+	struct mlx5_mr_cache entry;
+	uint32_t lkey;
+
+	/* If already registered, return. */
+	rte_rwlock_read_lock(&priv->mr.rwlock);
+	lkey = mr_lookup_dev(dev, &entry, addr);
+	rte_rwlock_read_unlock(&priv->mr.rwlock);
+	if (lkey != UINT32_MAX)
+		return;
+	mr = rte_zmalloc_socket(NULL,
+				RTE_ALIGN_CEIL(sizeof(*mr),
+					       RTE_CACHE_LINE_SIZE),
+				RTE_CACHE_LINE_SIZE, mp->socket_id);
+	if (mr == NULL) {
+		DRV_LOG(WARNING,
+			"port %u unable to allocate memory for a new MR of"
+			" mempool (%s).",
+			dev->data->port_id, mp->name);
+		data->ret = -1;
+		return;
+	}
+	DRV_LOG(DEBUG, "port %u register MR for chunk #%d of mempool (%s)",
+		dev->data->port_id, mem_idx, mp->name);
+	mr->ibv_mr = mlx5_glue->reg_mr(priv->pd, (void *)addr, len,
+				       IBV_ACCESS_LOCAL_WRITE);
+	if (mr->ibv_mr == NULL) {
+		DRV_LOG(WARNING,
+			"port %u failed to create a verbs MR for address (%p)",
+			dev->data->port_id, (void *)addr);
+		rte_free(mr);
+		data->ret = -1;
+		return;
+	}
+	mr->msl = NULL; /* Mark it as external memory. */
+	mr->ms_bmp = NULL;
+	mr->ms_n = 1;
+	mr->ms_bmp_n = 1;
+	rte_rwlock_write_lock(&priv->mr.rwlock);
+	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
+	DRV_LOG(DEBUG,
+		"port %u MR CREATED (%p) for external memory %p:\n"
+		"  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
+		" lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
+		dev->data->port_id, (void *)mr, (void *)addr,
+		addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
+		mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
+	/* Insert to the global cache table. */
+	mr_insert_dev_cache(dev, mr);
+	rte_rwlock_write_unlock(&priv->mr.rwlock);
+	/* Insert to the local cache table. */
+	mlx5_mr_addr2mr_bh(dev, mr_ctrl, addr);
+}
+
+/**
+ * Register an MR for each memory chunk of a mempool that has externally
+ * allocated memory, and fill the local cache.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param mp
+ *   Pointer to registering Mempool.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+static uint32_t
+mlx5_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
+		      struct rte_mempool *mp)
+{
+	struct mr_update_mp_data data = {
+		.dev = dev,
+		.mr_ctrl = mr_ctrl,
+		.ret = 0,
+	};
+
+	rte_mempool_mem_iter(mp, mlx5_mr_update_ext_mp_cb, &data);
+	return data.ret;
+}
+
+/**
+ * Register an MR for each memory chunk of a mempool that has externally
+ * allocated memory, and look up the LKey of the given address to return.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param addr
+ *   Search key.
+ * @param mp
+ *   Pointer to registering Mempool where addr belongs.
+ *
+ * @return
+ *   LKey for address on success, UINT32_MAX on failure.
+ */
+uint32_t
+mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+		      struct rte_mempool *mp)
+{
+	struct mlx5_txq_ctrl *txq_ctrl =
+		container_of(txq, struct mlx5_txq_ctrl, txq);
+	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+	struct priv *priv = txq_ctrl->priv;
+
+	mlx5_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
+	return mlx5_tx_addr2mr_bh(txq, addr);
+}
+
 /* Called during rte_mempool_mem_iter() by mlx5_mr_update_mp(). */
 static void
 mlx5_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
@@ -1113,6 +1264,10 @@ mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
 	};
 
 	rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
+	if (data.ret < 0 && rte_errno == ENXIO) {
+		/* Mempool may have externally allocated memory. */
+		return mlx5_mr_update_ext_mp(dev, mr_ctrl, mp);
+	}
 	return data.ret;
 }
 
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index f53bb43c3..b61c23b33 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -347,6 +347,8 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
 uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
 uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
+uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+			       struct rte_mempool *mp);
 
 #ifndef NDEBUG
 /**
@@ -534,6 +536,24 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
 }
 
 /**
+ * Get Memory Pool (MP) from mbuf. If the mbuf is indirect, the pool from
+ * which the cloned mbuf was allocated is returned instead.
+ *
+ * @param buf
+ *   Pointer to mbuf.
+ *
+ * @return
+ *   Memory pool where data is located for the given mbuf.
+ */
+static struct rte_mempool *
+mlx5_mb2mp(struct rte_mbuf *buf)
+{
+	if (unlikely(RTE_MBUF_INDIRECT(buf)))
+		return rte_mbuf_from_indirect(buf)->pool;
+	return buf->pool;
+}
+
+/**
  * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
  * as mempool is pre-configured and static.
  *
@@ -591,7 +611,20 @@ mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
 	return mlx5_tx_addr2mr_bh(txq, addr);
 }
 
-#define mlx5_tx_mb2mr(rxq, mb) mlx5_tx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+static __rte_always_inline uint32_t
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+{
+	uintptr_t addr = (uintptr_t)mb->buf_addr;
+	uint32_t lkey = mlx5_tx_addr2mr(txq, addr);
+
+	if (likely(lkey != UINT32_MAX))
+		return lkey;
+	if (rte_errno == ENXIO) {
+		/* Mempool may have externally allocated memory. */
+		lkey = mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb));
+	}
+	return lkey;
+}
 
 /**
  * Ring TX queue doorbell and flush the update if requested.
-- 
2.11.0
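Note on the datapath effect of this patch: the first mbuf transmitted from an
externally backed mempool misses both MR caches and the lookup fails with
rte_errno set to ENXIO; mlx5_tx_mb2mr() then registers the whole mempool
through mlx5_tx_update_ext_mp(), so subsequent lookups are served from the
per-queue cache. A minimal sketch of that first send follows, assuming
EAL/port/queue setup is done elsewhere and ext_mp was built on external
memory as in the sketch above; tx_one_from_ext_pool() is a hypothetical
helper, not part of this patch.

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Transmit a single mbuf taken from the externally backed pool. The PMD
 * registers the pool's chunks on the first LKey miss (the ENXIO path above).
 */
static int
tx_one_from_ext_pool(uint16_t port_id, struct rte_mempool *ext_mp)
{
	struct rte_mbuf *m = rte_pktmbuf_alloc(ext_mp);

	if (m == NULL)
		return -1;
	/* A real application would build headers/payload here. */
	m->pkt_len = 64;
	m->data_len = 64;
	if (rte_eth_tx_burst(port_id, 0, &m, 1) != 1) {
		rte_pktmbuf_free(m);
		return -1;
	}
	return 0;
}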