/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/vnet.h>
#include <vppinfra/vec.h>
#include <vppinfra/format.h>
#include <vlib/unix/cj.h>
#include <assert.h>

#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/dpdk/dpdk.h>

#include "dpdk_priv.h"
#include <vppinfra/error.h>

#define foreach_dpdk_tx_func_error                       \
  _(BAD_RETVAL, "DPDK tx function returned an error")    \
  _(RING_FULL, "Tx packet drops (ring full)")            \
  _(PKT_DROP, "Tx packet drops (dpdk tx failure)")       \
  _(REPL_FAIL, "Tx packet drops (replication failure)")

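/*
 * The _() macro below is expanded twice: once to build the error enum and
 * once to build the matching error-string table, keeping the two in sync.
 */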
typedef enum
{
#define _(f,s) DPDK_TX_FUNC_ERROR_##f,
  foreach_dpdk_tx_func_error
#undef _
    DPDK_TX_FUNC_N_ERROR,
} dpdk_tx_func_error_t;

static char *dpdk_tx_func_error_strings[] = {
#define _(n,s) s,
  foreach_dpdk_tx_func_error
#undef _
};

clib_error_t *
dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
{
  int error;
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_default_mac_addr_set (xd->device_index,
                                            (struct ether_addr *) address);

  if (error)
    {
      return clib_error_return (0, "mac address set failed: %d", error);
    }
  else
    {
      return NULL;
    }
}

clib_error_t *
dpdk_set_mc_filter (vnet_hw_interface_t * hi,
                    struct ether_addr mc_addr_vec[], int naddr)
{
  int error;
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr);

  if (error)
    {
      return clib_error_return (0, "mc addr list failed: %d", error);
    }
  else
    {
      return NULL;
    }
}

struct rte_mbuf *
dpdk_replicate_packet_mb (vlib_buffer_t * b)
{
  vlib_main_t *vm = vlib_get_main ();
  vlib_buffer_main_t *bm = vm->buffer_main;
  struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0;
  u8 nb_segs, nb_segs_left;
  u32 copy_bytes;
  unsigned socket_id = rte_socket_id ();

  ASSERT (bm->pktmbuf_pools[socket_id]);
  pkt_mb = rte_mbuf_from_vlib_buffer (b);
  nb_segs = pkt_mb->nb_segs;
  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
    {
      if (PREDICT_FALSE (pkt_mb == 0))
        {
          clib_warning ("Missing %d mbuf chain segment(s): "
                        "(nb_segs = %d, nb_segs_left = %d)!",
                        nb_segs - nb_segs_left, nb_segs, nb_segs_left);
          if (first_mb)
            rte_pktmbuf_free (first_mb);
          return NULL;
        }
      new_mb = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
      if (PREDICT_FALSE (new_mb == 0))
        {
          if (first_mb)
            rte_pktmbuf_free (first_mb);
          return NULL;
        }

      /*
       * Copy packet info into 1st segment.
       */
      if (first_mb == 0)
        {
          first_mb = new_mb;
          rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
          first_mb->nb_segs = pkt_mb->nb_segs;
          first_mb->port = pkt_mb->port;
#ifdef DAW_FIXME		// TX Offload support TBD
          first_mb->vlan_macip = pkt_mb->vlan_macip;
          first_mb->hash = pkt_mb->hash;
          first_mb->ol_flags = pkt_mb->ol_flags;
#endif
        }
      else
        {
          ASSERT (prev_mb_next != 0);
          *prev_mb_next = new_mb;
        }

      /*
       * Copy packet segment data into new mbuf segment.
       */
      rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;
      copy_bytes = pkt_mb->data_len + RTE_PKTMBUF_HEADROOM;
      ASSERT (copy_bytes <= pkt_mb->buf_len);
      clib_memcpy (new_mb->buf_addr, pkt_mb->buf_addr, copy_bytes);

      prev_mb_next = &new_mb->next;
      pkt_mb = pkt_mb->next;
    }

  ASSERT (pkt_mb == 0);
  __rte_mbuf_sanity_check (first_mb, 1);

  return first_mb;
}

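/* Zero-copy variant of the above: the clones share the original segment
   data instead of copying it, so only per-segment metadata is duplicated. */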
struct rte_mbuf *
dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b)
{
  vlib_main_t *vm = vlib_get_main ();
  vlib_buffer_main_t *bm = vm->buffer_main;
  struct rte_mbuf *first_mb = 0, *new_mb, *pkt_mb, **prev_mb_next = 0;
  u8 nb_segs, nb_segs_left;
  unsigned socket_id = rte_socket_id ();

  ASSERT (bm->pktmbuf_pools[socket_id]);
  pkt_mb = rte_mbuf_from_vlib_buffer (b);
  nb_segs = pkt_mb->nb_segs;
  for (nb_segs_left = nb_segs; nb_segs_left; nb_segs_left--)
    {
      if (PREDICT_FALSE (pkt_mb == 0))
        {
          clib_warning ("Missing %d mbuf chain segment(s): "
                        "(nb_segs = %d, nb_segs_left = %d)!",
                        nb_segs - nb_segs_left, nb_segs, nb_segs_left);
          if (first_mb)
            rte_pktmbuf_free (first_mb);
          return NULL;
        }
      new_mb = rte_pktmbuf_clone (pkt_mb, bm->pktmbuf_pools[socket_id]);
      if (PREDICT_FALSE (new_mb == 0))
        {
          if (first_mb)
            rte_pktmbuf_free (first_mb);
          return NULL;
        }

      /*
       * Copy packet info into 1st segment.
       */
      if (first_mb == 0)
        {
          first_mb = new_mb;
          rte_pktmbuf_pkt_len (first_mb) = pkt_mb->pkt_len;
          first_mb->nb_segs = pkt_mb->nb_segs;
          first_mb->port = pkt_mb->port;
#ifdef DAW_FIXME		// TX Offload support TBD
          first_mb->vlan_macip = pkt_mb->vlan_macip;
          first_mb->hash = pkt_mb->hash;
          first_mb->ol_flags = pkt_mb->ol_flags;
#endif
        }
      else
        {
          ASSERT (prev_mb_next != 0);
          *prev_mb_next = new_mb;
        }

      /*
       * Copy packet segment metadata into the new mbuf segment; the clone
       * shares the underlying data, so no payload copy is needed.
       */
      rte_pktmbuf_data_len (new_mb) = pkt_mb->data_len;

      prev_mb_next = &new_mb->next;
      pkt_mb = pkt_mb->next;
    }

  ASSERT (pkt_mb == 0);
  __rte_mbuf_sanity_check (first_mb, 1);

  return first_mb;
}

static void
dpdk_tx_trace_buffer (dpdk_main_t * dm,
                      vlib_node_runtime_t * node,
                      dpdk_device_t * xd,
                      u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer)
{
  vlib_main_t *vm = vlib_get_main ();
  dpdk_tx_dma_trace_t *t0;
  struct rte_mbuf *mb;

  mb = rte_mbuf_from_vlib_buffer (buffer);

  t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
  t0->queue_index = queue_id;
  t0->device_index = xd->device_index;
  t0->buffer_index = buffer_index;
  clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
  clib_memcpy (&t0->buffer, buffer,
               sizeof (buffer[0]) - sizeof (buffer->pre_data));
  clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
               sizeof (t0->buffer.pre_data));
}

/*
 * This function calls DPDK's tx_burst function to transmit the packets on
 * the tx_vector. It manages a per-device lock if the device does not
 * support multiple queues. It returns the number of packets left
 * untransmitted on the tx_vector. If all packets are transmitted (the
 * normal case), the function returns 0.
 *
 * The tx_burst function may not be able to transmit all packets because the
 * dpdk ring is full. If a flowcontrol callback function has been configured,
 * the function simply returns. If no callback has been configured, the
 * function retries calling tx_burst with the remaining packets. This
 * continues until all packets are transmitted or tx_burst indicates no more
 * packets can be transmitted. (The caller can drop the remaining packets.)
 *
 * The function assumes there is at least one packet on the tx_vector.
 */
static_always_inline
  u32 tx_burst_vector_internal (vlib_main_t * vm,
                                dpdk_device_t * xd,
                                struct rte_mbuf **tx_vector)
{
  dpdk_main_t *dm = &dpdk_main;
  u32 n_packets;
  u32 tx_head;
  u32 tx_tail;
  u32 n_retry;
  int rv;
  int queue_id;
  tx_ring_hdr_t *ring;

  ring = vec_header (tx_vector, sizeof (*ring));

  n_packets = ring->tx_head - ring->tx_tail;

  tx_head = ring->tx_head % DPDK_TX_RING_SIZE;
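  /* tx_head and tx_tail are free-running counters: each use reduces them
     modulo DPDK_TX_RING_SIZE, so their difference is the number of packets
     still queued even after either counter wraps. */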

  /*
   * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to
   * unpredictable results.
   */
  ASSERT (n_packets > 0);

  /*
   * Check for tx_vector overflow. If this fails it is a system configuration
   * error. The ring should be sized big enough to handle the largest un-flowed
   * off burst from a traffic manager. A larger size also helps performance
   * a bit because it decreases the probability of having to issue two tx_burst
   * calls due to a ring wrap.
   */
  ASSERT (n_packets < DPDK_TX_RING_SIZE);

  /*
   * If there is no flowcontrol callback, there is only temporary buffering
   * on the tx_vector and so the tail should always be 0.
   */
  ASSERT (dm->flowcontrol_callback || ring->tx_tail == 0);

  /*
   * If there is a flowcontrol callback, don't retry any incomplete tx_bursts.
   * Apply backpressure instead. If there is no callback, keep retrying until
   * a tx_burst sends no packets. n_retry of 255 essentially means no retry
   * limit.
   */
  n_retry = dm->flowcontrol_callback ? 0 : 255;

  queue_id = vm->cpu_index;

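  /* By default each thread transmits on the queue matching its cpu index;
     the locked path below covers devices with fewer tx queues than
     threads. */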
  do
    {
      /* start the burst at the tail */
      tx_tail = ring->tx_tail % DPDK_TX_RING_SIZE;

      /*
       * This device has fewer TX queues than threads, so queue access
       * must be serialized: grab a free queue lock, hopping to the next
       * queue rather than spinning on a contended one.
       */
      if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_VHOST_USER) == 0 &&
                         xd->lockp != 0))
        {
          queue_id = queue_id % xd->tx_q_used;
          while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
            /* zzzz */
            queue_id = (queue_id + 1) % xd->tx_q_used;
        }
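      /* If a lock was taken above, this thread owns tx queue queue_id
         exclusively until the matching unlock after the burst. */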

      if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
        {
          if (PREDICT_TRUE (tx_head > tx_tail))
            {
              /* no wrap, transmit in one burst */
              rv = rte_eth_tx_burst (xd->device_index,
                                     (uint16_t) queue_id,
                                     &tx_vector[tx_tail],
                                     (uint16_t) (tx_head - tx_tail));
            }
          else
            {
              /*
               * This can only happen if there is a flowcontrol callback.
               * We need to split the transmit into two calls: one for
               * the packets up to the wrap point, and one to continue
               * at the start of the ring.
               * Transmit pkts up to the wrap point.
               */
              rv = rte_eth_tx_burst (xd->device_index,
                                     (uint16_t) queue_id,
                                     &tx_vector[tx_tail],
                                     (uint16_t) (DPDK_TX_RING_SIZE -
                                                 tx_tail));

              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }
        }
#if DPDK_VHOST_USER
      else if (xd->flags & DPDK_DEVICE_FLAG_VHOST_USER)
        {
          u32 offset = 0;
          if (xd->need_txlock)
            {
              queue_id = 0;
              while (__sync_lock_test_and_set (xd->lockp[queue_id], 1));
            }
          else
            {
              dpdk_device_and_queue_t *dq;
              vec_foreach (dq, dm->devices_by_cpu[vm->cpu_index])
              {
                if (xd->device_index == dq->device)
                  break;
              }
              assert (dq);
              offset = dq->queue_id * VIRTIO_QNUM;
            }
          if (PREDICT_TRUE (tx_head > tx_tail))
            {
              int i;
              u32 bytes = 0;
              struct rte_mbuf **pkts = &tx_vector[tx_tail];
              for (i = 0; i < (tx_head - tx_tail); i++)
                {
                  struct rte_mbuf *buff = pkts[i];
                  bytes += rte_pktmbuf_data_len (buff);
                }

              /* no wrap, transmit in one burst */
              rv =
                rte_vhost_enqueue_burst (&xd->vu_vhost_dev,
                                         offset + VIRTIO_RXQ,
                                         &tx_vector[tx_tail],
                                         (uint16_t) (tx_head - tx_tail));
              if (PREDICT_TRUE (rv > 0))
                {
                  dpdk_vu_vring *vring =
                    &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
                  vring->packets += rv;
                  vring->bytes += bytes;

                  if (dpdk_vhost_user_want_interrupt
                      (xd, offset + VIRTIO_RXQ))
                    {
                      vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
                      vring->n_since_last_int += rv;

                      f64 now = vlib_time_now (vm);
                      if (vring->int_deadline < now ||
                          vring->n_since_last_int >
                          dm->conf->vhost_coalesce_frames)
                        dpdk_vhost_user_send_interrupt (vm, xd,
                                                        offset + VIRTIO_RXQ);
                    }

                  int c = rv;
                  while (c--)
                    rte_pktmbuf_free (tx_vector[tx_tail + c]);
                }
            }
          else
            {
              /*
               * The ring wrapped; transmit the packets up to the wrap
               * point first.
               */
              int i;
              u32 bytes = 0;
              struct rte_mbuf **pkts = &tx_vector[tx_tail];
              for (i = 0; i < (DPDK_TX_RING_SIZE - tx_tail); i++)
                {
                  struct rte_mbuf *buff = pkts[i];
                  bytes += rte_pktmbuf_data_len (buff);
                }
              rv =
                rte_vhost_enqueue_burst (&xd->vu_vhost_dev,
                                         offset + VIRTIO_RXQ,
                                         &tx_vector[tx_tail],
                                         (uint16_t) (DPDK_TX_RING_SIZE -
                                                     tx_tail));

              if (PREDICT_TRUE (rv > 0))
                {
                  dpdk_vu_vring *vring =
                    &(xd->vu_intf->vrings[offset + VIRTIO_TXQ]);
                  vring->packets += rv;
                  vring->bytes += bytes;

                  if (dpdk_vhost_user_want_interrupt
                      (xd, offset + VIRTIO_RXQ))
                    {
                      vring = &(xd->vu_intf->vrings[offset + VIRTIO_RXQ]);
                      vring->n_since_last_int += rv;

                      f64 now = vlib_time_now (vm);
                      if (vring->int_deadline < now ||
                          vring->n_since_last_int >
                          dm->conf->vhost_coalesce_frames)
                        dpdk_vhost_user_send_interrupt (vm, xd,
                                                        offset + VIRTIO_RXQ);
                    }

                  int c = rv;
                  while (c--)
                    rte_pktmbuf_free (tx_vector[tx_tail + c]);
                }

              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }

          if (xd->need_txlock)
            *xd->lockp[queue_id] = 0;
        }
#endif
#if RTE_LIBRTE_KNI
      else if (xd->flags & DPDK_DEVICE_FLAG_KNI)
        {
          if (PREDICT_TRUE (tx_head > tx_tail))
            {
              /* no wrap, transmit in one burst */
              rv = rte_kni_tx_burst (xd->kni,
                                     &tx_vector[tx_tail],
                                     (uint16_t) (tx_head - tx_tail));
            }
          else
            {
              /*
               * This can only happen if there is a flowcontrol callback.
               * We need to split the transmit into two calls: one for
               * the packets up to the wrap point, and one to continue
               * at the start of the ring.
               * Transmit pkts up to the wrap point.
               */
              rv = rte_kni_tx_burst (xd->kni,
                                     &tx_vector[tx_tail],
                                     (uint16_t) (DPDK_TX_RING_SIZE -
                                                 tx_tail));

              /*
               * If we transmitted everything we wanted, then allow 1 retry
               * so we can try to transmit the rest. If we didn't transmit
               * everything, stop now.
               */
              n_retry = (rv == DPDK_TX_RING_SIZE - tx_tail) ? 1 : 0;
            }
        }
#endif
      else
        {
          ASSERT (0);
          rv = 0;
        }

      if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_VHOST_USER) == 0 &&
                         xd->lockp != 0))
        *xd->lockp[queue_id] = 0;

      if (PREDICT_FALSE (rv < 0))
        {
          // emit non-fatal message, bump counter
          vnet_main_t *vnm = dm->vnet_main;
          vnet_interface_main_t *im = &vnm->interface_main;
          u32 node_index;

          node_index = vec_elt_at_index (im->hw_interfaces,
                                         xd->vlib_hw_if_index)->tx_node_index;

          vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
          clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index,
                        rv);
          return n_packets;	// untransmitted packets
        }
      ring->tx_tail += (u16) rv;
      n_packets -= (uint16_t) rv;
    }
  while (rv && n_packets && (n_retry > 0));

  return n_packets;
}

/*
 * This function transmits any packets on the interface's tx_vector and
 * returns the number of packets left untransmitted on the tx_vector. If
 * the tx_vector is empty, the function simply returns 0.
 *
 * It is intended to be called by a traffic manager which has flowed-off an
 * interface to see if the interface can be flowed-on again.
 */
u32
dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance)
{
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;
  int queue_id;
  struct rte_mbuf **tx_vector;
  tx_ring_hdr_t *ring;

  /* param is dev_instance and not hw_if_index to save another lookup */
  xd = vec_elt_at_index (dm->devices, dev_instance);

  queue_id = vm->cpu_index;
  tx_vector = xd->tx_vectors[queue_id];

  /* If no packets on the ring, don't bother calling tx function */
  ring = vec_header (tx_vector, sizeof (*ring));
  if (ring->tx_head == ring->tx_tail)
    {
      return 0;
    }

  return tx_burst_vector_internal (vm, xd, tx_vector);
}

/*
 * Transmits the packets on the frame to the interface associated with the
 * node. It first copies packets on the frame to a tx_vector of rte_mbuf
 * pointers. It then passes this vector to tx_burst_vector_internal, which
 * calls the dpdk tx_burst function.
 *
 * The tx_vector is treated slightly differently depending on whether or
 * not a flowcontrol callback function has been configured. If there is no
 * callback, the tx_vector is a temporary array of rte_mbuf packet pointers.
 * Its entries are written and consumed before the function exits.
 *
 * If there is a callback, the transmit is being invoked in the presence
 * of a traffic manager. Here the tx_vector is treated like a ring of rte_mbuf
 * pointers. If not all packets can be transmitted, the untransmitted packets
 * stay on the tx_vector until the next call. The callback allows the traffic
 * manager to flow-off dequeues to the interface. The companion function
 * dpdk_interface_tx_vector() allows the traffic manager to detect when
 * it should flow-on the interface again.
 */
static uword
dpdk_interface_tx (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * f)
{
  dpdk_main_t *dm = &dpdk_main;
  vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
  u32 n_packets = f->n_vectors;
  u32 n_left;
  u32 *from;
  struct rte_mbuf **tx_vector;
  int i;
  int queue_id;
  u32 my_cpu;
  u32 tx_pkts = 0;
  tx_ring_hdr_t *ring;
  u32 n_on_ring;

  my_cpu = vm->cpu_index;

  queue_id = my_cpu;

  tx_vector = xd->tx_vectors[queue_id];
  ring = vec_header (tx_vector, sizeof (*ring));

  n_on_ring = ring->tx_head - ring->tx_tail;
  from = vlib_frame_vector_args (f);

  ASSERT (n_packets <= VLIB_FRAME_SIZE);

  if (PREDICT_FALSE (n_on_ring + n_packets > DPDK_TX_RING_SIZE))
    {
      /*
       * Overflowing the ring should never happen.
       * If it does then drop the whole frame.
       */
      vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL,
                        n_packets);

      while (n_packets--)
        {
          u32 bi0 = from[n_packets];
          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
          struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0);
          rte_pktmbuf_free (mb0);
        }
      return n_on_ring;
    }

  if (PREDICT_FALSE (dm->tx_pcap_enable))
    {
      n_left = n_packets;
      while (n_left > 0)
        {
          u32 bi0 = from[0];
          vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
          if (dm->pcap_sw_if_index == 0 ||
              dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX])
            pcap_add_buffer (&dm->pcap_main, vm, bi0, 512);
          from++;
          n_left--;
        }
    }

  from = vlib_frame_vector_args (f);
  n_left = n_packets;
  i = ring->tx_head % DPDK_TX_RING_SIZE;

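  /* Main enqueue loop: process two packets per iteration while at least
     four remain, prefetching the next pair of buffers and their mbufs one
     iteration ahead; the single-packet loop below handles the leftovers. */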
  while (n_left >= 4)
    {
      u32 bi0, bi1;
      u32 pi0, pi1;
      struct rte_mbuf *mb0, *mb1;
      struct rte_mbuf *prefmb0, *prefmb1;
      vlib_buffer_t *b0, *b1;
      vlib_buffer_t *pref0, *pref1;
      i16 delta0, delta1;
      u16 new_data_len0, new_data_len1;
      u16 new_pkt_len0, new_pkt_len1;
      u32 any_clone;

      pi0 = from[2];
      pi1 = from[3];
      pref0 = vlib_get_buffer (vm, pi0);
      pref1 = vlib_get_buffer (vm, pi1);

      prefmb0 = rte_mbuf_from_vlib_buffer (pref0);
      prefmb1 = rte_mbuf_from_vlib_buffer (pref1);

      CLIB_PREFETCH (prefmb0, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH (pref0, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH (prefmb1, CLIB_CACHE_LINE_BYTES, LOAD);
      CLIB_PREFETCH (pref1, CLIB_CACHE_LINE_BYTES, LOAD);

      bi0 = from[0];
      bi1 = from[1];
      from += 2;

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      mb0 = rte_mbuf_from_vlib_buffer (b0);
      mb1 = rte_mbuf_from_vlib_buffer (b1);

      any_clone = (b0->flags & VLIB_BUFFER_RECYCLE)
        | (b1->flags & VLIB_BUFFER_RECYCLE);
      if (PREDICT_FALSE (any_clone != 0))
        {
          if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
            {
              struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0);
              if (PREDICT_FALSE (mb0_new == 0))
                {
                  vlib_error_count (vm, node->node_index,
                                    DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
                  b0->flags |= VLIB_BUFFER_REPL_FAIL;
                }
              else
                mb0 = mb0_new;
              vec_add1 (dm->recycle[my_cpu], bi0);
            }
          if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_RECYCLE) != 0))
            {
              struct rte_mbuf *mb1_new = dpdk_replicate_packet_mb (b1);
              if (PREDICT_FALSE (mb1_new == 0))
                {
                  vlib_error_count (vm, node->node_index,
                                    DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
                  b1->flags |= VLIB_BUFFER_REPL_FAIL;
                }
              else
                mb1 = mb1_new;
              vec_add1 (dm->recycle[my_cpu], bi1);
            }
        }

      delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;
      delta1 = PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b1) - (i16) mb1->pkt_len;

      new_data_len0 = (u16) ((i16) mb0->data_len + delta0);
      new_data_len1 = (u16) ((i16) mb1->data_len + delta1);
      new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0);
      new_pkt_len1 = (u16) ((i16) mb1->pkt_len + delta1);

      b0->current_length = new_data_len0;
      b1->current_length = new_data_len1;
      mb0->data_len = new_data_len0;
      mb1->data_len = new_data_len1;
      mb0->pkt_len = new_pkt_len0;
      mb1->pkt_len = new_pkt_len1;

      mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
        mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
      mb1->data_off = (PREDICT_FALSE (b1->flags & VLIB_BUFFER_REPL_FAIL)) ?
        mb1->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b1->current_data);

      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
        {
          if (b0->flags & VLIB_BUFFER_IS_TRACED)
            dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
          if (b1->flags & VLIB_BUFFER_IS_TRACED)
            dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
        }

      if (PREDICT_TRUE (any_clone == 0))
        {
          tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
          i++;
          tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
          i++;
        }
      else
        {
          /* cloning was done, need to check for failure */
          if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
            {
              tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
              i++;
            }
          if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
            {
              tx_vector[i % DPDK_TX_RING_SIZE] = mb1;
              i++;
            }
        }

      n_left -= 2;
    }
  while (n_left > 0)
    {
      u32 bi0;
      struct rte_mbuf *mb0;
      vlib_buffer_t *b0;
      i16 delta0;
      u16 new_data_len0;
      u16 new_pkt_len0;

      bi0 = from[0];
      from++;

      b0 = vlib_get_buffer (vm, bi0);

      mb0 = rte_mbuf_from_vlib_buffer (b0);
      if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_RECYCLE) != 0))
        {
          struct rte_mbuf *mb0_new = dpdk_replicate_packet_mb (b0);
          if (PREDICT_FALSE (mb0_new == 0))
            {
              vlib_error_count (vm, node->node_index,
                                DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
              b0->flags |= VLIB_BUFFER_REPL_FAIL;
            }
          else
            mb0 = mb0_new;
          vec_add1 (dm->recycle[my_cpu], bi0);
        }

      delta0 = PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL) ? 0 :
        vlib_buffer_length_in_chain (vm, b0) - (i16) mb0->pkt_len;

      new_data_len0 = (u16) ((i16) mb0->data_len + delta0);
      new_pkt_len0 = (u16) ((i16) mb0->pkt_len + delta0);

      b0->current_length = new_data_len0;
      mb0->data_len = new_data_len0;
      mb0->pkt_len = new_pkt_len0;
      mb0->data_off = (PREDICT_FALSE (b0->flags & VLIB_BUFFER_REPL_FAIL)) ?
        mb0->data_off : (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);

      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
        if (b0->flags & VLIB_BUFFER_IS_TRACED)
          dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);

      if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
        {
          tx_vector[i % DPDK_TX_RING_SIZE] = mb0;
          i++;
        }
      n_left--;
    }

  /* account for additional packets in the ring */
  ring->tx_head += n_packets;
  n_on_ring = ring->tx_head - ring->tx_tail;

  /* transmit as many packets as possible */
  n_packets = tx_burst_vector_internal (vm, xd, tx_vector);

  /*
   * tx_pkts is the number of packets successfully transmitted
   * This is the number originally on ring minus the number remaining on ring
   */
  tx_pkts = n_on_ring - n_packets;

  if (PREDICT_FALSE (dm->flowcontrol_callback != 0))
    {
      if (PREDICT_FALSE (n_packets))
        {
          /* Callback may want to enable flowcontrol */
          dm->flowcontrol_callback (vm, xd->vlib_hw_if_index,
                                    ring->tx_head - ring->tx_tail);
        }
      else
        {
          /* Reset head/tail to avoid unnecessary wrap */
          ring->tx_head = 0;
          ring->tx_tail = 0;
        }
    }
  else
    {
      /* If there is no callback then drop any non-transmitted packets */
      if (PREDICT_FALSE (n_packets))
        {
          vlib_simple_counter_main_t *cm;
          vnet_main_t *vnm = vnet_get_main ();

          cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
                                 VNET_INTERFACE_COUNTER_TX_ERROR);

          vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
                                         n_packets);

          vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
                            n_packets);

          while (n_packets--)
            rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
        }

      /* Reset head/tail to avoid unnecessary wrap */
      ring->tx_head = 0;
      ring->tx_tail = 0;
    }

  /* Recycle replicated buffers */
  if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu])))
    {
      vlib_buffer_free (vm, dm->recycle[my_cpu],
                        vec_len (dm->recycle[my_cpu]));
      _vec_len (dm->recycle[my_cpu]) = 0;
    }

  ASSERT (ring->tx_head >= ring->tx_tail);

  return tx_pkts;
}

static int
dpdk_device_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
{
#if DPDK_VHOST_USER
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  if (!xd || (xd->flags & DPDK_DEVICE_FLAG_VHOST_USER) == 0)
    {
      clib_warning
        ("cannot renumber non-vhost-user interface (sw_if_index: %d)",
         hi->sw_if_index);
      return 0;
    }

  xd->vu_if_id = new_dev_instance;
#endif
  return 0;
}

static void
dpdk_clear_hw_interface_counters (u32 instance)
{
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance);

  /*
   * Set the "last_cleared_stats" to the current stats, so that
   * things appear to clear from a display perspective.
   */
  dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));

  clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats));
  clib_memcpy (xd->last_cleared_xstats, xd->xstats,
               vec_len (xd->last_cleared_xstats) *
               sizeof (xd->last_cleared_xstats[0]));

#if DPDK_VHOST_USER
  if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_VHOST_USER))
    {
      int i;
      for (i = 0; i < xd->rx_q_used * VIRTIO_QNUM; i++)
        {
          xd->vu_intf->vrings[i].packets = 0;
          xd->vu_intf->vrings[i].bytes = 0;
        }
    }
#endif
}

#ifdef RTE_LIBRTE_KNI
static int
kni_config_network_if (u8 port_id, u8 if_up)
{
  vnet_main_t *vnm = vnet_get_main ();
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;
  uword *p;

  p = hash_get (dm->dpdk_device_by_kni_port_id, port_id);
  if (p == 0)
    {
      clib_warning ("unknown interface");
      return 0;
    }
  else
    {
      xd = vec_elt_at_index (dm->devices, p[0]);
    }

  vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index,
                               if_up ? VNET_HW_INTERFACE_FLAG_LINK_UP |
                               ETH_LINK_FULL_DUPLEX : 0);
  return 0;
}

static int
kni_change_mtu (u8 port_id, unsigned new_mtu)
{
  vnet_main_t *vnm = vnet_get_main ();
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;
  uword *p;
  vnet_hw_interface_t *hif;

  p = hash_get (dm->dpdk_device_by_kni_port_id, port_id);
  if (p == 0)
    {
      clib_warning ("unknown interface");
      return 0;
    }
  else
    {
      xd = vec_elt_at_index (dm->devices, p[0]);
    }
  hif = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index);

  hif->max_packet_bytes = new_mtu;

  return 0;
}
#endif

static clib_error_t *
dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
  uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance);
  int rv = 0;

#ifdef RTE_LIBRTE_KNI
  if (xd->flags & DPDK_DEVICE_FLAG_KNI)
    {
      if (is_up)
        {
          struct rte_kni_conf conf;
          struct rte_kni_ops ops;
          vlib_main_t *vm = vlib_get_main ();
          vlib_buffer_main_t *bm = vm->buffer_main;
          memset (&conf, 0, sizeof (conf));
          snprintf (conf.name, RTE_KNI_NAMESIZE, "vpp%u", xd->kni_port_id);
          conf.mbuf_size = VLIB_BUFFER_DATA_SIZE;
          memset (&ops, 0, sizeof (ops));
          ops.port_id = xd->kni_port_id;
          ops.change_mtu = kni_change_mtu;
          ops.config_network_if = kni_config_network_if;

          xd->kni =
            rte_kni_alloc (bm->pktmbuf_pools[rte_socket_id ()], &conf, &ops);
          if (!xd->kni)
            {
              clib_warning ("failed to allocate kni interface");
            }
          else
            {
              hif->max_packet_bytes = 1500;	/* kni interface default value */
              xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
            }
        }
      else
        {
          xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP;
          int kni_rv;

          kni_rv = rte_kni_release (xd->kni);
          if (kni_rv < 0)
            clib_warning ("rte_kni_release returned %d", kni_rv);
        }
      return 0;
    }
#endif
#if DPDK_VHOST_USER
  if (xd->flags & DPDK_DEVICE_FLAG_VHOST_USER)
    {
      if (is_up)
        {
          if (xd->vu_is_running)
            vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index,
                                         VNET_HW_INTERFACE_FLAG_LINK_UP |
                                         ETH_LINK_FULL_DUPLEX);
          xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
        }
      else
        {
          vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
          xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP;
        }

      return 0;
    }
#endif

  if (is_up)
    {
      f64 now = vlib_time_now (dm->vlib_main);

      if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
        rv = rte_eth_dev_start (xd->device_index);

      if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
        rte_eth_promiscuous_enable (xd->device_index);
      else
        rte_eth_promiscuous_disable (xd->device_index);

      rte_eth_allmulticast_enable (xd->device_index);
      xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
      dpdk_update_counters (xd, now);
      dpdk_update_link_state (xd, now);
    }
  else
    {
      xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP;

      rte_eth_allmulticast_disable (xd->device_index);
      vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
      rte_eth_dev_stop (xd->device_index);

      /* For bonded interface, stop slave links */
      if (xd->pmd == VNET_DPDK_PMD_BOND)
        {
          u8 slink[16];
          int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16);
          while (nlink >= 1)
            {
              u8 dpdk_port = slink[--nlink];
              rte_eth_dev_stop (dpdk_port);
            }
        }
    }

  if (rv < 0)
    clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", rv);

  return /* no error */ 0;
}

/*
 * Dynamically redirect all pkts from a specific interface
 * to the specified node
 */
static void
dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
                              u32 node_index)
{
  dpdk_main_t *xm = &dpdk_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);

  /* Shut off redirection */
  if (node_index == ~0)
    {
      xd->per_interface_next_index = node_index;
      return;
    }

  xd->per_interface_next_index =
    vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
}

static clib_error_t *
dpdk_subif_add_del_function (vnet_main_t * vnm,
                             u32 hw_if_index,
                             struct vnet_sw_interface_t *st, int is_add)
{
  dpdk_main_t *xm = &dpdk_main;
  vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
  vnet_sw_interface_t *t = (vnet_sw_interface_t *) st;
  int r, vlan_offload;
  u32 prev_subifs = xd->vlan_subifs;

  if (is_add)
    xd->vlan_subifs++;
  else if (xd->vlan_subifs)
    xd->vlan_subifs--;

  if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
    return 0;

  /* currently we program VLANS only for IXGBE VF and I40E VF */
  if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF))
    return 0;

  if (t->sub.eth.flags.no_tags == 1)
    return 0;

  if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1))
    {
      xd->vlan_subifs = prev_subifs;
      return clib_error_return (0, "unsupported VLAN setup");
    }

  vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index);
  vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;

  if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload)))
    {
      xd->vlan_subifs = prev_subifs;
      return clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d",
                                xd->device_index, r);
    }

  if ((r =
       rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id,
                                is_add)))
    {
      xd->vlan_subifs = prev_subifs;
      return clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d",
                                xd->device_index, r);
    }

  return 0;
}

/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) = {
  .name = "dpdk",
  .tx_function = dpdk_interface_tx,
  .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
  .tx_function_error_strings = dpdk_tx_func_error_strings,
  .format_device_name = format_dpdk_device_name,
  .format_device = format_dpdk_device,
  .format_tx_trace = format_dpdk_tx_dma_trace,
  .clear_counters = dpdk_clear_hw_interface_counters,
  .admin_up_down_function = dpdk_interface_admin_up_down,
  .subif_add_del_function = dpdk_subif_add_del_function,
  .rx_redirect_to_node = dpdk_set_interface_next_node,
  .no_flatten_output_chains = 1,
  .name_renumber = dpdk_device_renumber,
};

VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
/* *INDENT-ON* */
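
/* VLIB_DEVICE_TX_FUNCTION_MULTIARCH (above) registers CPU-variant builds of
   dpdk_interface_tx so the best match for the running CPU is selected at
   runtime. */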

void
dpdk_set_flowcontrol_callback (vlib_main_t * vm,
                               dpdk_flowcontrol_callback_t callback)
{
  dpdk_main.flowcontrol_callback = callback;
}

#define UP_DOWN_FLAG_EVENT 1

u32
dpdk_get_admin_up_down_in_progress (void)
{
  return dpdk_main.admin_up_down_in_progress;
}

uword
admin_up_down_process (vlib_main_t * vm,
                       vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  clib_error_t *error = 0;
  uword event_type;
  uword *event_data = 0;
  u32 sw_if_index;
  u32 flags;

  while (1)
    {
      vlib_process_wait_for_event (vm);

      event_type = vlib_process_get_events (vm, &event_data);

      dpdk_main.admin_up_down_in_progress = 1;

      switch (event_type)
        {
        case UP_DOWN_FLAG_EVENT:
          {
            if (vec_len (event_data) == 2)
              {
                sw_if_index = event_data[0];
                flags = event_data[1];
                error =
                  vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index,
                                               flags);
                clib_error_report (error);
              }
          }
          break;
        }

      vec_reset_length (event_data);

      dpdk_main.admin_up_down_in_progress = 0;

    }
  return 0;			/* or not */
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
    .function = admin_up_down_process,
    .type = VLIB_NODE_TYPE_PROCESS,
    .name = "admin-up-down-process",
    .process_log2_n_stack_bytes = 17,  // 256KB
};
/* *INDENT-ON* */

/*
 * Asynchronously invoke vnet_sw_interface_set_flags via the admin_up_down
 * process. Useful for avoiding long blocking delays (>150ms) in the dpdk
 * drivers.
 * WARNING: when posting this event, no other interface-related calls should
 * be made (e.g. vnet_create_sw_interface()) while the event is being
 * processed (admin_up_down_in_progress). This is required in order to avoid
 * race conditions in manipulating interface data structures.
 */
void
post_sw_interface_set_flags (vlib_main_t * vm, u32 sw_if_index, u32 flags)
{
  uword *d = vlib_process_signal_event_data
    (vm, admin_up_down_process_node.index,
     UP_DOWN_FLAG_EVENT, 2, sizeof (u32));
  d[0] = sw_if_index;
  d[1] = flags;
}

/*
 * Return a copy of the DPDK port stats in dest.
 */
clib_error_t *
dpdk_get_hw_interface_stats (u32 hw_if_index, struct rte_eth_stats *dest)
{
  dpdk_main_t *dm = &dpdk_main;
  vnet_main_t *vnm = vnet_get_main ();
  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
  dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  if (!dest)
    {
      return clib_error_return (0, "Missing or NULL argument");
    }
  if (!xd)
    {
      return clib_error_return (0,
                                "Unable to get DPDK device from HW interface");
    }

  dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));

  clib_memcpy (dest, &xd->stats, sizeof (xd->stats));
  return (0);
}

/*
 * Return the number of dpdk mbufs
 */
u32
dpdk_num_mbufs (void)
{
  dpdk_main_t *dm = &dpdk_main;

  return dm->conf->num_mbufs;
}

/*
 * Return the pmd type for a given hardware interface
 */
dpdk_pmd_t
dpdk_get_pmd_type (vnet_hw_interface_t * hi)
{
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;

  assert (hi);

  xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  assert (xd);

  return xd->pmd;
}

/*
 * Return the cpu socket for a given hardware interface
 */
i8
dpdk_get_cpu_socket (vnet_hw_interface_t * hi)
{
  dpdk_main_t *dm = &dpdk_main;
  dpdk_device_t *xd;

  assert (hi);

  xd = vec_elt_at_index (dm->devices, hi->dev_instance);

  assert (xd);

  return xd->cpu_socket;
}

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */