blob: 69fc11c927e6d50515c17f95b2998188b1104eb0 [file] [log] [blame]
/*
 *------------------------------------------------------------------
 * af_packet.c - linux kernel packet interface
 *
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */
19
20#include <linux/if_packet.h>
21
22#include <vlib/vlib.h>
23#include <vlib/unix/unix.h>
24#include <vnet/ip/ip.h>
25#include <vnet/ethernet/ethernet.h>
Damjan Marion8bdc63b2016-11-02 14:48:21 +010026#include <vnet/devices/devices.h>
Damjan Marion22311502016-10-28 20:30:15 +020027#include <vnet/feature/feature.h>
Damjan Marion83243a02016-02-29 13:09:30 +010028
29#include <vnet/devices/af_packet/af_packet.h>
30
/*
 * X-macro list of this node's error counters.  Each entry has the form
 * _(SYMBOL, "description").  The list is currently empty, so the enum
 * below contains only the terminating count and the string table has no
 * elements.
 */
#define foreach_af_packet_input_error

/* Error codes expanded from the list above; the last member is the count. */
typedef enum
{
#define _(sym, str) AF_PACKET_INPUT_ERROR_##sym,
  foreach_af_packet_input_error
#undef _
    AF_PACKET_INPUT_N_ERROR,
} af_packet_input_error_t;

/* Description strings expanded from the same list, in the same order. */
static char *af_packet_input_error_strings[] = {
#define _(sym, str) str,
  foreach_af_packet_input_error
#undef _
};
46
Damjan Marion00a9dca2016-08-17 17:05:46 +020047typedef struct
48{
Damjan Marion83243a02016-02-29 13:09:30 +010049 u32 next_index;
50 u32 hw_if_index;
51 int block;
52 struct tpacket2_hdr tph;
53} af_packet_input_trace_t;
54
Damjan Marion00a9dca2016-08-17 17:05:46 +020055static u8 *
56format_af_packet_input_trace (u8 * s, va_list * args)
Damjan Marion83243a02016-02-29 13:09:30 +010057{
58 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
Damjan Marion00a9dca2016-08-17 17:05:46 +020060 af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
Damjan Marion83243a02016-02-29 13:09:30 +010061 uword indent = format_get_indent (s);
62
63 s = format (s, "af_packet: hw_if_index %d next-index %d",
64 t->hw_if_index, t->next_index);
65
Damjan Marion00a9dca2016-08-17 17:05:46 +020066 s =
67 format (s,
68 "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
69 "\n%Usec 0x%x nsec 0x%x vlan %U"
Damjan Marion83243a02016-02-29 13:09:30 +010070#ifdef TP_STATUS_VLAN_TPID_VALID
Damjan Marion00a9dca2016-08-17 17:05:46 +020071 " vlan_tpid %u"
Damjan Marion83243a02016-02-29 13:09:30 +010072#endif
Damjan Marion00a9dca2016-08-17 17:05:46 +020073 ,
74 format_white_space, indent + 2,
75 format_white_space, indent + 4,
76 t->tph.tp_status,
77 t->tph.tp_len,
78 t->tph.tp_snaplen,
79 t->tph.tp_mac,
80 t->tph.tp_net,
81 format_white_space, indent + 4,
82 t->tph.tp_sec,
83 t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.tp_vlan_tci
Damjan Marion83243a02016-02-29 13:09:30 +010084#ifdef TP_STATUS_VLAN_TPID_VALID
Damjan Marion00a9dca2016-08-17 17:05:46 +020085 , t->tph.tp_vlan_tpid
Damjan Marion83243a02016-02-29 13:09:30 +010086#endif
Damjan Marion00a9dca2016-08-17 17:05:46 +020087 );
Damjan Marion83243a02016-02-29 13:09:30 +010088 return s;
89}
90
91always_inline void
Damjan Marion00a9dca2016-08-17 17:05:46 +020092buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
Damjan Marion83243a02016-02-29 13:09:30 +010093{
Damjan Marion00a9dca2016-08-17 17:05:46 +020094 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
95 vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
96 vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
Damjan Marion83243a02016-02-29 13:09:30 +010097
98 /* update first buffer */
Damjan Marion00a9dca2016-08-17 17:05:46 +020099 first_b->total_length_not_including_first_buffer += b->current_length;
Damjan Marion83243a02016-02-29 13:09:30 +0100100
101 /* update previous buffer */
102 prev_b->next_buffer = bi;
103 prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
104
105 /* update current buffer */
106 b->next_buffer = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100107}
108
109always_inline uword
Damjan Marion00a9dca2016-08-17 17:05:46 +0200110af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100111 vlib_frame_t * frame, af_packet_if_t * apif)
Damjan Marion83243a02016-02-29 13:09:30 +0100112{
Damjan Marion00a9dca2016-08-17 17:05:46 +0200113 af_packet_main_t *apm = &af_packet_main;
Damjan Marion83243a02016-02-29 13:09:30 +0100114 struct tpacket2_hdr *tph;
Damjan Marion8bdc63b2016-11-02 14:48:21 +0100115 u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
Damjan Marion83243a02016-02-29 13:09:30 +0100116 u32 block = 0;
117 u32 rx_frame;
118 u32 n_free_bufs;
119 u32 n_rx_packets = 0;
120 u32 n_rx_bytes = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200121 u32 *to_next = 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100122 u32 block_size = apif->rx_req->tp_block_size;
123 u32 frame_size = apif->rx_req->tp_frame_size;
124 u32 frame_num = apif->rx_req->tp_frame_nr;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200125 u8 *block_start = apif->rx_ring + block * block_size;
Damjan Marion83243a02016-02-29 13:09:30 +0100126 uword n_trace = vlib_get_trace_count (vm, node);
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100127 u32 cpu_index = os_get_cpu_number ();
Damjan Marion83243a02016-02-29 13:09:30 +0100128 u32 n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm,
Damjan Marion00a9dca2016-08-17 17:05:46 +0200129 VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
Damjan Marion83243a02016-02-29 13:09:30 +0100130 u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
131
Dave Barach13f3c452016-03-29 11:56:41 -0400132 if (apif->per_interface_next_index != ~0)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200133 next_index = apif->per_interface_next_index;
Dave Barach13f3c452016-03-29 11:56:41 -0400134
Damjan Marion553f6bd2016-09-07 11:54:22 +0200135 n_free_bufs = vec_len (apm->rx_buffers[cpu_index]);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200136 if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
Damjan Marion83243a02016-02-29 13:09:30 +0100137 {
Damjan Marion553f6bd2016-09-07 11:54:22 +0200138 vec_validate (apm->rx_buffers[cpu_index],
139 VLIB_FRAME_SIZE + n_free_bufs - 1);
Damjan Marion00a9dca2016-08-17 17:05:46 +0200140 n_free_bufs +=
Damjan Marion553f6bd2016-09-07 11:54:22 +0200141 vlib_buffer_alloc (vm, &apm->rx_buffers[cpu_index][n_free_bufs],
Damjan Marion00a9dca2016-08-17 17:05:46 +0200142 VLIB_FRAME_SIZE);
Damjan Marion553f6bd2016-09-07 11:54:22 +0200143 _vec_len (apm->rx_buffers[cpu_index]) = n_free_bufs;
Damjan Marion83243a02016-02-29 13:09:30 +0100144 }
145
146 rx_frame = apif->next_rx_frame;
147 tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
148 while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
149 {
Damjan Marion22311502016-10-28 20:30:15 +0200150 vlib_buffer_t *b0 = 0, *first_b0 = 0;
Dave Barach13f3c452016-03-29 11:56:41 -0400151 u32 next0 = next_index;
Damjan Marion83243a02016-02-29 13:09:30 +0100152
153 u32 n_left_to_next;
154 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
155 while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
156 n_left_to_next)
157 {
158 u32 data_len = tph->tp_snaplen;
159 u32 offset = 0;
Benoît Ganned5304452016-04-08 22:25:05 -0700160 u32 bi0 = 0, first_bi0 = 0, prev_bi0;
Damjan Marion83243a02016-02-29 13:09:30 +0100161
162 while (data_len)
163 {
164 /* grab free buffer */
Damjan Marion553f6bd2016-09-07 11:54:22 +0200165 u32 last_empty_buffer =
166 vec_len (apm->rx_buffers[cpu_index]) - 1;
Damjan Marion83243a02016-02-29 13:09:30 +0100167 prev_bi0 = bi0;
Damjan Marion553f6bd2016-09-07 11:54:22 +0200168 bi0 = apm->rx_buffers[cpu_index][last_empty_buffer];
Damjan Marion83243a02016-02-29 13:09:30 +0100169 b0 = vlib_get_buffer (vm, bi0);
Damjan Marion553f6bd2016-09-07 11:54:22 +0200170 _vec_len (apm->rx_buffers[cpu_index]) = last_empty_buffer;
Damjan Marion83243a02016-02-29 13:09:30 +0100171 n_free_bufs--;
172
173 /* copy data */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200174 u32 bytes_to_copy =
175 data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
Damjan Marion83243a02016-02-29 13:09:30 +0100176 b0->current_data = 0;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200177 clib_memcpy (vlib_buffer_get_current (b0),
178 (u8 *) tph + tph->tp_mac + offset, bytes_to_copy);
Damjan Marion83243a02016-02-29 13:09:30 +0100179
180 /* fill buffer header */
Damjan Marion83243a02016-02-29 13:09:30 +0100181 b0->current_length = bytes_to_copy;
182
183 if (offset == 0)
184 {
185 b0->total_length_not_including_first_buffer = 0;
186 b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200187 vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
188 vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
Damjan Marion83243a02016-02-29 13:09:30 +0100189 first_bi0 = bi0;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200190 first_b0 = vlib_get_buffer (vm, first_bi0);
Damjan Marion83243a02016-02-29 13:09:30 +0100191 }
192 else
Damjan Marion00a9dca2016-08-17 17:05:46 +0200193 buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
Damjan Marion83243a02016-02-29 13:09:30 +0100194
195 offset += bytes_to_copy;
196 data_len -= bytes_to_copy;
197 }
198 n_rx_packets++;
199 n_rx_bytes += tph->tp_snaplen;
200 to_next[0] = first_bi0;
201 to_next += 1;
202 n_left_to_next--;
203
204 /* trace */
Damjan Marion00a9dca2016-08-17 17:05:46 +0200205 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (first_b0);
206 if (PREDICT_FALSE (n_trace > 0))
Damjan Marion83243a02016-02-29 13:09:30 +0100207 {
208 af_packet_input_trace_t *tr;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200209 vlib_trace_buffer (vm, node, next0, first_b0, /* follow_chain */
210 0);
Damjan Marion83243a02016-02-29 13:09:30 +0100211 vlib_set_trace_count (vm, node, --n_trace);
212 tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
213 tr->next_index = next0;
214 tr->hw_if_index = apif->hw_if_index;
Damjan Marion00a9dca2016-08-17 17:05:46 +0200215 clib_memcpy (&tr->tph, tph, sizeof (struct tpacket2_hdr));
Damjan Marion83243a02016-02-29 13:09:30 +0100216 }
Damjan Marion22311502016-10-28 20:30:15 +0200217
218 /* redirect if feature path enabled */
Damjan Marion87cd1192016-11-04 11:00:27 +0100219 vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0, b0,
220 0);
Damjan Marion22311502016-10-28 20:30:15 +0200221
Damjan Marion83243a02016-02-29 13:09:30 +0100222 /* enque and take next packet */
223 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
224 n_left_to_next, first_bi0, next0);
225
Damjan Marion00a9dca2016-08-17 17:05:46 +0200226 /* next packet */
Damjan Marion83243a02016-02-29 13:09:30 +0100227 tph->tp_status = TP_STATUS_KERNEL;
228 rx_frame = (rx_frame + 1) % frame_num;
229 tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
230 }
231
232 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
233 }
234
235 apif->next_rx_frame = rx_frame;
236
237 vlib_increment_combined_counter
Damjan Marion00a9dca2016-08-17 17:05:46 +0200238 (vnet_get_main ()->interface_main.combined_sw_if_counters
Damjan Marion83243a02016-02-29 13:09:30 +0100239 + VNET_INTERFACE_COUNTER_RX,
Damjan Marion00a9dca2016-08-17 17:05:46 +0200240 os_get_cpu_number (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
Damjan Marion83243a02016-02-29 13:09:30 +0100241
Damjan Marionb3bb1012017-02-28 21:55:28 +0100242 vnet_device_increment_rx_packets (cpu_index, n_rx_packets);
Damjan Marion83243a02016-02-29 13:09:30 +0100243 return n_rx_packets;
244}
245
246static uword
247af_packet_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
248 vlib_frame_t * frame)
249{
250 int i;
251 u32 n_rx_packets = 0;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100252 u32 cpu_index = os_get_cpu_number ();
Damjan Marion00a9dca2016-08-17 17:05:46 +0200253 af_packet_main_t *apm = &af_packet_main;
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100254 af_packet_if_t *apif;
Damjan Marion83243a02016-02-29 13:09:30 +0100255
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100256 for (i = 0; i < vec_len (apm->interfaces); i++)
257 {
258 apif = vec_elt_at_index (apm->interfaces, i);
259 if (apif->is_admin_up &&
260 (i % apm->input_cpu_count) ==
261 (cpu_index - apm->input_cpu_first_index))
262 n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
263 }
Damjan Marion83243a02016-02-29 13:09:30 +0100264
265 return n_rx_packets;
266}
267
Damjan Marion00a9dca2016-08-17 17:05:46 +0200268/* *INDENT-OFF* */
Damjan Marion83243a02016-02-29 13:09:30 +0100269VLIB_REGISTER_NODE (af_packet_input_node) = {
270 .function = af_packet_input_fn,
271 .name = "af-packet-input",
Damjan Marion51327ac2016-11-09 11:59:42 +0100272 .sibling_of = "device-input",
Damjan Marion83243a02016-02-29 13:09:30 +0100273 .format_trace = format_af_packet_input_trace,
274 .type = VLIB_NODE_TYPE_INPUT,
Mohsin KAZMIcf751ec2017-01-18 11:59:45 +0100275 /**
276 * default state is INTERRUPT mode, switch to POLLING if worker threads are enabled
277 */
Damjan Marion83243a02016-02-29 13:09:30 +0100278 .state = VLIB_NODE_STATE_INTERRUPT,
279 .n_errors = AF_PACKET_INPUT_N_ERROR,
280 .error_strings = af_packet_input_error_strings,
Damjan Marion83243a02016-02-29 13:09:30 +0100281};
Damjan Marion1c80e832016-05-11 23:07:18 +0200282
283VLIB_NODE_FUNCTION_MULTIARCH (af_packet_input_node, af_packet_input_fn)
Damjan Marion00a9dca2016-08-17 17:05:46 +0200284/* *INDENT-ON* */
Damjan Marion1c80e832016-05-11 23:07:18 +0200285
Damjan Marion00a9dca2016-08-17 17:05:46 +0200286
287/*
288 * fd.io coding-style-patch-verification: ON
289 *
290 * Local Variables:
291 * eval: (c-set-style "gnu")
292 * End:
293 */