blob: 1f3b4e9a8f105d8c4def844e84bd8ff888b26a0c [file] [log] [blame]
/*
 * mc_socket.c: socket based multicast for vlib mc
 *
 * Copyright (c) 2010 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
#include <vlib/vlib.h>
#include <vlib/unix/mc_socket.h>

#include <sys/ioctl.h>          /* for FIONBIO */
#include <netinet/tcp.h>        /* for TCP_NODELAY */
#include <net/if.h>             /* for struct ifreq */
#include <unistd.h>             /* for close */
24
Dave Barach9b8ffd92016-07-08 08:13:45 -040025static u8 *
26format_socket_peer_id (u8 * s, va_list * args)
Ed Warnickecb9cada2015-12-08 15:45:58 -070027{
28 u64 peer_id_as_u64 = va_arg (*args, u64);
29 mc_peer_id_t peer_id;
30 peer_id.as_u64 = peer_id_as_u64;
31 u32 a = mc_socket_peer_id_get_address (peer_id);
32 u32 p = mc_socket_peer_id_get_port (peer_id);
33
Dave Barach9b8ffd92016-07-08 08:13:45 -040034 s = format (s, "%U:%04x", format_network_address, AF_INET, &a, ntohs (p));
Ed Warnickecb9cada2015-12-08 15:45:58 -070035
36 return s;
37}
38
Dave Barach9b8ffd92016-07-08 08:13:45 -040039typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
40 u32 buffer_index);
Ed Warnickecb9cada2015-12-08 15:45:58 -070041
Dave Barach9b8ffd92016-07-08 08:13:45 -040042always_inline void
43msg_handler (mc_main_t * mcm,
44 u32 buffer_index, u32 handler_frees_buffer, void *_h)
Ed Warnickecb9cada2015-12-08 15:45:58 -070045{
Dave Barach9b8ffd92016-07-08 08:13:45 -040046 vlib_main_t *vm = mcm->vlib_main;
47 mc_msg_handler_t *h = _h;
48 vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
49 void *the_msg = vlib_buffer_get_current (b);
Ed Warnickecb9cada2015-12-08 15:45:58 -070050
51 h (mcm, the_msg, buffer_index);
Dave Barach9b8ffd92016-07-08 08:13:45 -040052 if (!handler_frees_buffer)
Ed Warnickecb9cada2015-12-08 15:45:58 -070053 vlib_buffer_free_one (vm, buffer_index);
54}
55
56static uword
57append_buffer_index_to_iovec (vlib_main_t * vm,
Dave Barach9b8ffd92016-07-08 08:13:45 -040058 u32 buffer_index, struct iovec **iovs_return)
Ed Warnickecb9cada2015-12-08 15:45:58 -070059{
Dave Barach9b8ffd92016-07-08 08:13:45 -040060 struct iovec *i;
61 vlib_buffer_t *b;
Ed Warnickecb9cada2015-12-08 15:45:58 -070062 u32 bi = buffer_index;
63 u32 l = 0;
64
65 while (1)
66 {
67 b = vlib_get_buffer (vm, bi);
68 vec_add2 (*iovs_return, i, 1);
69 i->iov_base = vlib_buffer_get_current (b);
70 i->iov_len = b->current_length;
71 l += i->iov_len;
Dave Barach9b8ffd92016-07-08 08:13:45 -040072 if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
Ed Warnickecb9cada2015-12-08 15:45:58 -070073 break;
74 bi = b->next_buffer;
75 }
76
77 return l;
78}
79
80static clib_error_t *
81sendmsg_helper (mc_socket_main_t * msm,
Dave Barach9b8ffd92016-07-08 08:13:45 -040082 int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -070083{
Dave Barach9b8ffd92016-07-08 08:13:45 -040084 vlib_main_t *vm = msm->mc_main.vlib_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -070085 struct msghdr h;
86 word n_bytes, n_bytes_tx, n_retries;
87
Dave Barachb7b92992018-10-17 10:38:51 -040088 clib_memset (&h, 0, sizeof (h));
Ed Warnickecb9cada2015-12-08 15:45:58 -070089 h.msg_name = tx_addr;
90 h.msg_namelen = sizeof (tx_addr[0]);
91
92 if (msm->iovecs)
Damjan Marion8bea5892022-04-04 22:40:45 +020093 vec_set_len (msm->iovecs, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -070094
95 n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
96 ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
97 if (n_bytes > msm->mc_main.transport.max_packet_size)
Paul Vinciguerra5481ad42020-01-28 14:47:17 -050098 clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);
Ed Warnickecb9cada2015-12-08 15:45:58 -070099
100 h.msg_iov = msm->iovecs;
101 h.msg_iovlen = vec_len (msm->iovecs);
102
103 n_retries = 0;
104 while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
Dave Barach9b8ffd92016-07-08 08:13:45 -0400105 && errno == EAGAIN)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700106 n_retries++;
107 if (n_bytes_tx != n_bytes)
108 {
109 clib_unix_warning ("sendmsg");
110 return 0;
111 }
112 if (n_retries)
113 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400114 ELOG_TYPE_DECLARE (e) =
115 {
116 .format = "sendmsg-helper: %d retries",.format_args = "i4",};
117 struct
118 {
119 u32 retries;
120 } *ed = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700121
122 ed = ELOG_DATA (&vm->elog_main, e);
123 ed->retries = n_retries;
124 }
125 return 0;
126}
127
128static clib_error_t *
Dave Barach9b8ffd92016-07-08 08:13:45 -0400129tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700130{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400131 mc_socket_main_t *msm = (mc_socket_main_t *) transport;
132 vlib_main_t *vm = msm->mc_main.vlib_main;
133 mc_multicast_socket_t *ms = &msm->multicast_sockets[type];
134 clib_error_t *error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700135 error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
136 if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
137 vlib_buffer_free_one (vm, buffer_index);
138 return error;
139}
140
141static clib_error_t *
142tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
143{
144 struct sockaddr_in tx_addr;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400145 mc_socket_main_t *msm = (mc_socket_main_t *) transport;
146 vlib_main_t *vm = msm->mc_main.vlib_main;
147 clib_error_t *error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700148
Dave Barachb7b92992018-10-17 10:38:51 -0400149 clib_memset (&tx_addr, 0, sizeof (tx_addr));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700150 tx_addr.sin_family = AF_INET;
151 tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
152 tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
153
154 error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
155 vlib_buffer_free_one (vm, buffer_index);
156 return error;
157}
158
159static clib_error_t *
160recvmsg_helper (mc_socket_main_t * msm,
161 int socket,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400162 struct sockaddr_in *rx_addr,
163 u32 * buffer_index, u32 drop_message)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700164{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400165 vlib_main_t *vm = msm->mc_main.vlib_main;
166 vlib_buffer_t *b;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700167 uword n_left, n_alloc, n_mtu, i, i_rx;
Damjan Marion8934a042019-02-09 23:29:26 +0100168 const uword buffer_size = vlib_buffer_get_default_data_size (vm);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700169 word n_bytes_left;
170
171 /* Make sure we have at least a MTU worth of buffers. */
172 n_mtu = msm->rx_mtu_n_buffers;
173 n_left = vec_len (msm->rx_buffers);
174 if (n_left < n_mtu)
175 {
176 uword max_alloc = 8 * n_mtu;
177 vec_validate (msm->rx_buffers, max_alloc - 1);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400178 n_alloc =
179 vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
Damjan Marion8bea5892022-04-04 22:40:45 +0200180 vec_set_len (msm->rx_buffers, n_left + n_alloc);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700181 }
182
183 ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
184 vec_validate (msm->iovecs, n_mtu - 1);
185
186 /* Allocate RX buffers from end of rx_buffers.
187 Turn them into iovecs to pass to readv. */
188 i_rx = vec_len (msm->rx_buffers) - 1;
189 for (i = 0; i < n_mtu; i++)
190 {
191 b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
192 msm->iovecs[i].iov_base = b->data;
193 msm->iovecs[i].iov_len = buffer_size;
194 }
Damjan Marion8bea5892022-04-04 22:40:45 +0200195 vec_set_len (msm->iovecs, n_mtu);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700196
197 {
198 struct msghdr h;
199
Dave Barachb7b92992018-10-17 10:38:51 -0400200 clib_memset (&h, 0, sizeof (h));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700201 if (rx_addr)
202 {
203 h.msg_name = rx_addr;
204 h.msg_namelen = sizeof (rx_addr[0]);
205 }
206 h.msg_iov = msm->iovecs;
207 h.msg_iovlen = vec_len (msm->iovecs);
208
209 n_bytes_left = recvmsg (socket, &h, 0);
210 if (n_bytes_left < 0)
211 return clib_error_return_unix (0, "recvmsg");
212 }
213
214 if (drop_message)
215 {
216 *buffer_index = ~0;
217 return 0;
218 }
219
220 *buffer_index = msm->rx_buffers[i_rx];
221 while (1)
222 {
223 b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
224
225 b->flags = 0;
226 b->current_data = 0;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400227 b->current_length =
228 n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700229
230 n_bytes_left -= buffer_size;
231
232 if (n_bytes_left <= 0)
233 break;
234
235 i_rx--;
236 b->flags |= VLIB_BUFFER_NEXT_PRESENT;
237 b->next_buffer = msm->rx_buffers[i_rx];
238 }
239
Damjan Marion8bea5892022-04-04 22:40:45 +0200240 vec_set_len (msm->rx_buffers, i_rx);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700241
Dave Barach9b8ffd92016-07-08 08:13:45 -0400242 return 0 /* no error */ ;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700243}
244
Dave Barach9b8ffd92016-07-08 08:13:45 -0400245static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200246mastership_socket_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700247{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400248 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
249 mc_main_t *mcm = &msm->mc_main;
250 mc_multicast_socket_t *ms =
251 &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
252 clib_error_t *error;
Damjan Marion2d207bc2018-03-27 21:07:58 +0200253 u32 bi = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700254
Dave Barach9b8ffd92016-07-08 08:13:45 -0400255 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
256 0);
257 if (!error)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700258 msg_handler (mcm, bi,
259 /* handler_frees_buffer */ 0,
260 mc_msg_master_assert_handler);
261
262 return error;
263}
264
Dave Barach9b8ffd92016-07-08 08:13:45 -0400265static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200266to_relay_socket_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700267{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400268 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700269 mc_main_t *mcm = &msm->mc_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400270 vlib_main_t *vm = msm->mc_main.vlib_main;
271 mc_multicast_socket_t *ms_to_relay =
272 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
273 mc_multicast_socket_t *ms_from_relay =
274 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
275 clib_error_t *error;
Damjan Marion2d207bc2018-03-27 21:07:58 +0200276 u32 bi = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700277 u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
278
279 /* Not the ordering master? Turf the msg */
280 error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400281 /* drop_message */ !is_master);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700282
283 /* If we are the master, number and rebroadcast the msg. */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400284 if (!error && is_master)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700285 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400286 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
287 mc_msg_user_request_t *mp = vlib_buffer_get_current (b);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700288 mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
289 mcm->relay_global_sequence++;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400290 error =
291 sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr,
292 bi);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700293 vlib_buffer_free_one (vm, bi);
294 }
295
296 return error;
297}
298
Dave Barach9b8ffd92016-07-08 08:13:45 -0400299static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200300from_relay_socket_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700301{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400302 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
303 mc_main_t *mcm = &msm->mc_main;
304 mc_multicast_socket_t *ms =
305 &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
306 clib_error_t *error;
Damjan Marion2d207bc2018-03-27 21:07:58 +0200307 u32 bi = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700308
Dave Barach9b8ffd92016-07-08 08:13:45 -0400309 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
310 0);
311 if (!error)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700312 {
313 msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
314 mc_msg_user_request_handler);
315 }
316 return error;
317}
318
Dave Barach9b8ffd92016-07-08 08:13:45 -0400319static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200320join_socket_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700321{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400322 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
323 mc_main_t *mcm = &msm->mc_main;
324 vlib_main_t *vm = mcm->vlib_main;
325 mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
326 clib_error_t *error;
Damjan Marion2d207bc2018-03-27 21:07:58 +0200327 u32 bi = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700328
Dave Barach9b8ffd92016-07-08 08:13:45 -0400329 error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
330 0);
331 if (!error)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700332 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400333 vlib_buffer_t *b = vlib_get_buffer (vm, bi);
334 mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700335
336 switch (clib_host_to_net_u32 (mp->type))
337 {
338 case MC_MSG_TYPE_join_or_leave_request:
339 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
340 mc_msg_join_or_leave_request_handler);
341 break;
342
343 case MC_MSG_TYPE_join_reply:
344 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
345 mc_msg_join_reply_handler);
346 break;
347
348 default:
349 ASSERT (0);
350 break;
351 }
352 }
353 return error;
354}
355
Dave Barach9b8ffd92016-07-08 08:13:45 -0400356static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200357ack_socket_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700358{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400359 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
360 mc_main_t *mcm = &msm->mc_main;
361 clib_error_t *error;
Damjan Marion2d207bc2018-03-27 21:07:58 +0200362 u32 bi = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700363
Dave Barach9b8ffd92016-07-08 08:13:45 -0400364 error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
365 /* drop_message */ 0);
366 if (!error)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700367 msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
368 mc_msg_user_ack_handler);
369 return error;
370}
371
Dave Barach9b8ffd92016-07-08 08:13:45 -0400372static void
373catchup_cleanup (mc_socket_main_t * msm,
Damjan Marion56dd5432017-09-08 19:52:02 +0200374 mc_socket_catchup_t * c, clib_file_main_t * um,
375 clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700376{
377 hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
Damjan Marion56dd5432017-09-08 19:52:02 +0200378 clib_file_del (um, uf);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700379 vec_free (c->input_vector);
380 vec_free (c->output_vector);
381 pool_put (msm->catchups, c);
382}
383
384static mc_socket_catchup_t *
Dave Barach9b8ffd92016-07-08 08:13:45 -0400385find_catchup_from_file_descriptor (mc_socket_main_t * msm,
386 int file_descriptor)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700387{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400388 uword *p =
389 hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700390 return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
391}
392
Dave Barach9b8ffd92016-07-08 08:13:45 -0400393static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200394catchup_socket_read_ready (clib_file_t * uf, int is_server)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700395{
Damjan Marion56dd5432017-09-08 19:52:02 +0200396 clib_file_main_t *um = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400397 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700398 mc_main_t *mcm = &msm->mc_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400399 mc_socket_catchup_t *c =
400 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700401 word l, n, is_eof;
402
403 l = vec_len (c->input_vector);
404 vec_resize (c->input_vector, 4096);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400405 n =
406 read (uf->file_descriptor, c->input_vector + l,
407 vec_len (c->input_vector) - l);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700408 is_eof = n == 0;
409
410 if (n < 0)
411 {
412 if (errno == EAGAIN)
413 n = 0;
414 else
415 {
416 catchup_cleanup (msm, c, um, uf);
417 return clib_error_return_unix (0, "read");
418 }
419 }
420
Damjan Marion8bea5892022-04-04 22:40:45 +0200421 vec_set_len (c->input_vector, l + n);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700422
423 if (is_eof && vec_len (c->input_vector) > 0)
424 {
425 if (is_server)
426 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400427 mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
428 c - msm->catchups);
Damjan Marion8bea5892022-04-04 22:40:45 +0200429 vec_set_len (c->input_vector, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700430 }
431 else
432 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400433 mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
434 c - msm->catchups);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700435 c->input_vector = 0; /* reply handler is responsible for freeing vector */
436 catchup_cleanup (msm, c, um, uf);
437 }
438 }
439
Dave Barach9b8ffd92016-07-08 08:13:45 -0400440 return 0 /* no error */ ;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700441}
442
Dave Barach9b8ffd92016-07-08 08:13:45 -0400443static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200444catchup_server_read_ready (clib_file_t * uf)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400445{
446 return catchup_socket_read_ready (uf, /* is_server */ 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700447}
448
Dave Barach9b8ffd92016-07-08 08:13:45 -0400449static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200450catchup_client_read_ready (clib_file_t * uf)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400451{
452 if (MC_EVENT_LOGGING)
453 {
454 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
455 vlib_main_t *vm = msm->mc_main.vlib_main;
456
457 ELOG_TYPE (e, "catchup_client_read_ready");
458 ELOG (&vm->elog_main, e, 0);
459 }
460 return catchup_socket_read_ready (uf, /* is_server */ 0);
461}
462
463static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200464catchup_socket_write_ready (clib_file_t * uf, int is_server)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700465{
Damjan Marion56dd5432017-09-08 19:52:02 +0200466 clib_file_main_t *um = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400467 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
468 mc_socket_catchup_t *c =
469 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
470 clib_error_t *error = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700471 int n;
472
473 if (c->connect_in_progress)
474 {
475 u32 len, value;
476
477 c->connect_in_progress = 0;
478 len = sizeof (value);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400479 if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &value, &len) < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700480 {
481 error = clib_error_return_unix (0, "getsockopt SO_ERROR");
482 goto error_quit;
483 }
484 if (value != 0)
485 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400486 error =
487 clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID,
488 "connect fails");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700489 goto error_quit;
490 }
491 }
492
Dave Barach9b8ffd92016-07-08 08:13:45 -0400493 while (1)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700494 {
495 u32 n_this_write;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400496
497 n_this_write =
498 clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
499 msm->rx_mtu_n_bytes -
500 64 /* ip + tcp + option allowance */ );
Ed Warnickecb9cada2015-12-08 15:45:58 -0700501
502 if (n_this_write <= 0)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400503 break;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700504
Dave Barach9b8ffd92016-07-08 08:13:45 -0400505 do
506 {
507 n = write (uf->file_descriptor,
508 c->output_vector + c->output_vector_n_written,
509 n_this_write);
510 }
511 while (n < 0 && errno == EAGAIN);
512
Ed Warnickecb9cada2015-12-08 15:45:58 -0700513 if (n < 0)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400514 {
515 error = clib_error_return_unix (0, "write");
516 goto error_quit;
517 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700518 c->output_vector_n_written += n;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400519 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700520
521 if (c->output_vector_n_written >= vec_len (c->output_vector))
522 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400523 if (!is_server)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700524 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400525 uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
Damjan Marion56dd5432017-09-08 19:52:02 +0200526 file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700527 /* Send EOF to other side. */
528 shutdown (uf->file_descriptor, SHUT_WR);
529 return error;
530 }
531 else
532 {
533 error_quit:
534 catchup_cleanup (msm, c, um, uf);
535 }
536 }
537 return error;
538}
539
Dave Barach9b8ffd92016-07-08 08:13:45 -0400540static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200541catchup_server_write_ready (clib_file_t * uf)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400542{
543 return catchup_socket_write_ready (uf, /* is_server */ 1);
544}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700545
Dave Barach9b8ffd92016-07-08 08:13:45 -0400546static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200547catchup_client_write_ready (clib_file_t * uf)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400548{
549 return catchup_socket_write_ready (uf, /* is_server */ 0);
550}
Ed Warnickecb9cada2015-12-08 15:45:58 -0700551
Dave Barach9b8ffd92016-07-08 08:13:45 -0400552static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200553catchup_socket_error_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700554{
Damjan Marion56dd5432017-09-08 19:52:02 +0200555 clib_file_main_t *um = &file_main;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400556 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
557 mc_socket_catchup_t *c =
558 find_catchup_from_file_descriptor (msm, uf->file_descriptor);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700559 catchup_cleanup (msm, c, um, uf);
560 return clib_error_return (0, "error");
561}
562
Dave Barach9b8ffd92016-07-08 08:13:45 -0400563static clib_error_t *
Damjan Marion56dd5432017-09-08 19:52:02 +0200564catchup_listen_read_ready (clib_file_t * uf)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700565{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400566 mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700567 struct sockaddr_in client_addr;
568 int client_len;
569 mc_socket_catchup_t *c;
Damjan Marion56dd5432017-09-08 19:52:02 +0200570 clib_file_t template = { 0 };
Ed Warnickecb9cada2015-12-08 15:45:58 -0700571
572 pool_get (msm->catchups, c);
Dave Barachb7b92992018-10-17 10:38:51 -0400573 clib_memset (c, 0, sizeof (c[0]));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700574
Dave Barach9b8ffd92016-07-08 08:13:45 -0400575 client_len = sizeof (client_addr);
576
Ed Warnickecb9cada2015-12-08 15:45:58 -0700577 /* Acquires the non-blocking attrib from the server socket. */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400578 c->socket = accept (uf->file_descriptor,
579 (struct sockaddr *) &client_addr,
580 (socklen_t *) & client_len);
581
Ed Warnickecb9cada2015-12-08 15:45:58 -0700582 if (c->socket < 0)
583 {
584 pool_put (msm->catchups, c);
585 return clib_error_return_unix (0, "accept");
586 }
587
588 if (MC_EVENT_LOGGING)
589 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400590 mc_main_t *mcm = &msm->mc_main;
591 vlib_main_t *vm = mcm->vlib_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700592
Dave Barach9b8ffd92016-07-08 08:13:45 -0400593 ELOG_TYPE_DECLARE (e) =
594 {
595 .format = "catchup accepted from 0x%lx",.format_args = "i4",};
596 struct
597 {
598 u32 addr;
599 } *ed = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700600
601 ed = ELOG_DATA (&vm->elog_main, e);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400602 ed->addr = ntohl (client_addr.sin_addr.s_addr);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700603 }
604
605 /* Disable the Nagle algorithm, ship catchup pkts immediately */
606 {
607 int one = 1;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400608 if ((setsockopt (c->socket, IPPROTO_TCP,
609 TCP_NODELAY, (void *) &one, sizeof (one))) < 0)
610 {
611 clib_unix_warning ("catchup socket: set TCP_NODELAY");
612 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700613 }
614
615 template.read_function = catchup_server_read_ready;
616 template.write_function = catchup_server_write_ready;
617 template.error_function = catchup_socket_error_ready;
618 template.file_descriptor = c->socket;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500619 template.description = format (0, "multicast catchup socket");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700620 template.private_data = pointer_to_uword (msm);
Damjan Marion56dd5432017-09-08 19:52:02 +0200621 c->clib_file_index = clib_file_add (&file_main, &template);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400622 hash_set (msm->catchup_index_by_file_descriptor, c->socket,
623 c - msm->catchups);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700624
625 return 0;
626}
627
628/* Return and bind to an unused port. */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400629static word
630find_and_bind_to_free_port (word sock, word port)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700631{
632 for (; port < 1 << 16; port++)
633 {
634 struct sockaddr_in a;
635
Dave Barachb7b92992018-10-17 10:38:51 -0400636 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
Ed Warnickecb9cada2015-12-08 15:45:58 -0700637
638 a.sin_family = PF_INET;
639 a.sin_addr.s_addr = INADDR_ANY;
640 a.sin_port = htons (port);
641
642 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
643 break;
644 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400645
Ed Warnickecb9cada2015-12-08 15:45:58 -0700646 return port < 1 << 16 ? port : -1;
647}
648
649static clib_error_t *
650setup_mutlicast_socket (mc_socket_main_t * msm,
651 mc_multicast_socket_t * ms,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400652 char *type, uword udp_port)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700653{
654 int one = 1;
655 struct ip_mreq mcast_req;
656
Dave Barach9b8ffd92016-07-08 08:13:45 -0400657 if (!msm->multicast_ttl)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700658 msm->multicast_ttl = 1;
659
660 /* mastership (multicast) TX socket */
661 if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400662 return clib_error_return_unix (0, "%s socket", type);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700663
664 {
665 u8 ttl = msm->multicast_ttl;
666
Dave Barach9b8ffd92016-07-08 08:13:45 -0400667 if ((setsockopt (ms->socket, IPPROTO_IP,
668 IP_MULTICAST_TTL, (void *) &ttl, sizeof (ttl))) < 0)
669 return clib_error_return_unix (0, "%s set multicast ttl", type);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700670 }
671
Dave Barach9b8ffd92016-07-08 08:13:45 -0400672 if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)) <
673 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700674 return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
675
Dave Barachb7b92992018-10-17 10:38:51 -0400676 clib_memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700677 ms->tx_addr.sin_family = AF_INET;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400678 ms->tx_addr.sin_addr.s_addr =
679 htonl (msm->multicast_tx_ip4_address_host_byte_order);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700680 ms->tx_addr.sin_port = htons (udp_port);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400681
682 if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
683 sizeof (ms->tx_addr)) < 0)
684 return clib_error_return_unix (0, "%s bind", type);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700685
Dave Barachb7b92992018-10-17 10:38:51 -0400686 clib_memset (&mcast_req, 0, sizeof (mcast_req));
Dave Barach9b8ffd92016-07-08 08:13:45 -0400687 mcast_req.imr_multiaddr.s_addr =
688 htonl (msm->multicast_tx_ip4_address_host_byte_order);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700689 mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
690
Dave Barach9b8ffd92016-07-08 08:13:45 -0400691 if ((setsockopt (ms->socket, IPPROTO_IP,
692 IP_ADD_MEMBERSHIP, (void *) &mcast_req,
693 sizeof (mcast_req))) < 0)
694 return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
695 type);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700696
697 if (ioctl (ms->socket, FIONBIO, &one) < 0)
698 return clib_error_return_unix (0, "%s set FIONBIO", type);
699
700 /* FIXME remove this when we support tx_ready. */
701 {
702 u32 len = 1 << 20;
703 socklen_t sl = sizeof (len);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400704 if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700705 clib_unix_error ("setsockopt");
706 }
707
708 return 0;
709}
710
711static clib_error_t *
Dave Barach9b8ffd92016-07-08 08:13:45 -0400712socket_setup (mc_socket_main_t * msm)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700713{
714 int one = 1;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400715 clib_error_t *error;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700716 u32 port;
717
Dave Barach9b8ffd92016-07-08 08:13:45 -0400718 if (!msm->base_multicast_udp_port_host_byte_order)
719 msm->base_multicast_udp_port_host_byte_order =
720 0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
721 - 1);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700722
723 port = msm->base_multicast_udp_port_host_byte_order;
724
725 error = setup_mutlicast_socket (msm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400726 &msm->multicast_sockets
727 [MC_TRANSPORT_MASTERSHIP], "mastership",
Ed Warnickecb9cada2015-12-08 15:45:58 -0700728 port++);
729 if (error)
730 return error;
731
732 error = setup_mutlicast_socket (msm,
733 &msm->multicast_sockets[MC_TRANSPORT_JOIN],
Dave Barach9b8ffd92016-07-08 08:13:45 -0400734 "join", port++);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700735 if (error)
736 return error;
737
738 error = setup_mutlicast_socket (msm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400739 &msm->multicast_sockets
740 [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
741 "to relay", port++);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700742 if (error)
743 return error;
744
745 error = setup_mutlicast_socket (msm,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400746 &msm->multicast_sockets
747 [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
748 "from relay", port++);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700749 if (error)
750 return error;
751
752 /* ACK rx socket */
753 msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
754 if (msm->ack_socket < 0)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400755 return clib_error_return_unix (0, "ack socket");
Ed Warnickecb9cada2015-12-08 15:45:58 -0700756
757 msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);
758
759 if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
760 return clib_error_return_unix (0, "ack socket FIONBIO");
761
Dave Barach9b8ffd92016-07-08 08:13:45 -0400762 msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700763 if (msm->catchup_server_socket < 0)
764 return clib_error_return_unix (0, "catchup server socket");
Dave Barach9b8ffd92016-07-08 08:13:45 -0400765
766 msm->catchup_tcp_port =
767 find_and_bind_to_free_port (msm->catchup_server_socket, port++);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700768
769 if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
770 return clib_error_return_unix (0, "catchup server socket FIONBIO");
771
Dave Barach9b8ffd92016-07-08 08:13:45 -0400772 if (listen (msm->catchup_server_socket, 5) < 0)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700773 return clib_error_return_unix (0, "catchup server socket listen");
Dave Barach9b8ffd92016-07-08 08:13:45 -0400774
Ed Warnickecb9cada2015-12-08 15:45:58 -0700775 /* epoll setup for multicast mastership socket */
776 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200777 clib_file_t template = { 0 };
Ed Warnickecb9cada2015-12-08 15:45:58 -0700778
779 template.read_function = mastership_socket_read_ready;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400780 template.file_descriptor =
781 msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700782 template.private_data = (uword) msm;
Damjan Marion56dd5432017-09-08 19:52:02 +0200783 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700784
785 /* epoll setup for multicast to_relay socket */
786 template.read_function = to_relay_socket_read_ready;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400787 template.file_descriptor =
788 msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700789 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500790 template.description = format (0, "multicast to_relay socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200791 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700792
793 /* epoll setup for multicast from_relay socket */
794 template.read_function = from_relay_socket_read_ready;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400795 template.file_descriptor =
796 msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700797 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500798 template.description = format (0, "multicast from_relay socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200799 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700800
801 template.read_function = join_socket_read_ready;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400802 template.file_descriptor =
803 msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700804 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500805 template.description = format (0, "multicast join socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200806 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700807
808 /* epoll setup for ack rx socket */
809 template.read_function = ack_socket_read_ready;
810 template.file_descriptor = msm->ack_socket;
811 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500812 template.description = format (0, "multicast ack rx socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200813 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700814
815 /* epoll setup for TCP catchup server */
816 template.read_function = catchup_listen_read_ready;
817 template.file_descriptor = msm->catchup_server_socket;
818 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500819 template.description = format (0, "multicast tcp catchup socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200820 clib_file_add (&file_main, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700821 }
822
823 return 0;
824}
825
826static void *
Dave Barach9b8ffd92016-07-08 08:13:45 -0400827catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
828 u8 * set_output_vector)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700829{
Damjan Marion56dd5432017-09-08 19:52:02 +0200830 clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
831 c->clib_file_index);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400832 u8 *result = 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700833
834 if (set_output_vector)
835 c->output_vector = set_output_vector;
836 else
837 vec_add2 (c->output_vector, result, n_bytes);
838 if (vec_len (c->output_vector) > 0)
839 {
840 int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
841 uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400842 if (!skip_update)
Damjan Marion56dd5432017-09-08 19:52:02 +0200843 file_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700844 }
845 return result;
846}
847
Dave Barach9b8ffd92016-07-08 08:13:45 -0400848static uword
849catchup_request_fun (void *transport_main,
850 u32 stream_index, mc_peer_id_t catchup_peer_id)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700851{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400852 mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
853 mc_main_t *mcm = &msm->mc_main;
854 vlib_main_t *vm = mcm->vlib_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700855 mc_socket_catchup_t *c;
856 struct sockaddr_in addr;
Damjan Marion56dd5432017-09-08 19:52:02 +0200857 clib_file_main_t *um = &file_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700858 int one = 1;
859
860 pool_get (msm->catchups, c);
Dave Barachb7b92992018-10-17 10:38:51 -0400861 clib_memset (c, 0, sizeof (*c));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700862
Dave Barach9b8ffd92016-07-08 08:13:45 -0400863 c->socket = socket (AF_INET, SOCK_STREAM, 0);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700864 if (c->socket < 0)
865 {
866 clib_unix_warning ("socket");
867 return 0;
868 }
869
870 if (ioctl (c->socket, FIONBIO, &one) < 0)
871 {
872 clib_unix_warning ("FIONBIO");
873 return 0;
874 }
875
Dave Barachb7b92992018-10-17 10:38:51 -0400876 clib_memset (&addr, 0, sizeof (addr));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700877 addr.sin_family = AF_INET;
878 addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
879 addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400880
Ed Warnickecb9cada2015-12-08 15:45:58 -0700881 c->connect_in_progress = 1;
882
883 if (MC_EVENT_LOGGING)
884 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400885 ELOG_TYPE_DECLARE (e) =
886 {
887 .format = "connecting to peer 0x%Lx",.format_args = "i8",};
888 struct
889 {
890 u64 peer;
891 } *ed;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700892 ed = ELOG_DATA (&vm->elog_main, e);
893 ed->peer = catchup_peer_id.as_u64;
894 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400895
896 if (connect (c->socket, (const void *) &addr, sizeof (addr))
Ed Warnickecb9cada2015-12-08 15:45:58 -0700897 < 0 && errno != EINPROGRESS)
898 {
899 clib_unix_warning ("connect to %U fails",
900 format_socket_peer_id, catchup_peer_id);
901 return 0;
902 }
Dave Barach9b8ffd92016-07-08 08:13:45 -0400903
Ed Warnickecb9cada2015-12-08 15:45:58 -0700904 {
Damjan Marion56dd5432017-09-08 19:52:02 +0200905 clib_file_t template = { 0 };
Dave Barach9b8ffd92016-07-08 08:13:45 -0400906
Ed Warnickecb9cada2015-12-08 15:45:58 -0700907 template.read_function = catchup_client_read_ready;
908 template.write_function = catchup_client_write_ready;
909 template.error_function = catchup_socket_error_ready;
910 template.file_descriptor = c->socket;
911 template.private_data = (uword) msm;
Paul Vinciguerra5481ad42020-01-28 14:47:17 -0500912 template.description = format (0, "multicast socket");
Damjan Marion56dd5432017-09-08 19:52:02 +0200913 c->clib_file_index = clib_file_add (um, &template);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700914
Dave Barach9b8ffd92016-07-08 08:13:45 -0400915 hash_set (msm->catchup_index_by_file_descriptor, c->socket,
916 c - msm->catchups);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700917 }
918
919 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400920 mc_msg_catchup_request_t *mp;
921 mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */
922 0);
923 mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700924 mp->stream_index = stream_index;
925 mc_byte_swap_msg_catchup_request (mp);
926 }
927
928 return c - msm->catchups;
929}
930
Dave Barach9b8ffd92016-07-08 08:13:45 -0400931static void
932catchup_send_fun (void *transport_main, uword opaque, u8 * data)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700933{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400934 mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700935 mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
936 catchup_add_pending_output (c, 0, data);
937}
938
939static int
Dave Barach9b8ffd92016-07-08 08:13:45 -0400940find_interface_ip4_address (char *if_name, u32 * ip4_address, u32 * mtu)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700941{
942 int fd;
943 struct ifreq ifr;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400944 struct sockaddr_in *sa;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700945
946 /* Dig up our IP address */
947 fd = socket (PF_INET, AF_INET, 0);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400948 if (fd < 0)
949 {
950 clib_unix_error ("socket");
951 return -1;
952 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700953
954 ifr.ifr_addr.sa_family = AF_INET;
Dave Barach9b8ffd92016-07-08 08:13:45 -0400955 strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
956 if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
957 {
958 clib_unix_error ("ioctl(SIOCFIGADDR)");
Dave Barachdd522cb2016-08-10 16:56:16 -0400959 close (fd);
Dave Barach9b8ffd92016-07-08 08:13:45 -0400960 return -1;
961 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700962
963 sa = (void *) &ifr.ifr_addr;
Damjan Marionf1213b82016-03-13 02:22:06 +0100964 clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
Ed Warnickecb9cada2015-12-08 15:45:58 -0700965
966 if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
Dave Barachb2a6e252016-07-27 10:00:58 -0400967 {
968 close (fd);
969 return -1;
970 }
Ed Warnickecb9cada2015-12-08 15:45:58 -0700971 if (mtu)
Dave Barach9b8ffd92016-07-08 08:13:45 -0400972 *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);
Ed Warnickecb9cada2015-12-08 15:45:58 -0700973
974 close (fd);
975
976 return 0;
977}
978
979clib_error_t *
980mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
Dave Barach9b8ffd92016-07-08 08:13:45 -0400981 int n_intfcs_to_probe)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700982{
Dave Barach9b8ffd92016-07-08 08:13:45 -0400983 clib_error_t *error;
984 mc_main_t *mcm;
Ed Warnickecb9cada2015-12-08 15:45:58 -0700985 u32 mtu;
986
987 mcm = &msm->mc_main;
988
989 /* 239.255.0.7 */
Dave Barach9b8ffd92016-07-08 08:13:45 -0400990 if (!msm->multicast_tx_ip4_address_host_byte_order)
Ed Warnickecb9cada2015-12-08 15:45:58 -0700991 msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
992
993 {
994 u32 i, a, win;
995
996 win = 0;
997 if (msm->multicast_interface_name)
998 {
Dave Barach9b8ffd92016-07-08 08:13:45 -0400999 win =
1000 !find_interface_ip4_address (msm->multicast_interface_name, &a,
1001 &mtu);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001002 }
1003 else
1004 {
1005 for (i = 0; i < n_intfcs_to_probe; i++)
Dave Barach9b8ffd92016-07-08 08:13:45 -04001006 if (!find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
Ed Warnickecb9cada2015-12-08 15:45:58 -07001007 {
1008 win = 1;
1009 msm->multicast_interface_name = intfc_probe_list[i];
1010 break;
1011 }
1012 }
1013
Dave Barach9b8ffd92016-07-08 08:13:45 -04001014 if (!win)
Ed Warnickecb9cada2015-12-08 15:45:58 -07001015 return clib_error_return (0, "can't find interface ip4 address");
1016
1017 msm->if_ip4_address_net_byte_order = a;
1018 }
1019
1020 msm->rx_mtu_n_bytes = mtu;
Damjan Marion5de3fec2019-02-06 14:22:32 +01001021 msm->rx_mtu_n_buffers =
Damjan Marion8934a042019-02-09 23:29:26 +01001022 msm->rx_mtu_n_bytes / vlib_buffer_get_default_data_size (vm);
Damjan Marion5de3fec2019-02-06 14:22:32 +01001023 msm->rx_mtu_n_buffers +=
Damjan Marion8934a042019-02-09 23:29:26 +01001024 (msm->rx_mtu_n_bytes % vlib_buffer_get_default_data_size (vm)) != 0;
Ed Warnickecb9cada2015-12-08 15:45:58 -07001025
1026 error = socket_setup (msm);
1027 if (error)
1028 return error;
1029
1030 mcm->transport.our_ack_peer_id =
Dave Barach9b8ffd92016-07-08 08:13:45 -04001031 mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
1032 msm->ack_udp_port);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001033
1034 mcm->transport.our_catchup_peer_id =
Dave Barach9b8ffd92016-07-08 08:13:45 -04001035 mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
1036 msm->catchup_tcp_port);
Ed Warnickecb9cada2015-12-08 15:45:58 -07001037
1038 mcm->transport.tx_buffer = tx_buffer;
1039 mcm->transport.tx_ack = tx_ack;
1040 mcm->transport.catchup_request_fun = catchup_request_fun;
1041 mcm->transport.catchup_send_fun = catchup_send_fun;
1042 mcm->transport.format_peer_id = format_socket_peer_id;
1043 mcm->transport.opaque = msm;
1044 mcm->transport.max_packet_size = mtu;
1045
1046 mc_main_init (mcm, "socket");
1047
1048 return error;
1049}
Dave Barach9b8ffd92016-07-08 08:13:45 -04001050
1051/*
1052 * fd.io coding-style-patch-verification: ON
1053 *
1054 * Local Variables:
1055 * eval: (c-set-style "gnu")
1056 * End:
1057 */