// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "queueing.h"
#include "timers.h"
#include "device.h"
#include "peer.h"
#include "socket.h"
#include "messages.h"
#include "cookie.h"

#include <linux/simd.h>
#include <linux/uio.h>
#include <linux/inetdevice.h>
#include <linux/socket.h>
#include <net/ip_tunnels.h>
#include <net/udp.h>
#include <net/sock.h>

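/* Sends a handshake initiation to the peer, rate limited so that roughly one
 * initiation goes out per REKEY_TIMEOUT interval (5 seconds with the usual
 * protocol constants). last_sent_handshake is bumped before the message is
 * built and again once it's ready to send, so concurrent callers and timer
 * retries back off while an initiation is in flight.
 */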
static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
{
	struct message_handshake_initiation packet;

	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT))
		return; /* This function is rate limited. */

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		wg_timers_any_authenticated_packet_traversal(peer);
		wg_timers_any_authenticated_packet_sent(peer);
		atomic64_set(&peer->last_sent_handshake,
			     ktime_get_coarse_boottime_ns());
		wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
					      HANDSHAKE_DSCP);
		wg_timers_handshake_initiated(peer);
	}
}

void wg_packet_handshake_send_worker(struct work_struct *work)
{
	struct wg_peer *peer = container_of(work, struct wg_peer,
					    transmit_handshake_work);

	wg_packet_send_handshake_initiation(peer);
	wg_peer_put(peer);
}

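/* Defers sending an initiation to handshake_send_wq rather than doing it
 * inline, since building an initiation involves comparatively expensive
 * Curve25519 operations. When is_retry is false this counts as a fresh
 * attempt, so the timer-driven retry counter starts over from zero.
 */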
void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
						bool is_retry)
{
	if (!is_retry)
		peer->timer_handshake_attempts = 0;

	rcu_read_lock_bh();
	/* We check last_sent_handshake here in addition to the actual function
	 * we're queueing up, so that we don't queue things if not strictly
	 * necessary:
	 */
	if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
				      REKEY_TIMEOUT) ||
	    unlikely(READ_ONCE(peer->is_dead)))
		goto out;

	wg_peer_get(peer);
	/* Queues up wg_packet_handshake_send_worker(), which calls
	 * wg_packet_send_handshake_initiation(peer) and then does a
	 * wg_peer_put(peer) to drop the reference taken above:
	 */
	if (!queue_work(peer->device->handshake_send_wq,
			&peer->transmit_handshake_work))
		/* If the work was already queued, we want to drop the
		 * extra reference:
		 */
		wg_peer_put(peer);
out:
	rcu_read_unlock_bh();
}

void wg_packet_send_handshake_response(struct wg_peer *peer)
{
	struct message_handshake_response packet;

	atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
	net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
			    peer->device->dev->name, peer->internal_id,
			    &peer->endpoint.addr);

	if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
		wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
		if (wg_noise_handshake_begin_session(&peer->handshake,
						     &peer->keypairs)) {
			wg_timers_session_derived(peer);
			wg_timers_any_authenticated_packet_traversal(peer);
			wg_timers_any_authenticated_packet_sent(peer);
			atomic64_set(&peer->last_sent_handshake,
				     ktime_get_coarse_boottime_ns());
			wg_socket_send_buffer_to_peer(peer, &packet,
						      sizeof(packet),
						      HANDSHAKE_DSCP);
		}
	}
}

void wg_packet_send_handshake_cookie(struct wg_device *wg,
				     struct sk_buff *initiating_skb,
				     __le32 sender_index)
{
	struct message_handshake_cookie packet;

	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
				wg->dev->name, initiating_skb);
	wg_cookie_message_create(&packet, initiating_skb, sender_index,
				 &wg->cookie_checker);
	wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
					      sizeof(packet));
}

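/* Opportunistically kicks off a new handshake when the current sending key is
 * getting stale: after more than REKEY_AFTER_MESSAGES uses of the key, or,
 * for the side that initiated the handshake, once the key is older than
 * REKEY_AFTER_TIME. Only the initiator rekeys based on age, which keeps both
 * sides from initiating simultaneously.
 */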
static void keep_key_fresh(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	bool send;

	rcu_read_lock_bh();
	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
	send = keypair && READ_ONCE(keypair->sending.is_valid) &&
	       (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
		(keypair->i_am_the_initiator &&
		 wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
	rcu_read_unlock_bh();

	if (unlikely(send))
		wg_packet_send_queued_handshake_initiation(peer, false);
}

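/* Returns how many zero bytes of padding bring the plaintext up to the next
 * MESSAGE_PADDING_MULTIPLE boundary without exceeding the MTU. As a rough
 * worked example, assuming the usual MESSAGE_PADDING_MULTIPLE of 16: a
 * 100-byte packet with a 1420-byte MTU is padded to 112 bytes (12 bytes of
 * padding), while a 1419-byte packet is padded only to the 1420-byte MTU
 * (1 byte of padding) rather than to 1424.
 */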
static unsigned int calculate_skb_padding(struct sk_buff *skb)
{
	unsigned int padded_size, last_unit = skb->len;

	if (unlikely(!PACKET_CB(skb)->mtu))
		return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;

	/* We do this modulo business with the MTU, just in case the networking
	 * layer gives us a packet that's bigger than the MTU. In that case, we
	 * wouldn't want the final subtraction to overflow in the case of the
	 * padded_size being clamped. Fortunately, that's very rarely the case,
	 * so we optimize for that not happening.
	 */
	if (unlikely(last_unit > PACKET_CB(skb)->mtu))
		last_unit %= PACKET_CB(skb)->mtu;

	padded_size = min(PACKET_CB(skb)->mtu,
			  ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
	return padded_size - last_unit;
}

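/* Transforms the plaintext skb in place into a WireGuard data message: a
 * message_data header (message type, the receiver's key index, and the
 * little-endian counter used as nonce), followed by the ChaCha20-Poly1305
 * ciphertext of the padded plaintext and its 16-byte authentication tag.
 * Roughly, the resulting length is sizeof(struct message_data) plus the
 * padded plaintext length plus the tag.
 */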
static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair,
			   simd_context_t *simd_context)
{
	unsigned int padding_len, plaintext_len, trailer_len;
	struct scatterlist sg[MAX_SKB_FRAGS + 8];
	struct message_data *header;
	struct sk_buff *trailer;
	int num_frags;

	/* Force hash calculation before encryption so that flow analysis is
	 * consistent over the inner packet.
	 */
	skb_get_hash(skb);

	/* Calculate lengths. */
	padding_len = calculate_skb_padding(skb);
	trailer_len = padding_len + noise_encrypted_len(0);
	plaintext_len = skb->len + padding_len;

	/* Expand data section to have room for padding and auth tag. */
	num_frags = skb_cow_data(skb, trailer_len, &trailer);
	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
		return false;

	/* Set the padding to zeros, and make sure it and the auth tag are part
	 * of the skb.
	 */
	memset(skb_tail_pointer(trailer), 0, padding_len);

	/* Expand head section to have room for our header and the network
	 * stack's headers.
	 */
	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
		return false;

	/* Finalize checksum calculation for the inner packet, if required. */
	if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
		     skb_checksum_help(skb)))
		return false;

	/* Only after checksumming can we safely add on the padding at the end
	 * and the header.
	 */
	skb_set_inner_network_header(skb, 0);
	header = (struct message_data *)skb_push(skb, sizeof(*header));
	header->header.type = cpu_to_le32(MESSAGE_DATA);
	header->key_idx = keypair->remote_index;
	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
	pskb_put(skb, trailer, trailer_len);

	/* Now we can encrypt the scattergather segments. */
	sg_init_table(sg, num_frags);
	if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
			 noise_encrypted_len(plaintext_len)) <= 0)
		return false;
	return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
						   PACKET_CB(skb)->nonce,
						   keypair->sending.key,
						   simd_context);
}

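/* A keepalive is just a data message with a zero-length plaintext, so if
 * nothing is already staged we stage an empty skb and run it through the
 * normal encryption path. On the wire it ends up message_data_len(0) bytes
 * long, which is also how wg_packet_create_data_done() below recognizes
 * keepalives and avoids treating them as real data for timer purposes.
 */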
void wg_packet_send_keepalive(struct wg_peer *peer)
{
	struct sk_buff *skb;

	if (skb_queue_empty(&peer->staged_packet_queue)) {
		skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
				GFP_ATOMIC);
		if (unlikely(!skb))
			return;
		skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
		skb->dev = peer->device->dev;
		PACKET_CB(skb)->mtu = skb->dev->mtu;
		skb_queue_tail(&peer->staged_packet_queue, skb);
		net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
				    peer->device->dev->name, peer->internal_id,
				    &peer->endpoint.addr);
	}

	wg_packet_send_staged_packets(peer);
}

static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first)
{
	struct sk_buff *skb, *next;
	bool is_keepalive, data_sent = false;

	wg_timers_any_authenticated_packet_traversal(peer);
	wg_timers_any_authenticated_packet_sent(peer);
	skb_list_walk_safe(first, skb, next) {
		is_keepalive = skb->len == message_data_len(0);
		if (likely(!wg_socket_send_skb_to_peer(peer, skb,
				PACKET_CB(skb)->ds) && !is_keepalive))
			data_sent = true;
	}

	if (likely(data_sent))
		wg_timers_data_sent(peer);

	keep_key_fresh(peer);
}

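/* Transmit worker for a single peer. Encryption runs in parallel across CPUs,
 * but each packet was placed on peer->tx_queue, in order, before being handed
 * to the encryption workers; by transmitting only up to the first entry still
 * marked PACKET_STATE_UNCRYPTED, packets leave in the same order they were
 * queued despite out-of-order encryption completion.
 */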
void wg_packet_tx_worker(struct work_struct *work)
{
	struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work);
	struct noise_keypair *keypair;
	enum packet_state state;
	struct sk_buff *first;

	while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL &&
	       (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
		       PACKET_STATE_UNCRYPTED) {
		wg_prev_queue_drop_peeked(&peer->tx_queue);
		keypair = PACKET_CB(first)->keypair;

		if (likely(state == PACKET_STATE_CRYPTED))
			wg_packet_create_data_done(peer, first);
		else
			kfree_skb_list(first);

		wg_noise_keypair_put(keypair, false);
		wg_peer_put(peer);
		if (need_resched())
			cond_resched();
	}
}

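/* Per-CPU encryption worker fed from the device-wide encrypt_queue ring. The
 * simd_get()/simd_relax()/simd_put() calls are the compat SIMD helpers: they
 * are intended to let a batch of packets share one FPU/SIMD region, with
 * simd_relax() briefly dropping it when rescheduling is due, rather than
 * paying the enable/disable cost on every packet.
 */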
void wg_packet_encrypt_worker(struct work_struct *work)
{
	struct crypt_queue *queue = container_of(work, struct multicore_worker,
						 work)->ptr;
	struct sk_buff *first, *skb, *next;
	simd_context_t simd_context;

	simd_get(&simd_context);
	while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
		enum packet_state state = PACKET_STATE_CRYPTED;

		skb_list_walk_safe(first, skb, next) {
			if (likely(encrypt_packet(skb,
						  PACKET_CB(first)->keypair,
						  &simd_context))) {
				wg_reset_packet(skb, true);
			} else {
				state = PACKET_STATE_DEAD;
				break;
			}
		}
		wg_queue_enqueue_per_peer_tx(first, state);

		simd_relax(&simd_context);
	}
	simd_put(&simd_context);
}

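/* Hands a packet list off to the parallel encryption machinery. Note the
 * error handling: on -EPIPE the skbs are assumed to already be sitting on
 * peer->tx_queue, so they are only marked PACKET_STATE_DEAD and the tx worker
 * frees them and drops the references; for any other error nothing was
 * queued, so the keypair and peer references and the skbs are released here.
 */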
static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
{
	struct wg_device *wg = peer->device;
	int ret = -EINVAL;

	rcu_read_lock_bh();
	if (unlikely(READ_ONCE(peer->is_dead)))
		goto err;

	ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
						   wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
	if (unlikely(ret == -EPIPE))
		wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
err:
	rcu_read_unlock_bh();
	if (likely(!ret || ret == -EPIPE))
		return;
	wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
	wg_peer_put(peer);
	kfree_skb_list(first);
}

void wg_packet_purge_staged_packets(struct wg_peer *peer)
{
	spin_lock_bh(&peer->staged_packet_queue.lock);
	peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
	__skb_queue_purge(&peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
}

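/* Entry point of the data-sending path: steals everything currently staged,
 * checks that a usable sending key exists (valid, younger than
 * REJECT_AFTER_TIME, and with fewer than REJECT_AFTER_MESSAGES nonces
 * consumed), assigns each packet its counter-derived nonce, and ships the
 * batch off for encryption. If the key is missing or no longer acceptable,
 * the packets go back onto the staged queue and a new handshake is initiated
 * instead.
 */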
void wg_packet_send_staged_packets(struct wg_peer *peer)
{
	struct noise_keypair *keypair;
	struct sk_buff_head packets;
	struct sk_buff *skb;

	/* Steal the current queue into our local one. */
	__skb_queue_head_init(&packets);
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice_init(&peer->staged_packet_queue, &packets);
	spin_unlock_bh(&peer->staged_packet_queue.lock);
	if (unlikely(skb_queue_empty(&packets)))
		return;

	/* First we make sure we have a valid reference to a valid key. */
	rcu_read_lock_bh();
	keypair = wg_noise_keypair_get(
		rcu_dereference_bh(peer->keypairs.current_keypair));
	rcu_read_unlock_bh();
	if (unlikely(!keypair))
		goto out_nokey;
	if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
		goto out_nokey;
	if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
					      REJECT_AFTER_TIME)))
		goto out_invalid;

	/* After we know we have a somewhat valid key, we now try to assign
	 * nonces to all of the packets in the queue. If we can't assign nonces
	 * for all of them, we just consider it a failure and wait for the next
	 * handshake.
	 */
	skb_queue_walk(&packets, skb) {
		/* 0 for no outer TOS: no leak. TODO: at some later point, we
		 * might consider using flowi->tos as outer instead.
		 */
		PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
		PACKET_CB(skb)->nonce =
			atomic64_inc_return(&keypair->sending_counter) - 1;
		if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
			goto out_invalid;
	}

	packets.prev->next = NULL;
	wg_peer_get(keypair->entry.peer);
	PACKET_CB(packets.next)->keypair = keypair;
	wg_packet_create_data(peer, packets.next);
	return;

out_invalid:
	WRITE_ONCE(keypair->sending.is_valid, false);
out_nokey:
	wg_noise_keypair_put(keypair, false);

	/* We orphan the packets if we're waiting on a handshake, so that they
	 * don't block a socket's pool.
	 */
	skb_queue_walk(&packets, skb)
		skb_orphan(skb);
	/* Then we put them back on the top of the queue. We're not too
	 * concerned about accidentally getting things a little out of order if
	 * packets are being added really fast, because this queue is for before
	 * packets can even be sent and it's small anyway.
	 */
	spin_lock_bh(&peer->staged_packet_queue.lock);
	skb_queue_splice(&packets, &peer->staged_packet_queue);
	spin_unlock_bh(&peer->staged_packet_queue.lock);

	/* If we're exiting because there's something wrong with the key, it
	 * means we should initiate a new handshake.
	 */
	wg_packet_send_queued_handshake_initiation(peer, false);
}