/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/wait.h>
26#include <linux/mm.h>
27#include <linux/slab.h>
28#include <linux/list.h>
29#include <linux/module.h>
30#include <linux/completion.h>
31#include <linux/delay.h>
32#include <linux/hyperv.h>
33
34#include "hyperv_vmbus.h"
35
36static void init_vp_index(struct vmbus_channel *channel,
37 const uuid_le *type_guid);
38
39/**
40 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
41 * @icmsghdrp: Pointer to msg header structure
42 * @icmsg_negotiate: Pointer to negotiate message structure
43 * @buf: Raw buffer channel data
44 *
45 * @icmsghdrp is of type &struct icmsg_hdr.
46 * @negop is of type &struct icmsg_negotiate.
47 * Set up and fill in default negotiate response message.
48 *
49 * The fw_version specifies the framework version that
50 * we can support and srv_version specifies the service
51 * version we can support.
52 *
53 * Mainly used by Hyper-V drivers.
54 */
55bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
56 struct icmsg_negotiate *negop, u8 *buf,
57 int fw_version, int srv_version)
58{
59 int icframe_major, icframe_minor;
60 int icmsg_major, icmsg_minor;
61 int fw_major, fw_minor;
62 int srv_major, srv_minor;
63 int i;
64 bool found_match = false;
65
66 icmsghdrp->icmsgsize = 0x10;
67 fw_major = (fw_version >> 16);
68 fw_minor = (fw_version & 0xFFFF);
69
70 srv_major = (srv_version >> 16);
71 srv_minor = (srv_version & 0xFFFF);
72
73 negop = (struct icmsg_negotiate *)&buf[
74 sizeof(struct vmbuspipe_hdr) +
75 sizeof(struct icmsg_hdr)];
76
77 icframe_major = negop->icframe_vercnt;
78 icframe_minor = 0;
79
80 icmsg_major = negop->icmsg_vercnt;
81 icmsg_minor = 0;
82
83 /*
84 * Select the framework version number we will
85 * support.
86 */
87
88 for (i = 0; i < negop->icframe_vercnt; i++) {
89 if ((negop->icversion_data[i].major == fw_major) &&
90 (negop->icversion_data[i].minor == fw_minor)) {
91 icframe_major = negop->icversion_data[i].major;
92 icframe_minor = negop->icversion_data[i].minor;
93 found_match = true;
94 }
95 }
96
97 if (!found_match)
98 goto fw_error;
99
100 found_match = false;
101
102 for (i = negop->icframe_vercnt;
103 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
104 if ((negop->icversion_data[i].major == srv_major) &&
105 (negop->icversion_data[i].minor == srv_minor)) {
106 icmsg_major = negop->icversion_data[i].major;
107 icmsg_minor = negop->icversion_data[i].minor;
108 found_match = true;
109 }
110 }
111
112 /*
113 * Respond with the framework and service
114 * version numbers we can support.
115 */
116
117fw_error:
118 if (!found_match) {
119 negop->icframe_vercnt = 0;
120 negop->icmsg_vercnt = 0;
121 } else {
122 negop->icframe_vercnt = 1;
123 negop->icmsg_vercnt = 1;
124 }
125
126 negop->icversion_data[0].major = icframe_major;
127 negop->icversion_data[0].minor = icframe_minor;
128 negop->icversion_data[1].major = icmsg_major;
129 negop->icversion_data[1].minor = icmsg_minor;
130 return found_match;
131}
132
133EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
134
135/*
136 * alloc_channel - Allocate and initialize a vmbus channel object
137 */
138static struct vmbus_channel *alloc_channel(void)
139{
140 static atomic_t chan_num = ATOMIC_INIT(0);
141 struct vmbus_channel *channel;
142
143 channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
144 if (!channel)
145 return NULL;
146
147 channel->id = atomic_inc_return(&chan_num);
148 spin_lock_init(&channel->inbound_lock);
149 spin_lock_init(&channel->lock);
150
151 INIT_LIST_HEAD(&channel->sc_list);
152 INIT_LIST_HEAD(&channel->percpu_list);
153
154 return channel;
155}
156
/*
 * free_channel - Free the memory of a vmbus channel object.
 *
 * Only the object itself is released; unlinking it from any list is the
 * caller's job.
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}
164
165static void percpu_channel_enq(void *arg)
166{
167 struct vmbus_channel *channel = arg;
168 int cpu = smp_processor_id();
169
170 list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
171}
172
173static void percpu_channel_deq(void *arg)
174{
175 struct vmbus_channel *channel = arg;
176
177 list_del(&channel->percpu_list);
178}
179
180
181void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
182{
183 struct vmbus_channel_relid_released msg;
184 unsigned long flags;
185 struct vmbus_channel *primary_channel;
186
187 memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
188 msg.child_relid = relid;
189 msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
190 vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
191
192 if (channel == NULL)
193 return;
194
195 BUG_ON(!channel->rescind);
196
197 if (channel->target_cpu != get_cpu()) {
198 put_cpu();
199 smp_call_function_single(channel->target_cpu,
200 percpu_channel_deq, channel, true);
201 } else {
202 percpu_channel_deq(channel);
203 put_cpu();
204 }
205
206 if (channel->primary_channel == NULL) {
207 spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
208 list_del(&channel->listentry);
209 spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
210
211 primary_channel = channel;
212 } else {
213 primary_channel = channel->primary_channel;
214 spin_lock_irqsave(&primary_channel->lock, flags);
215 list_del(&channel->sc_list);
216 primary_channel->num_sc--;
217 spin_unlock_irqrestore(&primary_channel->lock, flags);
218 }
219
220 /*
221 * We need to free the bit for init_vp_index() to work in the case
222 * of sub-channel, when we reload drivers like hv_netvsc.
223 */
224 cpumask_clear_cpu(channel->target_cpu,
225 &primary_channel->alloced_cpus_in_node);
226
227 free_channel(channel);
228}
229
230void vmbus_free_channels(void)
231{
232 struct vmbus_channel *channel, *tmp;
233
234 list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
235 listentry) {
236 /* hv_process_channel_removal() needs this */
237 channel->rescind = true;
238
239 vmbus_device_unregister(channel->device_obj);
240 }
241}
242
243/*
244 * vmbus_process_offer - Process the offer by creating a channel/device
245 * associated with this offer
246 */
247static void vmbus_process_offer(struct vmbus_channel *newchannel)
248{
249 struct vmbus_channel *channel;
250 bool fnew = true;
251 unsigned long flags;
252
253 /* Make sure this is a new offer */
254 spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
255
256 list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
257 if (!uuid_le_cmp(channel->offermsg.offer.if_type,
258 newchannel->offermsg.offer.if_type) &&
259 !uuid_le_cmp(channel->offermsg.offer.if_instance,
260 newchannel->offermsg.offer.if_instance)) {
261 fnew = false;
262 break;
263 }
264 }
265
266 if (fnew)
267 list_add_tail(&newchannel->listentry,
268 &vmbus_connection.chn_list);
269
270 spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
271
272 if (!fnew) {
273 /*
274 * Check to see if this is a sub-channel.
275 */
276 if (newchannel->offermsg.offer.sub_channel_index != 0) {
277 /*
278 * Process the sub-channel.
279 */
280 newchannel->primary_channel = channel;
281 spin_lock_irqsave(&channel->lock, flags);
282 list_add_tail(&newchannel->sc_list, &channel->sc_list);
283 channel->num_sc++;
284 spin_unlock_irqrestore(&channel->lock, flags);
285 } else
286 goto err_free_chan;
287 }
288
289 init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);
290
291 if (newchannel->target_cpu != get_cpu()) {
292 put_cpu();
293 smp_call_function_single(newchannel->target_cpu,
294 percpu_channel_enq,
295 newchannel, true);
296 } else {
297 percpu_channel_enq(newchannel);
298 put_cpu();
299 }
300
301 /*
302 * This state is used to indicate a successful open
303 * so that when we do close the channel normally, we
304 * can cleanup properly
305 */
306 newchannel->state = CHANNEL_OPEN_STATE;
307
308 if (!fnew) {
309 if (channel->sc_creation_callback != NULL)
310 channel->sc_creation_callback(newchannel);
311 return;
312 }
313
314 /*
315 * Start the process of binding this offer to the driver
316 * We need to set the DeviceObject field before calling
317 * vmbus_child_dev_add()
318 */
319 newchannel->device_obj = vmbus_device_create(
320 &newchannel->offermsg.offer.if_type,
321 &newchannel->offermsg.offer.if_instance,
322 newchannel);
323 if (!newchannel->device_obj)
324 goto err_deq_chan;
325
326 /*
327 * Add the new device to the bus. This will kick off device-driver
328 * binding which eventually invokes the device driver's AddDevice()
329 * method.
330 */
331 if (vmbus_device_register(newchannel->device_obj) != 0) {
332 pr_err("unable to add child device object (relid %d)\n",
333 newchannel->offermsg.child_relid);
334 kfree(newchannel->device_obj);
335 goto err_deq_chan;
336 }
337 return;
338
339err_deq_chan:
340 spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
341 list_del(&newchannel->listentry);
342 spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
343
344 if (newchannel->target_cpu != get_cpu()) {
345 put_cpu();
346 smp_call_function_single(newchannel->target_cpu,
347 percpu_channel_deq, newchannel, true);
348 } else {
349 percpu_channel_deq(newchannel);
350 put_cpu();
351 }
352
353err_free_chan:
354 free_channel(newchannel);
355}
356
357enum {
358 IDE = 0,
359 SCSI,
360 NIC,
361 ND_NIC,
362 MAX_PERF_CHN,
363};
364
365/*
366 * This is an array of device_ids (device types) that are performance critical.
367 * We attempt to distribute the interrupt load for these devices across
368 * all available CPUs.
369 */
370static const struct hv_vmbus_device_id hp_devs[] = {
371 /* IDE */
372 { HV_IDE_GUID, },
373 /* Storage - SCSI */
374 { HV_SCSI_GUID, },
375 /* Network */
376 { HV_NIC_GUID, },
377 /* NetworkDirect Guest RDMA */
378 { HV_ND_GUID, },
379};
380
381
/*
 * Round-robin cursor over the NUMA nodes: init_vp_index() reads and advances
 * it to statically spread the interrupt load of primary channels.
 */
static int next_numa_node_id;
386
387/*
388 * Starting with Win8, we can statically distribute the incoming
389 * channel interrupt load by binding a channel to VCPU.
390 * We do this in a hierarchical fashion:
391 * First distribute the primary channels across available NUMA nodes
392 * and then distribute the subchannels amongst the CPUs in the NUMA
393 * node assigned to the primary channel.
394 *
395 * For pre-win8 hosts or non-performance critical channels we assign the
396 * first CPU in the first NUMA node.
397 */
398static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
399{
400 u32 cur_cpu;
401 int i;
402 bool perf_chn = false;
403 struct vmbus_channel *primary = channel->primary_channel;
404 int next_node;
405 struct cpumask available_mask;
406 struct cpumask *alloced_mask;
407
408 for (i = IDE; i < MAX_PERF_CHN; i++) {
409 if (!memcmp(type_guid->b, hp_devs[i].guid,
410 sizeof(uuid_le))) {
411 perf_chn = true;
412 break;
413 }
414 }
415 if ((vmbus_proto_version == VERSION_WS2008) ||
416 (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
417 /*
418 * Prior to win8, all channel interrupts are
419 * delivered on cpu 0.
420 * Also if the channel is not a performance critical
421 * channel, bind it to cpu 0.
422 */
423 channel->numa_node = 0;
424 channel->target_cpu = 0;
425 channel->target_vp = hv_context.vp_index[0];
426 return;
427 }
428
429 /*
430 * We distribute primary channels evenly across all the available
431 * NUMA nodes and within the assigned NUMA node we will assign the
432 * first available CPU to the primary channel.
433 * The sub-channels will be assigned to the CPUs available in the
434 * NUMA node evenly.
435 */
436 if (!primary) {
437 while (true) {
438 next_node = next_numa_node_id++;
439 if (next_node == nr_node_ids)
440 next_node = next_numa_node_id = 0;
441 if (cpumask_empty(cpumask_of_node(next_node)))
442 continue;
443 break;
444 }
445 channel->numa_node = next_node;
446 primary = channel;
447 }
448 alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
449
450 if (cpumask_weight(alloced_mask) ==
451 cpumask_weight(cpumask_of_node(primary->numa_node))) {
452 /*
453 * We have cycled through all the CPUs in the node;
454 * reset the alloced map.
455 */
456 cpumask_clear(alloced_mask);
457 }
458
459 cpumask_xor(&available_mask, alloced_mask,
460 cpumask_of_node(primary->numa_node));
461
462 cur_cpu = -1;
463
464 /*
465 * Normally Hyper-V host doesn't create more subchannels than there
466 * are VCPUs on the node but it is possible when not all present VCPUs
467 * on the node are initialized by guest. Clear the alloced_cpus_in_node
468 * to start over.
469 */
470 if (cpumask_equal(&primary->alloced_cpus_in_node,
471 cpumask_of_node(primary->numa_node)))
472 cpumask_clear(&primary->alloced_cpus_in_node);
473
474 while (true) {
475 cur_cpu = cpumask_next(cur_cpu, &available_mask);
476 if (cur_cpu >= nr_cpu_ids) {
477 cur_cpu = -1;
478 cpumask_copy(&available_mask,
479 cpumask_of_node(primary->numa_node));
480 continue;
481 }
482
483 /*
484 * NOTE: in the case of sub-channel, we clear the sub-channel
485 * related bit(s) in primary->alloced_cpus_in_node in
486 * hv_process_channel_removal(), so when we reload drivers
487 * like hv_netvsc in SMP guest, here we're able to re-allocate
488 * bit from primary->alloced_cpus_in_node.
489 */
490 if (!cpumask_test_cpu(cur_cpu,
491 &primary->alloced_cpus_in_node)) {
492 cpumask_set_cpu(cur_cpu,
493 &primary->alloced_cpus_in_node);
494 cpumask_set_cpu(cur_cpu, alloced_mask);
495 break;
496 }
497 }
498
499 channel->target_cpu = cur_cpu;
500 channel->target_vp = hv_context.vp_index[cur_cpu];
501}
502
503static void vmbus_wait_for_unload(void)
504{
505 int cpu = smp_processor_id();
506 void *page_addr = hv_context.synic_message_page[cpu];
507 struct hv_message *msg = (struct hv_message *)page_addr +
508 VMBUS_MESSAGE_SINT;
509 struct vmbus_channel_message_header *hdr;
510 bool unloaded = false;
511
512 while (1) {
513 if (msg->header.message_type == HVMSG_NONE) {
514 mdelay(10);
515 continue;
516 }
517
518 hdr = (struct vmbus_channel_message_header *)msg->u.payload;
519 if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
520 unloaded = true;
521
522 msg->header.message_type = HVMSG_NONE;
523 /*
524 * header.message_type needs to be written before we do
525 * wrmsrl() below.
526 */
527 mb();
528
529 if (msg->header.message_flags.msg_pending)
530 wrmsrl(HV_X64_MSR_EOM, 0);
531
532 if (unloaded)
533 break;
534 }
535}
536
537/*
538 * vmbus_unload_response - Handler for the unload response.
539 */
540static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
541{
542 /*
543 * This is a global event; just wakeup the waiting thread.
544 * Once we successfully unload, we can cleanup the monitor state.
545 */
546 complete(&vmbus_connection.unload_event);
547}
548
549void vmbus_initiate_unload(void)
550{
551 struct vmbus_channel_message_header hdr;
552
553 /* Pre-Win2012R2 hosts don't support reconnect */
554 if (vmbus_proto_version < VERSION_WIN8_1)
555 return;
556
557 init_completion(&vmbus_connection.unload_event);
558 memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
559 hdr.msgtype = CHANNELMSG_UNLOAD;
560 vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));
561
562 /*
563 * vmbus_initiate_unload() is also called on crash and the crash can be
564 * happening in an interrupt context, where scheduling is impossible.
565 */
566 if (!in_interrupt())
567 wait_for_completion(&vmbus_connection.unload_event);
568 else
569 vmbus_wait_for_unload();
570}
571
572/*
573 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
574 *
575 */
576static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
577{
578 struct vmbus_channel_offer_channel *offer;
579 struct vmbus_channel *newchannel;
580
581 offer = (struct vmbus_channel_offer_channel *)hdr;
582
583 /* Allocate the channel object and save this offer. */
584 newchannel = alloc_channel();
585 if (!newchannel) {
586 pr_err("Unable to allocate channel object\n");
587 return;
588 }
589
590 /*
591 * By default we setup state to enable batched
592 * reading. A specific service can choose to
593 * disable this prior to opening the channel.
594 */
595 newchannel->batched_reading = true;
596
597 /*
598 * Setup state for signalling the host.
599 */
600 newchannel->sig_event = (struct hv_input_signal_event *)
601 (ALIGN((unsigned long)
602 &newchannel->sig_buf,
603 HV_HYPERCALL_PARAM_ALIGN));
604
605 newchannel->sig_event->connectionid.asu32 = 0;
606 newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
607 newchannel->sig_event->flag_number = 0;
608 newchannel->sig_event->rsvdz = 0;
609
610 if (vmbus_proto_version != VERSION_WS2008) {
611 newchannel->is_dedicated_interrupt =
612 (offer->is_dedicated_interrupt != 0);
613 newchannel->sig_event->connectionid.u.id =
614 offer->connection_id;
615 }
616
617 memcpy(&newchannel->offermsg, offer,
618 sizeof(struct vmbus_channel_offer_channel));
619 newchannel->monitor_grp = (u8)offer->monitorid / 32;
620 newchannel->monitor_bit = (u8)offer->monitorid % 32;
621
622 vmbus_process_offer(newchannel);
623}
624
625/*
626 * vmbus_onoffer_rescind - Rescind offer handler.
627 *
628 * We queue a work item to process this offer synchronously
629 */
630static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
631{
632 struct vmbus_channel_rescind_offer *rescind;
633 struct vmbus_channel *channel;
634 unsigned long flags;
635 struct device *dev;
636
637 rescind = (struct vmbus_channel_rescind_offer *)hdr;
638 channel = relid2channel(rescind->child_relid);
639
640 if (channel == NULL) {
641 hv_process_channel_removal(NULL, rescind->child_relid);
642 return;
643 }
644
645 spin_lock_irqsave(&channel->lock, flags);
646 channel->rescind = true;
647 spin_unlock_irqrestore(&channel->lock, flags);
648
649 if (channel->device_obj) {
650 /*
651 * We will have to unregister this device from the
652 * driver core.
653 */
654 dev = get_device(&channel->device_obj->device);
655 if (dev) {
656 vmbus_device_unregister(channel->device_obj);
657 put_device(dev);
658 }
659 } else {
660 hv_process_channel_removal(channel,
661 channel->offermsg.child_relid);
662 }
663}
664
/*
 * vmbus_onoffers_delivered - Handler invoked once all offers have been
 * delivered.
 *
 * Intentionally empty: nothing has to happen at that point.
 */
static void vmbus_onoffers_delivered(struct vmbus_channel_message_header *hdr)
{
}
675
676/*
677 * vmbus_onopen_result - Open result handler.
678 *
679 * This is invoked when we received a response to our channel open request.
680 * Find the matching request, copy the response and signal the requesting
681 * thread.
682 */
683static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
684{
685 struct vmbus_channel_open_result *result;
686 struct vmbus_channel_msginfo *msginfo;
687 struct vmbus_channel_message_header *requestheader;
688 struct vmbus_channel_open_channel *openmsg;
689 unsigned long flags;
690
691 result = (struct vmbus_channel_open_result *)hdr;
692
693 /*
694 * Find the open msg, copy the result and signal/unblock the wait event
695 */
696 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
697
698 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
699 msglistentry) {
700 requestheader =
701 (struct vmbus_channel_message_header *)msginfo->msg;
702
703 if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
704 openmsg =
705 (struct vmbus_channel_open_channel *)msginfo->msg;
706 if (openmsg->child_relid == result->child_relid &&
707 openmsg->openid == result->openid) {
708 memcpy(&msginfo->response.open_result,
709 result,
710 sizeof(
711 struct vmbus_channel_open_result));
712 complete(&msginfo->waitevent);
713 break;
714 }
715 }
716 }
717 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
718}
719
720/*
721 * vmbus_ongpadl_created - GPADL created handler.
722 *
723 * This is invoked when we received a response to our gpadl create request.
724 * Find the matching request, copy the response and signal the requesting
725 * thread.
726 */
727static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
728{
729 struct vmbus_channel_gpadl_created *gpadlcreated;
730 struct vmbus_channel_msginfo *msginfo;
731 struct vmbus_channel_message_header *requestheader;
732 struct vmbus_channel_gpadl_header *gpadlheader;
733 unsigned long flags;
734
735 gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
736
737 /*
738 * Find the establish msg, copy the result and signal/unblock the wait
739 * event
740 */
741 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
742
743 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
744 msglistentry) {
745 requestheader =
746 (struct vmbus_channel_message_header *)msginfo->msg;
747
748 if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
749 gpadlheader =
750 (struct vmbus_channel_gpadl_header *)requestheader;
751
752 if ((gpadlcreated->child_relid ==
753 gpadlheader->child_relid) &&
754 (gpadlcreated->gpadl == gpadlheader->gpadl)) {
755 memcpy(&msginfo->response.gpadl_created,
756 gpadlcreated,
757 sizeof(
758 struct vmbus_channel_gpadl_created));
759 complete(&msginfo->waitevent);
760 break;
761 }
762 }
763 }
764 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
765}
766
767/*
768 * vmbus_ongpadl_torndown - GPADL torndown handler.
769 *
770 * This is invoked when we received a response to our gpadl teardown request.
771 * Find the matching request, copy the response and signal the requesting
772 * thread.
773 */
774static void vmbus_ongpadl_torndown(
775 struct vmbus_channel_message_header *hdr)
776{
777 struct vmbus_channel_gpadl_torndown *gpadl_torndown;
778 struct vmbus_channel_msginfo *msginfo;
779 struct vmbus_channel_message_header *requestheader;
780 struct vmbus_channel_gpadl_teardown *gpadl_teardown;
781 unsigned long flags;
782
783 gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
784
785 /*
786 * Find the open msg, copy the result and signal/unblock the wait event
787 */
788 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
789
790 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
791 msglistentry) {
792 requestheader =
793 (struct vmbus_channel_message_header *)msginfo->msg;
794
795 if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
796 gpadl_teardown =
797 (struct vmbus_channel_gpadl_teardown *)requestheader;
798
799 if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
800 memcpy(&msginfo->response.gpadl_torndown,
801 gpadl_torndown,
802 sizeof(
803 struct vmbus_channel_gpadl_torndown));
804 complete(&msginfo->waitevent);
805 break;
806 }
807 }
808 }
809 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
810}
811
812/*
813 * vmbus_onversion_response - Version response handler
814 *
815 * This is invoked when we received a response to our initiate contact request.
816 * Find the matching request, copy the response and signal the requesting
817 * thread.
818 */
819static void vmbus_onversion_response(
820 struct vmbus_channel_message_header *hdr)
821{
822 struct vmbus_channel_msginfo *msginfo;
823 struct vmbus_channel_message_header *requestheader;
824 struct vmbus_channel_version_response *version_response;
825 unsigned long flags;
826
827 version_response = (struct vmbus_channel_version_response *)hdr;
828 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
829
830 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
831 msglistentry) {
832 requestheader =
833 (struct vmbus_channel_message_header *)msginfo->msg;
834
835 if (requestheader->msgtype ==
836 CHANNELMSG_INITIATE_CONTACT) {
837 memcpy(&msginfo->response.version_response,
838 version_response,
839 sizeof(struct vmbus_channel_version_response));
840 complete(&msginfo->waitevent);
841 }
842 }
843 spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
844}
845
846/* Channel message dispatch table */
847struct vmbus_channel_message_table_entry
848 channel_message_table[CHANNELMSG_COUNT] = {
849 {CHANNELMSG_INVALID, 0, NULL},
850 {CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer},
851 {CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind},
852 {CHANNELMSG_REQUESTOFFERS, 0, NULL},
853 {CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered},
854 {CHANNELMSG_OPENCHANNEL, 0, NULL},
855 {CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result},
856 {CHANNELMSG_CLOSECHANNEL, 0, NULL},
857 {CHANNELMSG_GPADL_HEADER, 0, NULL},
858 {CHANNELMSG_GPADL_BODY, 0, NULL},
859 {CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created},
860 {CHANNELMSG_GPADL_TEARDOWN, 0, NULL},
861 {CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown},
862 {CHANNELMSG_RELID_RELEASED, 0, NULL},
863 {CHANNELMSG_INITIATE_CONTACT, 0, NULL},
864 {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response},
865 {CHANNELMSG_UNLOAD, 0, NULL},
866 {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response},
867};
868
869/*
870 * vmbus_onmessage - Handler for channel protocol messages.
871 *
872 * This is invoked in the vmbus worker thread context.
873 */
874void vmbus_onmessage(void *context)
875{
876 struct hv_message *msg = context;
877 struct vmbus_channel_message_header *hdr;
878 int size;
879
880 hdr = (struct vmbus_channel_message_header *)msg->u.payload;
881 size = msg->header.payload_size;
882
883 if (hdr->msgtype >= CHANNELMSG_COUNT) {
884 pr_err("Received invalid channel message type %d size %d\n",
885 hdr->msgtype, size);
886 print_hex_dump_bytes("", DUMP_PREFIX_NONE,
887 (unsigned char *)msg->u.payload, size);
888 return;
889 }
890
891 if (channel_message_table[hdr->msgtype].message_handler)
892 channel_message_table[hdr->msgtype].message_handler(hdr);
893 else
894 pr_err("Unhandled channel message type %d\n", hdr->msgtype);
895}
896
897/*
898 * vmbus_request_offers - Send a request to get all our pending offers.
899 */
900int vmbus_request_offers(void)
901{
902 struct vmbus_channel_message_header *msg;
903 struct vmbus_channel_msginfo *msginfo;
904 int ret;
905
906 msginfo = kmalloc(sizeof(*msginfo) +
907 sizeof(struct vmbus_channel_message_header),
908 GFP_KERNEL);
909 if (!msginfo)
910 return -ENOMEM;
911
912 msg = (struct vmbus_channel_message_header *)msginfo->msg;
913
914 msg->msgtype = CHANNELMSG_REQUESTOFFERS;
915
916
917 ret = vmbus_post_msg(msg,
918 sizeof(struct vmbus_channel_message_header));
919 if (ret != 0) {
920 pr_err("Unable to request offers - %d\n", ret);
921
922 goto cleanup;
923 }
924
925cleanup:
926 kfree(msginfo);
927
928 return ret;
929}
930
931/*
932 * Retrieve the (sub) channel on which to send an outgoing request.
933 * When a primary channel has multiple sub-channels, we try to
934 * distribute the load equally amongst all available channels.
935 */
936struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
937{
938 struct list_head *cur, *tmp;
939 int cur_cpu;
940 struct vmbus_channel *cur_channel;
941 struct vmbus_channel *outgoing_channel = primary;
942 int next_channel;
943 int i = 1;
944
945 if (list_empty(&primary->sc_list))
946 return outgoing_channel;
947
948 next_channel = primary->next_oc++;
949
950 if (next_channel > (primary->num_sc)) {
951 primary->next_oc = 0;
952 return outgoing_channel;
953 }
954
955 cur_cpu = hv_context.vp_index[get_cpu()];
956 put_cpu();
957 list_for_each_safe(cur, tmp, &primary->sc_list) {
958 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
959 if (cur_channel->state != CHANNEL_OPENED_STATE)
960 continue;
961
962 if (cur_channel->target_vp == cur_cpu)
963 return cur_channel;
964
965 if (i == next_channel)
966 return cur_channel;
967
968 i++;
969 }
970
971 return outgoing_channel;
972}
973EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
974
975static void invoke_sc_cb(struct vmbus_channel *primary_channel)
976{
977 struct list_head *cur, *tmp;
978 struct vmbus_channel *cur_channel;
979
980 if (primary_channel->sc_creation_callback == NULL)
981 return;
982
983 list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
984 cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
985
986 primary_channel->sc_creation_callback(cur_channel);
987 }
988}
989
990void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
991 void (*sc_cr_cb)(struct vmbus_channel *new_sc))
992{
993 primary_channel->sc_creation_callback = sc_cr_cb;
994}
995EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
996
997bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
998{
999 bool ret;
1000
1001 ret = !list_empty(&primary->sc_list);
1002
1003 if (ret) {
1004 /*
1005 * Invoke the callback on sub-channel creation.
1006 * This will present a uniform interface to the
1007 * clients.
1008 */
1009 invoke_sc_cb(primary);
1010 }
1011
1012 return ret;
1013}
1014EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);