Add workaround for tunnel connectivity 06/5606/1
authorafenner <andrew.fenner@est.tech>
Mon, 27 Jul 2020 13:53:19 +0000 (14:53 +0100)
committerFatih Degirmenci <fdegir@gmail.com>
Sat, 1 Aug 2020 21:21:35 +0000 (21:21 +0000)
This patch creates a DaemonSet (one pod on each node) that pings the IPIP tunnels of all the other nodes.
It should be removed once we find out why the tunnels aren't open without pinging.

Signed-off-by: afenner <andrew.fenner@est.tech>
Change-Id: Ic4e8241aa2a014daa0d299186d07615ee79030a2
Signed-off-by: afenner <andrew.fenner@est.tech>
(cherry picked from commit b7924a953ce9f8769dfe1a3b1db65c92bf929275)

apps/ceph/kubespray/playbooks/roles/install/tasks/main.yaml
apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2 [new file with mode: 0644]
apps/ceph/kubespray/playbooks/roles/install/vars/offline-deployment.yaml
apps/ceph/kubespray/playbooks/roles/install/vars/online-deployment.yaml
vars/kubernetes.yaml

index 6d3a676aa0d2c91bc1a7c923ea18ddfff8ead747..4c4ebe9b93dc053f1397ff3776d355b7ef92aba8 100644 (file)
   loop_control:
     loop_var: config_file
 
+- name: Implement Workaround for connectivity problem - ping all tunnels
+  loop_control:
+    loop_var: config_file
+  with_items:
+    - ping-tunnel-workaround.yaml.j2
+  k8s:  # renders each listed template and ensures the resulting object is present
+    definition: "{{ lookup('template', config_file) }}"
+    state: present
+
 - name: Wait until OPERATOR pod is available
   k8s_facts:
     kind: Pod
@@ -88,7 +97,7 @@
   until:
     - rook_mgr_status.resources is defined
     - rook_mgr_status.resources
-  retries: 20
+  retries: 40
   delay: 5
 
 - name: Create rook cluster
   register: rook_cluster_status
   until:
     - rook_cluster_status.resources
-  retries: 10
+  retries: 20
   delay: 5
 
 - name: Wait until MGR pods are available
   until:
     - rook_mgr_status.resources is defined
     - rook_mgr_status.resources
-  retries: 30
+  retries: 40
   delay: 10
 
 - name: Wait until OSD pods are available
   until:
     - rook_osd_status.resources is defined
     - rook_osd_status.resources
-  retries: 30
+  retries: 60
   delay: 10
 
 - name: Create rook block storage
diff --git a/apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2 b/apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2
new file mode 100644 (file)
index 0000000..089f729
--- /dev/null
@@ -0,0 +1,29 @@
+apiVersion: apps/v1
+kind: DaemonSet  # temporary workaround: pods that keep pinging the tunl0 route addresses
+metadata:
+  name: pingtunnelworkaround
+  namespace: "{{ rook_namespace }}"
+  labels:
+    app: pingtunnelworkaround
+spec:
+  selector:
+    matchLabels:
+      app: pingtunnelworkaround
+  template:
+    metadata:
+      labels:
+        app: pingtunnelworkaround
+    spec:
+      tolerations:
+      # this toleration is to have the daemonset runnable on master nodes
+      # remove it if your masters can't run pods
+      - key: node-role.kubernetes.io/master
+        effect: NoSchedule
+      nodeSelector:
+        kubernetes.io/os: linux
+      hostNetwork: true  # share the node's network namespace so "ip route" lists the node's routes
+      containers:
+      - name: busybox
+        image: "{{ busybox_repository }}:{{ busybox_version }}"  # busybox_repository already ends in /library/busybox
+        command: ["/bin/sh"]  # every 5s: ping once the address (text before "/") of each route line mentioning tunl0
+        args: ["-c", "while true ; do ip route | grep tunl0 | awk -F/ '{print $1}' | xargs -n 1 ping -c 1; sleep 5; done"]
index 9a4c206085b34d3060ff4dfa6d44e2df0fa79cd4..aea92a7ef5ff361924543e8a8c843e58fb550b9c 100644 (file)
@@ -19,6 +19,7 @@
 
 ceph_repository: "{{ server_fqdn }}/ceph/ceph"
 rook_repository: "{{ server_fqdn }}/rook/ceph"
+busybox_repository: "{{ server_fqdn }}/library/busybox"
 cephcsi_repository: "{{ server_fqdn }}/cephcsi/cephcsi"
 csi_node_driver_registrar_repository: "{{ server_fqdn }}/k8scsi/csi-node-driver-registrar"
 csi_provisioner_repository: "{{ server_fqdn }}/k8scsi/csi-provisioner"
index 21a9bb7c3b29819d6a31bfb4c013c5a65b02c4c3..76caf867d91b21301c4ccb67cfb226d13c48f79a 100644 (file)
@@ -19,6 +19,7 @@
 
 ceph_repository: "docker.io/ceph/ceph"
 rook_repository: "rook/ceph"
+busybox_repository: "docker.io/library/busybox"
 cephcsi_repository: "quay.io/cephcsi/cephcsi"
 csi_node_driver_registrar_repository: "quay.io/k8scsi/csi-node-driver-registrar"
 csi_provisioner_repository: "quay.io/k8scsi/csi-provisioner"
index 160b487c9820ef471c5e897db7090970a173f86a..98b50aaeda4d0e7ff8f41bf5e1454f4c7492bb2b 100644 (file)
@@ -88,6 +88,7 @@ cni_binary_checksum: "994fbfcdbb2eedcfa87e48d8edb9bb365f4e2747a7e47658482556c12f
 # Kubernetes: Versions of rook, ceph and their dependencies
 # -------------------------------------------------------------------------------
 rook_version: "v1.1.2"
+busybox_version: "1.32.0"
 ceph_version: "v14.2.4-20190917"
 cephcsi_version: "v1.2.1"
 csi_node_driver_registrar_version: "v1.1.0"