From: afenner Date: Mon, 27 Jul 2020 13:53:19 +0000 (+0100) Subject: Add workaround for tunnel connectivity X-Git-Url: https://gerrit.nordix.org/gitweb?p=infra%2Fstack%2Fkubernetes.git;a=commitdiff_plain;h=b7924a953ce9f8769dfe1a3b1db65c92bf929275 Add workaround for tunnel connectivity This patch creates a DaemonSet (pod on each node) that pings all the ipip tunnels of all the other nodes. It should be removed once we find why the tunnels aren't open without pinging. Signed-off-by: afenner Change-Id: Ic4e8241aa2a014daa0d299186d07615ee79030a2 Signed-off-by: afenner --- diff --git a/apps/ceph/kubespray/playbooks/roles/install/tasks/main.yaml b/apps/ceph/kubespray/playbooks/roles/install/tasks/main.yaml index b977018..2ff4191 100644 --- a/apps/ceph/kubespray/playbooks/roles/install/tasks/main.yaml +++ b/apps/ceph/kubespray/playbooks/roles/install/tasks/main.yaml @@ -76,6 +76,15 @@ loop_control: loop_var: config_file +- name: Implement Workaround for connectivity problem - ping all tunnels + k8s: + state: present + definition: "{{ lookup('template', config_file) }}" + with_items: + - ping-tunnel-workaround.yaml.j2 + loop_control: + loop_var: config_file + - name: Wait until OPERATOR pod is available k8s_facts: kind: Pod @@ -88,7 +97,7 @@ until: - rook_mgr_status.resources is defined - rook_mgr_status.resources - retries: 20 + retries: 40 delay: 5 - name: Create rook cluster @@ -111,7 +120,7 @@ register: rook_cluster_status until: - rook_cluster_status.resources - retries: 10 + retries: 20 delay: 5 - name: Wait until MGR pods are available @@ -126,7 +135,7 @@ until: - rook_mgr_status.resources is defined - rook_mgr_status.resources - retries: 30 + retries: 40 delay: 10 - name: Wait until OSD pods are available @@ -141,7 +150,7 @@ until: - rook_osd_status.resources is defined - rook_osd_status.resources - retries: 30 + retries: 60 delay: 10 - name: Create rook block storage diff --git a/apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2 
b/apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2 new file mode 100644 index 0000000..089f729 --- /dev/null +++ b/apps/ceph/kubespray/playbooks/roles/install/templates/ping-tunnel-workaround.yaml.j2 @@ -0,0 +1,29 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: pingtunnelworkaround + namespace: "{{ rook_namespace }}" + labels: + app: pingtunnelworkaround +spec: + selector: + matchLabels: + app: pingtunnelworkaround + template: + metadata: + labels: + app: pingtunnelworkaround + spec: + tolerations: + # this toleration is to have the daemonset runnable on master nodes + # remove it if your masters can't run pods + - key: node-role.kubernetes.io/master + effect: NoSchedule + nodeSelector: + kubernetes.io/os: linux + hostNetwork: true + containers: + - name: busybox + image: "{{ busybox_repository }}:{{ busybox_version }}"  # busybox_repository already ends in library/busybox + command: ["/bin/sh"] + args: ["-c", "while true ; do ip route | grep tunl0 | awk -F/ '{print $1}' | xargs -n 1 ping -c 1; sleep 5; done"] diff --git a/apps/ceph/kubespray/playbooks/roles/install/vars/offline-deployment.yaml b/apps/ceph/kubespray/playbooks/roles/install/vars/offline-deployment.yaml index 9a4c206..aea92a7 100644 --- a/apps/ceph/kubespray/playbooks/roles/install/vars/offline-deployment.yaml +++ b/apps/ceph/kubespray/playbooks/roles/install/vars/offline-deployment.yaml @@ -19,6 +19,7 @@ ceph_repository: "{{ server_fqdn }}/ceph/ceph" rook_repository: "{{ server_fqdn }}/rook/ceph" +busybox_repository: "{{ server_fqdn }}/library/busybox" cephcsi_repository: "{{ server_fqdn }}/cephcsi/cephcsi" csi_node_driver_registrar_repository: "{{ server_fqdn }}/k8scsi/csi-node-driver-registrar" csi_provisioner_repository: "{{ server_fqdn }}/k8scsi/csi-provisioner" diff --git a/apps/ceph/kubespray/playbooks/roles/install/vars/online-deployment.yaml b/apps/ceph/kubespray/playbooks/roles/install/vars/online-deployment.yaml index 21a9bb7..76caf86 100644 --- 
a/apps/ceph/kubespray/playbooks/roles/install/vars/online-deployment.yaml +++ b/apps/ceph/kubespray/playbooks/roles/install/vars/online-deployment.yaml @@ -19,6 +19,7 @@ ceph_repository: "docker.io/ceph/ceph" rook_repository: "rook/ceph" +busybox_repository: "docker.io/library/busybox" cephcsi_repository: "quay.io/cephcsi/cephcsi" csi_node_driver_registrar_repository: "quay.io/k8scsi/csi-node-driver-registrar" csi_provisioner_repository: "quay.io/k8scsi/csi-provisioner" diff --git a/vars/kubernetes.yaml b/vars/kubernetes.yaml index fa13a64..a5d1a2f 100644 --- a/vars/kubernetes.yaml +++ b/vars/kubernetes.yaml @@ -69,6 +69,7 @@ kubectl_version: "{{ kubernetes_version }}" # Kubernetes: Versions of rook, ceph and their dependencies # ------------------------------------------------------------------------------- rook_version: "v1.1.2" +busybox_version: "1.32.0" ceph_version: "v14.2.4-20190917" cephcsi_version: "v1.2.1" csi_node_driver_registrar_version: "v1.1.0"