Add retries to handle intermittent network issues

Change-Id: I1fbdbd27b9b480dfafed87b12cdad23e4f3626f6
Issue-ID: INT-586
Signed-off-by: Gary Wu <gary.i.wu@huawei.com>
diff --git a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
index 54a6741..52f1204 100644
--- a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
+++ b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
@@ -32,29 +32,34 @@
 Acquire::https::Proxy "DIRECT";
 EOF
 fi
-apt-get -y update
 
 mkdir -p /dockerdata-nfs
 echo "__rancher_private_ip_addr__:/dockerdata-nfs /dockerdata-nfs nfs auto,nofail,noatime,nolock,intr,tcp,actimeo=1800 0 0" | tee -a /etc/fstab
 
-apt-get -y install linux-image-extra-$(uname -r) jq nfs-common
-
-cd ~
-
-# install docker 17.03
-curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
-usermod -aG docker ubuntu
-
 # Fix virtual memory allocation for onap-log:elasticsearch:
 echo "vm.max_map_count=262144" >> /etc/sysctl.conf
 sysctl -p
 
-sleep 100
+
+# Retry until apt-get succeeds for every package; probing for jq alone would miss a failed nfs-common install.
+until apt-get -y update &&
+      apt-get -y install linux-image-extra-"$(uname -r)" jq nfs-common; do
+    sleep 10
+done
+
+# install docker __docker_version__ (retry until the docker binary is on PATH)
+while ! hash docker &> /dev/null; do
+    curl -fsS https://releases.rancher.com/install-docker/__docker_version__.sh | sh  # -f: never feed an HTTP error page to sh; -S: still print why curl failed
+    usermod -aG docker ubuntu
+    sleep 10
+done
 
 while [ ! -e /dockerdata-nfs/rancher_agent_cmd.sh ]; do
     mount /dockerdata-nfs
-    sleep 5
+    sleep 10
 done
+
+cd ~
 cp /dockerdata-nfs/rancher_agent_cmd.sh .
 sed -i "s/docker run/docker run -e CATTLE_AGENT_IP=${HOST_IP}/g" rancher_agent_cmd.sh
 source rancher_agent_cmd.sh
diff --git a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
index c1b13a4..9e70da6 100644
--- a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
+++ b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
@@ -59,9 +59,12 @@
 Acquire::https::Proxy "DIRECT";
 EOF
 fi
-apt-get -y update
-apt-get -y install linux-image-extra-$(uname -r) jq make nfs-kernel-server moreutils
 
+# Retry until apt-get succeeds for every package; probing for jq alone would miss a failed make/nfs-kernel-server/moreutils install.
+until apt-get -y update &&
+      apt-get -y install linux-image-extra-"$(uname -r)" jq make nfs-kernel-server moreutils; do
+    sleep 10
+done
 
 # use RAM disk for /dockerdata-nfs for testing
 #echo "tmpfs /dockerdata-nfs tmpfs noatime 1 2" >> /etc/fstab
@@ -92,8 +95,11 @@
 cd ~
 
 # install docker __docker_version__
-curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
-usermod -aG docker ubuntu
+while ! hash docker &> /dev/null; do  # retry until the docker binary is on PATH
+    curl -fsS https://releases.rancher.com/install-docker/__docker_version__.sh | sh  # -f: never feed an HTTP error page to sh; -S: still print why curl failed
+    usermod -aG docker ubuntu
+    sleep 10
+done
 
 # install rancher __rancher_version__
 docker run --restart unless-stopped -d -p 8080:8080  -e CATTLE_BOOTSTRAP_REQUIRED_IMAGE=__docker_proxy__/rancher/agent:v__rancher_agent_version__ __docker_proxy__/rancher/server:v__rancher_version__
@@ -112,9 +118,8 @@
 echo export RANCHER_IP=__rancher_private_ip_addr__ > api-keys-rc
 source api-keys-rc
 
-sleep 50
 until curl -s -o projects.json -H "Accept: application/json" http://$RANCHER_IP:8080/v2-beta/projects; do
-    sleep 10
+    sleep 30
 done
 OLD_PID=$(jq -r '.data[0].id' projects.json)
 
@@ -195,10 +200,14 @@
 export KUBECONFIG=/root/.kube/config
 kubectl config view
 
+# Append the kubectl bash-completion hook to ~/.bashrc
+printf '%s\n' 'source <(kubectl completion bash)' >> ~/.bashrc
+
+
 # wait for kubernetes to initialze
-sleep 100
+sleep 3m
 until [ $(kubectl get pods --namespace kube-system | tail -n +2 | grep -c Running) -ge 6 ]; do
-    sleep 10
+    sleep 1m
 done
 
 
@@ -246,7 +255,7 @@
 helm init --client-only
 helm init --upgrade
 helm serve &
-sleep 3
+sleep 10
 helm repo add local http://127.0.0.1:8879
 helm repo list
 make all
@@ -255,9 +264,9 @@
 helm deploy dev local/onap -f ~/integration-override.yaml --namespace onap | tee ~/helm-deploy.log
 helm list
 
-# Enable auto-completion for kubectl
-echo "source <(kubectl completion bash)" >> ~/.bashrc
 
 # Check ONAP status:
-sleep 3
+sleep 10
 kubectl get pods --all-namespaces
+kubectl get nodes
+kubectl top nodes
diff --git a/deployment/heat/onap-oom/scripts/deploy.sh b/deployment/heat/onap-oom/scripts/deploy.sh
index e97c5a3..459af34 100755
--- a/deployment/heat/onap-oom/scripts/deploy.sh
+++ b/deployment/heat/onap-oom/scripts/deploy.sh
@@ -153,12 +153,13 @@
 
 ssh-keygen -R $RANCHER_IP
 
+sleep 2m
 ssh -o StrictHostKeychecking=no -i $SSH_KEY ubuntu@$RANCHER_IP "sed -u '/Cloud-init.*finished/q' <(tail -n+0 -f /var/log/cloud-init-output.log)"
 
-for n in $(seq 1 6); do
-    echo "Wait count $n of 6"
+for n in $(seq 1 8); do
+    echo "Wait count $n of 8"
     sleep 15m
-    timeout 15m ssh -i $SSH_KEY ubuntu@$RANCHER_IP  'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"'
+    ssh -i $SSH_KEY ubuntu@$RANCHER_IP  'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"'
     RESULT=$?
     if [ $RESULT -eq 0 ]; then
   	break