Add retries to handle intermittent network issues
Change-Id: I1fbdbd27b9b480dfafed87b12cdad23e4f3626f6
Issue-ID: INT-586
Signed-off-by: Gary Wu <gary.i.wu@huawei.com>
diff --git a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
index 54a6741..52f1204 100644
--- a/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
+++ b/deployment/heat/onap-oom/k8s_vm_entrypoint.sh
@@ -32,29 +32,34 @@
Acquire::https::Proxy "DIRECT";
EOF
fi
-apt-get -y update
mkdir -p /dockerdata-nfs
echo "__rancher_private_ip_addr__:/dockerdata-nfs /dockerdata-nfs nfs auto,nofail,noatime,nolock,intr,tcp,actimeo=1800 0 0" | tee -a /etc/fstab
-apt-get -y install linux-image-extra-$(uname -r) jq nfs-common
-
-cd ~
-
-# install docker 17.03
-curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
-usermod -aG docker ubuntu
-
# Fix virtual memory allocation for onap-log:elasticsearch:
echo "vm.max_map_count=262144" >> /etc/sysctl.conf
sysctl -p
-sleep 100
+
+while ! hash jq &> /dev/null; do
+ apt-get -y update
+ apt-get -y install linux-image-extra-$(uname -r) jq nfs-common
+ sleep 10
+done
+
+# install docker __docker_version__
+while ! hash docker &> /dev/null; do
+ curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
+ usermod -aG docker ubuntu
+ sleep 10
+done
while [ ! -e /dockerdata-nfs/rancher_agent_cmd.sh ]; do
mount /dockerdata-nfs
- sleep 5
+ sleep 10
done
+
+cd ~
cp /dockerdata-nfs/rancher_agent_cmd.sh .
sed -i "s/docker run/docker run -e CATTLE_AGENT_IP=${HOST_IP}/g" rancher_agent_cmd.sh
source rancher_agent_cmd.sh
diff --git a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
index c1b13a4..9e70da6 100644
--- a/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
+++ b/deployment/heat/onap-oom/rancher_vm_entrypoint.sh
@@ -59,9 +59,12 @@
Acquire::https::Proxy "DIRECT";
EOF
fi
-apt-get -y update
-apt-get -y install linux-image-extra-$(uname -r) jq make nfs-kernel-server moreutils
+while ! hash jq &> /dev/null; do
+ apt-get -y update
+ apt-get -y install linux-image-extra-$(uname -r) jq make nfs-kernel-server moreutils
+ sleep 10
+done
# use RAM disk for /dockerdata-nfs for testing
#echo "tmpfs /dockerdata-nfs tmpfs noatime 1 2" >> /etc/fstab
@@ -92,8 +95,11 @@
cd ~
# install docker __docker_version__
-curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
-usermod -aG docker ubuntu
+while ! hash docker &> /dev/null; do
+ curl -s https://releases.rancher.com/install-docker/__docker_version__.sh | sh
+ usermod -aG docker ubuntu
+ sleep 10
+done
# install rancher __rancher_version__
docker run --restart unless-stopped -d -p 8080:8080 -e CATTLE_BOOTSTRAP_REQUIRED_IMAGE=__docker_proxy__/rancher/agent:v__rancher_agent_version__ __docker_proxy__/rancher/server:v__rancher_version__
@@ -112,9 +118,8 @@
echo export RANCHER_IP=__rancher_private_ip_addr__ > api-keys-rc
source api-keys-rc
-sleep 50
until curl -s -o projects.json -H "Accept: application/json" http://$RANCHER_IP:8080/v2-beta/projects; do
- sleep 10
+ sleep 30
done
OLD_PID=$(jq -r '.data[0].id' projects.json)
@@ -195,10 +200,14 @@
export KUBECONFIG=/root/.kube/config
kubectl config view
+# Enable auto-completion for kubectl
+echo "source <(kubectl completion bash)" >> ~/.bashrc
+
+
# wait for kubernetes to initialze
-sleep 100
+sleep 3m
until [ $(kubectl get pods --namespace kube-system | tail -n +2 | grep -c Running) -ge 6 ]; do
- sleep 10
+ sleep 1m
done
@@ -246,7 +255,7 @@
helm init --client-only
helm init --upgrade
helm serve &
-sleep 3
+sleep 10
helm repo add local http://127.0.0.1:8879
helm repo list
make all
@@ -255,9 +264,9 @@
helm deploy dev local/onap -f ~/integration-override.yaml --namespace onap | tee ~/helm-deploy.log
helm list
-# Enable auto-completion for kubectl
-echo "source <(kubectl completion bash)" >> ~/.bashrc
# Check ONAP status:
-sleep 3
+sleep 10
kubectl get pods --all-namespaces
+kubectl get nodes
+kubectl top nodes
diff --git a/deployment/heat/onap-oom/scripts/deploy.sh b/deployment/heat/onap-oom/scripts/deploy.sh
index e97c5a3..459af34 100755
--- a/deployment/heat/onap-oom/scripts/deploy.sh
+++ b/deployment/heat/onap-oom/scripts/deploy.sh
@@ -153,12 +153,13 @@
ssh-keygen -R $RANCHER_IP
+sleep 2m
ssh -o StrictHostKeychecking=no -i $SSH_KEY ubuntu@$RANCHER_IP "sed -u '/Cloud-init.*finished/q' <(tail -n+0 -f /var/log/cloud-init-output.log)"
-for n in $(seq 1 6); do
- echo "Wait count $n of 6"
+for n in $(seq 1 8); do
+ echo "Wait count $n of 8"
sleep 15m
- timeout 15m ssh -i $SSH_KEY ubuntu@$RANCHER_IP 'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"'
+ ssh -i $SSH_KEY ubuntu@$RANCHER_IP 'sudo su -l root -c "/root/oom/kubernetes/robot/ete-k8s.sh onap health"'
RESULT=$?
if [ $RESULT -eq 0 ]; then
break