Updates:

* Switch to python3.8
* Switch to SI95 from NNG (rmr v3 vs rmr v1)
* The switch to SI95 led to a rabbit hole in which we eventually discovered that rmr_send may sometimes block for an arbitrary period of time. Because of this issue, a1's sends are now threaded. Please see the longer comment about this in a1rmr.
* Bump version of py xapp frame (SDL used only) in A1
* Bump version of go xapp frame (0.0.24 -> 0.4.2) in integration tests
* Add some additional logging in A1

Issue-ID: RIC-156
Change-Id: I8ed5356bea60a3ccea6fa2d03981ad634b98bab1
Signed-off-by: Tommy Carpenter <tc677g@att.com>

commit: 102b89592db01d8361a754c11a85699e5d2e965c [log] [tgz]
author: Tommy Carpenter <tc677g@att.com> Fri Mar 20 10:02:46 2020 -0400
committer: Tommy Carpenter <tc677g@att.com> Fri Mar 20 12:27:06 2020 -0400
tree: 34a70a8625fd6a98fae5e4f06292203f63ebb1d1
parent: 4014a85d3cc546bf39ffa64c5cfe6e21944d2afb [diff]
diff --git a/Dockerfile b/Dockerfile
index 8872e4e..d18d7d1 100644
--- a/Dockerfile
+++ b/Dockerfile

@@ -17,10 +17,9 @@
 
 # This container uses a 2 stage build!
 # Tips and tricks were learned from: https://pythonspeed.com/articles/multi-stage-docker-python/
-FROM python:3.7-alpine AS compile-image
+FROM python:3.8-alpine AS compile-image
 # Gevent needs gcc
 RUN apk update && apk add gcc musl-dev
-# do the install of a1
 
 # Switch to a non-root user for security reasons
 # This is only really needed in stage 2 however this makes the copying easier and straitforward! --user doesn't do the same thing if run as root!
@@ -35,14 +34,13 @@
 
 ###########
 # 2nd stage
-FROM python:3.7-alpine
+FROM python:3.8-alpine
 # dir that rmr routing file temp goes into
 RUN mkdir -p /opt/route/
 # python copy; this basically makes the 2 stage python build work
 COPY --from=compile-image /home/a1user/.local /home/a1user/.local
-# copy rmr .sos from the builder image
-COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1 /usr/local/lib64/libnng.so /usr/local/lib64/libnng.so
-COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1 /usr/local/lib64/librmr_nng.so /usr/local/lib64/librmr_nng.so
+# copy rmr .so from the builder image
+COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:3-rmr-si95-nng-3.6.1 /usr/local/lib64/librmr_si.so /usr/local/lib64/librmr_si.so
 # Switch to a non-root user for security reasons. a1 does not currently write into any dirs so no chowns are needed at this time.
 RUN addgroup -S a1user && adduser -S -G a1user a1user
 USER a1user

diff --git a/Dockerfile-Unit-Test b/Dockerfile-Unit-Test
index b71302a..a0a9a02 100644
--- a/Dockerfile-Unit-Test
+++ b/Dockerfile-Unit-Test

@@ -14,12 +14,10 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 # ==================================================================================
-FROM python:3.7-alpine
+FROM python:3.8-alpine
 
-# copy rmr .sos from the builder image
-COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1 /usr/local/lib64/libnng.so /usr/local/lib64/libnng.so
-COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1 /usr/local/lib64/librmr_nng.so /usr/local/lib64/librmr_nng.so
-
+# copy rmr .so from the builder image
+COPY --from=nexus3.o-ran-sc.org:10004/bldr-alpine3-go:3-rmr-si95-nng-3.6.1 /usr/local/lib64/librmr_si.so /usr/local/lib64/librmr_si.so
 
 # dir that rmr routing file temp goes into
 RUN mkdir -p /opt/route/
@@ -37,4 +35,4 @@
 WORKDIR /tmp
 
 # Run the unit tests
-RUN tox -e py37,flake8
+RUN tox -e py38,flake8

diff --git a/a1/a1rmr.py b/a1/a1rmr.py
index 58ec1c0..771842a 100644
--- a/a1/a1rmr.py
+++ b/a1/a1rmr.py

@@ -97,10 +97,13 @@
             sbuf = rmr.rmr_alloc_msg(self.mrc, len(pay), payload=pay, gen_transaction_id=True, mtype=mtype, sub_id=subid)
             sbuf.contents.sub_id = subid
             pre_send_summary = rmr.message_summary(sbuf)
+            mdc_logger.debug("Trying to send message: {}".format(pre_send_summary))
             sbuf = rmr.rmr_send_msg(self.mrc, sbuf)  # send
             if self._assert_good_send(sbuf, pre_send_summary):
                 rmr.rmr_free_msg(sbuf)  # free
-                break
+                return
+
+        mdc_logger.debug("A1 did NOT send the message successfully after {} retries!".format(RETRY_TIMES))
 
     def _rts_msg(self, pay, sbuf_rts, mtype):
         """
@@ -114,6 +117,13 @@
                 break
         return sbuf_rts  # in some cases rts may return a new sbuf
 
+    def _handle_sends(self):
+        # send out all messages waiting for us
+        while not self.instance_send_queue.empty():
+            work_item = self.instance_send_queue.get(block=False, timeout=None)
+            payload = json.dumps(messages.a1_to_handler(*work_item)).encode("utf-8")
+            self._send_msg(payload, A1_POLICY_REQUEST, work_item[1])
+
     def loop(self):
         """
         This loop runs forever, and has 3 jobs:
@@ -125,11 +135,13 @@
         mdc_logger.debug("Work loop starting")
         while self.keep_going:
 
-            # send out all messages waiting for us
-            while not self.instance_send_queue.empty():
-                work_item = self.instance_send_queue.get(block=False, timeout=None)
-                payload = json.dumps(messages.a1_to_handler(*work_item)).encode("utf-8")
-                self._send_msg(payload, A1_POLICY_REQUEST, work_item[1])
+            # Update 3/20/2020
+            # We now handle our sends in a thread (that will just exit when it's done) because there is a difference between how send works in SI95 vs NNG.
+            # Send_msg via NNG formerly never blocked.
+            # However, under SI95 this send may block for some arbitrary period of time on the first send to an endpoint for which a connection is not established.
+            # If this send takes too long, this loop blocks, and the rmr thread healthcheck will fail, which will cause A1's healthcheck to fail, which will cause Kubernetes to whack A1 and all kinds of horrible things happen.
+            # Therefore, now under SI95, we thread this.
+            Thread(target=self._handle_sends).start()
 
             # read our mailbox
             for (msg, sbuf) in self.rcv_func():
@@ -156,12 +168,13 @@
                     try:
                         # got a query, do a lookup and send out all instances
                         pti = json.loads(msg["payload"])["policy_type_id"]
-                        mdc_logger.debug("Received query for: {0}".format(pti))
-                        for pii in data.get_instance_list(pti):
+                        instance_list = data.get_instance_list(pti)  # will raise if a bad type
+                        mdc_logger.debug("Received a query for a good type: {0}".format(msg))
+                        for pii in instance_list:
                             instance = data.get_policy_instance(pti, pii)
                             payload = json.dumps(messages.a1_to_handler("CREATE", pti, pii, instance)).encode("utf-8")
                             sbuf = self._rts_msg(payload, sbuf, A1_POLICY_REQUEST)
-                    except (PolicyTypeNotFound, PolicyInstanceNotFound):
+                    except (PolicyTypeNotFound):
                         mdc_logger.debug("Received a query for a non-existent type: {0}".format(msg))
                     except (KeyError, TypeError, json.decoder.JSONDecodeError):
                         mdc_logger.debug("Dropping malformed policy query message: {0}".format(msg))
@@ -175,6 +188,8 @@
             self.last_ran = time.time()
             time.sleep(1)
 
+        mdc_logger.debug("RMR Thread Ending!")
+
 
 # Public
 

diff --git a/a1/controller.py b/a1/controller.py
index 4210266..2de69b8 100644
--- a/a1/controller.py
+++ b/a1/controller.py

@@ -63,8 +63,10 @@
     3. checks that our SDL connection is healthy
     """
     if not a1rmr.healthcheck_rmr_thread():
+        mdc_logger.debug("A1 is not healthy due to the rmr thread")
         return "rmr thread is unhealthy", 500
     if not data.SDL.healthcheck():
+        mdc_logger.debug("A1 is not healthy because it does not have a connection to SDL")
         return "sdl connection is unhealthy", 500
     return "", 200
 
@@ -86,6 +88,7 @@
 
     def put_type_handler():
         data.store_policy_type(policy_type_id, body)
+        mdc_logger.debug("Policy type {} created.".format(policy_type_id))
         return "", 201
 
     body = connexion.request.json
@@ -106,6 +109,7 @@
 
     def delete_policy_type_handler():
         data.delete_policy_type(policy_type_id)
+        mdc_logger.debug("Policy type {} deleted.".format(policy_type_id))
         return "", 204
 
     return _try_func_return(delete_policy_type_handler)

diff --git a/container-tag.yaml b/container-tag.yaml
index 2507613..c05d3bf 100644
--- a/container-tag.yaml
+++ b/container-tag.yaml

@@ -1,4 +1,4 @@
 # The Jenkins job uses this string for the tag in the image name
 # for example nexus3.o-ran-sc.org:10004/my-image-name:my-tag
 ---
-tag: 2.1.4
+tag: 2.1.5

diff --git a/docs/developer-guide.rst b/docs/developer-guide.rst
index b458b1f..6ef21f9 100644
--- a/docs/developer-guide.rst
+++ b/docs/developer-guide.rst

@@ -36,22 +36,26 @@
 
 Version bumping rmr
 -------------------
-As of 2020/02/13, A1, Dockerfile-Unit-Test,  and all three integration test receivers use a base image from o-ran-sc.
+As of 2020/02/13, A1 (Dockerfile), Dockerfile-Unit-Test,  and all three integration test receivers use a base image from o-ran-sc.
 The rmr version is in that base image.
-However, the one item in this repo that must be kept in sync is ``rmr-version.yaml``. This controls what rmr gets installed for unit testing.
+When version changes are made in that image, rebuilding those 5 containers in the A1 repo will pick it up (or just A1 itself for prod usage).
+
+However, there are two items in this repo that must be kept in sync: ``rmr-version.yaml``, which controls what rmr gets installed for unit testing in Jenkins, and ``integration_tests/install_rmr.sh``, which is a useful script for a variety of local testing.
 
 Version bumping pyrmr
 ---------------------
 rmr-python is the python binding to rmr . Installing rmr per the above does not install it.
-Bumping the rmr python version dependency requires changes in:
+Bumping the rmr python version is done via ``setup.py``
 
-1) ``setup.py``
+Version bumping python itself
+-----------------------------
+If you want to update the version of python itself (e.g., as was just done from 3.7 to 3.8), update:
 
-2) ``integration_tests/Dockerfile-test-delay-receiver``
+1) ``Dockerfile``
 
-3) ``integration_tests/Dockerfile-query-receiver``
+2) ``Dockerfile-Unit-Test``
 
-Run the integration tests after attempting this.
+3) ``tox.ini``
 
 Unit Testing
 ------------

diff --git a/docs/release-notes.rst b/docs/release-notes.rst
index 0ca68d5..439937c 100644
--- a/docs/release-notes.rst
+++ b/docs/release-notes.rst

@@ -14,6 +14,19 @@
    :depth: 3
    :local:
 
+
+[2.1.5] - 3/19/2020
+-------------------
+::
+
+    * Switch to python3.8
+    * Switch to SI95 from NNG (rmr v3 vs rmr v1)
+    * The switch to SI95 led to a rabbit hole in which we eventually discovered that rmr_send may sometimes block for an arbitrary period of time. Because of this issue, a1's sends are now threaded. Please see the longer comment about this in a1rmr.
+    * Bump version of py xapp frame (SDL used only) in A1
+    * Bump version of go xapp frame (0.0.24 -> 0.4.2) in integration tests
+    * Add some additional logging in A1
+
+
 [2.1.4] - 3/6/2020
 -------------------
 ::

diff --git a/integration_tests/a1mediator/Chart.yaml b/integration_tests/a1mediator/Chart.yaml
index bc20e74..3432ee7 100644
--- a/integration_tests/a1mediator/Chart.yaml
+++ b/integration_tests/a1mediator/Chart.yaml

@@ -1,4 +1,4 @@
 apiVersion: v1
 description: A1 Helm chart for Kubernetes
 name: a1mediator
-version: 2.1.4
+version: 2.1.5

diff --git a/integration_tests/install_rmr.sh b/integration_tests/install_rmr.sh
index 70ee489..5ddc168 100755
--- a/integration_tests/install_rmr.sh
+++ b/integration_tests/install_rmr.sh

@@ -1,5 +1,5 @@
 #!/bin/sh
-git clone --branch 1.13.1 https://gerrit.oran-osc.org/r/ric-plt/lib/rmr \
+git clone --branch 3.6.1 https://gerrit.oran-osc.org/r/ric-plt/lib/rmr \
     && cd rmr \
     && mkdir .build; cd .build \
     && echo "<<<installing rmr devel headers>>>" \

diff --git a/integration_tests/test_a1.tavern.yaml b/integration_tests/test_a1.tavern.yaml
index a025643..bdd8d45 100644
--- a/integration_tests/test_a1.tavern.yaml
+++ b/integration_tests/test_a1.tavern.yaml

@@ -540,10 +540,11 @@
       body: [qt1, qt2]
 
   # after the query, a1 should send, query receiver should send back, and the policy should be in effect
-
+  # sometimes in kubernetes, this test takes a long time to work because of a k8s issue
+  # empirically we find that the si95 rmr finally "detects" failure after about 75 seconds, retries, and then works.
   - name: test the query status get
-    max_retries: 3
-    delay_before: 6  # give it a few seconds for rmr ; delay reciever sleeps for 5 seconds by default
+    max_retries: 100
+    delay_before: 1
     request:
       url: http://localhost:10000/a1-p/policytypes/1006001/policies/qt1/status
       method: GET
@@ -554,8 +555,8 @@
         has_been_deleted: False
 
   - name: test the query status get 2
-    max_retries: 3
-    delay_before: 6  # give it a few seconds for rmr ; delay reciever sleeps for 5 seconds by default
+    max_retries: 100
+    delay_before: 1
     request:
       url: http://localhost:10000/a1-p/policytypes/1006001/policies/qt2/status
       method: GET

diff --git a/integration_tests/testxappcode/Dockerfile-delay-receiver b/integration_tests/testxappcode/Dockerfile-delay-receiver
index 15c6f28..2398424 100644
--- a/integration_tests/testxappcode/Dockerfile-delay-receiver
+++ b/integration_tests/testxappcode/Dockerfile-delay-receiver

@@ -16,8 +16,7 @@
 # ==================================================================================
 
 # This Dockerfile uses a two stage Docker build
-
-FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1
+FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:3-rmr-si95-nng-3.6.1
 
 # go will complain if there is a go.mod at the root of the GOPATH so we can't.
 RUN mkdir myxapp
@@ -33,8 +32,7 @@
 
 # 2nd stage
 FROM alpine:3.11
-COPY --from=0 /usr/local/lib64/libnng.so* /usr/local/lib64/
-COPY --from=0 /usr/local/lib64/librmr_nng* /usr/local/lib64/
+COPY --from=0 /usr/local/lib64/librmr_si.so* /usr/local/lib64/
 COPY --from=0 /go/myxapp/receiver .
 COPY delay-config-file.yaml .
 

diff --git a/integration_tests/testxappcode/Dockerfile-query-receiver b/integration_tests/testxappcode/Dockerfile-query-receiver
index 20fb082..9b83c9b 100644
--- a/integration_tests/testxappcode/Dockerfile-query-receiver
+++ b/integration_tests/testxappcode/Dockerfile-query-receiver

@@ -16,8 +16,7 @@
 # ==================================================================================
 
 # This Dockerfile uses a two stage Docker build
-
-FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1
+FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:3-rmr-si95-nng-3.6.1
 
 # go will complain if there is a go.mod at the root of the GOPATH so we can't.
 RUN mkdir myxapp
@@ -33,8 +32,7 @@
 
 # 2nd stage
 FROM alpine:3.11
-COPY --from=0 /usr/local/lib64/libnng.so* /usr/local/lib64/
-COPY --from=0 /usr/local/lib64/librmr_nng* /usr/local/lib64/
+COPY --from=0 /usr/local/lib64/librmr_si.so* /usr/local/lib64/
 COPY --from=0 /go/myxapp/receiver .
 COPY query-config-file.yaml .
 

diff --git a/integration_tests/testxappcode/Dockerfile-test-receiver b/integration_tests/testxappcode/Dockerfile-test-receiver
index 174af0e..bf9323a 100644
--- a/integration_tests/testxappcode/Dockerfile-test-receiver
+++ b/integration_tests/testxappcode/Dockerfile-test-receiver

@@ -18,7 +18,7 @@
 # This Dockerfile uses a two stage Docker build
 
 # The first stage is defined here: https://gerrit.o-ran-sc.org/r/gitweb?p=ci-management.git;a=blob;f=docker/bldr-alpine3-go/Dockerfile;h=a1e31f07e6113d4a02202793ace6ebc780d71583;hb=3711ffcbfe06f6c872bf4a0871eb5f2a2fcd83ae
-FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:1-rmr1.13.1
+FROM nexus3.o-ran-sc.org:10004/bldr-alpine3-go:3-rmr-si95-nng-3.6.1
 
 # go will complain if there is a go.mod at the root of the GOPATH so we can't.
 RUN mkdir myxapp
@@ -34,8 +34,7 @@
 
 # 2nd stage
 FROM alpine:3.11
-COPY --from=0 /usr/local/lib64/libnng.so* /usr/local/lib64/
-COPY --from=0 /usr/local/lib64/librmr_nng* /usr/local/lib64/
+COPY --from=0 /usr/local/lib64/librmr_si.so* /usr/local/lib64/
 COPY --from=0 /go/myxapp/receiver .
 COPY test-config-file.yaml .
 

diff --git a/integration_tests/testxappcode/go.mod b/integration_tests/testxappcode/go.mod
index 9a5691f..615e4ad 100644
--- a/integration_tests/testxappcode/go.mod
+++ b/integration_tests/testxappcode/go.mod

@@ -3,17 +3,12 @@
 module gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/example-xapp
 
 require (
-	gerrit.o-ran-sc.org/r/ric-plt/xapp-frame v0.0.24
-	github.com/go-openapi/errors v0.19.3 // indirect
+	gerrit.o-ran-sc.org/r/ric-plt/xapp-frame v0.4.2
 	github.com/go-openapi/runtime v0.19.11 // indirect
 	github.com/go-openapi/spec v0.19.6 // indirect
-	github.com/go-openapi/strfmt v0.19.4 // indirect
-	github.com/go-openapi/swag v0.19.7 // indirect
-	github.com/go-openapi/validate v0.19.6 // indirect
-	github.com/jessevdk/go-flags v1.4.0 // indirect
 )
 
-replace gerrit.o-ran-sc.org/r/ric-plt/xapp-frame => gerrit.o-ran-sc.org/r/ric-plt/xapp-frame.git v0.0.24
+replace gerrit.o-ran-sc.org/r/ric-plt/xapp-frame => gerrit.o-ran-sc.org/r/ric-plt/xapp-frame.git v0.4.2
 
 replace gerrit.o-ran-sc.org/r/ric-plt/sdlgo => gerrit.o-ran-sc.org/r/ric-plt/sdlgo.git v0.5.0
 

diff --git a/rmr-version.yaml b/rmr-version.yaml
index 5808bc4..07573f1 100644
--- a/rmr-version.yaml
+++ b/rmr-version.yaml

@@ -1,3 +1,3 @@
 # CI script installs RMR from PackageCloud using this version
 ---
-version: 1.13.1
+version: 3.6.1

diff --git a/setup.py b/setup.py
index 87ba3ab..1179239 100644
--- a/setup.py
+++ b/setup.py

@@ -18,7 +18,7 @@
 
 setup(
     name="a1",
-    version="2.1.4",
+    version="2.1.5",
     packages=find_packages(exclude=["tests.*", "tests"]),
     author="Tommy Carpenter",
     description="RIC A1 Mediator for policy/intent changes",
@@ -30,10 +30,9 @@
         "Flask",
         "connexion[swagger-ui]",
         "gevent",
-        "rmr>=2.2.0",
+        "rmr>=4.0.0,<5.0.0",
         "mdclogpy",
-        "ricxappframe>=0.2.0",
-        "ricsdl>=2.0.3,<3.0.0",
+        "ricxappframe>=0.4.0",
     ],
     package_data={"a1": ["openapi.yaml"]},
 )

diff --git a/tox.ini b/tox.ini
index 2e6ad9e..2faf2c6 100644
--- a/tox.ini
+++ b/tox.ini

@@ -1,6 +1,6 @@
 # ==================================================================================
-#       Copyright (c) 2019 Nokia
-#       Copyright (c) 2018-2019 AT&T Intellectual Property.
+#       Copyright (c) 2019-2020 Nokia
+#       Copyright (c) 2018-2020 AT&T Intellectual Property.
 #
 #   Licensed under the Apache License, Version 2.0 (the "License");
 #   you may not use this file except in compliance with the License.
@@ -14,11 +14,15 @@
 #   See the License for the specific language governing permissions and
 #   limitations under the License.
 # ==================================================================================
+
+# Code
+
 [tox]
-envlist = py37,flake8,docs,docs-linkcheck
+envlist = py38,flake8,docs,docs-linkcheck
 minversion = 2.0
 
 [testenv]
+basepython = python3.8
 deps=
     pytest
     coverage
@@ -36,7 +40,7 @@
     coverage xml -i
 
 [testenv:flake8]
-basepython = python3.7
+basepython = python3.8
 skip_install = true
 deps = flake8
 commands = flake8 setup.py a1 tests
@@ -44,6 +48,8 @@
 [flake8]
 extend-ignore = E501,E741,E731
 
+# Docs
+
 # verbatim as asked for by the docs instructions: https://wiki.o-ran-sc.org/display/DOC/Configure+Repo+for+Documentation
 [testenv:docs]
 basepython = python3.7
commit	102b89592db01d8361a754c11a85699e5d2e965c	[log] [tgz]
author	Tommy Carpenter <tc677g@att.com>	Fri Mar 20 10:02:46 2020 -0400
committer	Tommy Carpenter <tc677g@att.com>	Fri Mar 20 12:27:06 2020 -0400
tree	34a70a8625fd6a98fae5e4f06292203f63ebb1d1
parent	4014a85d3cc546bf39ffa64c5cfe6e21944d2afb [diff]