2.4.4 policy-handler - log process memory

- to track down the memory leak that is falsely reported
  by docker stats, the following runtime logging was added
  (see the sketch below)
  = process_memory - rss and other memory usage of the current process
  = virtual_memory - memory info for the whole system
  = thread_stacks  - the active threads with the full stack of each
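
- for illustration only - a rough sketch (not part of this change) of how
  psutil and the python stdlib expose the data behind these log entries;
  the variable names below are placeholders for the example

      import sys
      import threading
      import traceback

      import psutil

      proc = psutil.Process()
      with proc.oneshot():
          # rss, vms, uss, pss, swap, ... of the current process
          process_memory = proc.memory_full_info()._asdict()

      # total, available, used, free, percent, ... of the whole system
      virtual_memory = psutil.virtual_memory()._asdict()

      # map each live thread to its current stack trace
      thread_names = dict((thr.ident, thr.name) for thr in threading.enumerate())
      thread_stacks = dict(
          (thread_names.get(thread_id, thread_id), traceback.format_stack(stack))
          for thread_id, stack in sys._current_frames().items())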

Change-Id: I5f5ab3a477bfba3aecc5963547aa82da6269670b
Signed-off-by: Alex Shatov <alexs@att.com>
Issue-ID: DCAEGEN2-514
diff --git a/etc/config.json b/etc/config.json
index 774142f..e54569b 100644
--- a/etc/config.json
+++ b/etc/config.json
@@ -1,5 +1,4 @@
 {
-    "version" : "2.4.3",
     "wservice_port" : 25577,
     "policy_handler" : {
         "system" : "policy_handler"
diff --git a/policyhandler/onap/audit.py b/policyhandler/onap/audit.py
index 48988fe..0aa1c50 100644
--- a/policyhandler/onap/audit.py
+++ b/policyhandler/onap/audit.py
@@ -26,7 +26,6 @@
 """
 
 import copy
-import gc
 import json
 import os
 import re
@@ -38,10 +37,9 @@
 from datetime import datetime
 from enum import Enum
 
-import psutil
-
 from .CommonLogger import CommonLogger
 from .health import Health
+from .process_info import ProcessInfo
 
 REQUEST_X_ECOMP_REQUESTID = "X-ECOMP-RequestID"
 REQUEST_REMOTE_ADDR = "Remote-Addr"
@@ -117,58 +115,6 @@
         return response_code.name.lower().replace("_", " ")
 
 
-class ProcessInfo(object):
-    """static class to calculate process info"""
-    _KILO_SYMBOLS = ('KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB')
-    _KILO_POWERS = {}
-
-    @staticmethod
-    def init():
-        """init static constants"""
-        for i, kilo_symbol in enumerate(ProcessInfo._KILO_SYMBOLS):
-            ProcessInfo._KILO_POWERS[kilo_symbol] = 1 << (i + 1) * 10
-        ProcessInfo._KILO_SYMBOLS = list(reversed(ProcessInfo._KILO_SYMBOLS))
-
-    @staticmethod
-    def bytes_to_human(byte_count):
-        """converts byte count to human value in kilo-powers"""
-        for kilo_symbol in ProcessInfo._KILO_SYMBOLS:
-            kilo_power = ProcessInfo._KILO_POWERS[kilo_symbol]
-            if byte_count >= kilo_power:
-                value = float(byte_count) / kilo_power
-                return '%.1f%s' % (value, kilo_symbol)
-        return "%sB" % byte_count
-
-    @staticmethod
-    def mem_info():
-        """calculates the memory usage of the current process"""
-        process = psutil.Process()
-        with process.oneshot():
-            mem = process.memory_full_info()
-            return {
-                "uss" : ProcessInfo.bytes_to_human(mem.uss),
-                "rss" : ProcessInfo.bytes_to_human(mem.rss),
-                "swap" : ProcessInfo.bytes_to_human(getattr(mem, "swap", 0)),
-                "pss" : ProcessInfo.bytes_to_human(getattr(mem, "pss", 0))
-            }
-
-
-    @staticmethod
-    def gc_info(full=False):
-        """gets info from garbage collector"""
-        gc_info = {
-            "gc_count" : str(gc.get_count()),
-            "gc_threshold" : str(gc.get_threshold())
-        }
-        try:
-            if gc.garbage:
-                gc_info["gc_garbage"] = ([repr(stuck) for stuck in gc.garbage]
-                                         if full else len(gc.garbage))
-        except Exception:
-            pass
-        return gc_info
-
-
 class _Audit(object):
     """put the audit object on stack per each initiating request in the system
 
@@ -226,17 +172,22 @@
                 "started" : str(_Audit._started),
                 "utcnow" : str(utcnow),
                 "uptime" : str(utcnow - _Audit._started),
-                "active_threads" : sorted([thr.name for thr in threading.enumerate()]),
+                "active_threads" : ProcessInfo.active_threads(),
                 "gc" : ProcessInfo.gc_info(full),
-                "mem_info" : ProcessInfo.mem_info()
+                "virtual_memory" : ProcessInfo.virtual_memory(),
+                "process_memory" : ProcessInfo.process_memory()
             },
             "stats" : _Audit._health.dump(),
             "soft" : {"python" : _Audit._py_ver, "packages" : _Audit._packages}
         }
-        health_txt = "{} health: {}".format(_Audit._service_name, json.dumps(health))
-        self.info(health_txt)
+        self.info("{} health: {}".format(_Audit._service_name, json.dumps(health)))
         return health
 
+    def process_info(self):
+        """get the debug info on all the threads and memory"""
+        process_info = ProcessInfo.get_all()
+        self.info("{} process_info: {}".format(_Audit._service_name, json.dumps(process_info)))
+        return process_info
 
     def __init__(self, job_name=None, request_id=None, req_message=None, **kwargs):
         """create audit object per each request in the system
diff --git a/policyhandler/onap/process_info.py b/policyhandler/onap/process_info.py
new file mode 100644
index 0000000..9fb6334
--- /dev/null
+++ b/policyhandler/onap/process_info.py
@@ -0,0 +1,152 @@
+# ================================================================================
+# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+#
+# ECOMP is a trademark and service mark of AT&T Intellectual Property.
+
+"""generic class to keep get real time info about the current process"""
+
+import gc
+import sys
+import threading
+import traceback
+from functools import wraps
+
+import psutil
+
+
+def safe_operation(func):
+    """safequard the function against any exception"""
+    if not func:
+        return
+
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        """wrapper around the function"""
+        try:
+            return func(*args, **kwargs)
+        except Exception as ex:
+            return "%s: %s" % (type(ex).__name__, str(ex))
+    return wrapper
+
+
+class ProcessInfo(object):
+    """static class to calculate process info"""
+    _BIBYTES_SYMBOLS = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
+    _BIBYTES_VALS = {}
+    _inited = False
+    _lock = threading.Lock()
+
+    @staticmethod
+    def init():
+        """init static constants"""
+        if ProcessInfo._inited:
+            return
+        with ProcessInfo._lock:
+            if ProcessInfo._inited:
+                return
+
+            for i, bibytes_symbol in enumerate(ProcessInfo._BIBYTES_SYMBOLS):
+                ProcessInfo._BIBYTES_VALS[bibytes_symbol] = 1 << (i + 1) * 10
+            ProcessInfo._BIBYTES_SYMBOLS = list(reversed(ProcessInfo._BIBYTES_SYMBOLS))
+            ProcessInfo._inited = True
+
+    @staticmethod
+    def bytes_to_bibytes(byte_count):
+        """converts byte count to human value in kibi-mebi-gibi-...-bytes"""
+        if byte_count is None:
+            return "unknown"
+        if not byte_count or not isinstance(byte_count, int):
+            return byte_count
+        ProcessInfo.init()
+
+        for bibytes_symbol in ProcessInfo._BIBYTES_SYMBOLS:
+            bibytes_value = ProcessInfo._BIBYTES_VALS[bibytes_symbol]
+            if byte_count >= bibytes_value:
+                value = float(byte_count) / bibytes_value
+                return '%.2f %s' % (value, bibytes_symbol)
+        return "%s B" % byte_count
+
+    @staticmethod
+    @safe_operation
+    def process_memory():
+        """calculates the memory usage of the current process"""
+        process = psutil.Process()
+        with process.oneshot():
+            return dict((k, ProcessInfo.bytes_to_bibytes(v))
+                        for k, v in process.memory_full_info()._asdict().iteritems())
+
+
+    @staticmethod
+    @safe_operation
+    def virtual_memory():
+        """calculates the virtual memory usage of the whole vm"""
+        return dict((k, ProcessInfo.bytes_to_bibytes(v))
+                    for k, v in psutil.virtual_memory()._asdict().iteritems())
+
+
+    @staticmethod
+    @safe_operation
+    def active_threads():
+        """list of active threads"""
+        return sorted([thr.name + "(" + str(thr.ident) + ")" for thr in threading.enumerate()])
+
+
+    @staticmethod
+    @safe_operation
+    def thread_stacks():
+        """returns the current threads with their stack"""
+        thread_names = dict((thr.ident, thr.name) for thr in threading.enumerate())
+        return [
+            {
+                "thread_id" : thread_id,
+                "thread_name" : thread_names.get(thread_id),
+                "thread_stack" : [
+                    {
+                        "filename" : filename,
+                        "lineno" : lineno,
+                        "function" : function_name,
+                        "line" : line.strip() if line else None
+                    }
+                    for filename, lineno, function_name, line in traceback.extract_stack(stack)
+                ]
+            }
+            for thread_id, stack in sys._current_frames().items()
+        ]
+
+
+    @staticmethod
+    @safe_operation
+    def gc_info(full=False):
+        """gets info from garbage collector"""
+        gc_info = {
+            "gc_count" : str(gc.get_count()),
+            "gc_threshold" : str(gc.get_threshold())
+        }
+        if gc.garbage:
+            gc_info["gc_garbage"] = ([repr(stuck) for stuck in gc.garbage]
+                                     if full else len(gc.garbage))
+        return gc_info
+
+    @staticmethod
+    def get_all():
+        """all info"""
+        return {
+            "active_threads" : ProcessInfo.active_threads(),
+            "gc" : ProcessInfo.gc_info(full=True),
+            "process_memory" : ProcessInfo.process_memory(),
+            "virtual_memory" : ProcessInfo.virtual_memory(),
+            "thread_stacks" : ProcessInfo.thread_stacks()
+        }
diff --git a/policyhandler/policy_updater.py b/policyhandler/policy_updater.py
index 38ce93a..9f24d3d 100644
--- a/policyhandler/policy_updater.py
+++ b/policyhandler/policy_updater.py
@@ -236,6 +236,7 @@
 
         PolicyUpdater._logger.info("policy_handler health: %s",
                                    json.dumps(aud_catch_up.health(full=True)))
+        PolicyUpdater._logger.info("process_info: %s", json.dumps(aud_catch_up.process_info()))
         return success
 
 
diff --git a/policyhandler/policy_utils.py b/policyhandler/policy_utils.py
index 69978b6..c96d4f6 100644
--- a/policyhandler/policy_utils.py
+++ b/policyhandler/policy_utils.py
@@ -134,7 +134,8 @@
         try:
             return json.loads(json_str)
         except (ValueError, TypeError) as err:
-            Utils._logger.warn("unexpected json %s: %s", str(json_str), str(err))
+            Utils._logger.warn("unexpected json error(%s): len(%s) str[:100]: (%s)",
+                               str(err), len(json_str), str(json_str)[:100])
         return json_str
 
     @staticmethod
diff --git a/policyhandler/web_server.py b/policyhandler/web_server.py
index 5314791..041a442 100644
--- a/policyhandler/web_server.py
+++ b/policyhandler/web_server.py
@@ -198,6 +198,7 @@
         PolicyWeb.logger.info("%s: --------- the end -----------", req_info)
         res = str(datetime.utcnow())
         audit.info_requested(res)
+        PolicyWeb.logger.info("process_info: %s", json.dumps(audit.process_info()))
         return "goodbye! shutdown requested {0}".format(res)
 
     @cherrypy.expose
diff --git a/pom.xml b/pom.xml
index 592beb2..0484790 100644
--- a/pom.xml
+++ b/pom.xml
@@ -30,7 +30,7 @@
   <groupId>org.onap.dcaegen2.platform</groupId>
   <artifactId>policy-handler</artifactId>
   <name>dcaegen2-platform-policy-handler</name>
-  <version>2.4.3-SNAPSHOT</version>
+  <version>2.4.4-SNAPSHOT</version>
   <url>http://maven.apache.org</url>
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
diff --git a/run_policy.sh b/run_policy.sh
index a95aee9..28b5bf8 100644
--- a/run_policy.sh
+++ b/run_policy.sh
@@ -29,7 +29,6 @@
 python -m policyhandler/policy_handler >> ${LOG_FILE} 2>&1 &
 PID=$!
 
-echo "running policy_handler as" ${PID} "log" ${LOG_FILE} | tee -a ${LOG_FILE}
 function finish {
   echo "killing policy_handler ${PID}" $(date +%Y_%m%d-%H:%M:%S.%N) | tee -a ${LOG_FILE}
   kill -9 ${PID}
@@ -37,6 +36,13 @@
 }
 trap finish SIGHUP SIGINT SIGTERM
 
+echo "running policy_handler as" ${PID} "log" ${LOG_FILE} | tee -a ${LOG_FILE}
+uname -a  | tee -a ${LOG_FILE}
+free -h  | tee -a ${LOG_FILE}
+df -h    | tee -a ${LOG_FILE}
+ps afxvw | tee -a ${LOG_FILE}
+ss -aepi | tee -a ${LOG_FILE}
+
 wait ${PID}
 echo "---------------------------------------------" >> ${LOG_FILE} 2>&1
 mv ${LOG_FILE} ${LOG_FILE}.$(date +%Y-%m-%d_%H%M%S)
diff --git a/setup.py b/setup.py
index 5556814..33011e9 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
 setup(
     name='policyhandler',
     description='DCAE-Controller policy-handler to communicate with policy-engine',
-    version="2.4.3",
+    version="2.4.4",
     author='Alex Shatov',
     packages=['policyhandler'],
     zip_safe=False,
diff --git a/tests/mock_settings.py b/tests/mock_settings.py
new file mode 100644
index 0000000..f211dde
--- /dev/null
+++ b/tests/mock_settings.py
@@ -0,0 +1,64 @@
+# ============LICENSE_START=======================================================
+# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
+# ================================================================================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============LICENSE_END=========================================================
+#
+# ECOMP is a trademark and service mark of AT&T Intellectual Property.
+
+import json
+import logging
+import subprocess
+import sys
+import uuid
+from datetime import datetime
+
+from policyhandler.config import Config
+from policyhandler.onap.audit import Audit
+from policyhandler.policy_handler import LogWriter
+
+try:
+    POLICY_HANDLER_VERSION = subprocess.check_output(["python", "setup.py", "--version"]).strip()
+except subprocess.CalledProcessError:
+    POLICY_HANDLER_VERSION = "2.4.1"
+
+
+class Settings(object):
+    """init all locals"""
+    logger = None
+    RUN_TS = datetime.utcnow().isoformat()[:-3] + 'Z'
+    dicovered_config = None
+    deploy_handler_instance_uuid = str(uuid.uuid4())
+
+    @staticmethod
+    def init():
+        """init configs"""
+        Config.load_from_file()
+
+        with open("etc_upload/config.json", 'r') as config_json:
+            Settings.dicovered_config = json.load(config_json)
+
+        Config.load_from_file("etc_upload/config.json")
+
+        Config.config["catch_up"] = {"interval": 10, "max_skips": 2}
+
+        Settings.logger = logging.getLogger("policy_handler.unit_test")
+        sys.stdout = LogWriter(Settings.logger.info)
+        sys.stderr = LogWriter(Settings.logger.error)
+
+        print "print ========== run_policy_handler =========="
+        Settings.logger.info("========== run_policy_handler ==========")
+        Audit.init(Config.get_system_name(), POLICY_HANDLER_VERSION, Config.LOGGER_CONFIG_FILE_PATH)
+
+        Settings.logger.info("starting policy_handler with config:")
+        Settings.logger.info(Audit.log_json_dumps(Config.config))
diff --git a/tests/test_policyhandler.py b/tests/test_policyhandler.py
index c1176a9..f2dd9eb 100644
--- a/tests/test_policyhandler.py
+++ b/tests/test_policyhandler.py
@@ -20,15 +20,12 @@
 
 import copy
 import json
-import logging
 import re
-import subprocess
-import sys
 import time
 import uuid
-from datetime import datetime
 
 import pytest
+
 import cherrypy
 from cherrypy.test.helper import CPWebCase
 
@@ -36,24 +33,22 @@
 from policyhandler.deploy_handler import DeployHandler
 from policyhandler.discovery import DiscoveryClient
 from policyhandler.onap.audit import (REQUEST_X_ECOMP_REQUESTID, Audit,
-                                      AuditHttpCode, Metrics)
+                                      AuditHttpCode)
 from policyhandler.policy_consts import (ERRORED_POLICIES, ERRORED_SCOPES,
                                          LATEST_POLICIES, POLICY_BODY,
                                          POLICY_CONFIG, POLICY_ID, POLICY_NAME,
                                          POLICY_VERSION, SCOPE_PREFIXES)
-from policyhandler.policy_handler import LogWriter
 from policyhandler.policy_receiver import (LOADED_POLICIES, POLICY_VER,
                                            REMOVED_POLICIES, PolicyReceiver)
 from policyhandler.policy_rest import PolicyRest
 from policyhandler.policy_utils import PolicyUtils, Utils
 from policyhandler.web_server import _PolicyWeb
 
-try:
-    POLICY_HANDLER_VERSION = subprocess.check_output(["python", "setup.py", "--version"]).strip()
-except subprocess.CalledProcessError:
-    POLICY_HANDLER_VERSION = "2.4.1"
+from .mock_settings import Settings
 
 
+Settings.init()
+
 class MonkeyHttpResponse(object):
     """Monkey http reposne"""
     def __init__(self, headers):
@@ -101,40 +96,6 @@
     Settings.logger.info("teardown fix_discovery")
 
 
-class Settings(object):
-    """init all locals"""
-    logger = None
-    RUN_TS = datetime.utcnow().isoformat()[:-3] + 'Z'
-    dicovered_config = None
-    deploy_handler_instance_uuid = str(uuid.uuid4())
-
-    @staticmethod
-    def init():
-        """init configs"""
-        Config.load_from_file()
-
-        with open("etc_upload/config.json", 'r') as config_json:
-            Settings.dicovered_config = json.load(config_json)
-
-        Config.load_from_file("etc_upload/config.json")
-
-        Config.config["catch_up"] = {"interval": 10, "max_skips": 2}
-
-        Settings.logger = logging.getLogger("policy_handler.unit_test")
-        sys.stdout = LogWriter(Settings.logger.info)
-        sys.stderr = LogWriter(Settings.logger.error)
-
-        print "print ========== run_policy_handler =========="
-        Settings.logger.info("========== run_policy_handler ==========")
-        Audit.init(Config.get_system_name(), POLICY_HANDLER_VERSION, Config.LOGGER_CONFIG_FILE_PATH)
-
-        Settings.logger.info("starting policy_handler with config:")
-        Settings.logger.info(Audit.log_json_dumps(Config.config))
-
-
-Settings.init()
-
-
 class MonkeyPolicyBody(object):
     """policy body that policy-engine returns"""
     @staticmethod
@@ -169,6 +130,7 @@
     """pretend this is the policy-engine"""
     _scope_prefix = Config.config["scope_prefixes"][0]
     LOREM_IPSUM = """Lorem ipsum dolor sit amet consectetur ametist""".split()
+    LONG_TEXT = "0123456789" * 100
     _policies = []
 
     @staticmethod
@@ -192,10 +154,20 @@
                 if re.match(policy_name, policy[POLICY_NAME])]
 
     @staticmethod
+    def get_configs_all():
+        """get all policies the way the policy-engine finds"""
+        policies = [copy.deepcopy(policy)
+                    for policy in MonkeyPolicyEngine._policies]
+        for policy in policies:
+            policy["config"] = MonkeyPolicyEngine.LONG_TEXT
+        return policies
+
+    @staticmethod
     def get_policy_id(policy_index):
         """get the policy_id by index"""
-        return MonkeyPolicyEngine._scope_prefix \
-             + MonkeyPolicyEngine.LOREM_IPSUM[policy_index % len(MonkeyPolicyEngine.LOREM_IPSUM)]
+        return (MonkeyPolicyEngine._scope_prefix
+                + MonkeyPolicyEngine.LOREM_IPSUM[
+                    policy_index % len(MonkeyPolicyEngine.LOREM_IPSUM)])
 
     @staticmethod
     def gen_policy_latest(policy_index):
@@ -250,6 +222,23 @@
     yield fix_pdp_post  # provide the fixture value
     Settings.logger.info("teardown fix_pdp_post")
 
+
+@pytest.fixture()
+def fix_pdp_post_big(monkeypatch):
+    """monkeyed request /getConfig to PDP"""
+    def monkeyed_policy_rest_post(full_path, json=None, headers=None):
+        """monkeypatch for the POST to policy-engine"""
+        res_json = MonkeyPolicyEngine.get_configs_all()
+        return MonkeyedResponse(full_path, res_json, json, headers)
+
+    Settings.logger.info("setup fix_pdp_post_big")
+    PolicyRest._lazy_init()
+    monkeypatch.setattr('policyhandler.policy_rest.PolicyRest._requests_session.post',
+                        monkeyed_policy_rest_post)
+    yield fix_pdp_post_big  # provide the fixture value
+    Settings.logger.info("teardown fix_pdp_post_big")
+
+
 class MockException(Exception):
     """mock exception"""
     pass
@@ -289,18 +278,18 @@
     yield fix_select_latest_policies_boom
     Settings.logger.info("teardown fix_select_latest_policies_boom")
 
-def monkeyed_deploy_handler(full_path, json=None, headers=None):
-    """monkeypatch for deploy_handler"""
-    return MonkeyedResponse(
-        full_path,
-        {"server_instance_uuid": Settings.deploy_handler_instance_uuid},
-        json, headers
-    )
-
 
 @pytest.fixture()
 def fix_deploy_handler(monkeypatch, fix_discovery):
     """monkeyed discovery request.get"""
+    def monkeyed_deploy_handler(full_path, json=None, headers=None):
+        """monkeypatch for deploy_handler"""
+        return MonkeyedResponse(
+            full_path,
+            {"server_instance_uuid": Settings.deploy_handler_instance_uuid},
+            json, headers
+        )
+
     Settings.logger.info("setup fix_deploy_handler")
     audit = Audit(req_message="fix_deploy_handler")
     DeployHandler._lazy_init(audit)
@@ -310,6 +299,39 @@
     Settings.logger.info("teardown fix_deploy_handler")
 
 
+@pytest.fixture()
+def fix_deploy_handler_fail(monkeypatch, fix_discovery):
+    """monkeyed failed discovery request.get"""
+    def monkeyed_deploy_handler(full_path, json=None, headers=None):
+        """monkeypatch for deploy_handler"""
+        res = MonkeyedResponse(
+            full_path,
+            {"server_instance_uuid": Settings.deploy_handler_instance_uuid},
+            json, headers
+        )
+        res.status_code = 413
+        return res
+
+    @staticmethod
+    def monkeyed_deploy_handler_init(audit_ignore, rediscover=False):
+        """monkeypatch for deploy_handler init"""
+        DeployHandler._url = None
+
+    Settings.logger.info("setup fix_deploy_handler_fail")
+    config_catch_up = Config.config["catch_up"]
+    Config.config["catch_up"] = {"interval": 1, "max_skips": 0}
+
+    audit = Audit(req_message="fix_deploy_handler_fail")
+    DeployHandler._lazy_init(audit, rediscover=True)
+    monkeypatch.setattr('policyhandler.deploy_handler.DeployHandler._requests_session.post',
+                        monkeyed_deploy_handler)
+    monkeypatch.setattr('policyhandler.deploy_handler.DeployHandler._lazy_init',
+                        monkeyed_deploy_handler_init)
+    yield fix_deploy_handler_fail
+    Settings.logger.info("teardown fix_deploy_handler_fail")
+    Config.config["catch_up"] = config_catch_up
+
+
 def monkeyed_cherrypy_engine_exit():
     """monkeypatch for deploy_handler"""
     Settings.logger.info("cherrypy_engine_exit()")
@@ -404,45 +426,6 @@
     assert Utils.are_the_same(policy_latest, expected_policy)
 
 
-def test_healthcheck():
-    """test /healthcheck"""
-    audit = Audit(job_name="test_healthcheck",
-                  req_message="get /healthcheck")
-    metrics = Metrics(aud_parent=audit, targetEntity="test_healthcheck")
-    metrics.metrics_start("test /healthcheck")
-    time.sleep(0.1)
-
-    metrics.metrics("test /healthcheck")
-    health = audit.health(full=True)
-    audit.audit_done(result=json.dumps(health))
-
-    Settings.logger.info("healthcheck: %s", json.dumps(health))
-    assert bool(health)
-
-
-def test_healthcheck_with_error():
-    """test /healthcheck"""
-    audit = Audit(job_name="test_healthcheck_with_error",
-                  req_message="get /healthcheck")
-    metrics = Metrics(aud_parent=audit, targetEntity="test_healthcheck_with_error")
-    metrics.metrics_start("test /healthcheck")
-    time.sleep(0.2)
-    audit.error("error from test_healthcheck_with_error")
-    audit.fatal("fatal from test_healthcheck_with_error")
-    audit.debug("debug from test_healthcheck_with_error")
-    audit.warn("debug from test_healthcheck_with_error")
-    audit.info_requested("debug from test_healthcheck_with_error")
-    if audit.is_success():
-        audit.set_http_status_code(AuditHttpCode.DATA_NOT_FOUND_ERROR.value)
-    audit.set_http_status_code(AuditHttpCode.SERVER_INTERNAL_ERROR.value)
-    metrics.metrics("test /healthcheck")
-
-    health = audit.health(full=True)
-    audit.audit_done(result=json.dumps(health))
-
-    Settings.logger.info("healthcheck: %s", json.dumps(health))
-    assert bool(health)
-
 
 @pytest.mark.usefixtures("fix_pdp_post")
 class WebServerTest(CPWebCase):
@@ -943,3 +926,31 @@
         self.assertStatus('200 OK')
         Settings.logger.info("got shutdown: %s", self.body)
         time.sleep(1)
+
+
+@pytest.mark.usefixtures(
+    "fix_pdp_post_big",
+    "fix_deploy_handler_fail",
+    "fix_policy_receiver_websocket"
+)
+def test_catch_ups_failed_dh():
+    """test run policy handler with catchups and failed deployment-handler"""
+    Settings.logger.info("start test_catch_ups_failed_dh")
+    audit = Audit(job_name="test_catch_ups_failed_dh",
+                  req_message="start test_catch_ups_failed_dh")
+    PolicyReceiver.run(audit)
+
+    Settings.logger.info("sleep 50 before shutdown...")
+    time.sleep(50)
+
+    health = audit.health(full=True)
+    audit.audit_done(result=json.dumps(health))
+
+    Settings.logger.info("healthcheck: %s", json.dumps(health))
+    assert bool(health)
+
+    PolicyReceiver.shutdown(audit)
+    time.sleep(1)
+
+    health = audit.health(full=True)
+    Settings.logger.info("healthcheck: %s", json.dumps(health))
diff --git a/tests/test_step_timer.py b/tests/test_step_timer.py
index 631a44f..808f99e 100644
--- a/tests/test_step_timer.py
+++ b/tests/test_step_timer.py
@@ -29,7 +29,7 @@
 Config.load_from_file()
 
 
-class MockTimer(object):
+class MockTimerController(object):
     """testing step_timer"""
     logger = logging.getLogger("policy_handler.unit_test.step_timer")
 
@@ -50,7 +50,7 @@
         self.status_ts = datetime.utcnow()
         self.exe_ts = None
         self.exe_interval = None
-        self.set_status(MockTimer.INIT)
+        self.set_status(MockTimerController.INIT)
 
     def __enter__(self):
         """constructor"""
@@ -64,65 +64,66 @@
         """timer event"""
         self.exe_ts = datetime.utcnow()
         self.exe_interval = (self.exe_ts - self.status_ts).total_seconds()
-        MockTimer.logger.info("run on_time[%s] (%s, %s) in %s for %s",
-            self.run_counter, json.dumps(args), json.dumps(kwargs),
-            self.exe_interval, self.get_status())
+        MockTimerController.logger.info("run on_time[%s] (%s, %s) in %s for %s",
+                                        self.run_counter, json.dumps(args), json.dumps(kwargs),
+                                        self.exe_interval, self.get_status())
         time.sleep(3)
-        MockTimer.logger.info("done on_time[%s] (%s, %s) in %s for %s",
-            self.run_counter, json.dumps(args), json.dumps(kwargs),
-            self.exe_interval, self.get_status())
+        MockTimerController.logger.info("done on_time[%s] (%s, %s) in %s for %s",
+                                        self.run_counter, json.dumps(args), json.dumps(kwargs),
+                                        self.exe_interval, self.get_status())
 
     def verify_last_event(self):
         """assertions needs to be in the main thread"""
         if self.exe_interval is None:
-            MockTimer.logger.info("not executed: %s", self.get_status())
+            MockTimerController.logger.info("not executed: %s", self.get_status())
             return
 
-        MockTimer.logger.info("verify exe %s for %s", self.exe_interval, self.get_status())
+        MockTimerController.logger.info("verify exe %s for %s",
+                                        self.exe_interval, self.get_status())
         assert self.exe_interval >= self.interval
         assert self.exe_interval < 2 * self.interval
-        MockTimer.logger.info("success %s", self.get_status())
+        MockTimerController.logger.info("success %s", self.get_status())
 
     def run_timer(self):
         """create and start the step_timer"""
         if self.step_timer:
             self.step_timer.next()
-            self.set_status(MockTimer.NEXT)
+            self.set_status(MockTimerController.NEXT)
             return
 
         self.step_timer = StepTimer(
-            self.name, self.interval, MockTimer.on_time,
-            MockTimer.logger,
+            self.name, self.interval, MockTimerController.on_time,
+            MockTimerController.logger,
             self
         )
         self.step_timer.start()
-        self.set_status(MockTimer.STARTED)
+        self.set_status(MockTimerController.STARTED)
 
     def pause_timer(self):
         """pause step_timer"""
         if self.step_timer:
             self.step_timer.pause()
-            self.set_status(MockTimer.PAUSED)
+            self.set_status(MockTimerController.PAUSED)
 
     def stop_timer(self):
         """stop step_timer"""
         if self.step_timer:
-            self.set_status(MockTimer.STOPPING)
+            self.set_status(MockTimerController.STOPPING)
             self.step_timer.stop()
             self.step_timer.join()
             self.step_timer = None
-            self.set_status(MockTimer.STOPPED)
+            self.set_status(MockTimerController.STOPPED)
 
     def set_status(self, status):
         """set the status of the timer"""
-        if status in [MockTimer.NEXT, MockTimer.STARTED]:
+        if status in [MockTimerController.NEXT, MockTimerController.STARTED]:
             self.run_counter += 1
 
         self.status = status
         utcnow = datetime.utcnow()
         time_step = (utcnow - self.status_ts).total_seconds()
         self.status_ts = utcnow
-        MockTimer.logger.info("%s: %s", time_step, self.get_status())
+        MockTimerController.logger.info("[%s]: %s", time_step, self.get_status())
 
     def get_status(self):
         """string representation"""
@@ -137,8 +138,8 @@
 
 def test_step_timer():
     """test step_timer"""
-    MockTimer.logger.info("============ test_step_timer =========")
-    with MockTimer("step_timer", 5) as step_timer:
+    MockTimerController.logger.info("============ test_step_timer =========")
+    with MockTimerController("step_timer", 5) as step_timer:
         step_timer.run_timer()
         time.sleep(1)
         step_timer.verify_last_event()
@@ -165,8 +166,8 @@
 
 def test_interrupt_step_timer():
     """test step_timer"""
-    MockTimer.logger.info("============ test_interrupt_step_timer =========")
-    with MockTimer("step_timer", 5) as step_timer:
+    MockTimerController.logger.info("============ test_interrupt_step_timer =========")
+    with MockTimerController("step_timer", 5) as step_timer:
         step_timer.run_timer()
         time.sleep(1)
         step_timer.verify_last_event()
diff --git a/tests/test_zzz_memory.py b/tests/test_zzz_memory.py
index 31a8dea..e70cf59 100644
--- a/tests/test_zzz_memory.py
+++ b/tests/test_zzz_memory.py
@@ -20,20 +20,13 @@
 
 import gc
 import json
-import logging
-import subprocess
-import sys
+import time
 
-from policyhandler.config import Config
-from policyhandler.onap.audit import Audit
-from policyhandler.policy_handler import LogWriter
+from policyhandler.onap.audit import Audit, AuditHttpCode, Metrics
 
-Config.load_from_file()
+from .mock_settings import Settings
 
-try:
-    POLICY_HANDLER_VERSION = subprocess.check_output(["python", "setup.py", "--version"]).strip()
-except subprocess.CalledProcessError:
-    POLICY_HANDLER_VERSION = "2.4.1"
+Settings.init()
 
 class Node(object):
     """making the cycled objects"""
@@ -44,16 +37,49 @@
         return '%s(%s)' % (self.__class__.__name__, self.name)
 
 
+def test_healthcheck():
+    """test /healthcheck"""
+    audit = Audit(job_name="test_healthcheck",
+                  req_message="get /healthcheck")
+    metrics = Metrics(aud_parent=audit, targetEntity="test_healthcheck")
+    metrics.metrics_start("test /healthcheck")
+    time.sleep(0.1)
+
+    metrics.metrics("test /healthcheck")
+    health = audit.health(full=True)
+    audit.audit_done(result=json.dumps(health))
+
+    Settings.logger.info("healthcheck: %s", json.dumps(health))
+    assert bool(health)
+
+
+def test_healthcheck_with_error():
+    """test /healthcheck"""
+    audit = Audit(job_name="test_healthcheck_with_error",
+                  req_message="get /healthcheck")
+    metrics = Metrics(aud_parent=audit, targetEntity="test_healthcheck_with_error")
+    metrics.metrics_start("test /healthcheck")
+    time.sleep(0.2)
+    audit.error("error from test_healthcheck_with_error")
+    audit.fatal("fatal from test_healthcheck_with_error")
+    audit.debug("debug from test_healthcheck_with_error")
+    audit.warn("debug from test_healthcheck_with_error")
+    audit.info_requested("debug from test_healthcheck_with_error")
+    if audit.is_success():
+        audit.set_http_status_code(AuditHttpCode.DATA_NOT_FOUND_ERROR.value)
+    audit.set_http_status_code(AuditHttpCode.SERVER_INTERNAL_ERROR.value)
+    metrics.metrics("test /healthcheck")
+
+    health = audit.health(full=True)
+    audit.audit_done(result=json.dumps(health))
+
+    Settings.logger.info("healthcheck: %s", json.dumps(health))
+    assert bool(health)
+
+
 def test_healthcheck_with_garbage():
     """test /healthcheck"""
-
-    Audit.init(Config.get_system_name(), POLICY_HANDLER_VERSION, Config.LOGGER_CONFIG_FILE_PATH)
-
-
-    logger = logging.getLogger("policy_handler.unit_test_memory")
-    sys.stdout = LogWriter(logger.info)
-    sys.stderr = LogWriter(logger.error)
-
+    gc_found = gc.collect()
     gc.set_debug(gc.DEBUG_LEAK)
 
     node1 = Node("one")
@@ -70,19 +96,22 @@
     health = audit.health(full=True)
     audit.audit_done(result=json.dumps(health))
 
-    logger.info("test_healthcheck_with_garbage[%s]: %s", gc_found, json.dumps(health))
+    Settings.logger.info("test_healthcheck_with_garbage[%s]: %s", gc_found, json.dumps(health))
     assert bool(health)
     assert bool(health.get("runtime", {}).get("gc", {}).get("gc_garbage"))
 
-    logger.info("clearing up garbage...")
+    Settings.logger.info("clearing up garbage...")
     for obj in gc.garbage:
         if isinstance(obj, Node):
-            logger.info("in garbage: %s 0x%x", obj, id(obj))
+            Settings.logger.info("in garbage: %s 0x%x", obj, id(obj))
             obj.next = None
 
     gc_found = gc.collect()
-    health = audit.health(full=True)
-    logger.info("after clear test_healthcheck_with_garbage[%s]: %s", gc_found, json.dumps(health))
-    assert bool(health)
+    Settings.logger.info("after clear test_healthcheck_with_garbage[%s]: %s",
+                         gc_found, json.dumps(audit.health(full=True)))
 
-    gc.set_debug(not gc.DEBUG_LEAK)
+    gc.set_debug(False)
+
+    gc_found = gc.collect()
+    Settings.logger.info("after turned off gc debug test_healthcheck_with_garbage[%s]: %s",
+                         gc_found, json.dumps(audit.health(full=True)))
diff --git a/version.properties b/version.properties
index b3509ec..9282bc9 100644
--- a/version.properties
+++ b/version.properties
@@ -1,6 +1,6 @@
 major=2
 minor=4
-patch=3
+patch=4
 base_version=${major}.${minor}.${patch}
 release_version=${base_version}
 snapshot_version=${base_version}-SNAPSHOT