# blob: d728e48e1fbc4988d040b723a6e5f59f6b9194b7 [file] [log] [blame]
# ================================================================================
# Copyright (c) 2018-2019 AT&T Intellectual Property. All rights reserved.
# ================================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============LICENSE_END=========================================================
#
"""utils and conversions"""
import json
import logging
import os
from copy import deepcopy
from typing import Pattern
class ToBeImplementedException(Exception):
    """Raised for policy-handler features that are yet to be implemented."""
class Utils(object):
    """General purpose utils: logger naming, tolerant json parsing, deep comparison."""

    _logger = logging.getLogger("policy_handler.utils")

    @staticmethod
    def get_logger(file_path):
        """
        Get the logger for the file_path == __file__.

        The logger name is "policy_handler" followed by the folder names found
        between the "policyhandler" folder and the file, and then the file name
        without its extension.  A "tests" folder is reported as "unit_test" and
        ends the walk.  The walk also ends at the filesystem root or once more
        than five name parts have been collected.

        :param file_path: path of the module asking for the logger
        :return: the logging.Logger registered under the dotted name
        """
        file_path = os.path.realpath(file_path)
        # splitext drops whatever extension is present instead of assuming ".py"
        logger_path = [os.path.splitext(os.path.basename(file_path))[0]]

        while file_path:
            file_path = os.path.dirname(file_path)
            folder_name = os.path.basename(file_path)
            # an empty folder name means the root is reached - dirname() would
            # return the very same path from now on, so stop instead of
            # collecting empty name parts
            if not folder_name or folder_name == "policyhandler" or len(logger_path) > 5:
                break
            if folder_name == "tests":
                logger_path.append("unit_test")
                break
            logger_path.append(folder_name)

        logger_path.append("policy_handler")
        return logging.getLogger(".".join(reversed(logger_path)))

    @staticmethod
    def safe_json_parse(json_str):
        """
        Try parsing json without exception - returns the json_str back if fails.

        :param json_str: value to parse; falsy values are returned untouched
        :return: the parsed object, or json_str itself when it cannot be parsed
        """
        if not json_str:
            return json_str
        try:
            return json.loads(json_str)
        except (ValueError, TypeError) as err:
            # json.loads raises TypeError on non-string input (int, bool, ...)
            # that may not support len() - measuring it must not raise again
            try:
                size = len(json_str)
            except TypeError:
                size = len(str(json_str))
            Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)",
                                  str(err), size, str(json_str)[:100])
        return json_str

    @staticmethod
    def are_the_same(body_1, body_2, json_dumps=None):
        """
        Check whether both objects are the same.

        Lists are compared element by element in order, dicts key by key,
        anything else with ==.  Each mismatch is reported at debug level.

        :param body_1: first object
        :param body_2: second object
        :param json_dumps: serializer used for the debug messages,
            json.dumps when not provided
        :return: True when the objects are the same, False otherwise
        """
        if not json_dumps:
            json_dumps = json.dumps

        # exactly one of them is empty/falsy
        if bool(body_1) != bool(body_2):
            Utils._logger.debug("only one is empty %s != %s", body_1, body_2)
            return False

        if body_1 is None and body_2 is None:
            return True

        if isinstance(body_1, list) and isinstance(body_2, list):
            if len(body_1) != len(body_2):
                Utils._logger.debug("len %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False
            return all(Utils.are_the_same(val_1, val_2, json_dumps)
                       for val_1, val_2 in zip(body_1, body_2))

        if isinstance(body_1, dict) and isinstance(body_2, dict):
            # symmetric difference is non-empty when either side has an extra key
            if body_1.keys() ^ body_2.keys():
                Utils._logger.debug("keys %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False
            return all(Utils.are_the_same(val_1, body_2[key], json_dumps)
                       for key, val_1 in body_1.items())

        # ... here when primitive values or mismatched types ...
        the_same_values = (body_1 == body_2)
        if not the_same_values:
            Utils._logger.debug("values %s != %s", body_1, body_2)
        return the_same_values
class RegexCoarser(object):
    """
    Utility to combine or coarse the collection of regex patterns
    into a single regex that is at least not narrower (wider or the same)
    than the collection regexes.

    The patterns are tokenized and stored in a trie (nested dicts keyed by
    token); the end of each pattern is marked by the ENDER key.

    inspired by https://github.com/spadgos/regex-combiner in js
    """
    ENDER = '***'                               # trie key that marks the end of a pattern
    GROUPERS = {'{': '}', '[': ']', '(': ')'}   # group opener -> group closer
    MODIFIERS = '*?+'                           # chars glued to the preceding token
    CHOICE_STARTER = '('
    HIDDEN_CHOICE_STARTER = '(?:'
    ANY_CHARS = '.*'
    LINE_START = '^'

    def __init__(self, regex_patterns=None):
        """
        Regex coarser.

        :param regex_patterns: optional list of patterns (str or compiled regex)
        """
        self.trie = {}
        self.patterns = []
        self.add_regex_patterns(regex_patterns)

    def get_combined_regex_pattern(self):
        """
        Gets the pattern for the combined regex.

        :return: single pattern string, '' when no patterns were added
        """
        # work on a copy - compressing rewrites the keys of the trie
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        return RegexCoarser._trie_to_pattern(trie)

    def get_coarse_regex_patterns(self, max_length=100):
        """
        Gets the patterns for the coarse regex.

        Each top level pattern that contains a choice after a meaningful prefix
        is widened into prefix + ANY_CHARS.  The resulting patterns are then
        packed into alternation groups by _join_patterns.

        :param max_length: length limit passed to _join_patterns
        :return: list of pattern strings, [] when there is nothing to return
        """
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        patterns = RegexCoarser._trie_to_pattern(trie, True)

        root_patterns = []
        for pattern in patterns:
            left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER)
            # only widen when there is a real prefix in front of the choice -
            # not just the line start or whitespace
            if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace():
                pattern = left + RegexCoarser.ANY_CHARS
            root_patterns.append(pattern)

        root_patterns = RegexCoarser._join_patterns(root_patterns, max_length)
        if not root_patterns or root_patterns == ['']:
            return []
        return root_patterns

    def add_regex_patterns(self, new_regex_patterns):
        """
        Adds the new_regex patterns to RegexPatternCoarser.

        :param new_regex_patterns: list of patterns; anything that is empty
            or not a list is ignored
        """
        if not new_regex_patterns or not isinstance(new_regex_patterns, list):
            return
        for new_regex_pattern in new_regex_patterns:
            self.add_regex_pattern(new_regex_pattern)

    def add_regex_pattern(self, new_regex_pattern):
        """
        Adds the new_regex to RegexPatternCoarser.

        :param new_regex_pattern: str or compiled regex; empty values and
            other types are ignored
        """
        new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern)
        if not new_regex_pattern:
            return

        self.patterns.append(new_regex_pattern)

        tokens = RegexCoarser._tokenize(new_regex_pattern)
        last_token_idx = len(tokens) - 1
        trie_node = self.trie
        for idx, token in enumerate(tokens):
            if token not in trie_node:
                trie_node[token] = {}
            if idx == last_token_idx:
                # mark that a pattern ends on this token
                trie_node[token][RegexCoarser.ENDER] = {}
            trie_node = trie_node[token]

    @staticmethod
    def _regex_pattern_to_string(regex_pattern):
        """
        Convert regex pattern to string.

        :return: '' for a falsy value, the string itself for str,
            the .pattern of a compiled regex, None for any other type
        """
        if not regex_pattern:
            return ''
        if isinstance(regex_pattern, str):
            return regex_pattern
        if isinstance(regex_pattern, Pattern):
            return regex_pattern.pattern
        return None

    @staticmethod
    def _tokenize(regex_pattern):
        """
        Tokenize the regex pattern for trie assignment.

        A token is a single char, an escaped char (backslash + next char) or a
        whole group ({...}, [...], (...)).  Modifiers (*, ?, +) and {...}
        groups are glued to the preceding token.

        :param regex_pattern: pattern string
        :return: list of tokens
        """
        tokens = []
        token = ''
        group_ender = None
        use_next = False

        for char in regex_pattern:
            if use_next:
                # the char right after the backslash is taken as is
                use_next = False
                token += char
                char = None

            if char == '\\':
                use_next = True
                token += char
                continue

            if not group_ender and char in RegexCoarser.GROUPERS:
                group_ender = RegexCoarser.GROUPERS[char]
                token = char
                char = None

            if char is None:
                pass
            elif char == group_ender:
                token += char
                group_ender = None
                if char == '}':  # this group is a modifier
                    if tokens:
                        tokens[-1] += token
                    else:
                        # nothing to modify at the start of the pattern -
                        # keep the group as its own token instead of failing
                        tokens.append(token)
                    token = ''
                    continue
            elif char in RegexCoarser.MODIFIERS:
                if group_ender:
                    token += char
                elif tokens:
                    tokens[-1] += char
                else:
                    # leading modifier has no token to attach to -
                    # keep it as its own token instead of failing
                    tokens.append(char)
                continue
            else:
                token += char

            if not group_ender:
                tokens.append(token)
                token = ''

        if token:
            tokens.append(token)
        return tokens

    @staticmethod
    def _compress(trie):
        """
        Compress trie into shortest leaves.

        A node with a single child is merged with that child under the
        concatenated key.  The trie is changed in place.
        """
        # iterate over a snapshot - keys of the trie are added and removed
        # inside the loop, which is not allowed while iterating the dict itself
        for key, subtrie in list(trie.items()):
            RegexCoarser._compress(subtrie)
            subkeys = list(subtrie.keys())
            if len(subkeys) == 1:
                trie[key + subkeys[0]] = subtrie[subkeys[0]]
                del trie[key]

    @staticmethod
    def _trie_to_pattern(trie, top_keep=False):
        """
        Convert trie to the regex pattern.

        :param trie: trie or subtrie to convert
        :param top_keep: when True, return the list of patterns of this level
            instead of joining them into a single pattern
        :return: pattern string, or list of pattern strings when top_keep
        """
        patterns = [
            key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie)
            for key, subtrie in trie.items()
        ]

        if top_keep:
            return patterns

        return RegexCoarser._join_patterns(patterns)[0]

    @staticmethod
    def _join_patterns(patterns, max_length=0):
        """
        Convert list of patterns to the segmented list of dense regex patterns.

        Without max_length all the patterns go into a single (?:...|...) choice.
        With max_length the empty patterns are dropped, the patterns that are
        at least max_length long are returned as they are (after the joined
        ones), and the rest is packed into choices whose summed pattern
        lengths stay below max_length (separators and wrapper not counted).

        :param patterns: list of pattern strings
        :param max_length: length limit per joined pattern, 0 for no limit
        :return: non-empty list of pattern strings, [''] for no patterns
        """
        if not patterns:
            return ['']

        if len(patterns) == 1:
            return patterns

        if not max_length:
            return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')']

        long_patterns = []
        join_patterns = []   # list of (patterns to join, summed length of them)
        for pattern in patterns:
            len_pattern = len(pattern)
            if not len_pattern:
                continue
            if len_pattern >= max_length:
                long_patterns.append(pattern)
                continue

            # first fit into the shortest group that still has room
            for idx, (patterns_to_join, len_patterns_to_join) in enumerate(join_patterns):
                if len_pattern + len_patterns_to_join < max_length:
                    patterns_to_join.append(pattern)
                    join_patterns[idx] = (patterns_to_join, len_patterns_to_join + len_pattern)
                    break
            else:
                join_patterns.append(([pattern], len_pattern))
            join_patterns.sort(key=lambda x: x[1])

        joined_patterns = [
            RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')'
            for patterns_to_join, _ in join_patterns
        ]
        return joined_patterns + long_patterns