| # ================================================================================ |
| # Copyright (c) 2018-2020 AT&T Intellectual Property. All rights reserved. |
| # ================================================================================ |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============LICENSE_END========================================================= |
| # |
| |
| """utils and conversions""" |
| |
| import json
| import logging
| import os
| import re
| from copy import deepcopy
| |
| class Utils(object): |
| """general purpose utils""" |
| _logger = logging.getLogger("policy_handler.utils") |
| |
| @staticmethod |
| def get_logger(file_path): |
| """get the logger for the file_path == __file__""" |
| logger_path = [] |
| file_path = os.path.realpath(file_path) |
| logger_path.append(os.path.basename(file_path)[:-3]) |
| while file_path: |
| file_path = os.path.dirname(file_path) |
| folder_name = os.path.basename(file_path) |
| if folder_name == "policyhandler" or len(logger_path) > 5: |
| break |
| if folder_name == "tests": |
| logger_path.append("unit_test") |
| break |
| logger_path.append(folder_name) |
| |
| logger_path.append("policy_handler") |
| return logging.getLogger(".".join(reversed(logger_path))) |
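|
| # usage sketch (illustrative only - the paths below are hypothetical):
| #     >>> Utils.get_logger("/opt/app/policyhandler/discovery.py").name
| #     'policy_handler.discovery'
| #     >>> Utils.get_logger("/opt/app/tests/test_step.py").name
| #     'policy_handler.unit_test.test_step'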
| |
| @staticmethod |
| def safe_json_parse(json_str): |
| """try parsing json without exception - returns the json_str back if fails""" |
| if not json_str: |
| return json_str |
| try: |
| return json.loads(json_str) |
| except (ValueError, TypeError) as err: |
| Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)", |
| str(err), len(json_str), str(json_str)[:100]) |
| return json_str |
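|
| # usage sketch (illustrative only):
| #     >>> Utils.safe_json_parse('{"a": 1}')
| #     {'a': 1}
| #     >>> Utils.safe_json_parse('{not json')   # logs a warning instead of raising
| #     '{not json'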
| |
| @staticmethod |
| def are_the_same(body_1, body_2, json_dumps=None): |
| """check whether both objects are the same""" |
| if not json_dumps: |
| json_dumps = json.dumps |
| if (body_1 and not body_2) or (not body_1 and body_2): |
| Utils._logger.debug("only one is empty %s != %s", body_1, body_2) |
| return False |
| |
| if body_1 is None and body_2 is None: |
| return True |
| |
| if isinstance(body_1, list) and isinstance(body_2, list): |
| if len(body_1) != len(body_2): |
| Utils._logger.debug("len %s != %s", json_dumps(body_1), json_dumps(body_2)) |
| return False |
| |
| for val_1, val_2 in zip(body_1, body_2): |
| if not Utils.are_the_same(val_1, val_2, json_dumps): |
| return False |
| return True |
| |
| if isinstance(body_1, dict) and isinstance(body_2, dict): |
| if body_1.keys() ^ body_2.keys(): |
| Utils._logger.debug("keys %s != %s", json_dumps(body_1), json_dumps(body_2)) |
| return False |
| |
| for key, val_1 in body_1.items(): |
| val_2 = body_2[key] |
| if isinstance(val_1, str) or isinstance(val_2, str): |
| if val_1 != val_2: |
| Utils._logger.debug("key-values %s != %s", |
| json_dumps({key: val_1}), json_dumps({key: val_2})) |
| return False |
| continue |
| |
| if not Utils.are_the_same(val_1, val_2, json_dumps):
| return False |
| return True |
| |
| # ... here when primitive values or mismatched types ... |
| the_same_values = (body_1 == body_2) |
| if not the_same_values: |
| Utils._logger.debug("values %s != %s", body_1, body_2) |
| return the_same_values |
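|
| # usage sketch (illustrative only):
| #     >>> Utils.are_the_same({"a": [1, {"b": 2}]}, {"a": [1, {"b": 2}]})
| #     True
| #     >>> Utils.are_the_same({"a": 1}, {"a": "1"})   # a str value only equals an equal str
| #     False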
| |
| class RegexCoarser(object): |
| """ |
| utility to combine or coarse the collection of regex patterns |
| into a single regex that is at least not narrower (wider or the same) |
| than the collection regexes |
| |
| inspired by https://github.com/spadgos/regex-combiner in js |
| """ |
| ENDER = '***' |
| GROUPERS = {'{': '}', '[': ']', '(': ')'} |
| MODIFIERS = '*?+' |
| CHOICE_STARTER = '(' |
| HIDDEN_CHOICE_STARTER = '(?:' |
| ANY_CHARS = '.*' |
| LINE_START = '^' |
| |
| def __init__(self, regex_patterns=None): |
| """regex coarser""" |
| self.trie = {} |
| self.patterns = [] |
| self.add_regex_patterns(regex_patterns) |
| |
| def get_combined_regex_pattern(self): |
| """gets the pattern for the combined regex""" |
| trie = deepcopy(self.trie) |
| RegexCoarser._compress(trie) |
| return RegexCoarser._trie_to_pattern(trie) |
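|
| # usage sketch (illustrative only):
| #     >>> RegexCoarser(["abc", "abd"]).get_combined_regex_pattern()
| #     'ab(?:c|d)'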
| |
| def get_coarse_regex_patterns(self, max_length=100): |
| """gets the patterns for the coarse regex""" |
| trie = deepcopy(self.trie) |
| RegexCoarser._compress(trie) |
| patterns = RegexCoarser._trie_to_pattern(trie, True) |
| |
| root_patterns = [] |
| for pattern in patterns: |
| left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER) |
| if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace(): |
| pattern = left + RegexCoarser.ANY_CHARS |
| root_patterns.append(pattern) |
| root_patterns = RegexCoarser._join_patterns(root_patterns, max_length) |
| |
| if not root_patterns or root_patterns == ['']: |
| return [] |
| return root_patterns |
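|
| # usage sketch (illustrative only) - the common root "ab" is widened to "ab.*":
| #     >>> RegexCoarser(["abc", "abd"]).get_coarse_regex_patterns()
| #     ['ab.*']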
| |
| def add_regex_patterns(self, new_regex_patterns): |
| """adds the new_regex patterns to RegexPatternCoarser""" |
| if not new_regex_patterns or not isinstance(new_regex_patterns, list): |
| return |
| for new_regex_pattern in new_regex_patterns: |
| self.add_regex_pattern(new_regex_pattern) |
| |
| def add_regex_pattern(self, new_regex_pattern): |
| """adds the new_regex to RegexPatternCoarser""" |
| new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern) |
| if not new_regex_pattern: |
| return |
| |
| self.patterns.append(new_regex_pattern) |
| |
| tokens = RegexCoarser._tokenize(new_regex_pattern) |
| last_token_idx = len(tokens) - 1 |
| trie_node = self.trie |
| for idx, token in enumerate(tokens): |
| if token not in trie_node: |
| trie_node[token] = {} |
| if idx == last_token_idx: |
| trie_node[token][RegexCoarser.ENDER] = {} |
| trie_node = trie_node[token] |
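|
| # trie shape sketch (illustrative only) - ENDER marks the end of a pattern:
| #     >>> coarser = RegexCoarser()
| #     >>> coarser.add_regex_pattern("abc")
| #     >>> coarser.trie
| #     {'a': {'b': {'c': {'***': {}}}}}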
| |
| @staticmethod |
| def _regex_pattern_to_string(regex_pattern): |
| """convert regex pattern to string""" |
| if not regex_pattern: |
| return '' |
| |
| if isinstance(regex_pattern, str): |
| return regex_pattern |
| |
| if isinstance(regex_pattern, re.Pattern):
| return regex_pattern.pattern |
| return None |
| |
| @staticmethod |
| def _tokenize(regex_pattern): |
| """tokenize the regex pattern for trie assignment""" |
| tokens = [] |
| token = '' |
| group_ender = None |
| use_next = False |
| |
| for char in regex_pattern: |
| if use_next: |
| use_next = False |
| token += char |
| char = None |
| |
| if char == '\\': |
| use_next = True |
| token += char |
| continue |
| |
| if not group_ender and char in RegexCoarser.GROUPERS: |
| group_ender = RegexCoarser.GROUPERS[char] |
| token = char |
| char = None |
| |
| if char is None: |
| pass |
| elif char == group_ender: |
| token += char |
| group_ender = None |
| if char == '}': # this group is a modifier |
| tokens[-1] += token
| token = '' |
| continue |
| elif char in RegexCoarser.MODIFIERS: |
| if group_ender: |
| token += char |
| else: |
| tokens[-1] += char
| continue |
| else: |
| token += char |
| |
| if not group_ender: |
| tokens.append(token) |
| token = '' |
| |
| if token: |
| tokens.append(token) |
| return tokens |
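|
| # tokenization sketch (illustrative only) - groups stay whole and modifiers
| # stay attached to the preceding token:
| #     >>> RegexCoarser._tokenize("a[bc]+d")
| #     ['a', '[bc]+', 'd']
| #     >>> RegexCoarser._tokenize("ab{2}c")
| #     ['a', 'b{2}', 'c']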
| |
| @staticmethod |
| def _compress(trie): |
| """compress trie into shortest leaves""" |
| for key, subtrie in trie.items(): |
| RegexCoarser._compress(subtrie) |
| subkeys = list(subtrie.keys()) |
| if len(subkeys) == 1: |
| trie[key + subkeys[0]] = subtrie[subkeys[0]] |
| del trie[key] |
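|
| # compression sketch (illustrative only) - single-child chains collapse
| # into one concatenated key:
| #     {'a': {'b': {'c': {'***': {}}}}}  ->  {'abc***': {}}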
| |
| @staticmethod |
| def _trie_to_pattern(trie, top_keep=False): |
| """convert trie to the regex pattern""" |
| patterns = [ |
| key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie) |
| for key, subtrie in trie.items() |
| ] |
| |
| if top_keep: |
| return patterns |
| |
| return RegexCoarser._join_patterns(patterns)[0] |
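|
| # conversion sketch (illustrative only) - sibling subtries become a hidden choice:
| #     {'ab': {'c***': {}, 'd***': {}}}  ->  'ab(?:c|d)'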
| |
| @staticmethod |
| def _join_patterns(patterns, max_length=0): |
| """convert list of patterns to the segmented list of dense regex patterns""" |
| if not patterns: |
| return [''] |
| |
| if len(patterns) == 1: |
| return patterns |
| |
| if not max_length: |
| return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')'] |
| |
| long_patterns = [] |
| join_patterns = [] |
| for pattern in patterns: |
| len_pattern = len(pattern) |
| if not len_pattern: |
| continue |
| if len_pattern >= max_length: |
| long_patterns.append(pattern) |
| continue |
| |
| for idx, patterns_to_join in enumerate(join_patterns): |
| patterns_to_join, len_patterns_to_join = patterns_to_join |
| if len_pattern + len_patterns_to_join < max_length: |
| patterns_to_join.append(pattern) |
| len_patterns_to_join += len_pattern |
| join_patterns[idx] = (patterns_to_join, len_patterns_to_join) |
| len_pattern = 0 |
| break |
| if len_pattern: |
| join_patterns.append(([pattern], len_pattern)) |
| join_patterns.sort(key=lambda x: x[1]) |
| |
| if join_patterns: |
| join_patterns = [ |
| RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')' |
| for patterns_to_join, _ in join_patterns |
| ] |
| |
| return join_patterns + long_patterns |
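|
| # segmentation sketch (illustrative only) - patterns are packed first-fit
| # into choice groups of at most max_length chars; note that a leftover
| # singleton still gets wrapped:
| #     >>> RegexCoarser._join_patterns(['ab', 'cd'])
| #     ['(?:ab|cd)']
| #     >>> RegexCoarser._join_patterns(['aaaa', 'bb', 'cc'], max_length=5)
| #     ['(?:bb|cc)', '(?:aaaa)']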