Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 1 | # ================================================================================ |
Alex Shatov | 9a4d3c5 | 2019-04-01 11:32:06 -0400 | [diff] [blame^] | 2 | # Copyright (c) 2018-2019 AT&T Intellectual Property. All rights reserved. |
Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 3 | # ================================================================================ |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | # ============LICENSE_END========================================================= |
| 16 | # |
Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 17 | |
Alex Shatov | 9a4d3c5 | 2019-04-01 11:32:06 -0400 | [diff] [blame^] | 18 | """utils and conversions""" |
Alex Shatov | f53e5e7 | 2018-01-11 11:15:56 -0500 | [diff] [blame] | 19 | |
Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 20 | import json |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 21 | import logging |
Alex Shatov | 9a4d3c5 | 2019-04-01 11:32:06 -0400 | [diff] [blame^] | 22 | import os |
Alex Shatov | d7f34d4 | 2018-08-07 12:11:35 -0400 | [diff] [blame] | 23 | from copy import deepcopy |
| 24 | from typing import Pattern |
Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 25 | |
Alex Shatov | 9a4d3c5 | 2019-04-01 11:32:06 -0400 | [diff] [blame^] | 26 | class ToBeImplementedException(Exception): |
| 27 | """exception for to be implemented features of policy-handler""" |
| 28 | pass |
Alex Shatov | 1369bea | 2018-01-10 11:00:50 -0500 | [diff] [blame] | 29 | |
Alex Shatov | d7f34d4 | 2018-08-07 12:11:35 -0400 | [diff] [blame] | 30 | |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 31 | class Utils(object): |
| 32 | """general purpose utils""" |
| 33 | _logger = logging.getLogger("policy_handler.utils") |
| 34 | |
| 35 | @staticmethod |
Alex Shatov | 9a4d3c5 | 2019-04-01 11:32:06 -0400 | [diff] [blame^] | 36 | def get_logger(file_path): |
| 37 | """get the logger for the file_path == __file__""" |
| 38 | logger_path = [] |
| 39 | file_path = os.path.realpath(file_path) |
| 40 | logger_path.append(os.path.basename(file_path)[:-3]) |
| 41 | while file_path: |
| 42 | file_path = os.path.dirname(file_path) |
| 43 | folder_name = os.path.basename(file_path) |
| 44 | if folder_name == "policyhandler" or len(logger_path) > 5: |
| 45 | break |
| 46 | if folder_name == "tests": |
| 47 | logger_path.append("unit_test") |
| 48 | break |
| 49 | logger_path.append(folder_name) |
| 50 | |
| 51 | logger_path.append("policy_handler") |
| 52 | return logging.getLogger(".".join(reversed(logger_path))) |
| 53 | |
| 54 | @staticmethod |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 55 | def safe_json_parse(json_str): |
| 56 | """try parsing json without exception - returns the json_str back if fails""" |
| 57 | if not json_str: |
| 58 | return json_str |
| 59 | try: |
| 60 | return json.loads(json_str) |
| 61 | except (ValueError, TypeError) as err: |
Alex Shatov | c9ec231 | 2018-06-14 12:06:42 -0400 | [diff] [blame] | 62 | Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)", |
| 63 | str(err), len(json_str), str(json_str)[:100]) |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 64 | return json_str |
| 65 | |
| 66 | @staticmethod |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 67 | def are_the_same(body_1, body_2, json_dumps=None): |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 68 | """check whether both objects are the same""" |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 69 | if not json_dumps: |
| 70 | json_dumps = json.dumps |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 71 | if (body_1 and not body_2) or (not body_1 and body_2): |
| 72 | Utils._logger.debug("only one is empty %s != %s", body_1, body_2) |
| 73 | return False |
| 74 | |
| 75 | if body_1 is None and body_2 is None: |
| 76 | return True |
| 77 | |
| 78 | if isinstance(body_1, list) and isinstance(body_2, list): |
| 79 | if len(body_1) != len(body_2): |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 80 | Utils._logger.debug("len %s != %s", json_dumps(body_1), json_dumps(body_2)) |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 81 | return False |
| 82 | |
| 83 | for val_1, val_2 in zip(body_1, body_2): |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 84 | if not Utils.are_the_same(val_1, val_2, json_dumps): |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 85 | return False |
| 86 | return True |
| 87 | |
| 88 | if isinstance(body_1, dict) and isinstance(body_2, dict): |
Alex Shatov | c9ec231 | 2018-06-14 12:06:42 -0400 | [diff] [blame] | 89 | if body_1.keys() ^ body_2.keys(): |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 90 | Utils._logger.debug("keys %s != %s", json_dumps(body_1), json_dumps(body_2)) |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 91 | return False |
| 92 | |
Alex Shatov | c9ec231 | 2018-06-14 12:06:42 -0400 | [diff] [blame] | 93 | for key, val_1 in body_1.items(): |
Alex Shatov | 1d69337 | 2018-08-24 13:15:04 -0400 | [diff] [blame] | 94 | if not Utils.are_the_same(val_1, body_2[key], json_dumps): |
Alex Shatov | a2b2629 | 2018-03-13 17:50:51 -0400 | [diff] [blame] | 95 | return False |
| 96 | return True |
| 97 | |
| 98 | # ... here when primitive values or mismatched types ... |
| 99 | the_same_values = (body_1 == body_2) |
| 100 | if not the_same_values: |
| 101 | Utils._logger.debug("values %s != %s", body_1, body_2) |
| 102 | return the_same_values |
Alex Shatov | d7f34d4 | 2018-08-07 12:11:35 -0400 | [diff] [blame] | 103 | |
| 104 | class RegexCoarser(object): |
| 105 | """ |
| 106 | utility to combine or coarse the collection of regex patterns |
| 107 | into a single regex that is at least not narrower (wider or the same) |
| 108 | than the collection regexes |
| 109 | |
| 110 | inspired by https://github.com/spadgos/regex-combiner in js |
| 111 | """ |
| 112 | ENDER = '***' |
| 113 | GROUPERS = {'{': '}', '[': ']', '(': ')'} |
| 114 | MODIFIERS = '*?+' |
| 115 | CHOICE_STARTER = '(' |
| 116 | HIDDEN_CHOICE_STARTER = '(?:' |
| 117 | ANY_CHARS = '.*' |
| 118 | LINE_START = '^' |
| 119 | |
| 120 | def __init__(self, regex_patterns=None): |
| 121 | """regex coarser""" |
| 122 | self.trie = {} |
| 123 | self.patterns = [] |
| 124 | self.add_regex_patterns(regex_patterns) |
| 125 | |
| 126 | def get_combined_regex_pattern(self): |
| 127 | """gets the pattern for the combined regex""" |
| 128 | trie = deepcopy(self.trie) |
| 129 | RegexCoarser._compress(trie) |
| 130 | return RegexCoarser._trie_to_pattern(trie) |
| 131 | |
| 132 | def get_coarse_regex_patterns(self, max_length=100): |
| 133 | """gets the patterns for the coarse regex""" |
| 134 | trie = deepcopy(self.trie) |
| 135 | RegexCoarser._compress(trie) |
| 136 | patterns = RegexCoarser._trie_to_pattern(trie, True) |
| 137 | |
| 138 | root_patterns = [] |
| 139 | for pattern in patterns: |
| 140 | left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER) |
| 141 | if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace(): |
| 142 | pattern = left + RegexCoarser.ANY_CHARS |
| 143 | root_patterns.append(pattern) |
| 144 | root_patterns = RegexCoarser._join_patterns(root_patterns, max_length) |
| 145 | |
| 146 | if not root_patterns or root_patterns == ['']: |
| 147 | return [] |
| 148 | return root_patterns |
| 149 | |
| 150 | |
| 151 | def add_regex_patterns(self, new_regex_patterns): |
| 152 | """adds the new_regex patterns to RegexPatternCoarser""" |
| 153 | if not new_regex_patterns or not isinstance(new_regex_patterns, list): |
| 154 | return |
| 155 | for new_regex_pattern in new_regex_patterns: |
| 156 | self.add_regex_pattern(new_regex_pattern) |
| 157 | |
| 158 | def add_regex_pattern(self, new_regex_pattern): |
| 159 | """adds the new_regex to RegexPatternCoarser""" |
| 160 | new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern) |
| 161 | if not new_regex_pattern: |
| 162 | return |
| 163 | |
| 164 | self.patterns.append(new_regex_pattern) |
| 165 | |
| 166 | tokens = RegexCoarser._tokenize(new_regex_pattern) |
| 167 | last_token_idx = len(tokens) - 1 |
| 168 | trie_node = self.trie |
| 169 | for idx, token in enumerate(tokens): |
| 170 | if token not in trie_node: |
| 171 | trie_node[token] = {} |
| 172 | if idx == last_token_idx: |
| 173 | trie_node[token][RegexCoarser.ENDER] = {} |
| 174 | trie_node = trie_node[token] |
| 175 | |
| 176 | @staticmethod |
| 177 | def _regex_pattern_to_string(regex_pattern): |
| 178 | """convert regex pattern to string""" |
| 179 | if not regex_pattern: |
| 180 | return '' |
| 181 | |
| 182 | if isinstance(regex_pattern, str): |
| 183 | return regex_pattern |
| 184 | |
| 185 | if isinstance(regex_pattern, Pattern): |
| 186 | return regex_pattern.pattern |
| 187 | return None |
| 188 | |
| 189 | @staticmethod |
| 190 | def _tokenize(regex_pattern): |
| 191 | """tokenize the regex pattern for trie assignment""" |
| 192 | tokens = [] |
| 193 | token = '' |
| 194 | group_ender = None |
| 195 | use_next = False |
| 196 | |
| 197 | for char in regex_pattern: |
| 198 | if use_next: |
| 199 | use_next = False |
| 200 | token += char |
| 201 | char = None |
| 202 | |
| 203 | if char == '\\': |
| 204 | use_next = True |
| 205 | token += char |
| 206 | continue |
| 207 | |
| 208 | if not group_ender and char in RegexCoarser.GROUPERS: |
| 209 | group_ender = RegexCoarser.GROUPERS[char] |
| 210 | token = char |
| 211 | char = None |
| 212 | |
| 213 | if char is None: |
| 214 | pass |
| 215 | elif char == group_ender: |
| 216 | token += char |
| 217 | group_ender = None |
| 218 | if char == '}': # this group is a modifier |
| 219 | tokens[len(tokens) - 1] += token |
| 220 | token = '' |
| 221 | continue |
| 222 | elif char in RegexCoarser.MODIFIERS: |
| 223 | if group_ender: |
| 224 | token += char |
| 225 | else: |
| 226 | tokens[len(tokens) - 1] += char |
| 227 | continue |
| 228 | else: |
| 229 | token += char |
| 230 | |
| 231 | if not group_ender: |
| 232 | tokens.append(token) |
| 233 | token = '' |
| 234 | |
| 235 | if token: |
| 236 | tokens.append(token) |
| 237 | return tokens |
| 238 | |
| 239 | @staticmethod |
| 240 | def _compress(trie): |
| 241 | """compress trie into shortest leaves""" |
| 242 | for key, subtrie in trie.items(): |
| 243 | RegexCoarser._compress(subtrie) |
| 244 | subkeys = list(subtrie.keys()) |
| 245 | if len(subkeys) == 1: |
| 246 | trie[key + subkeys[0]] = subtrie[subkeys[0]] |
| 247 | del trie[key] |
| 248 | |
| 249 | @staticmethod |
| 250 | def _trie_to_pattern(trie, top_keep=False): |
| 251 | """convert trie to the regex pattern""" |
| 252 | patterns = [ |
| 253 | key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie) |
| 254 | for key, subtrie in trie.items() |
| 255 | ] |
| 256 | |
| 257 | if top_keep: |
| 258 | return patterns |
| 259 | |
| 260 | return RegexCoarser._join_patterns(patterns)[0] |
| 261 | |
| 262 | @staticmethod |
| 263 | def _join_patterns(patterns, max_length=0): |
| 264 | """convert list of patterns to the segmented list of dense regex patterns""" |
| 265 | if not patterns: |
| 266 | return [''] |
| 267 | |
| 268 | if len(patterns) == 1: |
| 269 | return patterns |
| 270 | |
| 271 | if not max_length: |
| 272 | return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')'] |
| 273 | |
| 274 | long_patterns = [] |
| 275 | join_patterns = [] |
| 276 | for pattern in patterns: |
| 277 | len_pattern = len(pattern) |
| 278 | if not len_pattern: |
| 279 | continue |
| 280 | if len_pattern >= max_length: |
| 281 | long_patterns.append(pattern) |
| 282 | continue |
| 283 | |
| 284 | for idx, patterns_to_join in enumerate(join_patterns): |
| 285 | patterns_to_join, len_patterns_to_join = patterns_to_join |
| 286 | if len_pattern + len_patterns_to_join < max_length: |
| 287 | patterns_to_join.append(pattern) |
| 288 | len_patterns_to_join += len_pattern |
| 289 | join_patterns[idx] = (patterns_to_join, len_patterns_to_join) |
| 290 | len_pattern = 0 |
| 291 | break |
| 292 | if len_pattern: |
| 293 | join_patterns.append(([pattern], len_pattern)) |
| 294 | join_patterns.sort(key=lambda x: x[1]) |
| 295 | |
| 296 | if join_patterns: |
| 297 | # pattern, _, choice = pattern.endswith(RegexCoarser.ANY_CHARS) |
| 298 | join_patterns = [ |
| 299 | RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')' |
| 300 | for patterns_to_join, _ in join_patterns |
| 301 | ] |
| 302 | |
| 303 | return join_patterns + long_patterns |