blob: d728e48e1fbc4988d040b723a6e5f59f6b9194b7 [file] [log] [blame]
Alex Shatov1369bea2018-01-10 11:00:50 -05001# ================================================================================
Alex Shatov9a4d3c52019-04-01 11:32:06 -04002# Copyright (c) 2018-2019 AT&T Intellectual Property. All rights reserved.
Alex Shatov1369bea2018-01-10 11:00:50 -05003# ================================================================================
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15# ============LICENSE_END=========================================================
16#
Alex Shatov1369bea2018-01-10 11:00:50 -050017
Alex Shatov9a4d3c52019-04-01 11:32:06 -040018"""utils and conversions"""
Alex Shatovf53e5e72018-01-11 11:15:56 -050019
Alex Shatov1369bea2018-01-10 11:00:50 -050020import json
Alex Shatova2b26292018-03-13 17:50:51 -040021import logging
Alex Shatov9a4d3c52019-04-01 11:32:06 -040022import os
Alex Shatovd7f34d42018-08-07 12:11:35 -040023from copy import deepcopy
24from typing import Pattern
Alex Shatov1369bea2018-01-10 11:00:50 -050025
class ToBeImplementedException(Exception):
    """exception for to be implemented features of policy-handler"""
    # no body needed beyond the docstring - the redundant `pass` was removed
Alex Shatov1369bea2018-01-10 11:00:50 -050029
Alex Shatovd7f34d42018-08-07 12:11:35 -040030
class Utils(object):
    """general purpose utils"""
    _logger = logging.getLogger("policy_handler.utils")

    @staticmethod
    def get_logger(file_path):
        """get the logger for the file_path == __file__

        Builds a dotted logger name from the path components below the
        "policyhandler" package folder, rooted at "policy_handler",
        e.g. policy_handler.<subfolder>.<module>.
        Files under a "tests" folder get the "unit_test" prefix instead.
        """
        logger_path = []
        file_path = os.path.realpath(file_path)
        # module name without its extension - os.path.splitext is safer than
        # slicing off a fixed-width ".py" suffix, which would corrupt the name
        # for any extension that is not exactly 3 characters long
        logger_path.append(os.path.splitext(os.path.basename(file_path))[0])
        while file_path:
            file_path = os.path.dirname(file_path)
            folder_name = os.path.basename(file_path)
            # at the filesystem root os.path.dirname returns the root itself,
            # so basename stays "" forever - the depth guard len(...) > 5
            # is what terminates the loop in that case
            if folder_name == "policyhandler" or len(logger_path) > 5:
                break
            if folder_name == "tests":
                logger_path.append("unit_test")
                break
            logger_path.append(folder_name)

        logger_path.append("policy_handler")
        return logging.getLogger(".".join(reversed(logger_path)))

    @staticmethod
    def safe_json_parse(json_str):
        """try parsing json without exception - returns the json_str back if fails"""
        if not json_str:
            return json_str
        try:
            return json.loads(json_str)
        except (ValueError, TypeError) as err:
            # best-effort parse: log and hand the raw string back to the caller
            Utils._logger.warning("unexpected json error(%s): len(%s) str[:100]: (%s)",
                                  str(err), len(json_str), str(json_str)[:100])
        return json_str

    @staticmethod
    def are_the_same(body_1, body_2, json_dumps=None):
        """check whether both objects are the same

        deep-compares lists element-by-element and dicts key-by-key;
        falls back to == for primitives and mismatched types.

        :json_dumps: optional serializer used only for debug logging
                     of mismatches (defaults to json.dumps)
        """
        if not json_dumps:
            json_dumps = json.dumps
        if (body_1 and not body_2) or (not body_1 and body_2):
            Utils._logger.debug("only one is empty %s != %s", body_1, body_2)
            return False

        if body_1 is None and body_2 is None:
            return True

        if isinstance(body_1, list) and isinstance(body_2, list):
            if len(body_1) != len(body_2):
                Utils._logger.debug("len %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False

            for val_1, val_2 in zip(body_1, body_2):
                if not Utils.are_the_same(val_1, val_2, json_dumps):
                    return False
            return True

        if isinstance(body_1, dict) and isinstance(body_2, dict):
            # symmetric difference of key sets - non-empty means keys mismatch,
            # and also guarantees body_2[key] below cannot raise KeyError
            if body_1.keys() ^ body_2.keys():
                Utils._logger.debug("keys %s != %s", json_dumps(body_1), json_dumps(body_2))
                return False

            for key, val_1 in body_1.items():
                if not Utils.are_the_same(val_1, body_2[key], json_dumps):
                    return False
            return True

        # ... here when primitive values or mismatched types ...
        the_same_values = (body_1 == body_2)
        if not the_same_values:
            Utils._logger.debug("values %s != %s", body_1, body_2)
        return the_same_values
Alex Shatovd7f34d42018-08-07 12:11:35 -0400103
class RegexCoarser(object):
    """
    utility to combine or coarse the collection of regex patterns
    into a single regex that is at least not narrower (wider or the same)
    than the collection regexes

    inspired by https://github.com/spadgos/regex-combiner in js
    """
    ENDER = '***'
    GROUPERS = {'{': '}', '[': ']', '(': ')'}
    MODIFIERS = '*?+'
    CHOICE_STARTER = '('
    HIDDEN_CHOICE_STARTER = '(?:'
    ANY_CHARS = '.*'
    LINE_START = '^'

    def __init__(self, regex_patterns=None):
        """regex coarser

        :regex_patterns: optional list of regex patterns (str or compiled)
        """
        self.trie = {}
        self.patterns = []
        self.add_regex_patterns(regex_patterns)

    def get_combined_regex_pattern(self):
        """gets the pattern for the combined regex"""
        # work on a copy - compression mutates the trie in place
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        return RegexCoarser._trie_to_pattern(trie)

    def get_coarse_regex_patterns(self, max_length=100):
        """gets the patterns for the coarse regex

        :max_length: soft length limit - shorter patterns are packed into
                     (?:...|...) choice groups, longer ones returned as is
        """
        trie = deepcopy(self.trie)
        RegexCoarser._compress(trie)
        patterns = RegexCoarser._trie_to_pattern(trie, True)

        root_patterns = []
        for pattern in patterns:
            # coarsen any pattern that branches after a non-trivial prefix:
            # keep the prefix and let it match anything after that
            left, _, choice = pattern.partition(RegexCoarser.CHOICE_STARTER)
            if choice and left and left.strip() != RegexCoarser.LINE_START and not left.isspace():
                pattern = left + RegexCoarser.ANY_CHARS
            root_patterns.append(pattern)
        root_patterns = RegexCoarser._join_patterns(root_patterns, max_length)

        if not root_patterns or root_patterns == ['']:
            return []
        return root_patterns


    def add_regex_patterns(self, new_regex_patterns):
        """adds the new_regex patterns to RegexPatternCoarser"""
        if not new_regex_patterns or not isinstance(new_regex_patterns, list):
            return
        for new_regex_pattern in new_regex_patterns:
            self.add_regex_pattern(new_regex_pattern)

    def add_regex_pattern(self, new_regex_pattern):
        """adds the new_regex to RegexPatternCoarser"""
        new_regex_pattern = RegexCoarser._regex_pattern_to_string(new_regex_pattern)
        if not new_regex_pattern:
            return

        self.patterns.append(new_regex_pattern)

        # store the token sequence into the trie, marking the end of a
        # complete pattern with the ENDER pseudo-token
        tokens = RegexCoarser._tokenize(new_regex_pattern)
        last_token_idx = len(tokens) - 1
        trie_node = self.trie
        for idx, token in enumerate(tokens):
            if token not in trie_node:
                trie_node[token] = {}
            if idx == last_token_idx:
                trie_node[token][RegexCoarser.ENDER] = {}
            trie_node = trie_node[token]

    @staticmethod
    def _regex_pattern_to_string(regex_pattern):
        """convert regex pattern (str or compiled) to string

        returns '' for empty input and None for unsupported types
        """
        if not regex_pattern:
            return ''

        if isinstance(regex_pattern, str):
            return regex_pattern

        # NOTE(review): typing.Pattern is deprecated since Python 3.8 and
        # removed in 3.12 - consider re.Pattern when the runtime allows
        if isinstance(regex_pattern, Pattern):
            return regex_pattern.pattern
        return None

    @staticmethod
    def _tokenize(regex_pattern):
        """tokenize the regex pattern for trie assignment

        a token is a single (possibly escaped) char, or a whole {...} [...]
        (...) group; modifiers (* ? +) and {...} repetitions are glued onto
        the preceding token so they never start a token of their own
        """
        tokens = []
        token = ''
        group_ender = None
        use_next = False

        for char in regex_pattern:
            if use_next:
                # previous char was a backslash - consume this char literally
                use_next = False
                token += char
                char = None

            if char == '\\':
                use_next = True
                token += char
                continue

            if not group_ender and char in RegexCoarser.GROUPERS:
                # entering a group - remember which char closes it
                group_ender = RegexCoarser.GROUPERS[char]
                token = char
                char = None

            if char is None:
                pass
            elif char == group_ender:
                token += char
                group_ender = None
                if char == '}':  # this group is a modifier
                    tokens[len(tokens) - 1] += token
                    token = ''
                    continue
            elif char in RegexCoarser.MODIFIERS:
                if group_ender:
                    token += char
                else:
                    # modifier applies to the previous token
                    tokens[len(tokens) - 1] += char
                    continue
            else:
                token += char

            if not group_ender:
                tokens.append(token)
                token = ''

        if token:
            tokens.append(token)
        return tokens

    @staticmethod
    def _compress(trie):
        """compress trie into shortest leaves

        merges every single-child chain of nodes into one node with the
        concatenated key.  iterates over a list() snapshot of the items
        because the merge adds and deletes keys of the trie in place -
        mutating a dict while iterating it directly is not safe
        """
        for key, subtrie in list(trie.items()):
            RegexCoarser._compress(subtrie)
            subkeys = list(subtrie.keys())
            if len(subkeys) == 1:
                trie[key + subkeys[0]] = subtrie[subkeys[0]]
                del trie[key]

    @staticmethod
    def _trie_to_pattern(trie, top_keep=False):
        """convert trie to the regex pattern

        :top_keep: when True, return the list of per-branch patterns
                   instead of joining them into a single choice group
        """
        patterns = [
            key.replace(RegexCoarser.ENDER, '') + RegexCoarser._trie_to_pattern(subtrie)
            for key, subtrie in trie.items()
        ]

        if top_keep:
            return patterns

        return RegexCoarser._join_patterns(patterns)[0]

    @staticmethod
    def _join_patterns(patterns, max_length=0):
        """convert list of patterns to the segmented list of dense regex patterns

        :max_length: 0 means join everything into a single (?:...|...) group;
                     otherwise first-fit pack patterns shorter than max_length
                     into groups and pass the longer ones through unchanged
        """
        if not patterns:
            return ['']

        if len(patterns) == 1:
            return patterns

        if not max_length:
            return [RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns) + ')']

        long_patterns = []
        join_patterns = []
        for pattern in patterns:
            len_pattern = len(pattern)
            if not len_pattern:
                continue
            if len_pattern >= max_length:
                long_patterns.append(pattern)
                continue

            # first-fit bin packing: drop the pattern into the first group
            # that still has room for it
            for idx, patterns_to_join in enumerate(join_patterns):
                patterns_to_join, len_patterns_to_join = patterns_to_join
                if len_pattern + len_patterns_to_join < max_length:
                    patterns_to_join.append(pattern)
                    len_patterns_to_join += len_pattern
                    join_patterns[idx] = (patterns_to_join, len_patterns_to_join)
                    len_pattern = 0
                    break
            if len_pattern:
                # no group had room - start a new one
                join_patterns.append(([pattern], len_pattern))
            join_patterns.sort(key=lambda x: x[1])

        if join_patterns:
            join_patterns = [
                RegexCoarser.HIDDEN_CHOICE_STARTER + '|'.join(patterns_to_join) + ')'
                for patterns_to_join, _ in join_patterns
            ]

        return join_patterns + long_patterns