Chris Luke | 90f52bf | 2016-09-12 08:55:13 -0400 | [diff] [blame] | 1 | # Copyright (c) 2016 Comcast Cable Communications Management, LLC. |
| 2 | # |
| 3 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | # you may not use this file except in compliance with the License. |
| 5 | # You may obtain a copy of the License at: |
| 6 | # |
| 7 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | # |
| 9 | # Unless required by applicable law or agreed to in writing, software |
| 10 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | # See the License for the specific language governing permissions and |
| 13 | # limitations under the License. |
| 14 | |
| 15 | # Generation template class |
| 16 | |
Paul Vinciguerra | 464e5e0 | 2019-11-01 15:07:32 -0400 | [diff] [blame] | 17 | import html.parser |
| 18 | import json |
| 19 | import logging |
| 20 | import os |
| 21 | import sys |
Nathan Skrzypczak | 9ad39c0 | 2021-08-19 11:38:06 +0200 | [diff] [blame] | 22 | import re |
Paul Vinciguerra | 464e5e0 | 2019-11-01 15:07:32 -0400 | [diff] [blame] | 23 | |
| 24 | import jinja2 |
Chris Luke | 90f52bf | 2016-09-12 08:55:13 -0400 | [diff] [blame] | 25 | |
# Classes register themselves in these dictionaries.

# Mapping of known processors to their classes
siphons = {}

# Mapping of known output formats to their classes
formats = {}
| 32 | |
Chris Luke | 90f52bf | 2016-09-12 08:55:13 -0400 | [diff] [blame] | 33 | |
class Siphon(object):
    """Generate rendered output for siphoned data.

    Items are loaded from JSON with ``load_json()``, grouped, and then
    rendered through per-format templates by ``process()``. Subclasses
    customise grouping, sorting and labelling by overriding the small
    hook methods defined here.
    """

    # Our siphon name. Set by subclasses.
    name = None

    # Name of an identifier used by this siphon. Set by subclasses.
    identifier = None

    # The pyparsing object to use to parse with. Set by subclasses.
    _parser = None

    # The input data
    _cmds = None

    # Group key to (directory, file) mapping
    _group = None

    # Logging handler
    log = None

    # Directory to look for siphon rendering templates
    template_directory = None

    # Directory to output parts in
    outdir = None

    # Template environment, if we're using templates
    _tplenv = None

    def __init__(self, template_directory, format, outdir, repository_link):
        """Set up logging, the output format and the template environment.

        Args:
            template_directory: Root directory of the rendering templates.
                Templates are searched for in
                ``<template_directory>/<format name>/<siphon name>`` first
                and ``<template_directory>/<format name>/default`` second.
            format: Key into the module-level ``formats`` registry.
            outdir: Directory that separately rendered pages are written to.
            repository_link: Stored on the instance as ``repository_link``.

        Raises:
            KeyError: If ``format`` is not a registered output format.
        """
        super(Siphon, self).__init__()
        self.log = logging.getLogger("siphon.process.%s" % self.name)

        # Get our output format details
        fmt = formats[format]()
        self._format = fmt

        self.template_directory = template_directory
        self.outdir = outdir

        # Sort out the template search path: siphon-specific templates
        # take precedence over the format's "default" ones.
        searchpath = [
            os.sep.join((template_directory, fmt.name, subdir))
            for subdir in (self.name, "default")
        ]
        self._tplenv = jinja2.Environment(
            loader=jinja2.FileSystemLoader(searchpath=searchpath),
            trim_blocks=True,
            autoescape=False,
            keep_trailing_newline=True)

        # Convenience, get a reference to the escape and unescape helpers
        # of the html module. Templates receive this object as "this" (see
        # template()), so these become available to templates, if needed.
        self._h = html.parser.HTMLParser()
        self.escape = html.escape
        self.unescape = html.unescape

        # TODO: customize release
        self.repository_link = repository_link

    # Output renderers

    def index_sort_key(self, group):
        """Return an object to be used as the sorting key in the item
        index."""
        return group

    def index_header(self):
        """Return a string to use as the header at the top of the item
        index."""
        return self.template("index_header")

    def index_section(self, group):
        """Return the string fragment to use for each section in the item
        index."""
        return self.template("index_section", group=group)

    def index_entry(self, meta, item):
        """Return the string fragment to use for each entry in the item
        index."""
        return self.template("index_entry", meta=meta, item=item)

    def item_sort_key(self, item):
        """Return an object, typically a string, to be used as the sorting
        key for items within a section."""
        return item['name']

    def group_key(self, directory, file, macro, name):
        """Return a key for grouping items together.

        The file is the key when the global data has a ``group_label`` for
        it; otherwise the directory is. The chosen key is also recorded in
        ``self._group`` as a ``(directory, file-or-None)`` tuple.
        """
        _global = self._cmds['_global']

        if file in _global and 'group_label' in _global[file]:
            self._group[file] = (directory, file)
            return file

        self._group[directory] = (directory, None)
        return directory

    def item_key(self, directory, file, macro, name):
        """Return a key for identifying items within a grouping."""
        return name

    def item_header(self, group):
        """Return a string to use as the header when rendering the item."""
        return self.template("item_header", group=group)

    def item_format(self, meta, item):
        """Return a string to use as the body when rendering the item."""
        return self.template("item_format", meta=meta, item=item)

    def page_label(self, group):
        """Return a string to use as the label for the page reference."""
        return "_".join((self.name, self.sanitize_label(group)))

    def page_title(self, group):
        """Return a title to use for a page.

        Prefers the ``group_label`` of the group's file, then that of its
        directory, and falls back to the directory name itself.
        """
        _global = self._cmds['_global']
        (directory, file) = self._group[group]

        if file and file in _global and 'group_label' in _global[file]:
            return _global[file]['group_label']

        if directory in _global and 'group_label' in _global[directory]:
            return _global[directory]['group_label']

        return directory

    def item_label(self, group, item):
        """Return a string to use as the label for the section reference."""
        return "__".join((self.name, item))

    def sanitize_label(self, value):
        """Label sanitizer; for creating Doxygen references.

        Spaces, slashes and dots are each replaced by an underscore.
        """
        return value.translate(str.maketrans(" /.", "___"))

    def template(self, name, **kwargs):
        """Template processor.

        Renders the template ``name`` plus the format's file extension,
        passing this object as ``this`` along with ``kwargs``.
        """
        tpl = self._tplenv.get_template(name + self._format.extension)
        return tpl.render(this=self, **kwargs)

    # Processing methods

    def load_json(self, files):
        """Parse the input files into a more usable dictionary structure.

        Each file is a JSON document with a ``global`` object and an
        ``items`` list. Every item's ``block`` is parsed with
        ``self._parser``; the remaining item fields are attached to the
        parse result under ``meta`` and the result is stored in
        ``self._cmds[group_key][item_key]``.

        Args:
            files: Iterable of paths of JSON files to load.

        Raises:
            Exception: Whatever the parser raises for an item; the
                offending item is logged before the exception propagates.
        """
        self._cmds = {}
        self._group = {}

        for filename in files:
            filename = os.path.relpath(filename)
            self.log.info("Parsing items in file \"%s\".", filename)
            with open(filename, "r", encoding="utf-8") as fd:
                data = json.load(fd)

            # NOTE(review): every file replaces the previous "_global"
            # entry rather than merging into it - confirm that is intended
            # when more than one file is given.
            self._cmds['_global'] = data['global']

            # iterate the items loaded and regroup it
            for item in data["items"]:
                try:
                    o = self._parser.parse(item['block'])
                except Exception:
                    self.log.error(
                        "Exception parsing item: %s\n%s",
                        json.dumps(item, separators=(',', ': '), indent=4),
                        item['block'])
                    raise

                # Augment the item with metadata: everything except the
                # raw block that was just parsed.
                o["meta"] = {
                    key: value for key, value in item.items()
                    if key != 'block'
                }

                # Load some interesting fields
                directory = item['directory']
                file = item['file']
                macro = o["macro"]
                name = o["name"]

                # Generate keys to group items by
                group_key = self.group_key(directory, file, macro, name)
                item_key = self.item_key(directory, file, macro, name)

                self._cmds.setdefault(group_key, {})[item_key] = o

    def process(self, out=None):
        """Iterate over the input data, calling render methods to generate
        the output.

        The index (header, one section per group, one entry per item) is
        written to ``out`` as it is produced. Item bodies are accumulated;
        when ``separate_page_names(group)`` returns a non-empty name the
        body accumulated so far is written to that file in ``self.outdir``
        and the accumulator is reset. Whatever remains is written to
        ``out`` at the end.

        Args:
            out: Writable text stream; defaults to ``sys.stdout``.
        """
        if out is None:
            out = sys.stdout

        # Accumulated body contents, joined once when delivered
        body = []

        # Write the header for this siphon type
        out.write(self.index_header())

        # Iterate the dictionary and process it
        for group in sorted(self._cmds, key=self.index_sort_key):
            # Keys with a leading underscore (e.g. "_global") are not groups
            if group.startswith('_'):
                continue

            self.log.info("Processing items in group \"%s\" (%s).",
                          group, self.index_sort_key(group))

            # Generate the section index entry (write it now)
            out.write(self.index_section(group))

            # Generate the item header (save for later)
            body.append(self.item_header(group))

            items = self._cmds[group]

            def item_sort_key(key):
                return self.item_sort_key(items[key])

            for key in sorted(items, key=item_sort_key):
                self.log.debug("--- Processing key \"%s\" (%s).",
                               key, item_sort_key(key))

                o = items[key]
                meta = {
                    "directory": o['meta']['directory'],
                    "file": o['meta']['file'],
                    "macro": o['macro'],
                    "name": o['name'],
                    "key": key,
                    "label": self.item_label(group, key),
                }

                # Generate the index entry for the item (write it now)
                out.write(self.index_entry(meta, o))

                # Generate the item itself (save for later)
                body.append(self.item_format(meta, o))

            # NOTE(review): this step is taken to run once per group, after
            # the group's items - confirm against the original indentation.
            page_name = self.separate_page_names(group)
            if page_name != "":
                path = os.path.join(self.outdir, page_name)
                with open(path, "w", encoding="utf-8") as page:
                    page.write("".join(body))
                body = []

        # Deliver the accumulated body output
        out.write("".join(body))

    # NOTE(review): the whitespace inside the string literals of the three
    # do_cli* helpers below is kept exactly as found - confirm it matches
    # the intended code-block indentation.

    def do_cliexstart(self, matchobj):
        """Render a ``@cliexstart{title}...@cliexend`` match as a console
        code block; the title is folded onto a single line."""
        title = ' '.join(matchobj.group(1).splitlines())
        content = re.sub(r"\n", r"\n ", matchobj.group(2))
        return "\n\n.. code-block:: console\n\n %s\n %s\n\n" % (title, content)

    def do_clistart(self, matchobj):
        """Render a ``@clistart...@cliend`` match as a console code block."""
        content = re.sub(r"\n", r"\n ", matchobj.group(1))
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def do_cliexcmd(self, matchobj):
        """Render a ``@cliexcmd{...}`` match as a console code block with
        the command folded onto a single line."""
        content = ' '.join(matchobj.group(1).splitlines())
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def process_list(self, matchobj):
        """Re-indent one list item and tag its end with the temporary
        ``BBBB`` marker that ``process_special()`` resolves afterwards."""
        content = self.reindent(matchobj.group(1), 2)
        return "@@@@%s\nBBBB" % content

    def process_special(self, s):
        """Convert the special markup in ``s`` and return the result.

        Removes layout markers and todo lines, turns HTML emphasis tags
        and ``@c`` into ``**`` / double-backtick markup, converts the
        ``@cli*`` constructs into console code blocks and normalises
        ``-`` list items. The substitutions are order dependent.
        """
        simple_substitutions = (
            # ----------- markers to remove
            (r"@cliexpar\s*", r""),
            (r"@parblock\s*", r""),
            (r"@endparblock\s*", r""),
            (r"<br>", ""),
            # ----------- emphasis
            # <b><em> (must run before the plain <b> and <em> rules)
            (r"<b><em>\s*", "``"),
            (r"\s*</b></em>", "``"),
            (r"\s*</em></b>", "``"),
            # <b>
            (r"<b>\s*", "**"),
            (r"\s*</b>", "**"),
            # <code>
            (r"<code>\s*", "``"),
            (r"\s*</code>", "``"),
            # <em>
            (r"'?<em>\s*", r"``"),
            (r"\s*</em>'?", r"``"),
            # @c <something>
            (r"@c\s(\S+)", r"``\1``"),
            # ----------- todos
            (r"@todo[^\n]*", ""),
            (r"@TODO[^\n]*", ""),
        )
        for pattern, replacement in simple_substitutions:
            s = re.sub(pattern, replacement, s)

        # ----------- code blocks
        s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
        s = re.sub(r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL)
        s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)

        # ----------- lists
        # Each item start becomes "@@@@"; process_list() appends "BBBB" to
        # each item. "BBBB@@@@" therefore marks two adjacent items, while a
        # lone "BBBB" marks the end of a list.
        s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
        s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
        s = re.sub(r"BBBB@@@@", r"-", s)
        s = re.sub(r"@@@@", r"-", s)
        s = re.sub(r"BBBB", r"\n\n", s)

        # ----------- Cleanup remains
        s = re.sub(r"@cliexend\s*", r"", s)
        return s

    def separate_page_names(self, group):
        """Return the file name of a separate page for ``group``.

        This base implementation returns ``""``, which makes ``process()``
        keep the group's body in the main output.
        """
        return ""

    def reindent(self, s, indent):
        """Push the given text block ``indent`` spaces to the right.

        Only text following a newline is shifted; the first line is left
        as it is.
        """
        ind = " " * indent
        return re.sub(r"\n", "\n" + ind, s)

    def noindent(self, s):
        """Align the given text block left (no indent).

        Whitespace directly after each newline is removed; the first line
        is left as it is.
        """
        return re.sub(r"\n[ \f\v\t]*", "\n", s)
Chris Luke | c3f92ad | 2016-10-05 15:45:19 -0400 | [diff] [blame] | 379 | |
class Format(object):
    """Output format class.

    Subclasses describe one output format by setting ``name`` and
    ``extension``.
    """

    # Name of this output format
    name = None

    # Expected file extension of templates that build this format
    extension = None
| 389 | |
class FormatMarkdown(Format):
    """Markdown output format"""

    name = "markdown"
    extension = ".md"


# Register 'markdown' under the class's own format name
formats[FormatMarkdown.name] = FormatMarkdown
| 398 | |
| 399 | |
class FormatItemlist(Format):
    """Itemlist output format"""

    name = "itemlist"
    extension = ".itemlist"


# Register 'itemlist' under the class's own format name
formats[FormatItemlist.name] = FormatItemlist