# Copyright (c) 2016 Comcast Cable Communications Management, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generation template class
16
Paul Vinciguerra464e5e02019-11-01 15:07:32 -040017import html.parser
18import json
19import logging
20import os
21import sys
Nathan Skrzypczak9ad39c02021-08-19 11:38:06 +020022import re
Paul Vinciguerra464e5e02019-11-01 15:07:32 -040023
24import jinja2
Chris Luke90f52bf2016-09-12 08:55:13 -040025
# Registries that classes add themselves to.

# Mapping of known processors to their classes
siphons = {}

# Mapping of known output formats to their classes
formats = {}
32
Chris Luke90f52bf2016-09-12 08:55:13 -040033
class Siphon:
    """Generate rendered output for siphoned data.

    Typical use is ``load_json(files)`` followed by ``process(out)``.
    Subclasses are expected to set ``name``, ``identifier`` and ``_parser``
    and may override the rendering hooks below.
    """

    # Our siphon name (set by subclasses)
    name = None

    # Name of an identifier used by this siphon (set by subclasses)
    identifier = None

    # The pyparsing object to use to parse with (set by subclasses);
    # only its ``parse()`` method is called here.
    _parser = None

    # The input data: group key -> item key -> parsed item, plus "_global"
    _cmds = None

    # Group key to (directory, file) mapping
    _group = None

    # Logging handler
    log = None

    # Directory to look for siphon rendering templates
    template_directory = None

    # Directory to output parts in
    outdir = None

    # Template environment, if we're using templates
    _tplenv = None

    # Translation table behind sanitize_label(): " ", "/" and "." become "_"
    _LABEL_TRANSLATION = str.maketrans(" /.", "___")

    # Plain (pattern, replacement) substitutions applied, in this exact
    # order, at the start of process_special().
    _SIMPLE_SUBSTITUTIONS = tuple(
        (re.compile(pattern), replacement)
        for pattern, replacement in (
            # ----------- markers to remove
            (r"@cliexpar\s*", r""),
            (r"@parblock\s*", r""),
            (r"@endparblock\s*", r""),
            (r"<br>", ""),
            # ----------- emphasis
            # <b><em>
            (r"<b><em>\s*", "``"),
            (r"\s*</b></em>", "``"),
            (r"\s*</em></b>", "``"),
            # <b>
            (r"<b>\s*", "**"),
            (r"\s*</b>", "**"),
            # <code>
            (r"<code>\s*", "``"),
            (r"\s*</code>", "``"),
            # <em>
            (r"'?<em>\s*", r"``"),
            (r"\s*</em>'?", r"``"),
            # @c <something>
            (r"@c\s(\S+)", r"``\1``"),
            # ----------- todos
            (r"@todo[^\n]*", ""),
            (r"@TODO[^\n]*", ""),
        )
    )

    def __init__(self, template_directory, format, outdir, repository_link):
        """Set up logging, the output format and the template environment.

        :param template_directory: root of the template tree. Templates are
            looked up in ``<root>/<format name>/<siphon name>`` first and
            then in ``<root>/<format name>/default``.
        :param format: key into the module-level ``formats`` registry.
        :param outdir: directory :meth:`process` writes separate pages to.
        :param repository_link: stored unchanged on the instance.
        :raises KeyError: if ``format`` is not a registered output format.
        """
        super().__init__()
        self.log = logging.getLogger("siphon.process.%s" % self.name)

        # Get our output format details
        fmt = formats[format]()
        self._format = fmt

        # Sort out the template search path
        def _tpldir(name):
            return os.path.join(template_directory, fmt.name, name)

        self.template_directory = template_directory
        self.outdir = outdir
        searchpath = [
            _tpldir(self.name),
            _tpldir("default"),
        ]
        loader = jinja2.FileSystemLoader(searchpath=searchpath)
        self._tplenv = jinja2.Environment(
            loader=loader,
            trim_blocks=True,
            autoescape=False,
            keep_trailing_newline=True,
        )

        # Convenience: expose HTML escape/unescape helpers on the instance.
        # Templates receive the instance as ``this`` (see template()), so
        # these are available to them if needed. ``_h`` is kept only so
        # that the existing attribute does not disappear.
        self._h = html.parser.HTMLParser()
        self.escape = html.escape
        self.unescape = html.unescape

        # TODO: customize release
        self.repository_link = repository_link

    # Output renderers

    def index_sort_key(self, group):
        """Return the object used as the sorting key in the item index.

        The base implementation sorts by the group key itself.
        """
        return group

    def index_header(self):
        """Return the string used as header at the top of the item index."""
        return self.template("index_header")

    def index_section(self, group):
        """Return the string fragment used for each section in the item
        index."""
        return self.template("index_section", group=group)

    def index_entry(self, meta, item):
        """Return the string fragment used for each entry in the item
        index."""
        return self.template("index_entry", meta=meta, item=item)

    def item_sort_key(self, item):
        """Return the sorting key for items within a section.

        The base implementation sorts by ``item["name"]``.
        """
        return item["name"]

    def group_key(self, directory, file, macro, name):
        """Return a key for grouping items together.

        The file is the group when the global data carries a
        ``group_label`` for it; otherwise the directory is. As a side
        effect the chosen key is recorded in ``self._group`` together with
        its ``(directory, file)`` pair (``file`` is ``None`` for directory
        groups). ``macro`` and ``name`` are unused by this implementation.
        """
        _global = self._cmds["_global"]

        if file in _global and "group_label" in _global[file]:
            self._group[file] = (directory, file)
            return file

        self._group[directory] = (directory, None)
        return directory

    def item_key(self, directory, file, macro, name):
        """Return a key for identifying items within a grouping.

        The base implementation uses ``name``.
        """
        return name

    def item_header(self, group):
        """Return a string to use as the header when rendering the item."""
        return self.template("item_header", group=group)

    def item_format(self, meta, item):
        """Return a string to use as the body when rendering the item."""
        return self.template("item_format", meta=meta, item=item)

    def page_label(self, group):
        """Return a string to use as the label for the page reference.

        The label is the siphon name and the sanitized group joined by a
        single underscore.
        """
        return "_".join((self.name, self.sanitize_label(group)))

    def page_title(self, group):
        """Return a title to use for a page.

        Preference order: the ``group_label`` of the group's file, the
        ``group_label`` of the group's directory, the directory itself.

        :raises KeyError: if ``group`` was never returned by group_key().
        """
        _global = self._cmds["_global"]
        (directory, file) = self._group[group]

        if file and file in _global and "group_label" in _global[file]:
            return _global[file]["group_label"]

        if directory in _global and "group_label" in _global[directory]:
            return _global[directory]["group_label"]

        return directory

    def item_label(self, group, item):
        """Return a string to use as the label for the section reference.

        ``group`` is unused by this implementation; the label is the siphon
        name and ``item`` joined by a double underscore.
        """
        return "__".join((self.name, item))

    def sanitize_label(self, value):
        """Label sanitizer; for creating Doxygen references.

        Replaces every space, ``/`` and ``.`` in ``value`` with ``_``.
        """
        return value.translate(self._LABEL_TRANSLATION)

    def template(self, name, **kwargs):
        """Template processor.

        Loads ``name`` plus the output format's extension from the template
        environment and renders it with this instance available as ``this``
        alongside ``kwargs``.
        """
        tpl = self._tplenv.get_template(name + self._format.extension)
        return tpl.render(this=self, **kwargs)

    # Processing methods

    def load_json(self, files):
        """Parse the input files into a more usable dictionary structure.

        Each file must be a JSON object with a ``global`` mapping and an
        ``items`` list. Every item needs ``block``, ``directory`` and
        ``file`` entries; ``block`` is handed to ``self._parser.parse()``
        and the result must provide ``macro`` and ``name``.

        Resets ``self._cmds`` and ``self._group`` and then fills them:
        ``self._cmds[group_key][item_key]`` holds the parsed item, with the
        remaining item fields copied into its ``"meta"`` entry.

        :param files: iterable of paths of JSON files to load.
        :raises Exception: whatever the parser raises; the offending item
            is logged before the exception is re-raised.
        """
        self._cmds = {}
        self._group = {}

        for filename in files:
            filename = os.path.relpath(filename)
            self.log.info('Parsing items in file "%s".', filename)
            with open(filename, "r", encoding="utf-8") as fd:
                data = json.load(fd)

            # Merge rather than overwrite, so that group labels from
            # earlier input files are still known to page_title().
            self._cmds.setdefault("_global", {}).update(data["global"])

            # iterate the items loaded and regroup it
            for item in data["items"]:
                try:
                    o = self._parser.parse(item["block"])
                except Exception:
                    self.log.error(
                        "Exception parsing item: %s\n%s",
                        json.dumps(item, separators=(",", ": "), indent=4),
                        item["block"],
                    )
                    raise

                # Augment the item with metadata (everything but the block)
                o["meta"] = {
                    key: value for key, value in item.items() if key != "block"
                }

                # Load some interesting fields
                directory = item["directory"]
                file = item["file"]
                macro = o["macro"]
                name = o["name"]

                # Generate keys to group items by
                group_key = self.group_key(directory, file, macro, name)
                item_key = self.item_key(directory, file, macro, name)

                self._cmds.setdefault(group_key, {})[item_key] = o

    def process(self, out=None):
        """Iterate over the input data, calling render methods to generate
        the output.

        Index fragments are written to ``out`` as they are produced. Item
        bodies are accumulated per group; when separate_page_names() names
        a page for the group, the accumulated body is written to that file
        under ``self.outdir`` and the accumulator is reset. Whatever is
        still accumulated at the end is written to ``out``.

        :param out: object with a ``write()`` method; defaults to
            ``sys.stdout``.
        """
        if out is None:
            out = sys.stdout

        # Accumulated body fragments; joined only when delivered
        contents = []

        # Write the header for this siphon type
        out.write(self.index_header())

        # Iterate the dictionary and process it
        for group in sorted(self._cmds, key=self.index_sort_key):
            # Keys with a leading underscore (e.g. "_global") are not groups
            if group.startswith("_"):
                continue

            self.log.info(
                'Processing items in group "%s" (%s).',
                group,
                self.index_sort_key(group),
            )

            # Generate the section index entry (write it now)
            out.write(self.index_section(group))

            # Generate the item header (save for later)
            contents.append(self.item_header(group))

            items = self._cmds[group]
            for key in sorted(items, key=lambda k: self.item_sort_key(items[k])):
                o = items[key]
                self.log.debug(
                    '--- Processing key "%s" (%s).', key, self.item_sort_key(o)
                )

                meta = {
                    "directory": o["meta"]["directory"],
                    "file": o["meta"]["file"],
                    "macro": o["macro"],
                    "name": o["name"],
                    "key": key,
                    "label": self.item_label(group, key),
                }

                # Generate the index entry for the item (write it now)
                out.write(self.index_entry(meta, o))

                # Generate the item itself (save for later)
                contents.append(self.item_format(meta, o))

            page_name = self.separate_page_names(group)
            if page_name:
                path = os.path.join(self.outdir, page_name)
                with open(path, "w", encoding="utf-8") as page:
                    page.write("".join(contents))
                contents = []

        # Deliver the accumulated body output
        out.write("".join(contents))

    def do_cliexstart(self, matchobj):
        """Render an ``@cliexstart{title}...@cliexend`` match as a console
        code block.

        Group 1 (the title) is folded onto one line; every line of group 2
        (the content) is indented to sit inside the block.
        """
        title = matchobj.group(1)
        title = " ".join(title.splitlines())
        content = matchobj.group(2)
        content = re.sub(r"\n", r"\n ", content)
        return "\n\n.. code-block:: console\n\n %s\n %s\n\n" % (title, content)

    def do_clistart(self, matchobj):
        """Render an ``@clistart...@cliend`` match as a console code block,
        indenting every line of the content."""
        content = matchobj.group(1)
        content = re.sub(r"\n", r"\n ", content)
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def do_cliexcmd(self, matchobj):
        """Render an ``@cliexcmd{...}`` match as a one-line console code
        block."""
        content = matchobj.group(1)
        content = " ".join(content.splitlines())
        return "\n\n.. code-block:: console\n\n %s\n\n" % content

    def process_list(self, matchobj):
        """Re-indent one list item and re-wrap it in the temporary
        ``@@@@`` / ``BBBB`` markers that process_special() resolves."""
        content = matchobj.group(1)
        content = self.reindent(content, 2)
        return "@@@@%s\nBBBB" % content

    def process_special(self, s):
        """Translate the documentation markup found in ``s``.

        Steps, in order: strip a few markers and ``<br>``, turn HTML
        emphasis tags and ``@c word`` into backtick/asterisk markup, drop
        ``@todo`` lines, convert the ``@cliex*`` / ``@clistart`` constructs
        into console code blocks, normalise ``-`` list items, and finally
        remove any leftover ``@cliexend``.

        :param s: text to convert.
        :returns: the converted text.
        """
        # ----------- markers, emphasis and todos
        for pattern, replacement in self._SIMPLE_SUBSTITUTIONS:
            s = pattern.sub(replacement, s)
        # ----------- code blocks
        s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
        s = re.sub(
            r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL
        )
        s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)
        # ----------- lists
        # Tag every line-leading "-" with @@@@, let process_list() re-indent
        # each tagged item (up to the next blank line) and close it with
        # BBBB, then turn the placeholders back into "-" and a blank line.
        s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
        s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
        s = re.sub(r"BBBB@@@@", r"-", s)
        s = re.sub(r"@@@@", r"-", s)
        s = re.sub(r"BBBB", r"\n\n", s)
        # ----------- Cleanup remains
        s = re.sub(r"@cliexend\s*", r"", s)
        return s

    def separate_page_names(self, group):
        """Return the file name the body of ``group`` should be written to.

        The base implementation returns ``""``, which makes process() keep
        the body in the main output instead of writing a separate page.
        """
        return ""

    def reindent(self, s, indent):
        """Push the given text block ``indent`` spaces to the right.

        Only lines after the first are shifted: the indent is inserted
        after each newline.
        """
        ind = " " * indent
        s = re.sub(r"\n", "\n" + ind, s)
        return s

    def noindent(self, s):
        """Align the given text block left (no indent).

        Removes spaces, tabs, form feeds and vertical tabs that directly
        follow a newline; the first line is left untouched.
        """
        s = re.sub(r"\n[ \f\v\t]*", "\n", s)
        return s
Chris Lukec3f92ad2016-10-05 15:45:19 -0400393
Klement Sekerad9b0c6f2022-04-26 19:02:15 +0200394
class Format(object):
    """Base class describing an output format.

    Concrete formats override both attributes below.
    """

    # Expected file extension of templates that build this format
    extension = None

    # Name of this output format
    name = None
403
404
class FormatMarkdown(Format):
    """Output format that renders Markdown from ``.md`` templates."""

    extension = ".md"
    name = "markdown"
410
Paul Vinciguerra464e5e02019-11-01 15:07:32 -0400411
# Make the Markdown format selectable under the key 'markdown'
formats.update({"markdown": FormatMarkdown})
414
415
class FormatItemlist(Format):
    """Output format that renders an item list from ``.itemlist``
    templates."""

    extension = ".itemlist"
    name = "itemlist"
421
Paul Vinciguerra464e5e02019-11-01 15:07:32 -0400422
# Make the item list format selectable under the key 'itemlist'
formats.update({"itemlist": FormatItemlist})