Blame - docs/_scripts/siphon/process.py - fdio/vpp

blob: e3a70152487fcca1626f7d5c8f93243e76c20864 [file] [log] [blame]

Chris Luke	90f52bf	2016-09-12 08:55:13 -0400	[diff] [blame]	1	# Copyright (c) 2016 Comcast Cable Communications Management, LLC.
				2	#
				3	# Licensed under the Apache License, Version 2.0 (the "License");
				4	# you may not use this file except in compliance with the License.
				5	# You may obtain a copy of the License at:
				6	#
				7	# http://www.apache.org/licenses/LICENSE-2.0
				8	#
				9	# Unless required by applicable law or agreed to in writing, software
				10	# distributed under the License is distributed on an "AS IS" BASIS,
				11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	# See the License for the specific language governing permissions and
				13	# limitations under the License.
				14
				15	# Generation template class
				16
Paul Vinciguerra	464e5e0	2019-11-01 15:07:32 -0400	[diff] [blame]	17	import html.parser
				18	import json
				19	import logging
				20	import os
				21	import sys
Nathan Skrzypczak	9ad39c0	2021-08-19 11:38:06 +0200	[diff] [blame]	22	import re
Paul Vinciguerra	464e5e0	2019-11-01 15:07:32 -0400	[diff] [blame]	23
				24	import jinja2
Chris Luke	90f52bf	2016-09-12 08:55:13 -0400	[diff] [blame]	25
				26	# Classes register themselves in this dictionary
				27	"""Mapping of known processors to their classes"""
				28	siphons = {}
				29
Chris Luke	c3f92ad	2016-10-05 15:45:19 -0400	[diff] [blame]	30	"""Mapping of known output formats to their classes"""
				31	formats = {}
				32
Chris Luke	90f52bf	2016-09-12 08:55:13 -0400	[diff] [blame]	33
class Siphon(object):
    """Generate rendered output for siphoned data.

    Subclasses set ``name``, ``identifier`` and ``_parser`` and may override
    the rendering hooks (``index_*``, ``item_*``, ``page_*``,
    ``separate_page_names``) to customise the output.
    """

    # Our siphon name. Set by subclasses.
    name = None

    # Name of an identifier used by this siphon. Set by subclasses.
    identifier = None

    # The pyparsing object to use to parse with. Set by subclasses.
    _parser = None

    # The input data: group key -> {item key -> parsed item}, plus the
    # private "_global" entry holding the global section of the input.
    _cmds = None

    # Group key to (directory, file) mapping
    _group = None

    # Logging handler
    log = None

    # Directory to look for siphon rendering templates
    template_directory = None

    # Directory to output parts in
    outdir = None

    # Template environment, if we're using templates
    _tplenv = None

    # Indent applied to the body of generated ``code-block`` directives.
    # NOTE(review): a single space is what the reviewed copy of this file
    # shows, but its whitespace may have been collapsed -- confirm the
    # intended indent width.
    _CONSOLE_INDENT = " "

    def __init__(self, template_directory, format, outdir, repository_link):
        """Set up logging, the output format and the template environment.

        Args:
            template_directory: Root directory holding the templates; the
                search path is ``<root>/<format name>/<siphon name>`` and
                then ``<root>/<format name>/default``.
            format: Key into the module-level ``formats`` registry.
            outdir: Directory that separately rendered pages are written to.
            repository_link: Stored on the instance as ``repository_link``.

        Raises:
            KeyError: If ``format`` is not a registered output format.
        """
        super(Siphon, self).__init__()
        self.log = logging.getLogger("siphon.process.%s" % self.name)

        # Get our output format details
        fmt = formats[format]()
        self._format = fmt

        # Sort out the template search path: siphon-specific templates
        # take precedence over the defaults.
        self.template_directory = template_directory
        searchpath = [
            os.sep.join((template_directory, fmt.name, subdir))
            for subdir in (self.name, "default")
        ]
        self.outdir = outdir
        self._tplenv = jinja2.Environment(
            loader=jinja2.FileSystemLoader(searchpath=searchpath),
            trim_blocks=True,
            autoescape=False,
            keep_trailing_newline=True)

        # Convenience: keep references to the html escape and unescape
        # helpers on the instance, which templates receive as ``this``.
        self._h = html.parser.HTMLParser()
        self.escape = html.escape
        self.unescape = html.unescape

        # TODO: customize release
        self.repository_link = repository_link

    # Output renderers

    def index_sort_key(self, group):
        """Return an object to be used as the sorting key in the item index."""
        return group

    def index_header(self):
        """Return a string to use as the header at the top of the item index."""
        return self.template("index_header")

    def index_section(self, group):
        """Return the string fragment to use for each section in the item
        index."""
        return self.template("index_section", group=group)

    def index_entry(self, meta, item):
        """Return the string fragment to use for each entry in the item
        index."""
        return self.template("index_entry", meta=meta, item=item)

    def item_sort_key(self, item):
        """Return an object, typically a string, to be used as the sorting
        key for items within a section."""
        return item["name"]

    def group_key(self, directory, file, macro, name):
        """Return a key for grouping items together.

        Items are grouped by file when the global data carries a
        ``group_label`` for that file, and by directory otherwise. The
        chosen key is also recorded in ``self._group``.
        """
        _global = self._cmds["_global"]

        if file in _global and "group_label" in _global[file]:
            self._group[file] = (directory, file)
            return file

        self._group[directory] = (directory, None)
        return directory

    def item_key(self, directory, file, macro, name):
        """Return a key for identifying items within a grouping."""
        return name

    def item_header(self, group):
        """Return a string to use as the header when rendering the item."""
        return self.template("item_header", group=group)

    def item_format(self, meta, item):
        """Return a string to use as the body when rendering the item."""
        return self.template("item_format", meta=meta, item=item)

    def page_label(self, group):
        """Return a string to use as the label for the page reference."""
        return "_".join((self.name, self.sanitize_label(group)))

    def page_title(self, group):
        """Return a title to use for a page.

        A file-level ``group_label`` wins over a directory-level one; the
        directory itself is the fallback.
        """
        _global = self._cmds["_global"]
        directory, file = self._group[group]

        if file and file in _global and "group_label" in _global[file]:
            return _global[file]["group_label"]

        if directory in _global and "group_label" in _global[directory]:
            return _global[directory]["group_label"]

        return directory

    def item_label(self, group, item):
        """Return a string to use as the label for the section reference."""
        return "__".join((self.name, item))

    def sanitize_label(self, value):
        """Label sanitizer; for creating Doxygen references.

        Spaces, slashes and dots are each replaced by an underscore.
        """
        for char in (" ", "/", "."):
            value = value.replace(char, "_")
        return value

    def template(self, name, **kwargs):
        """Render the template ``name`` (plus the format's file extension).

        The template receives this object as ``this`` along with any
        keyword arguments given.
        """
        tpl = self._tplenv.get_template(name + self._format.extension)
        return tpl.render(this=self, **kwargs)

    # Processing methods

    def load_json(self, files):
        """Parse the input files into a more usable dictionary structure.

        Each file is a JSON document with a ``global`` mapping and an
        ``items`` list. Every item's ``block`` is run through
        ``self._parser``; the remaining item fields become the parsed
        object's ``meta``. Results are stored in ``self._cmds`` keyed by
        group key and then item key.

        The ``global`` sections of all files are merged (later files win on
        a clash) so that labels from every input are still available when
        rendering, not just those of the last file read.

        Raises:
            Exception: Whatever the parser raises for a bad item, after the
                offending item has been logged.
        """
        self._cmds = {"_global": {}}
        self._group = {}

        for filename in files:
            filename = os.path.relpath(filename)
            self.log.info('Parsing items in file "%s".', filename)
            # JSON text is UTF-8 by specification.
            with open(filename, "r", encoding="utf-8") as fd:
                data = json.load(fd)

            self._cmds["_global"].update(data["global"])

            # iterate the items loaded and regroup it
            for item in data["items"]:
                try:
                    o = self._parser.parse(item["block"])
                except Exception:
                    self.log.error(
                        "Exception parsing item: %s\n%s",
                        json.dumps(item, separators=(",", ": "), indent=4),
                        item["block"])
                    raise

                # Augment the item with metadata: everything but the raw
                # block itself.
                o["meta"] = {
                    key: value for key, value in item.items()
                    if key != "block"
                }

                # Load some interesting fields
                directory = item["directory"]
                file = item["file"]
                macro = o["macro"]
                name = o["name"]

                # Generate keys to group items by
                group_key = self.group_key(directory, file, macro, name)
                item_key = self.item_key(directory, file, macro, name)

                self._cmds.setdefault(group_key, {})[item_key] = o

    def process(self, out=None):
        """Iterate over the input data, calling render methods to generate
        the output.

        The index (header, sections, entries) is written to ``out`` as it
        is generated. Item bodies are accumulated and written to ``out`` at
        the end, except that whenever ``separate_page_names(group)`` returns
        a non-empty name the body accumulated so far is written to that
        file under ``self.outdir`` instead.

        Args:
            out: Writable text stream; defaults to ``sys.stdout``.
        """
        if out is None:
            out = sys.stdout

        # Accumulated body contents
        body = []

        # Write the header for this siphon type
        out.write(self.index_header())

        # Private entries (such as "_global") are not groups; drop them
        # before sorting so the sort-key hook only ever sees real groups.
        groups = sorted(
            (group for group in self._cmds if not group.startswith("_")),
            key=self.index_sort_key)

        for group in groups:
            self.log.info('Processing items in group "%s" (%s).',
                          group, self.index_sort_key(group))

            # Generate the section index entry (write it now)
            out.write(self.index_section(group))

            # Generate the item header (save for later)
            body.append(self.item_header(group))

            items = self._cmds[group]
            ordered = sorted(
                items, key=lambda k: self.item_sort_key(items[k]))
            for key in ordered:
                o = items[key]
                self.log.debug('--- Processing key "%s" (%s).',
                               key, self.item_sort_key(o))

                meta = {
                    "directory": o["meta"]["directory"],
                    "file": o["meta"]["file"],
                    "macro": o["macro"],
                    "name": o["name"],
                    "key": key,
                    "label": self.item_label(group, key),
                }

                # Generate the index entry for the item (write it now)
                out.write(self.index_entry(meta, o))

                # Generate the item itself (save for later)
                body.append(self.item_format(meta, o))

            # A non-empty page name diverts what has been accumulated so
            # far into its own file.
            page_name = self.separate_page_names(group)
            if page_name != "":
                path = os.path.join(self.outdir, page_name)
                with open(path, "w") as page:
                    page.write("".join(body))
                body = []

        # Deliver the accumulated body output
        out.write("".join(body))

    def do_cliexstart(self, matchobj):
        """Render a ``@cliexstart{title}...@cliexend`` match as a console
        code block: the title (joined onto one line) followed by the body."""
        ind = self._CONSOLE_INDENT
        title = " ".join(matchobj.group(1).splitlines())
        content = matchobj.group(2).replace("\n", "\n" + ind)
        return "\n\n.. code-block:: console\n\n%s%s\n%s%s\n\n" % (
            ind, title, ind, content)

    def do_clistart(self, matchobj):
        """Render a ``@clistart...@cliend`` match as a console code block."""
        ind = self._CONSOLE_INDENT
        content = matchobj.group(1).replace("\n", "\n" + ind)
        return "\n\n.. code-block:: console\n\n%s%s\n\n" % (ind, content)

    def do_cliexcmd(self, matchobj):
        """Render a ``@cliexcmd{...}`` match as a one-line console code
        block."""
        content = " ".join(matchobj.group(1).splitlines())
        return "\n\n.. code-block:: console\n\n%s%s\n\n" % (
            self._CONSOLE_INDENT, content)

    def process_list(self, matchobj):
        """Re-indent one list item and close it with the ``BBBB`` marker
        that ``process_special`` resolves afterwards."""
        content = self.reindent(matchobj.group(1), 2)
        return "@@@@%s\nBBBB" % content

    def process_special(self, s):
        """Translate the documentation markup found in ``s``.

        Removes paragraph markers and todos, converts HTML-style emphasis
        and ``@c`` to inline markup, turns the ``@cli*`` constructs into
        console code blocks and normalises ``-`` list items.
        """
        # ----------- markers to remove
        for marker in (r"@cliexpar\s*", r"@parblock\s*",
                       r"@endparblock\s*", r"<br>"):
            s = re.sub(marker, "", s)
        # ----------- emphasis
        # <b><em>
        s = re.sub(r"<b><em>\s*", "``", s)
        s = re.sub(r"\s*</b></em>", "``", s)
        s = re.sub(r"\s*</em></b>", "``", s)
        # <b>: whitespace next to the tag is optional, as for every other
        # tag here, and bold is spelled with a doubled asterisk.
        s = re.sub(r"<b>\s*", "**", s)
        s = re.sub(r"\s*</b>", "**", s)
        # <code>
        s = re.sub(r"<code>\s*", "``", s)
        s = re.sub(r"\s*</code>", "``", s)
        # <em>
        s = re.sub(r"'?<em>\s*", "``", s)
        s = re.sub(r"\s*</em>'?", "``", s)
        # @c <something>
        s = re.sub(r"@c\s(\S+)", r"``\1``", s)
        # ----------- todos
        s = re.sub(r"@todo[^\n]*", "", s)
        s = re.sub(r"@TODO[^\n]*", "", s)
        # ----------- code blocks
        s = re.sub(r"@cliexcmd\{(.+?)\}", self.do_cliexcmd, s,
                   flags=re.DOTALL)
        s = re.sub(r"@cliexstart\{(.+?)\}(.+?)@cliexend", self.do_cliexstart,
                   s, flags=re.DOTALL)
        s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s,
                   flags=re.DOTALL)
        # ----------- lists
        # Mark every item start with @@@@, let process_list indent each
        # item and terminate it with BBBB, then resolve the markers:
        # BBBB directly followed by @@@@ is the boundary between two items,
        # a lone @@@@ starts a list and a lone BBBB ends one.
        s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
        s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
        s = re.sub(r"BBBB@@@@", "-", s)
        s = re.sub(r"@@@@", "-", s)
        s = re.sub(r"BBBB", "\n\n", s)
        # ----------- Cleanup remains
        s = re.sub(r"@cliexend\s*", "", s)
        return s

    def separate_page_names(self, group):
        """Return the file name to write this group's page to, or ``""`` to
        keep it in the main output. The base class never splits pages."""
        return ""

    def reindent(self, s, indent):
        """Push the given text block ``indent`` spaces right.

        Only lines after the first are shifted: the indent is inserted
        after every newline.
        """
        return s.replace("\n", "\n" + " " * indent)

    def noindent(self, s):
        """Align the given text block left (no indent) by dropping the
        spaces, tabs, form feeds and vertical tabs that follow a newline."""
        return re.sub(r"\n[ \f\v\t]*", "\n", s)
Chris Luke	c3f92ad	2016-10-05 15:45:19 -0400	[diff] [blame]	379
class Format(object):
    """Output format class.

    Concrete formats override both attributes below.
    """

    # Name of this output format
    name = None

    # Expected file extension of templates that build this format
    extension = None
				388
				389
class FormatMarkdown(Format):
    """Markdown output format; its templates end in ``.md``."""

    name = "markdown"
    extension = ".md"


# Register 'markdown' under the class's own name
formats[FormatMarkdown.name] = FormatMarkdown
				398
				399
class FormatItemlist(Format):
    """Itemlist output format; its templates end in ``.itemlist``."""

    name = "itemlist"
    extension = ".itemlist"


# Register 'itemlist' under the class's own name
formats[FormatItemlist.name] = FormatItemlist