docs/_scripts/siphon/process.py - fdio/vpp - Gitiles

 # Copyright (c) 2016 Comcast Cable Communications Management, LLC.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # Generation template class

 import html.parser
 import json
 import logging
 import os
 import sys
 import re

 import jinja2

 # Registry of known siphon processors and their classes; classes register
 # themselves in this dictionary.
 siphons = dict()

 # Registry of known output formats and their classes.
 formats = dict()


 class Siphon(object):
     """Generate rendered output for siphoned data."""

     # Our siphon name; set by subclasses.
     name = None

     # Name of an identifier used by this siphon; set by subclasses.
     identifier = None

     # The pyparsing object to use to parse with; set by subclasses.
     _parser = None

     # The input data (filled in by load_json).
     _cmds = None

     # Group key to (directory, file) mapping.
     _group = None

     # Logging handler.
     log = None

     # Directory to look for siphon rendering templates.
     template_directory = None

     # Directory to output parts in.
     outdir = None

     # Template environment, if we're using templates.
     _tplenv = None

     def __init__(self, template_directory, format, outdir, repository_link):
         """Set up the logger, the output format and the template environment.

         :param template_directory: root directory of the rendering templates.
         :param format: key looked up in the module-level ``formats`` registry.
         :param outdir: directory stored on the instance as ``outdir``.
         :param repository_link: stored on the instance as-is.
         :raises KeyError: if ``format`` is not present in ``formats``.
         """
         super(Siphon, self).__init__()
         self.log = logging.getLogger("siphon.process.%s" % self.name)

         # Instantiate the class registered for the requested output format
         self._format = formats[format]()

         self.template_directory = template_directory

         # Look in this siphon's own template directory first, then fall
         # back to the "default" one for the same format.
         searchpath = [
             os.sep.join((template_directory, self._format.name, subdir))
             for subdir in (self.name, "default")
         ]
         self.outdir = outdir
         self._tplenv = jinja2.Environment(
             loader=jinja2.FileSystemLoader(searchpath=searchpath),
             trim_blocks=True,
             autoescape=False,
             keep_trailing_newline=True)

         # Convenience: keep references to the html module's escape and
         # unescape helpers on the instance. Templates receive the instance
         # as ``this`` and can use them if needed.
         self._h = html.parser.HTMLParser()
         self.escape = html.escape
         self.unescape = html.unescape

         # TODO: customize release
         self.repository_link = repository_link

     # Output renderers

     """Returns an object to be used as the sorting key in the item index."""
     def index_sort_key(self, group):
         """Return the sorting key for ``group`` in the item index.

         The base implementation orders groups by the group key itself.
         """
         sort_key = group
         return sort_key

     """Returns a string to use as the header at the top of the item index."""
     def index_header(self):
         return self.template("index_header")

     """Returns the string fragment to use for each section in the item
     index."""
     def index_section(self, group):
         return self.template("index_section", group=group)

     """Returns the string fragment to use for each entry in the item index."""
     def index_entry(self, meta, item):
         return self.template("index_entry", meta=meta, item=item)

     """Returns an object, typically a string, to be used as the sorting key
     for items within a section."""
     def item_sort_key(self, item):
         """Return the sorting key for an item within its section.

         The base implementation sorts on the item's ``'name'`` entry.

         :raises KeyError: if ``item`` has no ``'name'`` entry.
         """
         sort_key = item['name']
         return sort_key

     """Returns a key for grouping items together."""
     def group_key(self, directory, file, macro, name):
         _global = self._cmds['_global']

         if file in _global and 'group_label' in _global[file]:
             self._group[file] = (directory, file)
             return file

         self._group[directory] = (directory, None)
         return directory

     """Returns a key for identifying items within a grouping."""
     def item_key(self, directory, file, macro, name):
         """Return the key identifying an item within its group.

         The base implementation uses ``name`` and ignores the other
         arguments.
         """
         key = name
         return key

     """Returns a string to use as the header when rendering the item."""
     def item_header(self, group):
         return self.template("item_header", group=group)

     """Returns a string to use as the body when rendering the item."""
     def item_format(self, meta, item):
         return self.template("item_format", meta=meta, item=item)

     """Returns a string to use as the label for the page reference."""
     def page_label(self, group):
         return "_".join((
             self.name,
             self.sanitize_label(group)
         ))

     """Returns a title to use for a page."""
     def page_title(self, group):
         _global = self._cmds['_global']
         (directory, file) = self._group[group]

         if file and file in _global and 'group_label' in _global[file]:
             return _global[file]['group_label']

         if directory in _global and 'group_label' in _global[directory]:
             return _global[directory]['group_label']

         return directory

     """Returns a string to use as the label for the section reference."""
     def item_label(self, group, item):
         return "__".join((
             self.name,
             item
         ))

     """Label sanitizer; for creating Doxygen references"""
     def sanitize_label(self, value):
         """Return ``value`` with spaces, slashes and dots turned into "_".

         Used when building reference labels.
         """
         for unwanted in (" ", "/", "."):
             value = value.replace(unwanted, "_")
         return value

     """Template processor"""
     def template(self, name, **kwargs):
         tpl = self._tplenv.get_template(name + self._format.extension)
         return tpl.render(
             this=self,
             **kwargs)

     # Processing methods

     """Parse the input file into a more usable dictionary structure."""
     def load_json(self, files):
         self._cmds = {}
         self._group = {}

         line_num = 0
         line_start = 0
         for filename in files:
             filename = os.path.relpath(filename)
             self.log.info("Parsing items in file \"%s\"." % filename)
             data = None
             with open(filename, "r") as fd:
                 data = json.load(fd)

             self._cmds['_global'] = data['global']

             # iterate the items loaded and regroup it
             for item in data["items"]:
                 try:
                     o = self._parser.parse(item['block'])
                 except Exception:
                     self.log.error("Exception parsing item: %s\n%s"
                                    % (json.dumps(item, separators=(',', ': '),
                                                  indent=4),
                                       item['block']))
                     raise

                 # Augment the item with metadata
                 o["meta"] = {}
                 for key in item:
                     if key == 'block':
                         continue
                     o['meta'][key] = item[key]

                 # Load some interesting fields
                 directory = item['directory']
                 file = item['file']
                 macro = o["macro"]
                 name = o["name"]

                 # Generate keys to group items by
                 group_key = self.group_key(directory, file, macro, name)
                 item_key = self.item_key(directory, file, macro, name)

                 if group_key not in self._cmds:
                     self._cmds[group_key] = {}

                 self._cmds[group_key][item_key] = o

     """Iterate over the input data, calling render methods to generate the
     output."""
     def process(self, out=None):

         if out is None:
             out = sys.stdout

         # Accumulated body contents
         contents = ""

         # Write the header for this siphon type
         out.write(self.index_header())

         # Sort key helper for the index
         def group_sort_key(group):
             return self.index_sort_key(group)

         # Iterate the dictionary and process it
         for group in sorted(self._cmds.keys(), key=group_sort_key):
             if group.startswith('_'):
                 continue

             self.log.info("Processing items in group \"%s\" (%s)." %
                           (group, group_sort_key(group)))

             # Generate the section index entry (write it now)
             out.write(self.index_section(group))

             # Generate the item header (save for later)
             contents += self.item_header(group)

             def item_sort_key(key):
                 return self.item_sort_key(self._cmds[group][key])

             for key in sorted(self._cmds[group].keys(), key=item_sort_key):
                 self.log.debug("--- Processing key \"%s\" (%s)." %
                                (key, item_sort_key(key)))

                 o = self._cmds[group][key]
                 meta = {
                     "directory": o['meta']['directory'],
                     "file": o['meta']['file'],
                     "macro": o['macro'],
                     "name": o['name'],
                     "key": key,
                     "label": self.item_label(group, key),
                 }

                 # Generate the index entry for the item (write it now)
                 out.write(self.index_entry(meta, o))

                 # Generate the item itself (save for later)
                 contents += self.item_format(meta, o)

             page_name = self.separate_page_names(group)
             if page_name != "":
                 path = os.path.join(self.outdir, page_name)
                 with open(path, "w+") as page:
                     page.write(contents)
                 contents = ""

         # Deliver the accumulated body output
         out.write(contents)

     def do_cliexstart(self, matchobj):
         """Turn a ``@cliexstart{title}...@cliexend`` match into a console block.

         Group 1 (the title) is folded onto one line; group 2 (the content)
         has every line after a newline indented by four spaces.
         """
         raw_title, raw_content = matchobj.group(1), matchobj.group(2)
         title = ' '.join(raw_title.splitlines())
         # Indent the lines following each newline to sit inside the block
         content = raw_content.replace("\n", "\n    ")
         return "\n\n.. code-block:: console\n\n    %s\n    %s\n\n" % (title, content)

     def do_clistart(self, matchobj):
         """Turn a ``@clistart...@cliend`` match into a console code block.

         Every line after a newline in group 1 is indented by four spaces.
         """
         content = matchobj.group(1).replace("\n", "\n    ")
         return "\n\n.. code-block:: console\n\n    %s\n\n" % content

     def do_cliexcmd(self, matchobj):
         """Turn a ``@cliexcmd{...}`` match into a one-line console block.

         The lines of group 1 are joined with single spaces.
         """
         command_lines = matchobj.group(1).splitlines()
         content = ' '.join(command_lines)
         return "\n\n.. code-block:: console\n\n    %s\n\n" % content

     def process_list(self, matchobj):
         content = matchobj.group(1)
         content = self.reindent(content, 2)
         return "@@@@%s\nBBBB" % content

     def process_special(self, s):
         # ----------- markers to remove
         s = re.sub(r"@cliexpar\s*", r"", s)
         s = re.sub(r"@parblock\s*", r"", s)
         s = re.sub(r"@endparblock\s*", r"", s)
         s = re.sub(r"<br>", "", s)
         # ----------- emphasis
         # <b><em>
         s = re.sub(r"<b><em>\s*", "``", s)
         s = re.sub(r"\s*</b></em>", "``", s)
         s = re.sub(r"\s*</em></b>", "``", s)
         # <b>
         s = re.sub(r"<b>\s*", "**", s)
         s = re.sub(r"\s*</b>", "**", s)
         # <code>
         s = re.sub(r"<code>\s*", "``", s)
         s = re.sub(r"\s*</code>", "``", s)
         # <em>
         s = re.sub(r"'?<em>\s*", r"``", s)
         s = re.sub(r"\s*</em>'?", r"``", s)
         # @c <something>
         s = re.sub(r"@c\s(\S+)", r"``\1``", s)
         # ----------- todos
         s = re.sub(r"@todo[^\n]*", "", s)
         s = re.sub(r"@TODO[^\n]*", "", s)
         # ----------- code blocks
         s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
         s = re.sub(r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL)
         s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)
         # ----------- lists
         s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
         s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
         s = re.sub(r"BBBB@@@@", r"-", s)
         s = re.sub(r"@@@@", r"-", s)
         s = re.sub(r"BBBB", r"\n\n", s)
         # ----------- Cleanup remains
         s = re.sub(r"@cliexend\s*", r"", s)
         return s

     def separate_page_names(self, group):
         """Return the file name of the separate page for ``group``.

         The base implementation returns the empty string, which
         ``process`` treats as "no separate page for this group".
         """
         no_page = ""
         return no_page

     # This pushes the given text block <indent> spaces to the right
     def reindent(self, s, indent):
         """Return ``s`` with ``indent`` spaces inserted after every newline.

         The first line is left untouched because nothing is inserted
         before the first newline.
         """
         padding = " " * indent
         return s.replace("\n", "\n" + padding)

     # This aligns the given textblock left (no indent)
     def noindent(self, s):
         """Return ``s`` with the indentation after every newline removed.

         Spaces, form feeds, vertical tabs and tabs directly following a
         newline are dropped; the first line is left untouched.
         """
         first, *rest = s.split("\n")
         stripped = [line.lstrip(" \f\v\t") for line in rest]
         return "\n".join([first] + stripped)

 class Format(object):
     """Output format class"""

     # Name of this output format; None on this base class.
     name = None

     # Expected file extension of templates that build this format; None on
     # this base class.
     extension = None


 class FormatMarkdown(Format):
     """Markdown output format"""

     # Format name; also the key this class is registered under below.
     name = "markdown"
     # File extension of the templates that build this format.
     extension = ".md"


 # Register 'markdown'
 formats[FormatMarkdown.name] = FormatMarkdown


 class FormatItemlist(Format):
     """Itemlist output format"""

     # Format name; also the key this class is registered under below.
     name = "itemlist"
     # File extension of the templates that build this format.
     extension = ".itemlist"


 # Register 'itemlist'
 formats[FormatItemlist.name] = FormatItemlist
	# Copyright (c) 2016 Comcast Cable Communications Management, LLC.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at:
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	# Generation template class

	import html.parser
	import json
	import logging
	import os
	import sys
	import re

	import jinja2

	# Registry of known siphon processors and their classes; classes register
	# themselves in this dictionary.
	siphons = dict()

	# Registry of known output formats and their classes.
	formats = dict()


	class Siphon(object):
	    """Generate rendered output for siphoned data."""

	    # Our siphon name; set by subclasses.
	    name = None

	    # Name of an identifier used by this siphon; set by subclasses.
	    identifier = None

	    # The pyparsing object to use to parse with; set by subclasses.
	    _parser = None

	    # The input data (filled in by load_json).
	    _cmds = None

	    # Group key to (directory, file) mapping.
	    _group = None

	    # Logging handler.
	    log = None

	    # Directory to look for siphon rendering templates.
	    template_directory = None

	    # Directory to output parts in.
	    outdir = None

	    # Template environment, if we're using templates.
	    _tplenv = None

	    def __init__(self, template_directory, format, outdir, repository_link):
	        """Set up the logger, the output format and the template environment.

	        :param template_directory: root directory of the templates.
	        :param format: key looked up in the module-level ``formats``.
	        :param outdir: directory stored on the instance as ``outdir``.
	        :param repository_link: stored on the instance as-is.
	        :raises KeyError: if ``format`` is not present in ``formats``.
	        """
	        super(Siphon, self).__init__()
	        self.log = logging.getLogger("siphon.process.%s" % self.name)

	        # Get our output format details
	        fmt_klass = formats[format]
	        fmt = fmt_klass()
	        self._format = fmt

	        # Sort out the template search path
	        def _tpldir(name):
	            return os.sep.join((template_directory, fmt.name, name))

	        self.template_directory = template_directory
	        searchpath = [
	            _tpldir(self.name),
	            _tpldir("default"),
	        ]
	        self.outdir = outdir
	        loader = jinja2.FileSystemLoader(searchpath=searchpath)
	        self._tplenv = jinja2.Environment(
	            loader=loader,
	            trim_blocks=True,
	            autoescape=False,
	            keep_trailing_newline=True)

	        # Convenience: keep references to the html module's escape and
	        # unescape helpers on the instance. Templates receive the
	        # instance as ``this`` and can use them if needed.
	        self._h = html.parser.HTMLParser()
	        self.escape = html.escape
	        self.unescape = html.unescape

	        # TODO: customize release
	        self.repository_link = repository_link

	    # Output renderers

	    def index_sort_key(self, group):
	        """Return the object used as the sorting key in the item index."""
	        return group

	    def index_header(self):
	        """Return the header string for the top of the item index."""
	        return self.template("index_header")

	    def index_section(self, group):
	        """Return the string fragment for one section of the item index."""
	        return self.template("index_section", group=group)

	    def index_entry(self, meta, item):
	        """Return the string fragment for one entry of the item index."""
	        return self.template("index_entry", meta=meta, item=item)

	    def item_sort_key(self, item):
	        """Return the sorting key for an item within a section."""
	        return item['name']

	    def group_key(self, directory, file, macro, name):
	        """Return the key for grouping items, recording it in ``_group``.

	        Items are grouped by ``file`` when the ``_global`` data has a
	        ``'group_label'`` for that file, otherwise by ``directory``.
	        """
	        _global = self._cmds['_global']

	        if file in _global and 'group_label' in _global[file]:
	            self._group[file] = (directory, file)
	            return file

	        self._group[directory] = (directory, None)
	        return directory

	    def item_key(self, directory, file, macro, name):
	        """Return the key identifying an item within a grouping."""
	        return name

	    def item_header(self, group):
	        """Return the header string used when rendering a group's items."""
	        return self.template("item_header", group=group)

	    def item_format(self, meta, item):
	        """Return the body string used when rendering an item."""
	        return self.template("item_format", meta=meta, item=item)

	    def page_label(self, group):
	        """Return the label string for the page reference."""
	        return "_".join((
	            self.name,
	            self.sanitize_label(group)
	        ))

	    def page_title(self, group):
	        """Return the title to use for the page of ``group``.

	        The file's ``'group_label'`` is preferred, then the directory's;
	        with neither, the directory itself is returned.
	        """
	        _global = self._cmds['_global']
	        (directory, file) = self._group[group]

	        if file and file in _global and 'group_label' in _global[file]:
	            return _global[file]['group_label']

	        if directory in _global and 'group_label' in _global[directory]:
	            return _global[directory]['group_label']

	        return directory

	    def item_label(self, group, item):
	        """Return the label string for the section reference."""
	        return "__".join((
	            self.name,
	            item
	        ))

	    def sanitize_label(self, value):
	        """Label sanitizer: replace spaces, slashes and dots with "_"."""
	        return value.replace(" ", "_") \
	                    .replace("/", "_") \
	                    .replace(".", "_")

	    def template(self, name, **kwargs):
	        """Render template ``name`` (plus the format's extension).

	        The instance is passed to the template as ``this``.
	        """
	        tpl = self._tplenv.get_template(name + self._format.extension)
	        return tpl.render(
	            this=self,
	            **kwargs)

	    # Processing methods

	    def load_json(self, files):
	        """Parse the input files into a more usable dictionary structure.

	        Resets ``self._cmds`` and ``self._group``; each parsed item ends
	        up under ``self._cmds[group_key][item_key]``.

	        :param files: iterable of paths to JSON files, each holding a
	            ``"global"`` section and an ``"items"`` list.
	        :raises Exception: whatever ``self._parser.parse`` raises is
	            logged with the offending item and re-raised.
	        """
	        self._cmds = {}
	        self._group = {}

	        for filename in files:
	            filename = os.path.relpath(filename)
	            self.log.info("Parsing items in file \"%s\".", filename)
	            # Read as UTF-8 explicitly so that parsing does not depend
	            # on the locale's default encoding.
	            with open(filename, "r", encoding="utf-8") as fd:
	                data = json.load(fd)

	            # NOTE: each file's "global" section replaces the previous one.
	            self._cmds['_global'] = data['global']

	            # Iterate the items loaded and regroup them
	            for item in data["items"]:
	                try:
	                    o = self._parser.parse(item['block'])
	                except Exception:
	                    self.log.error(
	                        "Exception parsing item: %s\n%s",
	                        json.dumps(item, separators=(',', ': '), indent=4),
	                        item['block'])
	                    raise

	                # Augment the item with everything but the raw block
	                o["meta"] = {
	                    key: value
	                    for key, value in item.items()
	                    if key != 'block'
	                }

	                # Load some interesting fields
	                directory = item['directory']
	                file = item['file']
	                macro = o["macro"]
	                name = o["name"]

	                # Generate keys to group items by
	                group_key = self.group_key(directory, file, macro, name)
	                item_key = self.item_key(directory, file, macro, name)

	                self._cmds.setdefault(group_key, {})[item_key] = o

	    def process(self, out=None):
	        """Iterate over the input data, calling render methods.

	        Index fragments are written to ``out`` immediately; item headers
	        and bodies are accumulated and either written to a separate page
	        (when ``separate_page_names`` returns a non-empty name) or to
	        ``out`` at the end.

	        :param out: object with a ``write`` method; ``sys.stdout`` if None.
	        """
	        if out is None:
	            out = sys.stdout

	        # Accumulated body contents
	        contents = ""

	        # Write the header for this siphon type
	        out.write(self.index_header())

	        # Sort key helper for the index
	        def group_sort_key(group):
	            return self.index_sort_key(group)

	        # Iterate the dictionary and process it
	        for group in sorted(self._cmds.keys(), key=group_sort_key):
	            if group.startswith('_'):
	                continue

	            self.log.info("Processing items in group \"%s\" (%s)." %
	                          (group, group_sort_key(group)))

	            # Generate the section index entry (write it now)
	            out.write(self.index_section(group))

	            # Generate the item header (save for later)
	            contents += self.item_header(group)

	            def item_sort_key(key):
	                return self.item_sort_key(self._cmds[group][key])

	            for key in sorted(self._cmds[group].keys(), key=item_sort_key):
	                self.log.debug("--- Processing key \"%s\" (%s)." %
	                               (key, item_sort_key(key)))

	                o = self._cmds[group][key]
	                meta = {
	                    "directory": o['meta']['directory'],
	                    "file": o['meta']['file'],
	                    "macro": o['macro'],
	                    "name": o['name'],
	                    "key": key,
	                    "label": self.item_label(group, key),
	                }

	                # Generate the index entry for the item (write it now)
	                out.write(self.index_entry(meta, o))

	                # Generate the item itself (save for later)
	                contents += self.item_format(meta, o)

	            page_name = self.separate_page_names(group)
	            if page_name != "":
	                path = os.path.join(self.outdir, page_name)
	                with open(path, "w+") as page:
	                    page.write(contents)
	                contents = ""

	        # Deliver the accumulated body output
	        out.write(contents)

	    def do_cliexstart(self, matchobj):
	        """Turn a ``@cliexstart{title}...@cliexend`` match into a console block."""
	        title = matchobj.group(1)
	        title = ' '.join(title.splitlines())
	        content = matchobj.group(2)
	        # Four-space indent keeps the text inside the code block
	        content = re.sub(r"\n", r"\n    ", content)
	        return "\n\n.. code-block:: console\n\n    %s\n    %s\n\n" % (title, content)

	    def do_clistart(self, matchobj):
	        """Turn a ``@clistart...@cliend`` match into a console code block."""
	        content = matchobj.group(1)
	        content = re.sub(r"\n", r"\n    ", content)
	        return "\n\n.. code-block:: console\n\n    %s\n\n" % content

	    def do_cliexcmd(self, matchobj):
	        """Turn a ``@cliexcmd{...}`` match into a one-line console block."""
	        content = matchobj.group(1)
	        content = ' '.join(content.splitlines())
	        return "\n\n.. code-block:: console\n\n    %s\n\n" % content

	    def process_list(self, matchobj):
	        """Re-indent one list item and re-wrap it in the list markers."""
	        content = matchobj.group(1)
	        content = self.reindent(content, 2)
	        return "@@@@%s\nBBBB" % content

	    def process_special(self, s):
	        """Rewrite the special markers and HTML tags in ``s`` and return it.

	        The substitutions are applied strictly in order.
	        """
	        # ----------- markers to remove
	        s = re.sub(r"@cliexpar\s*", r"", s)
	        s = re.sub(r"@parblock\s*", r"", s)
	        s = re.sub(r"@endparblock\s*", r"", s)
	        s = re.sub(r"<br>", "", s)
	        # ----------- emphasis
	        # <b><em>
	        s = re.sub(r"<b><em>\s*", "``", s)
	        s = re.sub(r"\s*</b></em>", "``", s)
	        s = re.sub(r"\s*</em></b>", "``", s)
	        # <b>
	        s = re.sub(r"<b>\s*", "**", s)
	        s = re.sub(r"\s*</b>", "**", s)
	        # <code>
	        s = re.sub(r"<code>\s*", "``", s)
	        s = re.sub(r"\s*</code>", "``", s)
	        # <em>
	        s = re.sub(r"'?<em>\s*", r"``", s)
	        s = re.sub(r"\s*</em>'?", r"``", s)
	        # @c <something>
	        s = re.sub(r"@c\s(\S+)", r"``\1``", s)
	        # ----------- todos
	        s = re.sub(r"@todo[^\n]*", "", s)
	        s = re.sub(r"@TODO[^\n]*", "", s)
	        # ----------- code blocks
	        s = re.sub(r"@cliexcmd{(.+?)}", self.do_cliexcmd, s, flags=re.DOTALL)
	        s = re.sub(r"@cliexstart{(.+?)}(.+?)@cliexend", self.do_cliexstart, s, flags=re.DOTALL)
	        s = re.sub(r"@clistart(.+?)@cliend", self.do_clistart, s, flags=re.DOTALL)
	        # ----------- lists
	        s = re.sub(r"^\s*-", r"\n@@@@", s, flags=re.MULTILINE)
	        s = re.sub(r"@@@@(.*?)\n\n+", self.process_list, s, flags=re.DOTALL)
	        s = re.sub(r"BBBB@@@@", r"-", s)
	        s = re.sub(r"@@@@", r"-", s)
	        s = re.sub(r"BBBB", r"\n\n", s)
	        # ----------- Cleanup remains
	        s = re.sub(r"@cliexend\s*", r"", s)
	        return s

	    def separate_page_names(self, group):
	        """Return the separate page file name for ``group``; "" means none."""
	        return ""

	    def reindent(self, s, indent):
	        """Push the given text block ``indent`` spaces to the right."""
	        ind = " " * indent
	        s = re.sub(r"\n", "\n" + ind, s)
	        return s

	    def noindent(self, s):
	        """Align the given text block left (no indent)."""
	        s = re.sub(r"\n[ \f\v\t]*", "\n", s)
	        return s

	class Format(object):
	    """Output format class"""

	    # Name of this output format; None on this base class.
	    name = None

	    # Expected file extension of templates that build this format; None
	    # on this base class.
	    extension = None


	class FormatMarkdown(Format):
	    """Markdown output format"""

	    # Format name; also the key this class is registered under below.
	    name = "markdown"
	    # File extension of the templates that build this format.
	    extension = ".md"


	# Register 'markdown'
	formats["markdown"] = FormatMarkdown


	class FormatItemlist(Format):
	    """Itemlist output format"""

	    # Format name; also the key this class is registered under below.
	    name = "itemlist"
	    # File extension of the templates that build this format.
	    extension = ".itemlist"


	# Register 'itemlist'
	formats["itemlist"] = FormatItemlist