summary | refs | log | tree | commit | diff | stats
path: root/docs/build_manpage.py
diff options
context:
space:
mode:
Diffstat (limited to 'docs/build_manpage.py')
-rwxr-xr-x [-rw-r--r--] docs/build_manpage.py 235
1 files changed, 234 insertions, 1 deletions
diff --git a/docs/build_manpage.py b/docs/build_manpage.py
index ab730481..a0ab4edb 100644..100755
--- a/docs/build_manpage.py
+++ b/docs/build_manpage.py
@@ -1,2 +1,235 @@
#!/usr/bin/env python3
-print("Manpage build not yet supported")
+from datetime import date
+from io import StringIO
+from lxml import etree
+import markdown
+from markdown.extensions import Extension
+import re
+import sys
+import yaml
+
# Prevent our markdown parser from trying to help by interpreting things in
# angle brackets as HTML tags.
class EscapeHtml(Extension):
    """Markdown extension that removes the parser's raw-HTML handling."""

    def extendMarkdown(self, md, md_globals=None):
        """Drop the 'html_block' preprocessor and the 'html' inline pattern.

        ``md_globals`` is accepted but unused.  It is optional so that the
        extension works both with Markdown releases that still pass it and
        with releases that call ``extendMarkdown(md)`` only.
        """
        targets = (
            (md.preprocessors, 'html_block'),
            (md.inlinePatterns, 'html'),
        )
        for registry, name in targets:
            if hasattr(registry, 'deregister'):
                # Registry objects expose deregister(); item deletion on
                # them was deprecated and later removed.
                registry.deregister(name)
            else:
                # Mapping-style containers only support item deletion.
                del registry[name]
+
class RoffWalker:
    """Walk a parsed HTML element tree and emit it as roff (man page) source.

    Text is accumulated in an in-memory buffer while walking and written to
    ``output`` in one call at the end of :meth:`walk`.  Elements must offer
    the API used below: ``tag``, ``text``, ``tail``, ``len()``, indexing,
    ``getnext()`` and ``itertext()``.
    """

    def __init__(self, tree, output=sys.stdout):
        """Store the root element and the stream that receives the result."""
        self.tree = tree
        self.target = output
        self.f = StringIO()

    def walk(self):
        """Convert the whole tree and write the roff text to the target."""
        self._walk(self.tree, parent_tag=None)
        # We don't want to start lines with \. because that can confuse man.
        # Lines that start with \. are rewritten to start with \&. so that
        # man does not treat them as a directive.
        data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE)
        self.target.write(data)

    def _ul_is_special(self, root):
        """Return True when ``root`` has a single <li> child ending in ':'."""
        if len(root) != 1:
            return False
        child = root[0]
        if child.tag != 'li':
            return False
        msg = ''.join(child.itertext()).strip()
        return msg.endswith(':')

    def _walk_child(self, root):
        """Walk the children of ``root``, if it has any.

        Only the first child is passed on; ``_walk`` follows the sibling
        chain from there.
        """
        if len(root) > 0:
            self._walk(root[0], parent_tag=root.tag)

    def _write_element(self, root, ensure_newline=True):
        """Write an element's leading text, its children and its tail."""
        if root.text is not None:
            text = self._sanitize(root.text)
            self.__write_raw(text)
        self._walk_child(root)
        self._write_tail(root, ensure_newline=ensure_newline)

    def _write_tail(self, root, ensure_newline=False, inline=False):
        """Write the text that follows ``root``'s closing tag.

        A tail that is exactly one newline is skipped unless ``inline`` is
        true.  When ``ensure_newline`` is true a newline is appended unless
        the written text already ends with one.
        """
        if root.tail is not None:
            if inline or root.tail != '\n':
                text = self._sanitize(root.tail)
                # _sanitize as defined in this class turns every newline
                # into a space, so this only triggers if it is overridden.
                if text.endswith('\n'):
                    ensure_newline = False
                self.__write_raw(text)
        if ensure_newline:
            self.__write_raw('\n')

    def _walk(self, root, parent_tag=None):
        """Emit roff for ``root`` and each sibling that follows it.

        ``parent_tag`` is the tag of the enclosing element (None at the top
        level); paragraphs inside list items are laid out differently.
        """
        last_tag = None
        while root is not None:
            if root.tag == 'h1':
                self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(date.today().strftime('%B %Y')))
                self.__write_cmd('.SH "NAME"')
                # TODO: properly parse this
                self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n")

            elif root.tag == 'h2':
                self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip()))

            elif root.tag == 'h3':
                text = ''.join(root.itertext()).strip()
                self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text)))

            elif root.tag == 'p':
                # No paragraph break directly after a heading or inside a
                # list item.
                if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']:
                    self.__write_cmd('.P')
                self._write_element(root, ensure_newline=(parent_tag != 'li'))

            elif root.tag == 'ul':
                if self._ul_is_special(root):
                    # A single "label:" item becomes a tagged paragraph and
                    # the paragraphs that follow it are consumed here.
                    li = root[0]
                    self.__write_cmd('.TP')
                    self._write_element(li)
                    follower = root.getnext()
                    while follower is not None and follower.tag == 'p':
                        after = follower.getnext()
                        if after is not None and after.tag == 'pre':
                            # we don't want to .IP these, because it'll look
                            # funny with the code indent
                            break
                        self.__write_cmd('.IP')
                        self._write_element(follower)
                        # Advance root so the outer loop resumes after the
                        # paragraphs handled here.
                        root = follower
                        follower = root.getnext()
                else:
                    self._walk_child(root)
                    self._write_tail(root)
                    # A pre tag after the end of a list doesn't want two of
                    # the indentation commands
                    if root.getnext() is None or root.getnext().tag != 'pre':
                        self.__write_cmd('.IP "" 0')

            elif root.tag == 'li':
                self.__write_cmd(r'.IP "\(bu" 4')
                if root.text is not None and root.text.strip() != '':
                    text = self._sanitize(root.text)
                    self.__write_raw(text)
                self._walk_child(root)
                self._write_tail(root, ensure_newline=True)

            elif root.tag == 'strong':
                if root.text is not None:
                    text = self._sanitize(root.text)
                    self.__write_raw('\\fB{}\\fR'.format(text))

                self._write_tail(root, inline=True)

            elif root.tag == 'em':
                if root.text is not None:
                    text = self._sanitize(root.text)
                    self.__write_raw('\\fI{}\\fR'.format(text))
                self._write_tail(root, inline=True)

            elif root.tag == 'code':
                if root.text is not None:
                    text = self._code_sanitize(root.text)
                    self.__write_raw('\\fB{}\\fR'.format(text))
                self._write_tail(root, inline=True)

            elif root.tag == 'pre':
                self.__write_cmd('.IP "" 4')
                self.__write_cmd('.nf\n')  # extra newline for spacing reasons
                # Consecutive <pre> siblings are merged into one block,
                # separated by a blank line.
                block = root
                first = True
                while block is not None and block.tag == 'pre':
                    if not first:
                        self.__write_raw('\n')
                    text = ''.join(block.itertext(with_tail=False))
                    self.__write_raw(self._pre_sanitize(text))
                    first = False
                    root = block
                    block = block.getnext()
                self.__write_cmd('.fi')
                self.__write_cmd('.IP "" 0')

            else:
                # Any other tag contributes nothing itself; only its
                # children are rendered.
                self._walk_child(root)

            last_tag = root.tag
            root = root.getnext()

    def _base_sanitize(self, text):
        """Escape backslash, '.', "'" and '-' for roff."""
        # Backslashes go first so the escapes added below are not doubled.
        text = text.replace('\\', '\\e')
        text = text.replace('.', '\\.')
        text = text.replace("'", "\\'")
        text = text.replace('-', '\\-')
        return text

    def _pre_sanitize(self, text):
        """Escape preformatted text, leaving its whitespace untouched."""
        return self._base_sanitize(text)

    def _code_sanitize(self, text):
        """Escape inline code and turn each whitespace character into a space."""
        text = self._base_sanitize(text)
        text = re.sub(r'\s', ' ', text)
        return text

    def _h3_sanitize(self, text):
        """Escape a subsection title and join its lines with single spaces."""
        text = self._base_sanitize(text)
        # A newline next to a space collapses together with that space.
        text = re.sub(' \n|\n ', ' ', text)
        text = re.sub('\n', ' ', text)
        return text

    def _sanitize(self, text):
        """Escape running text, italicise <...> spans and flatten whitespace."""
        text = self._base_sanitize(text)
        text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text)
        text = re.sub(r' +', ' ', text)
        text = text.replace('\n', ' ')
        return text

    def __write_cmd(self, dat):
        """Buffer a line holding a lone '.', then ``dat`` on its own line."""
        print('.', dat, sep='\n', file=self.f)

    def __write_raw(self, dat):
        """Buffer ``dat`` exactly as given, without a trailing newline."""
        print(dat, sep='', end='', file=self.f)
+
def load_yml_file(fn):
    """Parse the YAML file at path ``fn`` and return the resulting object.

    ``yaml.safe_load`` is used instead of ``yaml.load``: calling ``load``
    without an explicit ``Loader`` can construct arbitrary Python objects
    and is rejected by newer PyYAML releases.  The file is read as UTF-8
    rather than in the locale's default encoding.
    """
    with open(fn, encoding='utf-8') as f:
        return yaml.safe_load(f)
+
def dedent_body(body):
    """Strip the leading space from each line that starts with one.

    A line is changed only when its first character is a space and its
    second character is not whitespace; all other lines are kept as they
    are.  Lines are delimited by '\\n'.
    """
    # NOTE(review): the pattern is reproduced as found; confirm the intended
    # number of leading spaces against the repository copy of this script.
    # With MULTILINE, '^' anchors after every '\n', which matches the effect
    # of splitting on '\n' and substituting line by line.
    return re.sub(r'^ (\S)', r'\1', body, flags=re.MULTILINE)
+
def convert_manual_to_markdown(manual_path="content/manual/manual.yml"):
    """Render the YAML manual as one markdown string.

    ``manual_path`` is the YAML file to read; it defaults to the location
    that was previously hard-coded.

    The result is the 'manpage_intro' text, the top-level 'body', then for
    every entry of 'sections' a '##' heading (title upper-cased) with its
    body, and for every entry of a section's 'entries' a '###' heading with
    its body and, when 'examples' is present, one fenced block listing each
    example's program, input and outputs.  'manpage_epilogue' comes last.
    """
    manual = load_yml_file(manual_path)
    out = StringIO()
    out.write(manual.get('manpage_intro', '\n'))
    out.write(dedent_body(manual.get('body', '\n')))
    for section in manual.get('sections', []):
        out.write('## {}\n'.format(section.get('title', '').upper()))
        out.write(dedent_body(section.get('body', '\n')))
        out.write('\n')
        for entry in section.get('entries', []):
            out.write('### {}\n'.format(entry.get('title', '')))
            out.write(dedent_body(entry.get('body', '\n')))
            out.write('\n')
            examples = entry.get('examples')
            if examples is not None:
                # All examples of one entry share a single fenced block,
                # separated by blank lines.
                out.write("~~~~\n")
                for index, example in enumerate(examples):
                    if index:
                        out.write('\n')
                    out.write("jq '{}'\n".format(example.get('program', '')))
                    out.write(" {}\n".format(example.get('input', '')))
                    output = [str(x) for x in example.get('output', [])]
                    out.write("=> {}\n".format(', '.join(output)))
                out.write("~~~~\n")
            out.write('\n')
    out.write(manual.get('manpage_epilogue', ''))
    return out.getvalue()
+
# Step 1: render manual.yml as the markdown dialect the later steps expect.
markdown_data = convert_manual_to_markdown()

# Step 2: turn that markdown into HTML.  EscapeHtml keeps angle-bracketed
# text from being read as HTML tags; 'fenced_code' enables fenced blocks.
html_data = markdown.markdown(
    markdown_data,
    extensions=[EscapeHtml(), 'fenced_code'],
)

# Step 3: parse the HTML into an element tree that can be traversed.
tr = etree.HTML(html_data, etree.HTMLParser())

# Step 4: walk the tree and emit roff (RoffWalker writes to sys.stdout
# unless given another stream).
RoffWalker(tr).walk()