diff options
author | William Langford <wlangfor@gmail.com> | 2019-02-20 20:53:10 -0500 |
---|---|---|
committer | Nico Williams <nico@cryptonector.com> | 2019-02-26 11:10:38 -0600 |
commit | 175dbc4e25098e50d5173578d6b36df7abb5c549 (patch) | |
tree | f032d392d32e20c41e24aab819bad6a1e7ce0de6 | |
parent | c1f11855e31e3975ef74c7d19304bee4e1b64e71 (diff) |
Remove ruby dependency from manpage build
-rw-r--r-- | Makefile.am | 4 | ||||
-rw-r--r-- | docs/Pipfile | 3 | ||||
-rw-r--r-- | docs/Pipfile.lock | 34 | ||||
-rwxr-xr-x[-rw-r--r--] | docs/build_manpage.py | 235 | ||||
-rw-r--r-- | docs/content/manual/manual.yml | 62 |
5 files changed, 302 insertions, 36 deletions
diff --git a/Makefile.am b/Makefile.am index 9f2de435..8cb4a1a6 100644 --- a/Makefile.am +++ b/Makefile.am @@ -165,8 +165,8 @@ install-binaries: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) install-exec DOC_FILES = docs/content docs/public docs/templates docs/site.yml \ - docs/Pipfile docs/Pipfile.lock docs/build_website.py \ - docs/README.md jq.1.prebuilt + docs/Pipfile docs/Pipfile.lock docs/build_manpage.py \ + docs/build_website.py docs/README.md jq.1.prebuilt EXTRA_DIST = $(DOC_FILES) $(man_MANS) $(TESTS) $(TEST_LOG_COMPILER) \ jq.1.prebuilt jq.spec src/lexer.c src/lexer.h src/parser.c \ diff --git a/docs/Pipfile b/docs/Pipfile index e68cfbba..24eaf578 100644 --- a/docs/Pipfile +++ b/docs/Pipfile @@ -8,4 +8,5 @@ verify_ssl = true [packages] jinja2 = "*" pyyaml = "*" -markdown = "*"
\ No newline at end of file +markdown = "*" +lxml = "*"
\ No newline at end of file diff --git a/docs/Pipfile.lock b/docs/Pipfile.lock index 33ff75b5..3c1d2d47 100644 --- a/docs/Pipfile.lock +++ b/docs/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6cffc39e0d43a6d7c78f39636c7ed1b047f7b819b158213a96d7cbfefd6524d7" + "sha256": "16a9ef323592a417142c30be17e58c3cf36dc2bfdbf4757106a796b22262a1ce" }, "pipfile-spec": 6, "requires": {}, @@ -22,6 +22,38 @@ "index": "pypi", "version": "==2.10" }, + "lxml": { + "hashes": [ + "sha256:0537eee4902e8bf4f41bfee8133f7edf96533dd175930a12086d6a40d62376b2", + "sha256:0562ec748abd230ab87d73384e08fa784f9b9cee89e28696087d2d22c052cc27", + "sha256:09e91831e749fbf0f24608694e4573be0ef51430229450c39c83176cc2e2d353", + "sha256:1ae4c0722fc70c0d4fba43ae33c2885f705e96dce1db41f75ae14a2d2749b428", + "sha256:1c630c083d782cbaf1f7f37f6cac87bda9cff643cf2803a5f180f30d97955cef", + "sha256:2fe74e3836bd8c0fa7467ffae05545233c7f37de1eb765cacfda15ad20c6574a", + "sha256:37af783c2667ead34a811037bda56a0b142ac8438f7ed29ae93f82ddb812fbd6", + "sha256:3f2d9eafbb0b24a33f56acd16f39fc935756524dcb3172892721c54713964c70", + "sha256:47d8365a8ef14097aa4c65730689be51851b4ade677285a3b2daa03b37893e26", + "sha256:510e904079bc56ea784677348e151e1156040dbfb736f1d8ea4b9e6d0ab2d9f4", + "sha256:58d0851da422bba31c7f652a7e9335313cf94a641aa6d73b8f3c67602f75b593", + "sha256:7940d5c2185ffb989203dacbb28e6ae88b4f1bb25d04e17f94b0edd82232bcbd", + "sha256:7cf39bb3a905579836f7a8f3a45320d9eb22f16ab0c1e112efb940ced4d057a5", + "sha256:9563a23c1456c0ab550c087833bc13fcc61013a66c6420921d5b70550ea312bf", + "sha256:95b392952935947e0786a90b75cc33388549dcb19af716b525dae65b186138fc", + "sha256:983129f3fd3cef5c3cf067adcca56e30a169656c00fcc6c648629dbb850b27fa", + "sha256:a0b75b1f1854771844c647c464533def3e0a899dd094a85d1d4ed72ecaaee93d", + "sha256:b5db89cc0ef624f3a81214b7961a99f443b8c91e88188376b6b322fd10d5b118", + "sha256:c0a7751ba1a4bfbe7831920d98cee3ce748007eab8dfda74593d44079568219a", + 
"sha256:c0c5a7d4aafcc30c9b6d8613a362567e32e5f5b708dc41bc3a81dac56f8af8bb", + "sha256:d4d63d85eacc6cb37b459b16061e1f100d154bee89dc8d8f9a6128a5a538e92e", + "sha256:da5e7e941d6e71c9c9a717c93725cda0708c2474f532e3680ac5e39ec57d224d", + "sha256:dccad2b3c583f036f43f80ac99ee212c2fa9a45151358d55f13004d095e683b2", + "sha256:df46307d39f2aeaafa1d25309b8a8d11738b73e9861f72d4d0a092528f498baa", + "sha256:e70b5e1cb48828ddd2818f99b1662cb9226dc6f57d07fc75485405c77da17436", + "sha256:ea825562b8cd057cbc9810d496b8b5dec37a1e2fc7b27bc7c1e72ce94462a09a" + ], + "index": "pypi", + "version": "==4.3.1" + }, "markdown": { "hashes": [ "sha256:c00429bd503a47ec88d5e30a751e147dcb4c6889663cd3e2ba0afe858e009baa", diff --git a/docs/build_manpage.py b/docs/build_manpage.py index ab730481..a0ab4edb 100644..100755 --- a/docs/build_manpage.py +++ b/docs/build_manpage.py @@ -1,2 +1,235 @@ #!/usr/bin/env python3 -print("Manpage build not yet supported") +from datetime import date +from io import StringIO +from lxml import etree +import markdown +from markdown.extensions import Extension +import re +import sys +import yaml + +# Prevent our markdown parser from trying to help by interpreting things in angle brackets as HTML tags. +class EscapeHtml(Extension): + def extendMarkdown(self, md, md_globals): + del md.preprocessors['html_block'] + del md.inlinePatterns['html'] + +class RoffWalker(object): + def __init__(self, tree, output=sys.stdout): + self.tree = tree + self.target = output + self.f = StringIO() + + def walk(self): + self._walk(self.tree, parent_tag=None) + # We don't want to start lines with \. 
because that can confuse man + # For lines that start with \., we need to prefix them with \& so it + # knows not to treat that line as a directive + data = re.sub(r'^\\\.', r'\&.', self.f.getvalue(), flags=re.MULTILINE) + self.target.write(data) + + def _ul_is_special(self, root): + if len(root) != 1: + return False + child = root[0] + if child.tag != 'li': + return False + msg = ''.join(child.itertext()).strip() + return msg.endswith(':') + + def _walk_child(self, root): + if len(root) > 0: + self._walk(root[0], parent_tag=root.tag) + + def _write_element(self, root, ensure_newline=True): + if root.text is not None: + text = self._sanitize(root.text) + self.__write_raw(text) + self._walk_child(root) + self._write_tail(root, ensure_newline=ensure_newline) + + def _write_tail(self, root, ensure_newline=False, inline=False): + if root.tail is not None: + if inline or root.tail != '\n': + text = self._sanitize(root.tail) + if text.endswith('\n'): + ensure_newline = False + self.__write_raw(text) + if ensure_newline: + self.__write_raw('\n') + + def _walk(self, root, parent_tag=None): + last_tag = None + while root is not None: + if root.tag == 'h1': + self.__write_cmd('.TH "JQ" "1" "{}" "" ""'.format(date.today().strftime('%B %Y'))) + self.__write_cmd('.SH "NAME"') + # TODO: properly parse this + self.__write_raw(r'\fBjq\fR \- Command\-line JSON processor' + "\n") + + elif root.tag == 'h2': + self.__write_cmd('.SH "{}"'.format(''.join(root.itertext()).strip())) + + elif root.tag == 'h3': + text = ''.join(root.itertext()).strip() + self.__write_cmd('.SS "{}"'.format(self._h3_sanitize(text))) + + elif root.tag == 'p': + if last_tag not in ['h2', 'h3'] and parent_tag not in ['li']: + self.__write_cmd('.P') + self._write_element(root, ensure_newline=(parent_tag != 'li')) + + elif root.tag == 'ul': + if self._ul_is_special(root): + li = root[0] + self.__write_cmd('.TP') + self._write_element(li) + next = root.getnext() + while next is not None and next.tag == 'p': + if 
next.getnext() is not None and next.getnext().tag == 'pre': + # we don't want to .IP these, because it'll look funny with the code indent + break + self.__write_cmd('.IP') + self._write_element(next) + root = next + next = root.getnext() + else: + self._walk_child(root) + self._write_tail(root) + # A pre tag after the end of a list doesn't want two of the indentation commands + if root.getnext() is None or root.getnext().tag != 'pre': + self.__write_cmd('.IP "" 0') + + elif root.tag == 'li': + self.__write_cmd(r'.IP "\(bu" 4') + if root.text is not None and root.text.strip() != '': + text = self._sanitize(root.text) + self.__write_raw(text) + self._walk_child(root) + self._write_tail(root, ensure_newline=True) + + elif root.tag == 'strong': + if root.text is not None: + text = self._sanitize(root.text) + self.__write_raw('\\fB{}\\fR'.format(text)) + + self._write_tail(root, inline=True) + + elif root.tag == 'em': + if root.text is not None: + text = self._sanitize(root.text) + self.__write_raw('\\fI{}\\fR'.format(text)) + self._write_tail(root, inline=True) + + elif root.tag == 'code': + if root.text is not None: + text = self._code_sanitize(root.text) + self.__write_raw('\\fB{}\\fR'.format(text)) + self._write_tail(root, inline=True) + + elif root.tag == 'pre': + self.__write_cmd('.IP "" 4') + self.__write_cmd('.nf\n') # extra newline for spacing reasons + next = root + first = True + while next is not None and next.tag == 'pre': + if not first: + self.__write_raw('\n') + text = ''.join(next.itertext(with_tail=False)) + self.__write_raw(self._pre_sanitize(text)) + first = False + root = next + next = next.getnext() + self.__write_cmd('.fi') + self.__write_cmd('.IP "" 0') + + else: + self._walk_child(root) + + last_tag = root.tag + root = root.getnext() + + def _base_sanitize(self, text): + text = re.sub(r'\\', r'\\e', text) + text = re.sub(r'\.', r'\\.', text) + text = re.sub("'", r"\'", text) + text = re.sub('-', r'\-', text) + return text + + def 
_pre_sanitize(self, text): + return self._base_sanitize(text) + + def _code_sanitize(self, text): + text = self._base_sanitize(text) + text = re.sub(r'\s', ' ', text) + return text + + def _h3_sanitize(self, text): + text = self._base_sanitize(text) + text = re.sub(' \n|\n ', ' ', text) + text = re.sub('\n', ' ', text) + return text + + def _sanitize(self, text): + text = self._base_sanitize(text) + text = re.sub(r'<([^>]+)>', r'\\fI\1\\fR', text) + text = re.sub(r' +', ' ', text) + text = re.sub('\n', ' ', text) + return text + + def __write_cmd(self, dat): + print('.', dat, sep='\n', file=self.f) + pass + + def __write_raw(self, dat): + print(dat, sep='', end='', file=self.f) + pass + +def load_yml_file(fn): + with open(fn) as f: + return yaml.load(f) + +def dedent_body(body): + lines = [re.sub(r'^ (\S)', r'\1', l) for l in body.split('\n')] + return '\n'.join(lines) + +def convert_manual_to_markdown(): + f = StringIO() + manual = load_yml_file("content/manual/manual.yml") + f.write(manual.get('manpage_intro', '\n')) + f.write(dedent_body(manual.get('body', '\n'))) + for section in manual.get('sections', []): + f.write('## {}\n'.format(section.get('title', '').upper())) + f.write(dedent_body(section.get('body', '\n'))) + f.write('\n') + for entry in section.get('entries', []): + f.write('### {}\n'.format(entry.get('title', ''))) + f.write(dedent_body(entry.get('body', '\n'))) + f.write('\n') + if entry.get('examples') is not None: + f.write("~~~~\n") + first = True + for example in entry.get('examples'): + if not first: + f.write('\n') + f.write("jq '{}'\n".format(example.get('program', ''))) + f.write(" {}\n".format(example.get('input', ''))) + output = [str(x) for x in example.get('output', [])] + f.write("=> {}\n".format(', '.join(output))) + first = False + f.write("~~~~\n") + f.write('\n') + f.write(manual.get('manpage_epilogue', '')) + return f.getvalue() + +# Convert manual.yml to our special markdown format +markdown_data = convert_manual_to_markdown() + 
+# Convert markdown to html +html_data = markdown.markdown(markdown_data, extensions=[EscapeHtml(), 'fenced_code']) + +# Parse the html into a tree so we can walk it +tr = etree.HTML(html_data, etree.HTMLParser()) + +# Convert the markdown to ROFF +RoffWalker(tr).walk() diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index a9d2fcd2..ef609a64 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -172,7 +172,7 @@ sections: ASCII output with every non-ASCII character replaced with the equivalent escape sequence. - * `--unbuffered` + * `--unbuffered`: Flush the output after each JSON object is printed (useful if you're piping a slow data source into jq and piping jq's @@ -658,10 +658,10 @@ sections: - **Strings** are added by being joined into a larger string. - **Objects** are added by merging, that is, inserting all - the key-value pairs from both objects into a single - combined object. If both objects contain a value for the - same key, the object on the right of the `+` wins. (For - recursive merge use the `*` operator.) + the key-value pairs from both objects into a single + combined object. If both objects contain a value for the + same key, the object on the right of the `+` wins. (For + recursive merge use the `*` operator.) `null` can be added to any value, and returns the other value unchanged. @@ -1975,40 +1975,40 @@ sections: jq provides a few SQL-style operators. - * INDEX(stream; index_expression): + * INDEX(stream; index_expression): - This builtin produces an object whose keys are computed by - the given index expression applied to each value from the - given stream. + This builtin produces an object whose keys are computed by + the given index expression applied to each value from the + given stream. - * JOIN($idx; stream; idx_expr; join_expr): + * JOIN($idx; stream; idx_expr; join_expr): - This builtin joins the values from the given stream to the - given index. 
The index's keys are computed by applying the - given index expression to each value from the given stream. - An array of the value in the stream and the corresponding - value from the index is fed to the given join expression to - produce each result. + This builtin joins the values from the given stream to the + given index. The index's keys are computed by applying the + given index expression to each value from the given stream. + An array of the value in the stream and the corresponding + value from the index is fed to the given join expression to + produce each result. - * JOIN($idx; stream; idx_expr): + * JOIN($idx; stream; idx_expr): - Same as `JOIN($idx; stream; idx_expr; .)`. + Same as `JOIN($idx; stream; idx_expr; .)`. - * JOIN($idx; idx_expr): + * JOIN($idx; idx_expr): - This builtin joins the input `.` to the given index, applying - the given index expression to `.` to compute the index key. - The join operation is as described above. + This builtin joins the input `.` to the given index, applying + the given index expression to `.` to compute the index key. + The join operation is as described above. - * IN(s): + * IN(s): - This builtin outputs `true` if `.` appears in the given - stream, otherwise it outputs `false`. + This builtin outputs `true` if `.` appears in the given + stream, otherwise it outputs `false`. - * IN(source; s): + * IN(source; s): - This builtin outputs `true` if any value in the source stream - appears in the second stream, otherwise it outputs `false`. + This builtin outputs `true` if any value in the source stream + appears in the second stream, otherwise it outputs `false`. - title: "`builtins`" body: | @@ -2643,9 +2643,9 @@ sections: For example, in the following expression there is a binding which is visible "to the right" of it, `... | .*3 as - $times_three | [. + $times_three] | ...`, but not "to the + $times_three | [. + $times_three] | ...`, but not "to the left". Consider this expression now, `... 
| (.*3 as - $times_three | [.+ $times_three]) | ...`: here the binding + $times_three | [. + $times_three]) | ...`: here the binding `$times_three` is _not_ visible past the closing parenthesis. - title: Reduce @@ -2868,7 +2868,7 @@ sections: Two builtins provide minimal output capabilities, `debug`, and `stderr`. (Recall that a jq program's output values are always - output as JSON texts on `stdout`.) The `debug` builtin can have + output as JSON texts on `stdout`.) The `debug` builtin can have application-specific behavior, such as for executables that use the libjq C API but aren't the jq executable itself. The `stderr` builtin outputs its input in raw mode to stderr with no additional |