diff options
-rw-r--r-- | .github/CODEOWNERS | 6 | ||||
-rw-r--r-- | doc/Makefile | 9 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua | 23 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua (renamed from doc/labelless-link-is-xref.lua) | 10 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua | 36 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/link-unix-man-references.lua | 18 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/myst-reader/roles.lua | 29 | ||||
-rw-r--r-- | doc/build-aux/pandoc-filters/myst-writer/roles.lua | 25 | ||||
-rw-r--r-- | doc/contributing/contributing-to-documentation.chapter.md | 7 | ||||
-rwxr-xr-x | maintainers/scripts/db-to-md.sh | 88 | ||||
-rwxr-xr-x | maintainers/scripts/doc/escape-code-markup.py | 97 | ||||
-rwxr-xr-x | maintainers/scripts/doc/replace-xrefs-by-empty-links.py | 32 | ||||
-rw-r--r-- | maintainers/scripts/doc/unknown-code-language.lua | 12 | ||||
-rwxr-xr-x | nixos/doc/manual/md-to-db.sh | 10 |
14 files changed, 398 insertions, 4 deletions
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ec4222a0d4fb..aac92be3a4a0 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -42,6 +42,12 @@ # Nixpkgs build-support /pkgs/build-support/writers @lassulus @Profpatsch +# Nixpkgs documentation +/maintainers/scripts/db-to-md.sh @jtojnar @ryantm +/maintainers/scripts/doc @jtojnar @ryantm +/doc/build-aux/pandoc-filters @jtojnar +/doc/contributing/contributing-to-documentation.chapter.md @jtojnar + # NixOS Internals /nixos/default.nix @nbp @infinisil /nixos/lib/from-env.nix @nbp @infinisil diff --git a/doc/Makefile b/doc/Makefile index 1d3a0e7ccbdf..f8d2d7248fab 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -3,12 +3,17 @@ MD_TARGETS=$(addsuffix .xml, $(basename $(shell find . -type f -regex '.*\.md$$' PANDOC ?= pandoc pandoc_media_dir = media -# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh). +# NOTE: Keep in sync with NixOS manual (/nixos/doc/manual/md-to-db.sh) and conversion script (/maintainers/scripts/db-to-md.sh). # TODO: Remove raw-attribute when we can get rid of DocBook altogether. 
pandoc_commonmark_enabled_extensions = +attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute +# Not needed: +# - docbook-reader/citerefentry-to-rst-role.lua (only relevant for DocBook → MarkDown/rST/MyST) pandoc_flags = --extract-media=$(pandoc_media_dir) \ --lua-filter=$(PANDOC_LUA_FILTERS_DIR)/diagram-generator.lua \ - --lua-filter=labelless-link-is-xref.lua \ + --lua-filter=build-aux/pandoc-filters/myst-reader/roles.lua \ + --lua-filter=build-aux/pandoc-filters/link-unix-man-references.lua \ + --lua-filter=build-aux/pandoc-filters/docbook-writer/rst-roles.lua \ + --lua-filter=build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua \ -f commonmark$(pandoc_commonmark_enabled_extensions)+smart .PHONY: all diff --git a/doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua b/doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua new file mode 100644 index 000000000000..281e85af2717 --- /dev/null +++ b/doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua @@ -0,0 +1,23 @@ +--[[ +Converts Code AST nodes produced by pandoc’s DocBook reader +from citerefentry elements into AST for the corresponding role +for reStructuredText. + +We use a subset of MyST syntax (CommonMark with features from rST) +so let’s use the rST AST for rST features. 
+ +Reference: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage +]] + +function Code(elem) + elem.classes = elem.classes:map(function (x) + if x == 'citerefentry' then + elem.attributes['role'] = 'manpage' + return 'interpreted-text' + else + return x + end + end) + + return elem +end diff --git a/doc/labelless-link-is-xref.lua b/doc/build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua index 67569b020916..fa97729a28bc 100644 --- a/doc/labelless-link-is-xref.lua +++ b/doc/build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua @@ -1,3 +1,13 @@ +--[[ +Converts Link AST nodes with empty label to DocBook xref elements. + +This is a temporary script to be able to use cross-references conveniently +using syntax taken from MyST, while we still use docbook-xsl +for generating the documentation. + +Reference: https://myst-parser.readthedocs.io/en/latest/using/syntax.html#targets-and-cross-referencing +]] + local function starts_with(start, str) return str:sub(1, #start) == start end diff --git a/doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua b/doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua new file mode 100644 index 000000000000..92dc6895750f --- /dev/null +++ b/doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua @@ -0,0 +1,36 @@ +--[[ +Converts AST for reStructuredText roles into corresponding +DocBook elements. + +Currently, only a subset of roles is supported. + +Reference: + List of roles: + https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html + manpage: + https://tdg.docbook.org/tdg/5.1/citerefentry.html + file: + https://tdg.docbook.org/tdg/5.1/filename.html +]] + +function Code(elem) + if elem.classes:includes('interpreted-text') then + local tag = nil + local content = elem.text + if elem.attributes['role'] == 'manpage' then + tag = 'citerefentry' + local title, volnum = content:match('^(.+)%((%w+)%)$') + if title == nil then + -- No volnum in parentheses. 
+ title = content + end + content = '<refentrytitle>' .. title .. '</refentrytitle>' .. (volnum ~= nil and ('<manvolnum>' .. volnum .. '</manvolnum>') or '') + elseif elem.attributes['role'] == 'file' then + tag = 'filename' + end + + if tag ~= nil then + return pandoc.RawInline('docbook', '<' .. tag .. '>' .. content .. '</' .. tag .. '>') + end + end +end diff --git a/doc/build-aux/pandoc-filters/link-unix-man-references.lua b/doc/build-aux/pandoc-filters/link-unix-man-references.lua new file mode 100644 index 000000000000..12431f140fed --- /dev/null +++ b/doc/build-aux/pandoc-filters/link-unix-man-references.lua @@ -0,0 +1,18 @@ +--[[ +Turns a manpage reference into a link, when a mapping is defined +in the man_urls table below. +]] + +local man_urls = { + ["tmpfiles.d(5)"] = "https://www.freedesktop.org/software/systemd/man/tmpfiles.d.html", + ["nix.conf(5)"] = "https://nixos.org/manual/nix/stable/#sec-conf-file", + ["systemd.time(7)"] = "https://www.freedesktop.org/software/systemd/man/systemd.time.html", + ["systemd.timer(5)"] = "https://www.freedesktop.org/software/systemd/man/systemd.timer.html", +} + +function Code(elem) + local is_man_role = elem.classes:includes('interpreted-text') and elem.attributes['role'] == 'manpage' + if is_man_role and man_urls[elem.text] ~= nil then + return pandoc.Link(elem, man_urls[elem.text]) + end +end diff --git a/doc/build-aux/pandoc-filters/myst-reader/roles.lua b/doc/build-aux/pandoc-filters/myst-reader/roles.lua new file mode 100644 index 000000000000..c33a688eeba7 --- /dev/null +++ b/doc/build-aux/pandoc-filters/myst-reader/roles.lua @@ -0,0 +1,29 @@ +--[[ +Replaces Str AST nodes containing {role}, followed by a Code node +by a Code node with attrs that would be produced by rST reader +from the role syntax. + +This is to emulate MyST syntax in Pandoc. +(MyST is a CommonMark flavour with rST features mixed in.) 
+ +Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point +]] + +function Inlines(inlines) + for i = #inlines-1,1,-1 do + local first = inlines[i] + local second = inlines[i+1] + local correct_tags = first.tag == 'Str' and second.tag == 'Code' + if correct_tags then + -- docutils supports alphanumeric strings separated by [-._:] + -- We are slightly more liberal for simplicity. + local role = first.text:match('^{([-._+:%w]+)}$') + if role ~= nil then + inlines:remove(i) + second.attributes['role'] = role + second.classes:insert('interpreted-text') + end + end + end + return inlines +end diff --git a/doc/build-aux/pandoc-filters/myst-writer/roles.lua b/doc/build-aux/pandoc-filters/myst-writer/roles.lua new file mode 100644 index 000000000000..0136bc550652 --- /dev/null +++ b/doc/build-aux/pandoc-filters/myst-writer/roles.lua @@ -0,0 +1,25 @@ +--[[ +Replaces Code nodes with attrs that would be produced by rST reader +from the role syntax by a Str AST node containing {role}, followed by a Code node. + +This is to emulate MyST syntax in Pandoc. +(MyST is a CommonMark flavour with rST features mixed in.) + +Reference: https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point +]] + +function Code(elem) + local role = elem.attributes['role'] + + if elem.classes:includes('interpreted-text') and role ~= nil then + elem.classes = elem.classes:filter(function (c) + return c ~= 'interpreted-text' + end) + elem.attributes['role'] = nil + + return { + pandoc.Str('{' .. role .. 
'}'), + elem, + } + end +end diff --git a/doc/contributing/contributing-to-documentation.chapter.md b/doc/contributing/contributing-to-documentation.chapter.md index 2f7ae32259c4..178fdb36262b 100644 --- a/doc/contributing/contributing-to-documentation.chapter.md +++ b/doc/contributing/contributing-to-documentation.chapter.md @@ -52,6 +52,13 @@ Additionally, the following syntax extensions are currently used: This syntax is taken from [MyST](https://myst-parser.readthedocs.io/en/latest/using/syntax.html#targets-and-cross-referencing). +- []{#ssec-contributing-markup-inline-roles} + If you want to link to a man page, you can use `` {manpage}`nix.conf(5)` ``, which will turn into {manpage}`nix.conf(5)`. + + The references will turn into links when a mapping exists in {file}`doc/build-aux/pandoc-filters/link-unix-man-references.lua`. + + This syntax is taken from [MyST](https://myst-parser.readthedocs.io/en/latest/syntax/syntax.html#roles-an-in-line-extension-point). Though, the feature originates from [reStructuredText](https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage) with slightly different syntax. + - []{#ssec-contributing-markup-admonitions} **Admonitions**, set off from the text to bring attention to something. diff --git a/maintainers/scripts/db-to-md.sh b/maintainers/scripts/db-to-md.sh new file mode 100755 index 000000000000..01357d1e2412 --- /dev/null +++ b/maintainers/scripts/db-to-md.sh @@ -0,0 +1,88 @@ +#! /usr/bin/env nix-shell +#! nix-shell -I nixpkgs=. -i bash -p pandoc + +# This script is temporarily needed while we transition the manual to +# CommonMark. It converts DocBook files into our CommonMark flavour. + +debug= +files=() + +while [ "$#" -gt 0 ]; do + i="$1"; shift 1 + case "$i" in + --debug) + debug=1 + ;; + *) + files+=("$i") + ;; + esac +done + +echo "WARNING: This is an experimental script and might not preserve all formatting." > /dev/stderr +echo "Please report any issues you discover." 
> /dev/stderr + +outExtension="md" +if [[ $debug ]]; then + outExtension="json" +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +# NOTE: Keep in sync with Nixpkgs manual (/doc/Makefile). +# TODO: Remove raw-attribute when we can get rid of DocBook altogether. +pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute +targetLang="commonmark${pandoc_commonmark_enabled_extensions}+smart" +if [[ $debug ]]; then + targetLang=json +fi +pandoc_flags=( + # Not needed: + # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies) + # - media extraction (was only required for diagram generator) + # - myst-reader/roles.lua (only relevant for MyST → DocBook) + # - link-unix-man-references.lua (links should only be added to display output) + # - docbook-writer/rst-roles.lua (only relevant for → DocBook) + # - docbook-writer/labelless-link-is-xref.lua (only relevant for → DocBook) + "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/docbook-reader/citerefentry-to-rst-role.lua" + "--lua-filter=$DIR/../../doc/build-aux/pandoc-filters/myst-writer/roles.lua" + "--lua-filter=$DIR/doc/unknown-code-language.lua" + -f docbook + -t "$targetLang" + --tab-stop=2 + --wrap=none +) + +for file in "${files[@]}"; do + if [[ ! 
-f "$file" ]]; then + echo "db-to-md.sh: $file does not exist" > /dev/stderr + exit 1 + else + rootElement=$(xmllint --xpath 'name(//*)' "$file") + + if [[ $rootElement = chapter ]]; then + extension=".chapter.$outExtension" + elif [[ $rootElement = section ]]; then + extension=".section.$outExtension" + else + echo "db-to-md.sh: $file contains an unsupported root element $rootElement" > /dev/stderr + exit 1 + fi + + outFile="${file%".section.xml"}" + outFile="${outFile%".chapter.xml"}" + outFile="${outFile%".xml"}$extension" + temp1=$(mktemp) + $DIR/doc/escape-code-markup.py "$file" "$temp1" + if [[ $debug ]]; then + echo "Converted $file to $temp1" > /dev/stderr + fi + temp2=$(mktemp) + $DIR/doc/replace-xrefs-by-empty-links.py "$temp1" "$temp2" + if [[ $debug ]]; then + echo "Converted $temp1 to $temp2" > /dev/stderr + fi + pandoc "$temp2" -o "$outFile" "${pandoc_flags[@]}" + echo "Converted $file to $outFile" > /dev/stderr + fi +done diff --git a/maintainers/scripts/doc/escape-code-markup.py b/maintainers/scripts/doc/escape-code-markup.py new file mode 100755 index 000000000000..015435b698e6 --- /dev/null +++ b/maintainers/scripts/doc/escape-code-markup.py @@ -0,0 +1,97 @@ +#! /usr/bin/env nix-shell +#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml + +""" +Pandoc will strip any markup within code elements so +let’s escape them so that they can be handled manually. 
+""" + +import lxml.etree as ET +import re +import sys + +def replace_element_by_text(el: ET.Element, text: str) -> None: + """ + Author: bernulf + Source: https://stackoverflow.com/a/10520552/160386 + SPDX-License-Identifier: CC-BY-SA-3.0 + """ + text = text + (el.tail or "") + parent = el.getparent() + if parent is not None: + previous = el.getprevious() + if previous is not None: + previous.tail = (previous.tail or "") + text + else: + parent.text = (parent.text or "") + text + parent.remove(el) + +DOCBOOK_NS = "http://docbook.org/ns/docbook" + +# List of elements that pandoc’s DocBook reader strips markup from. +# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs +code_elements = [ + # CodeBlock + "literallayout", + "screen", + "programlisting", + # Code (inline) + "classname", + "code", + "filename", + "envar", + "literal", + "computeroutput", + "prompt", + "parameter", + "option", + "markup", + "wordasword", + "command", + "varname", + "function", + "type", + "symbol", + "constant", + "userinput", + "systemitem", +] + +XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"') +ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>') + +def remove_xmlns(match: re.Match) -> str: + """ + Removes xmlns attributes. + + Expects a match containing an opening tag. + """ + return XMLNS_REGEX.sub('', match.group(0)) + +if __name__ == '__main__': + assert len(sys.argv) >= 3, "usage: escape-code-markup.py <input> <output>" + + tree = ET.parse(sys.argv[1]) + name_predicate = " or ".join([f"local-name()='{el}'" for el in code_elements]) + + for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"): + text = ET.tostring(markup, encoding=str) + + # tostring adds xmlns attributes to the element we want to stringify + # as if it was supposed to be usable standalone. + # We are just converting it to CDATA so we do not care. + # Let’s strip the namespace declarations to keep the code clean. 
+ # + # Note that this removes even namespaces that were potentially + # in the original file. Though, that should be very rare – + # most of the time, we will stringify empty DocBook elements + # like <xref> or <co> or, at worst, <link> with xlink:href attribute. + # + # Also note that the regex expects the root element to be the first + # thing in the string. But that should be fine, the tostring method + # does not produce XML declaration or doctype by default. + text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text) + + replace_element_by_text(markup, text) + + tree.write(sys.argv[2]) diff --git a/maintainers/scripts/doc/replace-xrefs-by-empty-links.py b/maintainers/scripts/doc/replace-xrefs-by-empty-links.py new file mode 100755 index 000000000000..2006ef897f7a --- /dev/null +++ b/maintainers/scripts/doc/replace-xrefs-by-empty-links.py @@ -0,0 +1,32 @@ +#! /usr/bin/env nix-shell +#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml + +""" +Pandoc will try to resolve xrefs and replace them with regular links. +Let’s replace them with links with empty labels which MyST +and our pandoc filters recognize as cross-references. 
+""" + +import lxml.etree as ET +import sys + +XLINK_NS = "http://www.w3.org/1999/xlink" + +ns = { + "db": "http://docbook.org/ns/docbook", +} + + +if __name__ == '__main__': + assert len(sys.argv) >= 3, "usage: replace-xrefs-by-empty-links.py <input> <output>" + + tree = ET.parse(sys.argv[1]) + for xref in tree.findall(".//db:xref", ns): + text = ET.tostring(xref, encoding=str) + parent = xref.getparent() + link = parent.makeelement('link') + target_name = xref.get("linkend") + link.set(f"{{{XLINK_NS}}}href", f"#{target_name}") + parent.replace(xref, link) + + tree.write(sys.argv[2]) diff --git a/maintainers/scripts/doc/unknown-code-language.lua b/maintainers/scripts/doc/unknown-code-language.lua new file mode 100644 index 000000000000..85d8df4690ba --- /dev/null +++ b/maintainers/scripts/doc/unknown-code-language.lua @@ -0,0 +1,12 @@ +--[[ +Adds “unknown” class to CodeBlock AST nodes without any classes. + +This will cause Pandoc to use fenced code block, which we prefer. +]] + +function CodeBlock(elem) + if #elem.classes == 0 then + elem.classes:insert('unknown') + return elem + end +end diff --git a/nixos/doc/manual/md-to-db.sh b/nixos/doc/manual/md-to-db.sh index c7a328b2295d..6dd4b8c6e419 100755 --- a/nixos/doc/manual/md-to-db.sh +++ b/nixos/doc/manual/md-to-db.sh @@ -12,8 +12,14 @@ pushd $DIR # TODO: Remove raw-attribute when we can get rid of DocBook altogether. 
pandoc_commonmark_enabled_extensions=+attributes+fenced_divs+footnotes+bracketed_spans+definition_lists+pipe_tables+raw_attribute pandoc_flags=( - # media extraction and diagram-generator.lua not needed - "--lua-filter=$DIR/../../../doc/labelless-link-is-xref.lua" + # Not needed: + # - diagram-generator.lua (we do not support that in NixOS manual to limit dependencies) + # - media extraction (was only required for diagram generator) + # - docbook-reader/citerefentry-to-rst-role.lua (only relevant for DocBook → MarkDown/rST/MyST) + "--lua-filter=$DIR/../../../doc/build-aux/pandoc-filters/myst-reader/roles.lua" + "--lua-filter=$DIR/../../../doc/build-aux/pandoc-filters/link-unix-man-references.lua" + "--lua-filter=$DIR/../../../doc/build-aux/pandoc-filters/docbook-writer/rst-roles.lua" + "--lua-filter=$DIR/../../../doc/build-aux/pandoc-filters/docbook-writer/labelless-link-is-xref.lua" -f "commonmark${pandoc_commonmark_enabled_extensions}+smart" -t docbook ) |