summaryrefslogtreecommitdiffstats
path: root/maintainers/scripts/doc
diff options
context:
space:
mode:
Diffstat (limited to 'maintainers/scripts/doc')
-rwxr-xr-x maintainers/scripts/doc/escape-code-markup.py 97
-rwxr-xr-x maintainers/scripts/doc/replace-xrefs-by-empty-links.py 32
-rw-r--r-- maintainers/scripts/doc/unknown-code-language.lua 12
3 files changed, 141 insertions, 0 deletions
diff --git a/maintainers/scripts/doc/escape-code-markup.py b/maintainers/scripts/doc/escape-code-markup.py
new file mode 100755
index 000000000000..015435b698e6
--- /dev/null
+++ b/maintainers/scripts/doc/escape-code-markup.py
@@ -0,0 +1,97 @@
+#! /usr/bin/env nix-shell
+#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml
+
+"""
+Pandoc will strip any markup within code elements so
+let’s escape them so that they can be handled manually.
+"""
+
+import lxml.etree as ET
+import re
+import sys
+
+def replace_element_by_text(el: ET.Element, text: str) -> None:
+ """
+ Author: bernulf
+ Source: https://stackoverflow.com/a/10520552/160386
+ SPDX-License-Identifier: CC-BY-SA-3.0
+ """
+ text = text + (el.tail or "")
+ parent = el.getparent()
+ if parent is not None:
+ previous = el.getprevious()
+ if previous is not None:
+ previous.tail = (previous.tail or "") + text
+ else:
+ parent.text = (parent.text or "") + text
+ parent.remove(el)
+
+# Namespace URI used to restrict the XPath match below to DocBook elements.
+DOCBOOK_NS = "http://docbook.org/ns/docbook"
+
+# List of elements that pandoc’s DocBook reader strips markup from.
+# https://github.com/jgm/pandoc/blob/master/src/Text/Pandoc/Readers/DocBook.hs
+# The names are matched against local-name() when building the XPath query.
+code_elements = [
+ # CodeBlock
+ "literallayout",
+ "screen",
+ "programlisting",
+ # Code (inline)
+ "classname",
+ "code",
+ "filename",
+ "envar",
+ "literal",
+ "computeroutput",
+ "prompt",
+ "parameter",
+ "option",
+ "markup",
+ "wordasword",
+ "command",
+ "varname",
+ "function",
+ "type",
+ "symbol",
+ "constant",
+ "userinput",
+ "systemitem",
+]
+
+# Matches a single xmlns="..." or xmlns:prefix="..." attribute together with
+# the whitespace in front of it. Only double-quoted values are recognised.
+XMLNS_REGEX = re.compile(r'\s+xmlns(?::[^=]+)?="[^"]*"')
+# Matches the first tag at the very start of a string (leading whitespace
+# allowed), from its "<" up to the next ">".
+ROOT_ELEMENT_REGEX = re.compile(r'^\s*<[^>]+>')
+
+def remove_xmlns(match: re.Match) -> str:
+ """
+ Removes xmlns attributes.
+
+ Expects a match containing an opening tag.
+ """
+ return XMLNS_REGEX.sub('', match.group(0))
+
+if __name__ == '__main__':
+ assert len(sys.argv) >= 3, "usage: escape-code-markup.py <input> <output>"
+
+ tree = ET.parse(sys.argv[1])
+ name_predicate = " or ".join([f"local-name()='{el}'" for el in code_elements])
+
+ for markup in tree.xpath(f"//*[({name_predicate}) and namespace-uri()='{DOCBOOK_NS}']/*"):
+ text = ET.tostring(markup, encoding=str)
+
+ # tostring adds xmlns attributes to the element we want to stringify
+ # as if it was supposed to be usable standalone.
+ # We are just converting it to CDATA so we do not care.
+ # Let’s strip the namespace declarations to keep the code clean.
+ #
+ # Note that this removes even namespaces that were potentially
+ # in the original file. Though, that should be very rare –
+ # most of the time, we will stringify empty DocBook elements
+ # like <xref> or <co> or, at worst, <link> with xlink:href attribute.
+ #
+ # Also note that the regex expects the root element to be first
+ # thing in the string. But that should be fine, the tostring method
+ # does not produce XML declaration or doctype by default.
+ text = ROOT_ELEMENT_REGEX.sub(remove_xmlns, text)
+
+ replace_element_by_text(markup, text)
+
+ tree.write(sys.argv[2])
diff --git a/maintainers/scripts/doc/replace-xrefs-by-empty-links.py b/maintainers/scripts/doc/replace-xrefs-by-empty-links.py
new file mode 100755
index 000000000000..2006ef897f7a
--- /dev/null
+++ b/maintainers/scripts/doc/replace-xrefs-by-empty-links.py
@@ -0,0 +1,32 @@
+#! /usr/bin/env nix-shell
+#! nix-shell -I nixpkgs=channel:nixos-unstable -i python3 -p python3 -p python3.pkgs.lxml
+
+"""
+Pandoc will try to resolve xrefs and replace them with regular links.
+Let’s replace them with links with empty labels which MyST
+and our pandoc filters recognize as cross-references.
+"""
+
+import lxml.etree as ET
+import sys
+
+# XLink namespace URI; used to build the namespace-qualified href attribute.
+XLINK_NS = "http://www.w3.org/1999/xlink"
+
+# Prefix map handed to findall() so the "db:" prefix resolves to DocBook.
+ns = {
+ "db": "http://docbook.org/ns/docbook",
+}
+
+
+if __name__ == '__main__':
+ assert len(sys.argv) >= 3, "usage: replace-xrefs-by-empty-links.py <input> <output>"
+
+ tree = ET.parse(sys.argv[1])
+ for xref in tree.findall(".//db:xref", ns):
+ text = ET.tostring(xref, encoding=str)
+ parent = xref.getparent()
+ link = parent.makeelement('link')
+ target_name = xref.get("linkend")
+ link.set(f"{{{XLINK_NS}}}href", f"#{target_name}")
+ parent.replace(xref, link)
+
+ tree.write(sys.argv[2])
diff --git a/maintainers/scripts/doc/unknown-code-language.lua b/maintainers/scripts/doc/unknown-code-language.lua
new file mode 100644
index 000000000000..85d8df4690ba
--- /dev/null
+++ b/maintainers/scripts/doc/unknown-code-language.lua
@@ -0,0 +1,12 @@
+--[[
+Adds “unknown” class to CodeBlock AST nodes without any classes.
+
+This will cause Pandoc to use fenced code block, which we prefer.
+]]
+
+-- Tag class-less code blocks with the 'unknown' class.
+-- Returns the modified element, or nothing when the block already has
+-- at least one class.
+function CodeBlock(elem)
+  if #elem.classes ~= 0 then
+    return
+  end
+
+  elem.classes:insert('unknown')
+  return elem
+end