summaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
authorKevin McCarthy <kevin@8t8.us>2019-11-09 13:43:11 -0800
committerKevin McCarthy <kevin@8t8.us>2019-11-09 13:43:11 -0800
commit183c468721b3d404e2505d4b27394f41ad6dfdaa (patch)
tree3aaafb9dbbe422a8de786aa9bde0a4f0c72954a6 /contrib
parent202ec2c8a4bc8ddc7a3ecafbe82af6b167fc6ae2 (diff)
Add markdown2html contrib script.
This was contributed by martin f. krafft, to give an example script for the new $send_multipart_alternative_filter functionality.
Diffstat (limited to 'contrib')
-rw-r--r--contrib/Makefile.am2
-rw-r--r--contrib/markdown2html309
2 files changed, 310 insertions, 1 deletions
diff --git a/contrib/Makefile.am b/contrib/Makefile.am
index 4d25f42e..973aed23 100644
--- a/contrib/Makefile.am
+++ b/contrib/Makefile.am
@@ -6,7 +6,7 @@ SAMPLES = Mush.rc Pine.rc gpg.rc pgp2.rc pgp5.rc pgp6.rc Tin.rc \
sample.mailcap sample.muttrc sample.muttrc-sidebar sample.muttrc-tlr \
sample.muttrc-compress sample.muttrc-starter \
sample.vimrc-sidebar colors.default colors.linux smime.rc \
- ca-bundle.crt smime_keys_test.pl mutt_xtitle
+ ca-bundle.crt smime_keys_test.pl mutt_xtitle markdown2html
EXTRA_DIST = language.txt language50.txt \
patch.slang-1.2.2.keypad.1 \
diff --git a/contrib/markdown2html b/contrib/markdown2html
new file mode 100644
index 00000000..be705d65
--- /dev/null
+++ b/contrib/markdown2html
@@ -0,0 +1,309 @@
+#!/usr/bin/python3
+#
+# markdown2html.py — simple Markdown-to-HTML converter for use with Mutt
+#
+# Mutt recently learnt [how to compose `multipart/alternative`
+# emails][1]. This script assumes a message has been composed using Markdown
+# (with a lot of pandoc extensions enabled), and translates it to `text/html`
+# for Mutt to tie into such a `multipart/alternative` message.
+#
+# [1]: https://gitlab.com/muttmua/mutt/commit/0e566a03725b4ad789aa6ac1d17cdf7bf4e7e354
+#
+# Configuration:
+# muttrc:
+# set send_multipart_alternative=yes
+# set send_multipart_alternative_filter=/path/to/markdown2html.py
+#
+# Optionally, custom CSS styles will be read from `~/.mutt/markdown2html.css`,
+# if present.
+#
+# Requirements:
+# - python3
+# - PyPandoc (and pandoc installed, or downloaded)
+# - Pynliner
+#
+# Optional:
+# - Pygments, if installed, then syntax highlighting is enabled
+#
+# Latest version:
+# https://git.madduck.net/etc/mutt.git/blob_plain/HEAD:/.mutt/markdown2html
+#
+# Copyright © 2019 martin f. krafft <madduck@madduck.net>
+# Released under the GPL-2+ licence, just like Mutt itself.
+#
+
+import pypandoc
+import pynliner
+import re
+import os
+import sys
+
# Seed the stylesheet with Pygments' "default" style, scoped to the
# `.sourceCode` selector, when Pygments can be imported; otherwise start
# from an empty stylesheet so that the script works without it.
try:
    from pygments.formatters import get_formatter_by_name
    formatter = get_formatter_by_name('html', style='default')
    DEFAULT_CSS = formatter.get_style_defs('.sourceCode')

except ImportError:
    DEFAULT_CSS = ""


# Built-in rules for the classes this script attaches to quotes and to the
# signature, plus generic table/footnote styling.
DEFAULT_CSS += '''
.quote {
 padding: 0 0.5em;
 margin: 0;
 font-style: italic;
 border-left: 2px solid #ccc;
 color: #999;
 font-size: 80%;
}
.quotelead {
 font-style: italic;
 margin-bottom: -1em;
 color: #999;
 font-size: 80%;
}
.quotechar { display: none; }
.footnote-ref, .footnote-back { text-decoration: none;}
.signature {
 color: #999;
 font-family: monospace;
 white-space: pre;
 margin: 1em 0 0 0;
 font-size: 80%;
}
table, th, td {
 border-collapse: collapse;
 border: 1px solid #999;
}
th, td { padding: 0.5em; }
.header {
 background: #eee;
}
.even { background: #eee; }
'''

# Optional user stylesheet, appended last so that its rules come after the
# built-in ones.
STYLESHEET = os.path.join(os.path.expanduser('~/.mutt'),
                          'markdown2html.css')
if os.path.exists(STYLESHEET):
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(STYLESHEET) as stylesheet:
        DEFAULT_CSS += stylesheet.read()

# Skeleton of the generated document; `{htmlbody}` is filled in with
# str.format() by convert_markdown_to_html().
HTML_DOCUMENT = '''<!DOCTYPE html>
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"/>
<title>HTML E-Mail</title>
</head><body class="email">
{htmlbody}
</body></html>'''


# Wrapper for the e-mail signature; `{sig}` is filled in with str.format().
SIGNATURE_HTML = \
        '<div class="signature"><span class="leader">-- </span>{sig}</div>'
+
+
+def _preprocess_markdown(mdwn):
+ '''
+ Preprocess Markdown for handling by the converter.
+ '''
+ # convert hard line breaks within paragraphs to 2 trailing spaces, which
+ # is the markdown way of representing hard line breaks. Note how the
+ # regexp will not match between paragraphs.
+ ret = re.sub(r'(\S)\n(\s*\S)', r'\g<1> \n\g<2>', mdwn, flags=re.MULTILINE)
+
+ # Clients like Thunderbird need the leading '>' to be able to properly
+ # create nested quotes, so we duplicate the symbol, the first instance
+ # will tell pandoc to create a blockquote, while the second instance will
+ # be a <span> containing the character, along with a class that causes CSS
+ # to actually hide it from display. However, this does not work with the
+ # text-mode HTML2text converters, and so it's left commented for now.
+ #ret = re.sub(r'\n>', r' \n>[>]{.quotechar}', ret, flags=re.MULTILINE)
+
+ return ret
+
+
+def _identify_quotes_for_later(mdwn):
+ '''
+ Email quoting such as:
+
+ ```
+ On 1970-01-01, you said:
+ > The Flat Earth Society has members all around the globe.
+ ```
+
+ isn't really properly handled by Markdown, so let's do our best to
+ identify the individual elements, and mark them, using a syntax similar to
+ what pandoc uses already in some cases. As pandoc won't actually use these
+ data (yet?), we call `self._reformat_quotes` later to use these markers
+ to slap the appropriate classes on the HTML tags.
+ '''
+
+ def generate_lines_with_context(mdwn):
+ '''
+ Iterates the input string line-wise, returning a triplet of
+ previous, current, and next line, the first and last of which
+ will be None on the first and last line of the input data
+ respectively.
+ '''
+ prev = cur = nxt = None
+ lines = iter(mdwn.splitlines())
+ cur = next(lines)
+ for nxt in lines:
+ yield prev, cur, nxt
+ prev = cur
+ cur = nxt
+ yield prev, cur, None
+
+ ret = []
+ for prev, cur, nxt in generate_lines_with_context(mdwn):
+
+ # The lead-in to a quote is a single line immediately preceding the
+ # quote, and ending with ':'. Note that there could be multiple of
+ # these:
+ if re.match(r'^.+:\s*$', cur) and nxt.startswith('>'):
+ ret.append(f'{{.quotelead}}{cur.strip()}')
+ # pandoc needs an empty line before the blockquote, so
+ # we enter one for the purpose of HTML rendition:
+ ret.append('')
+ continue
+
+ # The first blockquote after such a lead-in gets marked as the
+ # "initial" quote:
+ elif prev and re.match(r'^.+:\s*$', prev) and cur.startswith('>'):
+ ret.append(re.sub(r'^(\s*>\s*)+(.+)',
+ r'\g<1>{.quoteinitial}\g<2>',
+ cur, flags=re.MULTILINE))
+
+ # All other occurrences of blockquotes get the "subsequent" marker:
+ elif cur.startswith('>') and prev and not prev.startswith('>'):
+ ret.append(re.sub(r'^((?:\s*>\s*)+)(.+)',
+ r'\g<1>{.quotesubsequent}\g<2>',
+ cur, flags=re.MULTILINE))
+
+ else: # pass through everything else.
+ ret.append(cur)
+
+ return '\n'.join(ret)
+
+
+def _reformat_quotes(html):
+ '''
+ Earlier in the pipeline, we marked email quoting, using markers, which we
+ now need to turn into HTML classes, so that we can use CSS to style them.
+ '''
+ ret = html.replace('<p>{.quotelead}', '<p class="quotelead">')
+ ret = re.sub(r'<blockquote>\n((?:<blockquote>\n)*)<p>(?:\{\.quote(\w+)\})',
+ r'<blockquote class="quote \g<2>">\n\g<1><p>', ret, flags=re.MULTILINE)
+ return ret
+
+
+
def _convert_with_pandoc(mdwn, inputfmt='markdown', outputfmt='html5',
                         ext_enabled=None, ext_disabled=None,
                         standalone=True, title="HTML E-Mail"):
    '''
    Run the Markdown through pandoc (via pypandoc) and return the result.

    mdwn         -- the Markdown text to convert
    inputfmt     -- base pandoc input format, extended below with extensions
    outputfmt    -- pandoc output format
    ext_enabled  -- extensions to switch on; a default set is used when
                    this is None or empty
    ext_disabled -- extensions to switch off; a default set is used when
                    this is None or empty
    standalone   -- pass `--standalone` to pandoc if true
    title        -- if non-empty, passed to pandoc as `pagetitle` metadata
    '''
    ext_enabled = ext_enabled or [
        'backtick_code_blocks',
        'line_blocks',
        'fancy_lists',
        'startnum',
        'definition_lists',
        'example_lists',
        'table_captions',
        'simple_tables',
        'multiline_tables',
        'grid_tables',
        'pipe_tables',
        'all_symbols_escapable',
        'intraword_underscores',
        'strikeout',
        'superscript',
        'subscript',
        'fenced_divs',
        'bracketed_spans',
        'footnotes',
        'inline_notes',
        'emoji',
        'tex_math_double_backslash',
        'autolink_bare_uris',
    ]
    ext_disabled = ext_disabled or [
        'tex_math_single_backslash',
        'tex_math_dollars',
        'smart',
        'raw_html',
    ]

    # Build the format specification: <format>+<ext>+<ext>...-<ext>-<ext>...
    enabled, disabled = '+'.join(ext_enabled), '-'.join(ext_disabled)
    inputfmt = f'{inputfmt}+{enabled}-{disabled}'

    args = ['--standalone'] if standalone else []
    if title:
        # NOTE(review): the double quotes are part of the argument string
        # itself; confirm whether pandoc strips them or whether they end up
        # in the page title.
        args.append(f'--metadata=pagetitle:"{title}"')

    return pypandoc.convert_text(mdwn, format=inputfmt, to=outputfmt,
                                 extra_args=args)
+
+
def _apply_styling(html):
    '''
    Run the HTML document through Pynliner together with DEFAULT_CSS and
    return the result, so that the styles end up inline on the HTML tags.
    '''
    inliner = pynliner.Pynliner()
    inliner = inliner.from_string(html)
    inliner = inliner.with_cssString(DEFAULT_CSS)
    return inliner.run()
+
+
+def _postprocess_html(html):
+ '''
+ Postprocess the generated and styled HTML.
+ '''
+ return html
+
+
def convert_markdown_to_html(mdwn):
    '''
    Converts the input Markdown to HTML, handling separately the body, as well
    as an optional signature.

    The input is split at the first line consisting of exactly "-- ".
    Everything before it is the body, which is converted with pandoc;
    everything after it is the signature, which is HTML-escaped and emitted
    with its line breaks turned into `<br/>`.

    Returns the complete, styled HTML document as a string.
    '''
    # Imported locally under another name, because `html` is used as a
    # local variable name below.
    from html import escape as escape_html

    # `maxsplit` is passed by keyword: passing it positionally is deprecated
    # since Python 3.13.
    parts = re.split(r'^-- $', mdwn, maxsplit=1, flags=re.MULTILINE)
    body = parts[0]
    sig = parts[1] if len(parts) == 2 else None

    html = ''
    if body:
        body = _preprocess_markdown(body)
        body = _identify_quotes_for_later(body)
        html = _convert_with_pandoc(body, standalone=False)
        html = _reformat_quotes(html)

    if sig:
        sig = _preprocess_markdown(sig)
        # The signature bypasses pandoc, so it must be escaped here;
        # otherwise text such as "Name <user@example.org>" would be emitted
        # as (broken) markup.
        sig_lines = (escape_html(line) for line in sig.splitlines())
        html += SIGNATURE_HTML.format(sig='<br/>'.join(sig_lines))

    html = HTML_DOCUMENT.format(htmlbody=html)
    html = _apply_styling(html)
    html = _postprocess_html(html)

    return html
+
+
def main():
    '''
    Read Markdown from stdin, convert it, and write the result to stdout in
    the form mutt expects: the content type on the first line, an empty
    line, then the HTML document. Nothing is printed if the conversion
    yields an empty result.
    '''
    html = convert_markdown_to_html(sys.stdin.read())
    if not html:
        return

    # mutt expects the content type in the first line, so:
    print(f'text/html\n\n{html}')


if __name__ == '__main__':
    main()