1 files changed, 263 insertions, 0 deletions
diff --git a/gitsrht/annotations.py b/gitsrht/annotations.py
new file mode 100644
index 0000000..b9508fd
--- /dev/null
+++ b/gitsrht/annotations.py
@@ -0,0 +1,263 @@
+from pygments.formatter import Formatter
+from pygments.token import Token, STANDARD_TYPES
+from pygments.util import string_types, iteritems
+from srht.markdown import markdown
+from urllib.parse import urlparse
+
+# Characters that are unsafe to embed verbatim in HTML text or attributes.
+_escape_html_table = str.maketrans({
+    '&': '&amp;', '<': '&lt;', '>': '&gt;',
+    '"': '&quot;', "'": '&#39;',
+})
+
+def escape_html(text, table=_escape_html_table):
+    """Return ``text`` with every character that has an entry in ``table``
+    replaced by its mapped string (by default the entities for & < > " ')."""
+    return text.translate(table)
+
+def _get_ttype_class(ttype):
+    """Return the short CSS class for ``ttype``: its STANDARD_TYPES entry, or
+    the nearest ancestor's entry suffixed with '-<name>' per skipped level."""
+    suffix = []
+    short = STANDARD_TYPES.get(ttype)
+    while short is None:
+        suffix.insert(0, '-' + ttype[-1])
+        ttype = ttype.parent
+        short = STANDARD_TYPES.get(ttype)
+    return short + ''.join(suffix)
+
+# Fork of the pygments HtmlFormatter (BSD licensed)
+# The main difference is that it relies on AnnotatedFormatter to escape the
+# HTML tags in the source. Other features we don't use are removed to keep it
+# slim.
+class _BaseFormatter(Formatter):
+    """Minimal HTML formatter emitting ``<span class="...">`` markup.
+
+    Token text is written out as-is; nothing in this class escapes HTML.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self._create_stylesheet()
+
+    def get_style_defs(self, arg=None):
+        """
+        Return CSS style definitions for the classes produced by the current
+        highlighting style. ``arg`` can be a string or list of selectors to
+        insert before the token type classes (default: ``.highlight``).
+        """
+        if arg is None:
+            arg = ".highlight"
+        selectors = [arg] if isinstance(arg, str) else list(arg)
+
+        def prefix(cls):
+            if cls:
+                cls = '.' + cls
+            return ', '.join((sel + ' ' if sel else '') + cls
+                             for sel in selectors)
+
+        # Sorting puts the shallowest token types first (level leads the
+        # tuple), which the CSS cascade relies on.
+        styles = sorted((level, ttype, cls, style)
+                        for cls, (style, ttype, level)
+                        in self.class2style.items()
+                        if cls and style)
+        # [6:] presumably drops a leading "Token." from the repr - TODO confirm
+        lines = ['%s { %s } /* %s */' % (prefix(cls), style, repr(ttype)[6:])
+                 for (level, ttype, cls, style) in styles]
+        return '\n'.join(lines)
+
+    def _get_css_class(self, ttype):
+        """Return the css class of this token type, or '' if it has none."""
+        return _get_ttype_class(ttype) or ''
+
+    def _get_css_classes(self, ttype):
+        """Return the css classes of this token type and of its ancestors,
+        up to the nearest one in STANDARD_TYPES, separated by spaces."""
+        cls = self._get_css_class(ttype)
+        while ttype not in STANDARD_TYPES:
+            ttype = ttype.parent
+            cls = self._get_css_class(ttype) + ' ' + cls
+        return cls
+
+    def _create_stylesheet(self):
+        """Fill ``ttype2class`` and ``class2style`` from ``self.style``."""
+        t2c = self.ttype2class = {Token: ''}
+        c2s = self.class2style = {}
+        for ttype, ndef in self.style:
+            name = self._get_css_class(ttype)
+            style = ''
+            if ndef['color']:
+                style += 'color: #%s; ' % ndef['color']
+            if ndef['bold']:
+                style += 'font-weight: bold; '
+            if ndef['italic']:
+                style += 'font-style: italic; '
+            if ndef['underline']:
+                style += 'text-decoration: underline; '
+            if ndef['bgcolor']:
+                style += 'background-color: #%s; ' % ndef['bgcolor']
+            if ndef['border']:
+                style += 'border: 1px solid #%s; ' % ndef['border']
+            if style:
+                t2c[ttype] = name
+                # save len(ttype) to enable ordering the styles by
+                # hierarchy (necessary for CSS cascading rules!)
+                c2s[name] = (style[:-2], ttype, len(ttype))
+
+    def _format_lines(self, tokensource):
+        """Yield ``(1, html)`` per output line, wrapping each token's text in
+        a ``<span>`` carrying its css classes; adjacent same-class tokens on
+        one line share a span. Token text is emitted unescaped."""
+        lsep = "\n"
+        lspan = ''
+        line = []
+        for ttype, value in tokensource:
+            cls = self._get_css_classes(ttype)
+            cspan = '<span class="%s">' % cls if cls else ''
+
+            parts = value.split('\n')
+
+            # for all but the last line
+            for part in parts[:-1]:
+                if line:
+                    if lspan != cspan:
+                        line.extend(((lspan and '</span>'), cspan, part,
+                                     (cspan and '</span>'), lsep))
+                    else:  # both are the same
+                        line.extend((part, (lspan and '</span>'), lsep))
+                    yield 1, ''.join(line)
+                    line = []
+                elif part:
+                    yield 1, ''.join((cspan, part, (cspan and '</span>'), lsep))
+                else:
+                    yield 1, lsep
+            # for the last line
+            if line and parts[-1]:
+                if lspan != cspan:
+                    line.extend(((lspan and '</span>'), cspan, parts[-1]))
+                    lspan = cspan
+                else:
+                    line.append(parts[-1])
+            elif parts[-1]:
+                line = [cspan, parts[-1]]
+                lspan = cspan
+            # else we neither have to open a new span nor set lspan
+
+        if line:
+            line.extend(((lspan and '</span>'), lsep))
+            yield 1, ''.join(line)
+
+    def _wrap_div(self, inner):
+        """Enclose ``inner`` in ``<div class='highlight'>``."""
+        yield 0, "<div class='highlight'>"
+        yield from inner
+        yield 0, '</div>\n'
+
+    def _wrap_pre(self, inner):
+        """Enclose ``inner`` in ``<pre>``, led by an empty ``<span>``."""
+        yield 0, '<pre><span></span>'
+        yield from inner
+        yield 0, '</pre>'
+
+    def wrap(self, source, outfile):
+        """
+        Wrap the ``source``, which is a generator yielding
+        individual lines, in custom generators. See docstring
+        for `format`. Can be overridden.
+        """
+        return self._wrap_div(self._wrap_pre(source))
+
+    def format_unencoded(self, tokensource, outfile):
+        """Format ``tokensource`` and write the wrapped HTML to ``outfile``."""
+        source = self._format_lines(tokensource)
+        source = self.wrap(source, outfile)
+        for _, piece in source:
+            outfile.write(piece)
+
+def validate_annotation(valid, anno):
+    """Report problems with annotation ``anno`` through ``valid.expect``."""
+    def require(fields, kind, desc):
+        # Each listed field must be present and, if present, of type ``kind``.
+        for field in fields:
+            valid.expect(field in anno, f"'{field}' is required")
+            valid.expect(field not in anno or isinstance(anno[field], kind),
+                    f"'{field}' must be {desc}")
+
+    valid.expect("type" in anno, "'type' is required")
+    if not valid.ok:
+        return
+    valid.expect(anno["type"] in ["link", "markdown"],
+            f"'{anno['type']}' is not a valid annotation type")
+    if anno["type"] == "link":
+        require(["lineno", "colno", "len"], int, "an integer")
+        # 'to' is fed to urlparse() when rendering, so it must be a string
+        require(["to"], str, "a string")
+        valid.expect("title" not in anno or isinstance(anno["title"], str),
+                "'title' must be a string")
+    elif anno["type"] == "markdown":
+        require(["lineno"], int, "an integer")
+        require(["title", "content"], str, "a string")
+
+class AnnotatedFormatter(_BaseFormatter):
+    """Formatter that HTML-escapes token text and splices annotations
+    (links and markdown notes), keyed by 1-based line number, into it."""
+
+    def __init__(self, annos, link_prefix):
+        super().__init__()
+        self.annos = dict()
+        self.link_prefix = link_prefix
+        for anno in (annos or list()):
+            self.annos.setdefault(int(anno["lineno"]), list()).append(anno)
+        for line_annos in self.annos.values():
+            # NOTE(review): nothing visible here sets "from"; confirm the key
+            line_annos.sort(key=lambda anno: anno.get("from", -1))
+
+    def _annotate_token(self, token, colno, annos):
+        """Return ``token`` HTML-escaped, decorated by the first annotation in
+        ``annos`` that applies to a token starting at 0-based ``colno``."""
+        # TODO: Extend this to support >1 anno per token
+        for anno in annos:
+            if anno["type"] == "link":
+                start = anno["colno"] - 1
+                if not start <= colno < start + anno["len"]:
+                    continue
+                target = anno["to"]
+                if urlparse(target).scheme == "":
+                    target = self.link_prefix + "/" + target
+                # title is annotation-supplied text: escape it like the href
+                title = escape_html(anno.get("title", ""))
+                return (f"<a class='annotation' title='{title}' "
+                    f"href='{escape_html(target)}' "
+                    "rel='nofollow noopener' "
+                    f">{escape_html(token)}</a>")
+            elif anno["type"] == "markdown":
+                if "\n" not in token:
+                    continue
+                content = markdown(anno["content"], baselevel=6,
+                        link_prefix=self.link_prefix)
+                annotation = (f"<details><summary>{escape_html(anno['title'])}"
+                    f"</summary>{content}</details>\n")
+                return escape_html(token).replace("\n", annotation, 1)
+            # Other types?
+        return escape_html(token)
+
+    def _wrap_source(self, source):
+        """Yield ``(ttype, html)`` per line-piece of each token, tracking the
+        0-based line and column at which every piece starts."""
+        lineno = 0
+        colno = 0
+        for ttype, token in source:
+            for part in token.splitlines(True):
+                annos = self.annos.get(lineno + 1)
+                if annos:
+                    yield ttype, self._annotate_token(part, colno, annos)
+                else:
+                    yield ttype, escape_html(part)
+                if part.endswith("\n"):
+                    lineno, colno = lineno + 1, 0
+                else:
+                    colno += len(part)
+
+    def _format_lines(self, source):
+        yield from super()._format_lines(self._wrap_source(source))