fix: better code fence handling

author: Tom Milligan <tom.milligan@uipath.com> 2023-04-23 12:19:05 +0100
committer: Tom Milligan <tom.milligan@uipath.com> 2023-04-23 12:29:54 +0100
commit: b3e82df34eef7345f55a7cfae04fc2c2e6c9478c (patch)
tree: aa859df14b380b8fe2c64e538a751bf1b5a8433f
parent: e8813eb104734881f7aa313d09178510da42c9ee (diff)
4 files changed, 159 insertions, 25 deletions
diff --git a/integration/expected/chapter_1_main.html b/integration/expected/chapter_1_main.html
index 0e25358..57b88fb 100644
--- a/integration/expected/chapter_1_main.html
+++ b/integration/expected/chapter_1_main.html
@@ -62,4 +62,14 @@ No title, only body
 <p>This is a commonly shared warning!</p>
 </div>
 </div>
+<div id="admonition-note-2" class="admonition note">
+<div class="admonition-title">
+<p>Note</p>
+<p><a class="admonition-anchor-link" href="#admonition-note-2"></a></p>
+</div>
+<div>
+<pre><code class="language-bash">Nested code block
+</code></pre>
+</div>
+</div>
 
diff --git a/integration/src/chapter_1.md b/integration/src/chapter_1.md
index cfbc723..b1db164 100644
--- a/integration/src/chapter_1.md
+++ b/integration/src/chapter_1.md
@@ -23,3 +23,9 @@ Hidden on load
 ```
 
 {{#include common_warning.md}}
+
+````admonish
+```bash
+Nested code block
+```
+````
diff --git a/src/lib.rs b/src/lib.rs
index b91a7d4..f69a395 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,6 +9,7 @@ use pulldown_cmark::{CodeBlockKind::*, Event, Options, Parser, Tag};
 use std::{borrow::Cow, str::FromStr};
 
 mod config;
+mod parse;
 mod resolve;
 mod types;
 
@@ -218,28 +219,6 @@ impl<'a> Admonition<'a> {
 const ANCHOR_ID_PREFIX: &str = "admonition";
 const ANCHOR_ID_DEFAULT: &str = "default";
 
-fn extract_admonish_body(content: &str) -> &str {
-    const PRE_END: char = '\n';
-    const POST: &str = "```";
-
-    // We can't trust the info string length to find the start of the body
-    // it may change length if it contains HTML or character escapes.
-    //
-    // So we scan for the first newline and use that.
-    // If gods forbid it doesn't exist for some reason, just include the whole info string.
-    let start_index = content
-        // Start one character _after_ the newline
-        .find(PRE_END)
-        .map(|index| index + 1)
-        .unwrap_or_default();
-    let end_index = content.len() - POST.len();
-
-    let admonish_content = &content[start_index..end_index];
-    // The newline after a code block is technically optional, so we have to
-    // trim it off dynamically.
-    admonish_content.trim()
-}
-
 /// Given the content in the span of the code block, and the info string,
 /// return `Some(Admonition)` if the code block is an admonition.
 ///
@@ -286,7 +265,7 @@ Original markdown input:
             })
         }
     };
-    let body = extract_admonish_body(content);
+    let body = parse::extract_admonish_body(content);
     Some(Ok(Admonition::new(info, body)))
 }
 
@@ -321,8 +300,8 @@ fn preprocess(
 
     let events = Parser::new_ext(content, opts);
 
-    for (e, span) in events.into_offset_iter() {
-        if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = e.clone() {
+    for (event, span) in events.into_offset_iter() {
+        if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = event.clone() {
             let span_content = &content[span.start..span.end];
 
             let admonition = match parse_admonition(
@@ -444,6 +423,40 @@ Text
     }
 
     #[test]
+    fn adds_admonish_longer_code_fence() {
+        let content = r#"# Chapter
+````admonish
+```json
+{}
+```
+````
+Text
+"#;
+
+        let expected = r##"# Chapter
+
+<div id="admonition-note" class="admonition note">
+<div class="admonition-title">
+
+Note
+
+<a class="admonition-anchor-link" href="#admonition-note"></a>
+</div>
+<div>
+
+```json
+{}
+```
+
+</div>
+</div>
+Text
+"##;
+
+        assert_eq!(expected, prep(content));
+    }
+
+    #[test]
     fn adds_admonish_directive() {
         let content = r#"# Chapter
 ```admonish warning
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..1632df8
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,105 @@
+/// We can't trust the info string length to find the start of the body:
+/// it may change length if it contains HTML or character escapes.
+///
+/// So we scan for the first newline and use that.
+/// If, gods forbid, it doesn't exist for some reason, just include the whole info string.
+fn extract_admonish_body_start_index(content: &str) -> usize {
+    let index = content
+        .find('\n')
+        // Start one character _after_ the newline
+        .map(|index| index + 1);
+
+    // If we can't get a valid index, include all content
+    match index {
+        // Couldn't find a newline
+        None => 0,
+        Some(index) => {
+            // Index out of bounds of content
+            if index > (content.len() - 1) {
+                0
+            } else {
+                index
+            }
+        }
+    }
+}
+
+fn extract_admonish_body_end_index(content: &str) -> usize {
+    let number_fence_characters = content
+        .chars()
+        .rev()
+        .position(|c| !(c == '`' || c == '~'))
+        .unwrap_or_default();
+
+    content.len() - number_fence_characters
+}
+
+/// Given the whole text content of the code fence, extract the body.
+///
+/// This really feels like we should get the markdown parser to do it for us,
+/// but it's not really clear what a good way of doing that would be.
+///
+/// ref: https://spec.commonmark.org/0.30/#fenced-code-blocks
+pub(crate) fn extract_admonish_body(content: &str) -> &str {
+    let start_index = extract_admonish_body_start_index(content);
+    let end_index = extract_admonish_body_end_index(content);
+
+    let admonish_content = &content[start_index..end_index];
+    // The newline after a code block is technically optional, so we have to
+    // trim it off dynamically.
+    admonish_content.trim_end()
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_extract_start() {
+        for (text, expected) in [
+            ("```sane example\ncontent```", 16),
+            ("~~~~~\nlonger fence", 6),
+            // empty
+            ("```\n```", 4),
+            // bounds check, should not index outside of content
+            ("```\n", 0),
+        ] {
+            let actual = extract_admonish_body_start_index(text);
+            assert_eq!(actual, expected);
+        }
+    }
+
+    #[test]
+    fn test_extract_end() {
+        for (text, expected) in [
+            ("\n```", 1),
+            // different lengths
+            ("\n``````", 1),
+            ("\n~~~~", 1),
+            // whitespace before fence end
+            ("\n   ```", 4),
+            ("content\n```", 8),
+        ] {
+            let actual = extract_admonish_body_end_index(text);
+            assert_eq!(actual, expected);
+        }
+    }
+
+    #[test]
+    fn test_extract() {
+        for (text, expected) in [
+            // standard
+            ("```admonish\ncontent\n```", "content"),
+            // whitespace
+            ("```admonish  \n  content  \n  ```", "  content"),
+            // longer
+            ("`````admonish\ncontent\n`````", "content"),
+            // unequal
+            ("~~~admonish\ncontent\n~~~~~", "content"),
+        ] {
+            let actual = extract_admonish_body(text);
+            assert_eq!(actual, expected);
+        }
+    }
+}
author	Tom Milligan <tom.milligan@uipath.com>	2023-04-23 12:19:05 +0100
committer	Tom Milligan <tom.milligan@uipath.com>	2023-04-23 12:29:54 +0100
commit	b3e82df34eef7345f55a7cfae04fc2c2e6c9478c (patch)
tree	aa859df14b380b8fe2c64e538a751bf1b5a8433f
parent	e8813eb104734881f7aa313d09178510da42c9ee (diff)