summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Milligan <tom.milligan@uipath.com>2023-04-23 12:19:05 +0100
committerTom Milligan <tom.milligan@uipath.com>2023-04-23 12:29:54 +0100
commitb3e82df34eef7345f55a7cfae04fc2c2e6c9478c (patch)
treeaa859df14b380b8fe2c64e538a751bf1b5a8433f
parente8813eb104734881f7aa313d09178510da42c9ee (diff)
fix: better code fence handling
-rw-r--r--integration/expected/chapter_1_main.html10
-rw-r--r--integration/src/chapter_1.md6
-rw-r--r--src/lib.rs63
-rw-r--r--src/parse.rs105
4 files changed, 159 insertions, 25 deletions
diff --git a/integration/expected/chapter_1_main.html b/integration/expected/chapter_1_main.html
index 0e25358..57b88fb 100644
--- a/integration/expected/chapter_1_main.html
+++ b/integration/expected/chapter_1_main.html
@@ -62,4 +62,14 @@ No title, only body
<p>This is a commonly shared warning!</p>
</div>
</div>
+<div id="admonition-note-2" class="admonition note">
+<div class="admonition-title">
+<p>Note</p>
+<p><a class="admonition-anchor-link" href="#admonition-note-2"></a></p>
+</div>
+<div>
+<pre><code class="language-bash">Nested code block
+</code></pre>
+</div>
+</div>
diff --git a/integration/src/chapter_1.md b/integration/src/chapter_1.md
index cfbc723..b1db164 100644
--- a/integration/src/chapter_1.md
+++ b/integration/src/chapter_1.md
@@ -23,3 +23,9 @@ Hidden on load
```
{{#include common_warning.md}}
+
+````admonish
+```bash
+Nested code block
+```
+````
diff --git a/src/lib.rs b/src/lib.rs
index b91a7d4..f69a395 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,6 +9,7 @@ use pulldown_cmark::{CodeBlockKind::*, Event, Options, Parser, Tag};
use std::{borrow::Cow, str::FromStr};
mod config;
+mod parse;
mod resolve;
mod types;
@@ -218,28 +219,6 @@ impl<'a> Admonition<'a> {
const ANCHOR_ID_PREFIX: &str = "admonition";
const ANCHOR_ID_DEFAULT: &str = "default";
-fn extract_admonish_body(content: &str) -> &str {
- const PRE_END: char = '\n';
- const POST: &str = "```";
-
- // We can't trust the info string length to find the start of the body
- // it may change length if it contains HTML or character escapes.
- //
- // So we scan for the first newline and use that.
- // If gods forbid it doesn't exist for some reason, just include the whole info string.
- let start_index = content
- // Start one character _after_ the newline
- .find(PRE_END)
- .map(|index| index + 1)
- .unwrap_or_default();
- let end_index = content.len() - POST.len();
-
- let admonish_content = &content[start_index..end_index];
- // The newline after a code block is technically optional, so we have to
- // trim it off dynamically.
- admonish_content.trim()
-}
-
/// Given the content in the span of the code block, and the info string,
/// return `Some(Admonition)` if the code block is an admonition.
///
@@ -286,7 +265,7 @@ Original markdown input:
})
}
};
- let body = extract_admonish_body(content);
+ let body = parse::extract_admonish_body(content);
Some(Ok(Admonition::new(info, body)))
}
@@ -321,8 +300,8 @@ fn preprocess(
let events = Parser::new_ext(content, opts);
- for (e, span) in events.into_offset_iter() {
- if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = e.clone() {
+ for (event, span) in events.into_offset_iter() {
+ if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = event.clone() {
let span_content = &content[span.start..span.end];
let admonition = match parse_admonition(
@@ -444,6 +423,40 @@ Text
}
#[test]
+ fn adds_admonish_longer_code_fence() {
+ let content = r#"# Chapter
+````admonish
+```json
+{}
+```
+````
+Text
+"#;
+
+ let expected = r##"# Chapter
+
+<div id="admonition-note" class="admonition note">
+<div class="admonition-title">
+
+Note
+
+<a class="admonition-anchor-link" href="#admonition-note"></a>
+</div>
+<div>
+
+```json
+{}
+```
+
+</div>
+</div>
+Text
+"##;
+
+ assert_eq!(expected, prep(content));
+ }
+
+ #[test]
fn adds_admonish_directive() {
let content = r#"# Chapter
```admonish warning
diff --git a/src/parse.rs b/src/parse.rs
new file mode 100644
index 0000000..1632df8
--- /dev/null
+++ b/src/parse.rs
@@ -0,0 +1,105 @@
+/// We can't trust the info string length to find the start of the body
+/// it may change length if it contains HTML or character escapes.
+///
+/// So we scan for the first newline and use that.
+/// If gods forbid it doesn't exist for some reason, just include the whole info string.
+fn extract_admonish_body_start_index(content: &str) -> usize {
+ let index = content
+ .find('\n')
+ // Start one character _after_ the newline
+ .map(|index| index + 1);
+
+ // If we can't get a valid index, include all content
+ match index {
+ // Couldn't find a newline
+ None => 0,
+ Some(index) => {
+ // Index out of bound of content
+ if index > (content.len() - 1) {
+ 0
+ } else {
+ index
+ }
+ }
+ }
+}
+
+fn extract_admonish_body_end_index(content: &str) -> usize {
+ let number_fence_characters = content
+ .chars()
+ .rev()
+ .position(|c| !(c == '`' || c == '~'))
+ .unwrap_or_default();
+
+ content.len() - number_fence_characters
+}
+
+/// Given the whole text content of the code fence, extract the body.
+///
+/// This really feels like we should get the markdown parser to do it for us,
+/// but it's not really clear a good way of doing that.
+///
+/// ref: https://spec.commonmark.org/0.30/#fenced-code-blocks
+pub(crate) fn extract_admonish_body(content: &str) -> &str {
+ let start_index = extract_admonish_body_start_index(content);
+ let end_index = extract_admonish_body_end_index(content);
+
+ let admonish_content = &content[start_index..end_index];
+ // The newline after a code block is technically optional, so we have to
+ // trim it off dynamically.
+ admonish_content.trim_end()
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn test_extract_start() {
+ for (text, expected) in [
+ ("```sane example\ncontent```", 16),
+ ("~~~~~\nlonger fence", 6),
+ // empty
+ ("```\n```", 4),
+ // bounds check, should not index outside of content
+ ("```\n", 0),
+ ] {
+ let actual = extract_admonish_body_start_index(text);
+ assert_eq!(actual, expected);
+ }
+ }
+
+ #[test]
+ fn test_extract_end() {
+ for (text, expected) in [
+ ("\n```", 1),
+ // different lengths
+ ("\n``````", 1),
+ ("\n~~~~", 1),
+ // whitespace before fence end
+ ("\n ```", 4),
+ ("content\n```", 8),
+ ] {
+ let actual = extract_admonish_body_end_index(text);
+ assert_eq!(actual, expected);
+ }
+ }
+
+ #[test]
+ fn test_extract() {
+ for (text, expected) in [
+ // standard
+ ("```admonish\ncontent\n```", "content"),
+ // whitespace
+ ("```admonish \n content \n ```", " content"),
+ // longer
+ ("`````admonish\ncontent\n`````", "content"),
+ // unequal
+ ("~~~admonish\ncontent\n~~~~~", "content"),
+ ] {
+ let actual = extract_admonish_body(text);
+ assert_eq!(actual, expected);
+ }
+ }
+}