diff options
author | Tom Milligan <tom.milligan@uipath.com> | 2023-04-23 12:19:05 +0100 |
---|---|---|
committer | Tom Milligan <tom.milligan@uipath.com> | 2023-04-23 12:29:54 +0100 |
commit | b3e82df34eef7345f55a7cfae04fc2c2e6c9478c (patch) | |
tree | aa859df14b380b8fe2c64e538a751bf1b5a8433f | |
parent | e8813eb104734881f7aa313d09178510da42c9ee (diff) |
fix: better code fence handling
-rw-r--r-- | integration/expected/chapter_1_main.html | 10 | ||||
-rw-r--r-- | integration/src/chapter_1.md | 6 | ||||
-rw-r--r-- | src/lib.rs | 63 | ||||
-rw-r--r-- | src/parse.rs | 105 |
4 files changed, 159 insertions, 25 deletions
diff --git a/integration/expected/chapter_1_main.html b/integration/expected/chapter_1_main.html index 0e25358..57b88fb 100644 --- a/integration/expected/chapter_1_main.html +++ b/integration/expected/chapter_1_main.html @@ -62,4 +62,14 @@ No title, only body <p>This is a commonly shared warning!</p> </div> </div> +<div id="admonition-note-2" class="admonition note"> +<div class="admonition-title"> +<p>Note</p> +<p><a class="admonition-anchor-link" href="#admonition-note-2"></a></p> +</div> +<div> +<pre><code class="language-bash">Nested code block +</code></pre> +</div> +</div> diff --git a/integration/src/chapter_1.md b/integration/src/chapter_1.md index cfbc723..b1db164 100644 --- a/integration/src/chapter_1.md +++ b/integration/src/chapter_1.md @@ -23,3 +23,9 @@ Hidden on load ``` {{#include common_warning.md}} + +````admonish +```bash +Nested code block +``` +```` @@ -9,6 +9,7 @@ use pulldown_cmark::{CodeBlockKind::*, Event, Options, Parser, Tag}; use std::{borrow::Cow, str::FromStr}; mod config; +mod parse; mod resolve; mod types; @@ -218,28 +219,6 @@ impl<'a> Admonition<'a> { const ANCHOR_ID_PREFIX: &str = "admonition"; const ANCHOR_ID_DEFAULT: &str = "default"; -fn extract_admonish_body(content: &str) -> &str { - const PRE_END: char = '\n'; - const POST: &str = "```"; - - // We can't trust the info string length to find the start of the body - // it may change length if it contains HTML or character escapes. - // - // So we scan for the first newline and use that. - // If gods forbid it doesn't exist for some reason, just include the whole info string. - let start_index = content - // Start one character _after_ the newline - .find(PRE_END) - .map(|index| index + 1) - .unwrap_or_default(); - let end_index = content.len() - POST.len(); - - let admonish_content = &content[start_index..end_index]; - // The newline after a code block is technically optional, so we have to - // trim it off dynamically. 
- admonish_content.trim() -} - /// Given the content in the span of the code block, and the info string, /// return `Some(Admonition)` if the code block is an admonition. /// @@ -286,7 +265,7 @@ Original markdown input: }) } }; - let body = extract_admonish_body(content); + let body = parse::extract_admonish_body(content); Some(Ok(Admonition::new(info, body))) } @@ -321,8 +300,8 @@ fn preprocess( let events = Parser::new_ext(content, opts); - for (e, span) in events.into_offset_iter() { - if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = e.clone() { + for (event, span) in events.into_offset_iter() { + if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = event.clone() { let span_content = &content[span.start..span.end]; let admonition = match parse_admonition( @@ -444,6 +423,40 @@ Text } #[test] + fn adds_admonish_longer_code_fence() { + let content = r#"# Chapter +````admonish +```json +{} +``` +```` +Text +"#; + + let expected = r##"# Chapter + +<div id="admonition-note" class="admonition note"> +<div class="admonition-title"> + +Note + +<a class="admonition-anchor-link" href="#admonition-note"></a> +</div> +<div> + +```json +{} +``` + +</div> +</div> +Text +"##; + + assert_eq!(expected, prep(content)); + } + + #[test] fn adds_admonish_directive() { let content = r#"# Chapter ```admonish warning diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..1632df8 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,105 @@ +/// We can't trust the info string length to find the start of the body +/// it may change length if it contains HTML or character escapes. +/// +/// So we scan for the first newline and use that. +/// If gods forbid it doesn't exist for some reason, just include the whole info string. 
/// Padding characters that CommonMark permits before and after a closing code fence.
const FENCE_PADDING: [char; 2] = [' ', '\t'];

/// Find the byte index at which the admonition body starts.
///
/// We can't trust the info string length to find the start of the body,
/// it may change length if it contains HTML or character escapes.
/// So we scan for the first newline and start one character after it.
///
/// Returns `0` (i.e. include all content) when there is no newline, or when
/// nothing follows the first newline.
fn extract_admonish_body_start_index(content: &str) -> usize {
    content
        .find('\n')
        // Start one character _after_ the newline
        .map(|newline_index| newline_index + 1)
        // A newline in final position has no body after it, fall back to all content
        .filter(|&body_start| body_start < content.len())
        .unwrap_or(0)
}

/// Find the byte index at which the admonition body ends, i.e. the index of
/// the first character of the closing fence.
///
/// Only the final line of `content` is considered. It is treated as a closing
/// fence when, ignoring surrounding spaces and tabs, it is a non-empty run of a
/// single fence character (`` ` `` or `~`). Trailing spaces or tabs after the
/// fence are allowed by CommonMark, so they must not stop the fence from
/// being recognised.
///
/// Indentation in front of the fence is left before the returned index.
/// When the final line is not a closing fence (e.g. the block was never
/// closed), `content.len()` is returned.
///
/// This does not check that the closing fence matches the opening fence,
/// that is left to the markdown parser that produced `content`.
fn extract_admonish_body_end_index(content: &str) -> usize {
    let last_line_start = content.rfind('\n').map_or(0, |index| index + 1);
    let last_line = &content[last_line_start..];

    let fence_line = last_line.trim_end_matches(FENCE_PADDING);
    let fence = fence_line.trim_start_matches(FENCE_PADDING);

    let mut fence_chars = fence.chars();
    let is_closing_fence = match fence_chars.next() {
        Some(marker @ ('`' | '~')) => fence_chars.all(|c| c == marker),
        _ => false,
    };

    if is_closing_fence {
        // Skip the indentation, so the index points at the fence itself
        last_line_start + (fence_line.len() - fence.len())
    } else {
        content.len()
    }
}

/// Given the whole text content of the code fence, extract the body.
///
/// The body is everything between the end of the opening fence line and the
/// start of the closing fence, with trailing whitespace removed. Leading
/// whitespace is preserved.
///
/// This really feels like we should get the markdown parser to do it for us,
/// but it's not really clear a good way of doing that.
///
/// ref: https://spec.commonmark.org/0.30/#fenced-code-blocks
pub(crate) fn extract_admonish_body(content: &str) -> &str {
    let start_index = extract_admonish_body_start_index(content);
    let end_index = extract_admonish_body_end_index(content);

    // The newline after a code block is technically optional, so we have to
    // trim it off dynamically.
    content[start_index..end_index].trim_end()
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_extract_start() {
        for (text, expected) in [
            ("```sane example\ncontent```", 16),
            ("~~~~~\nlonger fence", 6),
            // empty
            ("```\n```", 4),
            // bounds check, should not index outside of content
            ("```\n", 0),
        ] {
            let actual = extract_admonish_body_start_index(text);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn test_extract_end() {
        for (text, expected) in [
            ("\n```", 1),
            // different lengths
            ("\n``````", 1),
            ("\n~~~~", 1),
            // whitespace before fence end
            ("\n   ```", 4),
            ("content\n```", 8),
            // whitespace after fence end
            ("\n```  ", 1),
            ("\n~~~\t", 1),
            // fence characters that are not on their own line are content
            ("content```", 10),
            // mixed fence characters are not a fence
            ("\n`~`", 4),
        ] {
            let actual = extract_admonish_body_end_index(text);
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn test_extract() {
        for (text, expected) in [
            // standard
            ("```admonish\ncontent\n```", "content"),
            // whitespace
            ("```admonish  \n  content  \n  ```", "  content"),
            // longer
            ("`````admonish\ncontent\n`````", "content"),
            // unequal
            ("~~~admonish\ncontent\n~~~~~", "content"),
            // whitespace after closing fence
            ("```admonish\ncontent\n```  ", "content"),
            // unclosed, body ends with a fence character
            ("```admonish\nuse `x`", "use `x`"),
        ] {
            let actual = extract_admonish_body(text);
            assert_eq!(actual, expected);
        }
    }
}