summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan-Erik Rediger <janerik@fnordig.de>2022-01-25 20:57:43 +0100
committerJan-Erik Rediger <janerik@fnordig.de>2022-01-25 21:04:50 +0100
commitadde0c8cfb1ef3199649562ae0d6cc21914befda (patch)
tree7e5407331ef7c1896969965dd8eaafaa53ccc81f
parente759070dc8c8d04f452a1c52cc22fd884b6ab0a0 (diff)
Avoid roundtripping through pulldown-cmark
Roundtripping Markdown is quite hard, and we don't actually require it. All we need is to parse the Markdown once to find the right marker and the headings. We then generate the TOC Markdown manually, and all other content can be copied over unparsed.
-rw-r--r--Cargo.lock16
-rw-r--r--Cargo.toml5
-rw-r--r--src/lib.rs75
-rw-r--r--tests/adds_toc.out.md1
-rw-r--r--tests/backslash_escapes.out.md8
-rw-r--r--tests/handles_inline_code.in.md2
-rw-r--r--tests/it.rs4
-rw-r--r--tests/multi_header_linear.out.md5
-rw-r--r--tests/tables_untouched.out.md7
-rw-r--r--tests/tables_with_html.out.md6
10 files changed, 57 insertions, 72 deletions
diff --git a/Cargo.lock b/Cargo.lock
index ecbbbfe..1b12f4a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -845,8 +845,7 @@ dependencies = [
"log",
"mdbook",
"pretty_assertions",
- "pulldown-cmark 0.8.0",
- "pulldown-cmark-to-cmark",
+ "pulldown-cmark 0.9.1",
"serde_json",
"toml",
]
@@ -1205,9 +1204,9 @@ dependencies = [
[[package]]
name = "pulldown-cmark"
-version = "0.8.0"
+version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ffade02495f22453cd593159ea2f59827aae7f53fa8323f756799b670881dcf8"
+checksum = "34f197a544b0c9ab3ae46c359a7ec9cbbb5c7bf97054266fecb7ead794a181d6"
dependencies = [
"bitflags",
"getopts",
@@ -1216,15 +1215,6 @@ dependencies = [
]
[[package]]
-name = "pulldown-cmark-to-cmark"
-version = "6.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95048382115a9da7be92ad51c84064d585b7da17472dcaa7f5eed8853c4c3707"
-dependencies = [
- "pulldown-cmark 0.8.0",
-]
-
-[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 6538455..68adc9d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,9 +10,7 @@ edition = "2018"
[dependencies]
mdbook = "0.4.10"
-pulldown-cmark = "0.8.0"
-pulldown-cmark-to-cmark = "6.0.2"
-env_logger = "0.8.4"
+pulldown-cmark = "0.9.1"
log = "0.4.11"
clap = "2.33.3"
serde_json = "1.0.57"
@@ -20,3 +18,4 @@ toml = "0.5.6"
[dev-dependencies]
pretty_assertions = "0.6.1"
+env_logger = "0.8.4"
diff --git a/src/lib.rs b/src/lib.rs
index d292c33..c4d11b1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,7 +8,6 @@ use mdbook::errors::{Error, Result};
use mdbook::preprocess::{Preprocessor, PreprocessorContext};
use pulldown_cmark::Tag::*;
use pulldown_cmark::{Event, Options, Parser};
-use pulldown_cmark_to_cmark::{cmark_with_options, Options as COptions};
use toml::value::Table;
pub struct Toc;
@@ -110,6 +109,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
let mut toc_iter = toc.iter().peekable();
// Start from the level of the first header.
+ let min_level = toc.iter().map(|(lvl, _, _)| *lvl).min().unwrap_or(1);
let mut last_lower = match toc_iter.peek() {
Some((lvl, _, _)) => *lvl,
None => 0,
@@ -127,7 +127,7 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
});
for (level, name, slug) in toc {
- let width = 2 * (level - 1) as usize;
+ let width = 2 * (level - min_level) as usize;
writeln!(result, "{1:0$}* [{2}](#{3})", width, "", name, slug).unwrap();
}
@@ -135,7 +135,6 @@ fn build_toc(toc: &[(u32, String, String)]) -> String {
}
fn add_toc(content: &str, cfg: &Config) -> Result<String> {
- let mut buf = String::with_capacity(content.len());
let mut toc_found = false;
let mut toc_content = vec![];
@@ -150,40 +149,41 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
opts.insert(Options::ENABLE_TASKLISTS);
let mark: Vec<Event> = Parser::new(&cfg.marker).collect();
- let mut mark_start = -1;
+ let mut mark_start = None;
+ let mut mark_end = 0..0;
let mut mark_loc = 0;
- let mut c = -1;
- for e in Parser::new_ext(&content, opts) {
- c += 1;
- log::trace!("Event: {:?}", e);
+ for (e, span) in Parser::new_ext(&content, opts).into_offset_iter() {
+ log::trace!("Event: {:?} (span: {:?})", e, span);
if !toc_found {
log::trace!(
- "TOC not found yet. Location: {}, Start: {}",
+ "TOC not found yet. Location: {}, Start: {:?}",
mark_loc,
mark_start
);
if e == mark[mark_loc] {
- if mark_start == -1 {
- mark_start = c;
+ if mark_start.is_none() {
+ mark_start = Some(span.clone());
}
mark_loc += 1;
if mark_loc >= mark.len() {
+ mark_end = span;
toc_found = true
}
} else if mark_loc > 0 {
mark_loc = 0;
- mark_start = -1;
+ mark_start = None;
} else {
continue;
}
}
- if let Event::Start(Heading(lvl)) = e {
- current_header_level = Some(lvl);
+ if let Event::Start(Heading(lvl, fragment, classes)) = e {
+ log::trace!("Header(lvl={lvl}, fragment={fragment:?}, classes={classes:?})");
+ current_header_level = Some(lvl as u32);
continue;
}
- if let Event::End(Heading(_)) = e {
+ if let Event::End(Heading(..)) = e {
// Skip if this header is nested too deeply.
if let Some(level) = current_header_level.take() {
let header = current_header.clone();
@@ -219,29 +219,30 @@ fn add_toc(content: &str, cfg: &Config) -> Result<String> {
let toc = build_toc(&toc_content);
log::trace!("Built TOC: {:?}", toc);
- let toc_events = Parser::new(&toc).collect::<Vec<_>>();
-
- let mut c = -1;
- let events = Parser::new_ext(&content, opts)
- .map(|e| {
- c += 1;
- if toc_found && c > mark_start && c < mark_start + (mark.len() as i32) {
- vec![]
- } else if toc_found && c == mark_start {
- toc_events.clone()
- } else {
- vec![e]
- }
- })
- .flatten();
-
- let opts = COptions {
- newlines_after_codeblock: 1,
- ..Default::default()
+ log::trace!("toc_found={toc_found} mark_start={mark_start:?} mark_end={mark_end:?}");
+
+ let content = if toc_found {
+ let mark_start = mark_start.unwrap();
+ let content_before_toc = &content[0..mark_start.start];
+ let content_after_toc = &content[mark_end.end..];
+ log::trace!("content_before_toc={:?}", content_before_toc);
+ log::trace!("content_after_toc={:?}", content_after_toc);
+ // Multiline markers might have consumed trailing newlines,
+ // we ensure there's always one before the content.
+ let extra = if content_after_toc.as_bytes()[0] == b'\n' {
+ ""
+ } else {
+ "\n"
+ };
+ format!(
+ "{}{}{}{}",
+ content_before_toc, toc, extra, content_after_toc
+ )
+ } else {
+ content.to_string()
};
- cmark_with_options(events, &mut buf, None, opts)
- .map(|_| buf)
- .map_err(|err| Error::msg(format!("Markdown serialization failed: {}", err)))
+
+ Ok(content)
}
impl Toc {
diff --git a/tests/adds_toc.out.md b/tests/adds_toc.out.md
index d23da60..a0f5312 100644
--- a/tests/adds_toc.out.md
+++ b/tests/adds_toc.out.md
@@ -18,3 +18,4 @@
## Header 2.2
### Header 2.2.1
+
diff --git a/tests/backslash_escapes.out.md b/tests/backslash_escapes.out.md
index 6be9dec..55fd035 100644
--- a/tests/backslash_escapes.out.md
+++ b/tests/backslash_escapes.out.md
@@ -1,9 +1,9 @@
-\*not emphasized\*
+\*not emphasized*
\<br/> not a tag
-\[not a link\](/foo)
-\`not code\`
+\[not a link](/foo)
+\`not code`
\* not a list
\# not a heading
-\[foo\]: /url "not a reference"
+\[foo]: /url "not a reference"
\&ouml; not a character entity
1\. not a list
diff --git a/tests/handles_inline_code.in.md b/tests/handles_inline_code.in.md
index bf49fa2..bd3181f 100644
--- a/tests/handles_inline_code.in.md
+++ b/tests/handles_inline_code.in.md
@@ -13,5 +13,3 @@
##### Header 1.1.1.1.1
# Another header `with inline` code
-
-
diff --git a/tests/it.rs b/tests/it.rs
index b0d7e60..474b2dc 100644
--- a/tests/it.rs
+++ b/tests/it.rs
@@ -58,7 +58,7 @@ macro_rules! assert_toc {
let chapter = Chapter::from_content(content);
let result = Toc::add_toc(&chapter, &config);
match result {
- Ok(result) => assert_eq!(expected.trim_end(), result),
+ Ok(result) => assert_eq!(expected, result),
Err(e) => panic!("{} failed. Error: {}", $name, e),
}
};
@@ -114,7 +114,7 @@ fn unique_slugs() {
#[test]
fn add_toc_with_github_marker() {
- let marker = "* auto-gen TOC:\n{:toc}".to_owned();
+ let marker = "* auto-gen TOC:\n{:toc}\n".to_owned();
assert_toc!("github_marker", with_marker(marker));
}
diff --git a/tests/multi_header_linear.out.md b/tests/multi_header_linear.out.md
index d5afac4..219ff6f 100644
--- a/tests/multi_header_linear.out.md
+++ b/tests/multi_header_linear.out.md
@@ -7,14 +7,9 @@
* [Level 1.2.1](#level-121)
## Level 1.1
-
### Level 1.1.1
-
### Level 1.1.2
-
## Level 1.2
-
### Level 1.2.1
text
-
diff --git a/tests/tables_untouched.out.md b/tests/tables_untouched.out.md
index 7207799..1d668ed 100644
--- a/tests/tables_untouched.out.md
+++ b/tests/tables_untouched.out.md
@@ -1,5 +1,6 @@
# Heading
-|Head 1|Head 2|
-|------|------|
-|Row 1|Row 2|
+| Head 1 | Head 2 |
+|--------|--------|
+| Row 1 | Row 2 |
+
diff --git a/tests/tables_with_html.out.md b/tests/tables_with_html.out.md
index 97e5b03..9f13052 100644
--- a/tests/tables_with_html.out.md
+++ b/tests/tables_with_html.out.md
@@ -1,5 +1,5 @@
# Heading
-|Head 1|Head 2|
-|------|------|
-|<span>Row 1</span>|Row 2|
+| Head 1 | Head 2 |
+|--------|--------|
+| <span>Row 1</span> | Row 2 |