summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Milligan <tom.milligan@uipath.com>2023-09-19 12:15:44 +0100
committerTom Milligan <tom.milligan@uipath.com>2023-09-19 12:21:42 +0100
commit933432afb247dce906b7f638879440f1cad48d88 (patch)
tree06188369ad8d0622765105b14388316022979045
parent496e8f7c6d932b0060e7573f9b78e4ecbececae1 (diff)
markdown: fix panic when searching for indent
-rw-r--r--src/markdown.rs74
1 files changed, 63 insertions, 11 deletions
diff --git a/src/markdown.rs b/src/markdown.rs
index 0ea2fa2..f289f6e 100644
--- a/src/markdown.rs
+++ b/src/markdown.rs
@@ -28,18 +28,8 @@ pub(crate) fn preprocess(
for (event, span) in events.into_offset_iter() {
if let Event::Start(Tag::CodeBlock(Fenced(info_string))) = event.clone() {
let span_content = &content[span.start..span.end];
-
- // Scan for a line start before this span.
- // For safety, only scan up to a fixed limit of the text
const INDENT_SCAN_MAX: usize = 1024;
- // If there's less text than that, just scan from the start
- let line_scan_start = span.start.checked_sub(INDENT_SCAN_MAX).unwrap_or_default();
- // If we can't find a newline, assume no indent
- let indent = content[line_scan_start..span.start]
- .chars()
- .rev()
- .position(|c| c == '\n')
- .unwrap_or_default();
+ let indent = indent_of(content, span.start, INDENT_SCAN_MAX);
let admonition = match parse_admonition(
info_string.as_ref(),
@@ -75,11 +65,73 @@ pub(crate) fn preprocess(
Ok(content)
}
+/// Returns the indent of the given position within its line.
+///
+/// Defined as the number of characters (not bytes) between the given `position`
+/// (a byte index into `content` that lies on a char boundary)
+/// and the closest preceding newline character `\n`.
+///
+/// `max` is the maximum number of characters to scan backwards; if no newline
+/// is found within that limit (or there is none at all), zero is returned.
+///
+/// # Panics
+///
+/// Panics if `position` is greater than `content.len()` or is not on a UTF-8 char boundary of `content`.
+fn indent_of(content: &str, position: usize, max: usize) -> usize {
+ // Scan backwards from `position`, looking for the start of its line.
+ content[..position]
+ .chars()
+ .rev()
+ // For safety, scan at most `max` characters (characters, not bytes)
+ .take(max)
+ .position(|c| c == '\n')
+ // If no newline is found within the limit, assume no indent
+ .unwrap_or_default()
+}
+
#[cfg(test)]
mod test {
use super::*;
use pretty_assertions::assert_eq;
+ #[test]
+ fn indent_of_samples() {
+ for (content, position, max, expected) in [
+ // Empty case
+ ("", 0, 10, 0),
+ ("no newline", 4, 10, 0),
+ // Newline at byte index 4, so the line starts at 5; difference from 8 is 3
+ ("with\nnewline", 8, 10, 3),
+ // If no newline within the safety limit, return 0
+ ("with\nnewline", 8, 2, 0),
+ // Safety limit is characters, not bytes
+ // Regression test for the indent-search panic (FIXME: add issue link)
+ (
+ "例えばこれは",
+ // Position is generated by mdbook internals, should be a valid char boundary
+ // This mimics the second character starting the span
+ "例".len(),
+ // Any arbitrary safety limit should be valid
+ 1,
+ // Should not panic
+ 0,
+ ),
+ (
+ "例え\n れは",
+ // Position is generated by mdbook internals, should be a valid char boundary
+ // This mimics a span starting after the leading spaces on the second line
+ "例え\n ".len(),
+ // Any arbitrary safety limit should be valid
+ 4,
+ // Should not panic. NOTE(review): expecting 2 needs two spaces after `\n` in both literals above; only one is visible here - confirm
+ 2,
+ ),
+ ] {
+ let actual = indent_of(content, position, max);
+ assert_eq!(actual, expected);
+ }
+ }
+
fn prep(content: &str) -> String {
preprocess(
content,