summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Eric Huss <eric@huss.org> 2024-05-13 20:54:36 +0000
committer GitHub <noreply@github.com> 2024-05-13 20:54:36 +0000
commit a555c6b6b261abd0d4617e3ffa298abee844b0b6 (patch)
tree 7464ddac3776814edb102acbb420277bedcb925c
parent f14fc61b4b0ae86288943a85b1e1ddc7e50883e9 (diff)
parent 0752fa4e4352cc29f7450d2cbd1f6a3002b27b56 (diff)
Merge pull request #2325 from ehuss/poll-watcher
Add a poll-based file watcher.
-rw-r--r-- Cargo.toml 3
-rw-r--r-- guide/src/cli/arg-watcher.md 7
-rw-r--r-- guide/src/cli/serve.md 2
-rw-r--r-- guide/src/cli/watch.md 1
-rw-r--r-- src/cmd/command_prelude.rs 13
-rw-r--r-- src/cmd/serve.rs 25
-rw-r--r-- src/cmd/watch.rs 211
-rw-r--r-- src/cmd/watch/native.rs 189
-rw-r--r-- src/cmd/watch/poller.rs 386
9 files changed, 638 insertions, 199 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 2beec26a..db1b1825 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,7 @@ notify = { version = "6.1.1", optional = true }
notify-debouncer-mini = { version = "0.4.1", optional = true }
ignore = { version = "0.4.20", optional = true }
pathdiff = { version = "0.2.1", optional = true }
+walkdir = { version = "2.3.3", optional = true }
# Serve feature
futures-util = { version = "0.3.28", optional = true }
@@ -61,7 +62,7 @@ walkdir = "2.3.3"
[features]
default = ["watch", "serve", "search"]
-watch = ["dep:notify", "dep:notify-debouncer-mini", "dep:ignore", "dep:pathdiff"]
+watch = ["dep:notify", "dep:notify-debouncer-mini", "dep:ignore", "dep:pathdiff", "dep:walkdir"]
serve = ["dep:futures-util", "dep:tokio", "dep:warp"]
search = ["dep:elasticlunr-rs", "dep:ammonia"]
diff --git a/guide/src/cli/arg-watcher.md b/guide/src/cli/arg-watcher.md
new file mode 100644
index 00000000..bddf2dd2
--- /dev/null
+++ b/guide/src/cli/arg-watcher.md
@@ -0,0 +1,7 @@
+#### `--watcher`
+
+There are different backends used to determine when a file has changed.
+
+* `poll` (default) --- Checks for file modifications by scanning the filesystem every second.
+* `native` --- Uses the native operating system facilities to receive notifications when files change.
+ This can have less constant overhead, but may not be as reliable as the `poll` based watcher. See these issues for more information: [#383](https://github.com/rust-lang/mdBook/issues/383) [#1441](https://github.com/rust-lang/mdBook/issues/1441) [#1707](https://github.com/rust-lang/mdBook/issues/1707) [#2035](https://github.com/rust-lang/mdBook/issues/2035) [#2102](https://github.com/rust-lang/mdBook/issues/2102)
diff --git a/guide/src/cli/serve.md b/guide/src/cli/serve.md
index 5a92d8cb..4603df8e 100644
--- a/guide/src/cli/serve.md
+++ b/guide/src/cli/serve.md
@@ -44,6 +44,8 @@ book. Relative paths are interpreted relative to the book's root directory. If
not specified it will default to the value of the `build.build-dir` key in
`book.toml`, or to `./book`.
+{{#include arg-watcher.md}}
+
#### Specify exclude patterns
The `serve` command will not automatically trigger a build for files listed in
diff --git a/guide/src/cli/watch.md b/guide/src/cli/watch.md
index 5e868a6c..be2f5be4 100644
--- a/guide/src/cli/watch.md
+++ b/guide/src/cli/watch.md
@@ -27,6 +27,7 @@ book. Relative paths are interpreted relative to the book's root directory. If
not specified it will default to the value of the `build.build-dir` key in
`book.toml`, or to `./book`.
+{{#include arg-watcher.md}}
#### Specify exclude patterns
diff --git a/src/cmd/command_prelude.rs b/src/cmd/command_prelude.rs
index b6362e60..ea2d3095 100644
--- a/src/cmd/command_prelude.rs
+++ b/src/cmd/command_prelude.rs
@@ -36,6 +36,19 @@ pub trait CommandExt: Sized {
fn arg_open(self) -> Self {
self._arg(arg!(-o --open "Opens the compiled book in a web browser"))
}
+
+ fn arg_watcher(self) -> Self {
+ #[cfg(feature = "watch")]
+ return self._arg(
+ Arg::new("watcher")
+ .long("watcher")
+ .value_parser(["poll", "native"])
+ .default_value("poll")
+ .help("The filesystem watching technique"),
+ );
+ #[cfg(not(feature = "watch"))]
+ return self;
+ }
}
impl CommandExt for Command {
diff --git a/src/cmd/serve.rs b/src/cmd/serve.rs
index eeb19cb3..5e637a02 100644
--- a/src/cmd/serve.rs
+++ b/src/cmd/serve.rs
@@ -6,7 +6,6 @@ use clap::builder::NonEmptyStringValueParser;
use futures_util::sink::SinkExt;
use futures_util::StreamExt;
use mdbook::errors::*;
-use mdbook::utils;
use mdbook::utils::fs::get_404_output_file;
use mdbook::MDBook;
use std::net::{SocketAddr, ToSocketAddrs};
@@ -43,12 +42,13 @@ pub fn make_subcommand() -> Command {
.help("Port to use for HTTP connections"),
)
.arg_open()
+ .arg_watcher()
}
// Serve command implementation
pub fn execute(args: &ArgMatches) -> Result<()> {
let book_dir = get_book_dir(args);
- let mut book = MDBook::load(book_dir)?;
+ let mut book = MDBook::load(&book_dir)?;
let port = args.get_one::<String>("port").unwrap();
let hostname = args.get_one::<String>("hostname").unwrap();
@@ -97,23 +97,12 @@ pub fn execute(args: &ArgMatches) -> Result<()> {
}
#[cfg(feature = "watch")]
- watch::trigger_on_change(&book, move |paths, book_dir| {
- info!("Files changed: {:?}", paths);
- info!("Building book...");
-
- // FIXME: This area is really ugly because we need to re-set livereload :(
- let result = MDBook::load(book_dir).and_then(|mut b| {
- update_config(&mut b);
- b.build()
- });
-
- if let Err(e) = result {
- error!("Unable to load the book");
- utils::log_backtrace(&e);
- } else {
+ {
+ let watcher = watch::WatcherKind::from_str(args.get_one::<String>("watcher").unwrap());
+ watch::rebuild_on_change(watcher, &book_dir, &update_config, &move || {
let _ = tx.send(Message::text("reload"));
- }
- });
+ });
+ }
let _ = thread_handle.join();
diff --git a/src/cmd/watch.rs b/src/cmd/watch.rs
index ef3371d8..7adb2bbb 100644
--- a/src/cmd/watch.rs
+++ b/src/cmd/watch.rs
@@ -1,14 +1,11 @@
use super::command_prelude::*;
use crate::{get_book_dir, open};
-use ignore::gitignore::Gitignore;
use mdbook::errors::Result;
-use mdbook::utils;
use mdbook::MDBook;
-use pathdiff::diff_paths;
use std::path::{Path, PathBuf};
-use std::sync::mpsc::channel;
-use std::thread::sleep;
-use std::time::Duration;
+
+mod native;
+mod poller;
// Create clap subcommand arguments
pub fn make_subcommand() -> Command {
@@ -17,12 +14,28 @@ pub fn make_subcommand() -> Command {
.arg_dest_dir()
.arg_root_dir()
.arg_open()
+ .arg_watcher()
+}
+
+pub enum WatcherKind {
+ Poll,
+ Native,
+}
+
+impl WatcherKind {
+ pub fn from_str(s: &str) -> WatcherKind {
+ match s {
+ "poll" => WatcherKind::Poll,
+ "native" => WatcherKind::Native,
+ _ => panic!("unsupported watcher {s}"),
+ }
+ }
}
// Watch command implementation
pub fn execute(args: &ArgMatches) -> Result<()> {
let book_dir = get_book_dir(args);
- let mut book = MDBook::load(book_dir)?;
+ let mut book = MDBook::load(&book_dir)?;
let update_config = |book: &mut MDBook| {
if let Some(dest_dir) = args.get_one::<PathBuf>("dest-dir") {
@@ -41,42 +54,21 @@ pub fn execute(args: &ArgMatches) -> Result<()> {
open(path);
}
- trigger_on_change(&book, |paths, book_dir| {
- info!("Files changed: {:?}\nBuilding book...\n", paths);
- let result = MDBook::load(book_dir).and_then(|mut b| {
- update_config(&mut b);
- b.build()
- });
-
- if let Err(e) = result {
- error!("Unable to build the book");
- utils::log_backtrace(&e);
- }
- });
+ let watcher = WatcherKind::from_str(args.get_one::<String>("watcher").unwrap());
+ rebuild_on_change(watcher, &book_dir, &update_config, &|| {});
Ok(())
}
-fn remove_ignored_files(book_root: &Path, paths: &[PathBuf]) -> Vec<PathBuf> {
- if paths.is_empty() {
- return vec![];
- }
-
- match find_gitignore(book_root) {
- Some(gitignore_path) => {
- let (ignore, err) = Gitignore::new(&gitignore_path);
- if let Some(err) = err {
- warn!(
- "error reading gitignore `{}`: {err}",
- gitignore_path.display()
- );
- }
- filter_ignored_files(ignore, paths)
- }
- None => {
- // There is no .gitignore file.
- paths.iter().map(|path| path.to_path_buf()).collect()
- }
+pub fn rebuild_on_change(
+ kind: WatcherKind,
+ book_dir: &Path,
+ update_config: &dyn Fn(&mut MDBook),
+ post_build: &dyn Fn(),
+) {
+ match kind {
+ WatcherKind::Poll => self::poller::rebuild_on_change(book_dir, update_config, post_build),
+ WatcherKind::Native => self::native::rebuild_on_change(book_dir, update_config, post_build),
}
}
@@ -86,144 +78,3 @@ fn find_gitignore(book_root: &Path) -> Option<PathBuf> {
.map(|p| p.join(".gitignore"))
.find(|p| p.exists())
}
-
-// Note: The usage of `canonicalize` may encounter occasional failures on the Windows platform, presenting a potential risk.
-// For more details, refer to [Pull Request #2229](https://github.com/rust-lang/mdBook/pull/2229#discussion_r1408665981).
-fn filter_ignored_files(ignore: Gitignore, paths: &[PathBuf]) -> Vec<PathBuf> {
- let ignore_root = ignore
- .path()
- .canonicalize()
- .expect("ignore root canonicalize error");
-
- paths
- .iter()
- .filter(|path| {
- let relative_path =
- diff_paths(path, &ignore_root).expect("One of the paths should be an absolute");
- !ignore
- .matched_path_or_any_parents(&relative_path, relative_path.is_dir())
- .is_ignore()
- })
- .map(|path| path.to_path_buf())
- .collect()
-}
-
-/// Calls the closure when a book source file is changed, blocking indefinitely.
-pub fn trigger_on_change<F>(book: &MDBook, closure: F)
-where
- F: Fn(Vec<PathBuf>, &Path),
-{
- use notify::RecursiveMode::*;
-
- // Create a channel to receive the events.
- let (tx, rx) = channel();
-
- let mut debouncer = match notify_debouncer_mini::new_debouncer(Duration::from_secs(1), tx) {
- Ok(d) => d,
- Err(e) => {
- error!("Error while trying to watch the files:\n\n\t{:?}", e);
- std::process::exit(1)
- }
- };
- let watcher = debouncer.watcher();
-
- // Add the source directory to the watcher
- if let Err(e) = watcher.watch(&book.source_dir(), Recursive) {
- error!("Error while watching {:?}:\n {:?}", book.source_dir(), e);
- std::process::exit(1);
- };
-
- let _ = watcher.watch(&book.theme_dir(), Recursive);
-
- // Add the book.toml file to the watcher if it exists
- let _ = watcher.watch(&book.root.join("book.toml"), NonRecursive);
-
- for dir in &book.config.build.extra_watch_dirs {
- let path = book.root.join(dir);
- let canonical_path = path.canonicalize().unwrap_or_else(|e| {
- error!("Error while watching extra directory {path:?}:\n {e}");
- std::process::exit(1);
- });
-
- if let Err(e) = watcher.watch(&canonical_path, Recursive) {
- error!(
- "Error while watching extra directory {:?}:\n {:?}",
- canonical_path, e
- );
- std::process::exit(1);
- }
- }
-
- info!("Listening for changes...");
-
- loop {
- let first_event = rx.recv().unwrap();
- sleep(Duration::from_millis(50));
- let other_events = rx.try_iter();
-
- let all_events = std::iter::once(first_event).chain(other_events);
-
- let paths: Vec<_> = all_events
- .filter_map(|event| match event {
- Ok(events) => Some(events),
- Err(error) => {
- log::warn!("error while watching for changes: {error}");
- None
- }
- })
- .flatten()
- .map(|event| event.path)
- .collect();
-
- // If we are watching files outside the current repository (via extra-watch-dirs), then they are definitionally
- // ignored by gitignore. So we handle this case by including such files into the watched paths list.
- let any_external_paths = paths.iter().filter(|p| !p.starts_with(&book.root)).cloned();
- let mut paths = remove_ignored_files(&book.root, &paths[..]);
- paths.extend(any_external_paths);
-
- if !paths.is_empty() {
- closure(paths, &book.root);
- }
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use ignore::gitignore::GitignoreBuilder;
- use std::env;
-
- #[test]
- fn test_filter_ignored_files() {
- let current_dir = env::current_dir().unwrap();
-
- let ignore = GitignoreBuilder::new(&current_dir)
- .add_line(None, "*.html")
- .unwrap()
- .build()
- .unwrap();
- let should_remain = current_dir.join("record.text");
- let should_filter = current_dir.join("index.html");
-
- let remain = filter_ignored_files(ignore, &[should_remain.clone(), should_filter]);
- assert_eq!(remain, vec![should_remain])
- }
-
- #[test]
- fn filter_ignored_files_should_handle_parent_dir() {
- let current_dir = env::current_dir().unwrap();
-
- let ignore = GitignoreBuilder::new(&current_dir)
- .add_line(None, "*.html")
- .unwrap()
- .build()
- .unwrap();
-
- let parent_dir = current_dir.join("..");
- let should_remain = parent_dir.join("record.text");
- let should_filter = parent_dir.join("index.html");
-
- let remain = filter_ignored_files(ignore, &[should_remain.clone(), should_filter]);
- assert_eq!(remain, vec![should_remain])
- }
-}
diff --git a/src/cmd/watch/native.rs b/src/cmd/watch/native.rs
new file mode 100644
index 00000000..fad8d7ce
--- /dev/null
+++ b/src/cmd/watch/native.rs
@@ -0,0 +1,189 @@
+//! A filesystem watcher using native operating system facilities.
+
+use ignore::gitignore::Gitignore;
+use mdbook::MDBook;
+use std::path::{Path, PathBuf};
+use std::sync::mpsc::channel;
+use std::thread::sleep;
+use std::time::Duration;
+
+pub fn rebuild_on_change(
+ book_dir: &Path,
+ update_config: &dyn Fn(&mut MDBook),
+ post_build: &dyn Fn(),
+) {
+ use notify::RecursiveMode::*;
+
+ let mut book = MDBook::load(book_dir).unwrap_or_else(|e| {
+ error!("failed to load book: {e}");
+ std::process::exit(1);
+ });
+
+ // Create a channel to receive the events.
+ let (tx, rx) = channel();
+
+ let mut debouncer = match notify_debouncer_mini::new_debouncer(Duration::from_secs(1), tx) {
+ Ok(d) => d,
+ Err(e) => {
+ error!("Error while trying to watch the files:\n\n\t{:?}", e);
+ std::process::exit(1)
+ }
+ };
+ let watcher = debouncer.watcher();
+
+ // Add the source directory to the watcher
+ if let Err(e) = watcher.watch(&book.source_dir(), Recursive) {
+ error!("Error while watching {:?}:\n {:?}", book.source_dir(), e);
+ std::process::exit(1);
+ };
+
+ let _ = watcher.watch(&book.theme_dir(), Recursive);
+
+ // Add the book.toml file to the watcher if it exists
+ let _ = watcher.watch(&book.root.join("book.toml"), NonRecursive);
+
+ for dir in &book.config.build.extra_watch_dirs {
+ let path = book.root.join(dir);
+ let canonical_path = path.canonicalize().unwrap_or_else(|e| {
+ error!("Error while watching extra directory {path:?}:\n {e}");
+ std::process::exit(1);
+ });
+
+ if let Err(e) = watcher.watch(&canonical_path, Recursive) {
+ error!(
+ "Error while watching extra directory {:?}:\n {:?}",
+ canonical_path, e
+ );
+ std::process::exit(1);
+ }
+ }
+
+ info!("Listening for changes...");
+
+ loop {
+ let first_event = rx.recv().unwrap();
+ sleep(Duration::from_millis(50));
+ let other_events = rx.try_iter();
+
+ let all_events = std::iter::once(first_event).chain(other_events);
+
+ let paths: Vec<_> = all_events
+ .filter_map(|event| match event {
+ Ok(events) => Some(events),
+ Err(error) => {
+ log::warn!("error while watching for changes: {error}");
+ None
+ }
+ })
+ .flatten()
+ .map(|event| event.path)
+ .collect();
+
+ // If we are watching files outside the current repository (via extra-watch-dirs), then they are definitionally
+ // ignored by gitignore. So we handle this case by including such files into the watched paths list.
+ let any_external_paths = paths.iter().filter(|p| !p.starts_with(&book.root)).cloned();
+ let mut paths = remove_ignored_files(&book.root, &paths[..]);
+ paths.extend(any_external_paths);
+
+ if !paths.is_empty() {
+ info!("Files changed: {paths:?}");
+ match MDBook::load(book_dir) {
+ Ok(mut b) => {
+ update_config(&mut b);
+ if let Err(e) = b.build() {
+ error!("failed to build the book: {e:?}");
+ } else {
+ post_build();
+ }
+ book = b;
+ }
+ Err(e) => error!("failed to load book config: {e:?}"),
+ }
+ }
+ }
+}
+
+fn remove_ignored_files(book_root: &Path, paths: &[PathBuf]) -> Vec<PathBuf> {
+ if paths.is_empty() {
+ return vec![];
+ }
+
+ match super::find_gitignore(book_root) {
+ Some(gitignore_path) => {
+ let (ignore, err) = Gitignore::new(&gitignore_path);
+ if let Some(err) = err {
+ warn!(
+ "error reading gitignore `{}`: {err}",
+ gitignore_path.display()
+ );
+ }
+ filter_ignored_files(ignore, paths)
+ }
+ None => {
+ // There is no .gitignore file.
+ paths.iter().map(|path| path.to_path_buf()).collect()
+ }
+ }
+}
+
+// Note: The usage of `canonicalize` may encounter occasional failures on the Windows platform, presenting a potential risk.
+// For more details, refer to [Pull Request #2229](https://github.com/rust-lang/mdBook/pull/2229#discussion_r1408665981).
+fn filter_ignored_files(ignore: Gitignore, paths: &[PathBuf]) -> Vec<PathBuf> {
+ let ignore_root = ignore
+ .path()
+ .canonicalize()
+ .expect("ignore root canonicalize error");
+
+ paths
+ .iter()
+ .filter(|path| {
+ let relative_path = pathdiff::diff_paths(&path, &ignore_root)
+ .expect("One of the paths should be an absolute");
+ !ignore
+ .matched_path_or_any_parents(&relative_path, relative_path.is_dir())
+ .is_ignore()
+ })
+ .map(|path| path.to_path_buf())
+ .collect()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use ignore::gitignore::GitignoreBuilder;
+ use std::env;
+
+ #[test]
+ fn test_filter_ignored_files() {
+ let current_dir = env::current_dir().unwrap();
+
+ let ignore = GitignoreBuilder::new(&current_dir)
+ .add_line(None, "*.html")
+ .unwrap()
+ .build()
+ .unwrap();
+ let should_remain = current_dir.join("record.text");
+ let should_filter = current_dir.join("index.html");
+
+ let remain = filter_ignored_files(ignore, &[should_remain.clone(), should_filter]);
+ assert_eq!(remain, vec![should_remain])
+ }
+
+ #[test]
+ fn filter_ignored_files_should_handle_parent_dir() {
+ let current_dir = env::current_dir().unwrap();
+
+ let ignore = GitignoreBuilder::new(&current_dir)
+ .add_line(None, "*.html")
+ .unwrap()
+ .build()
+ .unwrap();
+
+ let parent_dir = current_dir.join("..");
+ let should_remain = parent_dir.join("record.text");
+ let should_filter = parent_dir.join("index.html");
+
+ let remain = filter_ignored_files(ignore, &[should_remain.clone(), should_filter]);
+ assert_eq!(remain, vec![should_remain])
+ }
+}
diff --git a/src/cmd/watch/poller.rs b/src/cmd/watch/poller.rs
new file mode 100644
index 00000000..5e1d1497
--- /dev/null
+++ b/src/cmd/watch/poller.rs
@@ -0,0 +1,386 @@
+//! A simple poll-based filesystem watcher.
+//!
+//! This exists because the native change notifications have historically had
+//! lots of problems. Various operating systems and different filesystems have
+//! had problems correctly reporting changes.
+
+use ignore::gitignore::Gitignore;
+use mdbook::MDBook;
+use pathdiff::diff_paths;
+use std::collections::HashMap;
+use std::fs::FileType;
+use std::path::{Path, PathBuf};
+use std::time::{Duration, Instant, SystemTime};
+use walkdir::WalkDir;
+
+/// Calls the closure when a book source file is changed, blocking indefinitely.
+pub fn rebuild_on_change(
+ book_dir: &Path,
+ update_config: &dyn Fn(&mut MDBook),
+ post_build: &dyn Fn(),
+) {
+ let mut book = MDBook::load(book_dir).unwrap_or_else(|e| {
+ error!("failed to load book: {e}");
+ std::process::exit(1);
+ });
+
+ let mut watcher = Watcher::new(book_dir);
+
+ info!("Watching for changes...");
+ // Scan once to initialize the starting point.
+ watcher.set_roots(&book);
+ watcher.scan();
+
+ // Track average scan time, to help investigate if the poller is taking
+ // undesirably long. This is not a rigorous benchmark, just a rough
+ // estimate.
+ const AVG_SIZE: usize = 60;
+ let mut avgs = vec![0.0; AVG_SIZE];
+ let mut avg_i = 0;
+
+ loop {
+ std::thread::sleep(Duration::new(1, 0));
+ watcher.set_roots(&book);
+ let start = Instant::now();
+ let paths = watcher.scan();
+ let elapsed = start.elapsed().as_secs_f64();
+ avgs[avg_i] = elapsed;
+ avg_i += 1;
+ if avg_i >= AVG_SIZE {
+ avg_i = 0;
+ let avg = avgs.iter().sum::<f64>() / (avgs.len() as f64);
+ trace!(
+ "scan average time: {avg:.2}s, scan size is {}",
+ watcher.path_data.len()
+ );
+ }
+
+ if !paths.is_empty() {
+ info!("Files changed: {paths:?}");
+ match MDBook::load(book_dir) {
+ Ok(mut b) => {
+ update_config(&mut b);
+ if let Err(e) = b.build() {
+ error!("failed to build the book: {e:?}");
+ } else {
+ post_build();
+ }
+ book = b;
+ }
+ Err(e) => error!("failed to load book config: {e:?}"),
+ }
+ }
+ }
+}
+
+#[derive(PartialEq)]
+struct PathData {
+ file_type: FileType,
+ mtime: SystemTime,
+ size: u64,
+}
+
+/// A very simple poll-watcher that scans for modified files.
+#[derive(Default)]
+struct Watcher {
+ /// The root paths where it will recursively scan for changes.
+ root_paths: Vec<PathBuf>,
+ /// Data about files on disk.
+ path_data: HashMap<PathBuf, PathData>,
+ /// Filters paths that will be watched.
+ ignore: Option<(PathBuf, Gitignore)>,
+}
+
+impl Watcher {
+ fn new(book_root: &Path) -> Watcher {
+ // FIXME: ignore should be reloaded when it changes.
+ let ignore = super::find_gitignore(book_root).map(|gitignore_path| {
+ let (ignore, err) = Gitignore::new(&gitignore_path);
+ if let Some(err) = err {
+ warn!(
+ "error reading gitignore `{}`: {err}",
+ gitignore_path.display()
+ );
+ }
+ // Note: The usage of `canonicalize` may encounter occasional
+ // failures on the Windows platform, presenting a potential risk.
+ // For more details, refer to [Pull Request
+ // #2229](https://github.com/rust-lang/mdBook/pull/2229#discussion_r1408665981).
+ let ignore_path = ignore
+ .path()
+ .canonicalize()
+ .expect("ignore root canonicalize error");
+ (ignore_path, ignore)
+ });
+
+ Watcher {
+ ignore,
+ ..Default::default()
+ }
+ }
+
+ /// Sets the root directories where scanning will start.
+ fn set_roots(&mut self, book: &MDBook) {
+ let mut root_paths = vec![
+ book.source_dir(),
+ book.theme_dir(),
+ book.root.join("book.toml"),
+ ];
+ root_paths.extend(
+ book.config
+ .build
+ .extra_watch_dirs
+ .iter()
+ .map(|path| book.root.join(path)),
+ );
+ if let Some(html_config) = book.config.html_config() {
+ root_paths.extend(
+ html_config
+ .additional_css
+ .iter()
+ .chain(html_config.additional_js.iter())
+ .map(|path| book.root.join(path)),
+ );
+ }
+
+ self.root_paths = root_paths;
+ }
+
+ /// Scans for changes.
+ ///
+ /// Returns the paths that have changed.
+ fn scan(&mut self) -> Vec<PathBuf> {
+ let ignore = &self.ignore;
+ let new_path_data: HashMap<_, _> = self
+ .root_paths
+ .iter()
+ .filter(|root| root.exists())
+ .flat_map(|root| {
+ WalkDir::new(root)
+ .follow_links(true)
+ .into_iter()
+ .filter_entry(|entry| {
+ if let Some((ignore_path, ignore)) = ignore {
+ let path = entry.path();
+ // Canonicalization helps with removing `..` and
+ // `.` entries, which can cause issues with
+ // diff_paths.
+ let path = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
+ let relative_path = diff_paths(&path, &ignore_path)
+ .expect("One of the paths should be an absolute");
+ if ignore
+ .matched_path_or_any_parents(&relative_path, relative_path.is_dir())
+ .is_ignore()
+ {
+ trace!("ignoring {path:?}");
+ return false;
+ }
+ }
+ true
+ })
+ .filter_map(move |entry| {
+ let entry = match entry {
+ Ok(e) => e,
+ Err(e) => {
+ debug!("failed to scan {root:?}: {e}");
+ return None;
+ }
+ };
+ if entry.file_type().is_dir() {
+ // Changes to directories themselves aren't
+ // particularly interesting.
+ return None;
+ }
+ let path = entry.path().to_path_buf();
+
+ let meta = match entry.metadata() {
+ Ok(meta) => meta,
+ Err(e) => {
+ debug!("failed to scan {path:?}: {e}");
+ return None;
+ }
+ };
+ let mtime = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);
+ let pd = PathData {
+ file_type: meta.file_type(),
+ mtime,
+ size: meta.len(),
+ };
+ Some((path, pd))
+ })
+ })
+ .collect();
+ let mut paths = Vec::new();
+ for (new_path, new_data) in &new_path_data {
+ match self.path_data.get(new_path) {
+ Some(old_data) => {
+ if new_data != old_data {
+ paths.push(new_path.to_path_buf());
+ }
+ }
+ None => {
+ paths.push(new_path.clone());
+ }
+ }
+ }
+ for old_path in self.path_data.keys() {
+ if !new_path_data.contains_key(old_path) {
+ paths.push(old_path.to_path_buf());
+ }
+ }
+ self.path_data = new_path_data;
+ paths
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ /// Helper for testing the watcher.
+ fn check_watch_behavior(
+ gitignore_path: &str,
+ gitignore: &str,
+ book_root_path: &str,
+ ignored: &[&str],
+ not_ignored: &[&str],
+ extra_setup: &dyn Fn(&Path),
+ ) {
+ // Create the book and initialize things.
+ let temp = tempfile::Builder::new()
+ .prefix("mdbook-")
+ .tempdir()
+ .unwrap();
+ let root = temp.path();
+ let book_root = root.join(book_root_path);
+ // eprintln!("book_root={book_root:?}",);
+ MDBook::init(&book_root).build().unwrap();
+ std::fs::write(root.join(gitignore_path), gitignore).unwrap();
+ let create = |paths: &[&str]| {
+ let mut paths = paths
+ .iter()
+ .map(|path| root.join(path))
+ .inspect(|path| {
+ std::fs::create_dir_all(path.parent().unwrap()).unwrap();
+ std::fs::write(path, "initial content").unwrap();
+ })
+ .map(|path| path.canonicalize().unwrap())
+ .collect::<Vec<_>>();
+ paths.sort();
+ paths
+ };
+ let ignored = create(ignored);
+ let not_ignored = create(not_ignored);
+ extra_setup(&book_root);
+ // Create a watcher and check its behavior.
+ let book = MDBook::load(&book_root).unwrap();
+ let mut watcher = Watcher::new(&book_root);
+ watcher.set_roots(&book);
+ // Do an initial scan to initialize its state.
+ watcher.scan();
+ // Verify the steady state is empty.
+ let changed = watcher.scan();
+ assert_eq!(changed, Vec::<PathBuf>::new());
+ // Modify all files, and verify that only not_ignored are detected.
+ for path in ignored.iter().chain(not_ignored.iter()) {
+ std::fs::write(path, "modified").unwrap();
+ }
+ let changed = watcher.scan();
+ let mut changed = changed
+ .into_iter()
+ .map(|p| p.canonicalize().unwrap())
+ .collect::<Vec<_>>();
+ changed.sort();
+ assert_eq!(changed, not_ignored);
+ // Verify again that steady state is empty.
+ let changed = watcher.scan();
+ assert_eq!(changed, Vec::<PathBuf>::new());
+ }
+
+ #[test]
+ fn test_ignore() {
+ // Basic gitignore test.
+ check_watch_behavior(
+ "foo/.gitignore",
+ "*.tmp",
+ "foo",
+ &["foo/src/somefile.tmp"],
+ &["foo/src/chapter.md"],
+ &|_book_root| {},
+ );
+ }
+
+ #[test]
+ fn test_ignore_in_parent() {
+ // gitignore is in the parent of the book
+ check_watch_behavior(
+ ".gitignore",
+ "*.tmp\nsomedir/\n/inroot\n/foo/src/inbook\n",
+ "foo",
+ &[
+ "foo/src/somefile.tmp",
+ "foo/src/somedir/somefile",
+ "inroot/somefile",
+ "foo/src/inbook/somefile",
+ ],
+ &["foo/src/inroot/somefile"],
+ &|_book_root| {},
+ );
+ }
+
+ #[test]
+ fn test_ignore_canonical() {
+ // test with path with ..
+ check_watch_behavior(
+ ".gitignore",
+ "*.tmp\nsomedir/\n/foo/src/inbook\n",
+ "bar/../foo",
+ &[
+ "foo/src/somefile.tmp",
+ "foo/src/somedir/somefile",
+ "foo/src/inbook/somefile",
+ ],
+ &["foo/src/chapter.md"],
+ &|_book_root| {},
+ );
+ }
+
+ #[test]
+ fn test_scan_extra_watch() {
+ // Check behavior with extra-watch-dirs
+ check_watch_behavior(
+ ".gitignore",
+ "*.tmp\n/outside-root/ignoreme\n/foo/examples/ignoreme\n",
+ "foo",
+ &[
+ "foo/src/somefile.tmp",
+ "foo/examples/example.tmp",
+ "outside-root/somefile.tmp",
+ "outside-root/ignoreme",
+ "foo/examples/ignoreme",
+ ],
+ &[
+ "foo/src/chapter.md",
+ "foo/examples/example.rs",
+ "foo/examples/example2.rs",
+