summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThayne McCombs <astrothayne@gmail.com>2022-06-16 01:22:43 -0600
committerThayne McCombs <astrothayne@gmail.com>2024-05-07 22:52:49 -0600
commitd44badc1909ae3d982cc716058c584b5a9a3572b (patch)
tree827bfe0d53b96596723b54b5526e4c1ca7b82e22
parent6becb6618576084878b54dea53d3920600d67e00 (diff)
Implement option for printing custom formats
-rw-r--r--CHANGELOG.md1
-rw-r--r--doc/fd.163
-rw-r--r--src/cli.rs14
-rw-r--r--src/config.rs4
-rw-r--r--src/exec/mod.rs159
-rw-r--r--src/exec/token.rs98
-rw-r--r--src/fmt/input.rs (renamed from src/exec/input.rs)0
-rw-r--r--src/fmt/mod.rs281
-rw-r--r--src/main.rs5
-rw-r--r--src/output.rs22
-rw-r--r--tests/tests.rs60
11 files changed, 441 insertions, 266 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55abe28..4b9f967 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
## Features
- Allow passing an optional argument to `--strip-cwd-prefix` of "always", "never", or "auto", to force whether the cwd prefix is stripped or not.
+- Add a `--format` option which allows using a format template for direct output similar to the template used for `--exec`. (#1043)
## Bugfixes
diff --git a/doc/fd.1 b/doc/fd.1
index b7abcda..108c759 100644
--- a/doc/fd.1
+++ b/doc/fd.1
@@ -375,6 +375,30 @@ Set the path separator to use when printing file paths. The default is the OS-sp
Provide paths to search as an alternative to the positional \fIpath\fR argument. Changes the usage to
\'fd [FLAGS/OPTIONS] \-\-search\-path PATH \-\-search\-path PATH2 [PATTERN]\'
.TP
+.BI "\-\-format " fmt
+Specify a template string that is used for printing a line for each file found.
+
+The following placeholders are substituted into the string for each file before printing:
+.RS
+.IP {}
+path (of the current search result)
+.IP {/}
+basename
+.IP {//}
+parent directory
+.IP {.}
+path without file extension
+.IP {/.}
+basename without file extension
+.IP {{
+literal '{' (an escape sequence)
+.IP }}
+literal '}' (an escape sequence)
+.P
+Notice that you can use "{{" and "}}" to escape "{" and "}" respectively, which is especially
+useful if you need to include the literal text of one of the above placeholders.
+.RE
+.TP
.BI "\-x, \-\-exec " command
.RS
Execute
@@ -395,29 +419,12 @@ If parallelism is enabled, the order commands will be executed in is non-determi
--threads=1, the order is determined by the operating system and may not be what you expect. Thus, it is
recommended that you don't rely on any ordering of the results.
-The following placeholders are substituted before the command is executed:
-.RS
-.IP {}
-path (of the current search result)
-.IP {/}
-basename
-.IP {//}
-parent directory
-.IP {.}
-path without file extension
-.IP {/.}
-basename without file extension
-.IP {{
-literal '{' (an escape sequence)
-.IP }}
-literal '}' (an escape sequence)
-.RE
+Before executing the command, any placeholder patterns in the command are replaced with the
+corresponding values for the current file. The same placeholders are used as in the "\-\-format"
+option.
If no placeholder is present, an implicit "{}" at the end is assumed.
-Notice that you can use "{{" and "}}" to escape "{" and "}" respectively, which is especially
-useful if you need to include the literal text of one of the above placeholders.
-
Examples:
- find all *.zip files and unzip them:
@@ -441,19 +448,9 @@ once, with all search results as arguments.
The order of the arguments is non-deterministic and should not be relied upon.
-One of the following placeholders is substituted before the command is executed:
-.RS
-.IP {}
-path (of all search results)
-.IP {/}
-basename
-.IP {//}
-parent directory
-.IP {.}
-path without file extension
-.IP {/.}
-basename without file extension
-.RE
+This uses the same placeholders as "\-\-format" and "\-\-exec", but instead of expanding
+once per command invocation, each argument containing a placeholder is expanded for every
+file in a batch and passed as separate arguments.
If no placeholder is present, an implicit "{}" at the end is assumed.
diff --git a/src/cli.rs b/src/cli.rs
index 1b91862..0eabd12 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -452,6 +452,20 @@ pub struct Opts {
)]
pub owner: Option<OwnerFilter>,
+ /// Instead of printing the file normally, print the format string with the following placeholders replaced:
+ /// '{}': path (of the current search result)
+ /// '{/}': basename
+ /// '{//}': parent directory
+ /// '{.}': path without file extension
+ /// '{/.}': basename without file extension
+ #[arg(
+ long,
+ value_name = "fmt",
+ help = "Print results according to template",
+ conflicts_with = "list_details"
+ )]
+ pub format: Option<String>,
+
#[command(flatten)]
pub exec: Exec,
diff --git a/src/config.rs b/src/config.rs
index 75b4c2b..cf7a660 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -8,6 +8,7 @@ use crate::filetypes::FileTypes;
#[cfg(unix)]
use crate::filter::OwnerFilter;
use crate::filter::{SizeFilter, TimeFilter};
+use crate::fmt::FormatTemplate;
/// Configuration options for *fd*.
pub struct Config {
@@ -85,6 +86,9 @@ pub struct Config {
/// The value (if present) will be a lowercase string without leading dots.
pub extensions: Option<RegexSet>,
+ /// A format string to use to format results, similarly to exec
+ pub format: Option<FormatTemplate>,
+
/// If a value is supplied, each item found will be used to generate and execute commands.
pub command: Option<Arc<CommandSet>>,
diff --git a/src/exec/mod.rs b/src/exec/mod.rs
index d95f5d9..c964e6e 100644
--- a/src/exec/mod.rs
+++ b/src/exec/mod.rs
@@ -1,13 +1,10 @@
mod command;
-mod input;
mod job;
-mod token;
-use std::borrow::Cow;
-use std::ffi::{OsStr, OsString};
+use std::ffi::OsString;
use std::io;
use std::iter;
-use std::path::{Component, Path, PathBuf, Prefix};
+use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::Mutex;
@@ -15,11 +12,10 @@ use anyhow::{bail, Result};
use argmax::Command;
use crate::exit_codes::{merge_exitcodes, ExitCode};
+use crate::fmt::{FormatTemplate, Token};
use self::command::{execute_commands, handle_cmd_error};
-use self::input::{basename, dirname, remove_extension};
pub use self::job::{batch, job};
-use self::token::{tokenize, Token};
/// Execution mode of the command
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -131,7 +127,7 @@ impl CommandSet {
#[derive(Debug)]
struct CommandBuilder {
pre_args: Vec<OsString>,
- path_arg: ArgumentTemplate,
+ path_arg: FormatTemplate,
post_args: Vec<OsString>,
cmd: Command,
count: usize,
@@ -220,7 +216,7 @@ impl CommandBuilder {
/// `generate_and_execute()` method will be used to generate a command and execute it.
#[derive(Debug, Clone, PartialEq)]
struct CommandTemplate {
- args: Vec<ArgumentTemplate>,
+ args: Vec<FormatTemplate>,
}
impl CommandTemplate {
@@ -235,7 +231,7 @@ impl CommandTemplate {
for arg in input {
let arg = arg.as_ref();
- let tmpl = tokenize(arg);
+ let tmpl = FormatTemplate::parse(arg);
has_placeholder |= tmpl.has_tokens();
args.push(tmpl);
}
@@ -251,7 +247,7 @@ impl CommandTemplate {
// If a placeholder token was not supplied, append one at the end of the command.
if !has_placeholder {
- args.push(ArgumentTemplate::Tokens(vec![Token::Placeholder]));
+ args.push(FormatTemplate::Tokens(vec![Token::Placeholder]));
}
Ok(CommandTemplate { args })
@@ -274,111 +270,6 @@ impl CommandTemplate {
}
}
-/// Represents a template for a single command argument.
-///
-/// The argument is either a collection of `Token`s including at least one placeholder variant, or
-/// a fixed text.
-#[derive(Clone, Debug, PartialEq)]
-enum ArgumentTemplate {
- Tokens(Vec<Token>),
- Text(String),
-}
-
-impl ArgumentTemplate {
- pub fn has_tokens(&self) -> bool {
- matches!(self, ArgumentTemplate::Tokens(_))
- }
-
- /// Generate an argument from this template. If path_separator is Some, then it will replace
- /// the path separator in all placeholder tokens. Text arguments and tokens are not affected by
- /// path separator substitution.
- pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>) -> OsString {
- use self::Token::*;
- let path = path.as_ref();
-
- match *self {
- ArgumentTemplate::Tokens(ref tokens) => {
- let mut s = OsString::new();
- for token in tokens {
- match *token {
- Basename => s.push(Self::replace_separator(basename(path), path_separator)),
- BasenameNoExt => s.push(Self::replace_separator(
- &remove_extension(basename(path).as_ref()),
- path_separator,
- )),
- NoExt => s.push(Self::replace_separator(
- &remove_extension(path),
- path_separator,
- )),
- Parent => s.push(Self::replace_separator(&dirname(path), path_separator)),
- Placeholder => {
- s.push(Self::replace_separator(path.as_ref(), path_separator))
- }
- Text(ref string) => s.push(string),
- }
- }
- s
- }
- ArgumentTemplate::Text(ref text) => OsString::from(text),
- }
- }
-
- /// Replace the path separator in the input with the custom separator string. If path_separator
- /// is None, simply return a borrowed Cow<OsStr> of the input. Otherwise, the input is
- /// interpreted as a Path and its components are iterated through and re-joined into a new
- /// OsString.
- fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> {
- // fast-path - no replacement necessary
- if path_separator.is_none() {
- return Cow::Borrowed(path);
- }
-
- let path_separator = path_separator.unwrap();
- let mut out = OsString::with_capacity(path.len());
- let mut components = Path::new(path).components().peekable();
-
- while let Some(comp) = components.next() {
- match comp {
- // Absolute paths on Windows are tricky. A Prefix component is usually a drive
- // letter or UNC path, and is usually followed by RootDir. There are also
- // "verbatim" prefixes beginning with "\\?\" that skip normalization. We choose to
- // ignore verbatim path prefixes here because they're very rare, might be
- // impossible to reach here, and there's no good way to deal with them. If users
- // are doing something advanced involving verbatim windows paths, they can do their
- // own output filtering with a tool like sed.
- Component::Prefix(prefix) => {
- if let Prefix::UNC(server, share) = prefix.kind() {
- // Prefix::UNC is a parsed version of '\\server\share'
- out.push(path_separator);
- out.push(path_separator);
- out.push(server);
- out.push(path_separator);
- out.push(share);
- } else {
- // All other Windows prefix types are rendered as-is. This results in e.g. "C:" for
- // drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted,
- // but they're not returned by directories fd can search anyway so we don't worry
- // about them.
- out.push(comp.as_os_str());
- }
- }
-
- // Root directory is always replaced with the custom separator.
- Component::RootDir => out.push(path_separator),
-
- // Everything else is joined normally, with a trailing separator if we're not last
- _ => {
- out.push(comp.as_os_str());
- if components.peek().is_some() {
- out.push(path_separator);
- }
- }
- }
- }
- Cow::Owned(out)
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -398,9 +289,9 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Text("${SHELL}:".into()),
- ArgumentTemplate::Tokens(vec![Token::Placeholder]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Text("${SHELL}:".into()),
+ FormatTemplate::Tokens(vec![Token::Placeholder]),
]
}],
mode: ExecutionMode::OneByOne,
@@ -415,8 +306,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Tokens(vec![Token::NoExt]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Tokens(vec![Token::NoExt]),
],
}],
mode: ExecutionMode::OneByOne,
@@ -431,8 +322,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Tokens(vec![Token::Basename]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Tokens(vec![Token::Basename]),
],
}],
mode: ExecutionMode::OneByOne,
@@ -447,8 +338,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Tokens(vec![Token::Parent]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Tokens(vec![Token::Parent]),
],
}],
mode: ExecutionMode::OneByOne,
@@ -463,8 +354,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Tokens(vec![Token::BasenameNoExt]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Tokens(vec![Token::BasenameNoExt]),
],
}],
mode: ExecutionMode::OneByOne,
@@ -494,9 +385,9 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("cp".into()),
- ArgumentTemplate::Tokens(vec![Token::Placeholder]),
- ArgumentTemplate::Tokens(vec![
+ FormatTemplate::Text("cp".into()),
+ FormatTemplate::Tokens(vec![Token::Placeholder]),
+ FormatTemplate::Tokens(vec![
Token::BasenameNoExt,
Token::Text(".ext".into())
]),
@@ -514,8 +405,8 @@ mod tests {
CommandSet {
commands: vec![CommandTemplate {
args: vec![
- ArgumentTemplate::Text("echo".into()),
- ArgumentTemplate::Tokens(vec![Token::NoExt]),
+ FormatTemplate::Text("echo".into()),
+ FormatTemplate::Tokens(vec![Token::NoExt]),
],
}],
mode: ExecutionMode::Batch,
@@ -540,7 +431,7 @@ mod tests {
#[test]
fn generate_custom_path_separator() {
- let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
+ let arg = FormatTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));
@@ -555,7 +446,7 @@ mod tests {
#[cfg(windows)]
#[test]
fn generate_custom_path_separator_windows() {
- let arg = ArgumentTemplate::Tokens(vec![Token::Placeholder]);
+ let arg = FormatTemplate::Tokens(vec![Token::Placeholder]);
macro_rules! check {
($input:expr, $expected:expr) => {
assert_eq!(arg.generate($input, Some("#")), OsString::from($expected));
diff --git a/src/exec/token.rs b/src/exec/token.rs
deleted file mode 100644
index b96c3a3..0000000
--- a/src/exec/token.rs
+++ /dev/null
@@ -1,98 +0,0 @@
-use aho_corasick::AhoCorasick;
-use std::fmt::{self, Display, Formatter};
-use std::sync::OnceLock;
-
-use super::ArgumentTemplate;
-
-/// Designates what should be written to a buffer
-///
-/// Each `Token` contains either text, or a placeholder variant, which will be used to generate
-/// commands after all tokens for a given command template have been collected.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum Token {
- Placeholder,
- Basename,
- Parent,
- NoExt,
- BasenameNoExt,
- Text(String),
-}
-
-impl Display for Token {
- fn fmt(&self, f: &mut Formatter) -> fmt::Result {
- match *self {
- Token::Placeholder => f.write_str("{}")?,
- Token::Basename => f.write_str("{/}")?,
- Token::Parent => f.write_str("{//}")?,
- Token::NoExt => f.write_str("{.}")?,
- Token::BasenameNoExt => f.write_str("{/.}")?,
- Token::Text(ref string) => f.write_str(string)?,
- }
- Ok(())
- }
-}
-
-static PLACEHOLDERS: OnceLock<AhoCorasick> = OnceLock::new();
-
-pub(super) fn tokenize(input: &str) -> ArgumentTemplate {
- // NOTE: we assume that { and } have the same length
- const BRACE_LEN: usize = '{'.len_utf8();
- let mut tokens = Vec::new();
- let mut remaining = input;
- let mut buf = String::new();
- let placeholders = PLACEHOLDERS.get_or_init(|| {
- AhoCorasick::new(["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap()
- });
- while let Some(m) = placeholders.find(remaining) {
- match m.pattern().as_u32() {
- 0 | 1 => {
- // we found an escaped {{ or }}, so add
- // everything up to the first char to the buffer
- // then skip the second one.
- buf += &remaining[..m.start() + BRACE_LEN];
- remaining = &remaining[m.end()..];
- }
- id if !remaining[m.end()..].starts_with('}') => {
- buf += &remaining[..m.start()];
- if !buf.is_empty() {
- tokens.push(Token::Text(std::mem::take(&mut buf)));
- }
- tokens.push(token_from_pattern_id(id));
- remaining = &remaining[m.end()..];
- }
- _ => {
- // We got a normal pattern, but the final "}"
- // is escaped, so add up to that to the buffer, then
- // skip the final }
- buf += &remaining[..m.end()];
- remaining = &remaining[m.end() + BRACE_LEN..];
- }
- }
- }
- // Add the rest of the string to the buffer, and add the final buffer to the tokens
- if !remaining.is_empty() {
- buf += remaining;
- }
- if tokens.is_empty() {
- // No placeholders were found, so just return the text
- return ArgumentTemplate::Text(buf);
- }
- // Add final text segment
- if !buf.is_empty() {
- tokens.push(Token::Text(buf));
- }
- debug_assert!(!tokens.is_empty());
- ArgumentTemplate::Tokens(tokens)
-}
-
-fn token_from_pattern_id(id: u32) -> Token {
- use Token::*;
- match id {
- 2 => Placeholder,
- 3 => Basename,
- 4 => Parent,
- 5 => NoExt,
- 6 => BasenameNoExt,
- _ => unreachable!(),
- }
-}
diff --git a/src/exec/input.rs b/src/fmt/input.rs
index a599432..a599432 100644
--- a/src/exec/input.rs
+++ b/src/fmt/input.rs
diff --git a/src/fmt/mod.rs b/src/fmt/mod.rs
new file mode 100644
index 0000000..45e82fc
--- /dev/null
+++ b/src/fmt/mod.rs
@@ -0,0 +1,281 @@
+mod input;
+
+use std::borrow::Cow;
+use std::ffi::{OsStr, OsString};
+use std::fmt::{self, Display, Formatter};
+use std::path::{Component, Path, Prefix};
+use std::sync::OnceLock;
+
+use aho_corasick::AhoCorasick;
+
+use self::input::{basename, dirname, remove_extension};
+
+/// Designates what should be written to a buffer
+///
+/// Each `Token` contains either text, or a placeholder variant, which will be used to generate
+/// commands after all tokens for a given command template have been collected.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Token {
+ Placeholder,
+ Basename,
+ Parent,
+ NoExt,
+ BasenameNoExt,
+ Text(String),
+}
+
+impl Display for Token {
+ fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+ match *self {
+ Token::Placeholder => f.write_str("{}")?,
+ Token::Basename => f.write_str("{/}")?,
+ Token::Parent => f.write_str("{//}")?,
+ Token::NoExt => f.write_str("{.}")?,
+ Token::BasenameNoExt => f.write_str("{/.}")?,
+ Token::Text(ref string) => f.write_str(string)?,
+ }
+ Ok(())
+ }
+}
+
+/// A parsed format string
+///
+/// This is either a collection of `Token`s including at least one placeholder variant,
+/// or a fixed text.
+#[derive(Clone, Debug, PartialEq)]
+pub enum FormatTemplate {
+ Tokens(Vec<Token>),
+ Text(String),
+}
+
+static PLACEHOLDERS: OnceLock<AhoCorasick> = OnceLock::new();
+
+impl FormatTemplate {
+ pub fn has_tokens(&self) -> bool {
+ matches!(self, FormatTemplate::Tokens(_))
+ }
+
+ pub fn parse(fmt: &str) -> Self {
+ // NOTE: we assume that { and } have the same length
+ const BRACE_LEN: usize = '{'.len_utf8();
+ let mut tokens = Vec::new();
+ let mut remaining = fmt;
+ let mut buf = String::new();
+ let placeholders = PLACEHOLDERS.get_or_init(|| {
+ AhoCorasick::new(["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap()
+ });
+ while let Some(m) = placeholders.find(remaining) {
+ match m.pattern().as_u32() {
+ 0 | 1 => {
+ // we found an escaped {{ or }}, so add
+ // everything up to the first char to the buffer
+ // then skip the second one.
+ buf += &remaining[..m.start() + BRACE_LEN];
+ remaining = &remaining[m.end()..];
+ }
+ id if !remaining[m.end()..].starts_with('}') => {
+ buf += &remaining[..m.start()];
+ if !buf.is_empty() {
+ tokens.push(Token::Text(std::mem::take(&mut buf)));
+ }
+ tokens.push(token_from_pattern_id(id));
+ remaining = &remaining[m.end()..];
+ }
+ _ => {
+ // We got a normal pattern, but the final "}"
+ // is escaped, so add up to that to the buffer, then
+ // skip the final }
+ buf += &remaining[..m.end()];
+ remaining = &remaining[m.end() + BRACE_LEN..];
+ }
+ }
+ }
+ // Add the rest of the string to the buffer, and add the final buffer to the tokens
+ if !remaining.is_empty() {
+ buf += remaining;
+ }
+ if tokens.is_empty() {
+ // No placeholders were found, so just return the text
+ return FormatTemplate::Text(buf);
+ }
+ // Add final text segment
+ if !buf.is_empty() {
+ tokens.push(Token::Text(buf));
+ }
+ debug_assert!(!tokens.is_empty());
+ FormatTemplate::Tokens(tokens)
+ }
+
+ /// Generate a result string from this template. If `path_separator` is `Some`, it replaces
+ /// the path separator in all placeholder tokens. Fixed text and `Text` tokens are not affected by
+ /// path separator substitution.
+ pub fn generate(&self, path: impl AsRef<Path>, path_separator: Option<&str>) -> OsString {
+ use Token::*;
+ let path = path.as_ref();
+
+ match *self {
+ Self::Tokens(ref tokens) => {
+ let mut s = OsString::new();
+ for token in tokens {
+ match token {
+ Basename => s.push(Self::replace_separator(basename(path), path_separator)),
+ BasenameNoExt => s.push(Self::replace_separator(
+ &remove_extension(basename(path).as_ref()),
+ path_separator,
+ )),
+ NoExt => s.push(Self::replace_separator(
+ &remove_extension(path),
+ path_separator,
+ )),
+ Parent => s.push(Self::replace_separator(&dirname(path), path_separator)),
+ Placeholder => {
+ s.push(Self::replace_separator(path.as_ref(), path_separator))
+ }
+ Text(ref string) => s.push(string),
+ }
+ }
+ s
+ }
+ Self::Text(ref text) => OsString::from(text),
+ }
+ }
+
+ /// Replace the path separator in the input with the custom separator string. If path_separator
+ /// is None, simply return a borrowed Cow<OsStr> of the input. Otherwise, the input is
+ /// interpreted as a Path and its components are iterated through and re-joined into a new
+ /// OsString.
+ fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> {
+ // fast-path - no replacement necessary
+ if path_separator.is_none() {
+ return Cow::Borrowed(path);
+ }
+
+ let path_separator = path_separator.unwrap();
+ let mut out = OsString::with_capacity(path.len());
+ let mut components = Path::new(path).components().peekable();
+
+ while let Some(comp) = components.next() {
+ match comp {
+ // Absolute paths on Windows are tricky. A Prefix component is usually a drive
+ // letter or UNC path, and is usually followed by RootDir. There are also
+ // "verbatim" prefixes beginning with "\\?\" that skip normalization. We choose to
+ // ignore verbatim path prefixes here because they're very rare, might be
+ // impossible to reach here, and there's no good way to deal with them. If users
+ // are doing something advanced involving verbatim windows paths, they can do their
+ // own output filtering with a tool like sed.
+ Component::Prefix(prefix) => {
+ if let Prefix::UNC(server, share) = prefix.kind() {
+ // Prefix::UNC is a parsed version of '\\server\share'
+ out.push(path_separator);
+ out.push(path_separator);
+ out.push(server);
+ out.push(path_separator);
+ out.push(share);
+ } else {
+ // All other Windows prefix types are rendered as-is. This results in e.g. "C:" for
+ // drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted,
+ // but they're not returned by directories fd can search anyway so we don't worry
+ // about them.
+ out.push(comp.as_os_str());
+ }
+ }
+
+ // Root directory is always replaced with the custom separator.
+ Component::RootDir => out.push(path_separator),
+
+ // Everything else is joined normally, with a trailing separator if we're not last
+ _ => {
+ out.push(comp.as_os_str());
+ if components.peek().is_some() {
+ out.push(path_separator);
+ }
+ }
+ }
+ }
+ Cow::Owned(out)
+ }
+}
+
+// Convert the id from an aho-corasick match to the
+// appropriate token
+fn token_from_pattern_id(id: u32) -> Token {
+ use Token::*;
+ match id {
+ 2 => Placeholder,
+ 3 => Basename,
+ 4 => Parent,
+ 5 => NoExt,
+ 6 => BasenameNoExt,
+ _ => unreachable!(),
+ }
+}
+
+#[cfg(test)]
+mod fmt_tests {
+ use super::*;
+ use std::path::PathBuf;
+
+ #[test]
+ fn parse_no_placeholders() {
+ let templ = FormatTemplate::parse("This string has no placeholders");
+ assert_eq!(
+ templ,
+ FormatTemplate::Text("This string has no placeholders".into())
+ );
+ }
+
+ #[test]
+ fn parse_only_brace_escapes() {
+ let templ = FormatTemplate::parse("This string only has escapes like {{ and }}");
+ assert_eq!(
+ templ,
+ FormatTemplate::Text("This string only has escapes like { and }".into())
+ );
+ }
+
+ #[test]
+ fn all_placeholders() {
+ use Token::*;
+
+ let templ = FormatTemplate::parse(
+ "{{path={} \
+ basename={/} \
+ parent={//} \
+ noExt={.} \
+ basenameNoExt={/.} \
+ }}",
+ );
+ assert_eq!(
+ templ,
+ FormatTemplate::Tokens(vec![
+ Text("{path=".into()),
+ Placeholder,
+ Text(" basename=".into()),
+ Basename,
+ Text(" parent=".into()),
+ Parent,
+ Text(" noExt=".into()),
+ NoExt,
+ Text(" basenameNoExt=".into()),
+ BasenameNoExt,
+ Text(" }".into()),
+ ])
+ );
+
+ let mut path = PathBuf::new();
+ path.push("a");
+ path.push("folder");
+ path.push("file.txt");
+
+ let expanded = templ.generate(&path, Some("/")).into_string().unwrap();
+
+ assert_eq!(
+ expanded,
+ "{path=a/folder/file.txt \
+ basename=file.txt \
+ parent=a/folder \
+ noExt=a/folder/file \
+ basenameNoExt=file }"
+ );
+ }
+}
diff --git a/src/main.rs b/src/main.rs
index eacf02e..31db976 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,6 +7,7 @@ mod exit_codes;
mod filesystem;
mod filetypes;
mod filter;
+mod fmt;
mod output;
mod regex_helper;
mod walk;
@@ -299,6 +300,10 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result<Config
.build()
})
.transpose()?,
+ format: opts
+ .format
+ .as_deref()
+ .map(crate::fmt::FormatTemplate::parse),
command: command.map(Arc::new),
batch_size: opts.batch_size,
exclude_patterns: opts.exclude.iter().map(|p| String::from("!") + p).collect(),
diff --git a/src/output.rs b/src/output.rs
index e33a89d..d86dd36 100644
--- a/src/output.rs
+++ b/src/output.rs
@@ -7,6 +7,7 @@ use crate::config::Config;
use crate::dir_entry::DirEntry;
use crate::error::print_error;
use crate::exit_codes::ExitCode;
+use crate::fmt::FormatTemplate;
fn replace_path_separator(path: &str, new_path_separator: &str) -> String {
path.replace(std::path::MAIN_SEPARATOR, new_path_separator)
@@ -14,7 +15,10 @@ fn replace_path_separator(path: &str, new_path_separator: &str) -> String {
// TODO: this function is performance critical and can probably be optimized
pub fn print_entry<W: Write>(stdout: &mut W, entry: &DirEntry, config: &Config) {
- let r = if let Some(ref ls_colors) = config.ls_colors {
+ // A user-supplied format template takes precedence over the colorized and plain printers.
+ let r = if let Some(ref format) = config.format {
+ print_entry_format(stdout, entry, config, format)
+ } else if let Some(ref ls_colors) = config.ls_colors {
print_entry_colorized(stdout, entry, config, ls_colors)
} else {
print_entry_uncolorized(stdout, entry, config)
@@ -55,6 +59,22 @@ fn print_trailing_slash<W: Write>(
}
// TODO: this function is performance critical and can probably be optimized
+fn print_entry_format<W: Write>(
+ stdout: &mut W,
+ entry: &DirEntry,
+ config: &Config,
+ format: &FormatTemplate,
+) -> io::Result<()> {
+ let separator = if config.null_separator { "\0" } else { "\n" };
+ let output = format.generate(
+ entry.stripped_path(config),
+ config.path_separator.as_deref(),
+ );
+ // TODO: support writing raw bytes on unix?
+ write!(stdout, "{}{}", output.to_string_lossy(), separator)
+}
+
+// TODO: this function is performance critical and can probably be optimized
fn print_entry_colorized<W: Write>(
stdout: &mut W,
entry: &DirEntry,
diff --git a/tests/tests.rs b/tests/tests.rs
index 1810ecd..8d1ce39 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -1624,6 +1624,66 @@ fn test_excludes() {
);
}
+#[test]
+fn format() {
+ let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES);
+
+ te.assert_output(
+ &["--format", "path={}", "--path-separator=/"],
+ "path=a.foo
+ path=e1 e2
+ path=one
+ path=one/b.foo
+ path=one/two
+ path=one/two/C.Foo2
+ path=one/two/c.foo
+ path=one/two/three
+ path=one/two/three/d.foo