summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com> 2023-09-22 14:57:44 -0400
committer: Andrew Gallant <jamslam@gmail.com> 2023-09-25 14:39:54 -0400
commit: f608d4d9b3ab210b7e6964ca7d1d7dc9c077329e (patch)
tree: 897ee1174b490022f99e9b5be68aaec05acea613
parent: 23e21133ba6a8ca2c4d040092ad1bc22f14e8861 (diff)
hyperlink: rejigger how hyperlinks work
This essentially takes the work done in #2483 and does a bit of a facelift. A brief summary: * We reduce the hyperlink API we expose to just the format, a configuration and an environment. * We move buffer management into a hyperlink-specific interpolator. * We expand the documentation on --hyperlink-format. * We rewrite the hyperlink format parser to be a simple state machine with support for escaping '{{' and '}}'. * We remove the 'gethostname' dependency and instead insist on the caller to provide the hostname. (So grep-printer doesn't get it itself, but the application will.) Similarly for the WSL prefix. * Probably some other things. Overall, the general structure of #2483 was kept. The biggest change is probably requiring the caller to pass in things like a hostname instead of having the crate do it. I did this for a couple reasons: 1. I feel uncomfortable with code deep inside the printing logic reaching out into the environment to assume responsibility for retrieving the hostname. This feels more like an application-level responsibility. Arguably, path canonicalization falls into this same bucket, but it is more difficult to rip that out. (And we can do it in the future in a backwards compatible fashion I think.) 2. I wanted to permit end users to tell ripgrep about their system's hostname in their own way, e.g., by running a custom executable. I want this because I know at least for my own use cases, I sometimes log into systems using an SSH hostname that is distinct from the system's actual hostname (usually because the system is shared in some way or changing its hostname is not allowed/practical). I think that's about it. Closes #665, Closes #2483
-rw-r--r--Cargo.lock69
-rw-r--r--complete/_rg1
-rw-r--r--crates/core/app.rs89
-rw-r--r--crates/core/args.rs133
-rw-r--r--crates/printer/Cargo.toml2
-rw-r--r--crates/printer/src/hyperlink.rs1224
-rw-r--r--crates/printer/src/hyperlink_aliases.rs88
-rw-r--r--crates/printer/src/lib.rs5
-rw-r--r--crates/printer/src/path.rs49
-rw-r--r--crates/printer/src/standard.rs172
-rw-r--r--crates/printer/src/summary.rs71
-rw-r--r--crates/printer/src/util.rs128
12 files changed, 1267 insertions, 764 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 885e73d3..6029cc1a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -137,16 +137,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
-name = "gethostname"
-version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818"
-dependencies = [
- "libc",
- "windows-targets",
-]
-
-[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -216,10 +206,10 @@ version = "0.1.7"
dependencies = [
"base64",
"bstr",
- "gethostname",
"grep-matcher",
"grep-regex",
"grep-searcher",
+ "log",
"serde",
"serde_json",
"termcolor",
@@ -621,60 +611,3 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-
-[[package]]
-name = "windows-targets"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
-dependencies = [
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.48.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
diff --git a/complete/_rg b/complete/_rg
index be8d18ba..7fd6c542 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -305,6 +305,7 @@ _rg() {
'--debug[show debug messages]'
'--field-context-separator[set string to delimit fields in context lines]'
'--field-match-separator[set string to delimit fields in matching lines]'
+ '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e'
'--hyperlink-format=[specify pattern for hyperlinks]:pattern'
'--trace[show more verbose debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
diff --git a/crates/core/app.rs b/crates/core/app.rs
index 9c523479..d0dfc8d3 100644
--- a/crates/core/app.rs
+++ b/crates/core/app.rs
@@ -580,6 +580,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_glob_case_insensitive(&mut args);
flag_heading(&mut args);
flag_hidden(&mut args);
+ flag_hostname_bin(&mut args);
flag_hyperlink_format(&mut args);
flag_iglob(&mut args);
flag_ignore_case(&mut args);
@@ -1495,19 +1496,93 @@ This flag can be disabled with --no-hidden.
args.push(arg);
}
+fn flag_hostname_bin(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Run a program to get this system's hostname.";
+ const LONG: &str = long!(
+ "\
+This flag controls how ripgrep determines this system's hostname. The flag's
+value should correspond to an executable (either a path or something that can
+be found via your system's *PATH* environment variable). When set, ripgrep will
+run this executable, with no arguments, and treat its output (with leading and
+trailing whitespace stripped) as your system's hostname.
+
+When not set (the default, or the empty string), ripgrep will try to
+automatically detect your system's hostname. On Unix, this corresponds
+to calling *gethostname*. On Windows, this corresponds to calling
+*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\"
+
+ripgrep uses your system's hostname for producing hyperlinks.
+"
+ );
+ let arg =
+ RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG);
+ args.push(arg);
+}
+
fn flag_hyperlink_format(args: &mut Vec<RGArg>) {
const SHORT: &str = "Set the format of hyperlinks to match results.";
const LONG: &str = long!(
"\
-Set the format of hyperlinks to match results. This defines a pattern which
-can contain the following placeholders: {file}, {line}, {column}, and {host}.
-An empty pattern or 'none' disables hyperlinks.
+Set the format of hyperlinks to match results. Hyperlinks make certain elements
+of ripgrep's output, such as file paths, clickable. This generally only works
+in terminal emulators that support OSC-8 hyperlinks. For example, the format
+*file://{host}{path}* will emit an RFC 8089 hyperlink.
+
+The following variables are available in the format string:
+
+*{path}*: Required. This is replaced with a path to a matching file. The
+path is guaranteed to be absolute and percent encoded such that it is valid to
+put into a URI. Note that a path is guaranteed to start with a */*.
+
+*{host}*: Optional. This is replaced with your system's hostname. On Unix,
+this corresponds to calling *gethostname*. On Windows, this corresponds to
+calling *GetComputerNameExW* to fetch the system's \"physical DNS hostname.\"
+Alternatively, if --hostname-bin was provided, then the hostname returned from
+the output of that program will be returned. If no hostname could be found,
+then this variable is replaced with the empty string.
+
+*{line}*: Optional. If appropriate, this is replaced with the line number of
+a match. If no line number is available (for example, if --no-line-number was
+given), then it is automatically replaced with the value *1*.
+
+*{column}*: Optional, but requires the presence of **{line}**. If appropriate,
+this is replaced with the column number of a match. If no column number is
+available (for example, if --no-column was given), then it is automatically
+replaced with the value *1*.
+
+*{wslprefix}*: Optional. This is a special value that is set to
+*wsl$/WSL_DISTRO_NAME*, where *WSL_DISTRO_NAME* corresponds to the value of
+the equivalent environment variable. If the system is not Unix or if the
+*WSL_DISTRO_NAME* environment variable is not set, then this is replaced with
+the empty string.
+
+Alternatively, a format string may correspond to one of the following
+aliases: default, file, grep+, kitty, macvim, none, subl, textmate, vscode,
+vscode-insiders, vscodium.
+
+A format string may be empty. An empty format string is equivalent to the
+*none* alias. In this case, hyperlinks will be disabled.
+
+At present, on all systems, the default format when ripgrep detects a tty on
+stdout is *default*. This is an alias that expands to *file://{host}{path}* on
+Unix and *file://{path}* on Windows. When stdout is not a tty, then the default
+format behaves as if it were *none*. That is, hyperlinks are disabled.
+
+Note that hyperlinks are only written when colors are enabled. To write
+hyperlinks without colors, you'll need to configure ripgrep to not colorize
+anything without actually disabling all ANSI escape codes completely:
+
+ --colors 'path:none' --colors 'line:none' --colors 'column:none' --colors 'match:none'
-The {file} placeholder is required, and will be replaced with the absolute
-file path with a few adjustments: The leading '/' on Unix is removed,
-and '\\' is replaced with '/' on Windows.
+ripgrep works this way because it treats the *--color=(never|always|auto)* flag
+as a proxy for whether ANSI escape codes should be used at all. This means
+that environment variables like *NO_COLOR=1* and *TERM=dumb* not only disable
+colors, but hyperlinks as well. Similarly, colors and hyperlinks are disabled
+when ripgrep is not writing to a tty. (Unless one forces the issue by setting
+*--color=always*.)
-As an example, the default pattern on Unix systems is: 'file://{host}/{file}'
+For more information on hyperlinks in terminal emulators, see:
+https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda
"
);
let arg =
diff --git a/crates/core/args.rs b/crates/core/args.rs
index 0f8d1f18..75029a05 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -18,9 +18,9 @@ use grep::pcre2::{
RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
};
use grep::printer::{
- default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder,
- PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats,
- Summary, SummaryBuilder, SummaryKind, JSON,
+ default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment,
+ HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard,
+ StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON,
};
use grep::regex::{
RegexMatcher as RustRegexMatcher,
@@ -236,7 +236,7 @@ impl Args {
let mut builder = PathPrinterBuilder::new();
builder
.color_specs(self.matches().color_specs()?)
- .hyperlink_pattern(self.matches().hyperlink_pattern()?)
+ .hyperlink(self.matches().hyperlink_config()?)
.separator(self.matches().path_separator()?)
.terminator(self.matches().path_terminator().unwrap_or(b'\n'));
Ok(builder.build(wtr))
@@ -774,7 +774,7 @@ impl ArgMatches {
let mut builder = StandardBuilder::new();
builder
.color_specs(self.color_specs()?)
- .hyperlink_pattern(self.hyperlink_pattern()?)
+ .hyperlink(self.hyperlink_config()?)
.stats(self.stats())
.heading(self.heading())
.path(self.with_filename(paths))
@@ -814,7 +814,7 @@ impl ArgMatches {
builder
.kind(self.summary_kind().expect("summary format"))
.color_specs(self.color_specs()?)
- .hyperlink_pattern(self.hyperlink_pattern()?)
+ .hyperlink(self.hyperlink_config()?)
.stats(self.stats())
.path(self.with_filename(paths))
.max_matches(self.max_count()?)
@@ -1126,11 +1126,21 @@ impl ArgMatches {
/// for the current system is used if the value is not set.
///
/// If an invalid pattern is provided, then an error is returned.
- fn hyperlink_pattern(&self) -> Result<HyperlinkPattern> {
- Ok(match self.value_of_lossy("hyperlink-format") {
- Some(pattern) => HyperlinkPattern::from_str(&pattern)?,
- None => HyperlinkPattern::default_file_scheme(),
- })
+ fn hyperlink_config(&self) -> Result<HyperlinkConfig> {
+ let mut env = HyperlinkEnvironment::new();
+ env.host(hostname(self.value_of_os("hostname-bin")))
+ .wsl_prefix(wsl_prefix());
+ let fmt = match self.value_of_lossy("hyperlink-format") {
+ None => HyperlinkFormat::from_str("default").unwrap(),
+ Some(format) => match HyperlinkFormat::from_str(&format) {
+ Ok(format) => format,
+ Err(err) => {
+ let msg = format!("invalid hyperlink format: {err}");
+ return Err(msg.into());
+ }
+ },
+ };
+ Ok(HyperlinkConfig::new(env, fmt))
}
/// Returns true if ignore files should be processed case insensitively.
@@ -1838,6 +1848,107 @@ fn current_dir() -> Result<PathBuf> {
.into())
}
+/// Retrieves the hostname that ripgrep should use wherever a hostname is
+/// required. Currently, that's just in the hyperlink format.
+///
+/// This works by first running the given binary program (if present and with
+/// no arguments) to get the hostname after trimming leading and trailing
+/// whitespace. If that fails for any reason, then it falls back to getting
+/// the hostname via platform specific means (e.g., `gethostname` on Unix).
+///
+/// The purpose of `bin` is to make it possible for end users to override how
+/// ripgrep determines the hostname.
+fn hostname(bin: Option<&OsStr>) -> Option<String> {
+ let Some(bin) = bin else { return platform_hostname() };
+ let bin = match grep::cli::resolve_binary(bin) {
+ Ok(bin) => bin,
+ Err(err) => {
+ log::debug!(
+ "failed to run command '{bin:?}' to get hostname \
+ (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let mut cmd = process::Command::new(&bin);
+ cmd.stdin(process::Stdio::null());
+ let rdr = match grep::cli::CommandReader::new(&mut cmd) {
+ Ok(rdr) => rdr,
+ Err(err) => {
+ log::debug!(
+ "failed to spawn command '{bin:?}' to get \
+ hostname (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let out = match io::read_to_string(rdr) {
+ Ok(out) => out,
+ Err(err) => {
+ log::debug!(
+ "failed to read output from command '{bin:?}' to get \
+ hostname (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let hostname = out.trim();
+ if hostname.is_empty() {
+ log::debug!(
+ "output from command '{bin:?}' is empty after trimming \
+ leading and trailing whitespace (falling back to \
+ platform hostname)",
+ );
+ return platform_hostname();
+ }
+ Some(hostname.to_string())
+}
+
+/// Attempts to get the hostname by using platform specific routines. For
+/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on
+/// Windows.
+fn platform_hostname() -> Option<String> {
+ let hostname_os = match grep::cli::hostname() {
+ Ok(x) => x,
+ Err(err) => {
+ log::debug!("could not get hostname: {}", err);
+ return None;
+ }
+ };
+ let Some(hostname) = hostname_os.to_str() else {
+ log::debug!(
+ "got hostname {:?}, but it's not valid UTF-8",
+ hostname_os
+ );
+ return None;
+ };
+ Some(hostname.to_string())
+}
+
+/// Returns a value that is meant to fill in the `{wslprefix}` variable for
+/// a user given hyperlink format. A WSL prefix is a share/network like thing
+/// that is meant to permit Windows applications to open files stored within
+/// a WSL drive.
+///
+/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running
+/// in a Unix environment, then this returns None.
+///
+/// See: <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+fn wsl_prefix() -> Option<String> {
+ if !cfg!(unix) {
+ return None;
+ }
+ let distro_os = env::var_os("WSL_DISTRO_NAME")?;
+ let Some(distro) = distro_os.to_str() else {
+ log::debug!(
+ "found WSL_DISTRO_NAME={:?}, but value is not UTF-8",
+ distro_os
+ );
+ return None;
+ };
+ Some(format!("wsl$/{distro}"))
+}
+
/// Tries to assign a timestamp to every `Subject` in the vector to help with
/// sorting Subjects by time.
fn load_timestamps<G>(
diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml
index 69e03d65..dc63a6cc 100644
--- a/crates/printer/Cargo.toml
+++ b/crates/printer/Cargo.toml
@@ -21,9 +21,9 @@ serde = ["dep:base64", "dep:serde", "dep:serde_json"]
[dependencies]
base64 = { version = "0.21.4", optional = true }
bstr = "1.6.2"
-gethostname = "0.4.3"
grep-matcher = { version = "0.1.6", path = "../matcher" }
grep-searcher = { version = "0.1.11", path = "../searcher" }
+log = "0.4.5"
termcolor = "1.3.0"
serde = { version = "1.0.188", optional = true, features = ["derive"] }
serde_json = { version = "1.0.107", optional = true }
diff --git a/crates/printer/src/hyperlink.rs b/crates/printer/src/hyperlink.rs
index fa38b5c2..7e6be6e4 100644
--- a/crates/printer/src/hyperlink.rs
+++ b/crates/printer/src/hyperlink.rs
@@ -1,394 +1,710 @@
-use std::{
- io::{self, Write},
- path::Path,
-};
+use std::{cell::RefCell, io, path::Path, sync::Arc};
use {
bstr::ByteSlice,
termcolor::{HyperlinkSpec, WriteColor},
};
-use crate::hyperlink_aliases::HYPERLINK_PATTERN_ALIASES;
+use crate::hyperlink_aliases;
-/// A builder for `HyperlinkPattern`.
+/// Hyperlink configuration.
///
-/// Once a `HyperlinkPattern` is built, it is immutable.
-#[derive(Debug)]
-pub struct HyperlinkPatternBuilder {
- parts: Vec<Part>,
+/// This configuration specifies both the [hyperlink format](HyperlinkFormat)
+/// and an [environment](HyperlinkEnvironment) for interpolating a subset of
+/// variables. The specific subset includes variables that are intended to
+/// be invariant throughout the lifetime of a process, such as a machine's
+/// hostname.
+///
+/// A hyperlink configuration can be provided to printer builders such as
+/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>);
+
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+struct HyperlinkConfigInner {
+ env: HyperlinkEnvironment,
+ format: HyperlinkFormat,
}
-/// A hyperlink pattern with placeholders.
+impl HyperlinkConfig {
+ /// Create a new configuration from an environment and a format.
+ pub fn new(
+ env: HyperlinkEnvironment,
+ format: HyperlinkFormat,
+ ) -> HyperlinkConfig {
+ HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format }))
+ }
+
+ /// Returns the hyperlink environment in this configuration.
+ pub(crate) fn environment(&self) -> &HyperlinkEnvironment {
+ &self.0.env
+ }
+
+ /// Returns the hyperlink format in this configuration.
+ pub(crate) fn format(&self) -> &HyperlinkFormat {
+ &self.0.format
+ }
+}
+
+/// A hyperlink format with variables.
+///
+/// This can be created by parsing a string using `HyperlinkFormat::from_str`.
+///
+/// The default format is empty. An empty format is valid and effectively
+/// disables hyperlinks.
+///
+/// # Example
+///
+/// ```
+/// use grep_printer::HyperlinkFormat;
+///
+/// let fmt = "vscode".parse::<HyperlinkFormat>()?;
+/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}");
///
-/// This can be created with `HyperlinkPatternBuilder` or from a string
-/// using `HyperlinkPattern::from_str`.
+/// # Ok::<(), Box<dyn std::error::Error>>(())
+/// ```
#[derive(Clone, Debug, Default, Eq, PartialEq)]
-pub struct HyperlinkPattern {
+pub struct HyperlinkFormat {
parts: Vec<Part>,
is_line_dependent: bool,
}
-/// A hyperlink pattern part.
-#[derive(Clone, Debug, Eq, PartialEq)]
-enum Part {
- /// Static text. Can include invariant values such as the hostname.
- Text(Vec<u8>),
- /// Placeholder for the file path.
- File,
- /// Placeholder for the line number.
- Line,
- /// Placeholder for the column number.
- Column,
+impl HyperlinkFormat {
+ /// Creates an empty hyperlink format.
+ pub fn empty() -> HyperlinkFormat {
+ HyperlinkFormat::default()
+ }
+
+ /// Returns true if this format is empty.
+ pub fn is_empty(&self) -> bool {
+ self.parts.is_empty()
+ }
+
+ /// Creates a [`HyperlinkConfig`] from this format and the environment
+ /// given.
+ pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig {
+ HyperlinkConfig::new(env, self)
+ }
+
+ /// Returns true if the format can produce line-dependent hyperlinks.
+ pub(crate) fn is_line_dependent(&self) -> bool {
+ self.is_line_dependent
+ }
}
-/// An error that can occur when parsing a hyperlink pattern.
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum HyperlinkPatternError {
- /// This occurs when the pattern syntax is not valid.
- InvalidSyntax,
- /// This occurs when the {file} placeholder is missing.
- NoFilePlaceholder,
- /// This occurs when the {line} placeholder is missing,
- /// while the {column} placeholder is present.
- NoLinePlaceholder,
- /// This occurs when an unknown placeholder is used.
- InvalidPlaceholder(String),
- /// The pattern doesn't start with a valid scheme.
- InvalidScheme,
+impl std::str::FromStr for HyperlinkFormat {
+ type Err = HyperlinkFormatError;
+
+ fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> {
+ use self::HyperlinkFormatErrorKind::*;
+
+ #[derive(Debug)]
+ enum State {
+ Verbatim,
+ VerbatimCloseVariable,
+ OpenVariable,
+ InVariable,
+ }
+
+ let mut builder = FormatBuilder::new();
+ let input = match hyperlink_aliases::find(s) {
+ Some(format) => format,
+ None => s,
+ };
+ let mut name = String::new();
+ let mut state = State::Verbatim;
+ let err = |kind| HyperlinkFormatError { kind };
+ for ch in input.chars() {
+ state = match state {
+ State::Verbatim => {
+ if ch == '{' {
+ State::OpenVariable
+ } else if ch == '}' {
+ State::VerbatimCloseVariable
+ } else {
+ builder.append_char(ch);
+ State::Verbatim
+ }
+ }
+ State::VerbatimCloseVariable => {
+ if ch == '}' {
+ builder.append_char('}');
+ State::Verbatim
+ } else {
+ return Err(err(InvalidCloseVariable));
+ }
+ }
+ State::OpenVariable => {
+ if ch == '{' {
+ builder.append_char('{');
+ State::Verbatim
+ } else {
+ name.clear();
+ if ch == '}' {
+ builder.append_var(&name)?;
+ State::Verbatim
+ } else {
+ name.push(ch);
+ State::InVariable
+ }
+ }
+ }
+ State::InVariable => {
+ if ch == '}' {
+ builder.append_var(&name)?;
+ State::Verbatim
+ } else {
+ name.push(ch);
+ State::InVariable
+ }
+ }
+ };
+ }
+ match state {
+ State::Verbatim => builder.build(),
+ State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)),
+ State::OpenVariable | State::InVariable => {
+ Err(err(UnclosedVariable))
+ }
+ }
+ }
}
-/// The values to replace the pattern placeholders with.
-#[derive(Clone, Debug)]
-pub(crate) struct HyperlinkValues<'a> {
- file: &'a HyperlinkPath,
- line: u64,
- column: u64,
+impl std::fmt::Display for HyperlinkFormat {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ for part in self.parts.iter() {
+ part.fmt(f)?;
+ }
+ Ok(())
+ }
}
-/// Represents the {file} part of a hyperlink.
+/// A static environment for hyperlink interpolation.
///
-/// This is the value to use as-is in the hyperlink, converted from an OS file
-/// path.
-#[derive(Clone, Debug)]
-pub(crate) struct HyperlinkPath(Vec<u8>);
+/// This environment permits setting the values of variables used in hyperlink
+/// interpolation that are not expected to change for the lifetime of a program.
+/// That is, these values are invariant.
+///
+/// Currently, this includes the hostname and a WSL distro prefix.
+#[derive(Clone, Debug, Default, Eq, PartialEq)]
+pub struct HyperlinkEnvironment {
+ host: Option<String>,
+ wsl_prefix: Option<String>,
+}
-impl HyperlinkPatternBuilder {
- /// Creates a new hyperlink pattern builder.
- pub fn new() -> Self {
- Self { parts: vec![] }
+impl HyperlinkEnvironment {
+ /// Create a new empty hyperlink environment.
+ pub fn new() -> HyperlinkEnvironment {
+ HyperlinkEnvironment::default()
}
- /// Appends static text.
- pub fn append_text(&mut self, text: &[u8]) -> &mut Self {
- if let Some(Part::Text(contents)) = self.parts.last_mut() {
- contents.extend_from_slice(text);
- } else if !text.is_empty() {
- self.parts.push(Part::Text(text.to_vec()));
- }
+ /// Set the `{host}` variable, which fills in any hostname components of
+ /// a hyperlink.
+ ///
+ /// One can get the hostname in the current environment via the `hostname`
+ /// function in the `grep-cli` crate.
+ pub fn host(&mut self, host: Option<String>) -> &mut HyperlinkEnvironment {
+ self.host = host;
self
}
- /// Appends the hostname.
- ///
- /// On WSL, appends `wsl$/{distro}` instead.
- pub fn append_hostname(&mut self) -> &mut Self {
- self.append_text(Self::get_hostname().as_bytes())
+ /// Set the `{wslprefix}` variable, which contains the WSL distro prefix.
+ /// An example value is `wsl$/Ubuntu`. The distro name can typically be
+ /// discovered from the `WSL_DISTRO_NAME` environment variable.
+ pub fn wsl_prefix(
+ &mut self,
+ wsl_prefix: Option<String>,
+ ) -> &mut HyperlinkEnvironment {
+ self.wsl_prefix = wsl_prefix;
+ self
}
+}
- /// Returns the hostname to use in the pattern.
- ///
- /// On WSL, returns `wsl$/{distro}`.
- fn get_hostname() -> String {
- if cfg!(unix) {
- if let Ok(mut wsl_distro) = std::env::var("WSL_DISTRO_NAME") {
- wsl_distro.insert_str(0, "wsl$/");
- return wsl_distro;
+/// An error that can occur when parsing a hyperlink format.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct HyperlinkFormatError {
+ kind: HyperlinkFormatErrorKind,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum HyperlinkFormatErrorKind {
+ /// This occurs when there are zero variables in the format.
+ NoVariables,
+ /// This occurs when the {path} variable is missing.
+ NoPathVariable,
+ /// This occurs when the {line} variable is missing, while the {column}
+ /// variable is present.
+ NoLineVariable,
+ /// This occurs when an unknown variable is used.
+ InvalidVariable(String),
+ /// The format doesn't start with a valid scheme.
+ InvalidScheme,
+ /// This occurs when an unescaped `}` is found without a corresponding
+ /// `{` preceding it.
+ InvalidCloseVariable,
+ /// This occurs when a `{` is found without a corresponding `}` following
+ /// it.
+ UnclosedVariable,
+}
+
+impl std::error::Error for HyperlinkFormatError {}
+
+impl std::fmt::Display for HyperlinkFormatError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ use self::HyperlinkFormatErrorKind::*;
+
+ match self.kind {
+ NoVariables => {
+ let aliases = hyperlink_aliases::iter()
+ .map(|(name, _)| name)
+ .collect::<Vec<&str>>()
+ .join(", ");
+ write!(
+ f,
+ "at least a {{path}} variable is required in a \
+ hyperlink format, or otherwise use a valid alias: {}",
+ aliases,
+ )
+ }
+ NoPathVariable => {
+ write!(
+ f,
+ "the {{path}} variable is required in a hyperlink format",
+ )
+ }
+ NoLineVariable => {
+ write!(
+ f,
+ "the hyperlink format contains a {{column}} variable, \
+ but no {{line}} variable is present",
+ )
+ }
+ InvalidVariable(ref name) => {
+ write!(
+ f,
+ "invalid hyperlink format variable: '{name}', choose \
+ from: path, line, column, host",
+ )
+ }
+ InvalidScheme => {
+ write!(
+ f,
+ "the hyperlink format must start with a valid URL scheme, \
+ i.e., [0-9A-Za-z+-.]+:",
+ )
+ }
+ InvalidCloseVariable => {
+ write!(
+ f,
+ "unopened variable: found '}}' without a \
+ corresponding '{{' preceding it",
+ )
+ }
+ UnclosedVariable => {
+ write!(
+ f,
+ "unclosed variable: found '{{' without a \
+ corresponding '}}' following it",
+ )
}
}
-
- gethostname::gethostname().to_string_lossy().to_string()
}
+}
- /// Appends a placeholder for the file path.
- pub fn append_file(&mut self) -> &mut Self {
- self.parts.push(Part::File);
- self
+/// A builder for `HyperlinkFormat`.
+///
+/// Once a `HyperlinkFormat` is built, it is immutable.
+#[derive(Debug)]
+struct FormatBuilder {
+ parts: Vec<Part>,
+}
+
+impl FormatBuilder {
+ /// Creates a new hyperlink format builder.
+ fn new() -> FormatBuilder {
+ FormatBuilder { parts: vec![] }
}
- /// Appends a placeholder for the line number.
- pub fn append_line(&mut self) -> &mut Self {
- self.parts.push(Part::Line);
+ /// Appends static text.
+ fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder {
+ if let Some(Part::Text(contents)) = self.parts.last_mut() {
+ contents.extend_from_slice(text);
+ } else if !text.is_empty() {
+ self.parts.push(Part::Text(text.to_vec()));
+ }
self
}
- /// Appends a placeholder for the column number.
- pub fn append_column(&mut self) -> &mut Self {
- self.parts.push(Part::Column);
- self
+ /// Appends a single character.
+ fn append_char(&mut self, ch: char) -> &mut FormatBuilder {
+ self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes())
+ }
+
+ /// Appends a variable with the given name. If the name isn't recognized,
+ /// then this returns an error.
+ fn append_var(
+ &mut self,
+ name: &str,
+ ) -> Result<&mut FormatBuilder, HyperlinkFormatError> {
+ let part = match name {
+ "host" => Part::Host,
+ "wslprefix" => Part::WSLPrefix,
+ "path" => Part::Path,
+ "line" => Part::Line,
+ "column" => Part::Column,
+ unknown => {
+ let err = HyperlinkFormatError {
+ kind: HyperlinkFormatErrorKind::InvalidVariable(
+ unknown.to_string(),
+ ),
+ };
<