diff options
author | Andrew Gallant <jamslam@gmail.com> | 2023-09-22 14:57:44 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2023-09-24 18:31:47 -0400 |
commit | a43232577a3034c55a6d05c8d587039a8da2a243 (patch) | |
tree | 1e3ff0cdddd57336c365977df0709d7a1894d48d | |
parent | f2da0d140433d8301659c1f648d5ec0c1b78687c (diff) |
progressag/hyperlinks
-rw-r--r-- | Cargo.lock | 69 | ||||
-rw-r--r-- | complete/_rg | 1 | ||||
-rw-r--r-- | crates/core/app.rs | 76 | ||||
-rw-r--r-- | crates/core/args.rs | 127 | ||||
-rw-r--r-- | crates/printer/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/printer/src/hyperlink.rs | 1057 | ||||
-rw-r--r-- | crates/printer/src/hyperlink_aliases.rs | 88 | ||||
-rw-r--r-- | crates/printer/src/lib.rs | 5 | ||||
-rw-r--r-- | crates/printer/src/path.rs | 49 | ||||
-rw-r--r-- | crates/printer/src/standard.rs | 172 | ||||
-rw-r--r-- | crates/printer/src/summary.rs | 71 | ||||
-rw-r--r-- | crates/printer/src/util.rs | 128 |
12 files changed, 1096 insertions, 749 deletions
@@ -137,16 +137,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "gethostname" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" -dependencies = [ - "libc", - "windows-targets", -] - -[[package]] name = "glob" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -216,10 +206,10 @@ version = "0.1.7" dependencies = [ "base64", "bstr", - "gethostname", "grep-matcher", "grep-regex", "grep-searcher", + "log", "serde", "serde_json", "termcolor", @@ -621,60 +611,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-targets" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = 
"windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/complete/_rg b/complete/_rg index be8d18ba..7fd6c542 100644 --- a/complete/_rg +++ b/complete/_rg @@ -305,6 +305,7 @@ _rg() { '--debug[show debug messages]' '--field-context-separator[set string to delimit fields in context lines]' '--field-match-separator[set string to delimit fields in matching lines]' + '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e' '--hyperlink-format=[specify pattern for hyperlinks]:pattern' '--trace[show more verbose debug messages]' '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' diff --git a/crates/core/app.rs b/crates/core/app.rs index 9c523479..7d0f3baa 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -580,6 +580,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> { flag_glob_case_insensitive(&mut args); flag_heading(&mut args); flag_hidden(&mut args); + flag_hostname_bin(&mut args); flag_hyperlink_format(&mut args); flag_iglob(&mut args); flag_ignore_case(&mut args); @@ -1495,19 +1496,80 @@ This flag can be disabled with --no-hidden. 
args.push(arg); } +fn flag_hostname_bin(args: &mut Vec<RGArg>) { + const SHORT: &str = "Run a program to get this system's hostname."; + const LONG: &str = long!( + "\ +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's *PATH* environment variable). When set, ripgrep will +run this executable, with no arguments, and treat its output (with leading and +trailing whitespace stripped) as your system's hostname. + +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling *gethostname*. On Windows, this corresponds to calling +*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" + +ripgrep uses your system's hostname for producing hyperlinks. +" + ); + let arg = + RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_hyperlink_format(args: &mut Vec<RGArg>) { const SHORT: &str = "Set the format of hyperlinks to match results."; const LONG: &str = long!( "\ -Set the format of hyperlinks to match results. This defines a pattern which -can contain the following placeholders: {file}, {line}, {column}, and {host}. -An empty pattern or 'none' disables hyperlinks. +Set the format of hyperlinks to match results. Hyperlinks make certain elements +of ripgrep's output, such as file paths, clickable. This generally only works +in terminal emulators that support OSC-8 hyperlinks. For example, the format +*file://{host}{file}* will emit an RFC 8089 hyperlink. + +The following variables are available in the format string: + +*{path}*: Required. This is replaced with a path to a matching file. The +path is guaranteed to be absolute and percent encoded such that it is valid to +put into a URI. Note that a path is guaranteed to start with a */*. + +*{host}*: Optional. 
This is replaced with your system's hostname. On Unix, +this corresponds to calling *gethostname*. On Windows, this corresponds to +calling *GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" +Alternatively, if --hostname-bin was provided, then the hostname returned from +the output of that program will be returned. If no hostname could be found, +then this variable is replaced with the empty string. + +*{line}*: Optional. If appropriate, this is replaced with the line number of +a match. If no line number is available (for example, if --no-line-number was +given), then it is automatically replaced with the value *1*. + +*{column}*: Optional, but requires the presence of **{line}**. If appropriate, +this is replaced with the column number of a match. If no column number is +available (for example, if --no-column was given), then it is automatically +replaced with the value *1*. + +*{wslprefix}*: Optional. This is a special value that is set to +*wsl$/WSL_DISTRO_NAME*, where *WSL_DISTRO_NAME* corresponds to the value of +the equivalent environment variable. If the system is not Unix or if the +*WSL_DISTRO_NAME* environment variable is not set, then this is replaced with +the empty string. + +Alternatively, a format string may correspond to one of the following +aliases: default, file, grep+, kitty, macvim, none, subl, textmate, vscode, +vscode-insiders, vscodium. + +A format string may be empty. An empty format string is equivalent to the +*none* alias. In this case, hyperlinks will be disabled. -The {file} placeholder is required, and will be replaced with the absolute -file path with a few adjustments: The leading '/' on Unix is removed, -and '\\' is replaced with '/' on Windows. +At present, the default format when ripgrep detects a tty on stdout all systems +is *default*. This is an alias that expands to *file://{host}{path}* on Unix +and *file://{path}* on Windows. 
When stdout is not a tty, then the default +format behaves as if it were *none*. That is, hyperlinks are disabled. -As an example, the default pattern on Unix systems is: 'file://{host}/{file}' +For more information, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda " ); let arg = diff --git a/crates/core/args.rs b/crates/core/args.rs index 0f8d1f18..cc6b4592 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -18,9 +18,9 @@ use grep::pcre2::{ RegexMatcherBuilder as PCRE2RegexMatcherBuilder, }; use grep::printer::{ - default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder, - PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats, - Summary, SummaryBuilder, SummaryKind, JSON, + default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment, + HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard, + StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON, }; use grep::regex::{ RegexMatcher as RustRegexMatcher, @@ -236,7 +236,7 @@ impl Args { let mut builder = PathPrinterBuilder::new(); builder .color_specs(self.matches().color_specs()?) - .hyperlink_pattern(self.matches().hyperlink_pattern()?) + .hyperlink(self.matches().hyperlink_config()?) .separator(self.matches().path_separator()?) .terminator(self.matches().path_terminator().unwrap_or(b'\n')); Ok(builder.build(wtr)) @@ -774,7 +774,7 @@ impl ArgMatches { let mut builder = StandardBuilder::new(); builder .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .heading(self.heading()) .path(self.with_filename(paths)) @@ -814,7 +814,7 @@ impl ArgMatches { builder .kind(self.summary_kind().expect("summary format")) .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .path(self.with_filename(paths)) .max_matches(self.max_count()?) 
@@ -1126,11 +1126,15 @@ impl ArgMatches { /// for the current system is used if the value is not set. /// /// If an invalid pattern is provided, then an error is returned. - fn hyperlink_pattern(&self) -> Result<HyperlinkPattern> { - Ok(match self.value_of_lossy("hyperlink-format") { - Some(pattern) => HyperlinkPattern::from_str(&pattern)?, - None => HyperlinkPattern::default_file_scheme(), - }) + fn hyperlink_config(&self) -> Result<HyperlinkConfig> { + let mut env = HyperlinkEnvironment::new(); + env.host(hostname(self.value_of_os("hostname-bin"))) + .wsl_prefix(wsl_prefix()); + let fmt = match self.value_of_lossy("hyperlink-format") { + None => HyperlinkFormat::from_str("default")?, + Some(format) => HyperlinkFormat::from_str(&format)?, + }; + Ok(HyperlinkConfig::new(env, fmt)) } /// Returns true if ignore files should be processed case insensitively. @@ -1838,6 +1842,107 @@ fn current_dir() -> Result<PathBuf> { .into()) } +/// Retrieves the hostname that ripgrep should use wherever a hostname is +/// required. Currently, that's just in the hyperlink format. +/// +/// This works by first running the given binary program (if present and with +/// no arguments) to get the hostname after trimming leading and trailing +/// whitespace. If that fails for any reason, then it falls back to getting +/// the hostname via platform specific means (e.g., `gethostname` on Unix). +/// +/// The purpose of `bin` is to make it possible for end users to override how +/// ripgrep determines the hostname. 
+fn hostname(bin: Option<&OsStr>) -> Option<String> { + let Some(bin) = bin else { return platform_hostname() }; + let bin = match grep::cli::resolve_binary(bin) { + Ok(bin) => bin, + Err(err) => { + log::debug!( + "failed to run command '{bin:?}' to get hostname \ + (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let mut cmd = process::Command::new(&bin); + cmd.stdin(process::Stdio::null()); + let rdr = match grep::cli::CommandReader::new(&mut cmd) { + Ok(rdr) => rdr, + Err(err) => { + log::debug!( + "failed to spawn command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let out = match io::read_to_string(rdr) { + Ok(out) => out, + Err(err) => { + log::debug!( + "failed to read output from command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let hostname = out.trim(); + if hostname.is_empty() { + log::debug!( + "output from command '{bin:?}' is empty after trimming \ + leading and trailing whitespace (falling back to \ + platform hostname)", + ); + return platform_hostname(); + } + Some(hostname.to_string()) +} + +/// Attempts to get the hostname by using platform specific routines. For +/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on +/// Windows. +fn platform_hostname() -> Option<String> { + let hostname_os = match grep::cli::hostname() { + Ok(x) => x, + Err(err) => { + log::debug!("could not get hostname: {}", err); + return None; + } + }; + let Some(hostname) = hostname_os.to_str() else { + log::debug!( + "got hostname {:?}, but it's not valid UTF-8", + hostname_os + ); + return None; + }; + Some(hostname.to_string()) +} + +/// Returns a value that is meant to fill in the `{wslprefix}` variable for +/// a user given hyperlink format. 
A WSL prefix is a share/network like thing +/// that is meant to permit Windows applications to open files stored within +/// a WSL drive. +/// +/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running +/// in a Unix environment, then this returns None. +/// +/// See: <https://learn.microsoft.com/en-us/windows/wsl/filesystems> +fn wsl_prefix() -> Option<String> { + if !cfg!(unix) { + return None; + } + let distro_os = env::var_os("WSL_DISTRO_NAME")?; + let Some(distro) = distro_os.to_str() else { + log::debug!( + "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", + distro_os + ); + return None; + }; + Some(format!("wsl$/{distro}")) +} + /// Tries to assign a timestamp to every `Subject` in the vector to help with /// sorting Subjects by time. fn load_timestamps<G>( diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml index 69e03d65..dc63a6cc 100644 --- a/crates/printer/Cargo.toml +++ b/crates/printer/Cargo.toml @@ -21,9 +21,9 @@ serde = ["dep:base64", "dep:serde", "dep:serde_json"] [dependencies] base64 = { version = "0.21.4", optional = true } bstr = "1.6.2" -gethostname = "0.4.3" grep-matcher = { version = "0.1.6", path = "../matcher" } grep-searcher = { version = "0.1.11", path = "../searcher" } +log = "0.4.5" termcolor = "1.3.0" serde = { version = "1.0.188", optional = true, features = ["derive"] } serde_json = { version = "1.0.107", optional = true } diff --git a/crates/printer/src/hyperlink.rs b/crates/printer/src/hyperlink.rs index fa38b5c2..2fb8a61e 100644 --- a/crates/printer/src/hyperlink.rs +++ b/crates/printer/src/hyperlink.rs @@ -1,85 +1,275 @@ -use std::{ - io::{self, Write}, - path::Path, -}; +use std::{cell::RefCell, io, path::Path, sync::Arc}; use { bstr::ByteSlice, termcolor::{HyperlinkSpec, WriteColor}, }; -use crate::hyperlink_aliases::HYPERLINK_PATTERN_ALIASES; +use crate::hyperlink_aliases; -/// A builder for `HyperlinkPattern`. +/// Hyperlink configuration. 
/// -/// Once a `HyperlinkPattern` is built, it is immutable. -#[derive(Debug)] -pub struct HyperlinkPatternBuilder { - parts: Vec<Part>, +/// This configuration specifies both the [hyperlink format](HyperlinkFormat) +/// and an [environment](HyperlinkEnvironment) for interpolating a subset of +/// variables. The specific subset includes variables that are intended to +/// be invariant throughout the lifetime of a process, such as a machine's +/// hostname. +/// +/// A hyperlink configuration can be provided to printer builders such as +/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink). +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkConfig(Arc<HyperlinkConfigInner>); + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct HyperlinkConfigInner { + env: HyperlinkEnvironment, + format: HyperlinkFormat, +} + +impl HyperlinkConfig { + /// Create a new configuration from an environment and a format. + pub fn new( + env: HyperlinkEnvironment, + format: HyperlinkFormat, + ) -> HyperlinkConfig { + HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format })) + } + + /// Returns the hyperlink environment in this configuration. + pub(crate) fn environment(&self) -> &HyperlinkEnvironment { + &self.0.env + } + + /// Returns the hyperlink format in this configuration. + pub(crate) fn format(&self) -> &HyperlinkFormat { + &self.0.format + } } -/// A hyperlink pattern with placeholders. +/// A hyperlink format with variables. +/// +/// This can be created by parsing a string using `HyperlinkFormat::from_str`. +/// +/// The default format is empty. An empty format is valid and effectively +/// disables hyperlinks. +/// +/// # Example +/// +/// ``` +/// use grep_printer::HyperlinkFormat; /// -/// This can be created with `HyperlinkPatternBuilder` or from a string -/// using `HyperlinkPattern::from_str`.
+/// let fmt = "vscode".parse::<HyperlinkFormat>()?; +/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}"); +/// +/// # Ok::<(), Box<dyn std::error::Error>>(()) +/// ``` #[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct HyperlinkPattern { +pub struct HyperlinkFormat { parts: Vec<Part>, is_line_dependent: bool, } -/// A hyperlink pattern part. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Part { - /// Static text. Can include invariant values such as the hostname. - Text(Vec<u8>), - /// Placeholder for the file path. - File, - /// Placeholder for the line number. - Line, - /// Placeholder for the column number. - Column, +impl HyperlinkFormat { + /// Creates an empty hyperlink format. + pub fn empty() -> HyperlinkFormat { + HyperlinkFormat::default() + } + + /// Returns true if this format is empty. + pub fn is_empty(&self) -> bool { + self.parts.is_empty() + } + + /// Creates a [`HyperlinkConfig`] from this format and the environment + /// given. + pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig { + HyperlinkConfig::new(env, self) + } + + /// Returns true if the format can produce line-dependent hyperlinks. + pub(crate) fn is_line_dependent(&self) -> bool { + self.is_line_dependent + } } -/// An error that can occur when parsing a hyperlink pattern. 
+impl std::str::FromStr for HyperlinkFormat { + type Err = HyperlinkFormatError; + + fn from_str(s: &str) -> Result<HyperlinkFormat, HyperlinkFormatError> { + let mut builder = FormatBuilder::new(); + let mut input = match hyperlink_aliases::find(s) { + Some(format) => format.as_bytes(), + None => s.as_bytes(), + }; + + while !input.is_empty() { + if input[0] == b'{' { + let end = input + .find_byte(b'}') + .ok_or(HyperlinkFormatError::InvalidSyntax)?; + + match &input[1..end] { + b"host" => builder.append_hostname(), + b"wslprefix" => builder.append_wsl_prefix(), + b"path" => builder.append_path(), + b"line" => builder.append_line(), + b"column" => builder.append_column(), + other => { + return Err(HyperlinkFormatError::InvalidVariable( + String::from_utf8_lossy(other).to_string(), + )) + } + }; + + input = &input[(end + 1)..]; + } else { + // Static text + let end = input.find_byte(b'{').unwrap_or(input.len()); + builder.append_text(&input[..end]); + input = &input[end..]; + } + } + + builder.build() + } +} + +impl std::fmt::Display for HyperlinkFormat { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for part in self.parts.iter() { + part.fmt(f)?; + } + Ok(()) + } +} + +/// A static environment for hyperlink interpolation. +/// +/// This environment permits setting the values of variables used in hyperlink +/// interpolation that are not expected to change for the lifetime of a program. +/// That is, these values are invariant. +/// +/// Currently, this includes the hostname and a WSL distro prefix. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkEnvironment { + host: Option<String>, + wsl_prefix: Option<String>, +} + +impl HyperlinkEnvironment { + /// Create a new empty hyperlink environment. + pub fn new() -> HyperlinkEnvironment { + HyperlinkEnvironment::default() + } + + /// Set the `{host}` variable, which fills in any hostname components of + /// a hyperlink.
+ /// + /// One can get the hostname in the current environment via the `hostname` + /// function in the `grep-cli` crate. + pub fn host(&mut self, host: Option<String>) -> &mut HyperlinkEnvironment { + self.host = host; + self + } + + /// Set the `{wslprefix}` variable, which contains the WSL distro prefix. + /// An example value is `wsl$/Ubuntu`. The distro name can typically be + /// discovered from the `WSL_DISTRO_NAME` environment variable. + pub fn wsl_prefix( + &mut self, + wsl_prefix: Option<String>, + ) -> &mut HyperlinkEnvironment { + self.wsl_prefix = wsl_prefix; + self + } +} + +/// An error that can occur when parsing a hyperlink format. #[derive(Clone, Debug, Eq, PartialEq)] -pub enum HyperlinkPatternError { - /// This occurs when the pattern syntax is not valid. +pub enum HyperlinkFormatError { + /// This occurs when the format syntax is not valid. InvalidSyntax, - /// This occurs when the {file} placeholder is missing. - NoFilePlaceholder, - /// This occurs when the {line} placeholder is missing, - /// while the {column} placeholder is present. - NoLinePlaceholder, - /// This occurs when an unknown placeholder is used. - InvalidPlaceholder(String), - /// The pattern doesn't start with a valid scheme. + /// This occurs when there are zero variables in the format. + NoVariables, + /// This occurs when the {path} variable is missing. + NoPathVariable, + /// This occurs when the {line} variable is missing, while the {column} + /// variable is present. + NoLineVariable, + /// This occurs when an unknown variable is used. + InvalidVariable(String), + /// The format doesn't start with a valid scheme. InvalidScheme, } -/// The values to replace the pattern placeholders with. 
-#[derive(Clone, Debug)] -pub(crate) struct HyperlinkValues<'a> { - file: &'a HyperlinkPath, - line: u64, - column: u64, +impl std::error::Error for HyperlinkFormatError {} + +impl std::fmt::Display for HyperlinkFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HyperlinkFormatError::InvalidSyntax => { + write!(f, "invalid hyperlink format syntax") + } + HyperlinkFormatError::NoVariables => { + let aliases = hyperlink_aliases::iter() + .map(|(name, _)| name) + .collect::<Vec<&str>>() + .join(", "); + write!( + f, + "at least a {{path}} variable is required in a \ + hyperlink format, or otherwise use a valid alias: {}", + aliases, + ) + } + HyperlinkFormatError::NoPathVariable => { + write!( + f, + "the {{path}} variable is required in a hyperlink format", + ) + } + HyperlinkFormatError::NoLineVariable => { + write!( + f, + "the hyperlink format contains a {{column}} variable, \ + but no {{line}} variable is present", + ) + } + HyperlinkFormatError::InvalidVariable(name) => { + write!( + f, + "invalid hyperlink format variable: '{}', choose \ + from: path, line, column, host", + name + ) + } + HyperlinkFormatError::InvalidScheme => { + write!( + f, + "the hyperlink format must start with a valid URL scheme, \ + i.e., [0-9A-Za-z+-.]+:", + ) + } + } + } } -/// Represents the {file} part of a hyperlink. +/// A builder for `HyperlinkFormat`. /// -/// This is the value to use as-is in the hyperlink, converted from an OS file -/// path. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkPath(Vec<u8>); +/// Once a `HyperlinkFormat` is built, it is immutable. +#[derive(Debug)] +struct FormatBuilder { + parts: Vec<Part>, +} -impl HyperlinkPatternBuilder { - /// Creates a new hyperlink pattern builder. - pub fn new() -> Self { - Self { parts: vec![] } +impl FormatBuilder { + /// Creates a new hyperlink format builder. + fn new() -> FormatBuilder { + FormatBuilder { parts: vec![] } } /// Appends static text.
- pub fn append_text(&mut self, text: &[u8]) -> &mut Self { + fn append_text(&mut self, text: &[u8]) -> &mut FormatBuilder { if let Some(Part::Text(contents)) = self.parts.last_mut() { contents.extend_from_slice(text); } else if !text.is_empty() { @@ -88,307 +278,330 @@ impl HyperlinkPatternBuilder { self } - /// Appends the hostname. - /// - /// On WSL, appends `wsl$/{distro}` instead. - pub fn append_hostname(&mut self) -> &mut Self { - self.append_text(Self::get_hostname().as_bytes()) + /// Appends a variable for the hostname. + fn append_hostname(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::Host); + self } - /// Returns the hostname to use in the pattern. - /// - /// On WSL, returns `wsl$/{distro}`. - fn get_hostname() -> String { - if cfg!(unix) { - if let Ok(mut wsl_distro) = std::env::var("WSL_DISTRO_NAME") { - wsl_distro.insert_str(0, "wsl$/"); - return wsl_distro; - } - } - - gethostname::gethostname().to_string_lossy().to_string() + /// Appends a variable for a WSL path prefix. + fn append_wsl_prefix(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::WSLPrefix); + self } - /// Appends a placeholder for the file path. - pub fn append_file(&mut self) -> &mut Self { - self.parts.push(Part::File); + /// Appends a variable for the file path. + fn append_path(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::Path); self } - /// Appends a placeholder for the line number. - pub fn append_line(&mut self) -> &mut Self { + /// Appends a variable for the line number. + fn append_line(&mut self) -> &mut FormatBuilder { self.parts.push(Part::Line); self } - /// Appends a placeholder for the column number. - pub fn append_column(&mut self) -> &mut Self { + /// Appends a variable for the column number. + fn append_column(&mut self) -> &mut FormatBuilder { self.parts.push(Part::Column); self } - /// Builds the pattern. - pub fn build(&self) -> Result<HyperlinkPattern, HyperlinkPatternError> { + /// Builds the format. 
+ fn build(&self) -> Result<HyperlinkFormat, HyperlinkFormatError> { self.validate()?; - Ok(HyperlinkPattern { + Ok(HyperlinkFormat { parts: self.parts.clone(), is_line_dependent: self.parts.contains(&Part::Line), }) } - /// Validate that the pattern is well-formed. - fn validate(&self) -> Result<(), HyperlinkPatternError> { + /// Validate that the format is well-formed. + fn validate(&self) -> Result<(), HyperlinkFormatError> { + // An empty format is fine. It just means hyperlink support is + // disabled. if self.parts.is_empty() { return Ok(()); } - - if !self.parts.contains(&Part::File) { - return Err(HyperlinkPatternError::NoFilePlaceholder); + // If all parts are just text, then there are no variables. It's + // likely a reference to invalid alias. + if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) { + return Err(HyperlinkFormatError::NoVariables); } - + // Even if we have other variables, no path variable means the + // hyperlink can't possibly work the way it is intended. + if !self.parts.contains(&Part::Path) { + return Err(HyperlinkFormatError::NoPathVariable); + } + // If the {column} variable is used, then we also need a {line} + // variable or else {column} can't possibly work. if self.parts.contains(&Part::Column) && !self.parts.contains(&Part::Line) { - return Err(HyperlinkPatternError::NoLinePlaceholder); + return Err(HyperlinkFormatError::NoLineVariable); } self.validate_scheme() } - /// Validate that the pattern starts with a valid scheme. + /// Validate that the format starts with a valid scheme. Validation is done + /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and + /// 5[2]. In short, a scheme is this: /// - /// A valid scheme starts with an alphabetic character, continues with - /// a sequence of alphanumeric characters, periods, hyphens or plus signs, - /// and ends with a colon. 
- fn validate_scheme(&self) -> Result<(), HyperlinkPatternError> { - if let Some(Part::Text(value)) = self.parts.first() { - if let Some(colon_index) = value.find_byte(b':') { - if value[0].is_ascii_alphabetic() - && value.iter().take(colon_index).all(|c| { - c.is_ascii_alphanumeric() - || matches!(c, b'.' | b'-' | b'+') - }) - { - return Ok(()); - } + /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + /// + /// but is case insensitive. + /// + /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1 + /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5 + fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> { + let Some(Part::Text(ref part)) = self.parts.first() else { + return Err(HyperlinkFormatError::InvalidScheme); + }; + let Some(colon) = part.find_byte(b':') else { + return Err(HyperlinkFormatError::InvalidScheme); + }; + let scheme = &part[..colon]; + if scheme.is_empty() { + return Err(HyperlinkFormatError::InvalidScheme); + } + let is_valid_scheme_char = |byte| match byte { + b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => { + true } + _ => false, + }; + if !scheme.iter().all(|&b| is_valid_scheme_char(b)) { + return Err(HyperlinkFormatError::InvalidScheme); } - - Err(HyperlinkPatternError::InvalidScheme) + Ok(()) } } -impl HyperlinkPattern { - /// Creates an empty hyperlink pattern. - pub fn empty() -> Self { - HyperlinkPattern::default() - } +/// A hyperlink format part. +#[derive(Clone, Debug, Eq, PartialEq)] +enum Part { + /// Static text. |