From a43232577a3034c55a6d05c8d587039a8da2a243 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 22 Sep 2023 14:57:44 -0400 Subject: progress --- Cargo.lock | 69 +- complete/_rg | 1 + crates/core/app.rs | 76 ++- crates/core/args.rs | 127 +++- crates/printer/Cargo.toml | 2 +- crates/printer/src/hyperlink.rs | 1057 ++++++++++++++++++------------- crates/printer/src/hyperlink_aliases.rs | 88 ++- crates/printer/src/lib.rs | 5 +- crates/printer/src/path.rs | 49 +- crates/printer/src/standard.rs | 172 ++--- crates/printer/src/summary.rs | 71 ++- crates/printer/src/util.rs | 128 ++-- 12 files changed, 1096 insertions(+), 749 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 885e73d3..6029cc1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,16 +136,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "gethostname" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" -dependencies = [ - "libc", - "windows-targets", -] - [[package]] name = "glob" version = "0.3.1" @@ -216,10 +206,10 @@ version = "0.1.7" dependencies = [ "base64", "bstr", - "gethostname", "grep-matcher", "grep-regex", "grep-searcher", + "log", "serde", "serde_json", "termcolor", @@ -621,60 +611,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-targets" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - 
"windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/complete/_rg b/complete/_rg index be8d18ba..7fd6c542 100644 --- a/complete/_rg +++ b/complete/_rg @@ -305,6 +305,7 @@ _rg() { '--debug[show debug messages]' '--field-context-separator[set string to delimit fields in context lines]' '--field-match-separator[set string to delimit fields in matching lines]' + '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e' '--hyperlink-format=[specify pattern for 
hyperlinks]:pattern' '--trace[show more verbose debug messages]' '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' diff --git a/crates/core/app.rs b/crates/core/app.rs index 9c523479..7d0f3baa 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -580,6 +580,7 @@ pub fn all_args_and_flags() -> Vec { flag_glob_case_insensitive(&mut args); flag_heading(&mut args); flag_hidden(&mut args); + flag_hostname_bin(&mut args); flag_hyperlink_format(&mut args); flag_iglob(&mut args); flag_ignore_case(&mut args); @@ -1495,19 +1496,80 @@ This flag can be disabled with --no-hidden. args.push(arg); } +fn flag_hostname_bin(args: &mut Vec) { + const SHORT: &str = "Run a program to get this system's hostname."; + const LONG: &str = long!( + "\ +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's *PATH* environment variable). When set, ripgrep will +run this executable, with no arguments, and treat its output (with leading and +trailing whitespace stripped) as your system's hostname. + +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling *gethostname*. On Windows, this corresponds to calling +*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" + +ripgrep uses your system's hostname for producing hyperlinks. +" + ); + let arg = + RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_hyperlink_format(args: &mut Vec) { const SHORT: &str = "Set the format of hyperlinks to match results."; const LONG: &str = long!( "\ -Set the format of hyperlinks to match results. This defines a pattern which -can contain the following placeholders: {file}, {line}, {column}, and {host}. -An empty pattern or 'none' disables hyperlinks. 
+Set the format of hyperlinks to match results. Hyperlinks make certain elements +of ripgrep's output, such as file paths, clickable. This generally only works +in terminal emulators that support OSC-8 hyperlinks. For example, the format +*file://{host}{file}* will emit an RFC 8089 hyperlink. + +The following variables are available in the format string: + +*{path}*: Required. This is replaced with a path to a matching file. The +path is guaranteed to be absolute and percent encoded such that it is valid to +put into a URI. Note that a path is guaranteed to start with a */*. + +*{host}*: Optional. This is replaced with your system's hostname. On Unix, +this corresponds to calling *gethostname*. On Windows, this corresponds to +calling *GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" +Alternatively, if --hostname-bin was provided, then the hostname returned from +the output of that program will be returned. If no hostname could be found, +then this variable is replaced with the empty string. + +*{line}*: Optional. If appropriate, this is replaced with the line number of +a match. If no line number is available (for example, if --no-line-number was +given), then it is automatically replaced with the value *1*. + +*{column}*: Optional, but requires the presence of **{line}**. If appropriate, +this is replaced with the column number of a match. If no column number is +available (for example, if --no-column was given), then it is automatically +replaced with the value *1*. + +*{wslprefix}*: Optional. This is a special value that is set to +*wsl$/WSL_DISTRO_NAME*, where *WSL_DISTRO_NAME* corresponds to the value of +the equivalent environment variable. If the system is not Unix or if the +*WSL_DISTRO_NAME* environment variable is not set, then this is replaced with +the empty string. 
+ +Alternatively, a format string may correspond to one of the following +aliases: default, file, grep+, kitty, macvim, none, subl, textmate, vscode, +vscode-insiders, vscodium. + +A format string may be empty. An empty format string is equivalent to the +*none* alias. In this case, hyperlinks will be disabled. -The {file} placeholder is required, and will be replaced with the absolute -file path with a few adjustments: The leading '/' on Unix is removed, -and '\\' is replaced with '/' on Windows. +At present, the default format when ripgrep detects a tty on stdout all systems +is *default*. This is an alias that expands to *file://{host}{path}* on Unix +and *file://{path}* on Windows. When stdout is not a tty, then the default +format behaves as if it were *none*. That is, hyperlinks are disabled. -As an example, the default pattern on Unix systems is: 'file://{host}/{file}' +For more information, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda " ); let arg = diff --git a/crates/core/args.rs b/crates/core/args.rs index 0f8d1f18..cc6b4592 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -18,9 +18,9 @@ use grep::pcre2::{ RegexMatcherBuilder as PCRE2RegexMatcherBuilder, }; use grep::printer::{ - default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder, - PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats, - Summary, SummaryBuilder, SummaryKind, JSON, + default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment, + HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard, + StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON, }; use grep::regex::{ RegexMatcher as RustRegexMatcher, @@ -236,7 +236,7 @@ impl Args { let mut builder = PathPrinterBuilder::new(); builder .color_specs(self.matches().color_specs()?) - .hyperlink_pattern(self.matches().hyperlink_pattern()?) + .hyperlink(self.matches().hyperlink_config()?) .separator(self.matches().path_separator()?) 
.terminator(self.matches().path_terminator().unwrap_or(b'\n')); Ok(builder.build(wtr)) @@ -774,7 +774,7 @@ impl ArgMatches { let mut builder = StandardBuilder::new(); builder .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .heading(self.heading()) .path(self.with_filename(paths)) @@ -814,7 +814,7 @@ impl ArgMatches { builder .kind(self.summary_kind().expect("summary format")) .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .path(self.with_filename(paths)) .max_matches(self.max_count()?) @@ -1126,11 +1126,15 @@ impl ArgMatches { /// for the current system is used if the value is not set. /// /// If an invalid pattern is provided, then an error is returned. - fn hyperlink_pattern(&self) -> Result { - Ok(match self.value_of_lossy("hyperlink-format") { - Some(pattern) => HyperlinkPattern::from_str(&pattern)?, - None => HyperlinkPattern::default_file_scheme(), - }) + fn hyperlink_config(&self) -> Result { + let mut env = HyperlinkEnvironment::new(); + env.host(hostname(self.value_of_os("hostname-bin"))) + .wsl_prefix(wsl_prefix()); + let fmt = match self.value_of_lossy("hyperlink-format") { + None => HyperlinkFormat::from_str("default")?, + Some(format) => HyperlinkFormat::from_str(&format)?, + }; + Ok(HyperlinkConfig::new(env, fmt)) } /// Returns true if ignore files should be processed case insensitively. @@ -1838,6 +1842,107 @@ fn current_dir() -> Result { .into()) } +/// Retrieves the hostname that ripgrep should use wherever a hostname is +/// required. Currently, that's just in the hyperlink format. +/// +/// This works by first running the given binary program (if present and with +/// no arguments) to get the hostname after trimming leading and trailing +/// whitespace. 
If that fails for any reason, then it falls back to getting +/// the hostname via platform specific means (e.g., `gethostname` on Unix). +/// +/// The purpose of `bin` is to make it possible for end users to override how +/// ripgrep determines the hostname. +fn hostname(bin: Option<&OsStr>) -> Option { + let Some(bin) = bin else { return platform_hostname() }; + let bin = match grep::cli::resolve_binary(bin) { + Ok(bin) => bin, + Err(err) => { + log::debug!( + "failed to run command '{bin:?}' to get hostname \ + (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let mut cmd = process::Command::new(&bin); + cmd.stdin(process::Stdio::null()); + let rdr = match grep::cli::CommandReader::new(&mut cmd) { + Ok(rdr) => rdr, + Err(err) => { + log::debug!( + "failed to spawn command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let out = match io::read_to_string(rdr) { + Ok(out) => out, + Err(err) => { + log::debug!( + "failed to read output from command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let hostname = out.trim(); + if hostname.is_empty() { + log::debug!( + "output from command '{bin:?}' is empty after trimming \ + leading and trailing whitespace (falling back to \ + platform hostname)", + ); + return platform_hostname(); + } + Some(hostname.to_string()) +} + +/// Attempts to get the hostname by using platform specific routines. For +/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on +/// Windows. 
+fn platform_hostname() -> Option { + let hostname_os = match grep::cli::hostname() { + Ok(x) => x, + Err(err) => { + log::debug!("could not get hostname: {}", err); + return None; + } + }; + let Some(hostname) = hostname_os.to_str() else { + log::debug!( + "got hostname {:?}, but it's not valid UTF-8", + hostname_os + ); + return None; + }; + Some(hostname.to_string()) +} + +/// Returns a value that is meant to fill in the `{wslprefix}` variable for +/// a user given hyperlink format. A WSL prefix is a share/network like thing +/// that is meant to permit Windows applications to open files stored within +/// a WSL drive. +/// +/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running +/// in a Unix environment, then this returns None. +/// +/// See: +fn wsl_prefix() -> Option { + if !cfg!(unix) { + return None; + } + let distro_os = env::var_os("WSL_DISTRO_NAME")?; + let Some(distro) = distro_os.to_str() else { + log::debug!( + "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", + distro_os + ); + return None; + }; + Some(format!("wsl$/{distro}")) +} + /// Tries to assign a timestamp to every `Subject` in the vector to help with /// sorting Subjects by time. 
fn load_timestamps( diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml index 69e03d65..dc63a6cc 100644 --- a/crates/printer/Cargo.toml +++ b/crates/printer/Cargo.toml @@ -21,9 +21,9 @@ serde = ["dep:base64", "dep:serde", "dep:serde_json"] [dependencies] base64 = { version = "0.21.4", optional = true } bstr = "1.6.2" -gethostname = "0.4.3" grep-matcher = { version = "0.1.6", path = "../matcher" } grep-searcher = { version = "0.1.11", path = "../searcher" } +log = "0.4.5" termcolor = "1.3.0" serde = { version = "1.0.188", optional = true, features = ["derive"] } serde_json = { version = "1.0.107", optional = true } diff --git a/crates/printer/src/hyperlink.rs b/crates/printer/src/hyperlink.rs index fa38b5c2..2fb8a61e 100644 --- a/crates/printer/src/hyperlink.rs +++ b/crates/printer/src/hyperlink.rs @@ -1,85 +1,275 @@ -use std::{ - io::{self, Write}, - path::Path, -}; +use std::{cell::RefCell, io, path::Path, sync::Arc}; use { bstr::ByteSlice, termcolor::{HyperlinkSpec, WriteColor}, }; -use crate::hyperlink_aliases::HYPERLINK_PATTERN_ALIASES; +use crate::hyperlink_aliases; -/// A builder for `HyperlinkPattern`. +/// Hyperlink configuration. /// -/// Once a `HyperlinkPattern` is built, it is immutable. -#[derive(Debug)] -pub struct HyperlinkPatternBuilder { - parts: Vec, +/// This configuration specifies both the [hyperlink format](HyperlinkFormat) +/// and an [environment](HyperlinkEnvironment) for interpolating a subset of +/// variables. The specific subset includes variables that are intended to +/// be invariant throughout the lifetime of a process, such as a machine's +/// hostname. +/// +/// A hyperlink configuration can be provided to printer builders such as +/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink).
+#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkConfig(Arc); + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct HyperlinkConfigInner { + env: HyperlinkEnvironment, + format: HyperlinkFormat, +} + +impl HyperlinkConfig { + /// Create a new configuration from an environment and a format. + pub fn new( + env: HyperlinkEnvironment, + format: HyperlinkFormat, + ) -> HyperlinkConfig { + HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format })) + } + + /// Returns the hyperlink environment in this configuration. + pub(crate) fn environment(&self) -> &HyperlinkEnvironment { + &self.0.env + } + + /// Returns the hyperlink format in this configuration. + pub(crate) fn format(&self) -> &HyperlinkFormat { + &self.0.format + } } -/// A hyperlink pattern with placeholders. +/// A hyperlink format with variables. +/// +/// This can be created by parsing a string using `HyperlinkFormat::from_str`. +/// +/// The default format is empty. An empty format is valid and effectively +/// disables hyperlinks. +/// +/// # Example +/// +/// ``` +/// use grep_printer::HyperlinkFormat; /// -/// This can be created with `HyperlinkPatternBuilder` or from a string -/// using `HyperlinkPattern::from_str`. +/// let fmt = "vscode".parse::()?; +/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}"); +/// +/// # Ok::<(), Box>(()) +/// ``` #[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct HyperlinkPattern { +pub struct HyperlinkFormat { parts: Vec, is_line_dependent: bool, } -/// A hyperlink pattern part. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Part { - /// Static text. Can include invariant values such as the hostname. - Text(Vec), - /// Placeholder for the file path. - File, - /// Placeholder for the line number. - Line, - /// Placeholder for the column number. - Column, +impl HyperlinkFormat { + /// Creates an empty hyperlink format.
+ pub fn empty() -> HyperlinkFormat { + HyperlinkFormat::default() + } + + /// Returns true if this format is empty. + pub fn is_empty(&self) -> bool { + self.parts.is_empty() + } + + /// Creates a [`HyperlinkConfig`] from this format and the environment + /// given. + pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig { + HyperlinkConfig::new(env, self) + } + + /// Returns true if the format can produce line-dependent hyperlinks. + pub(crate) fn is_line_dependent(&self) -> bool { + self.is_line_dependent + } } -/// An error that can occur when parsing a hyperlink pattern. +impl std::str::FromStr for HyperlinkFormat { + type Err = HyperlinkFormatError; + + fn from_str(s: &str) -> Result { + let mut builder = FormatBuilder::new(); + let mut input = match hyperlink_aliases::find(s) { + Some(format) => format.as_bytes(), + None => s.as_bytes(), + }; + + while !input.is_empty() { + if input[0] == b'{' { + let end = input + .find_byte(b'}') + .ok_or(HyperlinkFormatError::InvalidSyntax)?; + + match &input[1..end] { + b"host" => builder.append_hostname(), + b"wslprefix" => builder.append_wsl_prefix(), + b"path" => builder.append_path(), + b"line" => builder.append_line(), + b"column" => builder.append_column(), + other => { + return Err(HyperlinkFormatError::InvalidVariable( + String::from_utf8_lossy(other).to_string(), + )) + } + }; + + input = &input[(end + 1)..]; + } else { + // Static text + let end = input.find_byte(b'{').unwrap_or(input.len()); + builder.append_text(&input[..end]); + input = &input[end..]; + } + } + + builder.build() + } +} + +impl std::fmt::Display for HyperlinkFormat { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for part in self.parts.iter() { + part.fmt(f)?; + } + Ok(()) + } +} + +/// A static environment for hyperlink interpolation. +/// +/// This environment permits setting the values of variables used in hyperlink +/// interpolation that are not expected to change for the lifetime of a program.
+/// That is, these values are invariant. +/// +/// Currently, this includes the hostname and a WSL distro prefix. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkEnvironment { + host: Option, + wsl_prefix: Option, +} + +impl HyperlinkEnvironment { + /// Create a new empty hyperlink environment. + pub fn new() -> HyperlinkEnvironment { + HyperlinkEnvironment::default() + } + + /// Set the `{host}` variable, which fills in any hostname components of + /// a hyperlink. + /// + /// One can get the hostname in the current environment via the `hostname` + /// function in the `grep-cli` crate. + pub fn host(&mut self, host: Option) -> &mut HyperlinkEnvironment { + self.host = host; + self + } + + /// Set the `{wslprefix}` variable, which contains the WSL distro prefix. + /// An example value is `wsl$/Ubuntu`. The distro name can typically be + /// discovered from the `WSL_DISTRO_NAME` environment variable. + pub fn wsl_prefix( + &mut self, + wsl_prefix: Option, + ) -> &mut HyperlinkEnvironment { + self.wsl_prefix = wsl_prefix; + self + } +} + +/// An error that can occur when parsing a hyperlink format. #[derive(Clone, Debug, Eq, PartialEq)] -pub enum HyperlinkPatternError { - /// This occurs when the pattern syntax is not valid. +pub enum HyperlinkFormatError { + /// This occurs when the format syntax is not valid. InvalidSyntax, - /// This occurs when the {file} placeholder is missing. - NoFilePlaceholder, - /// This occurs when the {line} placeholder is missing, - /// while the {column} placeholder is present. - NoLinePlaceholder, - /// This occurs when an unknown placeholder is used. - InvalidPlaceholder(String), - /// The pattern doesn't start with a valid scheme. + /// This occurs when there are zero variables in the format. + NoVariables, + /// This occurs when the {path} variable is missing. + NoPathVariable, + /// This occurs when the {line} variable is missing, while the {column} + /// variable is present. 
+ NoLineVariable, + /// This occurs when an unknown variable is used. + InvalidVariable(String), + /// The format doesn't start with a valid scheme. InvalidScheme, } -/// The values to replace the pattern placeholders with. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkValues<'a> { - file: &'a HyperlinkPath, - line: u64, - column: u64, +impl std::error::Error for HyperlinkFormatError {} + +impl std::fmt::Display for HyperlinkFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + HyperlinkFormatError::InvalidSyntax => { + write!(f, "invalid hyperlink format syntax") + } + HyperlinkFormatError::NoVariables => { + let aliases = hyperlink_aliases::iter() + .map(|(name, _)| name) + .collect::>() + .join(", "); + write!( + f, + "at least a {{path}} variable is required in a \ + hyperlink format, or otherwise use a valid alias: {}", + aliases, + ) + } + HyperlinkFormatError::NoPathVariable => { + write!( + f, + "the {{path}} variable is required in a hyperlink format", + ) + } + HyperlinkFormatError::NoLineVariable => { + write!( + f, + "the hyperlink format contains a {{column}} variable, \ + but no {{line}} variable is present", + ) + } + HyperlinkFormatError::InvalidVariable(name) => { + write!( + f, + "invalid hyperlink format variable: '{}', choose \ + from: path, line, column, host", + name + ) + } + HyperlinkFormatError::InvalidScheme => { + write!( + f, + "the hyperlink format must start with a valid URL scheme, \ + i.e., [0-9A-Za-z+-.]+:", + ) + } + } + } } -/// Represents the {file} part of a hyperlink. +/// A builder for `HyperlinkFormat`. /// -/// This is the value to use as-is in the hyperlink, converted from an OS file -/// path. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkPath(Vec); +/// Once a `HyperlinkFormat` is built, it is immutable. +#[derive(Debug)] +struct FormatBuilder { + parts: Vec, +} -impl HyperlinkPatternBuilder { - /// Creates a new hyperlink pattern builder.
- pub fn new() -> Self { - Self { parts: vec![] } +impl FormatBuilder { + /// Creates a new hyperlink format builder. + fn new() -> FormatBuilder { + FormatBuilder { parts: vec![] } } /// Appends static text. - pub fn append_text(&mut self, text: &[u8]) -> &mut Self { + fn append_text(&mut self, text: &[u8]) -> &mut FormatBuilder { if let Some(Part::Text(contents)) = self.parts.last_mut() { contents.extend_from_slice(text); } else if !text.is_empty() { @@ -88,307 +278,330 @@ impl HyperlinkPatternBuilder { self } - /// Appends the hostname. - /// - /// On WSL, appends `wsl$/{distro}` instead. - pub fn append_hostname(&mut self) -> &mut Self { - self.append_text(Self::get_hostname().as_bytes()) + /// Appends a variable for the hostname. + fn append_hostname(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::Host); + self } - /// Returns the hostname to use in the pattern. - /// - /// On WSL, returns `wsl$/{distro}`. - fn get_hostname() -> String { - if cfg!(unix) { - if let Ok(mut wsl_distro) = std::env::var("WSL_DISTRO_NAME") { - wsl_distro.insert_str(0, "wsl$/"); - return wsl_distro; - } - } - - gethostname::gethostname().to_string_lossy().to_string() + /// Appends a variable for a WSL path prefix. + fn append_wsl_prefix(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::WSLPrefix); + self } - /// Appends a placeholder for the file path. - pub fn append_file(&mut self) -> &mut Self { - self.parts.push(Part::File); + /// Appends a variable for the file path. + fn append_path(&mut self) -> &mut FormatBuilder { + self.parts.push(Part::Path); self } - /// Appends a placeholder for the line number. - pub fn append_line(&mut self) -> &mut Self { + /// Appends a variable for the line number. + fn append_line(&mut self) -> &mut FormatBuilder { self.parts.push(Part::Line); self } - /// Appends a placeholder for the column number. - pub fn append_column(&mut self) -> &mut Self { + /// Appends a variable for the column number. 
+ fn append_column(&mut self) -> &mut FormatBuilder { self.parts.push(Part::Column); self } - /// Builds the pattern. - pub fn build(&self) -> Result { + /// Builds the format. + fn build(&self) -> Result { self.validate()?; - Ok(HyperlinkPattern { + Ok(HyperlinkFormat { parts: self.parts.clone(), is_line_dependent: self.parts.contains(&Part::Line), }) } - /// Validate that the pattern is well-formed. - fn validate(&self) -> Result<(), HyperlinkPatternError> { + /// Validate that the format is well-formed. + fn validate(&self) -> Result<(), HyperlinkFormatError> { + // An empty format is fine. It just means hyperlink support is + // disabled. if self.parts.is_empty() { return Ok(()); } - - if !self.parts.contains(&Part::File) { - return Err(HyperlinkPatternError::NoFilePlaceholder); + // If all parts are just text, then there are no variables. It's + // likely a reference to invalid alias. + if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) { + return Err(HyperlinkFormatError::NoVariables); } - + // Even if we have other variables, no path variable means the + // hyperlink can't possibly work the way it is intended. + if !self.parts.contains(&Part::Path) { + return Err(HyperlinkFormatError::NoPathVariable); + } + // If the {column} variable is used, then we also need a {line} + // variable or else {column} can't possibly work. if self.parts.contains(&Part::Column) && !self.parts.contains(&Part::Line) { - return Err(HyperlinkPatternError::NoLinePlaceholder); + return Err(HyperlinkFormatError::NoLineVariable); } self.validate_scheme() } - /// Validate that the pattern starts with a valid scheme. + /// Validate that the format starts with a valid scheme. Validation is done + /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and + /// 5[2]. 
In short, a scheme is this: /// - /// A valid scheme starts with an alphabetic character, continues with - /// a sequence of alphanumeric characters, periods, hyphens or plus signs, - /// and ends with a colon. - fn validate_scheme(&self) -> Result<(), HyperlinkPatternError> { - if let Some(Part::Text(value)) = self.parts.first() { - if let Some(colon_index) = value.find_byte(b':') { - if value[0].is_ascii_alphabetic() - && value.iter().take(colon_index).all(|c| { - c.is_ascii_alphanumeric() - || matches!(c, b'.' | b'-' | b'+') - }) - { - return Ok(()); - } + /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + /// + /// but is case insensitive. + /// + /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1 + /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5 + fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> { + let Some(Part::Text(ref part)) = self.parts.first() else { + return Err(HyperlinkFormatError::InvalidScheme); + }; + let Some(colon) = part.find_byte(b':') else { + return Err(HyperlinkFormatError::InvalidScheme); + }; + let scheme = &part[..colon]; + if scheme.is_empty() { + return Err(HyperlinkFormatError::InvalidScheme); + } + let is_valid_scheme_char = |byte| match byte { + b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => { + true } + _ => false, + }; + if !scheme.iter().all(|&b| is_valid_scheme_char(b)) { + return Err(HyperlinkFormatError::InvalidScheme); } - - Err(HyperlinkPatternError::InvalidScheme) + Ok(()) } } -impl HyperlinkPattern { - /// Creates an empty hyperlink pattern. - pub fn empty() -> Self { - HyperlinkPattern::default() - } +/// A hyperlink format part. +#[derive(Clone, Debug, Eq, PartialEq)] +enum Part { + /// Static text. + Text(Vec), + /// Variable for the hostname. + Host, + /// Variable for a WSL path prefix. + WSLPrefix, + /// Variable for the file path. + Path, + /// Variable for the line number. + Line, + /// Variable for the column number. 
+ Column, +} - /// Creates a default pattern suitable for Unix. - /// - /// The returned pattern is `file://{host}/{file}` - #[cfg(unix)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file://") - .append_hostname() - .append_text(b"/") - .append_file() - .build() - .unwrap() +impl Part { + fn write_to( + &self, + env: &HyperlinkEnvironment, + values: &Values, + dest: &mut Vec, + ) { + match self { + Part::Text(ref text) => dest.extend_from_slice(text), + Part::Host => dest.extend_from_slice( + env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::WSLPrefix => dest.extend_from_slice( + env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::Path => dest.extend_from_slice(&values.path.0), + Part::Line => { + let line = values.line.unwrap_or(1).to_string(); + dest.extend_from_slice(line.as_bytes()); + } + Part::Column => { + let column = values.column.unwrap_or(1).to_string(); + dest.extend_from_slice(column.as_bytes()); + } + } } +} - /// Creates a default pattern suitable for Windows. - /// - /// The returned pattern is `file:///{file}` - #[cfg(windows)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file:///") - .append_file() - .build() - .unwrap() +impl std::fmt::Display for Part { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)), + Part::Host => write!(f, "{{host}}"), + Part::WSLPrefix => write!(f, "{{wslprefix}}"), + Part::Path => write!(f, "{{path}}"), + Part::Line => write!(f, "{{line}}"), + Part::Column => write!(f, "{{column}}"), + } } +} - /// Returns true if this pattern is empty. - pub fn is_empty(&self) -> bool { - self.parts.is_empty() +/// The values to replace the format variables with. +/// +/// This only consists of values that depend on each path or match printed. 
+/// Values that are invariant throughout the lifetime of the process are set +/// via a [`HyperlinkEnvironment`]. +#[derive(Clone, Debug)] +pub(crate) struct Values<'a> { + path: &'a HyperlinkPath, + line: Option, + column: Option, +} + +impl<'a> Values<'a> { + /// Creates a new set of values, starting with the path given. + /// + /// Callers may also set the line and column number using the mutator + /// methods. + pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> { + Values { path, line: None, column: None } } - /// Returns true if the pattern can produce line-dependent hyperlinks. - pub fn is_line_dependent(&self) -> bool { - self.is_line_dependent + /// Sets the line number for these values. + /// + /// If a line number is not set and a hyperlink format contains a `{line}` + /// variable, then it is interpolated with the value of `1` automatically. + pub(crate) fn line(mut self, line: Option) -> Values<'a> { + self.line = line; + self } - /// Renders this pattern with the given values to the given output. - pub(crate) fn render( - &self, - values: &HyperlinkValues, - output: &mut impl Write, - ) -> io::Result<()> { - for part in &self.parts { - part.render(values, output)?; - } - Ok(()) + /// Sets the column number for these values. + /// + /// If a column number is not set and a hyperlink format contains a + /// `{column}` variable, then it is interpolated with the value of `1` + /// automatically. 
+ pub(crate) fn column(mut self, column: Option) -> Values<'a> { + self.column = column; + self } } -impl std::str::FromStr for HyperlinkPattern { - type Err = HyperlinkPatternError; +#[derive(Clone, Debug)] +pub(crate) struct Interpolator { + config: HyperlinkConfig, + buf: RefCell>, +} - fn from_str(s: &str) -> Result { - let mut builder = HyperlinkPatternBuilder::new(); - let mut input = s.as_bytes(); +impl Interpolator { + pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator { + Interpolator { config: config.clone(), buf: RefCell::new(vec![]) } + } - if let Ok(index) = HYPERLINK_PATTERN_ALIASES - .binary_search_by_key(&input, |&(name, _)| name.as_bytes()) + pub(crate) fn begin( + &self, + values: &Values, + mut wtr: W, + ) -> io::Result { + if self.config.format().is_empty() + || !wtr.supports_hyperlinks() + || !wtr.supports_color() { - input = HYPERLINK_PATTERN_ALIASES[index].1.as_bytes(); + return Ok(InterpolatorStatus::inactive()); } - - while !input.is_empty() { - if input[0] == b'{' { - // Placeholder - let end = input - .find_byte(b'}') - .ok_or(HyperlinkPatternError::InvalidSyntax)?; - - match &input[1..end] { - b"file" => builder.append_file(), - b"line" => builder.append_line(), - b"column" => builder.append_column(), - b"host" => builder.append_hostname(), - other => { - return Err(HyperlinkPatternError::InvalidPlaceholder( - String::from_utf8_lossy(other).to_string(), - )) - } - }; - - input = &input[(end + 1)..]; - } else { - // Static text - let end = input.find_byte(b'{').unwrap_or(input.len()); - builder.append_text(&input[..end]); - input = &input[end..]; - } + let mut buf = self.buf.borrow_mut(); + buf.clear(); + for part in self.config.format().parts.iter() { + part.write_to(self.config.environment(), values, &mut buf); } - - builder.build() + let spec = HyperlinkSpec::open(&buf); + wtr.set_hyperlink(&spec)?; + Ok(InterpolatorStatus { active: true }) } -} -impl ToString for HyperlinkPattern { - fn to_string(&self) -> String { - 
self.parts.iter().map(|p| p.to_string()).collect() - } -} - -impl Part { - fn render( + pub(crate) fn finish( &self, - values: &HyperlinkValues, - output: &mut impl Write, + status: InterpolatorStatus, + mut wtr: W, ) -> io::Result<()> { - match self { - Part::Text(text) => output.write_all(text), - Part::File => output.write_all(&values.file.0), - Part::Line => write!(output, "{}", values.line), - Part::Column => write!(output, "{}", values.column), + if !status.active { + return Ok(()); } + wtr.set_hyperlink(&HyperlinkSpec::close()) } } -impl ToString for Part { - fn to_string(&self) -> String { - match self { - Part::Text(text) => String::from_utf8_lossy(text).to_string(), - Part::File => "{file}".to_string(), - Part::Line => "{line}".to_string(), - Part::Column => "{column}".to_string(), - } - } +#[derive(Debug)] +pub(crate) struct InterpolatorStatus { + active: bool, } -impl std::fmt::Display for HyperlinkPatternError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - HyperlinkPatternError::InvalidSyntax => { - write!(f, "invalid hyperlink pattern syntax") - } - HyperlinkPatternError::NoFilePlaceholder => { - write!( - f, - "the {{file}} placeholder is required in hyperlink \ - patterns", - ) - } - HyperlinkPatternError::NoLinePlaceholder => { - write!( - f, - "the hyperlink pattern contains a {{column}} placeholder, \ - but no {{line}} placeholder is present", - ) - } - HyperlinkPatternError::InvalidPlaceholder(name) => { - write!( - f, - "invalid hyperlink pattern placeholder: '{}', choose \ - from: file, line, column, host", - name - ) - } - HyperlinkPatternError::InvalidScheme => { - write!( - f, - "the hyperlink pattern must start with a valid URL scheme" - ) - } - } +impl InterpolatorStatus { + pub(crate) fn inactive() -> InterpolatorStatus { + InterpolatorStatus { active: false } } } -impl std::error::Error for HyperlinkPatternError {} - -impl<'a> HyperlinkValues<'a> { - /// Creates a new set of hyperlink values. 
- pub(crate) fn new( - file: &'a HyperlinkPath, - line: Option, - column: Option, - ) -> Self { - HyperlinkValues { - file, - line: line.unwrap_or(1), - column: column.unwrap_or(1), - } - } -} +/// Represents the {path} part of a hyperlink. +/// +/// This is the value to use as-is in the hyperlink, converted from an OS file +/// path. +#[derive(Clone, Debug)] +pub(crate) struct HyperlinkPath(Vec); impl HyperlinkPath { /// Returns a hyperlink path from an OS path. #[cfg(unix)] - pub(crate) fn from_path(path: &Path) -> Option { - // On Unix, this function returns the absolute file path without the - // leading slash, as it makes for more natural hyperlink patterns, for - // instance: - // file://{host}/{file} instead of file://{host}{file} - // vscode://file/{file} instead of vscode://file{file} - // It also allows for patterns to be multi-platform. - - let path = path.canonicalize().ok()?; - let path = path.to_str()?.as_bytes(); - let path = if path.starts_with(b"/") { &path[1..] } else { path }; - Some(Self::encode(path)) + pub(crate) fn from_path(original_path: &Path) -> Option { + use std::os::unix::ffi::OsStrExt; + + // We canonicalize the path in order to get an absolute version of it + // without any `.` or `..` or superflous separators. Unfortunately, + // this does also remove symlinks, and in theory, it would be nice to + // retain them. Perhaps even simpler, we could just join the current + // working directory with the path and be done with it. There was + // some discussion about this on PR#2483, and there generally appears + // to be some uncertainty about the extent to which hyperlinks with + // things like `..` in them actually work. So for now, we do the safest + // thing possible even though I think it can result in worse user + // experience. (Because it means the path you click on and the actual + // path that gets followed are different, even though they ostensibly + // refer to the same file.) 
+ // + // There's also the potential issue that path canonicalization is + // expensive since it can touch the file system. That is probably + // less of an issue since hyperlinks are only created when they're + // supported, i.e., when writing to a tty. + // + // [1]: https://github.com/BurntSushi/ripgrep/pull/2483 + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; + } + }; + let bytes = path.as_os_str().as_bytes(); + // This should not be possible since one imagines that canonicalization + // should always return an absolute path. But it doesn't actually + // appear guaranteed by POSIX, so we check whether it's true or not and + // refuse to create a hyperlink from a relative path if it isn't. + if !bytes.starts_with(b"/") { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with a slash", + original_path, + path, + ); + return None; + } + Some(HyperlinkPath::encode(bytes)) } /// Returns a hyperlink path from an OS path. #[cfg(windows)] - pub fn from_path(path: &Path) -> Option { + pub(crate) fn from_path(original_path: &Path) -> Option { // On Windows, Path::canonicalize returns the result of // GetFinalPathNameByHandleW with VOLUME_NAME_DOS, // which produces paths such as the following: + // // \\?\C:\dir\file.txt (local path) // \\?\UNC\server\dir\file.txt (network share) // @@ -396,55 +609,102 @@ impl HyperlinkPath { // It is followed either by the drive letter, or by UNC\ // (universal naming convention), which denotes a network share. 
// - // Given that the default URL pattern on Windows is file:///{file} + // Given that the default URL format on Windows is file://{path} // we need to return the following from this function: - // C:/dir/file.txt (local path) - // /server/dir/file.txt (network share) + // + // /C:/dir/file.txt (local path) + // //server/dir/file.txt (network share) // // Which produces the following links: + // // file:///C:/dir/file.txt (local path) // file:////server/dir/file.txt (network share) // - // This substitutes the {file} placeholder with the expected value - // for the most common DOS paths, but on the other hand, - // network paths start with a single slash, which may be unexpected. - // It produces correct URLs though. + // This substitutes the {path} variable with the expected value for + // the most common DOS paths, but on the other hand, network paths + // start with a single slash, which may be unexpected. It seems to work + // though? + // + // Note that the following URL syntax also seems to be valid? // - // Note that the following URL syntax is also valid for network shares: // file://server/dir/file.txt - // It is also more consistent with the Unix case, but in order to - // use it, the pattern would have to be file://{file} and - // the {file} placeholder would have to be replaced with - // /C:/dir/file.txt - // for local files, which is not ideal, and it is certainly unexpected. + // + // But the initial implementation of this routine went for the format + // above. // // Also note that the file://C:/dir/file.txt syntax is not correct, // even though it often works in practice. 
// - // In the end, this choice was confirmed by VSCode, whose pattern is - // vscode://file/{file}:{line}:{column} and which correctly understands - // the following URL format for network drives: + // In the end, this choice was confirmed by VSCode, whose format is + // + // vscode://file{path}:{line}:{column} + // + // and which correctly understands the following URL format for network + // drives: + // // vscode://file//server/dir/file.txt:1:1 + // // It doesn't parse any other number of slashes in "file//server" as a // network path. - const WIN32_NAMESPACE_PREFIX: &[u8] = br"\\?\"; - const UNC_PREFIX: &[u8] = br"UNC\"; - - let path = path.canonicalize().ok()?; - let mut path = path.to_str()?.as_bytes(); - - if path.starts_with(WIN32_NAMESPACE_PREFIX) { - path = &path[WIN32_NAMESPACE_PREFIX.len()..]; - - if path.starts_with(UNC_PREFIX) { - path = &path[(UNC_PREFIX.len() - 1)..]; + const WIN32_NAMESPACE_PREFIX: &str = r"\\?\"; + const UNC_PREFIX: &str = r"UNC\"; + + // As for Unix, we canonicalize the path to make sure we have an + // absolute path. + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; + } + }; + // We convert the path to a string for easier manipulation. If it + // wasn't valid UTF-16 (and thus could not be non-lossily transcoded + // to UTF-8), then we just give up. It's not clear we could make + // a meaningful hyperlink from it anyway. And this should be an + // exceptionally rare case. + let mut string = match path.to_str() { + Some(string) => string, + None => { + log::debug!( + "hyperlink creation for {:?} failed, path is not \ + valid UTF-8", + original_path, + ); + return None; } - } else { + }; + // As the comment above says, we expect all canonicalized paths to + // begin with a \\?\. 
If it doesn't, then something weird is happening + // and we should just give up. + if !string.starts_with(WIN32_NAMESPACE_PREFIX) { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with \\\\?\\", + original_path, + path, + ); return None; } + string = &string[WIN32_NAMESPACE_PREFIX.len()..]; - Some(Self::encode(path)) + // And as above, drop the UNC prefix too, but keep the leading slash. + if string.starts_with(UNC_PREFIX) { + string = &string[(UNC_PREFIX.len() - 1)..]; + } + // Finally, add a leading slash. In the local file case, this turns + // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into + // /C:/foo/bar). In the network share case, this turns \share\foo\bar + // into /\share/foo/bar (and then percent encoding turns it into + // //share/foo/bar). + let with_slash = format!("/{string}"); + Some(HyperlinkPath::encode(with_slash.as_bytes())) } /// Percent-encodes a path. @@ -461,9 +721,8 @@ impl HyperlinkPath { /// creates invalid file:// URLs on that platform. fn encode(input: &[u8]) -> HyperlinkPath { let mut result = Vec::with_capacity(input.len()); - - for &c in input { - match c { + for &byte in input.iter() { + match byte { b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' @@ -474,7 +733,7 @@ impl HyperlinkPath { | b'_' | b'~' | 128.. => { - result.push(c); + result.push(byte); } #[cfg(windows)] b'\\' => { @@ -483,60 +742,12 @@ impl HyperlinkPath { _ => { const HEX: &[u8] = b"0123456789ABCDEF"; result.push(b'%'); - result.push(HEX[(c >> 4) as usize]); - result.push(HEX[(c & 0xF) as usize]); + result.push(HEX[(byte >> 4) as usize]); + result.push(HEX[(byte & 0xF) as usize]); } } } - - Self(result) - } -} - -impl std::fmt::Display for HyperlinkPath { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - std::str::from_utf8(&self.0).unwrap_or("invalid utf-8") - ) - } -} - -/// A simple abstraction over a hyperlink span written to the terminal. 
This -/// helps tracking whether a hyperlink has been started, and should be ended. -#[derive(Debug, Default)] -pub(crate) struct HyperlinkSpan { - active: bool, -} - -impl HyperlinkSpan { - /// Starts a hyperlink and returns a span which tracks whether it is still - /// in effect. - pub(crate) fn start( - wtr: &mut impl WriteColor, - hyperlink: &HyperlinkSpec, - ) -> io::Result { - if wtr.supports_hyperlinks() && hyperlink.uri().is_some() { - wtr.set_hyperlink(hyperlink)?; - Ok(HyperlinkSpan { active: true }) - } else { - Ok(HyperlinkSpan { active: false }) - } - } - - /// Ends the hyperlink span if it is active. - pub(crate) fn end(&mut self, wtr: &mut impl WriteColor) -> io::Result<()> { - if self.is_active() { - wtr.set_hyperlink(&HyperlinkSpec::close())?; - self.active = false; - } - Ok(()) - } - - /// Returns true if there is currently an active hyperlink. - pub(crate) fn is_active(&self) -> bool { - self.active + HyperlinkPath(result) } } @@ -547,135 +758,107 @@ mod tests { use super::*; #[test] - fn build_pattern() { - let pattern = HyperlinkPatternBuilder::new() + fn build_format() { + let format = FormatBuilder::new() .append_text(b"foo://") .append_text(b"bar-") .append_text(b"baz") - .append_file() + .append_path() .build() .unwrap(); - assert_eq!(pattern.to_string(), "foo://bar-baz{file}"); - assert_eq!(pattern.parts[0], Part::Text(b"foo://bar-baz".to_vec())); - assert!(!pattern.is_empty()); + assert_eq!(format.to_string(), "foo://bar-baz{path}"); + assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec())); + assert!(!format.is_empty()); } #[test] - fn build_empty_pattern() { - let pattern = HyperlinkPatternBuilder::new().build().unwrap(); + fn build_empty_format() { + let format = FormatBuilder::new().build().unwrap(); - assert!(pattern.is_empty()); - assert_eq!(pattern, HyperlinkPattern::empty()); - assert_eq!(pattern, HyperlinkPattern::default()); + assert!(format.is_empty()); + assert_eq!(format, HyperlinkFormat::empty()); + 
assert_eq!(format, HyperlinkFormat::default()); } #[test] fn handle_alias() { - assert!(HyperlinkPattern::from_str("file").is_ok()); - assert!(HyperlinkPattern::from_str("none").is_ok()); - assert!(HyperlinkPattern::from_str("none").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("file").is_ok()); + assert!(HyperlinkFormat::from_str("none").is_ok()); + assert!(HyperlinkFormat::from_str("none").unwrap().is_empty()); } #[test] - fn parse_pattern() { - let pattern = HyperlinkPattern::from_str( - "foo://{host}/bar/{file}:{line}:{column}", + fn parse_format() { + let format = HyperlinkFormat::from_str( + "foo://{host}/bar/{path}:{line}:{column}", ) .unwrap(); assert_eq!( - pattern.to_string(), - "foo://{host}/bar/{file}:{line}:{column}" - .replace("{host}", &HyperlinkPatternBuilder::get_hostname()) + format.to_string(), + "foo://{host}/bar/{path}:{line}:{column}" ); - assert_eq!(pattern.parts.len(), 6); - assert!(pattern.parts.contains(&Part::File)); - assert!(pattern.parts.contains(&Part::Line)); - assert!(pattern.parts.contains(&Part::Column)); + assert_eq!(format.parts.len(), 8); + assert!(format.parts.contains(&Part::Path)); + assert!(format.parts.contains(&Part::Line)); + assert!(format.parts.contains(&Part::Column)); } #[test] fn parse_valid() { - assert!(HyperlinkPattern::from_str("").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("").unwrap().is_empty()); assert_eq!( - HyperlinkPattern::from_str("foo://{file}").unwrap().to_string(), - "foo://{file}" + HyperlinkFormat::from_str("foo://{path}").unwrap().to_string(), + "foo://{path}" ); assert_eq!( - HyperlinkPattern::from_str("foo://{file}/bar") - .unwrap() - .to_string(), - "foo://{file}/bar" + HyperlinkFormat::from_str("foo://{path}/bar").unwrap().to_string(), + "foo://{path}/bar" ); - HyperlinkPattern::from_str("f://{file}").unwrap(); - HyperlinkPattern::from_str("f:{file}").unwrap(); - HyperlinkPattern::from_str("f-+.:{file}").unwrap(); - HyperlinkPattern::from_str("f42:{file}").unwrap(); 
+ HyperlinkFormat::from_str("f://{path}").unwrap(); + HyperlinkFormat::from_str("f:{path}").unwrap(); + HyperlinkFormat::from_str("f-+.:{path}").unwrap(); + HyperlinkFormat::from_str("f42:{path}").unwrap(); + HyperlinkFormat::from_str("42:{path}").unwrap(); + HyperlinkFormat::from_str("+:{path}").unwrap(); + HyperlinkFormat::from_str("F42:{path}").unwrap(); } #[test] fn parse_invalid() { assert_eq!( - HyperlinkPattern::from_str("foo://bar").unwrap_err(), - HyperlinkPatternError::NoFilePlaceholder + HyperlinkFormat::from_str("foo://bar").unwrap_err(), + HyperlinkFormatError::NoVariables ); assert_eq!( - HyperlinkPattern::from_str("foo://{bar}").unwrap_err(), - HyperlinkPatternError::InvalidPlaceholder("bar".to_string()) + HyperlinkFormat::from_str("foo://{line}").unwrap_err(), + HyperlinkFormatError::NoPathVariable ); assert_eq!( - HyperlinkPattern::from_str("foo://{file").unwrap_err(), - HyperlinkPatternError::InvalidSyntax + HyperlinkFormat::from_str("foo://{bar}").unwrap_err(), + HyperlinkFormatError::InvalidVariable("bar".to_string()) ); assert_eq!( - HyperlinkPattern::from_str("foo://{file}:{column}").unwrap_err(), - HyperlinkPatternError::NoLinePlaceholder + HyperlinkFormat::from_str("foo://{path").unwrap_err(), + HyperlinkFormatError::InvalidSyntax ); assert_eq!( - HyperlinkPattern::from_str("{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + HyperlinkFormat::from_str("foo://{path}:{column}").unwrap_err(), + HyperlinkFormatError::NoLineVariable ); assert_eq!( - HyperlinkPattern::from_str(":{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + HyperlinkFormat::from_str("{path}").unwrap_err(), + HyperlinkFormatError::InvalidScheme ); assert_eq!( - HyperlinkPattern::from_str("f*:{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme + HyperlinkFormat::from_str(":{path}").unwrap_err(), + HyperlinkFormatError::InvalidScheme + ); + assert_eq!( + HyperlinkFormat::from_str("f*:{path}").unwrap_err(), + HyperlinkFormatError::InvalidScheme 
); - } - - #[test] - fn aliases_are_valid() { - for (name, definition) in HYPERLINK_PATTERN_ALIASES { - assert!( - HyperlinkPattern::from_str(definition).is_ok(), - "invalid hyperlink alias: {}", - name - ); - } - } - - #[test] - fn aliases_are_sorted() { - let mut names = HYPERLINK_PATTERN_ALIASES.iter().map(|(name, _)| name); - - let Some(mut previous_name) = names.next() else { - return; - }; - - for name in names { - assert!( - name > previous_name, - "'{}' should be sorted before '{}' \ - in HYPERLINK_PATTERN_ALIASES", - name, - previous_name - ); - - previous_name = name; - } } } diff --git a/crates/printer/src/hyperlink_aliases.rs b/crates/printer/src/hyperlink_aliases.rs index 6d429bf8..c98bc0b0 100644 --- a/crates/printer/src/hyperlink_aliases.rs +++ b/crates/printer/src/hyperlink_aliases.rs @@ -1,23 +1,87 @@ /// Aliases to well-known hyperlink schemes. /// /// These need to be sorted by name. -pub(crate) const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ - #[cfg(unix)] - ("file", "file://{host}/{file}"), +const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ + #[cfg(not(windows))] + ("default", "file://{host}{path}"), #[cfg(windows)] - ("file", "file:///{file}"), + ("default", "file://{path}"), + ("file", "file://{host}{path}"), // https://github.com/misaki-web/grepp - ("grep+", "grep+:///{file}:{line}"), - ("kitty", "file://{host}/{file}#{line}"), + ("grep+", "grep+://{path}:{line}"), + ("kitty", "file://{host}{path}#{line}"), // https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F - ("macvim", "mvim://open?url=file:///{file}&line={line}&column={column}"), + ("macvim", "mvim://open?url=file://{path}&line={line}&column={column}"), ("none", ""), // https://github.com/inopinatus/sublime_url - ("subl", "subl://open?url=file:///{file}&line={line}&column={column}"), + ("subl", "subl://open?url=file://{path}&line={line}&column={column}"), // https://macromates