summaryrefslogtreecommitdiffstats
path: root/crates/core/args.rs
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2023-09-22 14:57:44 -0400
committer Andrew Gallant <jamslam@gmail.com> 2023-09-25 14:39:54 -0400
commit f608d4d9b3ab210b7e6964ca7d1d7dc9c077329e (patch)
tree 897ee1174b490022f99e9b5be68aaec05acea613 /crates/core/args.rs
parent 23e21133ba6a8ca2c4d040092ad1bc22f14e8861 (diff)
hyperlink: rejigger how hyperlinks work
This essentially takes the work done in #2483 and does a bit of a facelift. A brief summary:

* We reduce the hyperlink API we expose to just the format, a configuration and an environment.
* We move buffer management into a hyperlink-specific interpolator.
* We expand the documentation on --hyperlink-format.
* We rewrite the hyperlink format parser to be a simple state machine with support for escaping '{{' and '}}'.
* We remove the 'gethostname' dependency and instead insist that the caller provide the hostname. (So grep-printer doesn't get it itself, but the application will.) Similarly for the WSL prefix.
* Probably some other things.

Overall, the general structure of #2483 was kept. The biggest change is probably requiring the caller to pass in things like a hostname instead of having the crate do it. I did this for a couple of reasons:

1. I feel uncomfortable with code deep inside the printing logic reaching out into the environment to assume responsibility for retrieving the hostname. This feels more like an application-level responsibility. Arguably, path canonicalization falls into this same bucket, but it is more difficult to rip that out. (And we can do it in the future in a backwards compatible fashion I think.)
2. I wanted to permit end users to tell ripgrep about their system's hostname in their own way, e.g., by running a custom executable. I want this because I know at least for my own use cases, I sometimes log into systems using an SSH hostname that is distinct from the system's actual hostname (usually because the system is shared in some way or changing its hostname is not allowed/practical).

I think that's about it.

Closes #665, Closes #2483
Diffstat (limited to 'crates/core/args.rs')
-rw-r--r-- crates/core/args.rs 133
1 files changed, 122 insertions, 11 deletions
diff --git a/crates/core/args.rs b/crates/core/args.rs
index 0f8d1f18..75029a05 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -18,9 +18,9 @@ use grep::pcre2::{
RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
};
use grep::printer::{
- default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder,
- PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats,
- Summary, SummaryBuilder, SummaryKind, JSON,
+ default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment,
+ HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard,
+ StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON,
};
use grep::regex::{
RegexMatcher as RustRegexMatcher,
@@ -236,7 +236,7 @@ impl Args {
let mut builder = PathPrinterBuilder::new();
builder
.color_specs(self.matches().color_specs()?)
- .hyperlink_pattern(self.matches().hyperlink_pattern()?)
+ .hyperlink(self.matches().hyperlink_config()?)
.separator(self.matches().path_separator()?)
.terminator(self.matches().path_terminator().unwrap_or(b'\n'));
Ok(builder.build(wtr))
@@ -774,7 +774,7 @@ impl ArgMatches {
let mut builder = StandardBuilder::new();
builder
.color_specs(self.color_specs()?)
- .hyperlink_pattern(self.hyperlink_pattern()?)
+ .hyperlink(self.hyperlink_config()?)
.stats(self.stats())
.heading(self.heading())
.path(self.with_filename(paths))
@@ -814,7 +814,7 @@ impl ArgMatches {
builder
.kind(self.summary_kind().expect("summary format"))
.color_specs(self.color_specs()?)
- .hyperlink_pattern(self.hyperlink_pattern()?)
+ .hyperlink(self.hyperlink_config()?)
.stats(self.stats())
.path(self.with_filename(paths))
.max_matches(self.max_count()?)
@@ -1126,11 +1126,21 @@ impl ArgMatches {
/// for the current system is used if the value is not set.
///
/// If an invalid pattern is provided, then an error is returned.
- fn hyperlink_pattern(&self) -> Result<HyperlinkPattern> {
- Ok(match self.value_of_lossy("hyperlink-format") {
- Some(pattern) => HyperlinkPattern::from_str(&pattern)?,
- None => HyperlinkPattern::default_file_scheme(),
- })
+ fn hyperlink_config(&self) -> Result<HyperlinkConfig> {
+ let mut env = HyperlinkEnvironment::new(); // carries host and WSL prefix into the config
+ env.host(hostname(self.value_of_os("hostname-bin")))
+ .wsl_prefix(wsl_prefix());
+ let fmt = match self.value_of_lossy("hyperlink-format") {
+ None => HyperlinkFormat::from_str("default").unwrap(), // NOTE(review): assumes "default" always parses; confirm
+ Some(format) => match HyperlinkFormat::from_str(&format) {
+ Ok(format) => format,
+ Err(err) => {
+ let msg = format!("invalid hyperlink format: {err}"); // add context to the parse error
+ return Err(msg.into());
+ }
+ },
+ };
+ Ok(HyperlinkConfig::new(env, fmt))
}
/// Returns true if ignore files should be processed case insensitively.
@@ -1838,6 +1848,107 @@ fn current_dir() -> Result<PathBuf> {
.into())
}
+/// Retrieves the hostname that ripgrep should use wherever a hostname is
+/// required. Currently, that's just in the hyperlink format.
+///
+/// If `bin` is given, it is resolved and run with no arguments and a null
+/// stdin, and its output, trimmed of leading and trailing whitespace, is the
+/// hostname. If `bin` is absent, or if resolving, spawning or reading fails,
+/// or the trimmed output is empty, this falls back to `platform_hostname`.
+///
+/// Failures are only debug logged, never returned. `bin` exists so that end
+/// users can override how ripgrep determines the hostname.
+fn hostname(bin: Option<&OsStr>) -> Option<String> {
+ let Some(bin) = bin else { return platform_hostname() }; // no override given
+ let bin = match grep::cli::resolve_binary(bin) {
+ Ok(bin) => bin,
+ Err(err) => {
+ log::debug!(
+ "failed to run command '{bin:?}' to get hostname \
+ (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let mut cmd = process::Command::new(&bin);
+ cmd.stdin(process::Stdio::null()); // the command gets no input from us
+ let rdr = match grep::cli::CommandReader::new(&mut cmd) {
+ Ok(rdr) => rdr,
+ Err(err) => {
+ log::debug!(
+ "failed to spawn command '{bin:?}' to get \
+ hostname (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let out = match io::read_to_string(rdr) {
+ Ok(out) => out,
+ Err(err) => {
+ log::debug!(
+ "failed to read output from command '{bin:?}' to get \
+ hostname (falling back to platform hostname): {err}",
+ );
+ return platform_hostname();
+ }
+ };
+ let hostname = out.trim(); // drops, e.g., a trailing newline
+ if hostname.is_empty() { // an empty name is treated like a failure
+ log::debug!(
+ "output from command '{bin:?}' is empty after trimming \
+ leading and trailing whitespace (falling back to \
+ platform hostname)",
+ );
+ return platform_hostname();
+ }
+ Some(hostname.to_string())
+}
+
+/// Gets the hostname via `grep::cli::hostname`, i.e., by platform specific
+/// routines such as `gethostname` on Unix and `GetComputerNameExW` on Windows.
+/// Returns `None`, after a debug log, on lookup failure or invalid UTF-8.
+fn platform_hostname() -> Option<String> {
+ let hostname_os = match grep::cli::hostname() {
+ Ok(x) => x,
+ Err(err) => {
+ log::debug!("could not get hostname: {}", err);
+ return None;
+ }
+ };
+ let Some(hostname) = hostname_os.to_str() else { // only UTF-8 fits in the returned `String`
+ log::debug!(
+ "got hostname {:?}, but it's not valid UTF-8",
+ hostname_os
+ );
+ return None;
+ };
+ Some(hostname.to_string())
+}
+
+/// Returns a value that is meant to fill in the `{wslprefix}` variable for
+/// a user given hyperlink format. A WSL prefix is a share/network like thing
+/// that permits Windows applications to open files stored within a WSL
+/// drive. It is built as `wsl$/<distro>` from the `WSL_DISTRO_NAME` env var.
+///
+/// If `WSL_DISTRO_NAME` is unset, not valid UTF-8 (which is debug logged) or
+/// this isn't a Unix build, then this returns None.
+///
+/// See: <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+fn wsl_prefix() -> Option<String> {
+ if !cfg!(unix) { // decided by the compilation target, not at runtime
+ return None;
+ }
+ let distro_os = env::var_os("WSL_DISTRO_NAME")?; // unset variable => None
+ let Some(distro) = distro_os.to_str() else {
+ log::debug!(
+ "found WSL_DISTRO_NAME={:?}, but value is not UTF-8",
+ distro_os
+ );
+ return None;
+ };
+ Some(format!("wsl$/{distro}"))
+}
+
/// Tries to assign a timestamp to every `Subject` in the vector to help with
/// sorting Subjects by time.
fn load_timestamps<G>(