From f608d4d9b3ab210b7e6964ca7d1d7dc9c077329e Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 22 Sep 2023 14:57:44 -0400 Subject: [PATCH] hyperlink: rejigger how hyperlinks work This essentially takes the work done in #2483 and does a bit of a facelift. A brief summary: * We reduce the hyperlink API we expose to just the format, a configuration and an environment. * We move buffer management into a hyperlink-specific interpolator. * We expand the documentation on --hyperlink-format. * We rewrite the hyperlink format parser to be a simple state machine with support for escaping '{{' and '}}'. * We remove the 'gethostname' dependency and instead insist on the caller to provide the hostname. (So grep-printer doesn't get it itself, but the application will.) Similarly for the WSL prefix. * Probably some other things. Overall, the general structure of #2483 was kept. The biggest change is probably requiring the caller to pass in things like a hostname instead of having the crate do it. I did this for a couple reasons: 1. I feel uncomfortable with code deep inside the printing logic reaching out into the environment to assume responsibility for retrieving the hostname. This feels more like an application-level responsibility. Arguably, path canonicalization falls into this same bucket, but it is more difficult to rip that out. (And we can do it in the future in a backwards compatible fashion I think.) 2. I wanted to permit end users to tell ripgrep about their system's hostname in their own way, e.g., by running a custom executable. I want this because I know at least for my own use cases, I sometimes log into systems using an SSH hostname that is distinct from the system's actual hostname (usually because the system is shared in some way or changing its hostname is not allowed/practical). I think that's about it. 
Closes #665, Closes #2483 --- Cargo.lock | 69 +- complete/_rg | 1 + crates/core/app.rs | 89 +- crates/core/args.rs | 133 ++- crates/printer/Cargo.toml | 2 +- crates/printer/src/hyperlink.rs | 1304 ++++++++++++++--------- crates/printer/src/hyperlink_aliases.rs | 88 +- crates/printer/src/lib.rs | 5 +- crates/printer/src/path.rs | 49 +- crates/printer/src/standard.rs | 172 +-- crates/printer/src/summary.rs | 71 +- crates/printer/src/util.rs | 128 ++- 12 files changed, 1307 insertions(+), 804 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 885e73d3..6029cc1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,16 +136,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "gethostname" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" -dependencies = [ - "libc", - "windows-targets", -] - [[package]] name = "glob" version = "0.3.1" @@ -216,10 +206,10 @@ version = "0.1.7" dependencies = [ "base64", "bstr", - "gethostname", "grep-matcher", "grep-regex", "grep-searcher", + "log", "serde", "serde_json", "termcolor", @@ -621,60 +611,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-targets" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/complete/_rg b/complete/_rg index be8d18ba..7fd6c542 100644 --- a/complete/_rg +++ b/complete/_rg @@ -305,6 +305,7 @@ _rg() { '--debug[show debug messages]' '--field-context-separator[set string to delimit fields in context lines]' '--field-match-separator[set string to delimit fields in matching lines]' + '--hostname-bin=[executable for getting system hostname]:hostname executable:_command_names -e' '--hyperlink-format=[specify pattern for hyperlinks]:pattern' '--trace[show more verbose debug messages]' '--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)' 
diff --git a/crates/core/app.rs b/crates/core/app.rs index 9c523479..d0dfc8d3 100644 --- a/crates/core/app.rs +++ b/crates/core/app.rs @@ -580,6 +580,7 @@ pub fn all_args_and_flags() -> Vec { flag_glob_case_insensitive(&mut args); flag_heading(&mut args); flag_hidden(&mut args); + flag_hostname_bin(&mut args); flag_hyperlink_format(&mut args); flag_iglob(&mut args); flag_ignore_case(&mut args); @@ -1495,19 +1496,93 @@ This flag can be disabled with --no-hidden. args.push(arg); } +fn flag_hostname_bin(args: &mut Vec) { + const SHORT: &str = "Run a program to get this system's hostname."; + const LONG: &str = long!( + "\ +This flag controls how ripgrep determines this system's hostname. The flag's +value should correspond to an executable (either a path or something that can +be found via your system's *PATH* environment variable). When set, ripgrep will +run this executable, with no arguments, and treat its output (with leading and +trailing whitespace stripped) as your system's hostname. + +When not set (the default, or the empty string), ripgrep will try to +automatically detect your system's hostname. On Unix, this corresponds +to calling *gethostname*. On Windows, this corresponds to calling +*GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" + +ripgrep uses your system's hostname for producing hyperlinks. +" + ); + let arg = + RGArg::flag("hostname-bin", "COMMAND").help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_hyperlink_format(args: &mut Vec) { const SHORT: &str = "Set the format of hyperlinks to match results."; const LONG: &str = long!( "\ -Set the format of hyperlinks to match results. This defines a pattern which -can contain the following placeholders: {file}, {line}, {column}, and {host}. -An empty pattern or 'none' disables hyperlinks. +Set the format of hyperlinks to match results. Hyperlinks make certain elements +of ripgrep's output, such as file paths, clickable. 
This generally only works +in terminal emulators that support OSC-8 hyperlinks. For example, the format +*file://{host}{file}* will emit an RFC 8089 hyperlink. -The {file} placeholder is required, and will be replaced with the absolute -file path with a few adjustments: The leading '/' on Unix is removed, -and '\\' is replaced with '/' on Windows. +The following variables are available in the format string: -As an example, the default pattern on Unix systems is: 'file://{host}/{file}' +*{path}*: Required. This is replaced with a path to a matching file. The +path is guaranteed to be absolute and percent encoded such that it is valid to +put into a URI. Note that a path is guaranteed to start with a */*. + +*{host}*: Optional. This is replaced with your system's hostname. On Unix, +this corresponds to calling *gethostname*. On Windows, this corresponds to +calling *GetComputerNameExW* to fetch the system's \"physical DNS hostname.\" +Alternatively, if --hostname-bin was provided, then the hostname returned from +the output of that program will be returned. If no hostname could be found, +then this variable is replaced with the empty string. + +*{line}*: Optional. If appropriate, this is replaced with the line number of +a match. If no line number is available (for example, if --no-line-number was +given), then it is automatically replaced with the value *1*. + +*{column}*: Optional, but requires the presence of **{line}**. If appropriate, +this is replaced with the column number of a match. If no column number is +available (for example, if --no-column was given), then it is automatically +replaced with the value *1*. + +*{wslprefix}*: Optional. This is a special value that is set to +*wsl$/WSL_DISTRO_NAME*, where *WSL_DISTRO_NAME* corresponds to the value of +the equivalent environment variable. If the system is not Unix or if the +*WSL_DISTRO_NAME* environment variable is not set, then this is replaced with +the empty string. 
+ +Alternatively, a format string may correspond to one of the following +aliases: default, file, grep+, kitty, macvim, none, subl, textmate, vscode, +vscode-insiders, vscodium. + +A format string may be empty. An empty format string is equivalent to the +*none* alias. In this case, hyperlinks will be disabled. + +At present, the default format when ripgrep detects a tty on stdout all systems +is *default*. This is an alias that expands to *file://{host}{path}* on Unix +and *file://{path}* on Windows. When stdout is not a tty, then the default +format behaves as if it were *none*. That is, hyperlinks are disabled. + +Note that hyperlinks are only written when colors are enabled. To write +hyperlinks without colors, you'll need to configure ripgrep to not colorize +anything without actually disabling all ANSI escape codes completely: + + --colors 'path:none' --colors 'line:none' --colors 'column:none' --colors 'match:none' + +ripgrep works this way because it treats the *--color=(never|always|auto)* flag +as a proxy for whether ANSI escape codes should be used at all. This means +that environment variables like *NO_COLOR=1* and *TERM=dumb* not only disable +colors, but hyperlinks as well. Similarly, colors and hyperlinks are disabled +when ripgrep is not writing to a tty. (Unless one forces the issue by setting +*--color=always*.) 
+ +For more information on hyperlinks in terminal emulators, see: +https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda " ); let arg = diff --git a/crates/core/args.rs b/crates/core/args.rs index 0f8d1f18..75029a05 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -18,9 +18,9 @@ use grep::pcre2::{ RegexMatcherBuilder as PCRE2RegexMatcherBuilder, }; use grep::printer::{ - default_color_specs, ColorSpecs, HyperlinkPattern, JSONBuilder, - PathPrinter, PathPrinterBuilder, Standard, StandardBuilder, Stats, - Summary, SummaryBuilder, SummaryKind, JSON, + default_color_specs, ColorSpecs, HyperlinkConfig, HyperlinkEnvironment, + HyperlinkFormat, JSONBuilder, PathPrinter, PathPrinterBuilder, Standard, + StandardBuilder, Stats, Summary, SummaryBuilder, SummaryKind, JSON, }; use grep::regex::{ RegexMatcher as RustRegexMatcher, @@ -236,7 +236,7 @@ impl Args { let mut builder = PathPrinterBuilder::new(); builder .color_specs(self.matches().color_specs()?) - .hyperlink_pattern(self.matches().hyperlink_pattern()?) + .hyperlink(self.matches().hyperlink_config()?) .separator(self.matches().path_separator()?) .terminator(self.matches().path_terminator().unwrap_or(b'\n')); Ok(builder.build(wtr)) @@ -774,7 +774,7 @@ impl ArgMatches { let mut builder = StandardBuilder::new(); builder .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .heading(self.heading()) .path(self.with_filename(paths)) @@ -814,7 +814,7 @@ impl ArgMatches { builder .kind(self.summary_kind().expect("summary format")) .color_specs(self.color_specs()?) - .hyperlink_pattern(self.hyperlink_pattern()?) + .hyperlink(self.hyperlink_config()?) .stats(self.stats()) .path(self.with_filename(paths)) .max_matches(self.max_count()?) @@ -1126,11 +1126,21 @@ impl ArgMatches { /// for the current system is used if the value is not set. /// /// If an invalid pattern is provided, then an error is returned. 
- fn hyperlink_pattern(&self) -> Result { - Ok(match self.value_of_lossy("hyperlink-format") { - Some(pattern) => HyperlinkPattern::from_str(&pattern)?, - None => HyperlinkPattern::default_file_scheme(), - }) + fn hyperlink_config(&self) -> Result { + let mut env = HyperlinkEnvironment::new(); + env.host(hostname(self.value_of_os("hostname-bin"))) + .wsl_prefix(wsl_prefix()); + let fmt = match self.value_of_lossy("hyperlink-format") { + None => HyperlinkFormat::from_str("default").unwrap(), + Some(format) => match HyperlinkFormat::from_str(&format) { + Ok(format) => format, + Err(err) => { + let msg = format!("invalid hyperlink format: {err}"); + return Err(msg.into()); + } + }, + }; + Ok(HyperlinkConfig::new(env, fmt)) } /// Returns true if ignore files should be processed case insensitively. @@ -1838,6 +1848,107 @@ fn current_dir() -> Result { .into()) } +/// Retrieves the hostname that ripgrep should use wherever a hostname is +/// required. Currently, that's just in the hyperlink format. +/// +/// This works by first running the given binary program (if present and with +/// no arguments) to get the hostname after trimming leading and trailing +/// whitespace. If that fails for any reason, then it falls back to getting +/// the hostname via platform specific means (e.g., `gethostname` on Unix). +/// +/// The purpose of `bin` is to make it possible for end users to override how +/// ripgrep determines the hostname. 
+fn hostname(bin: Option<&OsStr>) -> Option { + let Some(bin) = bin else { return platform_hostname() }; + let bin = match grep::cli::resolve_binary(bin) { + Ok(bin) => bin, + Err(err) => { + log::debug!( + "failed to run command '{bin:?}' to get hostname \ + (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let mut cmd = process::Command::new(&bin); + cmd.stdin(process::Stdio::null()); + let rdr = match grep::cli::CommandReader::new(&mut cmd) { + Ok(rdr) => rdr, + Err(err) => { + log::debug!( + "failed to spawn command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let out = match io::read_to_string(rdr) { + Ok(out) => out, + Err(err) => { + log::debug!( + "failed to read output from command '{bin:?}' to get \ + hostname (falling back to platform hostname): {err}", + ); + return platform_hostname(); + } + }; + let hostname = out.trim(); + if hostname.is_empty() { + log::debug!( + "output from command '{bin:?}' is empty after trimming \ + leading and trailing whitespace (falling back to \ + platform hostname)", + ); + return platform_hostname(); + } + Some(hostname.to_string()) +} + +/// Attempts to get the hostname by using platform specific routines. For +/// example, this will do `gethostname` on Unix and `GetComputerNameExW` on +/// Windows. +fn platform_hostname() -> Option { + let hostname_os = match grep::cli::hostname() { + Ok(x) => x, + Err(err) => { + log::debug!("could not get hostname: {}", err); + return None; + } + }; + let Some(hostname) = hostname_os.to_str() else { + log::debug!( + "got hostname {:?}, but it's not valid UTF-8", + hostname_os + ); + return None; + }; + Some(hostname.to_string()) +} + +/// Returns a value that is meant to fill in the `{wslprefix}` variable for +/// a user given hyperlink format. 
A WSL prefix is a share/network like thing +/// that is meant to permit Windows applications to open files stored within +/// a WSL drive. +/// +/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running +/// in a Unix environment, then this returns None. +/// +/// See: +fn wsl_prefix() -> Option { + if !cfg!(unix) { + return None; + } + let distro_os = env::var_os("WSL_DISTRO_NAME")?; + let Some(distro) = distro_os.to_str() else { + log::debug!( + "found WSL_DISTRO_NAME={:?}, but value is not UTF-8", + distro_os + ); + return None; + }; + Some(format!("wsl$/{distro}")) +} + /// Tries to assign a timestamp to every `Subject` in the vector to help with /// sorting Subjects by time. fn load_timestamps( diff --git a/crates/printer/Cargo.toml b/crates/printer/Cargo.toml index 69e03d65..dc63a6cc 100644 --- a/crates/printer/Cargo.toml +++ b/crates/printer/Cargo.toml @@ -21,9 +21,9 @@ serde = ["dep:base64", "dep:serde", "dep:serde_json"] [dependencies] base64 = { version = "0.21.4", optional = true } bstr = "1.6.2" -gethostname = "0.4.3" grep-matcher = { version = "0.1.6", path = "../matcher" } grep-searcher = { version = "0.1.11", path = "../searcher" } +log = "0.4.5" termcolor = "1.3.0" serde = { version = "1.0.188", optional = true, features = ["derive"] } serde_json = { version = "1.0.107", optional = true } diff --git a/crates/printer/src/hyperlink.rs b/crates/printer/src/hyperlink.rs index fa38b5c2..7e6be6e4 100644 --- a/crates/printer/src/hyperlink.rs +++ b/crates/printer/src/hyperlink.rs @@ -1,85 +1,332 @@ -use std::{ - io::{self, Write}, - path::Path, -}; +use std::{cell::RefCell, io, path::Path, sync::Arc}; use { bstr::ByteSlice, termcolor::{HyperlinkSpec, WriteColor}, }; -use crate::hyperlink_aliases::HYPERLINK_PATTERN_ALIASES; +use crate::hyperlink_aliases; + +/// Hyperlink configuration. 
+/// +/// This configuration specifies both the [hyperlink format](HyperlinkFormat) +/// and an [environment](HyperlinkEnvironment) for interpolating a subset of +/// variables. The specific subset includes variables that are intended to +/// be invariant throughout the lifetime of a process, such as a machine's +/// hostname. +/// +/// A hyperlink configuration can be provided to printer builders such as +/// [`StandardBuilder::hyperlink`](crate::StandardBuilder::hyperlink). +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkConfig(Arc); + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +struct HyperlinkConfigInner { + env: HyperlinkEnvironment, + format: HyperlinkFormat, +} + +impl HyperlinkConfig { + /// Create a new configuration from an environment and a format. + pub fn new( + env: HyperlinkEnvironment, + format: HyperlinkFormat, + ) -> HyperlinkConfig { + HyperlinkConfig(Arc::new(HyperlinkConfigInner { env, format })) + } + + /// Returns the hyperlink environment in this configuration. + pub(crate) fn environment(&self) -> &HyperlinkEnvironment { + &self.0.env + } + + /// Returns the hyperlink format in this configuration. + pub(crate) fn format(&self) -> &HyperlinkFormat { + &self.0.format + } +} + +/// A hyperlink format with variables. +/// +/// This can be created by parsing a string using `HyperlinkFormat::from_str`. +/// +/// The default format is empty. An empty format is valid and effectively +/// disables hyperlinks. +/// +/// # Example +/// +/// ``` +/// use grep_printer::HyperlinkFormat; +/// +/// let fmt = "vscode".parse::()?; +/// assert_eq!(fmt.to_string(), "vscode://file{path}:{line}:{column}"); +/// +/// # Ok::<(), Box>(()) +/// ``` +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkFormat { + parts: Vec, + is_line_dependent: bool, +} + +impl HyperlinkFormat { + /// Creates an empty hyperlink format. 
+ pub fn empty() -> HyperlinkFormat { + HyperlinkFormat::default() + } + + /// Returns true if this format is empty. + pub fn is_empty(&self) -> bool { + self.parts.is_empty() + } + + /// Creates a [`HyperlinkConfig`] from this format and the environment + /// given. + pub fn into_config(self, env: HyperlinkEnvironment) -> HyperlinkConfig { + HyperlinkConfig::new(env, self) + } + + /// Returns true if the format can produce line-dependent hyperlinks. + pub(crate) fn is_line_dependent(&self) -> bool { + self.is_line_dependent + } +} + +impl std::str::FromStr for HyperlinkFormat { + type Err = HyperlinkFormatError; + + fn from_str(s: &str) -> Result { + use self::HyperlinkFormatErrorKind::*; + + #[derive(Debug)] + enum State { + Verbatim, + VerbatimCloseVariable, + OpenVariable, + InVariable, + } + + let mut builder = FormatBuilder::new(); + let input = match hyperlink_aliases::find(s) { + Some(format) => format, + None => s, + }; + let mut name = String::new(); + let mut state = State::Verbatim; + let err = |kind| HyperlinkFormatError { kind }; + for ch in input.chars() { + state = match state { + State::Verbatim => { + if ch == '{' { + State::OpenVariable + } else if ch == '}' { + State::VerbatimCloseVariable + } else { + builder.append_char(ch); + State::Verbatim + } + } + State::VerbatimCloseVariable => { + if ch == '}' { + builder.append_char('}'); + State::Verbatim + } else { + return Err(err(InvalidCloseVariable)); + } + } + State::OpenVariable => { + if ch == '{' { + builder.append_char('{'); + State::Verbatim + } else { + name.clear(); + if ch == '}' { + builder.append_var(&name)?; + State::Verbatim + } else { + name.push(ch); + State::InVariable + } + } + } + State::InVariable => { + if ch == '}' { + builder.append_var(&name)?; + State::Verbatim + } else { + name.push(ch); + State::InVariable + } + } + }; + } + match state { + State::Verbatim => builder.build(), + State::VerbatimCloseVariable => Err(err(InvalidCloseVariable)), + State::OpenVariable | 
State::InVariable => { + Err(err(UnclosedVariable)) + } + } + } +} + +impl std::fmt::Display for HyperlinkFormat { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + for part in self.parts.iter() { + part.fmt(f)?; + } + Ok(()) + } +} + +/// A static environment for hyperlink interpolation. +/// +/// This environment permits setting the values of variables used in hyperlink +/// interpolation that are not expected to change for the lifetime of a program. +/// That is, these values are invariant. +/// +/// Currently, this includes the hostname and a WSL distro prefix. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct HyperlinkEnvironment { + host: Option, + wsl_prefix: Option, +} + +impl HyperlinkEnvironment { + /// Create a new empty hyperlink environment. + pub fn new() -> HyperlinkEnvironment { + HyperlinkEnvironment::default() + } + + /// Set the `{host}` variable, which fills in any hostname components of + /// a hyperlink. + /// + /// One can get the hostname in the current environment via the `hostname` + /// function in the `grep-cli` crate. + pub fn host(&mut self, host: Option) -> &mut HyperlinkEnvironment { + self.host = host; + self + } + + /// Set the `{wslprefix}` variable, which contains the WSL distro prefix. + /// An example value is `wsl$/Ubuntu`. The distro name can typically be + /// discovered from the `WSL_DISTRO_NAME` environment variable. + pub fn wsl_prefix( + &mut self, + wsl_prefix: Option, + ) -> &mut HyperlinkEnvironment { + self.wsl_prefix = wsl_prefix; + self + } +} + +/// An error that can occur when parsing a hyperlink format. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct HyperlinkFormatError { + kind: HyperlinkFormatErrorKind, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +enum HyperlinkFormatErrorKind { + /// This occurs when there are zero variables in the format. + NoVariables, + /// This occurs when the {path} variable is missing. 
+ NoPathVariable, + /// This occurs when the {line} variable is missing, while the {column} + /// variable is present. + NoLineVariable, + /// This occurs when an unknown variable is used. + InvalidVariable(String), + /// The format doesn't start with a valid scheme. + InvalidScheme, + /// This occurs when an unescaped `}` is found without a corresponding + /// `{` preceding it. + InvalidCloseVariable, + /// This occurs when a `{` is found without a corresponding `}` following + /// it. + UnclosedVariable, +} + +impl std::error::Error for HyperlinkFormatError {} + +impl std::fmt::Display for HyperlinkFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use self::HyperlinkFormatErrorKind::*; + + match self.kind { + NoVariables => { + let aliases = hyperlink_aliases::iter() + .map(|(name, _)| name) + .collect::>() + .join(", "); + write!( + f, + "at least a {{path}} variable is required in a \ + hyperlink format, or otherwise use a valid alias: {}", + aliases, + ) + } + NoPathVariable => { + write!( + f, + "the {{path}} variable is required in a hyperlink format", + ) + } + NoLineVariable => { + write!( + f, + "the hyperlink format contains a {{column}} variable, \ + but no {{line}} variable is present", + ) + } + InvalidVariable(ref name) => { + write!( + f, + "invalid hyperlink format variable: '{name}', choose \ + from: path, line, column, host", + ) + } + InvalidScheme => { + write!( + f, + "the hyperlink format must start with a valid URL scheme, \ + i.e., [0-9A-Za-z+-.]+:", + ) + } + InvalidCloseVariable => { + write!( + f, + "unopened variable: found '}}' without a \ + corresponding '{{' preceding it", + ) + } + UnclosedVariable => { + write!( + f, + "unclosed variable: found '{{' without a \ + corresponding '}}' following it", + ) + } + } + } +} /// A builder for `HyperlinkPattern`. /// /// Once a `HyperlinkPattern` is built, it is immutable. 
#[derive(Debug)] -pub struct HyperlinkPatternBuilder { +struct FormatBuilder { parts: Vec, } -/// A hyperlink pattern with placeholders. -/// -/// This can be created with `HyperlinkPatternBuilder` or from a string -/// using `HyperlinkPattern::from_str`. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct HyperlinkPattern { - parts: Vec, - is_line_dependent: bool, -} - -/// A hyperlink pattern part. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Part { - /// Static text. Can include invariant values such as the hostname. - Text(Vec), - /// Placeholder for the file path. - File, - /// Placeholder for the line number. - Line, - /// Placeholder for the column number. - Column, -} - -/// An error that can occur when parsing a hyperlink pattern. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum HyperlinkPatternError { - /// This occurs when the pattern syntax is not valid. - InvalidSyntax, - /// This occurs when the {file} placeholder is missing. - NoFilePlaceholder, - /// This occurs when the {line} placeholder is missing, - /// while the {column} placeholder is present. - NoLinePlaceholder, - /// This occurs when an unknown placeholder is used. - InvalidPlaceholder(String), - /// The pattern doesn't start with a valid scheme. - InvalidScheme, -} - -/// The values to replace the pattern placeholders with. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkValues<'a> { - file: &'a HyperlinkPath, - line: u64, - column: u64, -} - -/// Represents the {file} part of a hyperlink. -/// -/// This is the value to use as-is in the hyperlink, converted from an OS file -/// path. -#[derive(Clone, Debug)] -pub(crate) struct HyperlinkPath(Vec); - -impl HyperlinkPatternBuilder { - /// Creates a new hyperlink pattern builder. - pub fn new() -> Self { - Self { parts: vec![] } +impl FormatBuilder { + /// Creates a new hyperlink format builder. + fn new() -> FormatBuilder { + FormatBuilder { parts: vec![] } } /// Appends static text. 
- pub fn append_text(&mut self, text: &[u8]) -> &mut Self { + fn append_slice(&mut self, text: &[u8]) -> &mut FormatBuilder { if let Some(Part::Text(contents)) = self.parts.last_mut() { contents.extend_from_slice(text); } else if !text.is_empty() { @@ -88,307 +335,376 @@ impl HyperlinkPatternBuilder { self } - /// Appends the hostname. - /// - /// On WSL, appends `wsl$/{distro}` instead. - pub fn append_hostname(&mut self) -> &mut Self { - self.append_text(Self::get_hostname().as_bytes()) + /// Appends a single character. + fn append_char(&mut self, ch: char) -> &mut FormatBuilder { + self.append_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()) } - /// Returns the hostname to use in the pattern. - /// - /// On WSL, returns `wsl$/{distro}`. - fn get_hostname() -> String { - if cfg!(unix) { - if let Ok(mut wsl_distro) = std::env::var("WSL_DISTRO_NAME") { - wsl_distro.insert_str(0, "wsl$/"); - return wsl_distro; + /// Appends a variable with the given name. If the name isn't recognized, + /// then this returns an error. + fn append_var( + &mut self, + name: &str, + ) -> Result<&mut FormatBuilder, HyperlinkFormatError> { + let part = match name { + "host" => Part::Host, + "wslprefix" => Part::WSLPrefix, + "path" => Part::Path, + "line" => Part::Line, + "column" => Part::Column, + unknown => { + let err = HyperlinkFormatError { + kind: HyperlinkFormatErrorKind::InvalidVariable( + unknown.to_string(), + ), + }; + return Err(err); } - } - - gethostname::gethostname().to_string_lossy().to_string() + }; + self.parts.push(part); + Ok(self) } - /// Appends a placeholder for the file path. - pub fn append_file(&mut self) -> &mut Self { - self.parts.push(Part::File); - self - } - - /// Appends a placeholder for the line number. - pub fn append_line(&mut self) -> &mut Self { - self.parts.push(Part::Line); - self - } - - /// Appends a placeholder for the column number. 
- pub fn append_column(&mut self) -> &mut Self { - self.parts.push(Part::Column); - self - } - - /// Builds the pattern. - pub fn build(&self) -> Result { + /// Builds the format. + fn build(&self) -> Result { self.validate()?; - - Ok(HyperlinkPattern { + Ok(HyperlinkFormat { parts: self.parts.clone(), is_line_dependent: self.parts.contains(&Part::Line), }) } - /// Validate that the pattern is well-formed. - fn validate(&self) -> Result<(), HyperlinkPatternError> { + /// Validate that the format is well-formed. + fn validate(&self) -> Result<(), HyperlinkFormatError> { + use self::HyperlinkFormatErrorKind::*; + + let err = |kind| HyperlinkFormatError { kind }; + // An empty format is fine. It just means hyperlink support is + // disabled. if self.parts.is_empty() { return Ok(()); } - - if !self.parts.contains(&Part::File) { - return Err(HyperlinkPatternError::NoFilePlaceholder); + // If all parts are just text, then there are no variables. It's + // likely a reference to invalid alias. + if self.parts.iter().all(|p| matches!(*p, Part::Text(_))) { + return Err(err(NoVariables)); } - + // Even if we have other variables, no path variable means the + // hyperlink can't possibly work the way it is intended. + if !self.parts.contains(&Part::Path) { + return Err(err(NoPathVariable)); + } + // If the {column} variable is used, then we also need a {line} + // variable or else {column} can't possibly work. if self.parts.contains(&Part::Column) && !self.parts.contains(&Part::Line) { - return Err(HyperlinkPatternError::NoLinePlaceholder); + return Err(err(NoLineVariable)); } - self.validate_scheme() } - /// Validate that the pattern starts with a valid scheme. + /// Validate that the format starts with a valid scheme. Validation is done + /// according to how a scheme is defined in RFC 1738 sections 2.1[1] and + /// 5[2]. 
In short, a scheme is this: /// - /// A valid scheme starts with an alphabetic character, continues with - /// a sequence of alphanumeric characters, periods, hyphens or plus signs, - /// and ends with a colon. - fn validate_scheme(&self) -> Result<(), HyperlinkPatternError> { - if let Some(Part::Text(value)) = self.parts.first() { - if let Some(colon_index) = value.find_byte(b':') { - if value[0].is_ascii_alphabetic() - && value.iter().take(colon_index).all(|c| { - c.is_ascii_alphanumeric() - || matches!(c, b'.' | b'-' | b'+') - }) - { - return Ok(()); - } - } + /// scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] + /// + /// but is case insensitive. + /// + /// [1]: https://datatracker.ietf.org/doc/html/rfc1738#section-2.1 + /// [2]: https://datatracker.ietf.org/doc/html/rfc1738#section-5 + fn validate_scheme(&self) -> Result<(), HyperlinkFormatError> { + let err_invalid_scheme = HyperlinkFormatError { + kind: HyperlinkFormatErrorKind::InvalidScheme, + }; + let Some(Part::Text(ref part)) = self.parts.first() else { + return Err(err_invalid_scheme); + }; + let Some(colon) = part.find_byte(b':') else { + return Err(err_invalid_scheme); + }; + let scheme = &part[..colon]; + if scheme.is_empty() { + return Err(err_invalid_scheme); } - - Err(HyperlinkPatternError::InvalidScheme) - } -} - -impl HyperlinkPattern { - /// Creates an empty hyperlink pattern. - pub fn empty() -> Self { - HyperlinkPattern::default() - } - - /// Creates a default pattern suitable for Unix. - /// - /// The returned pattern is `file://{host}/{file}` - #[cfg(unix)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file://") - .append_hostname() - .append_text(b"/") - .append_file() - .build() - .unwrap() - } - - /// Creates a default pattern suitable for Windows. 
- /// - /// The returned pattern is `file:///{file}` - #[cfg(windows)] - pub fn default_file_scheme() -> Self { - HyperlinkPatternBuilder::new() - .append_text(b"file:///") - .append_file() - .build() - .unwrap() - } - - /// Returns true if this pattern is empty. - pub fn is_empty(&self) -> bool { - self.parts.is_empty() - } - - /// Returns true if the pattern can produce line-dependent hyperlinks. - pub fn is_line_dependent(&self) -> bool { - self.is_line_dependent - } - - /// Renders this pattern with the given values to the given output. - pub(crate) fn render( - &self, - values: &HyperlinkValues, - output: &mut impl Write, - ) -> io::Result<()> { - for part in &self.parts { - part.render(values, output)?; + let is_valid_scheme_char = |byte| match byte { + b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'+' | b'-' | b'.' => { + true + } + _ => false, + }; + if !scheme.iter().all(|&b| is_valid_scheme_char(b)) { + return Err(err_invalid_scheme); } Ok(()) } } -impl std::str::FromStr for HyperlinkPattern { - type Err = HyperlinkPatternError; - - fn from_str(s: &str) -> Result { - let mut builder = HyperlinkPatternBuilder::new(); - let mut input = s.as_bytes(); - - if let Ok(index) = HYPERLINK_PATTERN_ALIASES - .binary_search_by_key(&input, |&(name, _)| name.as_bytes()) - { - input = HYPERLINK_PATTERN_ALIASES[index].1.as_bytes(); - } - - while !input.is_empty() { - if input[0] == b'{' { - // Placeholder - let end = input - .find_byte(b'}') - .ok_or(HyperlinkPatternError::InvalidSyntax)?; - - match &input[1..end] { - b"file" => builder.append_file(), - b"line" => builder.append_line(), - b"column" => builder.append_column(), - b"host" => builder.append_hostname(), - other => { - return Err(HyperlinkPatternError::InvalidPlaceholder( - String::from_utf8_lossy(other).to_string(), - )) - } - }; - - input = &input[(end + 1)..]; - } else { - // Static text - let end = input.find_byte(b'{').unwrap_or(input.len()); - builder.append_text(&input[..end]); - input = &input[end..]; - } 
- } - - builder.build() - } -} - -impl ToString for HyperlinkPattern { - fn to_string(&self) -> String { - self.parts.iter().map(|p| p.to_string()).collect() - } +/// A hyperlink format part. +/// +/// A sequence of these corresponds to a complete format. (Not all sequences +/// are valid.) +#[derive(Clone, Debug, Eq, PartialEq)] +enum Part { + /// Static text. + /// + /// We use `Vec` here (and more generally treat a format string as a + /// sequence of bytes) because file paths may be arbitrary bytes. A rare + /// case, but one for which there is no good reason to choke on. + Text(Vec), + /// Variable for the hostname. + Host, + /// Variable for a WSL path prefix. + WSLPrefix, + /// Variable for the file path. + Path, + /// Variable for the line number. + Line, + /// Variable for the column number. + Column, } impl Part { - fn render( + /// Interpolate this part using the given `env` and `values`, and write + /// the result of interpolation to the buffer provided. + fn interpolate_to( &self, - values: &HyperlinkValues, - output: &mut impl Write, + env: &HyperlinkEnvironment, + values: &Values, + dest: &mut Vec, + ) { + match self { + Part::Text(ref text) => dest.extend_from_slice(text), + Part::Host => dest.extend_from_slice( + env.host.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::WSLPrefix => dest.extend_from_slice( + env.wsl_prefix.as_ref().map(|s| s.as_bytes()).unwrap_or(b""), + ), + Part::Path => dest.extend_from_slice(&values.path.0), + Part::Line => { + let line = values.line.unwrap_or(1).to_string(); + dest.extend_from_slice(line.as_bytes()); + } + Part::Column => { + let column = values.column.unwrap_or(1).to_string(); + dest.extend_from_slice(column.as_bytes()); + } + } + } +} + +impl std::fmt::Display for Part { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Part::Text(text) => write!(f, "{}", String::from_utf8_lossy(text)), + Part::Host => write!(f, "{{host}}"), + Part::WSLPrefix => write!(f, 
"{{wslprefix}}"), + Part::Path => write!(f, "{{path}}"), + Part::Line => write!(f, "{{line}}"), + Part::Column => write!(f, "{{column}}"), + } + } +} + +/// The values to replace the format variables with. +/// +/// This only consists of values that depend on each path or match printed. +/// Values that are invariant throughout the lifetime of the process are set +/// via a [`HyperlinkEnvironment`]. +#[derive(Clone, Debug)] +pub(crate) struct Values<'a> { + path: &'a HyperlinkPath, + line: Option, + column: Option, +} + +impl<'a> Values<'a> { + /// Creates a new set of values, starting with the path given. + /// + /// Callers may also set the line and column number using the mutator + /// methods. + pub(crate) fn new(path: &'a HyperlinkPath) -> Values<'a> { + Values { path, line: None, column: None } + } + + /// Sets the line number for these values. + /// + /// If a line number is not set and a hyperlink format contains a `{line}` + /// variable, then it is interpolated with the value of `1` automatically. + pub(crate) fn line(mut self, line: Option) -> Values<'a> { + self.line = line; + self + } + + /// Sets the column number for these values. + /// + /// If a column number is not set and a hyperlink format contains a + /// `{column}` variable, then it is interpolated with the value of `1` + /// automatically. + pub(crate) fn column(mut self, column: Option) -> Values<'a> { + self.column = column; + self + } +} + +/// An abstraction for interpolating a hyperlink format with values for every +/// variable. +/// +/// Interpolation of variables occurs through two different sources. The +/// first is via a `HyperlinkEnvironment` for values that are expected to +/// be invariant. This comes from the `HyperlinkConfig` used to build this +/// interpolator. The second source is via `Values`, which is provided to +/// `Interpolator::begin`. The `Values` contains things like the file path, +/// line number and column number. 
+#[derive(Clone, Debug)] +pub(crate) struct Interpolator { + config: HyperlinkConfig, + buf: RefCell>, +} + +impl Interpolator { + /// Create a new interpolator for the given hyperlink format configuration. + pub(crate) fn new(config: &HyperlinkConfig) -> Interpolator { + Interpolator { config: config.clone(), buf: RefCell::new(vec![]) } + } + + /// Start interpolation with the given values by writing a hyperlink + /// to `wtr`. Subsequent writes to `wtr`, until `Interpolator::end` is + /// called, are the label for the hyperlink. + /// + /// This returns an interpolator status which indicates whether the + /// hyperlink was written. It might not be written, for example, if the + /// underlying writer doesn't support hyperlinks or if the hyperlink + /// format is empty. The status should be provided to `Interpolator::end` + /// as an instruction for whether to close the hyperlink or not. + pub(crate) fn begin( + &self, + values: &Values, + mut wtr: W, + ) -> io::Result { + if self.config.format().is_empty() + || !wtr.supports_hyperlinks() + || !wtr.supports_color() + { + return Ok(InterpolatorStatus::inactive()); + } + let mut buf = self.buf.borrow_mut(); + buf.clear(); + for part in self.config.format().parts.iter() { + part.interpolate_to(self.config.environment(), values, &mut buf); + } + let spec = HyperlinkSpec::open(&buf); + wtr.set_hyperlink(&spec)?; + Ok(InterpolatorStatus { active: true }) + } + + /// Writes the correct escape sequences to `wtr` to close any extant + /// hyperlink, marking the end of a hyperlink's label. + /// + /// The status given should be returned from a corresponding + /// `Interpolator::begin` call. Since `begin` may not write a hyperlink + /// (e.g., if the underlying writer doesn't support hyperlinks), it follows + /// that `finish` must not close a hyperlink that was never opened. The + /// status indicates whether the hyperlink was opened or not. 
+ pub(crate) fn finish( + &self, + status: InterpolatorStatus, + mut wtr: W, ) -> io::Result<()> { - match self { - Part::Text(text) => output.write_all(text), - Part::File => output.write_all(&values.file.0), - Part::Line => write!(output, "{}", values.line), - Part::Column => write!(output, "{}", values.column), + if !status.active { + return Ok(()); } + wtr.set_hyperlink(&HyperlinkSpec::close()) } } -impl ToString for Part { - fn to_string(&self) -> String { - match self { - Part::Text(text) => String::from_utf8_lossy(text).to_string(), - Part::File => "{file}".to_string(), - Part::Line => "{line}".to_string(), - Part::Column => "{column}".to_string(), - } +/// A status indicating whether a hyperlink was written or not. +/// +/// This is created by `Interpolator::begin` and used by `Interpolator::finish` +/// to determine whether a hyperlink was actually opened or not. If it wasn't +/// opened, then finishing interpolation is a no-op. +#[derive(Debug)] +pub(crate) struct InterpolatorStatus { + active: bool, +} + +impl InterpolatorStatus { + /// Create an inactive interpolator status. 
+ pub(crate) fn inactive() -> InterpolatorStatus { + InterpolatorStatus { active: false } } } -impl std::fmt::Display for HyperlinkPatternError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - HyperlinkPatternError::InvalidSyntax => { - write!(f, "invalid hyperlink pattern syntax") - } - HyperlinkPatternError::NoFilePlaceholder => { - write!( - f, - "the {{file}} placeholder is required in hyperlink \ - patterns", - ) - } - HyperlinkPatternError::NoLinePlaceholder => { - write!( - f, - "the hyperlink pattern contains a {{column}} placeholder, \ - but no {{line}} placeholder is present", - ) - } - HyperlinkPatternError::InvalidPlaceholder(name) => { - write!( - f, - "invalid hyperlink pattern placeholder: '{}', choose \ - from: file, line, column, host", - name - ) - } - HyperlinkPatternError::InvalidScheme => { - write!( - f, - "the hyperlink pattern must start with a valid URL scheme" - ) - } - } - } -} - -impl std::error::Error for HyperlinkPatternError {} - -impl<'a> HyperlinkValues<'a> { - /// Creates a new set of hyperlink values. - pub(crate) fn new( - file: &'a HyperlinkPath, - line: Option, - column: Option, - ) -> Self { - HyperlinkValues { - file, - line: line.unwrap_or(1), - column: column.unwrap_or(1), - } - } -} +/// Represents the `{path}` part of a hyperlink. +/// +/// This is the value to use as-is in the hyperlink, converted from an OS file +/// path. +#[derive(Clone, Debug)] +pub(crate) struct HyperlinkPath(Vec); impl HyperlinkPath { /// Returns a hyperlink path from an OS path. #[cfg(unix)] - pub(crate) fn from_path(path: &Path) -> Option { - // On Unix, this function returns the absolute file path without the - // leading slash, as it makes for more natural hyperlink patterns, for - // instance: - // file://{host}/{file} instead of file://{host}{file} - // vscode://file/{file} instead of vscode://file{file} - // It also allows for patterns to be multi-platform. 
+ pub(crate) fn from_path(original_path: &Path) -> Option { + use std::os::unix::ffi::OsStrExt; - let path = path.canonicalize().ok()?; - let path = path.to_str()?.as_bytes(); - let path = if path.starts_with(b"/") { &path[1..] } else { path }; - Some(Self::encode(path)) + // We canonicalize the path in order to get an absolute version of it + // without any `.` or `..` or superflous separators. Unfortunately, + // this does also remove symlinks, and in theory, it would be nice to + // retain them. Perhaps even simpler, we could just join the current + // working directory with the path and be done with it. There was + // some discussion about this on PR#2483, and there generally appears + // to be some uncertainty about the extent to which hyperlinks with + // things like `..` in them actually work. So for now, we do the safest + // thing possible even though I think it can result in worse user + // experience. (Because it means the path you click on and the actual + // path that gets followed are different, even though they ostensibly + // refer to the same file.) + // + // There's also the potential issue that path canonicalization is + // expensive since it can touch the file system. That is probably + // less of an issue since hyperlinks are only created when they're + // supported, i.e., when writing to a tty. + // + // [1]: https://github.com/BurntSushi/ripgrep/pull/2483 + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; + } + }; + let bytes = path.as_os_str().as_bytes(); + // This should not be possible since one imagines that canonicalization + // should always return an absolute path. But it doesn't actually + // appear guaranteed by POSIX, so we check whether it's true or not and + // refuse to create a hyperlink from a relative path if it isn't. 
+ if !bytes.starts_with(b"/") { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with a slash", + original_path, + path, + ); + return None; + } + Some(HyperlinkPath::encode(bytes)) } /// Returns a hyperlink path from an OS path. #[cfg(windows)] - pub fn from_path(path: &Path) -> Option { + pub(crate) fn from_path(original_path: &Path) -> Option { // On Windows, Path::canonicalize returns the result of // GetFinalPathNameByHandleW with VOLUME_NAME_DOS, // which produces paths such as the following: + // // \\?\C:\dir\file.txt (local path) // \\?\UNC\server\dir\file.txt (network share) // @@ -396,55 +712,102 @@ impl HyperlinkPath { // It is followed either by the drive letter, or by UNC\ // (universal naming convention), which denotes a network share. // - // Given that the default URL pattern on Windows is file:///{file} + // Given that the default URL format on Windows is file://{path} // we need to return the following from this function: - // C:/dir/file.txt (local path) - // /server/dir/file.txt (network share) + // + // /C:/dir/file.txt (local path) + // //server/dir/file.txt (network share) // // Which produces the following links: + // // file:///C:/dir/file.txt (local path) // file:////server/dir/file.txt (network share) // - // This substitutes the {file} placeholder with the expected value - // for the most common DOS paths, but on the other hand, - // network paths start with a single slash, which may be unexpected. - // It produces correct URLs though. + // This substitutes the {path} variable with the expected value for + // the most common DOS paths, but on the other hand, network paths + // start with a single slash, which may be unexpected. It seems to work + // though? + // + // Note that the following URL syntax also seems to be valid? 
// - // Note that the following URL syntax is also valid for network shares: // file://server/dir/file.txt - // It is also more consistent with the Unix case, but in order to - // use it, the pattern would have to be file://{file} and - // the {file} placeholder would have to be replaced with - // /C:/dir/file.txt - // for local files, which is not ideal, and it is certainly unexpected. + // + // But the initial implementation of this routine went for the format + // above. // // Also note that the file://C:/dir/file.txt syntax is not correct, // even though it often works in practice. // - // In the end, this choice was confirmed by VSCode, whose pattern is - // vscode://file/{file}:{line}:{column} and which correctly understands - // the following URL format for network drives: + // In the end, this choice was confirmed by VSCode, whose format is + // + // vscode://file{path}:{line}:{column} + // + // and which correctly understands the following URL format for network + // drives: + // // vscode://file//server/dir/file.txt:1:1 + // // It doesn't parse any other number of slashes in "file//server" as a // network path. - const WIN32_NAMESPACE_PREFIX: &[u8] = br"\\?\"; - const UNC_PREFIX: &[u8] = br"UNC\"; + const WIN32_NAMESPACE_PREFIX: &str = r"\\?\"; + const UNC_PREFIX: &str = r"UNC\"; - let path = path.canonicalize().ok()?; - let mut path = path.to_str()?.as_bytes(); - - if path.starts_with(WIN32_NAMESPACE_PREFIX) { - path = &path[WIN32_NAMESPACE_PREFIX.len()..]; - - if path.starts_with(UNC_PREFIX) { - path = &path[(UNC_PREFIX.len() - 1)..]; + // As for Unix, we canonicalize the path to make sure we have an + // absolute path. + let path = match original_path.canonicalize() { + Ok(path) => path, + Err(err) => { + log::debug!( + "hyperlink creation for {:?} failed, error occurred \ + during path canonicalization: {}", + original_path, + err, + ); + return None; } - } else { + }; + // We convert the path to a string for easier manipulation. 
If it + // wasn't valid UTF-16 (and thus could not be non-lossily transcoded + // to UTF-8), then we just give up. It's not clear we could make + // a meaningful hyperlink from it anyway. And this should be an + // exceptionally rare case. + let mut string = match path.to_str() { + Some(string) => string, + None => { + log::debug!( + "hyperlink creation for {:?} failed, path is not \ + valid UTF-8", + original_path, + ); + return None; + } + }; + // As the comment above says, we expect all canonicalized paths to + // begin with a \\?\. If it doesn't, then something weird is happening + // and we should just give up. + if !string.starts_with(WIN32_NAMESPACE_PREFIX) { + log::debug!( + "hyperlink creation for {:?} failed, canonicalization \ + returned {:?}, which does not start with \\\\?\\", + original_path, + path, + ); return None; } + string = &string[WIN32_NAMESPACE_PREFIX.len()..]; - Some(Self::encode(path)) + // And as above, drop the UNC prefix too, but keep the leading slash. + if string.starts_with(UNC_PREFIX) { + string = &string[(UNC_PREFIX.len() - 1)..]; + } + // Finally, add a leading slash. In the local file case, this turns + // C:\foo\bar into /C:\foo\bar (and then percent encoding turns it into + // /C:/foo/bar). In the network share case, this turns \share\foo\bar + // into /\share/foo/bar (and then percent encoding turns it into + // //share/foo/bar). + let with_slash = format!("/{string}"); + Some(HyperlinkPath::encode(with_slash.as_bytes())) } /// Percent-encodes a path. @@ -461,9 +824,8 @@ impl HyperlinkPath { /// creates invalid file:// URLs on that platform. fn encode(input: &[u8]) -> HyperlinkPath { let mut result = Vec::with_capacity(input.len()); - - for &c in input { - match c { + for &byte in input.iter() { + match byte { b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' @@ -474,7 +836,7 @@ impl HyperlinkPath { | b'_' | b'~' | 128.. 
=> { - result.push(c); + result.push(byte); } #[cfg(windows)] b'\\' => { @@ -483,60 +845,12 @@ impl HyperlinkPath { _ => { const HEX: &[u8] = b"0123456789ABCDEF"; result.push(b'%'); - result.push(HEX[(c >> 4) as usize]); - result.push(HEX[(c & 0xF) as usize]); + result.push(HEX[(byte >> 4) as usize]); + result.push(HEX[(byte & 0xF) as usize]); } } } - - Self(result) - } -} - -impl std::fmt::Display for HyperlinkPath { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - std::str::from_utf8(&self.0).unwrap_or("invalid utf-8") - ) - } -} - -/// A simple abstraction over a hyperlink span written to the terminal. This -/// helps tracking whether a hyperlink has been started, and should be ended. -#[derive(Debug, Default)] -pub(crate) struct HyperlinkSpan { - active: bool, -} - -impl HyperlinkSpan { - /// Starts a hyperlink and returns a span which tracks whether it is still - /// in effect. - pub(crate) fn start( - wtr: &mut impl WriteColor, - hyperlink: &HyperlinkSpec, - ) -> io::Result { - if wtr.supports_hyperlinks() && hyperlink.uri().is_some() { - wtr.set_hyperlink(hyperlink)?; - Ok(HyperlinkSpan { active: true }) - } else { - Ok(HyperlinkSpan { active: false }) - } - } - - /// Ends the hyperlink span if it is active. - pub(crate) fn end(&mut self, wtr: &mut impl WriteColor) -> io::Result<()> { - if self.is_active() { - wtr.set_hyperlink(&HyperlinkSpec::close())?; - self.active = false; - } - Ok(()) - } - - /// Returns true if there is currently an active hyperlink. 
- pub(crate) fn is_active(&self) -> bool { - self.active + HyperlinkPath(result) } } @@ -547,135 +861,141 @@ mod tests { use super::*; #[test] - fn build_pattern() { - let pattern = HyperlinkPatternBuilder::new() - .append_text(b"foo://") - .append_text(b"bar-") - .append_text(b"baz") - .append_file() + fn build_format() { + let format = FormatBuilder::new() + .append_slice(b"foo://") + .append_slice(b"bar-") + .append_slice(b"baz") + .append_var("path") + .unwrap() .build() .unwrap(); - assert_eq!(pattern.to_string(), "foo://bar-baz{file}"); - assert_eq!(pattern.parts[0], Part::Text(b"foo://bar-baz".to_vec())); - assert!(!pattern.is_empty()); + assert_eq!(format.to_string(), "foo://bar-baz{path}"); + assert_eq!(format.parts[0], Part::Text(b"foo://bar-baz".to_vec())); + assert!(!format.is_empty()); } #[test] - fn build_empty_pattern() { - let pattern = HyperlinkPatternBuilder::new().build().unwrap(); + fn build_empty_format() { + let format = FormatBuilder::new().build().unwrap(); - assert!(pattern.is_empty()); - assert_eq!(pattern, HyperlinkPattern::empty()); - assert_eq!(pattern, HyperlinkPattern::default()); + assert!(format.is_empty()); + assert_eq!(format, HyperlinkFormat::empty()); + assert_eq!(format, HyperlinkFormat::default()); } #[test] fn handle_alias() { - assert!(HyperlinkPattern::from_str("file").is_ok()); - assert!(HyperlinkPattern::from_str("none").is_ok()); - assert!(HyperlinkPattern::from_str("none").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("file").is_ok()); + assert!(HyperlinkFormat::from_str("none").is_ok()); + assert!(HyperlinkFormat::from_str("none").unwrap().is_empty()); } #[test] - fn parse_pattern() { - let pattern = HyperlinkPattern::from_str( - "foo://{host}/bar/{file}:{line}:{column}", + fn parse_format() { + let format = HyperlinkFormat::from_str( + "foo://{host}/bar/{path}:{line}:{column}", ) .unwrap(); assert_eq!( - pattern.to_string(), - "foo://{host}/bar/{file}:{line}:{column}" - .replace("{host}", 
&HyperlinkPatternBuilder::get_hostname()) + format.to_string(), + "foo://{host}/bar/{path}:{line}:{column}" ); - assert_eq!(pattern.parts.len(), 6); - assert!(pattern.parts.contains(&Part::File)); - assert!(pattern.parts.contains(&Part::Line)); - assert!(pattern.parts.contains(&Part::Column)); + assert_eq!(format.parts.len(), 8); + assert!(format.parts.contains(&Part::Path)); + assert!(format.parts.contains(&Part::Line)); + assert!(format.parts.contains(&Part::Column)); } #[test] fn parse_valid() { - assert!(HyperlinkPattern::from_str("").unwrap().is_empty()); + assert!(HyperlinkFormat::from_str("").unwrap().is_empty()); assert_eq!( - HyperlinkPattern::from_str("foo://{file}").unwrap().to_string(), - "foo://{file}" + HyperlinkFormat::from_str("foo://{path}").unwrap().to_string(), + "foo://{path}" ); assert_eq!( - HyperlinkPattern::from_str("foo://{file}/bar") - .unwrap() - .to_string(), - "foo://{file}/bar" + HyperlinkFormat::from_str("foo://{path}/bar").unwrap().to_string(), + "foo://{path}/bar" ); - HyperlinkPattern::from_str("f://{file}").unwrap(); - HyperlinkPattern::from_str("f:{file}").unwrap(); - HyperlinkPattern::from_str("f-+.:{file}").unwrap(); - HyperlinkPattern::from_str("f42:{file}").unwrap(); + HyperlinkFormat::from_str("f://{path}").unwrap(); + HyperlinkFormat::from_str("f:{path}").unwrap(); + HyperlinkFormat::from_str("f-+.:{path}").unwrap(); + HyperlinkFormat::from_str("f42:{path}").unwrap(); + HyperlinkFormat::from_str("42:{path}").unwrap(); + HyperlinkFormat::from_str("+:{path}").unwrap(); + HyperlinkFormat::from_str("F42:{path}").unwrap(); + HyperlinkFormat::from_str("F42://foo{{bar}}{path}").unwrap(); } #[test] fn parse_invalid() { - assert_eq!( - HyperlinkPattern::from_str("foo://bar").unwrap_err(), - HyperlinkPatternError::NoFilePlaceholder - ); - assert_eq!( - HyperlinkPattern::from_str("foo://{bar}").unwrap_err(), - HyperlinkPatternError::InvalidPlaceholder("bar".to_string()) - ); - assert_eq!( - 
HyperlinkPattern::from_str("foo://{file").unwrap_err(), - HyperlinkPatternError::InvalidSyntax - ); - assert_eq!( - HyperlinkPattern::from_str("foo://{file}:{column}").unwrap_err(), - HyperlinkPatternError::NoLinePlaceholder - ); - assert_eq!( - HyperlinkPattern::from_str("{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme - ); - assert_eq!( - HyperlinkPattern::from_str(":{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme - ); - assert_eq!( - HyperlinkPattern::from_str("f*:{file}").unwrap_err(), - HyperlinkPatternError::InvalidScheme - ); - } + use super::HyperlinkFormatErrorKind::*; - #[test] - fn aliases_are_valid() { - for (name, definition) in HYPERLINK_PATTERN_ALIASES { - assert!( - HyperlinkPattern::from_str(definition).is_ok(), - "invalid hyperlink alias: {}", - name - ); - } - } + let err = |kind| HyperlinkFormatError { kind }; + assert_eq!( + HyperlinkFormat::from_str("foo://bar").unwrap_err(), + err(NoVariables), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{line}").unwrap_err(), + err(NoPathVariable), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{path").unwrap_err(), + err(UnclosedVariable), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{path}:{column}").unwrap_err(), + err(NoLineVariable), + ); + assert_eq!( + HyperlinkFormat::from_str("{path}").unwrap_err(), + err(InvalidScheme), + ); + assert_eq!( + HyperlinkFormat::from_str(":{path}").unwrap_err(), + err(InvalidScheme), + ); + assert_eq!( + HyperlinkFormat::from_str("f*:{path}").unwrap_err(), + err(InvalidScheme), + ); - #[test] - fn aliases_are_sorted() { - let mut names = HYPERLINK_PATTERN_ALIASES.iter().map(|(name, _)| name); - - let Some(mut previous_name) = names.next() else { - return; - }; - - for name in names { - assert!( - name > previous_name, - "'{}' should be sorted before '{}' \ - in HYPERLINK_PATTERN_ALIASES", - name, - previous_name - ); - - previous_name = name; - } + assert_eq!( + HyperlinkFormat::from_str("foo://{bar}").unwrap_err(), 
+ err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{}}bar}").unwrap_err(), + err(InvalidVariable("".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{b}}ar}").unwrap_err(), + err(InvalidVariable("b".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{bar}}}").unwrap_err(), + err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{{bar}").unwrap_err(), + err(InvalidCloseVariable), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{{{bar}").unwrap_err(), + err(InvalidVariable("bar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{b{{ar}").unwrap_err(), + err(InvalidVariable("b{{ar".to_string())), + ); + assert_eq!( + HyperlinkFormat::from_str("foo://{bar{{}").unwrap_err(), + err(InvalidVariable("bar{{".to_string())), + ); } } diff --git a/crates/printer/src/hyperlink_aliases.rs b/crates/printer/src/hyperlink_aliases.rs index 6d429bf8..c98bc0b0 100644 --- a/crates/printer/src/hyperlink_aliases.rs +++ b/crates/printer/src/hyperlink_aliases.rs @@ -1,23 +1,87 @@ /// Aliases to well-known hyperlink schemes. /// /// These need to be sorted by name. 
-pub(crate) const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ - #[cfg(unix)] - ("file", "file://{host}/{file}"), +const HYPERLINK_PATTERN_ALIASES: &[(&str, &str)] = &[ + #[cfg(not(windows))] + ("default", "file://{host}{path}"), #[cfg(windows)] - ("file", "file:///{file}"), + ("default", "file://{path}"), + ("file", "file://{host}{path}"), // https://github.com/misaki-web/grepp - ("grep+", "grep+:///{file}:{line}"), - ("kitty", "file://{host}/{file}#{line}"), + ("grep+", "grep+://{path}:{line}"), + ("kitty", "file://{host}{path}#{line}"), // https://macvim.org/docs/gui_mac.txt.html#mvim%3A%2F%2F - ("macvim", "mvim://open?url=file:///{file}&line={line}&column={column}"), + ("macvim", "mvim://open?url=file://{path}&line={line}&column={column}"), ("none", ""), // https://github.com/inopinatus/sublime_url - ("subl", "subl://open?url=file:///{file}&line={line}&column={column}"), + ("subl", "subl://open?url=file://{path}&line={line}&column={column}"), // https://macromates.com/blog/2007/the-textmate-url-scheme/ - ("textmate", "txmt://open?url=file:///{file}&line={line}&column={column}"), + ("textmate", "txmt://open?url=file://{path}&line={line}&column={column}"), // https://code.visualstudio.com/docs/editor/command-line#_opening-vs-code-with-urls - ("vscode", "vscode://file/{file}:{line}:{column}"), - ("vscode-insiders", "vscode-insiders://file/{file}:{line}:{column}"), - ("vscodium", "vscodium://file/{file}:{line}:{column}"), + ("vscode", "vscode://file{path}:{line}:{column}"), + ("vscode-insiders", "vscode-insiders://file{path}:{line}:{column}"), + ("vscodium", "vscodium://file{path}:{line}:{column}"), ]; + +/// Look for the hyperlink format defined by the given alias name. +/// +/// If one does not exist, `None` is returned. 
+pub(crate) fn find(name: &str) -> Option<&str> { + HYPERLINK_PATTERN_ALIASES + .binary_search_by_key(&name, |&(name, _)| name) + .map(|i| HYPERLINK_PATTERN_ALIASES[i].1) + .ok() +} + +/// Return an iterator over all available alias names and their definitions. +pub(crate) fn iter() -> impl Iterator { + HYPERLINK_PATTERN_ALIASES.iter().copied() +} + +#[cfg(test)] +mod tests { + use crate::HyperlinkFormat; + + use super::*; + + #[test] + fn is_sorted() { + let mut prev = HYPERLINK_PATTERN_ALIASES + .get(0) + .expect("aliases should be non-empty") + .0; + for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter().skip(1) { + assert!( + name > prev, + "'{prev}' should come before '{name}' in \ + HYPERLINK_PATTERN_ALIASES", + ); + prev = name; + } + } + + #[test] + fn alias_names_are_reasonable() { + for &(name, _) in HYPERLINK_PATTERN_ALIASES.iter() { + // There's no hard rule here, but if we want to define an alias + // with a name that doesn't pass this assert, then we should + // probably flag it as worthy of consideration. For example, we + // really do not want to define an alias that contains `{` or `}`, + // which might confuse it for a variable. 
+ assert!(name.chars().all(|c| c.is_alphanumeric() + || c == '+' + || c == '-' + || c == '.')); + } + } + + #[test] + fn aliases_are_valid_formats() { + for (name, definition) in HYPERLINK_PATTERN_ALIASES { + assert!( + definition.parse::().is_ok(), + "invalid hyperlink alias '{name}': {definition}", + ); + } + } +} diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs index b2869d99..6c4a3735 100644 --- a/crates/printer/src/lib.rs +++ b/crates/printer/src/lib.rs @@ -60,12 +60,13 @@ assert_eq!(output, expected); */ #![deny(missing_docs)] -#![cfg_attr(feature = "pattern", feature(pattern))] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] pub use crate::{ color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec}, hyperlink::{ - HyperlinkPattern, HyperlinkPatternBuilder, HyperlinkPatternError, + HyperlinkConfig, HyperlinkEnvironment, HyperlinkFormat, + HyperlinkFormatError, }, path::{PathPrinter, PathPrinterBuilder}, standard::{Standard, StandardBuilder, StandardSink}, diff --git a/crates/printer/src/path.rs b/crates/printer/src/path.rs index c25956bc..38a2c9ec 100644 --- a/crates/printer/src/path.rs +++ b/crates/printer/src/path.rs @@ -4,7 +4,7 @@ use termcolor::WriteColor; use crate::{ color::ColorSpecs, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, util::PrinterPath, }; @@ -12,7 +12,7 @@ use crate::{ #[derive(Clone, Debug)] struct Config { colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, separator: Option, terminator: u8, } @@ -21,7 +21,7 @@ impl Default for Config { fn default() -> Config { Config { colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), separator: None, terminator: b'\n', } @@ -43,7 +43,9 @@ impl PathPrinterBuilder { /// Create a new path printer with the current configuration that writes /// paths to the given writer. 
pub fn build(&self, wtr: W) -> PathPrinter { - PathPrinter { config: self.config.clone(), wtr, buf: vec![] } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); + PathPrinter { config: self.config.clone(), wtr, interpolator } } /// Set the user color specifications to use for coloring in this printer. @@ -73,7 +75,7 @@ impl PathPrinterBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. /// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -83,12 +85,12 @@ impl PathPrinterBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut PathPrinterBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -140,40 +142,35 @@ impl PathPrinterBuilder { pub struct PathPrinter { config: Config, wtr: W, - buf: Vec, + interpolator: hyperlink::Interpolator, } impl PathPrinter { /// Write the given path to the underlying writer. 
pub fn write(&mut self, path: &Path) -> io::Result<()> { - let ppath = PrinterPath::with_separator(path, self.config.separator); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator); if !self.wtr.supports_color() { self.wtr.write_all(ppath.as_bytes())?; } else { - let mut hyperlink = self.start_hyperlink_span(&ppath)?; + let status = self.start_hyperlink(&ppath)?; self.wtr.set_color(self.config.colors.path())?; self.wtr.write_all(ppath.as_bytes())?; self.wtr.reset()?; - hyperlink.end(&mut self.wtr)?; + self.interpolator.finish(status, &mut self.wtr)?; } self.wtr.write_all(&[self.config.terminator]) } /// Starts a hyperlink span when applicable. - fn start_hyperlink_span( + fn start_hyperlink( &mut self, path: &PrinterPath, - ) -> io::Result { - if self.wtr.supports_hyperlinks() { - if let Some(spec) = path.create_hyperlink_spec( - &self.config.hyperlink_pattern, - None, - None, - &mut self.buf, - ) { - return Ok(HyperlinkSpan::start(&mut self.wtr, &spec)?); - } - } - Ok(HyperlinkSpan::default()) + ) -> io::Result { + let Some(hyperpath) = path.as_hyperlink() else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = hyperlink::Values::new(hyperpath); + self.interpolator.begin(&values, &mut self.wtr) } } diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index aa925546..cd6a4e54 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -20,7 +20,7 @@ use { use crate::{ color::ColorSpecs, counter::CounterWriter, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, stats::Stats, util::{ find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, @@ -36,7 +36,7 @@ use crate::{ #[derive(Debug, Clone)] struct Config { colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, stats: bool, heading: bool, path: bool, @@ -62,7 +62,7 @@ impl Default for Config { fn default() -> Config { Config { 
colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), stats: false, heading: false, path: true, @@ -131,7 +131,6 @@ impl StandardBuilder { Standard { config: self.config.clone(), wtr: RefCell::new(CounterWriter::new(wtr)), - buf: RefCell::new(vec![]), matches: vec![], } } @@ -170,7 +169,7 @@ impl StandardBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. /// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -180,12 +179,12 @@ impl StandardBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut StandardBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -496,7 +495,6 @@ impl StandardBuilder { pub struct Standard { config: Config, wtr: RefCell>, - buf: RefCell>, matches: Vec, } @@ -533,12 +531,15 @@ impl Standard { &'s mut self, matcher: M, ) -> StandardSink<'static, 's, M, W> { + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats { Some(Stats::new()) } else { None }; let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher, standard: self, replacer: Replacer::new(), + interpolator, path: None, start_time: Instant::now(), match_count: 0, @@ -565,16 +566,17 @@ impl Standard { if !self.config.path { return self.sink(matcher); } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats { Some(Stats::new()) } else { None }; - let 
ppath = PrinterPath::with_separator( - path.as_ref(), - self.config.separator_path, - ); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator_path); let needs_match_granularity = self.needs_match_granularity(); StandardSink { matcher, standard: self, replacer: Replacer::new(), + interpolator, path: Some(ppath), start_time: Instant::now(), match_count: 0, @@ -659,6 +661,7 @@ pub struct StandardSink<'p, 's, M: Matcher, W> { matcher: M, standard: &'s mut Standard, replacer: Replacer, + interpolator: hyperlink::Interpolator, path: Option>, start_time: Instant, match_count: u64, @@ -1241,22 +1244,10 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { ) -> io::Result<()> { let mut prelude = PreludeWriter::new(self); prelude.start(line_number, column)?; - - if !self.config().heading { - prelude.write_path()?; - } - if let Some(n) = line_number { - prelude.write_line_number(n)?; - } - if let Some(n) = column { - if self.config().column { - prelude.write_column_number(n)?; - } - } - if self.config().byte_offset { - prelude.write_byte_offset(absolute_byte_offset)?; - } - + prelude.write_path()?; + prelude.write_line_number(line_number)?; + prelude.write_column_number(column)?; + prelude.write_byte_offset(absolute_byte_offset)?; prelude.end() } @@ -1507,30 +1498,30 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } fn write_path_hyperlink(&self, path: &PrinterPath) -> io::Result<()> { - let mut hyperlink = self.start_hyperlink_span(path, None, None)?; + let status = self.start_hyperlink(path, None, None)?; self.write_path(path)?; - hyperlink.end(&mut *self.wtr().borrow_mut()) + self.end_hyperlink(status) } - fn start_hyperlink_span( + fn start_hyperlink( &self, path: &PrinterPath, line_number: Option, column: Option, - ) -> io::Result { - let mut wtr = self.wtr().borrow_mut(); - if wtr.supports_hyperlinks() { - let mut buf = self.buf().borrow_mut(); - if let Some(spec) = path.create_hyperlink_spec( - 
&self.config().hyperlink_pattern, - line_number, - column, - &mut buf, - ) { - return HyperlinkSpan::start(&mut *wtr, &spec); - } - } - Ok(HyperlinkSpan::default()) + ) -> io::Result { + let Some(hyperpath) = path.as_hyperlink() else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = + hyperlink::Values::new(hyperpath).line(line_number).column(column); + self.sink.interpolator.begin(&values, &mut *self.wtr().borrow_mut()) + } + + fn end_hyperlink( + &self, + status: hyperlink::InterpolatorStatus, + ) -> io::Result<()> { + self.sink.interpolator.finish(status, &mut *self.wtr().borrow_mut()) } fn start_color_match(&self) -> io::Result<()> { @@ -1586,12 +1577,6 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { &self.sink.standard.wtr } - /// Return a temporary buffer, which may be used for anything. - /// It is not necessarily empty when returned. - fn buf(&self) -> &'a RefCell> { - &self.sink.standard.buf - } - /// Return the path associated with this printer, if one exists. fn path(&self) -> Option<&'a PrinterPath<'a>> { self.sink.path.as_ref() @@ -1645,7 +1630,7 @@ struct PreludeWriter<'a, M: Matcher, W> { std: &'a StandardImpl<'a, M, W>, next_separator: PreludeSeparator, field_separator: &'a [u8], - hyperlink: HyperlinkSpan, + interp_status: hyperlink::InterpolatorStatus, } /// A type of separator used in the prelude @@ -1660,45 +1645,45 @@ enum PreludeSeparator { impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// Creates a new prelude printer. + #[inline(always)] fn new(std: &'a StandardImpl<'a, M, W>) -> PreludeWriter<'a, M, W> { - Self { + PreludeWriter { std, next_separator: PreludeSeparator::None, field_separator: std.separator_field(), - hyperlink: HyperlinkSpan::default(), + interp_status: hyperlink::InterpolatorStatus::inactive(), } } /// Starts the prelude with a hyperlink when applicable. 
/// - /// If a heading was written, and the hyperlink pattern is invariant on + /// If a heading was written, and the hyperlink format is invariant on /// the line number, then this doesn't hyperlink each line prelude, as it /// wouldn't point to the line anyway. The hyperlink on the heading should /// be sufficient and less confusing. + #[inline(always)] fn start( &mut self, line_number: Option, column: Option, ) -> io::Result<()> { - if let Some(path) = self.std.path() { - if self.config().hyperlink_pattern.is_line_dependent() - || !self.config().heading - { - self.hyperlink = self.std.start_hyperlink_span( - path, - line_number, - column, - )?; - } + let Some(path) = self.std.path() else { return Ok(()) }; + if self.config().hyperlink.format().is_line_dependent() + || !self.config().heading + { + self.interp_status = + self.std.start_hyperlink(path, line_number, column)?; } Ok(()) } /// Ends the prelude and writes the remaining output. + #[inline(always)] fn end(&mut self) -> io::Result<()> { - if self.hyperlink.is_active() { - self.hyperlink.end(&mut *self.std.wtr().borrow_mut())?; - } + self.std.end_hyperlink(std::mem::replace( + &mut self.interp_status, + hyperlink::InterpolatorStatus::inactive(), + ))?; self.write_separator() } @@ -1706,22 +1691,30 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// write that path to the underlying writer followed by the given field /// separator. (If a path terminator is set, then that is used instead of /// the field separator.) + #[inline(always)] fn write_path(&mut self) -> io::Result<()> { - if let Some(path) = self.std.path() { - self.write_separator()?; - self.std.write_path(path)?; - - self.next_separator = if self.config().path_terminator.is_some() { - PreludeSeparator::PathTerminator - } else { - PreludeSeparator::FieldSeparator - }; + // The prelude doesn't handle headings, only what comes before a match + // on the same line. 
So if we are emitting paths in headings, we should + // not do it here on each line. + if self.config().heading { + return Ok(()); } + let Some(path) = self.std.path() else { return Ok(()) }; + self.write_separator()?; + self.std.write_path(path)?; + + self.next_separator = if self.config().path_terminator.is_some() { + PreludeSeparator::PathTerminator + } else { + PreludeSeparator::FieldSeparator + }; Ok(()) } - /// Writes the line number field. - fn write_line_number(&mut self, line_number: u64) -> io::Result<()> { + /// Writes the line number field if present. + #[inline(always)] + fn write_line_number(&mut self, line: Option) -> io::Result<()> { + let Some(line_number) = line else { return Ok(()) }; self.write_separator()?; let n = line_number.to_string(); self.std.write_spec(self.config().colors.line(), n.as_bytes())?; @@ -1729,8 +1722,13 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } - /// Writes the column number field. - fn write_column_number(&mut self, column_number: u64) -> io::Result<()> { + /// Writes the column number field if present and configured to do so. + #[inline(always)] + fn write_column_number(&mut self, column: Option) -> io::Result<()> { + if !self.config().column { + return Ok(()); + } + let Some(column_number) = column else { return Ok(()) }; self.write_separator()?; let n = column_number.to_string(); self.std.write_spec(self.config().colors.column(), n.as_bytes())?; @@ -1738,8 +1736,12 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } - /// Writes the byte offset field. + /// Writes the byte offset field if configured to do so. 
+ #[inline(always)] fn write_byte_offset(&mut self, offset: u64) -> io::Result<()> { + if !self.config().byte_offset { + return Ok(()); + } self.write_separator()?; let n = offset.to_string(); self.std.write_spec(self.config().colors.column(), n.as_bytes())?; @@ -1751,6 +1753,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { /// /// This is called before writing the contents of a field, and at /// the end of the prelude. + #[inline(always)] fn write_separator(&mut self) -> io::Result<()> { match self.next_separator { PreludeSeparator::None => {} @@ -1767,6 +1770,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { Ok(()) } + #[inline(always)] fn config(&self) -> &Config { self.std.config() } diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs index 4875bb7e..431b3a92 100644 --- a/crates/printer/src/summary.rs +++ b/crates/printer/src/summary.rs @@ -15,7 +15,7 @@ use { use crate::{ color::ColorSpecs, counter::CounterWriter, - hyperlink::{HyperlinkPattern, HyperlinkSpan}, + hyperlink::{self, HyperlinkConfig}, stats::Stats, util::{find_iter_at_in_context, PrinterPath}, }; @@ -29,7 +29,7 @@ use crate::{ struct Config { kind: SummaryKind, colors: ColorSpecs, - hyperlink_pattern: HyperlinkPattern, + hyperlink: HyperlinkConfig, stats: bool, path: bool, max_matches: Option, @@ -44,7 +44,7 @@ impl Default for Config { Config { kind: SummaryKind::Count, colors: ColorSpecs::default(), - hyperlink_pattern: HyperlinkPattern::default(), + hyperlink: HyperlinkConfig::default(), stats: false, path: true, max_matches: None, @@ -169,7 +169,6 @@ impl SummaryBuilder { Summary { config: self.config.clone(), wtr: RefCell::new(CounterWriter::new(wtr)), - buf: vec![], } } @@ -216,7 +215,7 @@ impl SummaryBuilder { self } - /// Set the hyperlink pattern to use for hyperlinks output by this printer. + /// Set the configuration to use for hyperlinks output by this printer. 
/// /// Regardless of the hyperlink format provided here, whether hyperlinks /// are actually used or not is determined by the implementation of @@ -226,12 +225,12 @@ impl SummaryBuilder { /// /// This completely overrides any previous hyperlink format. /// - /// The default pattern format results in not emitting any hyperlinks. - pub fn hyperlink_pattern( + /// The default configuration results in not emitting any hyperlinks. + pub fn hyperlink( &mut self, - pattern: HyperlinkPattern, + config: HyperlinkConfig, ) -> &mut SummaryBuilder { - self.config.hyperlink_pattern = pattern; + self.config.hyperlink = config; self } @@ -357,7 +356,6 @@ impl SummaryBuilder { pub struct Summary { config: Config, wtr: RefCell>, - buf: Vec, } impl Summary { @@ -400,6 +398,8 @@ impl Summary { &'s mut self, matcher: M, ) -> SummarySink<'static, 's, M, W> { + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats || self.config.kind.requires_stats() { Some(Stats::new()) } else { @@ -408,6 +408,7 @@ impl Summary { SummarySink { matcher, summary: self, + interpolator, path: None, start_time: Instant::now(), match_count: 0, @@ -432,18 +433,19 @@ impl Summary { if !self.config.path && !self.config.kind.requires_path() { return self.sink(matcher); } + let interpolator = + hyperlink::Interpolator::new(&self.config.hyperlink); let stats = if self.config.stats || self.config.kind.requires_stats() { Some(Stats::new()) } else { None }; - let ppath = PrinterPath::with_separator( - path.as_ref(), - self.config.separator_path, - ); + let ppath = PrinterPath::new(path.as_ref()) + .with_separator(self.config.separator_path); SummarySink { matcher, summary: self, + interpolator, path: Some(ppath), start_time: Instant::now(), match_count: 0, @@ -490,6 +492,7 @@ impl Summary { pub struct SummarySink<'p, 's, M: Matcher, W> { matcher: M, summary: &'s mut Summary, + interpolator: hyperlink::Interpolator, path: Option>, start_time: Instant, match_count: 
u64, @@ -595,36 +598,34 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> { /// (color and hyperlink). fn write_path(&mut self) -> io::Result<()> { if self.path.is_some() { - let mut hyperlink = self.start_hyperlink_span()?; - + let status = self.start_hyperlink()?; self.write_spec( self.summary.config.colors.path(), self.path.as_ref().unwrap().as_bytes(), )?; - - if hyperlink.is_active() { - hyperlink.end(&mut *self.summary.wtr.borrow_mut())?; - } + self.end_hyperlink(status)?; } Ok(()) } /// Starts a hyperlink span when applicable. - fn start_hyperlink_span(&mut self) -> io::Result { - if let Some(ref path) = self.path { - let mut wtr = self.summary.wtr.borrow_mut(); - if wtr.supports_hyperlinks() { - if let Some(spec) = path.create_hyperlink_spec( - &self.summary.config.hyperlink_pattern, - None, - None, - &mut self.summary.buf, - ) { - return Ok(HyperlinkSpan::start(&mut *wtr, &spec)?); - } - } - } - Ok(HyperlinkSpan::default()) + fn start_hyperlink( + &mut self, + ) -> io::Result { + let Some(hyperpath) = + self.path.as_ref().and_then(|p| p.as_hyperlink()) + else { + return Ok(hyperlink::InterpolatorStatus::inactive()); + }; + let values = hyperlink::Values::new(hyperpath); + self.interpolator.begin(&values, &mut *self.summary.wtr.borrow_mut()) + } + + fn end_hyperlink( + &self, + status: hyperlink::InterpolatorStatus, + ) -> io::Result<()> { + self.interpolator.finish(status, &mut *self.summary.wtr.borrow_mut()) } /// Write the line terminator configured on the given searcher. 
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index a042e754..b633ec9a 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -1,21 +1,17 @@ -use std::{borrow::Cow, fmt, io, path::Path, time}; +use std::{borrow::Cow, cell::OnceCell, fmt, io, path::Path, time}; use { - bstr::{ByteSlice, ByteVec}, + bstr::ByteVec, grep_matcher::{Captures, LineTerminator, Match, Matcher}, grep_searcher::{ LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch, }, - termcolor::HyperlinkSpec, }; #[cfg(feature = "serde")] use serde::{Serialize, Serializer}; -use crate::{ - hyperlink::{HyperlinkPath, HyperlinkPattern, HyperlinkValues}, - MAX_LOOK_AHEAD, -}; +use crate::{hyperlink::HyperlinkPath, MAX_LOOK_AHEAD}; /// A type for handling replacements while amortizing allocation. pub(crate) struct Replacer { @@ -268,11 +264,12 @@ impl<'a> Sunk<'a> { /// something else. This allows us to amortize work if we are printing the /// file path for every match. /// -/// In the common case, no transformation is needed, which lets us avoid the -/// allocation. Typically, only Windows requires a transform, since we can't -/// access the raw bytes of a path directly and first need to lossily convert -/// to UTF-8. Windows is also typically where the path separator replacement -/// is used, e.g., in cygwin environments to use `/` instead of `\`. +/// In the common case, no transformation is needed, which lets us avoid +/// the allocation. Typically, only Windows requires a transform, since +/// it's fraught to access the raw bytes of a path directly and first need +/// to lossily convert to UTF-8. Windows is also typically where the path +/// separator replacement is used, e.g., in cygwin environments to use `/` +/// instead of `\`. /// /// Users of this type are expected to construct it from a normal `Path` /// found in the standard library. 
It can then be written to any `io::Write` @@ -281,54 +278,55 @@ impl<'a> Sunk<'a> { /// will not roundtrip correctly. #[derive(Clone, Debug)] pub(crate) struct PrinterPath<'a> { + // On Unix, we can re-materialize a `Path` from our `Cow<'a, [u8]>` with + // zero cost, so there's no point in storing it. At time of writing, + // OsStr::as_os_str_bytes (and its corresponding constructor) are not + // stable yet. Those would let us achieve the same end portably. (As long + // as we keep our UTF-8 requirement on Windows.) + #[cfg(not(unix))] path: &'a Path, bytes: Cow<'a, [u8]>, - hyperlink_path: std::cell::OnceCell>, + hyperlink: OnceCell>, } impl<'a> PrinterPath<'a> { /// Create a new path suitable for printing. pub(crate) fn new(path: &'a Path) -> PrinterPath<'a> { PrinterPath { + #[cfg(not(unix))] path, + // N.B. This is zero-cost on Unix and requires at least a UTF-8 + // check on Windows. This doesn't allocate on Windows unless the + // path is invalid UTF-8 (which is exceptionally rare). bytes: Vec::from_path_lossy(path), - hyperlink_path: std::cell::OnceCell::new(), + hyperlink: OnceCell::new(), } } - /// Create a new printer path from the given path which can be efficiently - /// written to a writer without allocation. + /// Set the separator on this path. /// - /// If the given separator is present, then any separators in `path` are - /// replaced with it. + /// When set, `PrinterPath::as_bytes` will return the path provided but + /// with its separator replaced with the one given. pub(crate) fn with_separator( - path: &'a Path, + mut self, sep: Option, ) -> PrinterPath<'a> { - let mut ppath = PrinterPath::new(path); - if let Some(sep) = sep { - ppath.replace_separator(sep); - } - ppath - } - - /// Replace the path separator in this path with the given separator - /// and do it in place. On Windows, both `/` and `\` are treated as - /// path separators that are both replaced by `new_sep`. 
In all other - /// environments, only `/` is treated as a path separator. - fn replace_separator(&mut self, new_sep: u8) { - let transformed_path: Vec = self - .as_bytes() - .bytes() - .map(|b| { - if b == b'/' || (cfg!(windows) && b == b'\\') { - new_sep - } else { - b + /// Replace the path separator in this path with the given separator + /// and do it in place. On Windows, both `/` and `\` are treated as + /// path separators that are both replaced by `new_sep`. In all other + /// environments, only `/` is treated as a path separator. + fn replace_separator(bytes: &[u8], sep: u8) -> Vec { + let mut bytes = bytes.to_vec(); + for b in bytes.iter_mut() { + if *b == b'/' || (cfg!(windows) && *b == b'\\') { + *b = sep; } - }) - .collect(); - self.bytes = Cow::Owned(transformed_path); + } + bytes + } + let Some(sep) = sep else { return self }; + self.bytes = Cow::Owned(replace_separator(self.as_bytes(), sep)); + self } /// Return the raw bytes for this path. @@ -336,32 +334,30 @@ impl<'a> PrinterPath<'a> { &self.bytes } - /// Creates a hyperlink for this path and the given line and column, using - /// the specified pattern. Uses the given buffer to store the hyperlink. - pub(crate) fn create_hyperlink_spec<'b>( - &self, - pattern: &HyperlinkPattern, - line_number: Option, - column: Option, - buffer: &'b mut Vec, - ) -> Option> { - if pattern.is_empty() { - return None; - } - let file_path = self.hyperlink_path()?; - let values = HyperlinkValues::new(file_path, line_number, column); - buffer.clear(); - pattern.render(&values, buffer).ok()?; - Some(HyperlinkSpec::open(buffer)) + /// Return this path as a hyperlink. + /// + /// Note that a hyperlink may not be able to be created from a path. + /// Namely, computing the hyperlink may require touching the file system + /// (e.g., for path canonicalization) and that can fail. This failure is + /// silent but is logged. 
+ pub(crate) fn as_hyperlink(&self) -> Option<&HyperlinkPath> { + self.hyperlink + .get_or_init(|| HyperlinkPath::from_path(self.as_path())) + .as_ref() } - /// Returns the file path to use in hyperlinks, if any. - /// - /// This is what the {file} placeholder will be substituted with. - fn hyperlink_path(&self) -> Option<&HyperlinkPath> { - self.hyperlink_path - .get_or_init(|| HyperlinkPath::from_path(self.path)) - .as_ref() + /// Return this path as an actual `Path` type. + fn as_path(&self) -> &Path { + #[cfg(unix)] + fn imp<'p>(p: &'p PrinterPath<'_>) -> &'p Path { + use std::{ffi::OsStr, os::unix::ffi::OsStrExt}; + Path::new(OsStr::from_bytes(p.as_bytes())) + } + #[cfg(not(unix))] + fn imp<'p>(p: &'p PrinterPath<'_>) -> &'p Path { + p.path + } + imp(self) } }