diff options
47 files changed, 3191 insertions, 5909 deletions
diff --git a/.travis.yml b/.travis.yml index 5fc57d60..d47249a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,8 @@ addons: # Needed for testing decompression search. - xz-utils - liblz4-tool + # For building MUSL static builds on Linux. + - musl-tools matrix: fast_finish: true include: @@ -139,12 +139,16 @@ dependencies = [ [[package]] name = "grep" -version = "0.1.9" +version = "0.2.0" dependencies = [ - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-matcher 0.0.1", + "grep-pcre2 0.0.1", + "grep-printer 0.0.1", + "grep-regex 0.0.1", + "grep-searcher 0.0.1", + "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -205,16 +209,6 @@ dependencies = [ ] [[package]] -name = "grep2" -version = "0.1.8" -dependencies = [ - "grep-matcher 0.0.1", - "grep-printer 0.0.1", - "grep-regex 0.0.1", - "grep-searcher 0.0.1", -] - -[[package]] name = "ignore" version = "0.4.3" dependencies = [ @@ -227,7 +221,7 @@ dependencies = [ "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -378,21 +372,16 @@ name = "ripgrep" version = "0.9.0" dependencies = [ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bytecount 0.3.2 
(registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.1", - "grep 0.1.9", + "grep 0.2.0", "ignore 0.4.3", "lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)", "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -520,7 +509,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "2.1.4" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -608,7 +597,7 @@ dependencies = [ "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum walkdir 
2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369" +"checksum walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f1b768ba943161a9226ccd59b26bcd901e5d60e6061f4fcad3034784e0c7372b" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" @@ -35,7 +35,6 @@ path = "tests/tests.rs" members = [ "globset", "grep", - "grep2", "grep-matcher", "grep-pcre2", "grep-printer", @@ -46,20 +45,15 @@ members = [ [dependencies] atty = "0.2.11" -bytecount = "0.3.2" -encoding_rs = "0.8" -encoding_rs_io = "0.1" globset = { version = "0.4.0", path = "globset" } -grep = { version = "0.1.8", path = "grep" } +grep = { version = "0.2.0", path = "grep" } ignore = { version = "0.4.0", path = "ignore" } lazy_static = "1" -libc = "0.2" log = "0.4" -memchr = "2" -memmap = "0.6" num_cpus = "1" regex = "1" same-file = "1" +serde_json = "1" termcolor = "1" [dependencies.clap] @@ -69,7 +63,7 @@ features = ["suggestions", "color"] [target.'cfg(windows)'.dependencies.winapi] version = "0.3" -features = ["std", "winnt"] +features = ["std", "fileapi", "winnt"] [build-dependencies] lazy_static = "1" @@ -80,15 +74,9 @@ default-features = false features = ["suggestions", "color"] [features] -avx-accel = [ - "bytecount/avx-accel", - "grep2/avx-accel", -] -simd-accel = [ - "bytecount/simd-accel", - "encoding_rs/simd-accel", - "grep2/simd-accel", -] +avx-accel = ["grep/avx-accel"] +simd-accel = ["grep/simd-accel"] +pcre2 = ["grep/pcre2"] [profile.release] -debug = true +debug = 1 @@ 
-157,13 +157,37 @@ tool. With that said, How do I use lookaround and/or backreferences? </h3> -This isn't currently possible. ripgrep uses finite automata to implement -regular expression search, and in turn, guarantees linear time searching on all -inputs. It is difficult to efficiently support lookaround and backreferences in -finite automata engines, so ripgrep does not provide these features. +ripgrep's default regex engine does not support lookaround or backreferences. +This is primarily because the default regex engine is implemented using finite +state machines in order to guarantee a linear worst case time complexity on all +inputs. Backreferences are not possible to implement in this paradigm, and +lookaround appears difficult to do efficiently. -If a production quality regular expression engine with these features is ever -written in Rust, then it is possible ripgrep will provide it as an opt-in +However, ripgrep optionally supports using PCRE2 as the regex engine instead of +the default one based on finite state machines. You can enable PCRE2 with the +`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily +find every run of 10 word characters that is immediately repeated: + +``` +$ rg -P '(\w{10})\1' +tests/misc.rs +483: cmd.arg("--max-filesize").arg("44444444444444444444"); +globset/src/glob.rs +1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +``` + +If your version of ripgrep doesn't support PCRE2, then you'll get an error +message when you try to use the `-P/--pcre2` flag: + +``` +$ rg -P '(\w{10})\1' +PCRE2 is not available in this build of ripgrep +``` + +Most of the releases distributed by the ripgrep project here on GitHub will +come bundled with PCRE2 enabled. If you installed ripgrep through a different +means (like your system's package manager), then please reach out to the +maintainer of that package to see whether it's possible to enable the PCRE2 feature. 
@@ -7,7 +7,7 @@ available for [every release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to other popular search tools like The Silver Searcher, ack and grep. -[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg?branch=master)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) @@ -85,14 +85,16 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep. ### Why should I use ripgrep? -* It can replace many use cases served by both The Silver Searcher and GNU grep - because it is generally faster than both. (See [the FAQ](FAQ.md#posix4ever) - for more details on whether ripgrep can truly replace grep.) -* Like The Silver Searcher, ripgrep defaults to recursive directory search - and won't search files ignored by your `.gitignore` files. It also ignores - hidden and binary files by default. ripgrep also implements full support - for `.gitignore`, whereas there are many bugs related to that functionality - in The Silver Searcher. +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to recursive + directory search and won't search files ignored by your `.gitignore` files. + It also ignores hidden and binary files by default. ripgrep also implements + full support for `.gitignore`, whereas there are many bugs related to that + functionality in other code search tools claiming to provide the same + functionality. 
* ripgrep can search specific types of files. For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs foo` excludes Javascript files from your search. ripgrep can be taught about new file types with @@ -117,22 +119,24 @@ bugs, and Unicode support. ### Why shouldn't I use ripgrep? -I'd like to try to convince you why you *shouldn't* use ripgrep. This should -give you a glimpse at some important downsides or missing features of -ripgrep. +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. -* ripgrep uses a regex engine based on finite automata, so if you want fancy - regex features such as backreferences or lookaround, ripgrep won't provide - them to you. ripgrep does support lots of things though, including, but not - limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`), - begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and - support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or - `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be - supported.) -* ripgrep doesn't have multiline search. (Will happen as an opt-in feature.) +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: -In other words, if you like fancy regexes or multiline search, then ripgrep -may not quite meet your needs (yet). +* You need a portable and ubiquitous tool. While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other minor feature (or bug) found in another tool + that isn't in ripgrep. 
+* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) ### Is it really faster than everything else? @@ -145,7 +149,8 @@ Summarizing, ripgrep is fast because: * It is built on top of [Rust's regex engine](https://github.com/rust-lang-nursery/regex). Rust's regex engine uses finite automata, SIMD and aggressive literal - optimizations to make searching very fast. + optimizations to make searching very fast. (PCRE2 support can be opted into + with the `-P/--pcre2` flag.) * Rust's regex library maintains performance with full Unicode support by building UTF-8 decoding directly into its deterministic finite automaton engine. @@ -168,6 +173,11 @@ Andy Lester, author of [ack](https://beyondgrep.com/), has published an excellent table comparing the features of ack, ag, git-grep, GNU grep and ripgrep: https://beyondgrep.com/feature-comparison/ +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. 
+ ### Installation @@ -207,13 +217,15 @@ If you're a **MacPorts** user, then you can install ripgrep from the $ sudo port install ripgrep ``` -If you're a **Windows Chocolatey** user, then you can install ripgrep from the [official repo](https://chocolatey.org/packages/ripgrep): +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): ``` $ choco install ripgrep ``` -If you're a **Windows Scoop** user, then you can install ripgrep from the [official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): ``` $ scoop install ripgrep @@ -225,32 +237,37 @@ If you're an **Arch Linux** user, then you can install ripgrep from the official $ pacman -S ripgrep ``` -If you're a **Gentoo** user, you can install ripgrep from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): ``` $ emerge sys-apps/ripgrep ``` -If you're a **Fedora 27+** user, you can install ripgrep from official repositories. +If you're a **Fedora 27+** user, you can install ripgrep from official +repositories. 
``` $ sudo dnf install ripgrep ``` -If you're a **Fedora 24+** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **Fedora 24+** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo dnf copr enable carlwgeorge/ripgrep $ sudo dnf install ripgrep ``` -If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the [official repo](http://software.opensuse.org/package/ripgrep): +If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the +[official repo](http://software.opensuse.org/package/ripgrep): ``` $ sudo zypper install ripgrep ``` -If you're a **RHEL/CentOS 7** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **RHEL/CentOS 7** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo @@ -286,25 +303,29 @@ seem to work right and generate a number of very strange bug reports that I don't know how to fix and don't have the time to fix. Therefore, it is no longer a recommended installation option.) 
-If you're a **FreeBSD** user, then you can install ripgrep from the [official ports](https://www.freshports.org/textproc/ripgrep/): +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): ``` # pkg install ripgrep ``` -If you're an **OpenBSD** user, then you can install ripgrep from the [official ports](http://openports.se/textproc/ripgrep): +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](http://openports.se/textproc/ripgrep): ``` $ doas pkg_add ripgrep ``` -If you're a **NetBSD** user, then you can install ripgrep from [pkgsrc](http://pkgsrc.se/textproc/ripgrep): +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](http://pkgsrc.se/textproc/ripgrep): ``` # pkgin install ripgrep ``` If you're a **Rust programmer**, ripgrep can be installed with `cargo`. + * Note that the minimum supported version of Rust for ripgrep is **1.23.0**, although ripgrep may work with older versions. * Note that the binary may be bigger than expected because it contains debug @@ -353,6 +374,35 @@ are not necessary to get SIMD optimizations for search; those are enabled automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features will similarly become unnecessary. +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: + +``` +$ cargo build --release --features 'pcre2' +``` + +(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile +time SIMD optimizations.) + +Enabling the PCRE2 feature will attempt to automatically find and link with +your system's PCRE2 library via `pkg-config`. If one doesn't exist, then +ripgrep will build PCRE2 from source using your system's C compiler and then +statically link it into the final executable. 
Static linking can be forced even +when there is an available PCRE2 system library by either building ripgrep with +the MUSL target or by setting `PCRE2_SYS_STATIC=1`. + +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). +Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: + +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl +``` + +Applying the `--features` flag from above works as expected. + ### Running tests diff --git a/appveyor.yml b/appveyor.yml index 26daf224..bea157cf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,8 +1,6 @@ -# Inspired from https://github.com/habitat-sh/habitat/blob/master/appveyor.yml cache: - c:\cargo\registry - c:\cargo\git - - c:\projects\ripgrep\target init: - mkdir c:\cargo @@ -19,14 +17,20 @@ environment: PROJECT_NAME: ripgrep RUST_BACKTRACE: full matrix: - - TARGET: i686-pc-windows-gnu - CHANNEL: stable - - TARGET: i686-pc-windows-msvc - CHANNEL: stable - TARGET: x86_64-pc-windows-gnu CHANNEL: stable + BITS: 64 + MSYS2: 1 - TARGET: x86_64-pc-windows-msvc CHANNEL: stable + BITS: 64 + - TARGET: i686-pc-windows-gnu + CHANNEL: stable + BITS: 32 + MSYS2: 1 + - TARGET: i686-pc-windows-msvc + CHANNEL: stable + BITS: 32 matrix: fast_finish: true @@ -35,8 +39,9 @@ matrix: # (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml) install: - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - - rustup-init.exe -y --default-host %TARGET% --no-modify-path - - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin + - rustup-init.exe -y --default-host %TARGET% + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin + - if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH% - rustc -V - cargo -V @@ -46,11 +51,11 @@ build: false # Equivalent to Travis' `script` phase # TODO 
modify this phase as you see fit test_script: - - cargo test --verbose --all + - cargo test --verbose --all --features pcre2 before_deploy: # Generate artifacts for release - - cargo build --release + - cargo build --release --features pcre2 - mkdir staging - copy target\release\rg.exe staging - ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging @@ -4,6 +4,7 @@ extern crate clap; extern crate lazy_static; use std::env; +use std::ffi::OsString; use std::fs::{self, File}; use std::io::{self, Read, Write}; use std::path::Path; @@ -18,6 +19,22 @@ use app::{RGArg, RGArgKind}; mod app; fn main() { + // If our version of Rust has runtime SIMD detection, then set a cfg so + // we know we can test for it. We use this when generating ripgrep's + // --version output. + let version = rustc_version(); + let parsed = match Version::parse(&version) { + Ok(parsed) => parsed, + Err(err) => { + eprintln!("failed to parse `rustc --version`: {}", err); + return; + } + }; + let minimum = Version { major: 1, minor: 27, patch: 0 }; + if version.contains("nightly") || parsed >= minimum { + println!("cargo:rustc-cfg=ripgrep_runtime_cpu"); + } + // OUT_DIR is set by Cargo and it's where any additional build artifacts // are written. 
let outdir = match env::var_os("OUT_DIR") { @@ -182,3 +199,63 @@ fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> { fn ioerr(msg: String) -> io::Error { io::Error::new(io::ErrorKind::Other, msg) } + +fn rustc_version() -> String { + let rustc = env::var_os("RUSTC").unwrap_or(OsString::from("rustc")); + let output = process::Command::new(&rustc) + .arg("--version") + .output() + .unwrap() + .stdout; + String::from_utf8(output).unwrap() +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)] +struct Version { + major: u32, + minor: u32, + patch: u32, +} + +impl Version { + fn parse(mut s: &str) -> Result<Version, String> { + if !s.starts_with("rustc ") { + return Err(format!("unrecognized version string: {}", s)); + } + s = &s["rustc ".len()..]; + + let parts: Vec<&str> = s.split(".").collect(); + if parts.len() < 3 { + return Err(format!("not enough version parts: {:?}", parts)); + } + + let mut num = String::new(); + for c in parts[0].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let major = num.parse::<u32>().map_err(|e| e.to_string())?; + + num.clear(); + for c in parts[1].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + |