Compare commits

...

20 Commits

Author SHA1 Message Date
Andrew Gallant
cc6b6dcf5b fix windows build 2016-09-09 08:53:10 -04:00
Andrew Gallant
48878bbb8f update project name 2016-09-08 21:47:49 -04:00
Andrew Gallant
0766617e07 Refactor how coloring is done.
All in the name of appeasing Windows.
2016-09-08 21:46:14 -04:00
Andrew Gallant
afd99c43d7 fix deploy 2016-09-08 16:35:48 -04:00
Andrew Gallant
96e87ab738 update distributable to include readme and license 2016-09-08 16:21:37 -04:00
Andrew Gallant
a744ec133d Rename xrep to ripgrep. 2016-09-08 16:15:44 -04:00
Andrew Gallant
0042dce949 Hack in Windows console coloring.
The code has suffered and needs refactoring/commenting. BUT... IT WORKS!
2016-09-07 21:54:28 -04:00
Andrew Gallant
ca058d7584 Add support for memory maps.
I though plain `read` had usurped them, but when searching a very small
number of files, mmaps can be around 20% faster on Linux. It'd be really
unfortunate to leave that on the table.

Mmap searching doesn't support contexts yet, but we probably don't really
care. And duplicating that logic doesn't sound fun. Without contexts, mmap
searching is delightfully simple.
2016-09-06 21:47:33 -04:00
Andrew Gallant
af3b56a623 Fix grep match iterator. 2016-09-06 21:45:41 -04:00
Andrew Gallant
5938bed339 Add support for printing column numbers. 2016-09-06 19:50:27 -04:00
Andrew Gallant
feff1849c8 Tweak colors. 2016-09-06 19:35:52 -04:00
Andrew Gallant
9948e0ca07 Only create the Grep searcher once. 2016-09-06 19:33:19 -04:00
Andrew Gallant
fd3e5069b6 Fix required literal handling and add debug prints.
In particular, if we had an inner literal and were doing a case insensitive
search, then the literals are dropped because we previously only allowed
a single inner literal to have an effect. Now we allow alternations of
inner literals, but still don't quite take full advantage.
2016-09-06 19:33:03 -04:00
Andrew Gallant
0891b4a3c0 update appveyor 2016-09-05 22:01:53 -04:00
Andrew Gallant
af48aaa647 another try 2016-09-05 21:57:57 -04:00
Andrew Gallant
ee7f300ae2 windows debug, take 1 2016-09-05 21:46:11 -04:00
Andrew Gallant
a4d8db16f7 Fix glob tests.
When matching directly with a regex, we need to make sure the path is
normalized first.
2016-09-05 21:36:19 -04:00
Andrew Gallant
3bb387abdd Fix glob problem on Windows.
We weren't actually escaping every use of the file path separator. D'oh.
2016-09-05 21:20:19 -04:00
Andrew Gallant
7f0273c347 Fix yellow color to match ack. 2016-09-05 21:19:56 -04:00
Andrew Gallant
5b42999a3d windows, take 3 2016-09-05 21:02:08 -04:00
19 changed files with 1058 additions and 308 deletions

View File

@@ -16,7 +16,7 @@ cache: cargo
env:
global:
- PROJECT_NAME=xrep
- PROJECT_NAME=ripgrep
matrix:
include:
# Nightly channel

View File

@@ -1,14 +1,14 @@
[package]
publish = false
name = "xrep"
name = "ripgrep"
version = "0.1.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Line oriented search tool using Rust's regex library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
@@ -16,7 +16,7 @@ license = "Unlicense/MIT"
[[bin]]
bench = false
path = "src/main.rs"
name = "xrep"
name = "rg"
[dependencies]
crossbeam = "0.2"

View File

@@ -1,6 +1,6 @@
environment:
global:
PROJECT_NAME: xrep
PROJECT_NAME: ripgrep
matrix:
# Nightly channel
- TARGET: i686-pc-windows-gnu
@@ -32,15 +32,14 @@ build: false
# Equivalent to Travis' `script` phase
# TODO modify this phase as you see fit
test_script:
- cargo build --verbose
- cargo test
- cargo test --verbose
before_deploy:
# Generate artifacts for release
- RUSTFLAGS="-C target-feature=+ssse3" cargo build --release --features simd-accel
# TODO(burntsushi): How can we enable SSSE3 on Windows?
- cargo build --release
- mkdir staging
# TODO update this part to copy the artifacts that make sense for your project
- copy target\release\xrep.exe staging
- copy target\release\rg.exe staging
- cd staging
# release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
- 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *

View File

@@ -6,23 +6,22 @@ set -ex
# Generate artifacts for release
mk_artifacts() {
RUSTFLAGS="-C target-feature=+ssse3" cargo build --target $TARGET --release --features simd-accel
RUSTFLAGS="-C target-feature=+ssse3" \
cargo build --target $TARGET --release --features simd-accel
}
mk_tarball() {
# create a "staging" directory
local td=$(mktempd)
local out_dir=$(pwd)
local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
mkdir "$td/$name"
# TODO update this part to copy the artifacts that make sense for your project
# NOTE All Cargo build artifacts will be under the 'target/$TARGET/{debug,release}'
cp target/$TARGET/release/xrep $td
cp target/$TARGET/release/rg "$td/$name/"
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"
pushd $td
# release tarball will look like 'rust-everywhere-v1.2.3-x86_64-unknown-linux-gnu.tar.gz'
tar czf $out_dir/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz *
tar czf "$out_dir/$name.tar.gz" *
popd
rm -r $td
}

View File

@@ -42,7 +42,7 @@ run_test_suite() {
cargo test --target $TARGET
# sanity check the file type
file target/$TARGET/debug/xrep
file target/$TARGET/debug/rg
}
main() {

View File

@@ -6,14 +6,15 @@ authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
log = "0.3"
memchr = "0.1"
memmap = "0.2"
regex = "0.1.75"

View File

@@ -4,6 +4,8 @@
A fast line oriented regex searcher.
*/
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate regex_syntax as syntax;

View File

@@ -1,13 +1,22 @@
/*!
The literals module is responsible for extracting *inner* literals out of the
AST of a regular expression. Normally this is the job of the regex engine
itself, but the regex engine doesn't look for inner literals. Since we're doing
line based searching, we can use them, so we need to do it ourselves.
Note that this implementation is incredibly suspicious. We need something more
principled.
*/
use std::cmp;
use std::iter;
use regex::bytes::Regex;
use syntax::{
Expr, Literals, Lit,
Repeater,
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
};
#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct LiteralSets {
prefixes: Literals,
suffixes: Literals,
@@ -27,6 +36,7 @@ impl LiteralSets {
pub fn to_regex(&self) -> Option<Regex> {
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
debug!("literal prefixes detected: {:?}", self.prefixes);
// When this is true, the regex engine will do a literal scan.
return None;
}
@@ -56,13 +66,27 @@ impl LiteralSets {
if suf_lcs.len() > lit.len() {
lit = suf_lcs;
}
if req.len() > lit.len() {
if req_lits.len() == 1 && req.len() > lit.len() {
lit = req;
}
if lit.is_empty() {
// Special case: if we detected an alternation of inner required
// literals and its longest literal is bigger than the longest
// prefix/suffix, then choose the alternation. In practice, this
// helps with case insensitive matching, which can generate lots of
// inner required literals.
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
debug!("required literals found: {:?}", req_lits);
let alts: Vec<String> =
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
// Literals always compile.
Some(Regex::new(&alts.join("|")).unwrap())
} else if lit.is_empty() {
None
} else {
// Literals always compile.
debug!("required literal found: {:?}", show(lit));
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
}
}
@@ -75,14 +99,30 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
let s: String = chars.iter().cloned().collect();
lits.cross_add(s.as_bytes());
}
Literal { casei: true, .. } => {
lits.cut();
Literal { ref chars, casei: true } => {
for &c in chars {
let cls = CharClass::new(vec![
ClassRange { start: c, end: c },
]).case_fold();
if !lits.add_char_class(&cls) {
lits.cut();
return;
}
}
}
LiteralBytes { ref bytes, casei: false } => {
lits.cross_add(bytes);
}
LiteralBytes { casei: true, .. } => {
lits.cut();
LiteralBytes { ref bytes, casei: true } => {
for &b in bytes {
let cls = ByteClass::new(vec![
ByteRange { start: b, end: b },
]).case_fold();
if !lits.add_byte_class(&cls) {
lits.cut();
return;
}
}
}
Class(_) => {
lits.cut();
@@ -205,3 +245,18 @@ fn bytes_to_regex(bs: &[u8]) -> String {
}
s
}
/// Converts arbitrary bytes to a nice string.
fn show(bs: &[u8]) -> String {
// Why aren't we using this to feed to the regex? Doesn't really matter
// I guess. ---AG
use std::ascii::escape_default;
use std::str;
let mut nice = String::new();
for &b in bs {
let part: Vec<u8> = escape_default(b).collect();
nice.push_str(str::from_utf8(&part).unwrap());
}
nice
}

View File

@@ -152,6 +152,7 @@ impl GrepBuilder {
.unicode(true)
.case_insensitive(self.opts.case_insensitive)
.parse(&self.pattern));
debug!("regex ast:\n{:#?}", expr);
Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
}
}
@@ -253,7 +254,7 @@ impl<'b, 's> Iterator for Iter<'b, 's> {
self.start = self.buf.len();
return None;
}
self.start = mat.end + 1;
self.start = mat.end;
Some(mat)
}
}

View File

@@ -9,13 +9,15 @@ use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
use walkdir::WalkDir;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use out::Out;
use out::{Out, OutBuffer};
use printer::Printer;
use search::{InputBuffer, Searcher};
use search_buffer::BufferSearcher;
use sys;
use types::{FileTypeDef, Types, TypesBuilder};
use walk;
@@ -27,13 +29,13 @@ use Result;
/// If you've never heard of Docopt before, see: http://docopt.org
/// (TL;DR: The CLI parser is generated from the usage string below.)
const USAGE: &'static str = "
Usage: xrep [options] <pattern> [<path> ...]
xrep [options] --files [<path> ...]
xrep [options] --type-list
xrep --help
xrep --version
Usage: rg [options] <pattern> [<path> ...]
rg [options] --files [<path> ...]
rg [options] --type-list
rg --help
rg --version
xrep is like the silver searcher and grep, but faster than both.
rg combines the usability of the silver search with the raw speed of grep.
Common options:
-a, --text Search binary files as if they were text.
@@ -75,6 +77,11 @@ Less common options:
-C, --context NUM
Show NUM lines before and after each match.
--column
Show column numbers (1 based) in output. This only shows the column
numbers for the first match on each line. Note that this doesn't try
to account for Unicode. One byte is equal to one column.
--context-separator ARG
The string to use when separating non-continuous context lines. Escape
sequences may be used. [default: --]
@@ -106,8 +113,16 @@ Less common options:
The byte to use for a line terminator. Escape sequences may be used.
[default: \\n]
--mmap
Search using memory maps when possible. This is enabled by default
when ripgrep thinks it will be faster. (Note that mmap searching
doesn't current support the various context related options.)
--no-mmap
Never use memory maps, even when they might be faster.
--no-ignore
Don't respect ignore files (.gitignore, .xrepignore, etc.)
Don't respect ignore files (.gitignore, .rgignore, etc.)
--no-ignore-parent
Don't respect ignore files in parent directories.
@@ -123,7 +138,7 @@ Less common options:
(capped at 6). [default: 0]
--version
Show the version number of xrep and exit.
Show the version number of ripgrep and exit.
File type management options:
--type-list
@@ -138,7 +153,7 @@ File type management options:
";
/// RawArgs are the args as they are parsed from Docopt. They aren't used
/// directly by the rest of xrep.
/// directly by the rest of ripgrep.
#[derive(Debug, RustcDecodable)]
pub struct RawArgs {
arg_pattern: String,
@@ -146,6 +161,7 @@ pub struct RawArgs {
flag_after_context: usize,
flag_before_context: usize,
flag_color: String,
flag_column: bool,
flag_context: usize,
flag_context_separator: String,
flag_count: bool,
@@ -160,10 +176,12 @@ pub struct RawArgs {
flag_line_number: bool,
flag_line_terminator: String,
flag_literal: bool,
flag_mmap: bool,
flag_no_heading: bool,
flag_no_ignore: bool,
flag_no_ignore_parent: bool,
flag_no_line_number: bool,
flag_no_mmap: bool,
flag_pretty: bool,
flag_quiet: bool,
flag_replace: Option<String>,
@@ -186,17 +204,20 @@ pub struct Args {
after_context: usize,
before_context: usize,
color: bool,
column: bool,
context_separator: Vec<u8>,
count: bool,
eol: u8,
files: bool,
follow: bool,
glob_overrides: Option<Gitignore>,
grep: Grep,
heading: bool,
hidden: bool,
ignore_case: bool,
invert_match: bool,
line_number: bool,
mmap: bool,
no_ignore: bool,
no_ignore_parent: bool,
quiet: bool,
@@ -210,7 +231,7 @@ pub struct Args {
}
impl RawArgs {
/// Convert arguments parsed into a configuration used by xrep.
/// Convert arguments parsed into a configuration used by ripgrep.
fn to_args(&self) -> Result<Args> {
let pattern = {
let pattern =
@@ -243,6 +264,19 @@ impl RawArgs {
} else {
(self.flag_after_context, self.flag_before_context)
};
let mmap =
if before_context > 0 || after_context > 0 || self.flag_no_mmap {
false
} else if self.flag_mmap {
true
} else {
// If we're only searching a few paths and all of them are
// files, then memory maps are probably faster.
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
};
if mmap {
debug!("will try to use memory maps");
}
let eol = {
let eol = unescape(&self.flag_line_terminator);
if eol.is_empty() {
@@ -283,23 +317,32 @@ impl RawArgs {
btypes.add_defaults();
try!(self.add_types(&mut btypes));
let types = try!(btypes.build());
let grep = try!(
GrepBuilder::new(&pattern)
.case_insensitive(self.flag_ignore_case)
.line_terminator(eol)
.build()
);
let mut args = Args {
pattern: pattern,
paths: paths,
after_context: after_context,
before_context: before_context,
color: color,
column: self.flag_column,
context_separator: unescape(&self.flag_context_separator),
count: self.flag_count,
eol: eol,
files: self.flag_files,
follow: self.flag_follow,
glob_overrides: glob_overrides,
grep: grep,
heading: !self.flag_no_heading && self.flag_heading,
hidden: self.flag_hidden,
ignore_case: self.flag_ignore_case,
invert_match: self.flag_invert_match,
line_number: !self.flag_no_line_number && self.flag_line_number,
mmap: mmap,
no_ignore: self.flag_no_ignore,
no_ignore_parent: self.flag_no_ignore_parent,
quiet: self.flag_quiet,
@@ -345,7 +388,7 @@ impl Args {
///
/// If a CLI usage error occurred, then exit the process and print a usage
/// or error message. Similarly, if the user requested the version of
/// xrep, then print the version and exit.
/// ripgrep, then print the version and exit.
///
/// Also, initialize a global logger.
pub fn parse() -> Result<Args> {
@@ -367,7 +410,7 @@ impl Args {
raw.to_args().map_err(From::from)
}
/// Returns true if xrep should print the files it will search and exit
/// Returns true if ripgrep should print the files it will search and exit
/// (but not do any actual searching).
pub fn files(&self) -> bool {
self.files
@@ -378,12 +421,8 @@ impl Args {
/// basic searching of regular expressions in a single buffer.
///
/// The pattern and other flags are taken from the command line.
pub fn grep(&self) -> Result<Grep> {
GrepBuilder::new(&self.pattern)
.case_insensitive(self.ignore_case)
.line_terminator(self.eol)
.build()
.map_err(From::from)
pub fn grep(&self) -> Grep {
self.grep.clone()
}
/// Creates a new input buffer that is used in searching.
@@ -393,10 +432,16 @@ impl Args {
inp
}
/// Whether we should prefer memory maps for searching or not.
pub fn mmap(&self) -> bool {
self.mmap
}
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr, self.color)
pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
.column(self.column)
.context_separator(self.context_separator.clone())
.eol(self.eol)
.heading(self.heading)
@@ -410,8 +455,8 @@ impl Args {
/// Create a new printer of search results for an entire file that writes
/// to the writer given.
pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
let mut out = Out::new(wtr);
pub fn out(&self) -> Out {
let mut out = Out::new(self.color);
if self.heading && !self.count {
out = out.file_separator(b"".to_vec());
} else if self.before_context > 0 || self.after_context > 0 {
@@ -420,6 +465,11 @@ impl Args {
out
}
/// Create a new buffer for use with searching.
pub fn outbuf(&self) -> OutBuffer {
OutBuffer::new(self.color)
}
/// Return the paths that should be searched.
pub fn paths(&self) -> &[PathBuf] {
&self.paths
@@ -428,7 +478,7 @@ impl Args {
/// Create a new line based searcher whose configuration is taken from the
/// command line. This searcher supports a dizzying array of features:
/// inverted matching, line counting, context control and more.
pub fn searcher<'a, R: io::Read, W: Send + io::Write>(
pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
&self,
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
@@ -446,6 +496,24 @@ impl Args {
.text(self.text)
}
/// Create a new line based searcher whose configuration is taken from the
/// command line. This search operates on an entire file all once (which
/// may have been memory mapped).
pub fn searcher_buffer<'a, W: Send + Terminal>(
&self,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
) -> BufferSearcher<'a, W> {
BufferSearcher::new(printer, grep, path, buf)
.count(self.count)
.eol(self.eol)
.line_number(self.line_number)
.invert_match(self.invert_match)
.text(self.text)
}
/// Returns the number of worker search threads that should be used.
pub fn threads(&self) -> usize {
self.threads
@@ -456,8 +524,8 @@ impl Args {
&self.type_defs
}
/// Returns true if xrep should print the type definitions currently loaded
/// and then exit.
/// Returns true if ripgrep should print the type definitions currently
/// loaded and then exit.
pub fn type_list(&self) -> bool {
self.type_list
}

View File

@@ -9,7 +9,7 @@ The motivation for this submodule is performance and portability:
2. We could shell out to a `git` sub-command like ls-files or status, but it
seems better to not rely on the existence of external programs for a search
tool. Besides, we need to implement this logic anyway to support things like
an .xrepignore file.
an .rgignore file.
The key implementation detail here is that a single gitignore file is compiled
into a single RegexSet, which can be used to report which globs match a
@@ -379,7 +379,7 @@ mod tests {
};
}
const ROOT: &'static str = "/home/foobar/rust/xrep";
const ROOT: &'static str = "/home/foobar/rust/rg";
ignored!(ig1, ROOT, "months", "months");
ignored!(ig2, ROOT, "*.lock", "Cargo.lock");

View File

@@ -29,7 +29,6 @@ to make its way into `glob` proper.
use std::error::Error as StdError;
use std::fmt;
use std::iter;
use std::path;
use std::str;
use regex;
@@ -214,7 +213,7 @@ impl Pattern {
/// regular expression and will represent the matching semantics of this
/// glob pattern and the options given.
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
let sep = path::MAIN_SEPARATOR.to_string();
let seps = regex::quote(r"/\");
let mut re = String::new();
re.push_str("(?-u)");
if options.case_insensitive {
@@ -235,26 +234,27 @@ impl Pattern {
}
Token::Any => {
if options.require_literal_separator {
re.push_str(&format!("[^{}]", regex::quote(&sep)));
re.push_str(&format!("[^{}]", seps));
} else {
re.push_str(".");
}
}
Token::ZeroOrMore => {
if options.require_literal_separator {
re.push_str(&format!("[^{}]*", regex::quote(&sep)));
re.push_str(&format!("[^{}]*", seps));
} else {
re.push_str(".*");
}
}
Token::RecursivePrefix => {
re.push_str(&format!("(?:{sep}?|.*{sep})", sep=sep));
re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
}
Token::RecursiveSuffix => {
re.push_str(&format!("(?:{sep}?|{sep}.*)", sep=sep));
re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
}
Token::RecursiveZeroOrMore => {
re.push_str(&format!("(?:{sep}|{sep}.*{sep})", sep=sep));
re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
sep=seps));
}
Token::Class { negated, ref ranges } => {
re.push('[');
@@ -414,6 +414,8 @@ impl<'a> Parser<'a> {
#[cfg(test)]
mod tests {
use std::path::Path;
use regex::bytes::Regex;
use super::{Error, Pattern, MatchOptions, SetBuilder, Token};
@@ -461,8 +463,9 @@ mod tests {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
let path = &Path::new($path).to_str().unwrap();
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
assert!(re.is_match($path.as_bytes()));
assert!(re.is_match(path.as_bytes()));
}
};
}
@@ -475,8 +478,12 @@ mod tests {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
let path = &Path::new($path).to_str().unwrap();
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
assert!(!re.is_match($path.as_bytes()));
// println!("PATTERN: {}", $pat);
// println!("REGEX: {:?}", re);
// println!("PATH: {}", path);
assert!(!re.is_match(path.as_bytes()));
}
};
}
@@ -557,12 +564,11 @@ mod tests {
case_insensitive: true,
require_literal_separator: false,
};
const SEP: char = ::std::path::MAIN_SEPARATOR;
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
toregex!(re_slash1, "?", format!("^[^{}]$", SEP), SLASHLIT);
toregex!(re_slash2, "*", format!("^[^{}]*$", SEP), SLASHLIT);
toregex!(re_slash1, "?", r"^[^/\\]$", SLASHLIT);
toregex!(re_slash2, "*", r"^[^/\\]*$", SLASHLIT);
toregex!(re1, "a", "^a$");
toregex!(re2, "?", "^.$");
@@ -638,6 +644,7 @@ mod tests {
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs

View File

@@ -5,7 +5,7 @@ whether a *single* file path should be searched or not.
In general, there are two ways to ignore a particular file:
1. Specify an ignore rule in some "global" configuration, such as a
$HOME/.xrepignore or on the command line.
$HOME/.rgignore or on the command line.
2. A specific ignore file (like .gitignore) found during directory traversal.
The `IgnoreDir` type handles ignore patterns for any one particular directory
@@ -24,7 +24,7 @@ use types::Types;
const IGNORE_NAMES: &'static [&'static str] = &[
".gitignore",
".agignore",
".xrepignore",
".rgignore",
];
/// Represents an error that can occur when parsing a gitignore file.
@@ -257,8 +257,8 @@ pub struct IgnoreDir {
/// A single accumulation of glob patterns for this directory, matched
/// using gitignore semantics.
///
/// This will include patterns from xrepignore as well. The patterns are
/// ordered so that precedence applies automatically (e.g., xrepignore
/// This will include patterns from rgignore as well. The patterns are
/// ordered so that precedence applies automatically (e.g., rgignore
/// patterns procede gitignore patterns).
gi: Option<Gitignore>,
// TODO(burntsushi): Matching other types of glob patterns that don't
@@ -422,7 +422,7 @@ mod tests {
};
}
const ROOT: &'static str = "/home/foobar/rust/xrep";
const ROOT: &'static str = "/home/foobar/rust/rg";
ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");

View File

@@ -34,10 +34,12 @@ use std::thread;
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
use grep::Grep;
use memmap::{Mmap, Protection};
use term::Terminal;
use walkdir::DirEntry;
use args::Args;
use out::Out;
use out::{NoColorTerminal, Out, OutBuffer};
use printer::Printer;
use search::InputBuffer;
@@ -61,6 +63,7 @@ mod ignore;
mod out;
mod printer;
mod search;
mod search_buffer;
mod sys;
mod terminal;
mod types;
@@ -87,7 +90,8 @@ fn run(args: Args) -> Result<u64> {
return run_types(args);
}
let args = Arc::new(args);
let out = Arc::new(Mutex::new(args.out(io::stdout())));
let out = Arc::new(Mutex::new(args.out()));
let outbuf = args.outbuf();
let mut workers = vec![];
let mut workq = {
@@ -98,8 +102,8 @@ fn run(args: Args) -> Result<u64> {
out: out.clone(),
chan_work: stealer.clone(),
inpbuf: args.input_buffer(),
outbuf: Some(vec![]),
grep: try!(args.grep()),
outbuf: Some(outbuf.clone()),
grep: args.grep(),
match_count: 0,
};
workers.push(thread::spawn(move || worker.run()));
@@ -126,7 +130,8 @@ fn run(args: Args) -> Result<u64> {
}
fn run_files(args: Args) -> Result<u64> {
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
let mut printer = args.printer(term);
let mut file_count = 0;
for p in args.paths() {
if p == Path::new("-") {
@@ -143,7 +148,8 @@ fn run_files(args: Args) -> Result<u64> {
}
fn run_types(args: Args) -> Result<u64> {
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
let mut printer = args.printer(term);
let mut ty_count = 0;
for def in args.type_defs() {
printer.type_def(def);
@@ -165,10 +171,10 @@ enum WorkReady {
struct Worker {
args: Arc<Args>,
out: Arc<Mutex<Out<io::Stdout>>>,
out: Arc<Mutex<Out>>,
chan_work: Stealer<Work>,
inpbuf: InputBuffer,
outbuf: Option<Vec<u8>>,
outbuf: Option<OutBuffer>,
grep: Grep,
match_count: u64,
}
@@ -196,7 +202,7 @@ impl Worker {
let mut printer = self.args.printer(outbuf);
self.do_work(&mut printer, work);
let outbuf = printer.into_inner();
if !outbuf.is_empty() {
if !outbuf.get_ref().is_empty() {
let mut out = self.out.lock().unwrap();
out.write(&outbuf);
}
@@ -205,7 +211,7 @@ impl Worker {
self.match_count
}
fn do_work<W: Send + io::Write>(
fn do_work<W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
work: WorkReady,
@@ -221,7 +227,11 @@ impl Worker {
if let Ok(p) = path.strip_prefix("./") {
path = p;
}
self.search(printer, path, file)
if self.args.mmap() {
self.search_mmap(printer, path, &file)
} else {
self.search(printer, path, file)
}
}
};
match result {
@@ -234,7 +244,7 @@ impl Worker {
}
}
fn search<R: io::Read, W: Send + io::Write>(
fn search<R: io::Read, W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
@@ -248,4 +258,23 @@ impl Worker {
rdr,
).run().map_err(From::from)
}
fn search_mmap<W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
file: &File,
) -> Result<u64> {
if try!(file.metadata()).len() == 0 {
// Opening a memory map with an empty file results in an error.
return Ok(0);
}
let mmap = try!(Mmap::open(file, Protection::Read));
Ok(self.args.searcher_buffer(
printer,
&self.grep,
path,
unsafe { mmap.as_slice() },
).run())
}
}

View File

@@ -1,4 +1,40 @@
use std::io::{self, Write};
use std::sync::Arc;
use term::{self, Terminal};
use term::color::Color;
use term::terminfo::TermInfo;
#[cfg(windows)]
use term::WinConsole;
use terminal::TerminfoTerminal;
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
/// Gets a terminal that supports color if available.
#[cfg(windows)]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
WinConsole::new(stdout)
.ok()
.map(|t| Box::new(t) as StdoutTerminal)
.unwrap_or_else(|| {
let stdout = io::stdout();
Box::new(NoColorTerminal::new(stdout)) as StdoutTerminal
})
}
/// Gets a terminal that supports color if available.
#[cfg(not(windows))]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
if !color || TERMINFO.is_none() {
Box::new(NoColorTerminal::new(stdout))
} else {
let info = TERMINFO.clone().unwrap();
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
}
}
/// Out controls the actual output of all search results for a particular file
/// to the end user.
@@ -6,17 +42,17 @@ use std::io::{self, Write};
/// (The difference between Out and Printer is that a Printer works with
/// individual search results where as Out works with search results for each
/// file as a whole. For example, it knows when to print a file separator.)
pub struct Out<W: io::Write> {
wtr: io::BufWriter<W>,
pub struct Out {
term: StdoutTerminal,
printed: bool,
file_separator: Option<Vec<u8>>,
}
impl<W: io::Write> Out<W> {
impl Out {
/// Create a new Out that writes to the wtr given.
pub fn new(wtr: W) -> Out<W> {
pub fn new(color: bool) -> Out {
Out {
wtr: io::BufWriter::new(wtr),
term: term_stdout(color),
printed: false,
file_separator: None,
}
@@ -26,22 +62,422 @@ impl<W: io::Write> Out<W> {
/// By default, no separator is printed.
///
/// If sep is empty, then no file separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
self.file_separator = Some(sep);
self
}
/// Write the search results of a single file to the underlying wtr and
/// flush wtr.
pub fn write(&mut self, buf: &[u8]) {
pub fn write(&mut self, buf: &OutBuffer) {
if let Some(ref sep) = self.file_separator {
if self.printed {
let _ = self.wtr.write_all(sep);
let _ = self.wtr.write_all(b"\n");
let _ = self.term.write_all(sep);
let _ = self.term.write_all(b"\n");
}
}
let _ = self.wtr.write_all(buf);
let _ = self.wtr.flush();
match *buf {
OutBuffer::Colored(ref tt) => {
let _ = self.term.write_all(tt.get_ref());
}
OutBuffer::Windows(ref w) => {
w.print_stdout(&mut self.term);
}
OutBuffer::NoColor(ref buf) => {
let _ = self.term.write_all(buf);
}
}
let _ = self.term.flush();
self.printed = true;
}
}
/// OutBuffer corresponds to the final output buffer for search results. All
/// search results are written to a buffer and then a buffer is flushed to
/// stdout only after the full search has completed.
#[derive(Clone, Debug)]
pub enum OutBuffer {
Colored(TerminfoTerminal<Vec<u8>>),
Windows(WindowsBuffer),
NoColor(Vec<u8>),
}
#[derive(Clone, Debug)]
pub struct WindowsBuffer {
buf: Vec<u8>,
pos: usize,
colors: Vec<WindowsColor>,
}
#[derive(Clone, Debug)]
pub struct WindowsColor {
pos: usize,
opt: WindowsOption,
}
#[derive(Clone, Debug)]
pub enum WindowsOption {
Foreground(Color),
Background(Color),
Reset,
}
lazy_static! {
static ref TERMINFO: Option<Arc<TermInfo>> = {
match TermInfo::from_env() {
Ok(info) => Some(Arc::new(info)),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
impl OutBuffer {
/// Create a new output buffer.
///
/// When color is true, the buffer will attempt to support coloring.
pub fn new(color: bool) -> OutBuffer {
// If we want color, build a TerminfoTerminal and see if the current
// environment supports coloring. If not, bail with NoColor. To avoid
// losing our writer (ownership), do this the long way.
if !color {
return OutBuffer::NoColor(vec![]);
}
if cfg!(windows) {
return OutBuffer::Windows(WindowsBuffer {
buf: vec![],
pos: 0,
colors: vec![]
});
}
if TERMINFO.is_none() {
return OutBuffer::NoColor(vec![]);
}
let info = TERMINFO.clone().unwrap();
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
if !tt.supports_color() {
debug!("environment doesn't support coloring");
return OutBuffer::NoColor(tt.into_inner());
}
OutBuffer::Colored(tt)
}
/// Clear the give buffer of all search results such that it is reusable
/// in another search.
pub fn clear(&mut self) {
match *self {
OutBuffer::Colored(ref mut tt) => {
tt.get_mut().clear();
}
OutBuffer::Windows(ref mut win) => {
win.buf.clear();
win.colors.clear();
win.pos = 0;
}
OutBuffer::NoColor(ref mut buf) => {
buf.clear();
}
}
}
fn map_result<F, G>(
&mut self,
mut f: F,
mut g: G,
) -> term::Result<()>
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
match *self {
OutBuffer::Colored(ref mut w) => f(w),
OutBuffer::Windows(ref mut w) => g(w),
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
}
}
fn map_bool<F, G>(
&self,
mut f: F,
mut g: G,
) -> bool
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
G: FnMut(&WindowsBuffer) -> bool {
match *self {
OutBuffer::Colored(ref w) => f(w),
OutBuffer::Windows(ref w) => g(w),
OutBuffer::NoColor(_) => false,
}
}
}
impl io::Write for OutBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match *self {
OutBuffer::Colored(ref mut w) => w.write(buf),
OutBuffer::Windows(ref mut w) => w.write(buf),
OutBuffer::NoColor(ref mut w) => w.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl term::Terminal for OutBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
}
fn supports_attr(&self, attr: term::Attr) -> bool {
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
}
fn reset(&mut self) -> term::Result<()> {
self.map_result(|w| w.reset(), |w| w.reset())
}
fn supports_reset(&self) -> bool {
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
}
fn supports_color(&self) -> bool {
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
}
fn cursor_up(&mut self) -> term::Result<()> {
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
}
fn delete_line(&mut self) -> term::Result<()> {
self.map_result(|w| w.delete_line(), |w| w.delete_line())
}
fn carriage_return(&mut self) -> term::Result<()> {
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
}
fn get_ref(&self) -> &Vec<u8> {
match *self {
OutBuffer::Colored(ref w) => w.get_ref(),
OutBuffer::Windows(ref w) => w.get_ref(),
OutBuffer::NoColor(ref w) => w,
}
}
fn get_mut(&mut self) -> &mut Vec<u8> {
match *self {
OutBuffer::Colored(ref mut w) => w.get_mut(),
OutBuffer::Windows(ref mut w) => w.get_mut(),
OutBuffer::NoColor(ref mut w) => w,
}
}
fn into_inner(self) -> Vec<u8> {
match self {
OutBuffer::Colored(w) => w.into_inner(),
OutBuffer::Windows(w) => w.into_inner(),
OutBuffer::NoColor(w) => w,
}
}
}
impl WindowsBuffer {
fn push(&mut self, opt: WindowsOption) {
let pos = self.pos;
self.colors.push(WindowsColor { pos: pos, opt: opt });
}
}
impl WindowsBuffer {
/// Print the contents to the given terminal.
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
if !tt.supports_color() {
let _ = tt.write_all(&self.buf);
let _ = tt.flush();
return;
}
let mut last = 0;
for col in &self.colors {
let _ = tt.write_all(&self.buf[last..col.pos]);
match col.opt {
WindowsOption::Foreground(c) => {
let _ = tt.fg(c);
}
WindowsOption::Background(c) => {
let _ = tt.bg(c);
}
WindowsOption::Reset => {
let _ = tt.reset();
}
}
last = col.pos;
}
let _ = tt.write_all(&self.buf[last..]);
let _ = tt.flush();
}
}
impl io::Write for WindowsBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let n = try!(self.buf.write(buf));
self.pos += n;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl term::Terminal for WindowsBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Foreground(fg));
Ok(())
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Background(bg));
Ok(())
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
self.push(WindowsOption::Reset);
Ok(())
}
fn supports_reset(&self) -> bool {
true
}
fn supports_color(&self) -> bool {
true
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &Vec<u8> {
&self.buf
}
fn get_mut(&mut self) -> &mut Vec<u8> {
&mut self.buf
}
fn into_inner(self) -> Vec<u8> {
self.buf
}
}
/// NoColorTerminal implements Terminal, but supports no coloring.
///
/// Its useful when an API requires a Terminal, but coloring isn't needed.
pub struct NoColorTerminal<W> {
wtr: W,
}
impl<W: Send + io::Write> NoColorTerminal<W> {
/// Wrap the given writer in a Terminal interface.
pub fn new(wtr: W) -> NoColorTerminal<W> {
NoColorTerminal {
wtr: wtr,
}
}
}
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.wtr.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.wtr.flush()
}
}
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_reset(&self) -> bool {
false
}
fn supports_color(&self) -> bool {
false
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &W {
&self.wtr
}
fn get_mut(&mut self) -> &mut W {
&mut self.wtr
}
fn into_inner(self) -> W {
self.wtr
}
}

View File

@@ -1,17 +1,11 @@
use std::io::{self, Write};
use std::path::Path;
use std::sync::Arc;
use regex::bytes::Regex;
use term::{self, Terminal};
use term::color::*;
use term::terminfo::TermInfo;
use term::{Attr, Terminal};
use term::color;
use terminal::TerminfoTerminal;
use types::FileTypeDef;
use self::Writer::*;
/// Printer encapsulates all output logic for searching.
///
/// Note that we currently ignore all write errors. It's probably worthwhile
@@ -19,9 +13,11 @@ use self::Writer::*;
/// writes to memory, neither of which commonly fail.
pub struct Printer<W> {
/// The underlying writer.
wtr: Writer<W>,
wtr: W,
/// Whether anything has been printed to wtr yet.
has_printed: bool,
/// Whether to show column numbers for the first match or not.
column: bool,
/// The string to use to separate non-contiguous runs of context lines.
context_separator: Vec<u8>,
/// The end-of-line terminator used by the printer. In general, eols are
@@ -40,14 +36,13 @@ pub struct Printer<W> {
with_filename: bool,
}
impl<W: Send + io::Write> Printer<W> {
impl<W: Send + Terminal> Printer<W> {
/// Create a new printer that writes to wtr.
///
/// `color` should be true if the printer should try to use coloring.
pub fn new(wtr: W, color: bool) -> Printer<W> {
pub fn new(wtr: W) -> Printer<W> {
Printer {
wtr: Writer::new(wtr, color),
wtr: wtr,
has_printed: false,
column: false,
context_separator: "--".to_string().into_bytes(),
eol: b'\n',
heading: false,
@@ -57,6 +52,13 @@ impl<W: Send + io::Write> Printer<W> {
}
}
/// When set, column numbers will be printed for the first match on each
/// line.
pub fn column(mut self, yes: bool) -> Printer<W> {
self.column = yes;
self
}
/// Set the context separator. The default is `--`.
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.context_separator = sep;
@@ -107,7 +109,7 @@ impl<W: Send + io::Write> Printer<W> {
/// Flushes the underlying writer and returns it.
pub fn into_inner(mut self) -> W {
let _ = self.wtr.flush();
self.wtr.into_inner()
self.wtr
}
/// Prints a type definition.
@@ -173,6 +175,11 @@ impl<W: Send + io::Write> Printer<W> {
if let Some(line_number) = line_number {
self.line_number(line_number, b':');
}
if self.column {
let c = re.find(&buf[start..end]).map(|(s, _)| s + 1).unwrap_or(0);
self.write(c.to_string().as_bytes());
self.write(b":");
}
if self.replace.is_some() {
let line = re.replace_all(
&buf[start..end], &**self.replace.as_ref().unwrap());
@@ -186,15 +193,15 @@ impl<W: Send + io::Write> Printer<W> {
}
pub fn write_match(&mut self, re: &Regex, buf: &[u8]) {
if !self.wtr.is_color() {
if !self.wtr.supports_color() {
self.write(buf);
return;
}
let mut last_written = 0;
for (s, e) in re.find_iter(buf) {
self.write(&buf[last_written..s]);
let _ = self.wtr.fg(BRIGHT_RED);
let _ = self.wtr.attr(term::Attr::Bold);
let _ = self.wtr.fg(color::BRIGHT_RED);
let _ = self.wtr.attr(Attr::Bold);
self.write(&buf[s..e]);
let _ = self.wtr.reset();
last_written = e;
@@ -226,22 +233,24 @@ impl<W: Send + io::Write> Printer<W> {
}
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
if self.wtr.is_color() {
let _ = self.wtr.fg(GREEN);
if self.wtr.supports_color() {
let _ = self.wtr.fg(color::BRIGHT_GREEN);
let _ = self.wtr.attr(Attr::Bold);
}
self.write(path.as_ref().to_string_lossy().as_bytes());
self.write_eol();
if self.wtr.is_color() {
if self.wtr.supports_color() {
let _ = self.wtr.reset();
}
}
fn line_number(&mut self, n: u64, sep: u8) {
if self.wtr.is_color() {
let _ = self.wtr.fg(YELLOW);
if self.wtr.supports_color() {
let _ = self.wtr.fg(color::BRIGHT_BLUE);
let _ = self.wtr.attr(Attr::Bold);
}
self.write(n.to_string().as_bytes());
if self.wtr.is_color() {
if self.wtr.supports_color() {
let _ = self.wtr.reset();
}
self.write(&[sep]);
@@ -260,148 +269,3 @@ impl<W: Send + io::Write> Printer<W> {
self.write(&[eol]);
}
}
enum Writer<W> {
Colored(TerminfoTerminal<W>),
NoColor(W),
}
lazy_static! {
static ref TERMINFO: Option<Arc<TermInfo>> = {
match term::terminfo::TermInfo::from_env() {
Ok(info) => Some(Arc::new(info)),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
impl<W: Send + io::Write> Writer<W> {
fn new(wtr: W, color: bool) -> Writer<W> {
// If we want color, build a TerminfoTerminal and see if the current
// environment supports coloring. If not, bail with NoColor. To avoid
// losing our writer (ownership), do this the long way.
if !color || TERMINFO.is_none() {
return NoColor(wtr);
}
let info = TERMINFO.clone().unwrap();
let tt = TerminfoTerminal::new_with_terminfo(wtr, info);
if !tt.supports_color() {
debug!("environment doesn't support coloring");
return NoColor(tt.into_inner());
}
Colored(tt)
}
fn is_color(&self) -> bool {
match *self {
Colored(_) => true,
NoColor(_) => false,
}
}
fn map_result<F>(
&mut self,
mut f: F,
) -> term::Result<()>
where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()> {
match *self {
Colored(ref mut w) => f(w),
NoColor(_) => Err(term::Error::NotSupported),
}
}
fn map_bool<F>(
&self,
mut f: F,
) -> bool
where F: FnMut(&TerminfoTerminal<W>) -> bool {
match *self {
Colored(ref w) => f(w),
NoColor(_) => false,
}
}
}
impl<W: Send + io::Write> io::Write for Writer<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match *self {
Colored(ref mut w) => w.write(buf),
NoColor(ref mut w) => w.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> {
match *self {
Colored(ref mut w) => w.flush(),
NoColor(ref mut w) => w.flush(),
}
}
}
impl<W: Send + io::Write> term::Terminal for Writer<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.fg(fg))
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.bg(bg))
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
self.map_result(|w| w.attr(attr))
}
fn supports_attr(&self, attr: term::Attr) -> bool {
self.map_bool(|w| w.supports_attr(attr))
}
fn reset(&mut self) -> term::Result<()> {
self.map_result(|w| w.reset())
}
fn supports_reset(&self) -> bool {
self.map_bool(|w| w.supports_reset())
}
fn supports_color(&self) -> bool {
self.map_bool(|w| w.supports_color())
}
fn cursor_up(&mut self) -> term::Result<()> {
self.map_result(|w| w.cursor_up())
}
fn delete_line(&mut self) -> term::Result<()> {
self.map_result(|w| w.delete_line())
}
fn carriage_return(&mut self) -> term::Result<()> {
self.map_result(|w| w.carriage_return())
}
fn get_ref(&self) -> &W {
match *self {
Colored(ref w) => w.get_ref(),
NoColor(ref w) => w,
}
}
fn get_mut(&mut self) -> &mut W {
match *self {
Colored(ref mut w) => w.get_mut(),
NoColor(ref mut w) => w,
}
}
fn into_inner(self) -> W {
match self {
Colored(w) => w.into_inner(),
NoColor(w) => w,
}
}
}

View File

@@ -11,6 +11,7 @@ use std::path::{Path, PathBuf};
use grep::{Grep, Match};
use memchr::{memchr, memrchr};
use term::Terminal;
use printer::Printer;
@@ -74,14 +75,14 @@ pub struct Searcher<'a, R, W: 'a> {
/// Options for configuring search.
#[derive(Clone)]
struct Options {
after_context: usize,
before_context: usize,
count: bool,
eol: u8,
invert_match: bool,
line_number: bool,
text: bool,
pub struct Options {
pub after_context: usize,
pub before_context: usize,
pub count: bool,
pub eol: u8,
pub invert_match: bool,
pub line_number: bool,
pub text: bool,
}
impl Default for Options {
@@ -98,7 +99,7 @@ impl Default for Options {
}
}
impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
/// Create a new searcher.
///
/// `inp` is a reusable input buffer that is used as scratch space by this
@@ -219,14 +220,11 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
self.print_inverted_matches(upto);
}
} else if matched {
self.match_count += 1;
if !self.opts.count {
let start = self.last_match.start();
let end = self.last_match.end();
self.print_after_context(start);
self.print_before_context(start);
self.print_match(start, end);
}
let start = self.last_match.start();
let end = self.last_match.end();
self.print_after_context(start);
self.print_before_context(start);
self.print_match(start, end);
}
if matched {
self.inp.pos = self.last_match.end();
@@ -275,11 +273,8 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
debug_assert!(self.opts.invert_match);
let mut it = IterLines::new(self.opts.eol, self.inp.pos);
while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
if !self.opts.count {
self.print_match(start, end);
}
self.print_match(start, end);
self.inp.pos = end;
self.match_count += 1;
}
}
@@ -325,11 +320,15 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
#[inline(always)]
fn print_match(&mut self, start: usize, end: usize) {
self.match_count += 1;
if self.opts.count {
return;
}
self.print_separator(start);
self.count_lines(start);
self.add_line(end);
self.printer.matched(
self.grep.regex(), &self.path,
self.grep.regex(), self.path,
&self.inp.buf, start, end, self.line_count);
self.last_printed = end;
self.after_context_remaining = self.opts.after_context;
@@ -535,7 +534,7 @@ impl InputBuffer {
///
/// Note that this may return both false positives and false negatives.
#[inline(always)]
fn is_binary(buf: &[u8]) -> bool {
pub fn is_binary(buf: &[u8]) -> bool {
if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
return true;
}
@@ -544,7 +543,7 @@ fn is_binary(buf: &[u8]) -> bool {
/// Count the number of lines in the given buffer.
#[inline(always)]
fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
pub fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
let mut count = 0;
while let Some(pos) = memchr(eol, buf) {
count += 1;
@@ -575,7 +574,7 @@ fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
/// advance over the positions of each line. We neglect that approach to avoid
/// the borrow in the search code. (Because the borrow prevents composition
/// through other mutable methods.)
struct IterLines {
pub struct IterLines {
eol: u8,
pos: usize,
}
@@ -585,7 +584,7 @@ impl IterLines {
///
/// The buffer is passed to the `next` method.
#[inline(always)]
fn new(eol: u8, start: usize) -> IterLines {
pub fn new(eol: u8, start: usize) -> IterLines {
IterLines {
eol: eol,
pos: start,
@@ -597,7 +596,7 @@ impl IterLines {
///
/// The range returned includes the new line.
#[inline(always)]
fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
match memchr(self.eol, &buf[self.pos..]) {
None => {
if self.pos < buf.len() {
@@ -689,7 +688,9 @@ mod tests {
use std::path::Path;
use grep::{Grep, GrepBuilder};
use term::Terminal;
use out::OutBuffer;
use printer::Printer;
use super::{InputBuffer, Searcher, start_of_previous_lines};
@@ -732,7 +733,7 @@ fn main() {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
@@ -740,14 +741,15 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(1);
let mut pp = Printer::new(vec![], false).with_filename(true);
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = Searcher::new(
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
map(searcher).run().unwrap()
};
(count, String::from_utf8(pp.into_inner()).unwrap())
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
}
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
@@ -756,14 +758,15 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(4096);
let mut pp = Printer::new(vec![], false).with_filename(true);
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = Searcher::new(
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
map(searcher).run().unwrap()
};
(count, String::from_utf8(pp.into_inner()).unwrap())
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
}
#[test]
@@ -870,7 +873,7 @@ fn main() {
}
#[test]
fn basic_search() {
fn basic_search1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
@@ -887,7 +890,6 @@ fn main() {
assert_eq!(out, "");
}
#[test]
fn binary_text() {
let text = "Sherlock\n\x00Holmes\n";

287
src/search_buffer.rs Normal file
View File

@@ -0,0 +1,287 @@
use std::cmp;
use std::path::Path;
use grep::Grep;
use term::Terminal;
use printer::Printer;
use search::{IterLines, Options, count_lines, is_binary};
pub struct BufferSearcher<'a, W: 'a> {
opts: Options,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
match_count: u64,
line_count: Option<u64>,
last_line: usize,
}
impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
pub fn new(
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
) -> BufferSearcher<'a, W> {
BufferSearcher {
opts: Options::default(),
printer: printer,
grep: grep,
path: path,
buf: buf,
match_count: 0,
line_count: None,
last_line: 0,
}
}
/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
pub fn count(mut self, yes: bool) -> Self {
self.opts.count = yes;
self
}
/// Set the end-of-line byte used by this searcher.
pub fn eol(mut self, eol: u8) -> Self {
self.opts.eol = eol;
self
}
/// If enabled, matching is inverted so that lines that *don't* match the
/// given pattern are treated as matches.
pub fn invert_match(mut self, yes: bool) -> Self {
self.opts.invert_match = yes;
self
}
/// If enabled, compute line numbers and prefix each line of output with
/// them.
pub fn line_number(mut self, yes: bool) -> Self {
self.opts.line_number = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
self
}
#[inline(never)]
pub fn run(mut self) -> u64 {
let binary_upto = cmp::min(4096, self.buf.len());
if !self.opts.text && is_binary(&self.buf[..binary_upto]) {
return 0;
}
self.match_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
self.print_inverted_matches(last_end, m.start());
} else {
self.print_match(m.start(), m.end());
}
last_end = m.end();
}
if self.opts.invert_match {
let upto = self.buf.len();
self.print_inverted_matches(last_end, upto);
}
if self.opts.count && self.match_count > 0 {
self.printer.path_count(self.path, self.match_count);
}
self.match_count
}
#[inline(always)]
pub fn print_match(&mut self, start: usize, end: usize) {
self.match_count += 1;
if self.opts.count {
return;
}
self.count_lines(start);
self.add_line(end);
self.printer.matched(
self.grep.regex(), self.path, self.buf,
start, end, self.line_count);
}
#[inline(always)]
fn print_inverted_matches(&mut self, start: usize, end: usize) {
debug_assert!(self.opts.invert_match);
let mut it = IterLines::new(self.opts.eol, start);
while let Some((s, e)) = it.next(&self.buf[..end]) {
self.print_match(s, e);
}
}
#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += count_lines(
&self.buf[self.last_line..upto], self.opts.eol);
self.last_line = upto;
}
}
#[inline(always)]
fn add_line(&mut self, line_end: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += 1;
self.last_line = line_end;
}
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use grep::{Grep, GrepBuilder};
use term::Terminal;
use out::OutBuffer;
use printer::Printer;
use super::BufferSearcher;
lazy_static! {
static ref SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
static ref CODE: &'static str = "\
extern crate snap;
use std::io;
fn main() {
let stdin = io::stdin();
let stdout = io::stdout();
// Wrap the stdin reader in a Snappy reader.
let mut rdr = snap::Reader::new(stdin.lock());
let mut wtr = stdout.lock();
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
}
fn matcher(pat: &str) -> Grep {
GrepBuilder::new(pat).build().unwrap()
}
fn test_path() -> &'static Path {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
haystack: &str,
mut map: F,
) -> (u64, String) {
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = BufferSearcher::new(
&mut pp, &grep, test_path(), haystack.as_bytes());
map(searcher).run()
};
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
}
#[test]
fn basic_search() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
#[test]
fn binary() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s|s);
assert_eq!(0, count);
assert_eq!(out, "");
}
#[test]
fn binary_text() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
}
#[test]
fn line_numbers() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.line_number(true));
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
#[test]
fn count() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.count(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:2\n");
}
#[test]
fn invert_match() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.invert_match(true));
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
}
#[test]
fn invert_match_line_numbers() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
s.invert_match(true).line_number(true)
});
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
}
#[test]
fn invert_match_count() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
s.invert_match(true).count(true)
});
assert_eq!(4, count);
assert_eq!(out, "/baz.rs:4\n");
}
}

View File

@@ -1,19 +1,19 @@
/*!
This io module contains various platform specific functions for detecting
how xrep is being used. e.g., Is stdin being piped into it? Is stdout being
how ripgrep is being used. e.g., Is stdin being piped into it? Is stdout being
redirected to a file? etc... We use this information to tweak various default
configuration parameters such as colors and match formatting.
*/
use libc;
#[cfg(unix)]
pub fn stdin_is_atty() -> bool {
use libc;
0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
}
#[cfg(unix)]
pub fn stdout_is_atty() -> bool {
use libc;
0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
}
@@ -37,6 +37,6 @@ pub fn stdout_is_atty() -> bool {
unsafe {
let fd = winapi::winbase::STD_OUTPUT_HANDLE;
let mut out = 0;
kernel32::GetConsoleMode(handle, &mut out) != 0
kernel32::GetConsoleMode(kernel32::GetStdHandle(fd), &mut out) != 0
}
}