mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-17 05:03:50 -07:00
libripgrep: initial commit introducing libripgrep
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation.
This commit is contained in:
24
grep-matcher/Cargo.toml
Normal file
24
grep-matcher/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "grep-matcher"
|
||||
version = "0.0.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A trait for regular expressions, with a focus on line oriented search.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-matcher"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "pattern", "trait"]
|
||||
license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2"
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
path = "tests/tests.rs"
|
21
grep-matcher/LICENSE-MIT
Normal file
21
grep-matcher/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
4
grep-matcher/README.md
Normal file
4
grep-matcher/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
24
grep-matcher/UNLICENSE
Normal file
24
grep-matcher/UNLICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
328
grep-matcher/src/interpolate.rs
Normal file
328
grep-matcher/src/interpolate.rs
Normal file
@@ -0,0 +1,328 @@
|
||||
use std::str;
|
||||
|
||||
use memchr::memchr;
|
||||
|
||||
/// Interpolate capture references in `replacement` and write the interpolation
|
||||
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
||||
/// where `N` is a capture group index and `name` is a capture group name. The
|
||||
/// function provided, `name_to_index`, maps capture group names to indices.
|
||||
///
|
||||
/// The `append` function given is responsible for writing the replacement
|
||||
/// to the `dst` buffer. That is, it is called with the capture group index
|
||||
/// of a capture group reference and is expected to resolve the index to its
|
||||
/// corresponding matched text. If no such match exists, then `append` should
|
||||
/// not write anything to its given buffer.
|
||||
pub fn interpolate<A, N>(
|
||||
mut replacement: &[u8],
|
||||
mut append: A,
|
||||
mut name_to_index: N,
|
||||
dst: &mut Vec<u8>,
|
||||
) where
|
||||
A: FnMut(usize, &mut Vec<u8>),
|
||||
N: FnMut(&str) -> Option<usize>
|
||||
{
|
||||
while !replacement.is_empty() {
|
||||
match memchr(b'$', replacement) {
|
||||
None => break,
|
||||
Some(i) => {
|
||||
dst.extend(&replacement[..i]);
|
||||
replacement = &replacement[i..];
|
||||
}
|
||||
}
|
||||
if replacement.get(1).map_or(false, |&b| b == b'$') {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[2..];
|
||||
continue;
|
||||
}
|
||||
debug_assert!(!replacement.is_empty());
|
||||
let cap_ref = match find_cap_ref(replacement) {
|
||||
Some(cap_ref) => cap_ref,
|
||||
None => {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
replacement = &replacement[cap_ref.end..];
|
||||
match cap_ref.cap {
|
||||
Ref::Number(i) => append(i, dst),
|
||||
Ref::Named(name) => {
|
||||
if let Some(i) = name_to_index(name) {
|
||||
append(i, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dst.extend(replacement);
|
||||
}
|
||||
|
||||
/// `CaptureRef` represents a reference to a capture group inside some text.
|
||||
/// The reference is either a capture group name or a number.
|
||||
///
|
||||
/// It is also tagged with the position in the text immediately proceding the
|
||||
/// capture reference.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
struct CaptureRef<'a> {
|
||||
cap: Ref<'a>,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
/// A reference to a capture group in some text.
|
||||
///
|
||||
/// e.g., `$2`, `$foo`, `${foo}`.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
enum Ref<'a> {
|
||||
Named(&'a str),
|
||||
Number(usize),
|
||||
}
|
||||
|
||||
impl<'a> From<&'a str> for Ref<'a> {
|
||||
fn from(x: &'a str) -> Ref<'a> {
|
||||
Ref::Named(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<usize> for Ref<'static> {
|
||||
fn from(x: usize) -> Ref<'static> {
|
||||
Ref::Number(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a possible reference to a capture group name in the given text,
|
||||
/// starting at the beginning of `replacement`.
|
||||
///
|
||||
/// If no such valid reference could be found, None is returned.
|
||||
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
||||
let mut i = 0;
|
||||
if replacement.len() <= 1 || replacement[0] != b'$' {
|
||||
return None;
|
||||
}
|
||||
let mut brace = false;
|
||||
i += 1;
|
||||
if replacement[i] == b'{' {
|
||||
brace = true;
|
||||
i += 1;
|
||||
}
|
||||
let mut cap_end = i;
|
||||
while replacement.get(cap_end).map_or(false, is_valid_cap_letter) {
|
||||
cap_end += 1;
|
||||
}
|
||||
if cap_end == i {
|
||||
return None;
|
||||
}
|
||||
// We just verified that the range 0..cap_end is valid ASCII, so it must
|
||||
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
|
||||
// check with an unchecked conversion or by parsing the number straight
|
||||
// from &[u8].
|
||||
let cap = str::from_utf8(&replacement[i..cap_end])
|
||||
.expect("valid UTF-8 capture name");
|
||||
if brace {
|
||||
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
|
||||
return None;
|
||||
}
|
||||
cap_end += 1;
|
||||
}
|
||||
Some(CaptureRef {
|
||||
cap: match cap.parse::<u32>() {
|
||||
Ok(i) => Ref::Number(i as usize),
|
||||
Err(_) => Ref::Named(cap),
|
||||
},
|
||||
end: cap_end,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given byte is allowed in a capture name.
|
||||
fn is_valid_cap_letter(b: &u8) -> bool {
|
||||
match *b {
|
||||
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{CaptureRef, find_cap_ref, interpolate};
|
||||
|
||||
macro_rules! find {
|
||||
($name:ident, $text:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
assert_eq!(None, find_cap_ref($text.as_bytes()));
|
||||
}
|
||||
};
|
||||
($name:ident, $text:expr, $capref:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! c {
|
||||
($name_or_number:expr, $pos:expr) => {
|
||||
CaptureRef { cap: $name_or_number.into(), end: $pos }
|
||||
};
|
||||
}
|
||||
|
||||
find!(find_cap_ref1, "$foo", c!("foo", 4));
|
||||
find!(find_cap_ref2, "${foo}", c!("foo", 6));
|
||||
find!(find_cap_ref3, "$0", c!(0, 2));
|
||||
find!(find_cap_ref4, "$5", c!(5, 2));
|
||||
find!(find_cap_ref5, "$10", c!(10, 3));
|
||||
find!(find_cap_ref6, "$42a", c!("42a", 4));
|
||||
find!(find_cap_ref7, "${42}a", c!(42, 5));
|
||||
find!(find_cap_ref8, "${42");
|
||||
find!(find_cap_ref9, "${42 ");
|
||||
find!(find_cap_ref10, " $0 ");
|
||||
find!(find_cap_ref11, "$");
|
||||
find!(find_cap_ref12, " ");
|
||||
find!(find_cap_ref13, "");
|
||||
|
||||
// A convenience routine for using interpolate's unwieldy but flexible API.
|
||||
fn interpolate_string(
|
||||
mut name_to_index: Vec<(&'static str, usize)>,
|
||||
caps: Vec<&'static str>,
|
||||
replacement: &str,
|
||||
) -> String {
|
||||
name_to_index.sort_by_key(|x| x.0);
|
||||
|
||||
let mut dst = vec![];
|
||||
interpolate(
|
||||
replacement.as_bytes(),
|
||||
|i, dst| {
|
||||
if let Some(&s) = caps.get(i) {
|
||||
dst.extend(s.as_bytes());
|
||||
}
|
||||
},
|
||||
|name| -> Option<usize> {
|
||||
name_to_index
|
||||
.binary_search_by_key(&name, |x| x.0)
|
||||
.ok()
|
||||
.map(|i| name_to_index[i].1)
|
||||
},
|
||||
&mut dst,
|
||||
);
|
||||
String::from_utf8(dst).unwrap()
|
||||
}
|
||||
|
||||
macro_rules! interp {
|
||||
($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
assert_eq!($expected, interpolate_string($map, $caps, $hay));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
interp!(
|
||||
interp1,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test $foo test",
|
||||
"test xxx test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp2,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test$footest",
|
||||
"test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp3,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test${foo}test",
|
||||
"testxxxtest",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp4,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test$2test",
|
||||
"test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp5,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test${2}test",
|
||||
"testxxxtest",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp6,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test $$foo test",
|
||||
"test $foo test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp7,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"test $foo",
|
||||
"test xxx",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp8,
|
||||
vec![("foo", 2)],
|
||||
vec!["", "", "xxx"],
|
||||
"$foo test",
|
||||
"xxx test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp9,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test $bar$foo",
|
||||
"test yyyxxx",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp10,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test $ test",
|
||||
"test $ test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp11,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test ${} test",
|
||||
"test ${} test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp12,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test ${ } test",
|
||||
"test ${ } test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp13,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test ${a b} test",
|
||||
"test ${a b} test",
|
||||
);
|
||||
|
||||
interp!(
|
||||
interp14,
|
||||
vec![("bar", 1), ("foo", 2)],
|
||||
vec!["", "yyy", "xxx"],
|
||||
"test ${a} test",
|
||||
"test test",
|
||||
);
|
||||
}
|
1072
grep-matcher/src/lib.rs
Normal file
1072
grep-matcher/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
208
grep-matcher/tests/test_matcher.rs
Normal file
208
grep-matcher/tests/test_matcher.rs
Normal file
@@ -0,0 +1,208 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
||||
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn m(start: usize, end: usize) -> Match {
|
||||
Match::new(start, end)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn find() {
|
||||
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||||
assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn find_iter() {
|
||||
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||||
let mut matches = vec![];
|
||||
matcher.find_iter(b"aa bb cc dd", |m| {
|
||||
matches.push(m);
|
||||
true
|
||||
}).unwrap();
|
||||
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
|
||||
|
||||
// Test that find_iter respects short circuiting.
|
||||
matches.clear();
|
||||
matcher.find_iter(b"aa bb cc dd", |m| {
|
||||
matches.push(m);
|
||||
false
|
||||
}).unwrap();
|
||||
assert_eq!(matches, vec![m(0, 5)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn try_find_iter() {
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct MyError;
|
||||
|
||||
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||||
let mut matches = vec![];
|
||||
let err = matcher.try_find_iter(b"aa bb cc dd", |m| {
|
||||
if matches.is_empty() {
|
||||
matches.push(m);
|
||||
Ok(true)
|
||||
} else {
|
||||
Err(MyError)
|
||||
}
|
||||
}).unwrap().unwrap_err();
|
||||
assert_eq!(matches, vec![m(0, 5)]);
|
||||
assert_eq!(err, MyError);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shortest_match() {
|
||||
let matcher = matcher(r"a+");
|
||||
// This tests that the default impl isn't doing anything smart, and simply
|
||||
// defers to `find`.
|
||||
assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3));
|
||||
// The actual underlying regex is smarter.
|
||||
assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn captures() {
|
||||
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||||
assert_eq!(matcher.capture_count(), 3);
|
||||
assert_eq!(matcher.capture_index("a"), Some(1));
|
||||
assert_eq!(matcher.capture_index("b"), Some(2));
|
||||
assert_eq!(matcher.capture_index("nada"), None);
|
||||
|
||||
let mut caps = matcher.new_captures().unwrap();
|
||||
assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap());
|
||||
assert_eq!(caps.get(0), Some(m(1, 14)));
|
||||
assert_eq!(caps.get(1), Some(m(1, 6)));
|
||||
assert_eq!(caps.get(2), Some(m(7, 14)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn captures_iter() {
|
||||
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||||
let mut caps = matcher.new_captures().unwrap();
|
||||
let mut matches = vec![];
|
||||
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||||
matches.push(caps.get(0).unwrap());
|
||||
matches.push(caps.get(1).unwrap());
|
||||
matches.push(caps.get(2).unwrap());
|
||||
true
|
||||
}).unwrap();
|
||||
assert_eq!(matches, vec![
|
||||
m(0, 5), m(0, 2), m(3, 5),
|
||||
m(6, 11), m(6, 8), m(9, 11),
|
||||
]);
|
||||
|
||||
// Test that captures_iter respects short circuiting.
|
||||
matches.clear();
|
||||
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||||
matches.push(caps.get(0).unwrap());
|
||||
matches.push(caps.get(1).unwrap());
|
||||
matches.push(caps.get(2).unwrap());
|
||||
false
|
||||
}).unwrap();
|
||||
assert_eq!(matches, vec![
|
||||
m(0, 5), m(0, 2), m(3, 5),
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn try_captures_iter() {
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct MyError;
|
||||
|
||||
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||||
let mut caps = matcher.new_captures().unwrap();
|
||||
let mut matches = vec![];
|
||||
let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||||
if matches.is_empty() {
|
||||
matches.push(caps.get(0).unwrap());
|
||||
matches.push(caps.get(1).unwrap());
|
||||
matches.push(caps.get(2).unwrap());
|
||||
Ok(true)
|
||||
} else {
|
||||
Err(MyError)
|
||||
}
|
||||
}).unwrap().unwrap_err();
|
||||
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
|
||||
assert_eq!(err, MyError);
|
||||
}
|
||||
|
||||
// Test that our default impls for capturing are correct. Namely, when
|
||||
// capturing isn't supported by the underlying matcher, then all of the
|
||||
// various capturing related APIs fail fast.
|
||||
#[test]
|
||||
fn no_captures() {
|
||||
let matcher = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||||
assert_eq!(matcher.capture_count(), 0);
|
||||
assert_eq!(matcher.capture_index("a"), None);
|
||||
assert_eq!(matcher.capture_index("b"), None);
|
||||
assert_eq!(matcher.capture_index("nada"), None);
|
||||
|
||||
let mut caps = matcher.new_captures().unwrap();
|
||||
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
|
||||
|
||||
let mut called = false;
|
||||
matcher.captures_iter(b"homer simpson", &mut caps, |_| {
|
||||
called = true;
|
||||
true
|
||||
}).unwrap();
|
||||
assert!(!called);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace() {
|
||||
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||||
let mut dst = vec![];
|
||||
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
||||
dst.push(b'z');
|
||||
true
|
||||
}).unwrap();
|
||||
assert_eq!(dst, b"z z");
|
||||
|
||||
// Test that replacements respect short circuiting.
|
||||
dst.clear();
|
||||
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
||||
dst.push(b'z');
|
||||
false
|
||||
}).unwrap();
|
||||
assert_eq!(dst, b"z cc dd");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_with_captures() {
|
||||
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||||
let haystack = b"aa bb cc dd";
|
||||
let mut caps = matcher.new_captures().unwrap();
|
||||
let mut dst = vec![];
|
||||
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
haystack,
|
||||
b"$2 $1",
|
||||
dst,
|
||||
);
|
||||
true
|
||||
}).unwrap();
|
||||
assert_eq!(dst, b"bb aa dd cc");
|
||||
|
||||
// Test that replacements respect short circuiting.
|
||||
dst.clear();
|
||||
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
haystack,
|
||||
b"$2 $1",
|
||||
dst,
|
||||
);
|
||||
false
|
||||
}).unwrap();
|
||||
assert_eq!(dst, b"bb aa cc dd");
|
||||
}
|
6
grep-matcher/tests/tests.rs
Normal file
6
grep-matcher/tests/tests.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
104
grep-matcher/tests/util.rs
Normal file
104
grep-matcher/tests/util.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
use std::collections::HashMap;
|
||||
use std::result;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcher {
|
||||
pub re: Regex,
|
||||
pub names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
pub fn new(re: Regex) -> RegexMatcher {
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in re.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher {
|
||||
re: re,
|
||||
names: names,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Result<T> = result::Result<T, NoError>;
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.re
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures> {
|
||||
Ok(RegexCaptures(self.re.capture_locations()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool> {
|
||||
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.re.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
// We purposely don't implement any other methods, so that we test the
|
||||
// default impls. The "real" Regex impl for Matcher provides a few more
|
||||
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
|
||||
// since the regex crate avoids synchronization overhead.
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcherNoCaps(pub Regex);
|
||||
|
||||
impl Matcher for RegexMatcherNoCaps {
|
||||
type Captures = NoCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.0
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<NoCaptures> {
|
||||
Ok(NoCaptures::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures(CaptureLocations);
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.0.pos(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user