mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-26 01:31:57 -07:00
repo: move all source code in crates directory
The top-level listing was just getting a bit too long for my taste. So put all of the code in one directory and shrink the large top-level mess to a small top-level mess. NOTE: This commit only contains renames. The subsequent commit will actually make ripgrep build again. We do it this way with the naive hope that this will make it easier for git history to track the renames. Sigh.
This commit is contained in:
24
crates/matcher/Cargo.toml
Normal file
24
crates/matcher/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "grep-matcher"
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A trait for regular expressions, with a focus on line oriented search.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-matcher"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "pattern", "trait"]
|
||||
license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1.1"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
path = "tests/tests.rs"
|
21
crates/matcher/LICENSE-MIT
Normal file
21
crates/matcher/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
36
crates/matcher/README.md
Normal file
36
crates/matcher/README.md
Normal file
@@ -0,0 +1,36 @@
|
||||
grep-matcher
|
||||
------------
|
||||
This crate provides a low level interface for describing regular expression
|
||||
matchers. The `grep` crate uses this interface in order to make the regex
|
||||
engine it uses pluggable.
|
||||
|
||||
[Travis CI build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[grep-matcher on crates.io](https://crates.io/crates/grep-matcher)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-matcher](https://docs.rs/grep-matcher)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-matcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_matcher;
|
||||
```
|
24
crates/matcher/UNLICENSE
Normal file
24
crates/matcher/UNLICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
328
crates/matcher/src/interpolate.rs
Normal file
328
crates/matcher/src/interpolate.rs
Normal file
@@ -0,0 +1,328 @@
|
||||
use std::str;
|
||||
|
||||
use memchr::memchr;
|
||||
|
||||
/// Interpolate capture references in `replacement` and write the interpolation
|
||||
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
||||
/// where `N` is a capture group index and `name` is a capture group name. The
|
||||
/// function provided, `name_to_index`, maps capture group names to indices.
|
||||
///
|
||||
/// The `append` function given is responsible for writing the replacement
|
||||
/// to the `dst` buffer. That is, it is called with the capture group index
|
||||
/// of a capture group reference and is expected to resolve the index to its
|
||||
/// corresponding matched text. If no such match exists, then `append` should
|
||||
/// not write anything to its given buffer.
|
||||
pub fn interpolate<A, N>(
|
||||
mut replacement: &[u8],
|
||||
mut append: A,
|
||||
mut name_to_index: N,
|
||||
dst: &mut Vec<u8>,
|
||||
) where
|
||||
A: FnMut(usize, &mut Vec<u8>),
|
||||
N: FnMut(&str) -> Option<usize>,
|
||||
{
|
||||
while !replacement.is_empty() {
|
||||
match memchr(b'$', replacement) {
|
||||
None => break,
|
||||
Some(i) => {
|
||||
dst.extend(&replacement[..i]);
|
||||
replacement = &replacement[i..];
|
||||
}
|
||||
}
|
||||
if replacement.get(1).map_or(false, |&b| b == b'$') {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[2..];
|
||||
continue;
|
||||
}
|
||||
debug_assert!(!replacement.is_empty());
|
||||
let cap_ref = match find_cap_ref(replacement) {
|
||||
Some(cap_ref) => cap_ref,
|
||||
None => {
|
||||
dst.push(b'$');
|
||||
replacement = &replacement[1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
replacement = &replacement[cap_ref.end..];
|
||||
match cap_ref.cap {
|
||||
Ref::Number(i) => append(i, dst),
|
||||
Ref::Named(name) => {
|
||||
if let Some(i) = name_to_index(name) {
|
||||
append(i, dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
dst.extend(replacement);
|
||||
}
|
||||
|
||||
/// `CaptureRef` represents a reference to a capture group inside some text.
|
||||
/// The reference is either a capture group name or a number.
|
||||
///
|
||||
/// It is also tagged with the position in the text immediately proceding the
|
||||
/// capture reference.
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
struct CaptureRef<'a> {
|
||||
cap: Ref<'a>,
|
||||
end: usize,
|
||||
}
|
||||
|
||||
/// The target of a capture group reference in replacement text.
///
/// Examples of references: `$2`, `$foo`, `${foo}`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Ref<'a> {
    /// A reference by group name; the name borrows from the text it was
    /// parsed from.
    Named(&'a str),
    /// A reference by group index.
    Number(usize),
}
|
||||
|
||||
impl<'a> From<&'a str> for Ref<'a> {
|
||||
fn from(x: &'a str) -> Ref<'a> {
|
||||
Ref::Named(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<usize> for Ref<'static> {
|
||||
fn from(x: usize) -> Ref<'static> {
|
||||
Ref::Number(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a possible reference to a capture group name in the given text,
|
||||
/// starting at the beginning of `replacement`.
|
||||
///
|
||||
/// If no such valid reference could be found, None is returned.
|
||||
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
||||
let mut i = 0;
|
||||
if replacement.len() <= 1 || replacement[0] != b'$' {
|
||||
return None;
|
||||
}
|
||||
let mut brace = false;
|
||||
i += 1;
|
||||
if replacement[i] == b'{' {
|
||||
brace = true;
|
||||
i += 1;
|
||||
}
|
||||
let mut cap_end = i;
|
||||
while replacement.get(cap_end).map_or(false, is_valid_cap_letter) {
|
||||
cap_end += 1;
|
||||
}
|
||||
if cap_end == i {
|
||||
return None;
|
||||
}
|
||||
// We just verified that the range 0..cap_end is valid ASCII, so it must
|
||||
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
|
||||
// check with an unchecked conversion or by parsing the number straight
|
||||
// from &[u8].
|
||||
let cap = str::from_utf8(&replacement[i..cap_end])
|
||||
.expect("valid UTF-8 capture name");
|
||||
if brace {
|
||||
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
|
||||
return None;
|
||||
}
|
||||
cap_end += 1;
|
||||
}
|
||||
Some(CaptureRef {
|
||||
cap: match cap.parse::<u32>() {
|
||||
Ok(i) => Ref::Number(i as usize),
|
||||
Err(_) => Ref::Named(cap),
|
||||
},
|
||||
end: cap_end,
|
||||
})
|
||||
}
|
||||
|
||||
/// Reports whether `b` may appear in a capture group name: an ASCII digit,
/// an ASCII letter of either case, or an underscore.
fn is_valid_cap_letter(b: &u8) -> bool {
    *b == b'_' || b.is_ascii_alphanumeric()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{find_cap_ref, interpolate, CaptureRef};

    // Generates a test asserting the result of `find_cap_ref` on `$text`:
    // `None` for the two-argument form, `Some($capref)` for the
    // three-argument form.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
            }
        };
    }

    // Shorthand for building the expected `CaptureRef`; the first argument
    // is converted into a `Ref` via `into()`, so it may be a name or a
    // number.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");

    // A convenience routine for using interpolate's unwieldy but flexible API.
    //
    // `name_to_index` lists (group name, group index) pairs, `caps` holds
    // the text of each group by index, and the interpolated `replacement`
    // is returned as a `String`.
    fn interpolate_string(
        mut name_to_index: Vec<(&'static str, usize)>,
        caps: Vec<&'static str>,
        replacement: &str,
    ) -> String {
        // Sorted so that the lookup closure below can binary search by name.
        name_to_index.sort_by_key(|x| x.0);

        let mut dst = vec![];
        interpolate(
            replacement.as_bytes(),
            |i, dst| {
                if let Some(&s) = caps.get(i) {
                    dst.extend(s.as_bytes());
                }
            },
            |name| -> Option<usize> {
                name_to_index
                    .binary_search_by_key(&name, |x| x.0)
                    .ok()
                    .map(|i| name_to_index[i].1)
            },
            &mut dst,
        );
        String::from_utf8(dst).unwrap()
    }

    // Generates a test asserting that interpolating `$hay` with the given
    // name map and capture texts yields `$expected`.
    macro_rules! interp {
        ($name:ident, $map:expr, $caps:expr, $hay:expr, $expected:expr $(,)*) => {
            #[test]
            fn $name() {
                assert_eq!($expected, interpolate_string($map, $caps, $hay));
            }
        };
    }

    interp!(
        interp1,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo test",
        "test xxx test",
    );

    interp!(
        interp2,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$footest",
        "test",
    );

    interp!(
        interp3,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${foo}test",
        "testxxxtest",
    );

    interp!(
        interp4,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test$2test",
        "test",
    );

    interp!(
        interp5,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test${2}test",
        "testxxxtest",
    );

    interp!(
        interp6,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $$foo test",
        "test $foo test",
    );

    interp!(
        interp7,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "test $foo",
        "test xxx",
    );

    interp!(
        interp8,
        vec![("foo", 2)],
        vec!["", "", "xxx"],
        "$foo test",
        "xxx test",
    );

    interp!(
        interp9,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $bar$foo",
        "test yyyxxx",
    );

    interp!(
        interp10,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test $ test",
        "test $ test",
    );

    interp!(
        interp11,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${} test",
        "test ${} test",
    );

    interp!(
        interp12,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${ } test",
        "test ${ } test",
    );

    interp!(
        interp13,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a b} test",
        "test ${a b} test",
    );

    // `a` is not in the name map, so `${a}` expands to nothing and the two
    // spaces that surrounded it end up adjacent.
    interp!(
        interp14,
        vec![("bar", 1), ("foo", 2)],
        vec!["", "yyy", "xxx"],
        "test ${a} test",
        "test  test",
    );
}
|
1151
crates/matcher/src/lib.rs
Normal file
1151
crates/matcher/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
230
crates/matcher/tests/test_matcher.rs
Normal file
230
crates/matcher/tests/test_matcher.rs
Normal file
@@ -0,0 +1,230 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
||||
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn m(start: usize, end: usize) -> Match {
|
||||
Match::new(start, end)
|
||||
}
|
||||
|
||||
// `find` reports the span of the first match in the haystack.
#[test]
fn find() {
    let words = matcher(r"(\w+)\s+(\w+)");
    let found = words.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}
|
||||
|
||||
// `find_iter` visits every match in order and stops as soon as the callback
// returns false.
#[test]
fn find_iter() {
    let words = matcher(r"(\w+)\s+(\w+)");
    let mut matches = vec![];

    let visit_all = words.find_iter(b"aa bb cc dd", |hit| {
        matches.push(hit);
        true
    });
    visit_all.unwrap();
    assert_eq!(matches, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting.
    matches.clear();
    let visit_first = words.find_iter(b"aa bb cc dd", |hit| {
        matches.push(hit);
        false
    });
    visit_first.unwrap();
    assert_eq!(matches, vec![m(0, 5)]);
}
|
||||
|
||||
// `try_find_iter` stops at the first callback error and hands that error
// back to the caller.
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let words = matcher(r"(\w+)\s+(\w+)");
    let mut matches = vec![];
    // Accept the first match, then fail on the second.
    let outcome = words.try_find_iter(b"aa bb cc dd", |hit| {
        if !matches.is_empty() {
            return Err(MyError);
        }
        matches.push(hit);
        Ok(true)
    });
    let err = outcome.unwrap().unwrap_err();
    assert_eq!(matches, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
#[test]
fn shortest_match() {
    let repeated_a = matcher(r"a+");
    // The default impl is expected to do nothing clever and simply defer to
    // `find`, so it reports the end of the full match.
    let via_default = repeated_a.shortest_match(b"aaa").unwrap();
    assert_eq!(via_default, Some(3));
    // The underlying regex, by contrast, is smarter and stops early.
    let via_regex = repeated_a.re.shortest_match(b"aaa");
    assert_eq!(via_regex, Some(1));
}
|
||||
|
||||
// Checks capture metadata (count, name lookup) and the group spans reported
// for a single match.
#[test]
fn captures() {
    let named = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    assert_eq!(named.capture_count(), 3);
    assert_eq!(named.capture_index("a"), Some(1));
    assert_eq!(named.capture_index("b"), Some(2));
    assert_eq!(named.capture_index("nada"), None);

    let mut caps = named.new_captures().unwrap();
    let matched = named.captures(b" homer simpson ", &mut caps).unwrap();
    assert!(matched);
    assert_eq!(caps.get(0), Some(m(1, 14)));
    assert_eq!(caps.get(1), Some(m(1, 6)));
    assert_eq!(caps.get(2), Some(m(7, 14)));
}
|
||||
|
||||
// `captures_iter` visits every match with its groups filled in and stops as
// soon as the callback returns false.
#[test]
fn captures_iter() {
    let named = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = named.new_captures().unwrap();
    let mut matches = vec![];

    let visit_all = named.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
        // Record the overall match followed by both groups.
        for group in 0..3 {
            matches.push(caps.get(group).unwrap());
        }
        true
    });
    visit_all.unwrap();
    assert_eq!(
        matches,
        vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),]
    );

    // Test that captures_iter respects short circuiting.
    matches.clear();
    let visit_first = named.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
        for group in 0..3 {
            matches.push(caps.get(group).unwrap());
        }
        false
    });
    visit_first.unwrap();
    assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]);
}
|
||||
|
||||
// `try_captures_iter` stops at the first callback error and hands that error
// back to the caller.
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let named = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = named.new_captures().unwrap();
    let mut matches = vec![];
    // Accept the first match, then fail on the second.
    let outcome =
        named.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
            if !matches.is_empty() {
                return Err(MyError);
            }
            for group in 0..3 {
                matches.push(caps.get(group).unwrap());
            }
            Ok(true)
        });
    let err = outcome.unwrap().unwrap_err();
    assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
// Verifies the default capturing impls: when the underlying matcher does not
// support capturing, every capture-related API is expected to fail fast
// (zero groups, no name lookups, no match reported, callback never invoked).
#[test]
fn no_captures() {
    let plain = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
    assert_eq!(plain.capture_count(), 0);
    assert_eq!(plain.capture_index("a"), None);
    assert_eq!(plain.capture_index("b"), None);
    assert_eq!(plain.capture_index("nada"), None);

    let mut caps = plain.new_captures().unwrap();
    let matched = plain.captures(b"homer simpson", &mut caps).unwrap();
    assert!(!matched);

    let mut called = false;
    let visit = plain.captures_iter(b"homer simpson", &mut caps, |_| {
        called = true;
        true
    });
    visit.unwrap();
    assert!(!called);
}
|
||||
|
||||
// `replace` substitutes each match with whatever the callback writes and
// copies the rest of the haystack through once the callback returns false.
#[test]
fn replace() {
    let words = matcher(r"(\w+)\s+(\w+)");
    let mut dst = vec![];

    let replace_all = words.replace(b"aa bb cc dd", &mut dst, |_, out| {
        out.push(b'z');
        true
    });
    replace_all.unwrap();
    assert_eq!(dst, b"z z");

    // Test that replacements respect short circuiting.
    dst.clear();
    let replace_first = words.replace(b"aa bb cc dd", &mut dst, |_, out| {
        out.push(b'z');
        false
    });
    replace_first.unwrap();
    assert_eq!(dst, b"z cc dd");
}
|
||||
|
||||
// `replace_with_captures` exposes the groups of each match to the callback;
// here they are used to swap the two words of every match.
#[test]
fn replace_with_captures() {
    let words = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    let mut caps = words.new_captures().unwrap();
    let mut dst = vec![];

    let swap_all = words.replace_with_captures(
        haystack,
        &mut caps,
        &mut dst,
        |caps, out| {
            caps.interpolate(
                |name| words.capture_index(name),
                haystack,
                b"$2 $1",
                out,
            );
            true
        },
    );
    swap_all.unwrap();
    assert_eq!(dst, b"bb aa dd cc");

    // Test that replacements respect short circuiting.
    dst.clear();
    let swap_first = words.replace_with_captures(
        haystack,
        &mut caps,
        &mut dst,
        |caps, out| {
            caps.interpolate(
                |name| words.capture_index(name),
                haystack,
                b"$2 $1",
                out,
            );
            false
        },
    );
    swap_first.unwrap();
    assert_eq!(dst, b"bb aa cc dd");
}
|
6
crates/matcher/tests/tests.rs
Normal file
6
crates/matcher/tests/tests.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
95
crates/matcher/tests/util.rs
Normal file
95
crates/matcher/tests/util.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use std::collections::HashMap;
|
||||
use std::result;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcher {
|
||||
pub re: Regex,
|
||||
pub names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
pub fn new(re: Regex) -> RegexMatcher {
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in re.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher { re: re, names: names }
|
||||
}
|
||||
}
|
||||
|
||||
// Shorthand for results whose error type is `NoError`.
type Result<T> = result::Result<T, NoError>;
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
|
||||
Ok(self
|
||||
.re
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures> {
|
||||
Ok(RegexCaptures(self.re.capture_locations()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool> {
|
||||
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.re.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
// We purposely don't implement any other methods, so that we test the
|
||||
// default impls. The "real" Regex impl for Matcher provides a few more
|
||||
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
|
||||
// since the regex crate avoids synchronization overhead.
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcherNoCaps(pub Regex);
|
||||
|
||||
impl Matcher for RegexMatcherNoCaps {
|
||||
type Captures = NoCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
|
||||
Ok(self
|
||||
.0
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<NoCaptures> {
|
||||
Ok(NoCaptures::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures(CaptureLocations);
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.0.pos(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user