mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-18 17:20:21 -07:00
This is why I was so intent on clearing the PR queue. This will effectively invalidate all existing patches, so I wanted to start from a clean slate. We do make one little tweak: we put the default type definitions in their own file and tell rustfmt to keep its grubby mitts off of it. We also sort it lexicographically and hopefully will enforce that from here on.
231 lines
6.3 KiB
Rust
231 lines
6.3 KiB
Rust
use grep_matcher::{Captures, Match, Matcher};
|
|
use regex::bytes::Regex;
|
|
|
|
use util::{RegexMatcher, RegexMatcherNoCaps};
|
|
|
|
fn matcher(pattern: &str) -> RegexMatcher {
|
|
RegexMatcher::new(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
|
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn m(start: usize, end: usize) -> Match {
|
|
Match::new(start, end)
|
|
}
|
|
|
|
// A single `find` call reports the span of the first pair of
// whitespace-separated words in the haystack.
#[test]
fn find() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let found = re.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}
|
|
|
|
// `find_iter` hands every successive match to the callback for as long as
// the callback keeps returning `true`.
#[test]
fn find_iter() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    let mut seen = vec![];

    // Always continue: both word pairs must be reported.
    re.find_iter(haystack, |mat| {
        seen.push(mat);
        true
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting: returning `false`
    // after the first match must stop the iteration.
    seen.clear();
    re.find_iter(haystack, |mat| {
        seen.push(mat);
        false
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5)]);
}
|
|
|
|
// `try_find_iter` stops at the first error returned by the callback and
// hands that error back to the caller.
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(\w+)\s+(\w+)");
    let mut seen = vec![];

    // Accept the first match, then fail on the second one.
    let outcome = re
        .try_find_iter(b"aa bb cc dd", |mat| {
            if !seen.is_empty() {
                return Err(MyError);
            }
            seen.push(mat);
            Ok(true)
        })
        .unwrap();
    let err = outcome.unwrap_err();

    assert_eq!(seen, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}
|
|
|
|
// Compares the trait-level `shortest_match` with the one offered by the
// wrapped regex.
#[test]
fn shortest_match() {
    let wrapper = matcher(r"a+");

    // This tests that the default impl isn't doing anything smart, and simply
    // defers to `find`.
    let via_trait = wrapper.shortest_match(b"aaa").unwrap();
    assert_eq!(via_trait, Some(3));

    // The actual underlying regex is smarter.
    let via_regex = wrapper.re.shortest_match(b"aaa");
    assert_eq!(via_regex, Some(1));
}
|
|
|
|
// Checks capture group bookkeeping (count and name lookup) and the group
// spans reported for a single match.
#[test]
fn captures() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // Two named groups plus the implicit whole-match group.
    assert_eq!(re.capture_count(), 3);

    // Known names resolve to their group index; unknown names do not.
    let lookups = [("a", Some(1)), ("b", Some(2)), ("nada", None)];
    for &(name, index) in lookups.iter() {
        assert_eq!(re.capture_index(name), index);
    }

    let mut caps = re.new_captures().unwrap();
    let matched = re.captures(b" homer simpson ", &mut caps).unwrap();
    assert!(matched);
    assert_eq!(caps.get(0), Some(m(1, 14)));
    assert_eq!(caps.get(1), Some(m(1, 6)));
    assert_eq!(caps.get(2), Some(m(7, 14)));
}
|
|
|
|
// `captures_iter` passes the capture groups of every successive match to
// the callback for as long as the callback keeps returning `true`.
#[test]
fn captures_iter() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let haystack = b"aa bb cc dd";
    let mut caps = re.new_captures().unwrap();
    let mut seen = vec![];

    // Record the whole match followed by both groups, for every match.
    re.captures_iter(haystack, &mut caps, |found| {
        for group in 0..3 {
            seen.push(found.get(group).unwrap());
        }
        true
    })
    .unwrap();
    let expected =
        vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11)];
    assert_eq!(seen, expected);

    // Test that captures_iter respects short circuiting: returning `false`
    // after the first match must stop the iteration.
    seen.clear();
    re.captures_iter(haystack, &mut caps, |found| {
        for group in 0..3 {
            seen.push(found.get(group).unwrap());
        }
        false
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
}
|
|
|
|
// `try_captures_iter` stops at the first error returned by the callback and
// hands that error back to the caller.
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = re.new_captures().unwrap();
    let mut seen = vec![];

    // Record the groups of the first match, then fail on the second one.
    let outcome = re
        .try_captures_iter(b"aa bb cc dd", &mut caps, |found| {
            if !seen.is_empty() {
                return Err(MyError);
            }
            for group in 0..3 {
                seen.push(found.get(group).unwrap());
            }
            Ok(true)
        })
        .unwrap();
    let err = outcome.unwrap_err();

    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}
|
|
|
|
// Test that our default impls for capturing are correct. Namely, when
// capturing isn't supported by the underlying matcher, then all of the
// various capturing related APIs fail fast.
#[test]
fn no_captures() {
    let re = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // Even though the pattern declares groups, none are exposed.
    assert_eq!(re.capture_count(), 0);
    for &name in ["a", "b", "nada"].iter() {
        assert_eq!(re.capture_index(name), None);
    }

    // A haystack the pattern would match must still report "no match" when
    // captures are requested.
    let mut caps = re.new_captures().unwrap();
    let matched = re.captures(b"homer simpson", &mut caps).unwrap();
    assert!(!matched);

    // The iteration callback must never run.
    let mut invoked = false;
    re.captures_iter(b"homer simpson", &mut caps, |_| {
        invoked = true;
        true
    })
    .unwrap();
    assert!(!invoked);
}
|
|
|
|
// `replace` substitutes whatever the callback appends for each match and
// keeps the text between matches unchanged.
#[test]
fn replace() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    let mut dst = vec![];

    // Both word pairs collapse to a single `z`; the space between them
    // survives.
    re.replace(haystack, &mut dst, |_, out| {
        out.push(b'z');
        true
    })
    .unwrap();
    assert_eq!(dst, b"z z");

    // Test that replacements respect short circuiting: after the callback
    // returns `false`, the rest of the haystack is copied through verbatim.
    dst.clear();
    re.replace(haystack, &mut dst, |_, out| {
        out.push(b'z');
        false
    })
    .unwrap();
    assert_eq!(dst, b"z cc dd");
}
|
|
|
|
// `replace_with_captures` exposes the capture groups of each match to the
// callback; here they are used to swap the two words of every pair.
#[test]
fn replace_with_captures() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    // Emits the second group, a space, then the first group.
    let template = b"$2 $1";
    let mut caps = re.new_captures().unwrap();
    let mut dst = vec![];

    re.replace_with_captures(haystack, &mut caps, &mut dst, |found, out| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            template,
            out,
        );
        true
    })
    .unwrap();
    assert_eq!(dst, b"bb aa dd cc");

    // Test that replacements respect short circuiting: only the first pair
    // is swapped, the remainder is copied through verbatim.
    dst.clear();
    re.replace_with_captures(haystack, &mut caps, &mut dst, |found, out| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            template,
            out,
        );
        false
    })
    .unwrap();
    assert_eq!(dst, b"bb aa cc dd");
}
|