style: rustfmt everything

This is why I was so intent on clearing the PR queue. This will
effectively invalidate all existing patches, so I wanted to start from a
clean slate.

We do make one little tweak: we put the default type definitions in
their own file and tell rustfmt to keep its grubby mitts off of it. We
also sort them lexicographically and hopefully will enforce that from here
on.
This commit is contained in:
Andrew Gallant
2020-02-17 18:08:47 -05:00
parent c95f29e3ba
commit 0bc4f0447b
67 changed files with 2707 additions and 2675 deletions

View File

@@ -1,5 +1,5 @@
use regex_syntax::ast::{self, Ast};
use regex_syntax::ast::parse::Parser;
use regex_syntax::ast::{self, Ast};
/// The results of analyzing AST of a regular expression (e.g., for supporting
/// smart case).

View File

@@ -51,8 +51,8 @@ impl Default for Config {
octal: false,
// These size limits are much bigger than what's in the regex
// crate.
size_limit: 100 * (1<<20),
dfa_size_limit: 1000 * (1<<20),
size_limit: 100 * (1 << 20),
dfa_size_limit: 1000 * (1 << 20),
nest_limit: 250,
line_terminator: None,
crlf: false,
@@ -95,10 +95,7 @@ impl Config {
/// Accounting for the `smart_case` config knob, return true if and only if
/// this pattern should be matched case insensitively.
fn is_case_insensitive(
&self,
analysis: &AstAnalysis,
) -> bool {
fn is_case_insensitive(&self, analysis: &AstAnalysis) -> bool {
if self.case_insensitive {
return true;
}
@@ -116,9 +113,7 @@ impl Config {
/// are enabled, since if multi-line can impact the match semantics of a
/// regex, then it is by definition not a simple alternation of literals.
pub fn can_plain_aho_corasick(&self) -> bool {
!self.word
&& !self.case_insensitive
&& !self.case_smart
!self.word && !self.case_insensitive && !self.case_smart
}
/// Perform analysis on the AST of this pattern.
@@ -203,8 +198,7 @@ impl ConfiguredHIR {
pub fn with_pattern<F: FnMut(&str) -> String>(
&self,
mut f: F,
) -> Result<ConfiguredHIR, Error>
{
) -> Result<ConfiguredHIR, Error> {
self.pattern_to_hir(&f(&self.expr.to_string()))
}

View File

@@ -76,9 +76,8 @@ impl Matcher for CRLFMatcher {
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
caps.strip_crlf(false);
let r = self.regex.captures_read_at(
caps.locations_mut(), haystack, at,
);
let r =
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
if !r.is_some() {
return Ok(false);
}
@@ -163,8 +162,8 @@ pub fn crlfify(expr: Hir) -> Hir {
#[cfg(test)]
mod tests {
use regex_syntax::Parser;
use super::crlfify;
use regex_syntax::Parser;
fn roundtrip(pattern: &str) -> String {
let expr1 = Parser::new().parse(pattern).unwrap();

View File

@@ -5,8 +5,8 @@ the regex engine doesn't look for inner literals. Since we're doing line based
searching, we can use them, so we need to do it ourselves.
*/
use regex_syntax::hir::{self, Hir, HirKind};
use regex_syntax::hir::literal::{Literal, Literals};
use regex_syntax::hir::{self, Hir, HirKind};
use util;
@@ -159,10 +159,8 @@ impl LiteralSets {
};
debug!("prefix/suffix literals found: {:?}", lits);
let alts: Vec<String> = lits
.into_iter()
.map(|x| util::bytes_to_regex(x))
.collect();
let alts: Vec<String> =
lits.into_iter().map(|x| util::bytes_to_regex(x)).collect();
// We're matching raw bytes, so disable Unicode mode.
Some(format!("(?-u:{})", alts.join("|")))
} else {
@@ -194,24 +192,28 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
HirKind::Group(hir::Group { ref hir, .. }) => {
union_required(&**hir, lits);
}
HirKind::Repetition(ref x) => {
match x.kind {
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir, min, max, x.greedy, lits, union_required);
}
HirKind::Repetition(ref x) => match x.kind {
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
}
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir,
min,
max,
x.greedy,
lits,
union_required,
);
}
},
HirKind::Concat(ref es) if es.is_empty() => {}
HirKind::Concat(ref es) if es.len() == 1 => {
union_required(&es[0], lits)
@@ -310,9 +312,9 @@ fn is_simple(expr: &Hir) -> bool {
| HirKind::Repetition(_)
| HirKind::Concat(_)
| HirKind::Alternation(_) => true,
HirKind::Anchor(_)
| HirKind::WordBoundary(_)
| HirKind::Group(_) => false,
HirKind::Anchor(_) | HirKind::WordBoundary(_) | HirKind::Group(_) => {
false
}
}
}
@@ -328,8 +330,8 @@ fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
#[cfg(test)]
mod tests {
use regex_syntax::Parser;
use super::LiteralSets;
use regex_syntax::Parser;
fn sets(pattern: &str) -> LiteralSets {
let hir = Parser::new().parse(pattern).unwrap();
@@ -380,8 +382,10 @@ mod tests {
fn regression_1319() {
// Regression from:
// https://github.com/BurntSushi/ripgrep/issues/1319
assert_eq!(one_regex(r"TTGAGTCCAGGAG[ATCG]{2}C"),
assert_eq!(
one_regex(r"TTGAGTCCAGGAG[ATCG]{2}C"),
pat("TTGAGTCCAGGAGA|TTGAGTCCAGGAGC|\
TTGAGTCCAGGAGG|TTGAGTCCAGGAGT"));
TTGAGTCCAGGAGG|TTGAGTCCAGGAGT")
);
}
}

View File

@@ -1,7 +1,7 @@
use std::collections::HashMap;
use grep_matcher::{
Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError, ByteSet,
ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError,
};
use regex::bytes::{CaptureLocations, Regex};
@@ -34,9 +34,7 @@ impl Default for RegexMatcherBuilder {
impl RegexMatcherBuilder {
/// Create a new builder for configuring a regex matcher.
pub fn new() -> RegexMatcherBuilder {
RegexMatcherBuilder {
config: Config::default(),
}
RegexMatcherBuilder { config: Config::default() }
}
/// Build a new matcher using the current configuration for the provided
@@ -382,9 +380,7 @@ impl RegexMatcher {
/// given pattern contains a literal `\n`. Other uses of `\n` (such as in
/// `\s`) are removed transparently.
pub fn new_line_matcher(pattern: &str) -> Result<RegexMatcher, Error> {
RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(pattern)
RegexMatcherBuilder::new().line_terminator(Some(b'\n')).build(pattern)
}
}
@@ -499,12 +495,9 @@ impl Matcher for RegexMatcher {
}
}
fn find_iter<F>(
&self,
haystack: &[u8],
matched: F,
) -> Result<(), NoError>
where F: FnMut(Match) -> bool
fn find_iter<F>(&self, haystack: &[u8], matched: F) -> Result<(), NoError>
where
F: FnMut(Match) -> bool,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -520,7 +513,8 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
matched: F,
) -> Result<Result<(), E>, NoError>
where F: FnMut(Match) -> Result<bool, E>
where
F: FnMut(Match) -> Result<bool, E>,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -551,7 +545,8 @@ impl Matcher for RegexMatcher {
caps: &mut RegexCaptures,
matched: F,
) -> Result<(), NoError>
where F: FnMut(&RegexCaptures) -> bool
where
F: FnMut(&RegexCaptures) -> bool,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -568,7 +563,8 @@ impl Matcher for RegexMatcher {
caps: &mut RegexCaptures,
matched: F,
) -> Result<Result<(), E>, NoError>
where F: FnMut(&RegexCaptures) -> Result<bool, E>
where
F: FnMut(&RegexCaptures) -> Result<bool, E>,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -602,7 +598,8 @@ impl Matcher for RegexMatcher {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), NoError>
where F: FnMut(Match, &mut Vec<u8>) -> bool
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -620,7 +617,8 @@ impl Matcher for RegexMatcher {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), NoError>
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
{
use self::RegexMatcherImpl::*;
match self.matcher {
@@ -745,7 +743,8 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, NoError> {
Ok(self.regex
Ok(self
.regex
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
@@ -767,7 +766,8 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, NoError>
where F: FnMut(Match) -> Result<bool, E>
where
F: FnMut(Match) -> Result<bool, E>,
{
for m in self.regex.find_iter(haystack) {
match matched(Match::new(m.start(), m.end())) {
@@ -785,9 +785,10 @@ impl Matcher for StandardMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
Ok(self.regex.captures_read_at(
&mut caps.locations_mut(), haystack, at,
).is_some())
Ok(self
.regex
.captures_read_at(&mut caps.locations_mut(), haystack, at)
.is_some())
}
fn shortest_match_at(
@@ -901,7 +902,9 @@ impl RegexCaptures {
offset: usize,
) -> RegexCaptures {
RegexCaptures(RegexCapturesImp::Regex {
locs, offset, strip_crlf: false,
locs,
offset,
strip_crlf: false,
})
}
@@ -910,9 +913,7 @@ impl RegexCaptures {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref locs, .. } => {
locs
}
RegexCapturesImp::Regex { ref locs, .. } => locs,
}
}
@@ -921,9 +922,7 @@ impl RegexCaptures {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref mut locs, .. } => {
locs
}
RegexCapturesImp::Regex { ref mut locs, .. } => locs,
}
}
@@ -952,23 +951,19 @@ impl RegexCaptures {
#[cfg(test)]
mod tests {
use grep_matcher::{LineMatchKind, Matcher};
use super::*;
use grep_matcher::{LineMatchKind, Matcher};
// Test that enabling word matches does the right thing and demonstrate
// the difference between it and surrounding the regex in `\b`.
#[test]
fn word() {
let matcher = RegexMatcherBuilder::new()
.word(true)
.build(r"-2")
.unwrap();
let matcher =
RegexMatcherBuilder::new().word(true).build(r"-2").unwrap();
assert!(matcher.is_match(b"abc -2 foo").unwrap());
let matcher = RegexMatcherBuilder::new()
.word(false)
.build(r"\b-2\b")
.unwrap();
let matcher =
RegexMatcherBuilder::new().word(false).build(r"\b-2\b").unwrap();
assert!(!matcher.is_match(b"abc -2 foo").unwrap());
}
@@ -977,9 +972,7 @@ mod tests {
#[test]
fn line_terminator() {
// This works, because there's no line terminator specified.
let matcher = RegexMatcherBuilder::new()
.build(r"abc\sxyz")
.unwrap();
let matcher = RegexMatcherBuilder::new().build(r"abc\sxyz").unwrap();
assert!(matcher.is_match(b"abc\nxyz").unwrap());
// This doesn't.
@@ -1029,16 +1022,12 @@ mod tests {
// Test that smart case works.
#[test]
fn case_smart() {
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"abc")
.unwrap();
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"abc").unwrap();
assert!(matcher.is_match(b"ABC").unwrap());
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"aBc")
.unwrap();
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"aBc").unwrap();
assert!(!matcher.is_match(b"ABC").unwrap());
}
@@ -1060,9 +1049,7 @@ mod tests {
// With no line terminator set, we can't employ any optimizations,
// so we get a confirmed match.
let matcher = RegexMatcherBuilder::new()
.build(r"\wfoo\s")
.unwrap();
let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
assert!(is_confirmed(m));

View File

@@ -1,5 +1,5 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use grep_matcher::{Matcher, Match, NoError};
use grep_matcher::{Match, Matcher, NoError};
use regex_syntax::hir::Hir;
use error::Error;
@@ -93,15 +93,13 @@ pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
_ => return None, // one literal isn't worth it
};
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| {
match *lit {
Literal::Unicode(c) => {
let mut buf = [0; 4];
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}
Literal::Byte(b) => {
dst.push(b);
}
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
Literal::Unicode(c) => {
let mut buf = [0; 4];
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}
Literal::Byte(b) => {
dst.push(b);
}
};

View File

@@ -11,14 +11,9 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
/// Remove any bytes from the given set that can occur in a match produced by
/// the given expression.
fn remove_matching_bytes(
expr: &Hir,
set: &mut ByteSet,
) {
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
match *expr.kind() {
HirKind::Empty
| HirKind::Anchor(_)
| HirKind::WordBoundary(_) => {}
HirKind::Empty | HirKind::Anchor(_) | HirKind::WordBoundary(_) => {}
HirKind::Literal(hir::Literal::Unicode(c)) => {
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
set.remove(b);
@@ -105,15 +100,20 @@ mod tests {
#[test]
fn dot() {
assert_eq!(sparse(&extract(".")), vec![
b'\n',
192, 193, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255,
]);
assert_eq!(sparse(&extract("(?s).")), vec![
192, 193, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255,
]);
assert_eq!(
sparse(&extract(".")),
vec![
b'\n', 192, 193, 245, 246, 247, 248, 249, 250, 251, 252, 253,
254, 255,
]
);
assert_eq!(
sparse(&extract("(?s).")),
vec![
192, 193, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255,
]
);
assert_eq!(sparse(&extract("(?-u).")), vec![b'\n']);
assert_eq!(sparse(&extract("(?s-u).")), vec![]);
}

View File

@@ -33,10 +33,7 @@ pub fn strip_from_match(
/// The implementation of strip_from_match. The given byte must be ASCII. This
/// function panics otherwise.
fn strip_from_match_ascii(
expr: Hir,
byte: u8,
) -> Result<Hir, Error> {
fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
assert!(byte <= 0x7F);
let chr = byte as char;
assert_eq!(chr.len_utf8(), 1);
@@ -88,13 +85,15 @@ fn strip_from_match_ascii(
Hir::group(x)
}
HirKind::Concat(xs) => {
let xs = xs.into_iter()
let xs = xs
.into_iter()
.map(|e| strip_from_match_ascii(e, byte))
.collect::<Result<Vec<Hir>, Error>>()?;
Hir::concat(xs)
}
HirKind::Alternation(xs) => {
let xs = xs.into_iter()
let xs = xs
.into_iter()
.map(|e| strip_from_match_ascii(e, byte))
.collect::<Result<Vec<Hir>, Error>>()?;
Hir::alternation(xs)
@@ -106,8 +105,8 @@ fn strip_from_match_ascii(
mod tests {
use regex_syntax::Parser;
use super::{strip_from_match, LineTerminator};
use error::Error;
use super::{LineTerminator, strip_from_match};
fn roundtrip(pattern: &str, byte: u8) -> String {
roundtrip_line_term(pattern, LineTerminator::byte(byte)).unwrap()

View File

@@ -1,8 +1,8 @@
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
pub fn bytes_to_regex(bs: &[u8]) -> String {
use std::fmt::Write;
use regex_syntax::is_meta_character;
use std::fmt::Write;
let mut s = String::with_capacity(bs.len());
for &b in bs {

View File

@@ -1,5 +1,5 @@
use std::collections::HashMap;
use std::cell::RefCell;
use std::collections::HashMap;
use std::sync::Arc;
use grep_matcher::{Match, Matcher, NoError};
@@ -45,9 +45,8 @@ impl WordMatcher {
/// The given options are used to construct the regular expression
/// internally.
pub fn new(expr: &ConfiguredHIR) -> Result<WordMatcher, Error> {
let original = expr.with_pattern(|pat| {
format!("^(?:{})$", pat)
})?.regex()?;
let original =
expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
let word_expr = expr.with_pattern(|pat| {
let pat = format!(r"(?:(?-m:^)|\W)({})(?:(?-m:$)|\W)", pat);
debug!("word regex: {:?}", pat);
@@ -112,9 +111,8 @@ impl WordMatcher {
}
let (_, slen) = bstr::decode_utf8(&haystack[cand]);
let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
cand = cand
.with_start(cand.start() + slen)
.with_end(cand.end() - elen);
cand =
cand.with_start(cand.start() + slen).with_end(cand.end() - elen);
if self.original.is_match(&haystack[cand]) {
Ok(Some(cand))
} else {
@@ -148,9 +146,8 @@ impl Matcher for WordMatcher {
Err(()) => {}
}
let cell = self.locs.get_or(|| {
RefCell::new(self.regex.capture_locations())
});
let cell =
self.locs.get_or(|| RefCell::new(self.regex.capture_locations()));
let mut caps = cell.borrow_mut();
self.regex.captures_read_at(&mut caps, haystack, at);
Ok(caps.get(1).map(|m| Match::new(m.0, m.1)))
@@ -174,9 +171,8 @@ impl Matcher for WordMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
let r = self.regex.captures_read_at(
caps.locations_mut(), haystack, at,
);
let r =
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
Ok(r.is_some())
}
@@ -187,9 +183,9 @@ impl Matcher for WordMatcher {
#[cfg(test)]
mod tests {
use grep_matcher::{Captures, Match, Matcher};
use config::Config;
use super::WordMatcher;
use config::Config;
use grep_matcher::{Captures, Match, Matcher};
fn matcher(pattern: &str) -> WordMatcher {
let chir = Config::default().hir(pattern).unwrap();