mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-09-02 05:13:53 -07:00
globset: support nested alternates
For example, `**/{node_modules/**/*/{ts,js},crates/**/*.{rs,toml}`. I originally didn't add this I think for implementation simplicity, but it turns out that it really isn't much work to do. There might have also been some odd behavior in the regex engine for dealing with empty alternates, but that has all been long fixed. Closes #3048, Closes #3112
This commit is contained in:
committed by
Andrew Gallant
parent
2d763a9a1b
commit
43eff4397f
@@ -37,6 +37,8 @@ Feature enhancements:
|
||||
When using multithreading, schedule files to search in order given on CLI.
|
||||
* [FEATURE #2943](https://github.com/BurntSushi/ripgrep/issues/2943):
|
||||
Add `aarch64` release artifacts for Windows.
|
||||
* [FEATURE #3048](https://github.com/BurntSushi/ripgrep/pull/3048):
|
||||
Globs in ripgrep (and the `globset` crate) now support nested alternates.
|
||||
|
||||
|
||||
14.1.1 (2024-09-08)
|
||||
|
@@ -583,25 +583,26 @@ impl<'a> GlobBuilder<'a> {
|
||||
pub fn build(&self) -> Result<Glob, Error> {
|
||||
let mut p = Parser {
|
||||
glob: &self.glob,
|
||||
stack: vec![Tokens::default()],
|
||||
alternates_stack: Vec::new(),
|
||||
branches: vec![Tokens::default()],
|
||||
chars: self.glob.chars().peekable(),
|
||||
prev: None,
|
||||
cur: None,
|
||||
opts: &self.opts,
|
||||
};
|
||||
p.parse()?;
|
||||
if p.stack.is_empty() {
|
||||
Err(Error {
|
||||
glob: Some(self.glob.to_string()),
|
||||
kind: ErrorKind::UnopenedAlternates,
|
||||
})
|
||||
} else if p.stack.len() > 1 {
|
||||
if p.branches.is_empty() {
|
||||
// OK because of how the the branches/alternate_stack are managed.
|
||||
// If we end up here, then there *must* be a bug in the parser
|
||||
// somewhere.
|
||||
unreachable!()
|
||||
} else if p.branches.len() > 1 {
|
||||
Err(Error {
|
||||
glob: Some(self.glob.to_string()),
|
||||
kind: ErrorKind::UnclosedAlternates,
|
||||
})
|
||||
} else {
|
||||
let tokens = p.stack.pop().unwrap();
|
||||
let tokens = p.branches.pop().unwrap();
|
||||
Ok(Glob {
|
||||
glob: self.glob.to_string(),
|
||||
re: tokens.to_regex_with(&self.opts),
|
||||
@@ -776,7 +777,11 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
|
||||
|
||||
struct Parser<'a> {
|
||||
glob: &'a str,
|
||||
stack: Vec<Tokens>,
|
||||
// Marks the index in `stack` where the alternation started.
|
||||
alternates_stack: Vec<usize>,
|
||||
// The set of active alternation branches being parsed.
|
||||
// Tokens are added to the end of the last one.
|
||||
branches: Vec<Tokens>,
|
||||
chars: std::iter::Peekable<std::str::Chars<'a>>,
|
||||
prev: Option<char>,
|
||||
cur: Option<char>,
|
||||
@@ -805,36 +810,37 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
fn push_alternate(&mut self) -> Result<(), Error> {
|
||||
if self.stack.len() > 1 {
|
||||
return Err(self.error(ErrorKind::NestedAlternates));
|
||||
}
|
||||
Ok(self.stack.push(Tokens::default()))
|
||||
self.alternates_stack.push(self.branches.len());
|
||||
self.branches.push(Tokens::default());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn pop_alternate(&mut self) -> Result<(), Error> {
|
||||
let mut alts = vec![];
|
||||
while self.stack.len() >= 2 {
|
||||
alts.push(self.stack.pop().unwrap());
|
||||
}
|
||||
self.push_token(Token::Alternates(alts))
|
||||
let Some(start) = self.alternates_stack.pop() else {
|
||||
return Err(self.error(ErrorKind::UnopenedAlternates));
|
||||
};
|
||||
assert!(start <= self.branches.len());
|
||||
let alts = Token::Alternates(self.branches.drain(start..).collect());
|
||||
self.push_token(alts)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn push_token(&mut self, tok: Token) -> Result<(), Error> {
|
||||
if let Some(ref mut pat) = self.stack.last_mut() {
|
||||
if let Some(ref mut pat) = self.branches.last_mut() {
|
||||
return Ok(pat.push(tok));
|
||||
}
|
||||
Err(self.error(ErrorKind::UnopenedAlternates))
|
||||
}
|
||||
|
||||
fn pop_token(&mut self) -> Result<Token, Error> {
|
||||
if let Some(ref mut pat) = self.stack.last_mut() {
|
||||
if let Some(ref mut pat) = self.branches.last_mut() {
|
||||
return Ok(pat.pop().unwrap());
|
||||
}
|
||||
Err(self.error(ErrorKind::UnopenedAlternates))
|
||||
}
|
||||
|
||||
fn have_tokens(&self) -> Result<bool, Error> {
|
||||
match self.stack.last() {
|
||||
match self.branches.last() {
|
||||
None => Err(self.error(ErrorKind::UnopenedAlternates)),
|
||||
Some(ref pat) => Ok(!pat.is_empty()),
|
||||
}
|
||||
@@ -843,11 +849,11 @@ impl<'a> Parser<'a> {
|
||||
fn parse_comma(&mut self) -> Result<(), Error> {
|
||||
// If we aren't inside a group alternation, then don't
|
||||
// treat commas specially. Otherwise, we need to start
|
||||
// a new alternate.
|
||||
if self.stack.len() <= 1 {
|
||||
// a new alternate branch.
|
||||
if self.alternates_stack.is_empty() {
|
||||
self.push_token(Token::Literal(','))
|
||||
} else {
|
||||
Ok(self.stack.push(Tokens::default()))
|
||||
Ok(self.branches.push(Tokens::default()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -884,7 +890,7 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
if self.branches.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{'))
|
||||
{
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
@@ -897,7 +903,7 @@ impl<'a> Parser<'a> {
|
||||
assert!(self.bump().is_none());
|
||||
true
|
||||
}
|
||||
Some(',') | Some('}') if self.stack.len() >= 2 => true,
|
||||
Some(',') | Some('}') if self.branches.len() >= 2 => true,
|
||||
Some(c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
false
|
||||
@@ -1225,6 +1231,10 @@ mod tests {
|
||||
syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
|
||||
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
|
||||
syntaxerr!(err_alt1, "{a,b", ErrorKind::UnclosedAlternates);
|
||||
syntaxerr!(err_alt2, "{a,{b,c}", ErrorKind::UnclosedAlternates);
|
||||
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
|
||||
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
|
||||
|
||||
const CASEI: Options =
|
||||
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
|
||||
@@ -1245,6 +1255,8 @@ mod tests {
|
||||
ealtre: Some(true),
|
||||
};
|
||||
|
||||
toregex!(re_empty, "", "^$");
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
|
||||
toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
|
||||
@@ -1284,7 +1296,9 @@ mod tests {
|
||||
toregex!(re32, "/a**", r"^/a.*.*$");
|
||||
toregex!(re33, "/**a", r"^/.*.*a$");
|
||||
toregex!(re34, "/a**b", r"^/a.*.*b$");
|
||||
toregex!(re35, "{a,b}", r"^(?:b|a)$");
|
||||
toregex!(re35, "{a,b}", r"^(?:a|b)$");
|
||||
toregex!(re36, "{a,{b,c}}", r"^(?:a|(?:b|c))$");
|
||||
toregex!(re37, "{{a,b},{c,d}}", r"^(?:(?:a|b)|(?:c|d))$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
@@ -1372,6 +1386,9 @@ mod tests {
|
||||
matches!(matchalt14, "foo{,.txt}", "foo.txt");
|
||||
nmatches!(matchalt15, "foo{,.txt}", "foo");
|
||||
matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
|
||||
matches!(matchalt17, "{a,b{c,d}}", "bc");
|
||||
matches!(matchalt18, "{a,b{c,d}}", "bd");
|
||||
matches!(matchalt19, "{a,b{c,d}}", "a");
|
||||
|
||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||
#[cfg(unix)]
|
||||
|
@@ -182,8 +182,11 @@ pub enum ErrorKind {
|
||||
UnopenedAlternates,
|
||||
/// Occurs when a `{` is found without a matching `}`.
|
||||
UnclosedAlternates,
|
||||
/// Occurs when an alternating group is nested inside another alternating
|
||||
/// group, e.g., `{{a,b},{c,d}}`.
|
||||
/// **DEPRECATED**.
|
||||
///
|
||||
/// This error used to occur when an alternating group was nested inside
|
||||
/// another alternating group, e.g., `{{a,b},{c,d}}`. However, this is now
|
||||
/// supported and as such this error cannot occur.
|
||||
NestedAlternates,
|
||||
/// Occurs when an unescaped '\' is found at the end of a glob.
|
||||
DanglingEscape,
|
||||
|
Reference in New Issue
Block a user