mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 09:40:22 -07:00
parent
3d6a39be06
commit
fd5ae2f795
170
src/glob.rs
170
src/glob.rs
@ -43,12 +43,19 @@ use regex::bytes::RegexSet;
|
|||||||
|
|
||||||
use pathutil::file_name;
|
use pathutil::file_name;
|
||||||
|
|
||||||
|
// The path separator characters `/` and `\`, already quoted for use inside a
// regular expression. Kept in a lazily initialized static so the glob-to-regex
// translation can borrow it instead of re-quoting the separators on each call.
lazy_static! {
    static ref FILE_SEPARATORS: String = regex::quote(r"/\");
}
|
||||||
|
|
||||||
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
    /// A recursive wildcard (`**`) appeared where the parser does not accept
    /// it, i.e. not delimited by `/`, the ends of the pattern, or (inside an
    /// alternate group) `{`, `,` and `}`.
    InvalidRecursive,
    /// Reported by character class parsing.
    // NOTE(review): presumably a `[` with no matching `]`; the code that
    // produces this variant is not visible in this section.
    UnclosedClass,
    /// An invalid character range; carries the two characters of the range.
    InvalidRange(char, char),
    /// An alternate group was closed without being opened (missing `{`).
    UnopenedAlternates,
    /// An alternate group was opened without being closed (missing `}`).
    UnclosedAlternates,
    /// An alternate group was opened inside another alternate group, which
    /// is not allowed.
    NestedAlternates,
}
|
||||||
|
|
||||||
impl StdError for Error {
|
impl StdError for Error {
|
||||||
@ -63,6 +70,17 @@ impl StdError for Error {
|
|||||||
Error::InvalidRange(_, _) => {
|
Error::InvalidRange(_, _) => {
|
||||||
"invalid character range"
|
"invalid character range"
|
||||||
}
|
}
|
||||||
|
Error::UnopenedAlternates => {
|
||||||
|
"unopened alternate group; missing '{' \
|
||||||
|
(maybe escape '}' with '[}]'?)"
|
||||||
|
}
|
||||||
|
Error::UnclosedAlternates => {
|
||||||
|
"unclosed alternate group; missing '}' \
|
||||||
|
(maybe escape '{' with '[{]'?)"
|
||||||
|
}
|
||||||
|
Error::NestedAlternates => {
|
||||||
|
"nested alternate groups are not allowed"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -70,7 +88,11 @@ impl StdError for Error {
|
|||||||
impl fmt::Display for Error {
|
impl fmt::Display for Error {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match *self {
|
match *self {
|
||||||
Error::InvalidRecursive | Error::UnclosedClass => {
|
Error::InvalidRecursive
|
||||||
|
| Error::UnclosedClass
|
||||||
|
| Error::UnopenedAlternates
|
||||||
|
| Error::UnclosedAlternates
|
||||||
|
| Error::NestedAlternates => {
|
||||||
write!(f, "{}", self.description())
|
write!(f, "{}", self.description())
|
||||||
}
|
}
|
||||||
Error::InvalidRange(s, e) => {
|
Error::InvalidRange(s, e) => {
|
||||||
@ -322,7 +344,7 @@ impl SetBuilder {
|
|||||||
///
|
///
|
||||||
/// It cannot be used directly to match file paths, but it can be converted
|
/// It cannot be used directly to match file paths, but it can be converted
|
||||||
/// to a regular expression string.
|
/// to a regular expression string.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||||
pub struct Pattern {
|
pub struct Pattern {
|
||||||
tokens: Vec<Token>,
|
tokens: Vec<Token>,
|
||||||
}
|
}
|
||||||
@ -350,6 +372,7 @@ enum Token {
|
|||||||
negated: bool,
|
negated: bool,
|
||||||
ranges: Vec<(char, char)>,
|
ranges: Vec<(char, char)>,
|
||||||
},
|
},
|
||||||
|
Alternates(Vec<Pattern>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Pattern {
|
impl Pattern {
|
||||||
@ -358,13 +381,19 @@ impl Pattern {
|
|||||||
/// If the pattern is not a valid glob, then an error is returned.
|
/// If the pattern is not a valid glob, then an error is returned.
|
||||||
pub fn new(pat: &str) -> Result<Pattern, Error> {
|
pub fn new(pat: &str) -> Result<Pattern, Error> {
|
||||||
let mut p = Parser {
|
let mut p = Parser {
|
||||||
p: Pattern::default(),
|
stack: vec![Pattern::default()],
|
||||||
chars: pat.chars().peekable(),
|
chars: pat.chars().peekable(),
|
||||||
prev: None,
|
prev: None,
|
||||||
cur: None,
|
cur: None,
|
||||||
};
|
};
|
||||||
try!(p.parse());
|
try!(p.parse());
|
||||||
Ok(p.p)
|
if p.stack.is_empty() {
|
||||||
|
Err(Error::UnopenedAlternates)
|
||||||
|
} else if p.stack.len() > 1 {
|
||||||
|
Err(Error::UnclosedAlternates)
|
||||||
|
} else {
|
||||||
|
Ok(p.stack.pop().unwrap())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an extension if this pattern exclusively matches it.
|
/// Returns an extension if this pattern exclusively matches it.
|
||||||
@ -506,7 +535,6 @@ impl Pattern {
|
|||||||
/// regular expression and will represent the matching semantics of this
|
/// regular expression and will represent the matching semantics of this
|
||||||
/// glob pattern and the options given.
|
/// glob pattern and the options given.
|
||||||
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
||||||
let seps = regex::quote(r"/\");
|
|
||||||
let mut re = String::new();
|
let mut re = String::new();
|
||||||
re.push_str("(?-u)");
|
re.push_str("(?-u)");
|
||||||
if options.case_insensitive {
|
if options.case_insensitive {
|
||||||
@ -520,7 +548,20 @@ impl Pattern {
|
|||||||
re.push('$');
|
re.push('$');
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
for tok in &self.tokens {
|
self.tokens_to_regex(options, &self.tokens, &mut re);
|
||||||
|
re.push('$');
|
||||||
|
re
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tokens_to_regex(
|
||||||
|
&self,
|
||||||
|
options: &MatchOptions,
|
||||||
|
tokens: &[Token],
|
||||||
|
re: &mut String,
|
||||||
|
) {
|
||||||
|
let seps = &*FILE_SEPARATORS;
|
||||||
|
|
||||||
|
for tok in tokens {
|
||||||
match *tok {
|
match *tok {
|
||||||
Token::Literal(c) => {
|
Token::Literal(c) => {
|
||||||
re.push_str(®ex::quote(&c.to_string()));
|
re.push_str(®ex::quote(&c.to_string()));
|
||||||
@ -566,15 +607,22 @@ impl Pattern {
|
|||||||
}
|
}
|
||||||
re.push(']');
|
re.push(']');
|
||||||
}
|
}
|
||||||
|
Token::Alternates(ref patterns) => {
|
||||||
|
let mut parts = vec![];
|
||||||
|
for pat in patterns {
|
||||||
|
let mut altre = String::new();
|
||||||
|
self.tokens_to_regex(options, &pat.tokens, &mut altre);
|
||||||
|
parts.push(altre);
|
||||||
|
}
|
||||||
|
re.push_str(&parts.join("|"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
re.push('$');
|
|
||||||
re
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Parser<'a> {
|
struct Parser<'a> {
|
||||||
p: Pattern,
|
stack: Vec<Pattern>,
|
||||||
chars: iter::Peekable<str::Chars<'a>>,
|
chars: iter::Peekable<str::Chars<'a>>,
|
||||||
prev: Option<char>,
|
prev: Option<char>,
|
||||||
cur: Option<char>,
|
cur: Option<char>,
|
||||||
@ -584,44 +632,101 @@ impl<'a> Parser<'a> {
|
|||||||
fn parse(&mut self) -> Result<(), Error> {
|
fn parse(&mut self) -> Result<(), Error> {
|
||||||
while let Some(c) = self.bump() {
|
while let Some(c) = self.bump() {
|
||||||
match c {
|
match c {
|
||||||
'?' => self.p.tokens.push(Token::Any),
|
'?' => try!(self.push_token(Token::Any)),
|
||||||
'*' => try!(self.parse_star()),
|
'*' => try!(self.parse_star()),
|
||||||
'[' => try!(self.parse_class()),
|
'[' => try!(self.parse_class()),
|
||||||
c => self.p.tokens.push(Token::Literal(c)),
|
'{' => try!(self.push_alternate()),
|
||||||
|
'}' => try!(self.pop_alternate()),
|
||||||
|
',' => try!(self.parse_comma()),
|
||||||
|
c => try!(self.push_token(Token::Literal(c))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn push_alternate(&mut self) -> Result<(), Error> {
|
||||||
|
if self.stack.len() > 1 {
|
||||||
|
return Err(Error::NestedAlternates);
|
||||||
|
}
|
||||||
|
Ok(self.stack.push(Pattern::default()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pop_alternate(&mut self) -> Result<(), Error> {
|
||||||
|
let mut alts = vec![];
|
||||||
|
while self.stack.len() >= 2 {
|
||||||
|
alts.push(self.stack.pop().unwrap());
|
||||||
|
}
|
||||||
|
self.push_token(Token::Alternates(alts))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn push_token(&mut self, tok: Token) -> Result<(), Error> {
|
||||||
|
match self.stack.last_mut() {
|
||||||
|
None => Err(Error::UnopenedAlternates),
|
||||||
|
Some(ref mut pat) => Ok(pat.tokens.push(tok)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pop_token(&mut self) -> Result<Token, Error> {
|
||||||
|
match self.stack.last_mut() {
|
||||||
|
None => Err(Error::UnopenedAlternates),
|
||||||
|
Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn have_tokens(&self) -> Result<bool, Error> {
|
||||||
|
match self.stack.last() {
|
||||||
|
None => Err(Error::UnopenedAlternates),
|
||||||
|
Some(ref pat) => Ok(!pat.tokens.is_empty()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_comma(&mut self) -> Result<(), Error> {
|
||||||
|
// If we aren't inside a group alternation, then don't
|
||||||
|
// treat commas specially. Otherwise, we need to start
|
||||||
|
// a new alternate.
|
||||||
|
if self.stack.len() <= 1 {
|
||||||
|
self.push_token(Token::Literal(','))
|
||||||
|
} else {
|
||||||
|
Ok(self.stack.push(Pattern::default()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_star(&mut self) -> Result<(), Error> {
|
fn parse_star(&mut self) -> Result<(), Error> {
|
||||||
let prev = self.prev;
|
let prev = self.prev;
|
||||||
if self.chars.peek() != Some(&'*') {
|
if self.chars.peek() != Some(&'*') {
|
||||||
self.p.tokens.push(Token::ZeroOrMore);
|
try!(self.push_token(Token::ZeroOrMore));
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
assert!(self.bump() == Some('*'));
|
assert!(self.bump() == Some('*'));
|
||||||
if self.p.tokens.is_empty() {
|
if !try!(self.have_tokens()) {
|
||||||
self.p.tokens.push(Token::RecursivePrefix);
|
try!(self.push_token(Token::RecursivePrefix));
|
||||||
let next = self.bump();
|
let next = self.bump();
|
||||||
if !next.is_none() && next != Some('/') {
|
if !next.is_none() && next != Some('/') {
|
||||||
return Err(Error::InvalidRecursive);
|
return Err(Error::InvalidRecursive);
|
||||||
}
|
}
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
self.p.tokens.pop().unwrap();
|
try!(self.pop_token());
|
||||||
if prev != Some('/') {
|
if prev != Some('/') {
|
||||||
|
if self.stack.len() <= 1
|
||||||
|
|| (prev != Some(',') && prev != Some('{')) {
|
||||||
return Err(Error::InvalidRecursive);
|
return Err(Error::InvalidRecursive);
|
||||||
}
|
}
|
||||||
let next = self.bump();
|
|
||||||
if next.is_none() {
|
|
||||||
self.p.tokens.push(Token::RecursiveSuffix);
|
|
||||||
return Ok(());
|
|
||||||
}
|
}
|
||||||
if next != Some('/') {
|
match self.chars.peek() {
|
||||||
return Err(Error::InvalidRecursive);
|
None => {
|
||||||
|
assert!(self.bump().is_none());
|
||||||
|
self.push_token(Token::RecursiveSuffix)
|
||||||
|
}
|
||||||
|
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||||
|
self.push_token(Token::RecursiveSuffix)
|
||||||
|
}
|
||||||
|
Some(&'/') => {
|
||||||
|
assert!(self.bump() == Some('/'));
|
||||||
|
self.push_token(Token::RecursiveZeroOrMore)
|
||||||
|
}
|
||||||
|
_ => Err(Error::InvalidRecursive),
|
||||||
}
|
}
|
||||||
self.p.tokens.push(Token::RecursiveZeroOrMore);
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_class(&mut self) -> Result<(), Error> {
|
fn parse_class(&mut self) -> Result<(), Error> {
|
||||||
@ -691,11 +796,10 @@ impl<'a> Parser<'a> {
|
|||||||
// it as a literal.
|
// it as a literal.
|
||||||
ranges.push(('-', '-'));
|
ranges.push(('-', '-'));
|
||||||
}
|
}
|
||||||
self.p.tokens.push(Token::Class {
|
self.push_token(Token::Class {
|
||||||
negated: negated,
|
negated: negated,
|
||||||
ranges: ranges,
|
ranges: ranges,
|
||||||
});
|
})
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bump(&mut self) -> Option<char> {
|
fn bump(&mut self) -> Option<char> {
|
||||||
@ -995,6 +1099,20 @@ mod tests {
|
|||||||
matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
|
matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
|
||||||
matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
|
matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
|
||||||
|
|
||||||
|
matches!(matchalt1, "a,b", "a,b");
|
||||||
|
matches!(matchalt2, ",", ",");
|
||||||
|
matches!(matchalt3, "{a,b}", "a");
|
||||||
|
matches!(matchalt4, "{a,b}", "b");
|
||||||
|
matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
|
||||||
|
matches!(matchalt6, "{**/src/**,foo}", "foo");
|
||||||
|
matches!(matchalt7, "{[}],foo}", "}");
|
||||||
|
matches!(matchalt8, "{foo}", "foo");
|
||||||
|
matches!(matchalt9, "{}", "");
|
||||||
|
matches!(matchalt10, "{,}", "");
|
||||||
|
matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
|
||||||
|
matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
|
||||||
|
matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
|
||||||
|
|
||||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||||
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
|
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
|
||||||
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
|
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user