Add curly brace alternates to glob format.

Closes #80.
This commit is contained in:
Andrew Gallant 2016-09-25 17:28:23 -04:00
parent 3d6a39be06
commit fd5ae2f795

View File

@ -43,12 +43,19 @@ use regex::bytes::RegexSet;
use pathutil::file_name; use pathutil::file_name;
// The two path separator characters `/` and `\`, passed through
// `regex::quote` once on first use so the result can be reused whenever a
// glob is translated to a regex (read as `seps` in `tokens_to_regex`).
lazy_static! {
static ref FILE_SEPARATORS: String = regex::quote(r"/\");
}
/// Represents an error that can occur when parsing a glob pattern. /// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error { pub enum Error {
InvalidRecursive, InvalidRecursive,
UnclosedClass, UnclosedClass,
InvalidRange(char, char), InvalidRange(char, char),
UnopenedAlternates,
UnclosedAlternates,
NestedAlternates,
} }
impl StdError for Error { impl StdError for Error {
@ -63,6 +70,17 @@ impl StdError for Error {
Error::InvalidRange(_, _) => { Error::InvalidRange(_, _) => {
"invalid character range" "invalid character range"
} }
Error::UnopenedAlternates => {
"unopened alternate group; missing '{' \
(maybe escape '}' with '[}]'?)"
}
Error::UnclosedAlternates => {
"unclosed alternate group; missing '}' \
(maybe escape '{' with '[{]'?)"
}
Error::NestedAlternates => {
"nested alternate groups are not allowed"
}
} }
} }
} }
@ -70,7 +88,11 @@ impl StdError for Error {
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self { match *self {
Error::InvalidRecursive | Error::UnclosedClass => { Error::InvalidRecursive
| Error::UnclosedClass
| Error::UnopenedAlternates
| Error::UnclosedAlternates
| Error::NestedAlternates => {
write!(f, "{}", self.description()) write!(f, "{}", self.description())
} }
Error::InvalidRange(s, e) => { Error::InvalidRange(s, e) => {
@ -322,7 +344,7 @@ impl SetBuilder {
/// ///
/// It cannot be used directly to match file paths, but it can be converted /// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string. /// to a regular expression string.
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Pattern { pub struct Pattern {
tokens: Vec<Token>, tokens: Vec<Token>,
} }
@ -350,6 +372,7 @@ enum Token {
negated: bool, negated: bool,
ranges: Vec<(char, char)>, ranges: Vec<(char, char)>,
}, },
Alternates(Vec<Pattern>),
} }
impl Pattern { impl Pattern {
@ -358,13 +381,19 @@ impl Pattern {
/// If the pattern is not a valid glob, then an error is returned. /// If the pattern is not a valid glob, then an error is returned.
pub fn new(pat: &str) -> Result<Pattern, Error> { pub fn new(pat: &str) -> Result<Pattern, Error> {
let mut p = Parser { let mut p = Parser {
p: Pattern::default(), stack: vec![Pattern::default()],
chars: pat.chars().peekable(), chars: pat.chars().peekable(),
prev: None, prev: None,
cur: None, cur: None,
}; };
try!(p.parse()); try!(p.parse());
Ok(p.p) if p.stack.is_empty() {
Err(Error::UnopenedAlternates)
} else if p.stack.len() > 1 {
Err(Error::UnclosedAlternates)
} else {
Ok(p.stack.pop().unwrap())
}
} }
/// Returns an extension if this pattern exclusively matches it. /// Returns an extension if this pattern exclusively matches it.
@ -506,7 +535,6 @@ impl Pattern {
/// regular expression and will represent the matching semantics of this /// regular expression and will represent the matching semantics of this
/// glob pattern and the options given. /// glob pattern and the options given.
pub fn to_regex_with(&self, options: &MatchOptions) -> String { pub fn to_regex_with(&self, options: &MatchOptions) -> String {
let seps = regex::quote(r"/\");
let mut re = String::new(); let mut re = String::new();
re.push_str("(?-u)"); re.push_str("(?-u)");
if options.case_insensitive { if options.case_insensitive {
@ -520,7 +548,20 @@ impl Pattern {
re.push('$'); re.push('$');
return re; return re;
} }
for tok in &self.tokens { self.tokens_to_regex(options, &self.tokens, &mut re);
re.push('$');
re
}
fn tokens_to_regex(
&self,
options: &MatchOptions,
tokens: &[Token],
re: &mut String,
) {
let seps = &*FILE_SEPARATORS;
for tok in tokens {
match *tok { match *tok {
Token::Literal(c) => { Token::Literal(c) => {
re.push_str(&regex::quote(&c.to_string())); re.push_str(&regex::quote(&c.to_string()));
@ -566,15 +607,22 @@ impl Pattern {
} }
re.push(']'); re.push(']');
} }
Token::Alternates(ref patterns) => {
    // Translate every branch of the `{a,b,...}` group separately.
    // Branches that translate to the empty string (e.g. the first
    // branch of `{,a}`) are tracked with a flag instead of being
    // emitted, so that we never produce an empty group or an empty
    // alternate in the generated regex.
    let mut parts = vec![];
    let mut has_empty = false;
    for pat in patterns {
        let mut altre = String::new();
        self.tokens_to_regex(options, &pat.tokens, &mut altre);
        if altre.is_empty() {
            has_empty = true;
        } else {
            parts.push(altre);
        }
    }
    // `|` has the lowest precedence in a regex, so the branches must
    // be wrapped in a group. Without it, `a{b,c}d` would be rendered
    // as `ab|cd` and match anything starting with `ab` or ending with
    // `cd` instead of exactly `abd` or `acd`.
    if !parts.is_empty() {
        re.push_str("(?:");
        re.push_str(&parts.join("|"));
        re.push(')');
        if has_empty {
            // An empty branch means the whole group may match nothing.
            re.push('?');
        }
    }
}
} }
} }
re.push('$');
re
} }
} }
struct Parser<'a> { struct Parser<'a> {
p: Pattern, stack: Vec<Pattern>,
chars: iter::Peekable<str::Chars<'a>>, chars: iter::Peekable<str::Chars<'a>>,
prev: Option<char>, prev: Option<char>,
cur: Option<char>, cur: Option<char>,
@ -584,44 +632,101 @@ impl<'a> Parser<'a> {
fn parse(&mut self) -> Result<(), Error> { fn parse(&mut self) -> Result<(), Error> {
while let Some(c) = self.bump() { while let Some(c) = self.bump() {
match c { match c {
'?' => self.p.tokens.push(Token::Any), '?' => try!(self.push_token(Token::Any)),
'*' => try!(self.parse_star()), '*' => try!(self.parse_star()),
'[' => try!(self.parse_class()), '[' => try!(self.parse_class()),
c => self.p.tokens.push(Token::Literal(c)), '{' => try!(self.push_alternate()),
'}' => try!(self.pop_alternate()),
',' => try!(self.parse_comma()),
c => try!(self.push_token(Token::Literal(c))),
} }
} }
Ok(()) Ok(())
} }
fn push_alternate(&mut self) -> Result<(), Error> {
if self.stack.len() > 1 {
return Err(Error::NestedAlternates);
}
Ok(self.stack.push(Pattern::default()))
}
fn pop_alternate(&mut self) -> Result<(), Error> {
let mut alts = vec![];
while self.stack.len() >= 2 {
alts.push(self.stack.pop().unwrap());
}
self.push_token(Token::Alternates(alts))
}
fn push_token(&mut self, tok: Token) -> Result<(), Error> {
match self.stack.last_mut() {
None => Err(Error::UnopenedAlternates),
Some(ref mut pat) => Ok(pat.tokens.push(tok)),
}
}
fn pop_token(&mut self) -> Result<Token, Error> {
match self.stack.last_mut() {
None => Err(Error::UnopenedAlternates),
Some(ref mut pat) => Ok(pat.tokens.pop().unwrap()),
}
}
fn have_tokens(&self) -> Result<bool, Error> {
match self.stack.last() {
None => Err(Error::UnopenedAlternates),
Some(ref pat) => Ok(!pat.tokens.is_empty()),
}
}
/// Handles a `,` in the glob.
///
/// Inside an alternate group a comma ends the current branch, so a fresh
/// empty pattern is pushed to collect the next one. Outside of any group
/// the comma has no special meaning and is recorded as a literal.
fn parse_comma(&mut self) -> Result<(), Error> {
    let inside_group = self.stack.len() > 1;
    if inside_group {
        self.stack.push(Pattern::default());
        return Ok(());
    }
    self.push_token(Token::Literal(','))
}
fn parse_star(&mut self) -> Result<(), Error> { fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev; let prev = self.prev;
if self.chars.peek() != Some(&'*') { if self.chars.peek() != Some(&'*') {
self.p.tokens.push(Token::ZeroOrMore); try!(self.push_token(Token::ZeroOrMore));
return Ok(()); return Ok(());
} }
assert!(self.bump() == Some('*')); assert!(self.bump() == Some('*'));
if self.p.tokens.is_empty() { if !try!(self.have_tokens()) {
self.p.tokens.push(Token::RecursivePrefix); try!(self.push_token(Token::RecursivePrefix));
let next = self.bump(); let next = self.bump();
if !next.is_none() && next != Some('/') { if !next.is_none() && next != Some('/') {
return Err(Error::InvalidRecursive); return Err(Error::InvalidRecursive);
} }
return Ok(()); return Ok(());
} }
self.p.tokens.pop().unwrap(); try!(self.pop_token());
if prev != Some('/') { if prev != Some('/') {
if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{')) {
return Err(Error::InvalidRecursive); return Err(Error::InvalidRecursive);
} }
let next = self.bump();
if next.is_none() {
self.p.tokens.push(Token::RecursiveSuffix);
return Ok(());
} }
if next != Some('/') { match self.chars.peek() {
return Err(Error::InvalidRecursive); None => {
assert!(self.bump().is_none());
self.push_token(Token::RecursiveSuffix)
}
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
self.push_token(Token::RecursiveSuffix)
}
Some(&'/') => {
assert!(self.bump() == Some('/'));
self.push_token(Token::RecursiveZeroOrMore)
}
_ => Err(Error::InvalidRecursive),
} }
self.p.tokens.push(Token::RecursiveZeroOrMore);
Ok(())
} }
fn parse_class(&mut self) -> Result<(), Error> { fn parse_class(&mut self) -> Result<(), Error> {
@ -691,11 +796,10 @@ impl<'a> Parser<'a> {
// it as a literal. // it as a literal.
ranges.push(('-', '-')); ranges.push(('-', '-'));
} }
self.p.tokens.push(Token::Class { self.push_token(Token::Class {
negated: negated, negated: negated,
ranges: ranges, ranges: ranges,
}); })
Ok(())
} }
fn bump(&mut self) -> Option<char> { fn bump(&mut self) -> Option<char> {
@ -995,6 +1099,20 @@ mod tests {
matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI); matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI); matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
// Alternate groups (`{a,b}`). Each line presumably asserts that the glob
// (second argument) matches the path (third argument) -- the `matches!`
// macro is defined outside this view. Covered here: commas outside a group
// stay literal (1-2), basic groups (3-4, 8, 11-13), recursive `**` inside a
// group (5-6), a `}` escaped via a character class (7), and empty branches
// paired with the empty string (9-10).
matches!(matchalt1, "a,b", "a,b");
matches!(matchalt2, ",", ",");
matches!(matchalt3, "{a,b}", "a");
matches!(matchalt4, "{a,b}", "b");
matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
matches!(matchalt6, "{**/src/**,foo}", "foo");
matches!(matchalt7, "{[}],foo}", "}");
matches!(matchalt8, "{foo}", "foo");
matches!(matchalt9, "{}", "");
matches!(matchalt10, "{,}", "");
matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT); matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT); nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT); nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);