Add better error messages for invalid globs.

This threads the original glob given by end users through all of the
glob parsing errors. This was slightly trickier than it might appear
because the gitignore implementation actually modifies the glob before
compiling it. So in order to get better glob error messages everywhere,
we need to track the original glob both in the glob parser and in the
higher-level abstractions in the `ignore` crate.

Fixes #444
This commit is contained in:
Andrew Gallant 2017-04-12 18:12:34 -04:00
parent 7ad23e5565
commit c50b8b4125
5 changed files with 151 additions and 58 deletions

View File

@ -9,7 +9,7 @@ use std::str;
use regex; use regex;
use regex::bytes::Regex; use regex::bytes::Regex;
use {Candidate, Error, new_regex}; use {Candidate, Error, ErrorKind, new_regex};
/// Describes a matching strategy for a particular pattern. /// Describes a matching strategy for a particular pattern.
/// ///
@ -544,6 +544,7 @@ impl<'a> GlobBuilder<'a> {
/// Parses and builds the pattern. /// Parses and builds the pattern.
pub fn build(&self) -> Result<Glob, Error> { pub fn build(&self) -> Result<Glob, Error> {
let mut p = Parser { let mut p = Parser {
glob: &self.glob,
stack: vec![Tokens::default()], stack: vec![Tokens::default()],
chars: self.glob.chars().peekable(), chars: self.glob.chars().peekable(),
prev: None, prev: None,
@ -551,9 +552,15 @@ impl<'a> GlobBuilder<'a> {
}; };
try!(p.parse()); try!(p.parse());
if p.stack.is_empty() { if p.stack.is_empty() {
Err(Error::UnopenedAlternates) Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnopenedAlternates,
})
} else if p.stack.len() > 1 { } else if p.stack.len() > 1 {
Err(Error::UnclosedAlternates) Err(Error {
glob: Some(self.glob.to_string()),
kind: ErrorKind::UnclosedAlternates,
})
} else { } else {
let tokens = p.stack.pop().unwrap(); let tokens = p.stack.pop().unwrap();
Ok(Glob { Ok(Glob {
@ -698,6 +705,7 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
} }
struct Parser<'a> { struct Parser<'a> {
glob: &'a str,
stack: Vec<Tokens>, stack: Vec<Tokens>,
chars: iter::Peekable<str::Chars<'a>>, chars: iter::Peekable<str::Chars<'a>>,
prev: Option<char>, prev: Option<char>,
@ -705,6 +713,10 @@ struct Parser<'a> {
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
fn error(&self, kind: ErrorKind) -> Error {
Error { glob: Some(self.glob.to_string()), kind: kind }
}
fn parse(&mut self) -> Result<(), Error> { fn parse(&mut self) -> Result<(), Error> {
while let Some(c) = self.bump() { while let Some(c) = self.bump() {
match c { match c {
@ -729,7 +741,7 @@ impl<'a> Parser<'a> {
fn push_alternate(&mut self) -> Result<(), Error> { fn push_alternate(&mut self) -> Result<(), Error> {
if self.stack.len() > 1 { if self.stack.len() > 1 {
return Err(Error::NestedAlternates); return Err(self.error(ErrorKind::NestedAlternates));
} }
Ok(self.stack.push(Tokens::default())) Ok(self.stack.push(Tokens::default()))
} }
@ -743,22 +755,22 @@ impl<'a> Parser<'a> {
} }
fn push_token(&mut self, tok: Token) -> Result<(), Error> { fn push_token(&mut self, tok: Token) -> Result<(), Error> {
match self.stack.last_mut() { if let Some(ref mut pat) = self.stack.last_mut() {
None => Err(Error::UnopenedAlternates), return Ok(pat.push(tok));
Some(ref mut pat) => Ok(pat.push(tok)),
} }
Err(self.error(ErrorKind::UnopenedAlternates))
} }
fn pop_token(&mut self) -> Result<Token, Error> { fn pop_token(&mut self) -> Result<Token, Error> {
match self.stack.last_mut() { if let Some(ref mut pat) = self.stack.last_mut() {
None => Err(Error::UnopenedAlternates), return Ok(pat.pop().unwrap());
Some(ref mut pat) => Ok(pat.pop().unwrap()),
} }
Err(self.error(ErrorKind::UnopenedAlternates))
} }
fn have_tokens(&self) -> Result<bool, Error> { fn have_tokens(&self) -> Result<bool, Error> {
match self.stack.last() { match self.stack.last() {
None => Err(Error::UnopenedAlternates), None => Err(self.error(ErrorKind::UnopenedAlternates)),
Some(ref pat) => Ok(!pat.is_empty()), Some(ref pat) => Ok(!pat.is_empty()),
} }
} }
@ -785,7 +797,7 @@ impl<'a> Parser<'a> {
try!(self.push_token(Token::RecursivePrefix)); try!(self.push_token(Token::RecursivePrefix));
let next = self.bump(); let next = self.bump();
if !next.map(is_separator).unwrap_or(true) { if !next.map(is_separator).unwrap_or(true) {
return Err(Error::InvalidRecursive); return Err(self.error(ErrorKind::InvalidRecursive));
} }
return Ok(()); return Ok(());
} }
@ -793,7 +805,7 @@ impl<'a> Parser<'a> {
if !prev.map(is_separator).unwrap_or(false) { if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1 if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{')) { || (prev != Some(',') && prev != Some('{')) {
return Err(Error::InvalidRecursive); return Err(self.error(ErrorKind::InvalidRecursive));
} }
} }
match self.chars.peek() { match self.chars.peek() {
@ -808,18 +820,22 @@ impl<'a> Parser<'a> {
assert!(self.bump().map(is_separator).unwrap_or(false)); assert!(self.bump().map(is_separator).unwrap_or(false));
self.push_token(Token::RecursiveZeroOrMore) self.push_token(Token::RecursiveZeroOrMore)
} }
_ => Err(Error::InvalidRecursive), _ => Err(self.error(ErrorKind::InvalidRecursive)),
} }
} }
fn parse_class(&mut self) -> Result<(), Error> { fn parse_class(&mut self) -> Result<(), Error> {
fn add_to_last_range( fn add_to_last_range(
glob: &str,
r: &mut (char, char), r: &mut (char, char),
add: char, add: char,
) -> Result<(), Error> { ) -> Result<(), Error> {
r.1 = add; r.1 = add;
if r.1 < r.0 { if r.1 < r.0 {
Err(Error::InvalidRange(r.0, r.1)) Err(Error {
glob: Some(glob.to_string()),
kind: ErrorKind::InvalidRange(r.0, r.1),
})
} else { } else {
Ok(()) Ok(())
} }
@ -837,7 +853,7 @@ impl<'a> Parser<'a> {
Some(c) => c, Some(c) => c,
// The only way to successfully break this loop is to observe // The only way to successfully break this loop is to observe
// a ']'. // a ']'.
None => return Err(Error::UnclosedClass), None => return Err(self.error(ErrorKind::UnclosedClass)),
}; };
match c { match c {
']' => { ']' => {
@ -854,7 +870,7 @@ impl<'a> Parser<'a> {
// invariant: in_range is only set when there is // invariant: in_range is only set when there is
// already at least one character seen. // already at least one character seen.
let r = ranges.last_mut().unwrap(); let r = ranges.last_mut().unwrap();
try!(add_to_last_range(r, '-')); try!(add_to_last_range(&self.glob, r, '-'));
in_range = false; in_range = false;
} else { } else {
assert!(!ranges.is_empty()); assert!(!ranges.is_empty());
@ -865,7 +881,8 @@ impl<'a> Parser<'a> {
if in_range { if in_range {
// invariant: in_range is only set when there is // invariant: in_range is only set when there is
// already at least one character seen. // already at least one character seen.
try!(add_to_last_range(ranges.last_mut().unwrap(), c)); try!(add_to_last_range(
&self.glob, ranges.last_mut().unwrap(), c));
} else { } else {
ranges.push((c, c)); ranges.push((c, c));
} }
@ -909,7 +926,7 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
mod tests { mod tests {
use std::ffi::{OsStr, OsString}; use std::ffi::{OsStr, OsString};
use {GlobSetBuilder, Error}; use {GlobSetBuilder, ErrorKind};
use super::{Glob, GlobBuilder, Token}; use super::{Glob, GlobBuilder, Token};
use super::Token::*; use super::Token::*;
@ -934,7 +951,7 @@ mod tests {
#[test] #[test]
fn $name() { fn $name() {
let err = Glob::new($pat).unwrap_err(); let err = Glob::new($pat).unwrap_err();
assert_eq!($err, err); assert_eq!(&$err, err.kind());
} }
} }
} }
@ -1057,19 +1074,19 @@ mod tests {
syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]); syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]); syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
syntaxerr!(err_rseq1, "a**", Error::InvalidRecursive); syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", Error::InvalidRecursive); syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", Error::InvalidRecursive); syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq4, "***", Error::InvalidRecursive); syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", Error::InvalidRecursive); syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", Error::InvalidRecursive); syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", Error::InvalidRecursive); syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", Error::UnclosedClass); syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", Error::UnclosedClass); syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", Error::UnclosedClass); syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed4, "[!]", Error::UnclosedClass); syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
syntaxerr!(err_range1, "[z-a]", Error::InvalidRange('z', 'a')); syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", Error::InvalidRange('z', '-')); syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
const CASEI: Options = Options { const CASEI: Options = Options {
casei: true, casei: true,

View File

@ -128,7 +128,16 @@ mod pathutil;
/// Represents an error that can occur when parsing a glob pattern. /// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error { pub struct Error {
/// The original glob provided by the caller.
glob: Option<String>,
/// The kind of error.
kind: ErrorKind,
}
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear /// Occurs when a use of `**` is invalid. Namely, `**` can only appear
/// adjacent to a path separator, or the beginning/end of a glob. /// adjacent to a path separator, or the beginning/end of a glob.
InvalidRecursive, InvalidRecursive,
@ -150,45 +159,74 @@ pub enum Error {
} }
impl StdError for Error { impl StdError for Error {
fn description(&self) -> &str {
self.kind.description()
}
}
impl Error {
/// Return the glob that caused this error, if one exists.
pub fn glob(&self) -> Option<&str> {
self.glob.as_ref().map(|s| &**s)
}
/// Return the kind of this error.
pub fn kind(&self) -> &ErrorKind {
&self.kind
}
}
impl ErrorKind {
fn description(&self) -> &str { fn description(&self) -> &str {
match *self { match *self {
Error::InvalidRecursive => { ErrorKind::InvalidRecursive => {
"invalid use of **; must be one path component" "invalid use of **; must be one path component"
} }
Error::UnclosedClass => { ErrorKind::UnclosedClass => {
"unclosed character class; missing ']'" "unclosed character class; missing ']'"
} }
Error::InvalidRange(_, _) => { ErrorKind::InvalidRange(_, _) => {
"invalid character range" "invalid character range"
} }
Error::UnopenedAlternates => { ErrorKind::UnopenedAlternates => {
"unopened alternate group; missing '{' \ "unopened alternate group; missing '{' \
(maybe escape '}' with '[}]'?)" (maybe escape '}' with '[}]'?)"
} }
Error::UnclosedAlternates => { ErrorKind::UnclosedAlternates => {
"unclosed alternate group; missing '}' \ "unclosed alternate group; missing '}' \
(maybe escape '{' with '[{]'?)" (maybe escape '{' with '[{]'?)"
} }
Error::NestedAlternates => { ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed" "nested alternate groups are not allowed"
} }
Error::Regex(ref err) => err, ErrorKind::Regex(ref err) => err,
} }
} }
} }
impl fmt::Display for Error { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.glob {
None => self.kind.fmt(f),
Some(ref glob) => {
write!(f, "error parsing glob '{}': {}", glob, self.kind)
}
}
}
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self { match *self {
Error::InvalidRecursive ErrorKind::InvalidRecursive
| Error::UnclosedClass | ErrorKind::UnclosedClass
| Error::UnopenedAlternates | ErrorKind::UnopenedAlternates
| Error::UnclosedAlternates | ErrorKind::UnclosedAlternates
| Error::NestedAlternates | ErrorKind::NestedAlternates
| Error::Regex(_) => { | ErrorKind::Regex(_) => {
write!(f, "{}", self.description()) write!(f, "{}", self.description())
} }
Error::InvalidRange(s, e) => { ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e) write!(f, "invalid range; '{}' > '{}'", s, e)
} }
} }
@ -201,12 +239,22 @@ fn new_regex(pat: &str) -> Result<Regex, Error> {
.size_limit(10 * (1 << 20)) .size_limit(10 * (1 << 20))
.dfa_size_limit(10 * (1 << 20)) .dfa_size_limit(10 * (1 << 20))
.build() .build()
.map_err(|err| Error::Regex(err.to_string())) .map_err(|err| {
Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
}
})
} }
fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error> fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
where S: AsRef<str>, I: IntoIterator<Item=S> { where S: AsRef<str>, I: IntoIterator<Item=S> {
RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string())) RegexSet::new(pats).map_err(|err| {
Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
}
})
} }
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>; type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;

View File

@ -279,7 +279,12 @@ impl GitignoreBuilder {
let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
let set = try!( let set = try!(
self.builder.build().map_err(|err| Error::Glob(err.to_string()))); self.builder.build().map_err(|err| {
Error::Glob {
glob: None,
err: err.to_string(),
}
}));
Ok(Gitignore { Ok(Gitignore {
set: set, set: set,
root: self.root.clone(), root: self.root.clone(),
@ -420,7 +425,12 @@ impl GitignoreBuilder {
GlobBuilder::new(&glob.actual) GlobBuilder::new(&glob.actual)
.literal_separator(literal_separator) .literal_separator(literal_separator)
.build() .build()
.map_err(|err| Error::Glob(err.to_string()))); .map_err(|err| {
Error::Glob {
glob: Some(glob.original.clone()),
err: err.kind().to_string(),
}
}));
self.builder.add(parsed); self.builder.add(parsed);
self.globs.push(glob); self.globs.push(glob);
Ok(self) Ok(self)

View File

@ -112,7 +112,17 @@ pub enum Error {
/// An error that occurs when doing I/O, such as reading an ignore file. /// An error that occurs when doing I/O, such as reading an ignore file.
Io(io::Error), Io(io::Error),
/// An error that occurs when trying to parse a glob. /// An error that occurs when trying to parse a glob.
Glob(String), Glob {
/// The original glob that caused this error. This glob, when
/// available, always corresponds to the glob provided by an end user.
/// e.g., It is the glob as written in a `.gitignore` file.
///
/// (This glob may be distinct from the glob that is actually
/// compiled, after accounting for `gitignore` semantics.)
glob: Option<String>,
/// The underlying glob error as a string.
err: String,
},
/// A type selection for a file type that is not defined. /// A type selection for a file type that is not defined.
UnrecognizedFileType(String), UnrecognizedFileType(String),
/// A user specified file type definition could not be parsed. /// A user specified file type definition could not be parsed.
@ -144,7 +154,7 @@ impl Error {
Error::WithDepth { ref err, .. } => err.is_io(), Error::WithDepth { ref err, .. } => err.is_io(),
Error::Loop { .. } => false, Error::Loop { .. } => false,
Error::Io(_) => true, Error::Io(_) => true,
Error::Glob(_) => false, Error::Glob { .. } => false,
Error::UnrecognizedFileType(_) => false, Error::UnrecognizedFileType(_) => false,
Error::InvalidDefinition => false, Error::InvalidDefinition => false,
} }
@ -199,7 +209,7 @@ impl error::Error for Error {
Error::WithDepth { ref err, .. } => err.description(), Error::WithDepth { ref err, .. } => err.description(),
Error::Loop { .. } => "file system loop found", Error::Loop { .. } => "file system loop found",
Error::Io(ref err) => err.description(), Error::Io(ref err) => err.description(),
Error::Glob(ref msg) => msg, Error::Glob { ref err, .. } => err,
Error::UnrecognizedFileType(_) => "unrecognized file type", Error::UnrecognizedFileType(_) => "unrecognized file type",
Error::InvalidDefinition => "invalid definition", Error::InvalidDefinition => "invalid definition",
} }
@ -227,7 +237,10 @@ impl fmt::Display for Error {
child.display(), ancestor.display()) child.display(), ancestor.display())
} }
Error::Io(ref err) => err.fmt(f), Error::Io(ref err) => err.fmt(f),
Error::Glob(ref msg) => write!(f, "{}", msg), Error::Glob { glob: None, ref err } => write!(f, "{}", err),
Error::Glob { glob: Some(ref glob), ref err } => {
write!(f, "error parsing glob '{}': {}", glob, err)
}
Error::UnrecognizedFileType(ref ty) => { Error::UnrecognizedFileType(ref ty) => {
write!(f, "unrecognized file type: {}", ty) write!(f, "unrecognized file type: {}", ty)
} }

View File

@ -448,13 +448,18 @@ impl TypesBuilder {
GlobBuilder::new(glob) GlobBuilder::new(glob)
.literal_separator(true) .literal_separator(true)
.build() .build()
.map_err(|err| Error::Glob(err.to_string())))); .map_err(|err| {
Error::Glob {
glob: Some(glob.to_string()),
err: err.kind().to_string(),
}
})));
glob_to_selection.push((isel, iglob)); glob_to_selection.push((isel, iglob));
} }
selections.push(selection.clone().map(move |_| def)); selections.push(selection.clone().map(move |_| def));
} }
let set = try!(build_set.build().map_err(|err| { let set = try!(build_set.build().map_err(|err| {
Error::Glob(err.to_string()) Error::Glob { glob: None, err: err.to_string() }
})); }));
Ok(Types { Ok(Types {
defs: defs, defs: defs,