Provide a mechanism to compose type definitions

This extends the syntax of the --type-add flag to allow including the globs of
other already defined types.

Fixes #83.
This commit is contained in:
Ian Kerins 2017-01-01 19:32:46 -05:00 committed by Andrew Gallant
parent 8f7b9be356
commit ed01e80a79
5 changed files with 166 additions and 16 deletions

View File

@ -397,9 +397,38 @@ inside of ripgrep.
Note that this must be passed to every invocation of rg. Note that this must be passed to every invocation of rg.
Type settings are NOT persisted. Type settings are NOT persisted.
.RS .RS
.IP
.nf
\f[C]
\ \ Example:\ `rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN`
\f[]
.fi
.PP .PP
Example: \-\-type\-add can also be used to include rules from other types with
\f[C]rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN\f[] the special include directive.
The include directive permits specifying one or more other type names
(separated by commas) that have already been defined; their rules will
automatically be imported into the type specified.
For example, to create a type called src that matches C++, Python and
Markdown files, one can use:
.IP
.nf
\f[C]
\ \ `\-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]`
\f[]
.fi
.PP
Additional glob rules can still be added to the src type by using the
\-\-type\-add flag again:
.IP
.nf
\f[C]
\ \ `\-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]\ \-\-type\-add\ \[aq]src:*.foo\[aq]`
\f[]
.fi
.PP
Note that type names must consist only of Unicode letters or numbers.
Punctuation characters are not allowed.
.RE .RE
.TP .TP
.B \-\-type\-clear \f[I]TYPE\f[] ... .B \-\-type\-clear \f[I]TYPE\f[] ...

View File

@ -269,6 +269,24 @@ Project home page: https://github.com/BurntSushi/ripgrep
Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN` Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN`
--type-add can also be used to include rules from other types
with the special include directive. The include directive
permits specifying one or more other type names (separated by
commas) that have already been defined; their rules will automatically
be imported into the type specified. For example, to create a
type called src that matches C++, Python and Markdown files, one
can use:
`--type-add 'src:include:cpp,py,md'`
Additional glob rules can still be added to the src type by
using the --type-add flag again:
`--type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'`
Note that type names must consist only of Unicode letters or
numbers. Punctuation characters are not allowed.
--type-clear *TYPE* ... --type-clear *TYPE* ...
: Clear the file type globs previously defined for TYPE. This only clears : Clear the file type globs previously defined for TYPE. This only clears
the default type definitions that are found inside of ripgrep. Note the default type definitions that are found inside of ripgrep. Note

View File

@ -66,6 +66,22 @@ assert!(matcher.matched("x.foo", false).is_whitelist());
// This is ignored because we only selected the `foo` file type. // This is ignored because we only selected the `foo` file type.
assert!(matcher.matched("x.bar", false).is_ignore()); assert!(matcher.matched("x.bar", false).is_ignore());
``` ```
We can also add file type definitions based on other definitions.
```
use ignore::types::TypesBuilder;
let mut builder = TypesBuilder::new();
builder.add_defaults();
builder.add("foo", "*.foo");
builder.add_def("bar:include:foo,cpp");
builder.select("bar");
let matcher = builder.build().unwrap();
assert!(matcher.matched("x.foo", false).is_whitelist());
assert!(matcher.matched("y.cpp", false).is_whitelist());
```
*/ */
use std::cell::RefCell; use std::cell::RefCell;
@ -74,6 +90,7 @@ use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use regex::Regex;
use thread_local::ThreadLocal; use thread_local::ThreadLocal;
use pathutil::file_name; use pathutil::file_name;
@ -219,7 +236,7 @@ impl<'a> Glob<'a> {
/// File type definitions can be retrieved in aggregate from a file type /// File type definitions can be retrieved in aggregate from a file type
/// matcher. File type definitions are also reported when its responsible /// matcher. File type definitions are also reported when its responsible
/// for a match. /// for a match.
#[derive(Clone, Debug)] #[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileTypeDef { pub struct FileTypeDef {
name: String, name: String,
globs: Vec<String>, globs: Vec<String>,
@ -492,10 +509,13 @@ impl TypesBuilder {
/// Add a new file type definition. `name` can be arbitrary and `pat` /// Add a new file type definition. `name` can be arbitrary and `pat`
/// should be a glob recognizing file paths belonging to the `name` type. /// should be a glob recognizing file paths belonging to the `name` type.
/// ///
/// If `name` is `all` or otherwise contains a `:`, then an error is /// If `name` is `all` or otherwise contains any character that is not a
/// returned. /// Unicode letter or number, then an error is returned.
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
if name == "all" || name.contains(':') { lazy_static! {
static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
};
if name == "all" || !RE.is_match(name) {
return Err(Error::InvalidDefinition); return Err(Error::InvalidDefinition);
} }
let (key, glob) = (name.to_string(), glob.to_string()); let (key, glob) = (name.to_string(), glob.to_string());
@ -505,15 +525,48 @@ impl TypesBuilder {
Ok(()) Ok(())
} }
/// Add a new file type definition specified in string form. The format /// Add a new file type definition specified in string form. There are two
/// is `name:glob`. Names may not include a colon. /// valid formats:
/// 1. `{name}:{glob}`. This defines a 'root' definition that associates the
/// given name with the given glob.
/// 2. `{name}:include:{comma-separated list of already defined names}`.
/// This defines an 'include' definition that associates the given name
/// with the definitions of the given existing types.
/// Names may not include any characters that are not
/// Unicode letters or numbers.
pub fn add_def(&mut self, def: &str) -> Result<(), Error> { pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
let name: String = def.chars().take_while(|&c| c != ':').collect(); let parts: Vec<&str> = def.split(':').collect();
let pat: String = def.chars().skip(name.chars().count() + 1).collect(); match parts.len() {
if name.is_empty() || pat.is_empty() { 2 => {
let name = parts[0];
let glob = parts[1];
if name.is_empty() || glob.is_empty() {
return Err(Error::InvalidDefinition); return Err(Error::InvalidDefinition);
} }
self.add(&name, &pat) self.add(name, glob)
}
3 => {
let name = parts[0];
let types_string = parts[2];
if name.is_empty() || parts[1] != "include" || types_string.is_empty() {
return Err(Error::InvalidDefinition);
}
let types = types_string.split(',');
// Check ahead of time to ensure that all types specified are
// present and fail fast if not.
if types.clone().any(|t| !self.types.contains_key(t)) {
return Err(Error::InvalidDefinition);
}
for type_name in types {
let globs = self.types.get(type_name).unwrap().globs.clone();
for glob in globs {
try!(self.add(name, &glob));
}
}
Ok(())
}
_ => Err(Error::InvalidDefinition)
}
} }
/// Add a set of default file type definitions. /// Add a set of default file type definitions.
@ -569,6 +622,7 @@ mod tests {
"rust:*.rs", "rust:*.rs",
"js:*.js", "js:*.js",
"foo:*.{rs,foo}", "foo:*.{rs,foo}",
"combo:include:html,rust"
] ]
} }
@ -579,10 +633,35 @@ mod tests {
matched!(match5, types(), vec![], vec![], "index.html"); matched!(match5, types(), vec![], vec![], "index.html");
matched!(match6, types(), vec![], vec!["rust"], "index.html"); matched!(match6, types(), vec![], vec!["rust"], "index.html");
matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
matched!(match8, types(), vec!["combo"], vec![], "index.html");
matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo"); matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
/// Malformed definitions must be rejected by `add_def`, and a rejected
/// definition must leave the builder's existing definitions untouched.
#[test]
fn test_invalid_defs() {
    // Seed the builder with every known-good definition.
    let mut builder = TypesBuilder::new();
    for good in types() {
        builder.add_def(good).unwrap();
    }

    // Snapshot taken before any bad input is fed in; compared after each
    // failed attempt below.
    let snapshot = builder.definitions();

    let rejected = [
        // Includes a type name that was never defined.
        "combo:include:html,python",
        // Three-part form whose middle part is not `include`.
        "combo:foobar:html,rust",
        // Empty definition.
        "",
    ];
    for &bad in rejected.iter() {
        assert!(builder.add_def(bad).is_err());
        // A failed add must not partially apply, even when some of the
        // listed includes were valid.
        assert_eq!(builder.definitions(), snapshot);
    }
}
} }

View File

@ -450,11 +450,24 @@ lazy_static! {
globs defined inside of ripgrep.\n\nNote that this MUST be \ globs defined inside of ripgrep.\n\nNote that this MUST be \
passed to every invocation of ripgrep. Type settings are NOT \ passed to every invocation of ripgrep. Type settings are NOT \
persisted.\n\nExample: \ persisted.\n\nExample: \
rg --type-add 'foo:*.foo' -tfoo PATTERN."); rg --type-add 'foo:*.foo' -tfoo PATTERN.\n\n\
--type-add can also be used to include rules from other types \
with the special include directive. The include directive \
permits specifying one or more other type names (separated by a \
comma) that have been defined and its rules will automatically \
be imported into the type specified. For example, to create a \
type called src that matches C++, Python and Markdown files, one \
can use:\n\n\
--type-add 'src:include:cpp,py,md'\n\n\
Additional glob rules can still be added to the src type by \
using the --type-add flag again:\n\n\
--type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'\n\n\
Note that type names must consist only of Unicode letters or \
numbers. Punctuation characters are not allowed.");
doc!(h, "type-clear", doc!(h, "type-clear",
"Clear globs for given file type.", "Clear globs for given file type.",
"Clear the file type globs previously defined for TYPE. This \ "Clear the file type globs previously defined for TYPE. This \
only clears the default tpye definitions that are found inside \ only clears the default type definitions that are found inside \
of ripgrep.\n\nNote that this MUST be passed to every \ of ripgrep.\n\nNote that this MUST be passed to every \
invocation of ripgrep. Type settings are NOT persisted."); invocation of ripgrep. Type settings are NOT persisted.");

View File

@ -308,6 +308,17 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "file.wat:Sherlock\n"); assert_eq!(lines, "file.wat:Sherlock\n");
}); });
// Checks that a type built with the `include:` directive selects files
// matched by any of the included types and nothing else: `file.py` and
// `file.wat` are reported, `file.rs` is not.
sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    wd.create("file.wat", "Sherlock");
    // `wat` is defined first so that `combo` can include it. `py` is not
    // defined here, so it is presumably one of ripgrep's built-in types.
    cmd.arg("--type-add").arg("wat:*.wat");
    cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo");
    let lines: String = wd.stdout(&mut cmd);
    // No separate debug print: `assert_eq!` already shows both values when
    // the comparison fails.
    assert_eq!(lines, "file.py:Sherlock\nfile.wat:Sherlock\n");
});
sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock"); wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock"); wd.create("file.rs", "Sherlock");