diff --git a/doc/rg.1 b/doc/rg.1 index 7b87ea40..1f29c8f0 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -397,9 +397,38 @@ inside of ripgrep. Note that this must be passed to every invocation of rg. Type settings are NOT persisted. .RS +.IP +.nf +\f[C] +\ \ Example:\ `rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN` +\f[] +.fi .PP -Example: -\f[C]rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN\f[] +\-\-type\-add can also be used to include rules from other types with +the special include directive. +The include directive permits specifying one or more other type names +(separated by a comma) that have been defined and their rules will +automatically be imported into the type specified. +For example, to create a type called src that matches C++, Python and +Markdown files, one can use: +.IP +.nf +\f[C] +\ \ `\-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]` +\f[] +.fi +.PP +Additional glob rules can still be added to the src type by using the +\-\-type\-add flag again: +.IP +.nf +\f[C] +\ \ `\-\-type\-add\ \[aq]src:include:cpp,py,md\[aq]\ \-\-type\-add\ \[aq]src:*.foo\[aq]` +\f[] +.fi +.PP +Note that type names must consist only of Unicode letters or numbers. +Punctuation characters are not allowed. .RE .TP .B \-\-type\-clear \f[I]TYPE\f[] ... diff --git a/doc/rg.1.md b/doc/rg.1.md index 0bd08504..b920c0ef 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -267,7 +267,25 @@ Project home page: https://github.com/BurntSushi/ripgrep this must be passed to every invocation of rg. Type settings are NOT persisted. - Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN` + Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN` + + --type-add can also be used to include rules from other types + with the special include directive. The include directive + permits specifying one or more other type names (separated by a + comma) that have been defined and their rules will automatically + be imported into the type specified. 
For example, to create a + type called src that matches C++, Python and Markdown files, one + can use: + + `--type-add 'src:include:cpp,py,md'` + + Additional glob rules can still be added to the src type by + using the --type-add flag again: + + `--type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'` + + Note that type names must consist only of Unicode letters or + numbers. Punctuation characters are not allowed. --type-clear *TYPE* ... : Clear the file type globs previously defined for TYPE. This only clears diff --git a/ignore/src/types.rs b/ignore/src/types.rs index 1f032584..213f1dfd 100644 --- a/ignore/src/types.rs +++ b/ignore/src/types.rs @@ -66,6 +66,22 @@ assert!(matcher.matched("x.foo", false).is_whitelist()); // This is ignored because we only selected the `foo` file type. assert!(matcher.matched("x.bar", false).is_ignore()); ``` + +We can also add file type definitions based on other definitions. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +builder.add_def("bar:include:foo,cpp"); +builder.select("bar"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +assert!(matcher.matched("y.cpp", false).is_whitelist()); +``` */ use std::cell::RefCell; @@ -74,6 +90,7 @@ use std::path::Path; use std::sync::Arc; use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use regex::Regex; use thread_local::ThreadLocal; use pathutil::file_name; @@ -219,7 +236,7 @@ impl<'a> Glob<'a> { /// File type definitions can be retrieved in aggregate from a file type /// matcher. File type definitions are also reported when its responsible /// for a match. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct FileTypeDef { name: String, globs: Vec, @@ -492,10 +509,13 @@ impl TypesBuilder { /// Add a new file type definition. 
`name` can be arbitrary and `pat` /// should be a glob recognizing file paths belonging to the `name` type. /// - /// If `name` is `all` or otherwise contains a `:`, then an error is - /// returned. + /// If `name` is `all` or otherwise contains any character that is not a + /// Unicode letter or number, then an error is returned. pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { - if name == "all" || name.contains(':') { + lazy_static! { + static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap(); + }; + if name == "all" || !RE.is_match(name) { return Err(Error::InvalidDefinition); } let (key, glob) = (name.to_string(), glob.to_string()); @@ -505,15 +525,48 @@ impl TypesBuilder { Ok(()) } - /// Add a new file type definition specified in string form. The format - /// is `name:glob`. Names may not include a colon. + /// Add a new file type definition specified in string form. There are two + /// valid formats: + /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the + /// given name with the given glob. + /// 2. `{name}:include:{comma-separated list of already defined names}`. + /// This defines an 'include' definition that associates the given name + /// with the definitions of the given existing types. + /// Names may not include any characters that are not + /// Unicode letters or numbers. 
pub fn add_def(&mut self, def: &str) -> Result<(), Error> { - let name: String = def.chars().take_while(|&c| c != ':').collect(); - let pat: String = def.chars().skip(name.chars().count() + 1).collect(); - if name.is_empty() || pat.is_empty() { - return Err(Error::InvalidDefinition); + let parts: Vec<&str> = def.split(':').collect(); + match parts.len() { + 2 => { + let name = parts[0]; + let glob = parts[1]; + if name.is_empty() || glob.is_empty() { + return Err(Error::InvalidDefinition); + } + self.add(name, glob) + } + 3 => { + let name = parts[0]; + let types_string = parts[2]; + if name.is_empty() || parts[1] != "include" || types_string.is_empty() { + return Err(Error::InvalidDefinition); + } + let types = types_string.split(','); + // Check ahead of time to ensure that all types specified are + // present and fail fast if not. + if types.clone().any(|t| !self.types.contains_key(t)) { + return Err(Error::InvalidDefinition); + } + for type_name in types { + let globs = self.types.get(type_name).unwrap().globs.clone(); + for glob in globs { + try!(self.add(name, &glob)); + } + } + Ok(()) + } + _ => Err(Error::InvalidDefinition) } - self.add(&name, &pat) } /// Add a set of default file type definitions. 
@@ -569,6 +622,7 @@ mod tests { "rust:*.rs", "js:*.js", "foo:*.{rs,foo}", + "combo:include:html,rust" ] } @@ -579,10 +633,35 @@ mod tests { matched!(match5, types(), vec![], vec![], "index.html"); matched!(match6, types(), vec![], vec!["rust"], "index.html"); matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); + matched!(match8, types(), vec!["combo"], vec![], "index.html"); + matched!(match9, types(), vec!["combo"], vec![], "lib.rs"); matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo"); + matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js"); + + #[test] + fn test_invalid_defs() { + let mut btypes = TypesBuilder::new(); + for tydef in types() { + btypes.add_def(tydef).unwrap(); + } + // Preserve the original definitions for later comparison. + let original_defs = btypes.definitions(); + let bad_defs = vec![ + // Reference to type that does not exist + "combo:include:html,python", + // Bad format + "combo:foobar:html,rust", + "" + ]; + for def in bad_defs { + assert!(btypes.add_def(def).is_err()); + // Ensure that nothing changed, even if some of the includes were valid. + assert_eq!(btypes.definitions(), original_defs); + } + } } diff --git a/src/app.rs b/src/app.rs index d262e5de..ff61c0a3 100644 --- a/src/app.rs +++ b/src/app.rs @@ -450,11 +450,24 @@ lazy_static! { globs defined inside of ripgrep.\n\nNote that this MUST be \ passed to every invocation of ripgrep. Type settings are NOT \ persisted.\n\nExample: \ - rg --type-add 'foo:*.foo' -tfoo PATTERN."); + rg --type-add 'foo:*.foo' -tfoo PATTERN.\n\n\ + --type-add can also be used to include rules from other types \ + with the special include directive. 
The include directive \ + permits specifying one or more other type names (separated by a \ + comma) that have been defined and their rules will automatically \ + be imported into the type specified. For example, to create a \ + type called src that matches C++, Python and Markdown files, one \ + can use:\n\n\ + --type-add 'src:include:cpp,py,md'\n\n\ + Additional glob rules can still be added to the src type by \ + using the --type-add flag again:\n\n\ + --type-add 'src:include:cpp,py,md' --type-add 'src:*.foo'\n\n\ + Note that type names must consist only of Unicode letters or \ + numbers. Punctuation characters are not allowed."); doc!(h, "type-clear", "Clear globs for given file type.", "Clear the file type globs previously defined for TYPE. This \ - only clears the default tpye definitions that are found inside \ + only clears the default type definitions that are found inside \ of ripgrep.\n\nNote that this MUST be passed to every \ invocation of ripgrep. Type settings are NOT persisted."); diff --git a/tests/tests.rs b/tests/tests.rs index 1802e186..fa024060 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -308,6 +308,17 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { assert_eq!(lines, "file.wat:Sherlock\n"); }); +sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { + wd.create("file.py", "Sherlock"); + wd.create("file.rs", "Sherlock"); + wd.create("file.wat", "Sherlock"); + cmd.arg("--type-add").arg("wat:*.wat"); + cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo"); + let lines: String = wd.stdout(&mut cmd); + println!("{}", lines); + assert_eq!(lines, "file.py:Sherlock\nfile.wat:Sherlock\n"); +}); + sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); wd.create("file.rs", "Sherlock");