From 0163b39faa14aa328ca93897a17e4d87a87bacc4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 20 Jun 2016 16:53:48 -0400 Subject: [PATCH] refactor progress --- Cargo.toml | 1 + grep/Cargo.toml | 20 +++ grep/src/lib.rs | 72 ++++++++ {src => grep/src}/literals.rs | 24 ++- grep/src/nonl.rs | 65 +++++++ grep/src/search.rs | 307 ++++++++++++++++++++++++++++++++++ src/main.rs | 78 ++------- src/nonl.rs | 55 ------ src/search.rs | 168 ------------------- 9 files changed, 492 insertions(+), 298 deletions(-) create mode 100644 grep/Cargo.toml create mode 100644 grep/src/lib.rs rename {src => grep/src}/literals.rs (89%) create mode 100644 grep/src/nonl.rs create mode 100644 grep/src/search.rs delete mode 100644 src/nonl.rs delete mode 100644 src/search.rs diff --git a/Cargo.toml b/Cargo.toml index a20d60ac..9f36d765 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ license = "Unlicense/MIT" [dependencies] docopt = "0.6" +grep = { version = "0.1", path = "grep" } memchr = "0.1" memmap = "0.2" regex = { version = "0.1", path = "/home/andrew/rust/regex" } diff --git a/grep/Cargo.toml b/grep/Cargo.toml new file mode 100644 index 00000000..5da201e9 --- /dev/null +++ b/grep/Cargo.toml @@ -0,0 +1,20 @@ +[package] +publish = false +name = "grep" +version = "0.1.0" #:version +authors = ["Andrew Gallant "] +description = """ +Fast line oriented regex searching as a library. 
+""" +documentation = "https://github.com/BurntSushi/xrep" +homepage = "https://github.com/BurntSushi/xrep" +repository = "https://github.com/BurntSushi/xrep" +readme = "README.md" +keywords = ["regex", "grep", "egrep", "search", "pattern"] +license = "Unlicense/MIT" + +[dependencies] +memchr = "0.1" +memmap = "0.2" +regex = { version = "0.1", path = "/home/andrew/rust/regex" } +regex-syntax = { version = "0.3.1", path = "/home/andrew/rust/regex/regex-syntax" } diff --git a/grep/src/lib.rs b/grep/src/lib.rs new file mode 100644 index 00000000..d45b142f --- /dev/null +++ b/grep/src/lib.rs @@ -0,0 +1,72 @@ +extern crate memchr; +extern crate regex; +extern crate regex_syntax as syntax; + +use std::error; +use std::fmt; +use std::result; + +pub use search::{Grep, GrepBuilder}; + +mod literals; +mod nonl; +mod search; + +/// Result is a convenient type alias that fixes the type of the error to +/// the `Error` type defined in this crate. +pub type Result = result::Result; + +/// Error enumerates the list of possible error conditions when building or +/// using a `Grep` line searcher. +#[derive(Debug)] +pub enum Error { + /// An error from parsing or compiling a regex. + Regex(regex::Error), + /// This error occurs when an illegal literal was found in the regex + /// pattern. For example, if the line terminator is `\n` and the regex + /// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error. 
+ LiteralNotAllowed(char), + #[doc(hidden)] + __Nonexhaustive, +} + +impl error::Error for Error { + fn description(&self) -> &str { + match *self { + Error::Regex(ref err) => err.description(), + Error::LiteralNotAllowed(_) => "use of forbidden literal", + Error::__Nonexhaustive => unreachable!(), + } + } + + fn cause(&self) -> Option<&error::Error> { + match *self { + Error::Regex(ref err) => err.cause(), + _ => None, + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Error::Regex(ref err) => err.fmt(f), + Error::LiteralNotAllowed(chr) => { + write!(f, "Literal '{}' not allowed.", chr) + } + Error::__Nonexhaustive => unreachable!(), + } + } +} + +impl From for Error { + fn from(err: regex::Error) -> Error { + Error::Regex(err) + } +} + +impl From for Error { + fn from(err: syntax::Error) -> Error { + Error::Regex(regex::Error::Syntax(err)) + } +} diff --git a/src/literals.rs b/grep/src/literals.rs similarity index 89% rename from src/literals.rs rename to grep/src/literals.rs index be91d550..5408faea 100644 --- a/src/literals.rs +++ b/grep/src/literals.rs @@ -25,11 +25,16 @@ impl LiteralSets { } } - pub fn to_matcher(&self) -> Option { + pub fn to_regex(&self) -> Option { if self.prefixes.all_complete() && !self.prefixes.is_empty() { // When this is true, the regex engine will do a literal scan. return None; } + + // Out of inner required literals, prefixes and suffixes, which one + // is the longest? We pick the longest to do fast literal scan under + // the assumption that a longer literal will have a lower false + // positive rate. 
let pre_lcp = self.prefixes.longest_common_prefix(); let pre_lcs = self.prefixes.longest_common_suffix(); let suf_lcp = self.suffixes.longest_common_prefix(); @@ -70,32 +75,33 @@ fn union_required(expr: &Expr, lits: &mut Literals) { let s: String = chars.iter().cloned().collect(); lits.cross_add(s.as_bytes()); } - Literal { ref chars, casei: true } => { + Literal { casei: true, .. } => { lits.cut(); } LiteralBytes { ref bytes, casei: false } => { lits.cross_add(bytes); } - LiteralBytes { ref bytes, casei: true } => { + LiteralBytes { casei: true, .. } => { lits.cut(); } - Class(ref cls) => { + Class(_) => { lits.cut(); } - ClassBytes(ref cls) => { + ClassBytes(_) => { lits.cut(); } Group { ref e, .. } => { union_required(&**e, lits); } - Repeat { ref e, r: Repeater::ZeroOrOne, .. } => lits.cut(), - Repeat { ref e, r: Repeater::ZeroOrMore, .. } => lits.cut(), + Repeat { r: Repeater::ZeroOrOne, .. } => lits.cut(), + Repeat { r: Repeater::ZeroOrMore, .. } => lits.cut(), Repeat { ref e, r: Repeater::OneOrMore, .. } => { union_required(&**e, lits); lits.cut(); } Repeat { ref e, r: Repeater::Range { min, max }, greedy } => { - repeat_range_literals(&**e, min, max, greedy, lits, union_required); + repeat_range_literals( + &**e, min, max, greedy, lits, union_required); } Concat(ref es) if es.is_empty() => {} Concat(ref es) if es.len() == 1 => union_required(&es[0], lits), @@ -131,7 +137,7 @@ fn repeat_range_literals( e: &Expr, min: u32, max: Option, - greedy: bool, + _greedy: bool, lits: &mut Literals, mut f: F, ) { diff --git a/grep/src/nonl.rs b/grep/src/nonl.rs new file mode 100644 index 00000000..e4dad13f --- /dev/null +++ b/grep/src/nonl.rs @@ -0,0 +1,65 @@ +use syntax::Expr; + +use {Error, Result}; + +/// Returns a new expression that is guaranteed to never match the given +/// ASCII character. +/// +/// If the expression contains the literal byte, then an error is returned. 
+/// +/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this +/// function panics. +pub fn remove(expr: Expr, byte: u8) -> Result { + use syntax::Expr::*; + assert!(byte <= 0x7F); + let chr = byte as char; + assert!(chr.len_utf8() == 1); + + Ok(match expr { + Literal { chars, casei } => { + if chars.iter().position(|&c| c == chr).is_some() { + return Err(Error::LiteralNotAllowed(chr)); + } + Literal { chars: chars, casei: casei } + } + LiteralBytes { bytes, casei } => { + if bytes.iter().position(|&b| b == byte).is_some() { + return Err(Error::LiteralNotAllowed(chr)); + } + LiteralBytes { bytes: bytes, casei: casei } + } + AnyChar => AnyCharNoNL, + AnyByte => AnyByteNoNL, + Class(mut cls) => { + cls.remove(chr); + Class(cls) + } + ClassBytes(mut cls) => { + cls.remove(byte); + ClassBytes(cls) + } + Group { e, i, name } => { + Group { + e: Box::new(try!(remove(*e, byte))), + i: i, + name: name, + } + } + Repeat { e, r, greedy } => { + Repeat { + e: Box::new(try!(remove(*e, byte))), + r: r, + greedy: greedy, + } + } + Concat(exprs) => { + Concat(try!( + exprs.into_iter().map(|e| remove(e, byte)).collect())) + } + Alternate(exprs) => { + Alternate(try!( + exprs.into_iter().map(|e| remove(e, byte)).collect())) + } + e => e, + }) +} diff --git a/grep/src/search.rs b/grep/src/search.rs new file mode 100644 index 00000000..eb4ff6fc --- /dev/null +++ b/grep/src/search.rs @@ -0,0 +1,307 @@ +use std::io; + +use memchr::{memchr, memrchr}; +use regex::bytes::{Regex, RegexBuilder}; +use syntax; + +use literals::LiteralSets; +use nonl; +use Result; + +#[derive(Clone, Debug)] +pub struct Grep { + re: Regex, + required: Option, + opts: Options, +} + +#[derive(Clone, Debug)] +pub struct GrepBuilder { + pattern: String, + opts: Options, +} + +#[derive(Clone, Debug)] +struct Options { + case_insensitive: bool, + lines: bool, + locations: bool, + line_terminator: u8, + size_limit: usize, + dfa_size_limit: usize, +} + +impl Default for Options { + fn default() 
-> Options { + Options { + case_insensitive: false, + lines: false, + locations: false, + line_terminator: b'\n', + size_limit: 10 * (1 << 20), + dfa_size_limit: 10 * (1 << 20), + } + } +} + +impl GrepBuilder { + /// Create a new builder for line searching. + /// + /// The pattern given should be a regular expression. The precise syntax + /// supported is documented on the regex crate. + pub fn new(pattern: &str) -> GrepBuilder { + GrepBuilder { + pattern: pattern.to_string(), + opts: Options::default(), + } + } + + /// Sets whether line numbers are reported for each match. + /// + /// When enabled (disabled by default), every matching line is tagged with + /// its corresponding line number according to the line terminator that is + /// set. Note that this requires extra processing which can slow down + /// search. + pub fn line_numbers(mut self, yes: bool) -> GrepBuilder { + self.opts.lines = yes; + self + } + + /// Set whether precise match locations are reported for each matching + /// line. + /// + /// When enabled (disabled by default), every match of the regex on each + /// matching line is reported via byte offsets. Note that this requires + /// extra processing which can slow down search. + pub fn locations(mut self, yes: bool) -> GrepBuilder { + self.opts.locations = yes; + self + } + + /// Set the line terminator. + /// + /// The line terminator can be any ASCII character and serves to delineate + /// the match boundaries in the text searched. + /// + /// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII). + pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder { + assert!(ascii_byte <= 0x7F); + self.opts.line_terminator = ascii_byte; + self + } + + /// Set the case insensitive flag (`i`) on the regex. + pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder { + self.opts.case_insensitive = yes; + self + } + + /// Set the approximate size limit of the compiled regular expression.
+ /// + /// This roughly corresponds to the number of bytes occupied by a + /// single compiled program. If the program exceeds this number, then a + /// compilation error is returned. + pub fn size_limit(mut self, limit: usize) -> GrepBuilder { + self.opts.size_limit = limit; + self + } + + /// Set the approximate size of the cache used by the DFA. + /// + /// This roughly corresponds to the number of bytes that the DFA will use + /// while searching. + /// + /// Note that this is a per thread limit. There is no way to set a global + /// limit. In particular, if a regex is used from multiple threads + /// simultaneously, then each thread may use up to the number of bytes + /// specified here. + pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder { + self.opts.dfa_size_limit = limit; + self + } + + /// Create a line searcher. + /// + /// If there was a problem parsing or compiling the regex with the given + /// options, then an error is returned. + pub fn create(self) -> Result { + let expr = try!(self.parse()); + let literals = LiteralSets::create(&expr); + let re = try!( + RegexBuilder::new(&expr.to_string()) + .case_insensitive(self.opts.case_insensitive) + .multi_line(true) + .unicode(true) + .size_limit(self.opts.size_limit) + .dfa_size_limit(self.opts.dfa_size_limit) + .compile() + ); + Ok(Grep { + re: re, + required: literals.to_regex(), + opts: self.opts, + }) + } + + /// Parses the underlying pattern and ensures the pattern can never match + /// the line terminator.
+ fn parse(&self) -> Result { + let expr = + try!(syntax::ExprBuilder::new() + .allow_bytes(true) + .unicode(true) + .parse(&self.pattern)); + Ok(try!(nonl::remove(expr, self.opts.line_terminator))) + } +} + +impl Grep { + pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> { + Iter { + searcher: self, + buf: buf, + start: 0, + } + } + + pub fn read_match( + &self, + mat: &mut Match, + buf: &[u8], + mut start: usize, + ) -> bool { + if start >= buf.len() { + return false; + } + if let Some(ref req) = self.required { + while start < buf.len() { + let e = match req.shortest_match(&buf[start..]) { + None => return false, + Some(e) => start + e, + }; + let (prevnl, nextnl) = self.find_line(buf, e, e); + match self.re.shortest_match(&buf[prevnl..nextnl]) { + None => { + start = nextnl + 1; + continue; + } + Some(_) => { + self.fill_match(mat, prevnl, nextnl); + return true; + } + } + } + false + } else { + let e = match self.re.shortest_match(&buf[start..]) { + None => return false, + Some(e) => start + e, + }; + let (s, e) = self.find_line(buf, e, e); + self.fill_match(mat, s, e); + true + } + } + + fn fill_match(&self, mat: &mut Match, start: usize, end: usize) { + mat.start = start; + mat.end = end; + } + + fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) { + (self.find_line_start(buf, s), self.find_line_end(buf, e)) + } + + fn find_line_start(&self, buf: &[u8], pos: usize) -> usize { + memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1) + } + + fn find_line_end(&self, buf: &[u8], pos: usize) -> usize { + memchr(self.opts.line_terminator, &buf[pos..]) + .map_or(buf.len(), |i| pos + i) + } +} + +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Match { + start: usize, + end: usize, + line: Option, + locations: Vec<(usize, usize)>, +} + +impl Match { + pub fn new() -> Match { + Match::default() + } + + /// Return the starting byte offset of the line that matched. 
+ #[inline] + pub fn start(&self) -> usize { + self.start + } + + /// Return the ending byte offset of the line that matched. + #[inline] + pub fn end(&self) -> usize { + self.end + } + + /// Return the line number that this match corresponds to. + /// + /// Note that this is `None` if line numbers aren't being computed. Line + /// number tracking can be enabled using `GrepBuilder`. + #[inline] + pub fn line(&self) -> Option { + self.line + } + + /// Return the exact start and end locations (in byte offsets) of every + /// regex match in this line. + /// + /// Note that this always returns an empty slice if exact locations aren't + /// computed. Exact location tracking can be enabled using `GrepBuilder`. + #[inline] + pub fn locations(&self) -> &[(usize, usize)] { + &self.locations + } +} + +pub struct Iter<'b, 's> { + searcher: &'s Grep, + buf: &'b [u8], + start: usize, +} + +impl<'b, 's> Iterator for Iter<'b, 's> { + type Item = Match; + + fn next(&mut self) -> Option { + let mut mat = Match::default(); + if !self.searcher.read_match(&mut mat, self.buf, self.start) { + self.start = self.buf.len(); + return None; + } + self.start = mat.end + 1; + Some(mat) + } +} + +pub struct GrepBuffered<'g, B> { + grep: &'g Grep, + buf: B, + start: usize, +} + +impl<'g, B: BufRead> GrepBuffered { + pub fn read_match( + &self, + mat: &mut Match, + ) -> io::Result { + let buf = try!(self.buf.fill_buf()); + if buf.is_empty() { + return Ok(false); + } + Ok(false) + } +} diff --git a/src/main.rs b/src/main.rs index 323b6641..d93496d1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ #![allow(dead_code, unused_variables)] extern crate docopt; +extern crate grep; extern crate memchr; extern crate memmap; extern crate regex; @@ -15,19 +16,13 @@ Options: "; use std::error::Error; -use std::io::{self, BufRead, Write}; +use std::io::{self, Write}; use std::process; use std::result; use docopt::Docopt; -use regex::bytes::Regex; -use literals::LiteralSets; -use 
search::{LineSearcher, LineSearcherBuilder}; - -mod literals; -mod nonl; -mod search; +use grep::{Grep, GrepBuilder}; pub type Result = result::Result>; @@ -53,36 +48,20 @@ fn main() { fn run(args: &Args) -> Result { if args.arg_file.is_empty() { - let expr = try!(parse(&args.arg_pattern)); - let literals = LiteralSets::create(&expr); - let re = Regex::new(&expr.to_string()).unwrap(); - let _stdin = io::stdin(); - let stdin = _stdin.lock(); - run_by_line(args, &re, stdin) + unimplemented!() } else { let searcher = - try!(LineSearcherBuilder::new(&args.arg_pattern).create()); - if args.flag_count { - run_mmap_count_only(args, &searcher) - } else { - run_mmap(args, &searcher) - } + try!(GrepBuilder::new(&args.arg_pattern).create()); + run_mmap(args, &searcher) } } -#[inline(never)] -fn run_mmap(args: &Args, searcher: &LineSearcher) -> Result { - use memmap::{Mmap, Protection}; - - assert!(args.arg_file.len() == 1); - let mut wtr = io::BufWriter::new(io::stdout()); - let mmap = try!(Mmap::open_path(&args.arg_file[0], Protection::Read)); - let text = unsafe { mmap.as_slice() }; - - let mut count = 0; - for m in searcher.search(text) { - try!(wtr.write(&text[m.start..m.end])); - try!(wtr.write(b"\n")); +fn run_mmap(args: &Args, searcher: &Grep) -> Result { + for m in searcher.iter(text) { + if !args.flag_count { + try!(wtr.write(&text[m.start()..m.end()])); + try!(wtr.write(b"\n")); + } count += 1; } Ok(count) @@ -100,36 +79,3 @@ fn run_mmap_count_only(args: &Args, searcher: &LineSearcher) -> Result { try!(writeln!(wtr, "{}", count)); Ok(count) } - -fn run_by_line( - args: &Args, - re: &Regex, - mut rdr: B, -) -> Result { - let mut wtr = io::BufWriter::new(io::stdout()); - let mut count = 0; - let mut nline = 0; - let mut line = vec![]; - loop { - line.clear(); - let n = try!(rdr.read_until(b'\n', &mut line)); - if n == 0 { - break; - } - nline += 1; - if re.is_match(&line) { - count += 1; - try!(wtr.write(&line)); - } - } - Ok(count) -} - -fn parse(re: &str) -> Result 
{ - let expr = - try!(syntax::ExprBuilder::new() - .allow_bytes(true) - .unicode(false) - .parse(re)); - Ok(try!(nonl::remove(expr))) -} diff --git a/src/nonl.rs b/src/nonl.rs deleted file mode 100644 index 96ae3937..00000000 --- a/src/nonl.rs +++ /dev/null @@ -1,55 +0,0 @@ -use syntax::Expr; - -use Result; - -/// Returns a new expression that is guaranteed to never match `\n`. -/// -/// If the expression contains a literal `\n`, then an error is returned. -pub fn remove(expr: Expr) -> Result { - use syntax::Expr::*; - Ok(match expr { - Literal { chars, casei } => { - if chars.iter().position(|&c| c == '\n').is_some() { - return Err(format!("Literal '\\n' are not allowed.").into()); - } - Literal { chars: chars, casei: casei } - } - LiteralBytes { bytes, casei } => { - if bytes.iter().position(|&b| b == b'\n').is_some() { - return Err(format!("Literal '\\n' are not allowed.").into()); - } - LiteralBytes { bytes: bytes, casei: casei } - } - AnyChar => AnyCharNoNL, - AnyByte => AnyByteNoNL, - Class(mut cls) => { - cls.remove('\n'); - Class(cls) - } - ClassBytes(mut cls) => { - cls.remove(b'\n'); - ClassBytes(cls) - } - Group { e, i, name } => { - Group { - e: Box::new(try!(remove(*e))), - i: i, - name: name, - } - } - Repeat { e, r, greedy } => { - Repeat { - e: Box::new(try!(remove(*e))), - r: r, - greedy: greedy, - } - } - Concat(exprs) => { - Concat(try!(exprs.into_iter().map(remove).collect())) - } - Alternate(exprs) => { - Alternate(try!(exprs.into_iter().map(remove).collect())) - } - e => e, - }) -} diff --git a/src/search.rs b/src/search.rs deleted file mode 100644 index b4b0b536..00000000 --- a/src/search.rs +++ /dev/null @@ -1,168 +0,0 @@ -use std::cmp; - -use memchr::{memchr, memrchr}; -use regex::bytes::Regex; -use syntax; - -use literals::LiteralSets; -use nonl; -use Result; - -#[derive(Clone, Debug)] -pub struct LineSearcher { - re: Regex, - required: Option, - opts: Options, -} - -#[derive(Clone, Debug)] -pub struct LineSearcherBuilder { - pattern: 
String, - opts: Options, -} - -#[derive(Clone, Debug, Default)] -struct Options { - case_insensitive: bool, - lines: bool, - locations: bool, -} - -impl LineSearcherBuilder { - pub fn new(pattern: &str) -> LineSearcherBuilder { - LineSearcherBuilder { - pattern: pattern.to_string(), - opts: Options::default(), - } - } - - pub fn case_insensitive(mut self, yes: bool) -> LineSearcherBuilder { - self.opts.case_insensitive = yes; - self - } - - pub fn line_numbers(mut self, yes: bool) -> LineSearcherBuilder { - self.opts.lines = yes; - self - } - - pub fn locations(mut self, yes: bool) -> LineSearcherBuilder { - self.opts.locations = yes; - self - } - - pub fn create(self) -> Result { - let expr = try!(parse(&self.pattern)); - let literals = LiteralSets::create(&expr); - let pat = - if self.opts.case_insensitive { - format!("(?i){}", expr) - } else { - expr.to_string() - }; - // We've already parsed the pattern, so we know it will compiled. - let re = Regex::new(&pat).unwrap(); - Ok(LineSearcher { - re: re, - required: literals.to_matcher(), - opts: self.opts, - }) - } -} - -impl LineSearcher { - pub fn search<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> { - Iter { - searcher: self, - buf: buf, - start: 0, - count: 0, - } - } -} - -pub struct Match { - pub start: usize, - pub end: usize, - pub count: u64, - pub line: Option, - pub locations: Vec<(usize, usize)>, -} - -pub struct Iter<'b, 's> { - searcher: &'s LineSearcher, - buf: &'b [u8], - start: usize, - count: u64, -} - -impl<'b, 's> Iter<'b, 's> { - #[inline(always)] // reduces constant overhead - fn next_line_match(&mut self) -> Option<(usize, usize)> { - if let Some(ref req) = self.searcher.required { - while self.start < self.buf.len() { - let e = match req.shortest_match(&self.buf[self.start..]) { - None => return None, - Some(e) => self.start + e, - }; - let (prevnl, nextnl) = self.find_line(e, e); - match self.searcher.re.shortest_match(&self.buf[prevnl..nextnl]) { - None => { - self.start = nextnl + 1; 
- continue; - } - Some(_) => return Some((prevnl, nextnl)), - } - } - None - } else { - self.searcher.re - .shortest_match(&self.buf[self.start..]) - .map(|e| self.find_line(self.start + e, self.start + e)) - } - } - - fn find_line(&self, s: usize, e: usize) -> (usize, usize) { - (self.find_line_start(s), self.find_line_end(e)) - } - - fn find_line_start(&self, pos: usize) -> usize { - memrchr(b'\n', &self.buf[0..pos]).map_or(0, |i| i + 1) - } - - fn find_line_end(&self, pos: usize) -> usize { - memchr(b'\n', &self.buf[pos..]).map_or(self.buf.len(), |i| pos + i) - } -} - -impl<'b, 's> Iterator for Iter<'b, 's> { - type Item = Match; - - #[inline(always)] // reduces constant overhead - fn next(&mut self) -> Option { - match self.next_line_match() { - None => None, - Some((prevnl, nextnl)) => { - let count = self.count; - self.start = cmp::min(self.buf.len(), nextnl + 1); - self.count += 1; - Some(Match { - start: prevnl, - end: nextnl, - count: count, - line: None, - locations: vec![], - }) - } - } - } -} - -fn parse(re: &str) -> Result { - let expr = - try!(syntax::ExprBuilder::new() - .allow_bytes(true) - .unicode(false) - .parse(re)); - Ok(try!(nonl::remove(expr))) -}