docs and small polish

2025-05-19 09:40:22 -07:00 · 2016-08-24 18:33:35 -04:00 · 2016-08-24 18:33:35 -04:00 · 957f90c898
commit 957f90c898
parent 61f49ba716
2 changed files with 35 additions and 75 deletions
--- a/grep/src/lib.rs
+++ b/grep/src/lib.rs
@ -1,10 +1,15 @@
 #![deny(missing_docs)]
 /*!
 A fast line oriented regex searcher.
 */
 extern crate memchr;
 extern crate regex;
 extern crate regex_syntax as syntax;
 use std::error;
 use std::fmt;
 use std::io;
 use std::result;
 pub use search::{Grep, GrepBuilder};
@ -27,11 +32,6 @@ pub enum Error {
    /// pattern. For example, if the line terminator is `\n` and the regex
    /// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error.
    LiteralNotAllowed(char),
    /// This error occurs when a line exceeds the buffer size. The buffer
    /// size is given.
    LineTooLong(usize),
    /// An IO error occurred while searching.
    Io(io::Error),
    /// An unused enum variant that indicates this enum may be expanded in
    /// the future and therefore should not be exhaustively matched.
    #[doc(hidden)]
@ -43,8 +43,6 @@ impl error::Error for Error {
        match *self {
            Error::Regex(ref err) => err.description(),
            Error::LiteralNotAllowed(_) => "use of forbidden literal",
            Error::LineTooLong(_) => "line exceeds buffer size",
            Error::Io(ref err) => err.description(),
            Error::__Nonexhaustive => unreachable!(),
        }
    }
@ -52,7 +50,6 @@ impl error::Error for Error {
    fn cause(&self) -> Option<&error::Error> {
        match *self {
            Error::Regex(ref err) => err.cause(),
            Error::Io(ref err) => err.cause(),
            _ => None,
        }
    }
@ -65,11 +62,6 @@ impl fmt::Display for Error {
            Error::LiteralNotAllowed(chr) => {
                write!(f, "Literal '{}' not allowed.", chr)
            }
            Error::LineTooLong(limit) => {
                write!(f, "Line exceeded buffer size of {} bytes, try \
                           searching with memory maps instead.", limit)
            }
            Error::Io(ref err) => err.fmt(f),
            Error::__Nonexhaustive => unreachable!(),
        }
    }
@ -86,9 +78,3 @@ impl From<syntax::Error> for Error {
        Error::Regex(regex::Error::Syntax(err))
    }
 }
 // Conversion from an I/O error into this crate's `Error` type.
 impl From<io::Error> for Error {
    // Wraps the given `io::Error` unchanged in the `Error::Io` variant.
    fn from(err: io::Error) -> Error {
        Error::Io(err)
    }
 }
--- a/grep/src/search.rs
+++ b/grep/src/search.rs
@ -6,15 +6,15 @@ use literals::LiteralSets;
 use nonl;
 use Result;
 /// A matched line.
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct Match {
    // NOTE(review): presumably the byte offset at which the matched line
    // starts in the searched buffer -- confirm against `read_match`.
    start: usize,
    // Value returned by `Match::end`. NOTE(review): presumably the byte
    // offset at which the matched line ends -- confirm against `read_match`.
    end: usize,
    // Line number of this match, as returned by `Match::line`; `None` when
    // line numbers are not being computed.
    line: Option<usize>,
    // Start and end byte offsets of every regex match in this line, as
    // returned by `Match::locations`; empty when exact locations are not
    // being computed.
    locations: Vec<(usize, usize)>,
 }
 impl Match {
    /// Create a new empty match value.
    pub fn new() -> Match {
        Match::default()
    }
@ -30,27 +30,9 @@ impl Match {
    pub fn end(&self) -> usize {
        self.end
    }
    /// Return the line number that this match corresponds to.
    ///
    /// Note that this is `None` if line numbers aren't being computed. Line
    /// number tracking can be enabled using `GrepBuilder`.
    #[inline]
    pub fn line(&self) -> Option<usize> {
        self.line
    }
    /// Return the exact start and end locations (in byte offsets) of every
    /// regex match in this line.
    ///
    /// Note that this always returns an empty slice if exact locations aren't
    /// computed. Exact location tracking can be enabled using `GrepBuilder`.
    #[inline]
    pub fn locations(&self) -> &[(usize, usize)] {
        &self.locations
    }
 }
 /// A fast line oriented regex searcher.
 #[derive(Clone, Debug)]
 pub struct Grep {
    re: Regex,
@ -58,6 +40,7 @@ pub struct Grep {
    opts: Options,
 }
 /// A builder for a grep searcher.
 #[derive(Clone, Debug)]
 pub struct GrepBuilder {
    pattern: String,
@ -67,8 +50,6 @@ pub struct GrepBuilder {
 #[derive(Clone, Debug)]
 struct Options {
    case_insensitive: bool,
    lines: bool,
    locations: bool,
    line_terminator: u8,
    size_limit: usize,
    dfa_size_limit: usize,
@ -78,8 +59,6 @@ impl Default for Options {
    fn default() -> Options {
        Options {
            case_insensitive: false,
            lines: false,
            locations: false,
            line_terminator: b'\n',
            size_limit: 10 * (1 << 20),
            dfa_size_limit: 10 * (1 << 20),
@ -99,28 +78,6 @@ impl GrepBuilder {
        }
    }
    /// Sets whether line numbers are reported for each match.
    ///
    /// When enabled (disabled by default), every matching line is tagged with
    /// its corresponding line number according to the line terminator that is
    /// set. Note that this requires extra processing which can slow down
    /// search.
    pub fn line_numbers(mut self, yes: bool) -> GrepBuilder {
        self.opts.lines = yes;
        self
    }
    /// Set whether precise match locations are reported for each matching
    /// line.
    ///
    /// When enabled (disabled by default), every match of the regex on each
    /// matching line is reported via byte offsets. Note that this requires
    /// extra processing which can slow down search.
    pub fn locations(mut self, yes: bool) -> GrepBuilder {
        self.opts.locations = yes;
        self
    }
    /// Set the line terminator.
    ///
    /// The line terminator can be any ASCII character and serves to delineate
@ -167,7 +124,7 @@ impl GrepBuilder {
    ///
    /// If there was a problem parsing or compiling the regex with the given
    /// options, then an error is returned.
-    pub fn create(self) -> Result<Grep> {
+    pub fn build(self) -> Result<Grep> {
        let expr = try!(self.parse());
        let literals = LiteralSets::create(&expr);
        let re = try!(
@ -199,6 +156,12 @@ impl GrepBuilder {
 }
 impl Grep {
    /// Returns a reference to the underlying regex used by the searcher.
    pub fn regex(&self) -> &Regex {
        &self.re
    }
    /// Returns an iterator over all matches in the given buffer.
    pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
        Iter {
            searcher: self,
@ -207,6 +170,11 @@ impl Grep {
        }
    }
    /// Fills in the next line that matches in the given buffer starting at
    /// the position given.
    ///
    /// If no match could be found, `false` is returned, otherwise, `true` is
    /// returned.
    pub fn read_match(
        &self,
        mat: &mut Match,
@ -265,6 +233,10 @@ impl Grep {
    }
 }
 /// An iterator over all matches in a particular buffer.
 ///
 /// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime
 /// of the searcher.
 pub struct Iter<'b, 's> {
    searcher: &'s Grep,
    buf: &'b [u8],
@ -292,7 +264,7 @@ mod tests {
    use memchr::{memchr, memrchr};
    use regex::bytes::Regex;
-    use super::GrepBuilder;
+    use super::{GrepBuilder, Match};
    static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");
@ -301,7 +273,7 @@ mod tests {
        String::from_utf8(bytes.to_vec()).unwrap()
    }
-    fn find_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
+    fn find_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
        let re = Regex::new(pat).unwrap();
        let mut lines = vec![];
        for (s, e) in re.find_iter(haystack) {
@ -309,15 +281,17 @@ mod tests {
                        .map_or(0, |i| i + 1);
            let end = memchr(b'\n', &haystack[e..])
                      .map_or(haystack.len(), |i| e + i);
-            lines.push((start, end));
+            lines.push(Match {
                start: start,
                end: end,
            });
        }
        lines
    }
-    fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
+    fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
-        let g = GrepBuilder::new(pat).create().unwrap();
+        let g = GrepBuilder::new(pat).build().unwrap();
-        let it = g.iter(haystack);
+        g.iter(haystack).collect()
        it.map(|m| (m.start(), m.end())).collect()
    }
    #[test]