mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-31 04:13:53 -07:00
The top-level listing was just getting a bit too long for my taste. So put all of the code in one directory and shrink the large top-level mess to a small top-level mess. NOTE: This commit only contains renames. The subsequent commit will actually make ripgrep build again. We do it this way with the naive hope that this will make it easier for git history to track the renames. Sigh.
468 lines
16 KiB
Rust
468 lines
16 KiB
Rust
/*!
|
|
A collection of routines for performing operations on lines.
|
|
*/
|
|
|
|
use bstr::ByteSlice;
|
|
use bytecount;
|
|
use grep_matcher::{LineTerminator, Match};
|
|
|
|
/// An iterator over lines in a particular slice of bytes.
|
|
///
|
|
/// Line terminators are considered part of the line they terminate. All lines
|
|
/// yielded by the iterator are guaranteed to be non-empty.
|
|
///
|
|
/// `'b` refers to the lifetime of the underlying bytes.
|
|
#[derive(Debug)]
|
|
pub struct LineIter<'b> {
|
|
bytes: &'b [u8],
|
|
stepper: LineStep,
|
|
}
|
|
|
|
impl<'b> LineIter<'b> {
|
|
/// Create a new line iterator that yields lines in the given bytes that
|
|
/// are terminated by `line_term`.
|
|
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
|
LineIter {
|
|
bytes: bytes,
|
|
stepper: LineStep::new(line_term, 0, bytes.len()),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'b> Iterator for LineIter<'b> {
|
|
type Item = &'b [u8];
|
|
|
|
fn next(&mut self) -> Option<&'b [u8]> {
|
|
self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
|
|
}
|
|
}
|
|
|
|
/// An explicit iterator over lines in a particular slice of bytes.
|
|
///
|
|
/// This iterator avoids borrowing the bytes themselves, and instead requires
|
|
/// callers to explicitly provide the bytes when moving through the iterator.
|
|
/// While not idiomatic, this provides a simple way of iterating over lines
|
|
/// that doesn't require borrowing the slice itself, which can be convenient.
|
|
///
|
|
/// Line terminators are considered part of the line they terminate. All lines
|
|
/// yielded by the iterator are guaranteed to be non-empty.
|
|
#[derive(Debug)]
|
|
pub struct LineStep {
|
|
line_term: u8,
|
|
pos: usize,
|
|
end: usize,
|
|
}
|
|
|
|
impl LineStep {
|
|
/// Create a new line iterator over the given range of bytes using the
|
|
/// given line terminator.
|
|
///
|
|
/// Callers should provide the actual bytes for each call to `next`. The
|
|
/// same slice must be provided to each call.
|
|
///
|
|
/// This panics if `start` is not less than or equal to `end`.
|
|
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
|
|
LineStep { line_term, pos: start, end: end }
|
|
}
|
|
|
|
/// Return the start and end position of the next line in the given bytes.
|
|
///
|
|
/// The caller must past exactly the same slice of bytes for each call to
|
|
/// `next`.
|
|
///
|
|
/// The range returned includes the line terminator. Ranges are always
|
|
/// non-empty.
|
|
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
|
self.next_impl(bytes)
|
|
}
|
|
|
|
/// Like next, but returns a `Match` instead of a tuple.
|
|
#[inline(always)]
|
|
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
|
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
|
bytes = &bytes[..self.end];
|
|
match bytes[self.pos..].find_byte(self.line_term) {
|
|
None => {
|
|
if self.pos < bytes.len() {
|
|
let m = (self.pos, bytes.len());
|
|
assert!(m.0 <= m.1);
|
|
|
|
self.pos = m.1;
|
|
Some(m)
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
Some(line_end) => {
|
|
let m = (self.pos, self.pos + line_end + 1);
|
|
assert!(m.0 <= m.1);
|
|
|
|
self.pos = m.1;
|
|
Some(m)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Count the number of occurrences of `line_term` in `bytes`.
|
|
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
|
bytecount::count(bytes, line_term) as u64
|
|
}
|
|
|
|
/// Given a line that possibly ends with a terminator, return that line without
|
|
/// the terminator.
|
|
#[inline(always)]
|
|
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
|
let line_term = line_term.as_bytes();
|
|
let start = bytes.len().saturating_sub(line_term.len());
|
|
if bytes.get(start..) == Some(line_term) {
|
|
return &bytes[..bytes.len() - line_term.len()];
|
|
}
|
|
bytes
|
|
}
|
|
|
|
/// Return the start and end offsets of the lines containing the given range
|
|
/// of bytes.
|
|
///
|
|
/// Line terminators are considered part of the line they terminate.
|
|
#[inline(always)]
|
|
pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
|
|
let line_start =
|
|
bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
|
|
let line_end =
|
|
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
|
range.end()
|
|
} else {
|
|
bytes[range.end()..]
|
|
.find_byte(line_term)
|
|
.map_or(bytes.len(), |i| range.end() + i + 1)
|
|
};
|
|
Match::new(line_start, line_end)
|
|
}
|
|
|
|
/// Returns the minimal starting offset of the line that occurs `count` lines
|
|
/// before the last line in `bytes`.
|
|
///
|
|
/// Lines are terminated by `line_term`. If `count` is zero, then this returns
|
|
/// the starting offset of the last line in `bytes`.
|
|
///
|
|
/// If `bytes` ends with a line terminator, then the terminator itself is
|
|
/// considered part of the last line.
|
|
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
|
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
|
}
|
|
|
|
/// Returns the minimal starting offset of the line that occurs `count` lines
|
|
/// before the line containing `pos`. Lines are terminated by `line_term`.
|
|
/// If `count` is zero, then this returns the starting offset of the line
|
|
/// containing `pos`.
|
|
///
|
|
/// If `pos` points just past a line terminator, then it is considered part of
|
|
/// the line that it terminates. For example, given `bytes = b"abc\nxyz\n"`
|
|
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
|
|
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
|
|
fn preceding_by_pos(
|
|
bytes: &[u8],
|
|
mut pos: usize,
|
|
line_term: u8,
|
|
mut count: usize,
|
|
) -> usize {
|
|
if pos == 0 {
|
|
return 0;
|
|
} else if bytes[pos - 1] == line_term {
|
|
pos -= 1;
|
|
}
|
|
loop {
|
|
match bytes[..pos].rfind_byte(line_term) {
|
|
None => {
|
|
return 0;
|
|
}
|
|
Some(i) => {
|
|
if count == 0 {
|
|
return i + 1;
|
|
} else if i == 0 {
|
|
return 0;
|
|
}
|
|
count -= 1;
|
|
pos = i;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use grep_matcher::Match;
|
|
use std::ops::Range;
|
|
use std::str;
|
|
|
|
const SHERLOCK: &'static str = "\
|
|
For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
Holmeses, success in the province of detective work must always
|
|
be, to a very large extent, the result of luck. Sherlock Holmes
|
|
can extract a clew from a wisp of straw or a flake of cigar ash;
|
|
but Doctor Watson has to have it taken out for him and dusted,
|
|
and exhibited clearly, with a label attached.\
|
|
";
|
|
|
|
fn m(start: usize, end: usize) -> Match {
|
|
Match::new(start, end)
|
|
}
|
|
|
|
fn lines(text: &str) -> Vec<&str> {
|
|
let mut results = vec![];
|
|
let mut it = LineStep::new(b'\n', 0, text.len());
|
|
while let Some(m) = it.next_match(text.as_bytes()) {
|
|
results.push(&text[m]);
|
|
}
|
|
results
|
|
}
|
|
|
|
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
|
let mut results = vec![];
|
|
let mut it = LineStep::new(b'\n', 0, text.len());
|
|
while let Some(m) = it.next_match(text.as_bytes()) {
|
|
results.push(m.start()..m.end());
|
|
}
|
|
results
|
|
}
|
|
|
|
fn prev(text: &str, pos: usize, count: usize) -> usize {
|
|
preceding_by_pos(text.as_bytes(), pos, b'\n', count)
|
|
}
|
|
|
|
fn loc(text: &str, start: usize, end: usize) -> Match {
|
|
locate(text.as_bytes(), b'\n', Match::new(start, end))
|
|
}
|
|
|
|
#[test]
|
|
fn line_count() {
|
|
assert_eq!(0, count(b"", b'\n'));
|
|
assert_eq!(1, count(b"\n", b'\n'));
|
|
assert_eq!(2, count(b"\n\n", b'\n'));
|
|
assert_eq!(2, count(b"a\nb\nc", b'\n'));
|
|
}
|
|
|
|
#[test]
|
|
fn line_locate() {
|
|
let t = SHERLOCK;
|
|
let lines = line_ranges(t);
|
|
|
|
assert_eq!(
|
|
loc(t, lines[0].start, lines[0].end),
|
|
m(lines[0].start, lines[0].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[0].start + 1, lines[0].end),
|
|
m(lines[0].start, lines[0].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[0].end - 1, lines[0].end),
|
|
m(lines[0].start, lines[0].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[0].end, lines[0].end),
|
|
m(lines[1].start, lines[1].end)
|
|
);
|
|
|
|
assert_eq!(
|
|
loc(t, lines[5].start, lines[5].end),
|
|
m(lines[5].start, lines[5].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[5].start + 1, lines[5].end),
|
|
m(lines[5].start, lines[5].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[5].end - 1, lines[5].end),
|
|
m(lines[5].start, lines[5].end)
|
|
);
|
|
assert_eq!(
|
|
loc(t, lines[5].end, lines[5].end),
|
|
m(lines[5].start, lines[5].end)
|
|
);
|
|
}
|
|
|
|
#[test]
|
|
fn line_locate_weird() {
|
|
assert_eq!(loc("", 0, 0), m(0, 0));
|
|
|
|
assert_eq!(loc("\n", 0, 1), m(0, 1));
|
|
assert_eq!(loc("\n", 1, 1), m(1, 1));
|
|
|
|
assert_eq!(loc("\n\n", 0, 0), m(0, 1));
|
|
assert_eq!(loc("\n\n", 0, 1), m(0, 1));
|
|
assert_eq!(loc("\n\n", 1, 1), m(1, 2));
|
|
assert_eq!(loc("\n\n", 1, 2), m(1, 2));
|
|
assert_eq!(loc("\n\n", 2, 2), m(2, 2));
|
|
|
|
assert_eq!(loc("a\nb\nc", 0, 1), m(0, 2));
|
|
assert_eq!(loc("a\nb\nc", 1, 2), m(0, 2));
|
|
assert_eq!(loc("a\nb\nc", 2, 3), m(2, 4));
|
|
assert_eq!(loc("a\nb\nc", 3, 4), m(2, 4));
|
|
assert_eq!(loc("a\nb\nc", 4, 5), m(4, 5));
|
|
assert_eq!(loc("a\nb\nc", 5, 5), m(4, 5));
|
|
}
|
|
|
|
#[test]
|
|
fn line_iter() {
|
|
assert_eq!(lines("abc"), vec!["abc"]);
|
|
|
|
assert_eq!(lines("abc\n"), vec!["abc\n"]);
|
|
assert_eq!(lines("abc\nxyz"), vec!["abc\n", "xyz"]);
|
|
assert_eq!(lines("abc\nxyz\n"), vec!["abc\n", "xyz\n"]);
|
|
|
|
assert_eq!(lines("abc\n\n"), vec!["abc\n", "\n"]);
|
|
assert_eq!(lines("abc\n\n\n"), vec!["abc\n", "\n", "\n"]);
|
|
assert_eq!(lines("abc\n\nxyz"), vec!["abc\n", "\n", "xyz"]);
|
|
assert_eq!(lines("abc\n\nxyz\n"), vec!["abc\n", "\n", "xyz\n"]);
|
|
assert_eq!(lines("abc\nxyz\n\n"), vec!["abc\n", "xyz\n", "\n"]);
|
|
|
|
assert_eq!(lines("\n"), vec!["\n"]);
|
|
assert_eq!(lines(""), Vec::<&str>::new());
|
|
}
|
|
|
|
#[test]
|
|
fn line_iter_empty() {
|
|
let mut it = LineStep::new(b'\n', 0, 0);
|
|
assert_eq!(it.next(b"abc"), None);
|
|
}
|
|
|
|
#[test]
|
|
fn preceding_lines_doc() {
|
|
// These are the examples mentions in the documentation of `preceding`.
|
|
let bytes = b"abc\nxyz\n";
|
|
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
|
|
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
|
|
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
|
|
assert_eq!(0, preceding_by_pos(bytes, 8, b'\n', 1));
|
|
}
|
|
|
|
#[test]
|
|
fn preceding_lines_sherlock() {
|
|
let t = SHERLOCK;
|
|
let lines = line_ranges(t);
|
|
|
|
// The following tests check the count == 0 case, i.e., finding the
|
|
// beginning of the line containing the given position.
|
|
assert_eq!(0, prev(t, 0, 0));
|
|
assert_eq!(0, prev(t, 1, 0));
|
|
// The line terminator is addressed by `end-1` and terminates the line
|
|
// it is part of.
|
|
assert_eq!(0, prev(t, lines[0].end - 1, 0));
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
|
|
// The end position of line addresses the byte immediately following a
|
|
// line terminator, which puts it on the following line.
|
|
assert_eq!(lines[1].start, prev(t, lines[0].end + 1, 0));
|
|
|
|
// Now tests for count > 0.
|
|
assert_eq!(0, prev(t, 0, 1));
|
|
assert_eq!(0, prev(t, 0, 2));
|
|
assert_eq!(0, prev(t, 1, 1));
|
|
assert_eq!(0, prev(t, 1, 2));
|
|
assert_eq!(0, prev(t, lines[0].end - 1, 1));
|
|
assert_eq!(0, prev(t, lines[0].end - 1, 2));
|
|
assert_eq!(0, prev(t, lines[0].end, 1));
|
|
assert_eq!(0, prev(t, lines[0].end, 2));
|
|
assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
|
|
assert_eq!(lines[3].start, prev(t, lines[4].end, 1));
|
|
assert_eq!(lines[4].start, prev(t, lines[4].end + 1, 1));
|
|
|
|
// The last line has no line terminator.
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
|
|
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
|
}
|
|
|
|
#[test]
|
|
fn preceding_lines_short() {
|
|
let t = "a\nb\nc\nd\ne\nf\n";
|
|
let lines = line_ranges(t);
|
|
assert_eq!(12, t.len());
|
|
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
|
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
|
assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
|
|
assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
|
|
assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
|
|
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end - 1, 0));
|
|
assert_eq!(lines[4].start, prev(t, lines[5].end - 1, 1));
|
|
assert_eq!(lines[3].start, prev(t, lines[5].end - 1, 2));
|
|
assert_eq!(lines[2].start, prev(t, lines[5].end - 1, 3));
|
|
assert_eq!(lines[1].start, prev(t, lines[5].end - 1, 4));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 5));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end - 1, 6));
|
|
|
|
assert_eq!(lines[4].start, prev(t, lines[5].start, 0));
|
|
assert_eq!(lines[3].start, prev(t, lines[5].start, 1));
|
|
assert_eq!(lines[2].start, prev(t, lines[5].start, 2));
|
|
assert_eq!(lines[1].start, prev(t, lines[5].start, 3));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].start, 4));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].start, 5));
|
|
|
|
assert_eq!(lines[3].start, prev(t, lines[4].end - 1, 1));
|
|
assert_eq!(lines[2].start, prev(t, lines[4].start, 1));
|
|
|
|
assert_eq!(lines[2].start, prev(t, lines[3].end - 1, 1));
|
|
assert_eq!(lines[1].start, prev(t, lines[3].start, 1));
|
|
|
|
assert_eq!(lines[1].start, prev(t, lines[2].end - 1, 1));
|
|
assert_eq!(lines[0].start, prev(t, lines[2].start, 1));
|
|
|
|
assert_eq!(lines[0].start, prev(t, lines[1].end - 1, 1));
|
|
assert_eq!(lines[0].start, prev(t, lines[1].start, 1));
|
|
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end - 1, 1));
|
|
assert_eq!(lines[0].start, prev(t, lines[0].start, 1));
|
|
}
|
|
|
|
#[test]
|
|
fn preceding_lines_empty1() {
|
|
let t = "\n\n\nd\ne\nf\n";
|
|
let lines = line_ranges(t);
|
|
assert_eq!(9, t.len());
|
|
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end, 1));
|
|
assert_eq!(lines[1].start, prev(t, lines[1].end, 0));
|
|
assert_eq!(lines[0].start, prev(t, lines[1].end, 1));
|
|
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
|
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
|
assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
|
|
assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
|
|
assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
|
|
}
|
|
|
|
#[test]
|
|
fn preceding_lines_empty2() {
|
|
let t = "a\n\n\nd\ne\nf\n";
|
|
let lines = line_ranges(t);
|
|
assert_eq!(10, t.len());
|
|
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end, 0));
|
|
assert_eq!(lines[0].start, prev(t, lines[0].end, 1));
|
|
assert_eq!(lines[1].start, prev(t, lines[1].end, 0));
|
|
assert_eq!(lines[0].start, prev(t, lines[1].end, 1));
|
|
|
|
assert_eq!(lines[5].start, prev(t, lines[5].end, 0));
|
|
assert_eq!(lines[4].start, prev(t, lines[5].end, 1));
|
|
assert_eq!(lines[3].start, prev(t, lines[5].end, 2));
|
|
assert_eq!(lines[2].start, prev(t, lines[5].end, 3));
|
|
assert_eq!(lines[1].start, prev(t, lines[5].end, 4));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 5));
|
|
assert_eq!(lines[0].start, prev(t, lines[5].end, 6));
|
|
}
|
|
}
|