diff --git a/CHANGELOG.md b/CHANGELOG.md
index 900162bf..6ba9bd56 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,13 @@ discussion on this. Previously, every line in a match was duplicated, even
 when it spanned multiple lines. There are no changes to vimgrep output when
 multi-line mode is disabled.
 
+**In multi-line mode, --count is now equivalent to --count-matches.**
+
+This appears to match how `pcre2grep` implements `--count`. Previously, ripgrep
+would produce outright incorrect counts. Another alternative would be to simply
+count the number of lines---even if it's more than the number of matches---but
+that seems highly unintuitive.
+
 Security fixes:
 
 * [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013):
@@ -64,6 +71,8 @@ Bug fixes:
   Document cygwin path translation behavior in the FAQ.
 * [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311):
   Fix multi-line bug where a search & replace for `\n` didn't work as expected.
+* [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412):
+  Fix multi-line bug with searches using look-around past matching lines.
 * [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642):
   Fixes a bug where using `-m` and `-A` printed more matches than the limit.
 * [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703):
diff --git a/crates/core/app.rs b/crates/core/app.rs
index 5058c0ea..96801194 100644
--- a/crates/core/app.rs
+++ b/crates/core/app.rs
@@ -1057,11 +1057,13 @@ fn flag_count(args: &mut Vec<RGArg>) {
 This flag suppresses normal output and shows the number of lines that match
 the given patterns for each file searched. Each file containing a match has its
 path and count printed on each line. Note that this reports the number of lines
-that match and not the total number of matches.
+that match and not the total number of matches, unless -U/--multiline is
+enabled. In multiline mode, --count is equivalent to --count-matches.
 
 If only one file is given to ripgrep, then only the count is printed if there
 is a match. The --with-filename flag can be used to force printing the file
-path in this case.
+path in this case. If you need a count to be printed regardless of whether
+there is a match, then use --include-zero.
 
 This overrides the --count-matches flag. Note that when --count is combined
 with --only-matching, then ripgrep behaves as if --count-matches was given.
diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs
index 2bcd0c12..4859de39 100644
--- a/crates/matcher/src/lib.rs
+++ b/crates/matcher/src/lib.rs
@@ -618,12 +618,31 @@ pub trait Matcher {
     fn find_iter<F>(
         &self,
         haystack: &[u8],
+        matched: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(Match) -> bool,
+    {
+        self.find_iter_at(haystack, 0, matched)
+    }
+
+    /// Executes the given function over successive non-overlapping matches
+    /// in `haystack`. If no match exists, then the given function is never
+    /// called. If the function returns `false`, then iteration stops.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `at == 0`.
+    fn find_iter_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
         mut matched: F,
     ) -> Result<(), Self::Error>
     where
         F: FnMut(Match) -> bool,
     {
-        self.try_find_iter(haystack, |m| Ok(matched(m)))
+        self.try_find_iter_at(haystack, at, |m| Ok(matched(m)))
             .map(|r: Result<(), ()>| r.unwrap())
     }
 
@@ -637,12 +656,35 @@ pub trait Matcher {
     fn try_find_iter<F, E>(
         &self,
         haystack: &[u8],
+        matched: F,
+    ) -> Result<Result<(), E>, Self::Error>
+    where
+        F: FnMut(Match) -> Result<bool, E>,
+    {
+        self.try_find_iter_at(haystack, 0, matched)
+    }
+
+    /// Executes the given function over successive non-overlapping matches
+    /// in `haystack`. If no match exists, then the given function is never
+    /// called. If the function returns `false`, then iteration stops.
+    /// Similarly, if the function returns an error then iteration stops and
+    /// the error is yielded. If an error occurs while executing the search,
+    /// then it is converted to
+    /// `E`.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `at == 0`.
+    fn try_find_iter_at<F, E>(
+        &self,
+        haystack: &[u8],
+        at: usize,
         mut matched: F,
     ) -> Result<Result<(), E>, Self::Error>
     where
         F: FnMut(Match) -> Result<bool, E>,
     {
-        let mut last_end = 0;
+        let mut last_end = at;
         let mut last_match = None;
 
         loop {
@@ -696,12 +738,33 @@ pub trait Matcher {
         &self,
         haystack: &[u8],
         caps: &mut Self::Captures,
+        matched: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures) -> bool,
+    {
+        self.captures_iter_at(haystack, 0, caps, matched)
+    }
+
+    /// Executes the given function over successive non-overlapping matches
+    /// in `haystack` with capture groups extracted from each match. If no
+    /// match exists, then the given function is never called. If the function
+    /// returns `false`, then iteration stops.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `at == 0`.
+    fn captures_iter_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
         mut matched: F,
     ) -> Result<(), Self::Error>
     where
         F: FnMut(&Self::Captures) -> bool,
     {
-        self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
+        self.try_captures_iter_at(haystack, at, caps, |caps| Ok(matched(caps)))
             .map(|r: Result<(), ()>| r.unwrap())
     }
 
@@ -716,12 +779,36 @@ pub trait Matcher {
         &self,
         haystack: &[u8],
         caps: &mut Self::Captures,
+        matched: F,
+    ) -> Result<Result<(), E>, Self::Error>
+    where
+        F: FnMut(&Self::Captures) -> Result<bool, E>,
+    {
+        self.try_captures_iter_at(haystack, 0, caps, matched)
+    }
+
+    /// Executes the given function over successive non-overlapping matches
+    /// in `haystack` with capture groups extracted from each match. If no
+    /// match exists, then the given function is never called. If the function
+    /// returns `false`, then iteration stops. Similarly, if the function
+    /// returns an error then iteration stops and the error is yielded. If
+    /// an error occurs while executing the search, then it is converted to
+    /// `E`.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `at == 0`.
+    fn try_captures_iter_at<F, E>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
         mut matched: F,
     ) -> Result<Result<(), E>, Self::Error>
     where
         F: FnMut(&Self::Captures) -> Result<bool, E>,
     {
-        let mut last_end = 0;
+        let mut last_end = at;
         let mut last_match = None;
 
         loop {
@@ -819,13 +906,35 @@ pub trait Matcher {
         haystack: &[u8],
         caps: &mut Self::Captures,
         dst: &mut Vec<u8>,
+        append: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
+    {
+        self.replace_with_captures_at(haystack, 0, caps, dst, append)
+    }
+
+    /// Replaces every match in the given haystack with the result of calling
+    /// `append` with the matching capture groups.
+    ///
+    /// If the given `append` function returns `false`, then replacement stops.
+    ///
+    /// The significance of the starting point is that it takes the surrounding
+    /// context into consideration. For example, the `\A` anchor can only
+    /// match when `at == 0`.
+    fn replace_with_captures_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
+        dst: &mut Vec<u8>,
         mut append: F,
     ) -> Result<(), Self::Error>
     where
         F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
     {
-        let mut last_match = 0;
-        self.captures_iter(haystack, caps, |caps| {
+        let mut last_match = at;
+        self.captures_iter_at(haystack, at, caps, |caps| {
             let m = caps.get(0).unwrap();
             dst.extend(&haystack[last_match..m.start]);
             last_match = m.end;
@@ -1039,6 +1148,18 @@ impl<'a, M: Matcher> Matcher for &'a M {
         (*self).find_iter(haystack, matched)
     }
 
+    fn find_iter_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        matched: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(Match) -> bool,
+    {
+        (*self).find_iter_at(haystack, at, matched)
+    }
+
     fn try_find_iter<F, E>(
         &self,
         haystack: &[u8],
@@ -1050,6 +1171,18 @@ impl<'a, M: Matcher> Matcher for &'a M {
         (*self).try_find_iter(haystack, matched)
     }
 
+    fn try_find_iter_at<F, E>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        matched: F,
+    ) -> Result<Result<(), E>, Self::Error>
+    where
+        F: FnMut(Match) -> Result<bool, E>,
+    {
+        (*self).try_find_iter_at(haystack, at, matched)
+    }
+
     fn captures(
         &self,
         haystack: &[u8],
@@ -1070,6 +1203,19 @@ impl<'a, M: Matcher> Matcher for &'a M {
         (*self).captures_iter(haystack, caps, matched)
     }
 
+    fn captures_iter_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
+        matched: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures) -> bool,
+    {
+        (*self).captures_iter_at(haystack, at, caps, matched)
+    }
+
     fn try_captures_iter<F, E>(
         &self,
         haystack: &[u8],
@@ -1082,6 +1228,19 @@ impl<'a, M: Matcher> Matcher for &'a M {
         (*self).try_captures_iter(haystack, caps, matched)
     }
 
+    fn try_captures_iter_at<F, E>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
+        matched: F,
+    ) -> Result<Result<(), E>, Self::Error>
+    where
+        F: FnMut(&Self::Captures) -> Result<bool, E>,
+    {
+        (*self).try_captures_iter_at(haystack, at, caps, matched)
+    }
+
     fn replace<F>(
         &self,
         haystack: &[u8],
@@ -1107,6 +1266,20 @@ impl<'a, M: Matcher> Matcher for &'a M {
         (*self).replace_with_captures(haystack, caps, dst, append)
     }
 
+    fn replace_with_captures_at<F>(
+        &self,
+        haystack: &[u8],
+        at: usize,
+        caps: &mut Self::Captures,
+        dst: &mut Vec<u8>,
+        append: F,
+    ) -> Result<(), Self::Error>
+    where
+        F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
+    {
+        (*self).replace_with_captures_at(haystack, at, caps, dst, append)
+    }
+
     fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
         (*self).is_match(haystack)
     }
diff --git a/crates/printer/src/json.rs b/crates/printer/src/json.rs
index b1a6fa2e..8500e6a1 100644
--- a/crates/printer/src/json.rs
+++ b/crates/printer/src/json.rs
@@ -4,14 +4,14 @@ use std::time::Instant;
 
 use grep_matcher::{Match, Matcher};
 use grep_searcher::{
-    Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
-    SinkMatch,
+    Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
 };
 use serde_json as json;
 
 use counter::CounterWriter;
 use jsont;
 use stats::Stats;
+use util::find_iter_at_in_context;
 
 /// The configuration for the JSON printer.
 ///
@@ -603,7 +603,12 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
 
     /// Execute the matcher over the given bytes and record the match
     /// locations if the current configuration demands match granularity.
-    fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
+    fn record_matches(
+        &mut self,
+        searcher: &Searcher,
+        bytes: &[u8],
+        range: std::ops::Range<usize>,
+    ) -> io::Result<()> {
         self.json.matches.clear();
         // If printing requires knowing the location of each individual match,
         // then compute and stored those right now for use later. While this
@@ -612,12 +617,17 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
         // the extent that it's easy to ensure that we never do more than
         // one search to find the matches.
         let matches = &mut self.json.matches;
-        self.matcher
-            .find_iter(bytes, |m| {
-                matches.push(m);
+        find_iter_at_in_context(
+            searcher,
+            &self.matcher,
+            bytes,
+            range.clone(),
+            |m| {
+                let (s, e) = (m.start() - range.start, m.end() - range.start);
+                matches.push(Match::new(s, e));
                 true
-            })
-            .map_err(io::Error::error_message)?;
+            },
+        )?;
         // Don't report empty matches appearing at the end of the bytes.
         if !matches.is_empty()
             && matches.last().unwrap().is_empty()
@@ -691,7 +701,11 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
             self.after_context_remaining = searcher.after_context() as u64;
         }
 
-        self.record_matches(mat.bytes())?;
+        self.record_matches(
+            searcher,
+            mat.buffer(),
+            mat.bytes_range_in_buffer(),
+        )?;
         self.stats.add_matches(self.json.matches.len() as u64);
         self.stats.add_matched_lines(mat.lines().count() as u64);
 
@@ -720,7 +734,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
                 self.after_context_remaining.saturating_sub(1);
         }
         let submatches = if searcher.invert_match() {
-            self.record_matches(ctx.bytes())?;
+            self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
             SubMatches::new(ctx.bytes(), &self.json.matches)
         } else {
             SubMatches::empty()
diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs
index fa321dcd..abb1087f 100644
--- a/crates/printer/src/lib.rs
+++ b/crates/printer/src/lib.rs
@@ -92,6 +92,17 @@ pub use stats::Stats;
 pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
 pub use util::PrinterPath;
 
+// The maximum number of bytes to execute a search to account for look-ahead.
+//
+// This is an unfortunate kludge since PCRE2 doesn't provide a way to search
+// a substring of some input while accounting for look-ahead. In theory, we
+// could refactor the various 'grep' interfaces to account for it, but it would
+// be a large change. So for now, we just let PCRE2 go looking a bit for a
+// match without searching the entire rest of the contents.
+//
+// Note that this kludge is only active in multi-line mode.
+const MAX_LOOK_AHEAD: usize = 128;
+
 #[macro_use]
 mod macros;
 
diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs
index e7fe7c38..0c853f1b 100644
--- a/crates/printer/src/standard.rs
+++ b/crates/printer/src/standard.rs
@@ -8,15 +8,17 @@ use std::time::Instant;
 use bstr::ByteSlice;
 use grep_matcher::{Match, Matcher};
 use grep_searcher::{
-    LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkError,
-    SinkFinish, SinkMatch,
+    LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish,
+    SinkMatch,
 };
 use termcolor::{ColorSpec, NoColor, WriteColor};
 
 use color::ColorSpecs;
 use counter::CounterWriter;
 use stats::Stats;
-use util::{trim_ascii_prefix, PrinterPath, Replacer, Sunk};
+use util::{
+    find_iter_at_in_context, trim_ascii_prefix, PrinterPath, Replacer, Sunk,
+};
 
 /// The configuration for the standard printer.
 ///
@@ -682,7 +684,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
 
     /// Execute the matcher over the given bytes and record the match
     /// locations if the current configuration demands match granularity.
-    fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
+    fn record_matches(
+        &mut self,
+        searcher: &Searcher,
+        bytes: &[u8],
+        range: std::ops::Range<usize>,
+    ) -> io::Result<()> {
         self.standard.matches.clear();
         if !self.needs_match_granularity {
             return Ok(());
@@ -695,16 +702,21 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
         // one search to find the matches (well, for replacements, we do one
         // additional search to perform the actual replacement).
         let matches = &mut self.standard.matches;
-        self.matcher
-            .find_iter(bytes, |m| {
-                matches.push(m);
+        find_iter_at_in_context(
+            searcher,
+            &self.matcher,
+            bytes,
+            range.clone(),
+            |m| {
+                let (s, e) = (m.start() - range.start, m.end() - range.start);
+                matches.push(Match::new(s, e));
                 true
-            })
-            .map_err(io::Error::error_message)?;
+            },
+        )?;
         // Don't report empty matches appearing at the end of the bytes.
         if !matches.is_empty()
             && matches.last().unwrap().is_empty()
-            && matches.last().unwrap().start() >= bytes.len()
+            && matches.last().unwrap().start() >= range.end
         {
             matches.pop().unwrap();
         }
@@ -715,14 +727,25 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
     /// replacement, lazily allocating memory if necessary.
     ///
     /// To access the result of a replacement, use `replacer.replacement()`.
-    fn replace(&mut self, bytes: &[u8]) -> io::Result<()> {
+    fn replace(
+        &mut self,
+        searcher: &Searcher,
+        bytes: &[u8],
+        range: std::ops::Range<usize>,
+    ) -> io::Result<()> {
         self.replacer.clear();
         if self.standard.config.replacement.is_some() {
             let replacement = (*self.standard.config.replacement)
                 .as_ref()
                 .map(|r| &*r)
                 .unwrap();
-            self.replacer.replace_all(&self.matcher, bytes, replacement)?;
+            self.replacer.replace_all(
+                searcher,
+                &self.matcher,
+                bytes,
+                range,
+                replacement,
+            )?;
         }
         Ok(())
     }
@@ -777,8 +800,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
             self.after_context_remaining = searcher.after_context() as u64;
         }
 
-        self.record_matches(mat.bytes())?;
-        self.replace(mat.bytes())?;
+        self.record_matches(
+            searcher,
+            mat.buffer(),
+            mat.bytes_range_in_buffer(),
+        )?;
+        self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
 
         if let Some(ref mut stats) = self.stats {
             stats.add_matches(self.standard.matches.len() as u64);
@@ -807,8 +834,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
                 self.after_context_remaining.saturating_sub(1);
         }
         if searcher.invert_match() {
-            self.record_matches(ctx.bytes())?;
-            self.replace(ctx.bytes())?;
+            self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
+            self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
         }
         if searcher.binary_detection().convert_byte().is_some() {
             if self.binary_byte_offset.is_some() {
diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs
index 5ce087ef..e062662d 100644
--- a/crates/printer/src/summary.rs
+++ b/crates/printer/src/summary.rs
@@ -11,7 +11,7 @@ use termcolor::{ColorSpec, NoColor, WriteColor};
 use color::ColorSpecs;
 use counter::CounterWriter;
 use stats::Stats;
-use util::PrinterPath;
+use util::{find_iter_at_in_context, PrinterPath};
 
 /// The configuration for the summary printer.
 ///
@@ -504,6 +504,17 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
         self.stats.as_ref()
     }
 
+    /// Returns true if and only if the searcher may report matches over
+    /// multiple lines.
+    ///
+    /// Note that this doesn't just return whether the searcher is in multi
+    /// line mode, but also checks if the mater can match over multiple lines.
+    /// If it can't, then we don't need multi line handling, even if the
+    /// searcher has multi line mode enabled.
+    fn multi_line(&self, searcher: &Searcher) -> bool {
+        searcher.multi_line_with_matcher(&self.matcher)
+    }
+
     /// Returns true if this printer should quit.
     ///
     /// This implements the logic for handling quitting after seeing a certain
@@ -579,32 +590,39 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
 
     fn matched(
         &mut self,
-        _searcher: &Searcher,
+        searcher: &Searcher,
         mat: &SinkMatch,
     ) -> Result<bool, io::Error> {
-        self.match_count += 1;
-        if let Some(ref mut stats) = self.stats {
-            let mut match_count = 0;
-            self.matcher
-                .find_iter(mat.bytes(), |_| {
-                    match_count += 1;
+        let is_multi_line = self.multi_line(searcher);
+        let sink_match_count = if self.stats.is_none() && !is_multi_line {
+            1
+        } else {
+            // This gives us as many bytes as the searcher can offer. This
+            // isn't guaranteed to hold the necessary context to get match
+            // detection correct (because of look-around), but it does in
+            // practice.
+            let buf = mat.buffer();
+            let range = mat.bytes_range_in_buffer();
+            let mut count = 0;
+            find_iter_at_in_context(
+                searcher,
+                &self.matcher,
+                buf,
+                range,
+                |_| {
+                    count += 1;
                     true
-                })
-                .map_err(io::Error::error_message)?;
-            if match_count == 0 {
-                // It is possible for the match count to be zero when
-                // look-around is used. Since `SinkMatch` won't necessarily
-                // contain the look-around in its match span, the search here
-                // could fail to find anything.
-                //
-                // It seems likely that setting match_count=1 here is probably
-                // wrong in some cases, but I don't think we can do any
-                // better. (Because this printer cannot assume that subsequent
-                // contents have been loaded into memory, so we have no way of
-                // increasing the search span here.)
-                match_count = 1;
-            }
-            stats.add_matches(match_count);
+                },
+            )?;
+            count
+        };
+        if is_multi_line {
+            self.match_count += sink_match_count;
+        } else {
+            self.match_count += 1;
+        }
+        if let Some(ref mut stats) = self.stats {
+            stats.add_matches(sink_match_count);
             stats.add_matched_lines(mat.lines().count() as u64);
         } else if self.summary.config.kind.quit_early() {
             return Ok(false);
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 3948d970..37e56529 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -7,11 +7,13 @@ use std::time;
 use bstr::{ByteSlice, ByteVec};
 use grep_matcher::{Captures, LineTerminator, Match, Matcher};
 use grep_searcher::{
-    LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
+    LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
 };
 #[cfg(feature = "serde1")]
 use serde::{Serialize, Serializer};
 
+use MAX_LOOK_AHEAD;
+
 /// A type for handling replacements while amortizing allocation.
 pub struct Replacer<M: Matcher> {
     space: Option<Space<M>>,
@@ -52,10 +54,22 @@ impl<M: Matcher> Replacer<M> {
     /// This can fail if the underlying matcher reports an error.
     pub fn replace_all<'a>(
         &'a mut self,
+        searcher: &Searcher,
         matcher: &M,
-        subject: &[u8],
+        mut subject: &[u8],
+        range: std::ops::Range<usize>,
         replacement: &[u8],
     ) -> io::Result<()> {
+        // See the giant comment in 'find_iter_at_in_context' below for why we
+        // do this dance.
+        let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+        if is_multi_line {
+            if subject[range.end..].len() >= MAX_LOOK_AHEAD {
+                subject = &subject[..range.end + MAX_LOOK_AHEAD];
+            }
+        } else {
+            subject = &subject[..range.end];
+        }
         {
             let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
                 self.allocate(matcher)?;
@@ -63,18 +77,24 @@ impl<M: Matcher> Replacer<M> {
             matches.clear();
 
             matcher
-                .replace_with_captures(subject, caps, dst, |caps, dst| {
-                    let start = dst.len();
-                    caps.interpolate(
-                        |name| matcher.capture_index(name),
-                        subject,
-                        replacement,
-                        dst,
-                    );
-                    let end = dst.len();
-                    matches.push(Match::new(start, end));
-                    true
-                })
+                .replace_with_captures_at(
+                    subject,
+                    range.start,
+                    caps,
+                    dst,
+                    |caps, dst| {
+                        let start = dst.len();
+                        caps.interpolate(
+                            |name| matcher.capture_index(name),
+                            subject,
+                            replacement,
+                            dst,
+                        );
+                        let end = dst.len();
+                        matches.push(Match::new(start, end));
+                        true
+                    },
+                )
                 .map_err(io::Error::error_message)?;
         }
         Ok(())
@@ -357,3 +377,55 @@ pub fn trim_ascii_prefix(
         .count();
     range.with_start(range.start() + count)
 }
+
+pub fn find_iter_at_in_context<M, F>(
+    searcher: &Searcher,
+    matcher: M,
+    mut bytes: &[u8],
+    range: std::ops::Range<usize>,
+    mut matched: F,
+) -> io::Result<()>
+where
+    M: Matcher,
+    F: FnMut(Match) -> bool,
+{
+    // This strange dance is to account for the possibility of look-ahead in
+    // the regex. The problem here is that mat.bytes() doesn't include the
+    // lines beyond the match boundaries in mulit-line mode, which means that
+    // when we try to rediscover the full set of matches here, the regex may no
+    // longer match if it required some look-ahead beyond the matching lines.
+    //
+    // PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
+    // bound of the search. So we kludge it and let the regex engine search the
+    // rest of the buffer... But to avoid things getting too crazy, we cap the
+    // buffer.
+    //
+    // If it weren't for multi-line mode, then none of this would be needed.
+    // Alternatively, if we refactored the grep interfaces to pass along the
+    // full set of matches (if available) from the searcher, then that might
+    // also help here. But that winds up paying an upfront unavoidable cost for
+    // the case where matches don't need to be counted. So then you'd have to
+    // introduce a way to pass along matches conditionally, only when needed.
+    // Yikes.
+    //
+    // Maybe the bigger picture thing here is that the searcher should be
+    // responsible for finding matches when necessary, and the printer
+    // shouldn't be involved in this business in the first place. Sigh. Live
+    // and learn. Abstraction boundaries are hard.
+    let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+    if is_multi_line {
+        if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
+            bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
+        }
+    } else {
+        bytes = &bytes[..range.end];
+    }
+    matcher
+        .find_iter_at(bytes, range.start, |m| {
+            if m.start() >= range.end {
+                return false;
+            }
+            matched(m)
+        })
+        .map_err(io::Error::error_message)
+}
diff --git a/crates/regex/src/word.rs b/crates/regex/src/word.rs
index 8f744516..1a75ba48 100644
--- a/crates/regex/src/word.rs
+++ b/crates/regex/src/word.rs
@@ -48,7 +48,7 @@ impl WordMatcher {
         let original =
             expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
         let word_expr = expr.with_pattern(|pat| {
-            let pat = format!(r"(?:(?-m:^)|\W)({})(?:(?-m:$)|\W)", pat);
+            let pat = format!(r"(?:(?m:^)|\W)({})(?:\W|(?m:$))", pat);
             debug!("word regex: {:?}", pat);
             pat
         })?;
@@ -237,6 +237,8 @@ mod tests {
         assert_eq!(Some((2, 5)), find(r"!?foo!?", "a!foo!a"));
 
         assert_eq!(Some((2, 7)), find(r"!?foo!?", "##!foo!\n"));
+        assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!##"));
+        assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!\n##"));
         assert_eq!(Some((3, 7)), find(r"f?oo!?", "##\nfoo!##"));
         assert_eq!(Some((2, 5)), find(r"(?-u)foo[^a]*", "#!foo☃aaa"));
     }
diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs
index fe4254ea..21c29303 100644
--- a/crates/searcher/src/searcher/core.rs
+++ b/crates/searcher/src/searcher/core.rs
@@ -441,6 +441,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
                 bytes: linebuf,
                 absolute_byte_offset: offset,
                 line_number: self.line_number,
+                buffer: buf,
+                bytes_range_in_buffer: range.start()..range.end(),
             },
         )?;
         if !keepgoing {
diff --git a/crates/searcher/src/sink.rs b/crates/searcher/src/sink.rs
index 750aefbe..f82ffda7 100644
--- a/crates/searcher/src/sink.rs
+++ b/crates/searcher/src/sink.rs
@@ -365,6 +365,8 @@ pub struct SinkMatch<'b> {
     pub(crate) bytes: &'b [u8],
     pub(crate) absolute_byte_offset: u64,
     pub(crate) line_number: Option<u64>,
+    pub(crate) buffer: &'b [u8],
+    pub(crate) bytes_range_in_buffer: std::ops::Range<usize>,
 }
 
 impl<'b> SinkMatch<'b> {
@@ -405,6 +407,18 @@ impl<'b> SinkMatch<'b> {
     pub fn line_number(&self) -> Option<u64> {
         self.line_number
     }
+
+    /// TODO
+    #[inline]
+    pub fn buffer(&self) -> &'b [u8] {
+        self.buffer
+    }
+
+    /// TODO
+    #[inline]
+    pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
+        self.bytes_range_in_buffer.clone()
+    }
 }
 
 /// The type of context reported by a searcher.
diff --git a/tests/json.rs b/tests/json.rs
index 97d8e719..ff0b5aae 100644
--- a/tests/json.rs
+++ b/tests/json.rs
@@ -339,3 +339,22 @@ rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| {
     assert_eq!(m.submatches[0].m, Data::text("\n"));
     assert_eq!(m.submatches[1].m, Data::text("\n"));
 });
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1412
+rgtest!(r1412_look_behind_match_missing, |dir: Dir, mut cmd: TestCommand| {
+    // Only PCRE2 supports look-around.
+    if !dir.is_pcre2() {
+        return;
+    }
+
+    dir.create("test", "foo\nbar\n");
+
+    let msgs = json_decode(
+        &cmd.arg("-U").arg("--json").arg(r"(?<=foo\n)bar").stdout(),
+    );
+    assert_eq!(msgs.len(), 4);
+
+    let m = msgs[1].unwrap_match();
+    assert_eq!(m.lines, Data::text("bar\n"));
+    assert_eq!(m.submatches.len(), 1);
+});
diff --git a/tests/regression.rs b/tests/regression.rs
index 94e62969..d110c99c 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -806,6 +806,19 @@ rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| {
     eqnice!("mylink/file.txt:test\n", stdout);
 });
 
+// printf "foo\nbar\n" | rg -PU '(?<=foo\n)bar' -r quux
+// See: https://github.com/BurntSushi/ripgrep/issues/1412
+rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| {
+    // Only PCRE2 supports look-around.
+    if !dir.is_pcre2() {
+        return;
+    }
+
+    dir.create("test", "foo\nbar\n");
+    cmd.args(&["-nU", "-rquux", r"(?<=foo\n)bar", "test"]);
+    eqnice!("2:quux\n", cmd.stdout());
+});
+
 // See: https://github.com/BurntSushi/ripgrep/pull/1446
 rgtest!(
     r1446_respect_excludes_in_worktree,