3 files changed, 31 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b435273..c637aeae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,6 +38,8 @@ Bug fixes:
   Fix bug when using inline regex flags with `-e/--regexp`.
 * [BUG #2523](https://github.com/BurntSushi/ripgrep/issues/2523):
   Make executable searching take `.com` into account on Windows.
+* [BUG #2574](https://github.com/BurntSushi/ripgrep/issues/2574):
+  Fix bug in `-w/--word-regexp` that would result in incorrect match offsets.
 
 
 13.0.0 (2021-06-12)
diff --git a/crates/regex/src/word.rs b/crates/regex/src/word.rs
index af4480ab..52fb61ce 100644
--- a/crates/regex/src/word.rs
+++ b/crates/regex/src/word.rs
@@ -128,6 +128,9 @@ impl WordMatcher {
         // The reason why we cannot handle the ^/$ cases here is because we
         // can't assume anything about the original pattern. (Try commenting
         // out the checks for ^/$ below and run the tests to see examples.)
+        //
+        // NOTE(2023-07-31): After fixing #2574, this logic honestly still
+        // doesn't seem correct. Regex composition is hard.
         let input = Input::new(haystack).span(at..haystack.len());
         let mut cand = match self.regex.find(input) {
             None => return Ok(None),
@@ -136,8 +139,17 @@ impl WordMatcher {
         if cand.start() == 0 || cand.end() == haystack.len() {
             return Err(());
         }
-        let (_, slen) = bstr::decode_utf8(&haystack[cand]);
-        let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
+        // We decode the chars on either side of the match. If either char is
+        // a word character, then that means the ^/$ matched and not \W. In
+        // that case, we defer to the slower engine.
+        let (ch, slen) = bstr::decode_utf8(&haystack[cand]);
+        if ch.map_or(true, regex_syntax::is_word_character) {
+            return Err(());
+        }
+        let (ch, elen) = bstr::decode_last_utf8(&haystack[cand]);
+        if ch.map_or(true, regex_syntax::is_word_character) {
+            return Err(());
+        }
         let new_start = cand.start() + slen;
         let new_end = cand.end() - elen;
         // This occurs the original regex can match the empty string. In this
diff --git a/tests/regression.rs b/tests/regression.rs
index b9076803..5ef741cf 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -1173,3 +1173,18 @@ rgtest!(r2480, |dir: Dir, mut cmd: TestCommand| {
     cmd.args(&["--only-matching", "-e", "(?i)notfoo", "-e", "bar", "file"]);
     cmd.assert_err();
 });
+
+// Regression test; see: https://github.com/BurntSushi/ripgrep/issues/2574
+rgtest!(r2574, |dir: Dir, mut cmd: TestCommand| {
+    dir.create("haystack", "some.domain.com\nsome.domain.com/x\n");
+    let got = cmd
+        .args(&[
+            "--no-filename",
+            "--no-unicode",
+            "-w", // word-regexp: the flag whose match offsets are checked
+            "-o", // print only the matched text of each line
+            r"(\w+\.)*domain\.(\w+)",
+        ])
+        .stdout();
+    eqnice!("some.domain.com\nsome.domain.com\n", got);
+});