Bump and update deps.

Add new -M/--max-columns option.
This permits setting the maximum line width, measured as the number of bytes in a line. Lines exceeding the limit (whether part of a match, replacement or context) are omitted and replaced with a message stating that the line was elided. Fixes #129
2025-07-31 04:02:00 -07:00 · 2017-03-12 21:33:13 -04:00 · 2017-03-12 21:21:28 -04:00 · 2017-03-12 20:52:28 -04:00 · 2017-03-12 20:31:09 -04:00 · 2017-03-12 20:24:45 -04:00
40 changed files with 1386 additions and 171 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,13 +9,13 @@ matrix:
    # (All *nix releases are done on the nightly channel to take advantage
    # of the regex library's multiple pattern SIMD search.)
    - os: linux
-      rust: nightly
+      rust: nightly-2017-03-13
      env: TARGET=i686-unknown-linux-musl
    - os: linux
-      rust: nightly
+      rust: nightly-2017-03-13
      env: TARGET=x86_64-unknown-linux-musl
    - os: osx
-      rust: nightly
+      rust: nightly-2017-03-13
      env: TARGET=x86_64-apple-darwin
    # Beta channel.
    - os: linux
@@ -57,7 +57,7 @@ deploy:
    # channel to use to produce the release artifacts
    # NOTE make sure you only release *once* per target
    # TODO you may want to pick a different channel
-    condition: $TRAVIS_RUST_VERSION = nightly
+    condition: $TRAVIS_RUST_VERSION = nightly-2017-03-13
    tags: true

 branches:
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,26 +4,25 @@ version = "0.4.0"
 dependencies = [
 "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "bytecount 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
- "clap 2.19.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "grep 0.1.5",
- "ignore 0.1.7",
- "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
- "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "clap 2.21.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "env_logger 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "grep 0.1.6",
+ "ignore 0.1.8",
+ "lazy_static 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "num_cpus 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num_cpus 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "same-file 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "termcolor 0.2.0",
- "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "termcolor 0.3.1",
 ]

 [[package]]
 name = "aho-corasick"
-version = "0.6.1"
+version = "0.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -40,13 +39,13 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "bitflags"
-version = "0.7.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
@@ -57,19 +56,24 @@ dependencies = [
 "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "cfg-if"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "clap"
-version = "2.19.3"
+version = "2.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
- "strsim 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "unicode-segmentation 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bitflags 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "term_size 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "vec_map 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
@@ -78,11 +82,19 @@ version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
-name = "env_logger"
-version = "0.3.5"
+name = "encoding_rs"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "cfg-if 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
@@ -92,30 +104,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "fs2"
-version = "0.3.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "globset"
-version = "0.1.3"
+version = "0.1.4"
 dependencies = [
- "aho-corasick 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "aho-corasick 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
- "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "grep"
-version = "0.1.5"
+version = "0.1.6"
 dependencies = [
- "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -123,15 +135,15 @@ dependencies = [

 [[package]]
 name = "ignore"
-version = "0.1.7"
+version = "0.1.8"
 dependencies = [
 "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
- "globset 0.1.3",
- "lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "globset 0.1.4",
+ "lazy_static 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "thread_local 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -146,17 +158,17 @@ dependencies = [

 [[package]]
 name = "lazy_static"
-version = "0.2.2"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "libc"
-version = "0.2.20"
+version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "log"
-version = "0.3.6"
+version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
@@ -164,26 +176,26 @@ name = "memchr"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "memmap"
-version = "0.5.0"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fs2 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "num_cpus"
-version = "1.2.1"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
@@ -191,11 +203,11 @@ name = "regex"
 version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
- "aho-corasick 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "aho-corasick 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "thread_local 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -206,11 +218,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "same-file"
-version = "0.1.2"
+version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -221,24 +232,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "strsim"
-version = "0.5.2"
+version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "term_size"
-version = "0.2.1"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "termcolor"
-version = "0.2.0"
+version = "0.3.1"
 dependencies = [
- "wincolor 0.1.2",
+ "wincolor 0.1.3",
 ]

 [[package]]
@@ -247,12 +258,12 @@ version = "3.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [[package]]
 name = "thread_local"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -261,7 +272,7 @@ dependencies = [

 [[package]]
 name = "unicode-segmentation"
-version = "0.1.3"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
@@ -284,7 +295,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "vec_map"
-version = "0.6.0"
+version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
@@ -298,7 +309,7 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "same-file 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -314,43 +325,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "wincolor"
-version = "0.1.2"
+version = "0.1.3"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

 [metadata]
-"checksum aho-corasick 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4f660b942762979b56c9f07b4b36bb559776fbad102f05d6771e1b629e8fd5bf"
+"checksum aho-corasick 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0638fd549427caa90c499814196d1b9e3725eb4d15d7339d6de073a680ed0ca2"
 "checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6"
 "checksum atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d912da0db7fa85514874458ca3651fe2cddace8d0b0505571dbdcd41ab490159"
-"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
+"checksum bitflags 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e1ab483fc81a8143faa7203c4a3c02888ebd1a782e37e41fa34753ba9a162"
 "checksum bytecount 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1e8f09fbc8c6726a4b616dcfbd4f54491068d6bb1b93ac03c78ac18ff9a5924a"
-"checksum clap 2.19.3 (registry+https://github.com/rust-lang/crates.io-index)" = "95b78f3fe0fc94c13c731714363260e04b557a637166f33a4570d3189d642374"
+"checksum cfg-if 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "de1e760d7b6535af4241fca8bd8adf68e2e7edacc6b29f5d399050c5e48cf88c"
+"checksum clap 2.21.1 (registry+https://github.com/rust-lang/crates.io-index)" = "74a80f603221c9cd9aa27a28f52af452850051598537bb6b359c38a7d61e5cda"
 "checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
-"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
+"checksum encoding_rs 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a1cca0a26f904955d80d70b9bff1019e4f4cbc06f2fcbccf8bd3d889cc1c9b7"
+"checksum env_logger 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e3856f1697098606fc6cb97a93de88ca3f3bc35bb878c725920e6e82ecf05e83"
 "checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
-"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551"
+"checksum fs2 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "34edaee07555859dc13ca387e6ae05686bb4d0364c95d649b6dab959511f4baf"
 "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
-"checksum lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6abe0ee2e758cd6bc8a2cd56726359007748fbf4128da998b65d0b70f881e19b"
-"checksum libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)" = "684f330624d8c3784fb9558ca46c4ce488073a8d22450415c5eb4f4cfb0d11b5"
-"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
+"checksum lazy_static 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7291b1dd97d331f752620b02dfdbc231df7fc01bf282a00769e1cdb963c460dc"
+"checksum libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)" = "88ee81885f9f04bff991e306fea7c1c60a5f0f9e409e99f6b40e3311a3363135"
+"checksum log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "5141eca02775a762cc6cd564d8d2c50f67c0ea3a372cbf1c51592b3e029e10ad"
 "checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4"
-"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2"
-"checksum num_cpus 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a225d1e2717567599c24f88e49f00856c6e825a12125181ee42c4257e3688d39"
+"checksum memmap 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "46f3c7359028b31999287dae4e5047ddfe90a23b7dca2282ce759b491080c99b"
+"checksum num_cpus 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a18c392466409c50b87369414a2680c93e739aedeb498eb2bff7d7eb569744e2"
 "checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01"
 "checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457"
-"checksum same-file 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c722bde68d432ad7982a6431b13264cc558af1707c0f321820e238c5671856ea"
+"checksum same-file 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d931a44fdaa43b8637009e7632a02adc4f2b2e0733c08caa4cf00e8da4a117a7"
 "checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
-"checksum strsim 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "67f84c44fbb2f91db7fef94554e6b2ac05909c9c0b0bc23bb98d3a1aebfe7f7c"
-"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0"
+"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
+"checksum term_size 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "07b6c1ac5b3fffd75073276bca1ceed01f67a28537097a2a9539e116e50fb21a"
 "checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a"
-"checksum thread_local 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7793b722f0f77ce716e7f1acf416359ca32ff24d04ffbac4269f44a4a83be05d"
-"checksum unicode-segmentation 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c3bc443ded17b11305ffffe6b37e2076f328a5a8cb6aa877b1b98f77699e98b5"
+"checksum thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c85048c6260d17cf486ceae3282d9fb6b90be220bf5b28c400f5485ffc29f0c7"
+"checksum unicode-segmentation 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18127285758f0e2c6cf325bb3f3d138a12fee27de4f23e146cd6a179f26c2cf3"
 "checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f"
 "checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91"
 "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
-"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f"
+"checksum vec_map 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8cdc8b93bd0198ed872357fb2e667f7125646b1762f16d60b2c96350d361897"
 "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
 "checksum walkdir 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "bb08f9e670fab86099470b97cd2b252d6527f0b3cc1401acdb595ffc9dd288ff"
 "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,8 +28,9 @@ path = "tests/tests.rs"
 [dependencies]
 atty = "0.2.2"
 bytecount = "0.1.4"
-clap = "2.19.0"
-env_logger = { version = "0.3", default-features = false }
+clap = "2.20.5"
+encoding_rs = "0.5.0"
+env_logger = { version = "0.4", default-features = false }
 grep = { version = "0.1.5", path = "grep" }
 ignore = { version = "0.1.7", path = "ignore" }
 lazy_static = "0.2"
@@ -40,7 +41,7 @@ memmap = "0.5"
 num_cpus = "1"
 regex = "0.2.1"
 same-file = "0.1.1"
-termcolor = { version = "0.2.0", path = "termcolor" }
+termcolor = { version = "0.3.0", path = "termcolor" }

 [build-dependencies]
 clap = "2.18"
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 ripgrep (rg)
 ------------
 `ripgrep` is a line oriented search tool that combines the usability of The
-Silver Searcher (an `ack` clone) with the raw speed of GNU grep. `ripgrep`
+Silver Searcher (similar to `ack`) with the raw speed of GNU grep. `ripgrep`
 works by recursively searching your current directory for a regex pattern.
 `ripgrep` has first class support on Windows, Mac and Linux, with binary
 downloads available for
@@ -83,6 +83,10 @@ increases the times to `3.081s` for ripgrep and `11.403s` for GNU grep.
  of search results, searching multiple patterns, highlighting matches with
  color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
  supporting Unicode (which is always on).
+* `ripgrep` supports searching files in text encodings other than UTF-8, such
+  as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
+  automatically detecting UTF-16 is provided. Other text encodings must be
+  explicitly specified with the `-E/--encoding` flag.)

 In other words, use `ripgrep` if you like speed, filtering by default, fewer
 bugs and Unicode support.
@@ -101,18 +105,12 @@ give you a glimpse at some important downsides or missing features of
  support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
  `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
  supported.)
-* If you need to search files with text encodings other than UTF-8 (like
-  UTF-16), then `ripgrep` won't work. `ripgrep` will still work on ASCII
-  compatible encodings like latin1 or otherwise partially valid UTF-8.
-  `ripgrep` *can* search for arbitrary bytes though, which might work in
-  a pinch. (Likely to be supported in the future.)
 * `ripgrep` doesn't yet support searching compressed files. (Likely to be
  supported in the future.)
 * `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)

-In other words, if you like fancy regexes, non-UTF-8 character encodings,
-searching compressed files or multiline search, then `ripgrep` may not quite
-meet your needs (yet).
+In other words, if you like fancy regexes, searching compressed files or
+multiline search, then `ripgrep` may not quite meet your needs (yet).

 ### Is it really faster than everything else?

@@ -167,6 +165,12 @@ $ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
 $ brew install burntsushi/ripgrep/ripgrep-bin
 ```

+If you're a **Windows Chocolatey** user, then you can install `ripgrep` from the [official repo](https://chocolatey.org/packages/ripgrep):
+
+```
+$ choco install ripgrep
+```
+
 If you're an **Arch Linux** user, then you can install `ripgrep` from the official repos:

 ```
@@ -370,3 +374,17 @@ $ cargo test
 ```

 from the repository root.
+
+### Known issues
+
+#### I just hit Ctrl+C in the middle of ripgrep's output and now my terminal's foreground color is wrong!
+
+Type in `color` on Windows and `echo -ne "\033[0m"` on Unix to restore your
+original foreground color.
+
+PR [#187](https://github.com/BurntSushi/ripgrep/pull/187) fixed this, but
+that fix was later deprecated in
+[#281](https://github.com/BurntSushi/ripgrep/issues/281). A full explanation is
+available [here][msys issue explanation].
+
+[msys issue explanation]: https://github.com/BurntSushi/ripgrep/issues/281#issuecomment-269093893
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -40,6 +40,7 @@ before_deploy:
  - cargo build --release
  - mkdir staging
  - copy target\release\rg.exe staging
+  - copy target\release\build\ripgrep-*\out\_rg.ps1 staging
  - cd staging
    # release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
  - 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
--- a/doc/rg.1
+++ b/doc/rg.1
@@ -1,4 +1,4 @@
-.\" Automatically generated by Pandoc 1.19.1
+.\" Automatically generated by Pandoc 1.19.2.1
 .\"
 .TH "rg" "1"
 .hy
@@ -64,20 +64,29 @@ Treat the pattern as a literal string instead of a regular expression.
 .TP
 .B \-g, \-\-glob \f[I]GLOB\f[] ...
 Include or exclude files for searching that match the given glob.
-This always overrides any other ignore logic.
+This always overrides any other ignore logic if there is a conflict, but
+is otherwise applied in addition to ignore files (e.g., .gitignore or
+\&.ignore).
 Multiple glob flags may be used.
 Globbing rules match .gitignore globs.
 Precede a glob with a \[aq]!\[aq] to exclude it.
 .RS
-.PP
-Values given to \-g must be quoted or your shell will expand them and
-result in unexpected behavior.
-.PP
-Combine with the \-\-files flag to return matched filenames (i.e., to
-replicate ack/ag\[aq]s \-g flag).
-.PP
-For example: rg \-g \[aq]<glob>\[aq] \-\-files
 .RE
+.PP
+The \-\-glob flag subsumes the functionality of both the \-\-include and
+\-\-exclude flags commonly found in other tools.
+.IP
+.nf
+\f[C]
+Values\ given\ to\ \-g\ must\ be\ quoted\ or\ your\ shell\ will\ expand\ them\ and\ result
+in\ unexpected\ behavior.
+
+Combine\ with\ the\ \-\-files\ flag\ to\ return\ matched\ filenames
+(i.e.,\ to\ replicate\ ack/ag\[aq]s\ \-g\ flag).
+
+For\ example:\ rg\ \-g\ \[aq]\\<glob\\>\[aq]\ \-\-files
+\f[]
+.fi
 .TP
 .B \-h, \-\-help
 Show this usage message.
@@ -127,6 +136,12 @@ Two \-u flags will search hidden files and directories.
 Three \-u flags will search binary files.
 \-uu is equivalent to grep \-r, and \-uuu is equivalent to grep \-a \-r.
 .RS
+.PP
+Note that the \-u flags are convenient aliases for other combinations of
+flags.
+\-u aliases \[aq]\-\-no\-ignore\[aq].
+\-uu aliases \[aq]\-\-no\-ignore \-\-hidden\[aq].
+\-uuu aliases \[aq]\-\-no\-ignore \-\-hidden \-\-text\[aq].
 .RE
 .TP
 .B \-v, \-\-invert\-match
@@ -199,6 +214,15 @@ Show debug messages.
 .RS
 .RE
 .TP
+.B \-E, \-\-encoding \f[I]ENCODING\f[]
+Specify the text encoding that ripgrep will use on all files searched.
+The default value is \[aq]auto\[aq], which will cause ripgrep to do a
+best effort automatic detection of encoding on a per\-file basis.
+Other supported values can be found in the list of labels here:
+https://encoding.spec.whatwg.org/#concept\-encoding\-get
+.RS
+.RE
+.TP
 .B \-f, \-\-file FILE ...
 Search for patterns from the given file, with one pattern per line.
 When this flag is used or multiple times or in combination with the
@@ -275,11 +299,21 @@ Follow symlinks.
 .RS
 .RE
 .TP
-.B \-m, \-\-max\-count NUM
+.B \-m, \-\-max\-count \f[I]NUM\f[]
 Limit the number of matching lines per file searched to NUM.
 .RS
 .RE
 .TP
+.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]?
+Ignore files larger than \f[I]NUM\f[] in size.
+Directories will never be ignored.
+.RS
+.PP
+\f[I]SUFFIX\f[] is optional and may be one of K, M or G.
+These correspond to kilobytes, megabytes and gigabytes respectively.
+If omitted the input is treated as bytes.
+.RE
+.TP
 .B \-\-maxdepth \f[I]NUM\f[]
 Descend at most NUM directories below the command line arguments.
 A value of zero searches only the starting\-points themselves.
--- a/doc/rg.1.md
+++ b/doc/rg.1.md
@@ -49,8 +49,13 @@ Project home page: https://github.com/BurntSushi/ripgrep

 -g, --glob *GLOB* ...
 : Include or exclude files for searching that match the given glob. This always
-  overrides any other ignore logic. Multiple glob flags may be used. Globbing
-  rules match .gitignore globs. Precede a glob with a '!' to exclude it.
+  overrides any other ignore logic if there is a conflict, but is otherwise
+  applied in addition to ignore files (e.g., .gitignore or .ignore). Multiple
+  glob flags may be used. Globbing rules match .gitignore globs. Precede a
+  glob with a '!' to exclude it.
+
+  The --glob flag subsumes the functionality of both the --include and
+  --exclude flags commonly found in other tools.

    Values given to -g must be quoted or your shell will expand them and result
    in unexpected behavior.
@@ -89,6 +94,10 @@ Project home page: https://github.com/BurntSushi/ripgrep
  -u flags will search binary files. -uu is equivalent to grep -r, and -uuu is
  equivalent to grep -a -r.

+    Note that the -u flags are convenient aliases for other combinations of
+    flags. -u aliases '--no-ignore'. -uu aliases '--no-ignore --hidden'.
+    -uuu aliases '--no-ignore --hidden --text'.
+
 -v, --invert-match
 : Invert matching.

@@ -136,6 +145,13 @@ Project home page: https://github.com/BurntSushi/ripgrep
 --debug
 : Show debug messages.

+-E, --encoding *ENCODING*
+: Specify the text encoding that ripgrep will use on all files
+  searched. The default value is 'auto', which will cause ripgrep to do
+  a best effort automatic detection of encoding on a per-file basis.
+  Other supported values can be found in the list of labels here:
+  https://encoding.spec.whatwg.org/#concept-encoding-get
+
 -f, --file FILE ...
 : Search for patterns from the given file, with one pattern per line. When this
  flag is used or multiple times or in combination with the -e/--regexp flag,
@@ -187,9 +203,20 @@ Project home page: https://github.com/BurntSushi/ripgrep
 -L, --follow
 : Follow symlinks.

-m, --max-count NUM
+-M, --max-columns *NUM*
+: Don't print lines longer than this limit in bytes. Longer lines are omitted,
+  and only the number of matches in that line is printed.
+
+-m, --max-count *NUM*
 : Limit the number of matching lines per file searched to NUM.

+--max-filesize *NUM*+*SUFFIX*?
+: Ignore files larger than *NUM* in size. Directories will never be ignored.
+
+    *SUFFIX* is optional and may be one of K, M or G. These correspond to
+    kilobytes, megabytes and gigabytes respectively. If omitted the input is
+    treated as bytes.
+
 --maxdepth *NUM*
 : Descend at most NUM directories below the command line arguments.
  A value of zero searches only the starting-points themselves.
--- a/globset/COPYING
+++ b/globset/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
--- a/globset/Cargo.toml
+++ b/globset/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "globset"
-version = "0.1.3"  #:version
+version = "0.1.4"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Cross platform single glob and glob set matching. Glob set matching is the
--- a/globset/LICENSE-MIT
+++ b/globset/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/globset/UNLICENSE
+++ b/globset/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
--- a/globset/src/glob.rs
+++ b/globset/src/glob.rs
@@ -659,9 +659,18 @@ impl Tokens {
                    for pat in patterns {
                        let mut altre = String::new();
                        self.tokens_to_regex(options, &pat, &mut altre);
-                        parts.push(altre);
+                        if !altre.is_empty() {
+                            parts.push(altre);
+                        }
+                    }
+
+                    // It is possible to have an empty set in which case the
+                    // resulting alternation '()' would be an error.
+                    if !parts.is_empty() {
+                        re.push('(');
+                        re.push_str(&parts.join("|"));
+                        re.push(')');
                    }
-                    re.push_str(&parts.join("|"));
                }
            }
        }
--- a/grep/COPYING
+++ b/grep/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
--- a/grep/Cargo.toml
+++ b/grep/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "grep"
-version = "0.1.5"  #:version
+version = "0.1.6"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Fast line oriented regex searching as a library.
--- a/grep/LICENSE-MIT
+++ b/grep/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/grep/UNLICENSE
+++ b/grep/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
--- a/ignore/COPYING
+++ b/ignore/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
--- a/ignore/Cargo.toml
+++ b/ignore/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ignore"
-version = "0.1.7"  #:version
+version = "0.1.8"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 A fast library for efficiently matching ignore files such as `.gitignore`
@@ -19,7 +19,7 @@ bench = false

 [dependencies]
 crossbeam = "0.2"
-globset = { version = "0.1.3", path = "../globset" }
+globset = { version = "0.1.4", path = "../globset" }
 lazy_static = "0.2"
 log = "0.3"
 memchr = "1"
--- a/ignore/LICENSE-MIT
+++ b/ignore/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/ignore/UNLICENSE
+++ b/ignore/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
--- a/ignore/src/gitignore.rs
+++ b/ignore/src/gitignore.rs
@@ -368,9 +368,10 @@ impl GitignoreBuilder {
        };
        let mut literal_separator = false;
        let has_slash = line.chars().any(|c| c == '/');
-        let is_absolute = line.chars().nth(0).unwrap() == '/';
+        let mut is_absolute = false;
        if line.starts_with("\\!") || line.starts_with("\\#") {
            line = &line[1..];
+            is_absolute = line.chars().nth(0) == Some('/');
        } else {
            if line.starts_with("!") {
                glob.is_whitelist = true;
@@ -383,6 +384,7 @@ impl GitignoreBuilder {
                // simply banning wildcards from matching /.
                literal_separator = true;
                line = &line[1..];
+                is_absolute = true;
            }
        }
        // If it ends with a slash, then this should only match directories,
@@ -570,6 +572,7 @@ mod tests {
    not_ignored!(
        ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config");
+    not_ignored!(ignot15, ROOT, "!/bar", "foo/bar");

    fn bytes(s: &str) -> Vec<u8> {
        s.to_string().into_bytes()
--- a/ignore/src/overrides.rs
+++ b/ignore/src/overrides.rs
@@ -214,4 +214,10 @@ mod tests {
        assert!(ov.matched("src/foo", false).is_ignore());
        assert!(ov.matched("src/foo", true).is_none());
    }
+
+    #[test]
+    fn absolute_path() {
+        let ov = ov(&["!/bar"]);
+        assert!(ov.matched("./foo/bar", false).is_none());
+    }
 }
--- a/ignore/src/types.rs
+++ b/ignore/src/types.rs
@@ -116,7 +116,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("crystal", &["Projectfile", "*.cr"]),
    ("cs", &["*.cs"]),
    ("csharp", &["*.cs"]),
-    ("css", &["*.css"]),
+    ("css", &["*.css", "*.scss"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
@@ -134,7 +134,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("h", &["*.h", "*.hpp"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs"]),
-    ("html", &["*.htm", "*.html"]),
+    ("html", &["*.htm", "*.html", "*.ejs"]),
    ("java", &["*.java"]),
    ("jinja", &["*.jinja", "*.jinja2"]),
    ("js", &[
@@ -145,6 +145,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("kotlin", &["*.kt", "*.kts"]),
    ("less", &["*.less"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
+    ("log", &["*.log"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk", "*.mak"]),
@@ -177,6 +178,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("stylus", &["*.styl"]),
    ("sql", &["*.sql"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
+    ("svg", &["*.svg"]),
    ("swift", &["*.swift"]),
    ("swig", &["*.def", "*.i"]),
    ("taskpaper", &["*.taskpaper"]),
--- a/ignore/src/walk.rs
+++ b/ignore/src/walk.rs
@@ -392,7 +392,9 @@ impl DirEntryRaw {
 /// continues.
 /// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
 /// path is skipped.
-/// * Sixth, if the path has made it this far then it is yielded in the
+/// * Sixth, unless the path is a directory, the size of the file is compared
+/// against the max filesize limit. If it exceeds the limit, it is skipped.
+/// * Seventh, if the path has made it this far then it is yielded in the
 /// iterator.
 #[derive(Clone)]
 pub struct WalkBuilder {
@@ -400,6 +402,7 @@ pub struct WalkBuilder {
    ig_builder: IgnoreBuilder,
    parents: bool,
    max_depth: Option<usize>,
+    max_filesize: Option<u64>,
    follow_links: bool,
    sorter: Option<Arc<Fn(&OsString, &OsString) -> cmp::Ordering + 'static>>,
    threads: usize,
@@ -412,6 +415,7 @@ impl fmt::Debug for WalkBuilder {
            .field("ig_builder", &self.ig_builder)
            .field("parents", &self.parents)
            .field("max_depth", &self.max_depth)
+            .field("max_filesize", &self.max_filesize)
            .field("follow_links", &self.follow_links)
            .field("threads", &self.threads)
            .finish()
@@ -431,6 +435,7 @@ impl WalkBuilder {
            ig_builder: IgnoreBuilder::new(),
            parents: true,
            max_depth: None,
+            max_filesize: None,
            follow_links: false,
            sorter: None,
            threads: 0,
@@ -464,6 +469,7 @@ impl WalkBuilder {
            it: None,
            ig_root: ig_root.clone(),
            ig: ig_root.clone(),
+            max_filesize: self.max_filesize,
            parents: self.parents,
        }
    }
@@ -478,6 +484,7 @@ impl WalkBuilder {
            paths: self.paths.clone().into_iter(),
            ig_root: self.ig_builder.build(),
            max_depth: self.max_depth,
+            max_filesize: self.max_filesize,
            follow_links: self.follow_links,
            parents: self.parents,
            threads: self.threads,
@@ -508,6 +515,12 @@ impl WalkBuilder {
        self
    }

+    /// Skip files larger than `filesize` bytes; `None` (default) disables this.
+    pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
+        self.max_filesize = filesize;
+        self
+    }
+
    /// The number of threads to use for traversal.
    ///
    /// Note that this only has an effect when using `build_parallel`.
@@ -650,6 +663,7 @@ pub struct Walk {
    it: Option<WalkEventIter>,
    ig_root: Ignore,
    ig: Ignore,
+    max_filesize: Option<u64>,
    parents: bool,
 }

@@ -667,7 +681,17 @@ impl Walk {
        if ent.depth() == 0 {
            return false;
        }
-        skip_path(&self.ig, ent.path(), ent.file_type().is_dir())
+
+        let is_dir = ent.file_type().is_dir();
+        let max_size = self.max_filesize;
+        let should_skip_path = skip_path(&self.ig, ent.path(), is_dir);
+        let should_skip_filesize = if !is_dir && max_size.is_some() {
+            skip_filesize(max_size.unwrap(), ent.path(), &ent.metadata().ok())
+        } else {
+            false
+        };
+
+        should_skip_path || should_skip_filesize
    }
 }

@@ -824,6 +848,7 @@ pub struct WalkParallel {
    paths: vec::IntoIter<PathBuf>,
    ig_root: Ignore,
    parents: bool,
+    max_filesize: Option<u64>,
    max_depth: Option<usize>,
    follow_links: bool,
    threads: usize,
@@ -886,6 +911,7 @@ impl WalkParallel {
                threads: threads,
                parents: self.parents,
                max_depth: self.max_depth,
+                max_filesize: self.max_filesize,
                follow_links: self.follow_links,
            };
            handles.push(thread::spawn(|| worker.run()));
@@ -1000,6 +1026,9 @@ struct Worker {
    /// The maximum depth of directories to descend. A value of `0` means no
    /// descension at all.
    max_depth: Option<usize>,
+    /// The maximum size a searched file can be (in bytes). If a file exceeds
+    /// this size it will be skipped.
+    max_filesize: Option<u64>,
    /// Whether to follow symbolic links or not. When this is enabled, loop
    /// detection is performed.
    follow_links: bool,
@@ -1106,7 +1135,15 @@ impl Worker {
            }
        }
        let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir());
-        if !skip_path(ig, dent.path(), is_dir) {
+        let max_size = self.max_filesize;
+        let should_skip_path = skip_path(ig, dent.path(), is_dir);
+        let should_skip_filesize = if !is_dir && max_size.is_some() {
+            skip_filesize(max_size.unwrap(), dent.path(), &dent.metadata().ok())
+        } else {
+            false
+        };
+
+        if !should_skip_path && !should_skip_filesize {
            self.queue.push(Message::Work(Work {
                dent: dent,
                ignore: ig.clone(),
@@ -1253,6 +1290,30 @@ fn check_symlink_loop(
    Ok(())
 }

+// Reports whether the file at `path` exceeds `max_filesize` bytes. Only call
+// this when a limit is set, since producing the metadata implies a file stat.
+fn skip_filesize(
+    max_filesize: u64,
+    path: &Path,
+    ent: &Option<Metadata>
+) -> bool {
+    // If the metadata is unavailable, the size is unknown and the file is
+    // never skipped.
+    let size = match *ent {
+        Some(ref md) => md.len(),
+        None => return false,
+    };
+
+    // A file exactly at the limit is still searched; only strictly larger
+    // files are dropped.
+    if size <= max_filesize {
+        return false;
+    }
+
+    debug!("ignoring {}: {} bytes", path.display(), size);
+    true
+}
+
 fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool {
    let m = ig.matched(path, is_dir);
    if m.is_ignore() {
@@ -1282,6 +1343,11 @@ mod tests {
        file.write_all(contents.as_bytes()).unwrap();
    }

+    fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
+        let file = File::create(path).unwrap();
+        file.set_len(size).unwrap();
+    }
+
    #[cfg(unix)]
    fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
        use std::os::unix::fs::symlink;
@@ -1438,6 +1504,32 @@ mod tests {
        ]);
    }

+    #[test]
+    fn max_filesize() {
+        let td = TempDir::new("walk-test-").unwrap();
+        mkdirp(td.path().join("a/b"));
+        wfile_size(td.path().join("foo"), 0);
+        wfile_size(td.path().join("bar"), 400);
+        wfile_size(td.path().join("baz"), 600);
+        wfile_size(td.path().join("a/foo"), 600);
+        wfile_size(td.path().join("a/bar"), 500);
+        wfile_size(td.path().join("a/baz"), 200);
+
+        let mut builder = WalkBuilder::new(td.path());
+        assert_paths(td.path(), &builder, &[
+            "a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
+        ]);
+        assert_paths(td.path(), builder.max_filesize(Some(0)), &[
+            "a", "a/b", "foo"
+        ]);
+        assert_paths(td.path(), builder.max_filesize(Some(500)), &[
+            "a", "a/b", "foo", "bar", "a/bar", "a/baz"
+        ]);
+        assert_paths(td.path(), builder.max_filesize(Some(50000)), &[
+            "a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
+        ]);
+    }
+
    #[cfg(unix)] // because symlinks on windows are weird
    #[test]
    fn symlinks() {
--- a/src/app.rs
+++ b/src/app.rs
@@ -96,6 +96,8 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
             .possible_values(&["never", "auto", "always", "ansi"]))
        .arg(flag("colors").value_name("SPEC")
             .takes_value(true).multiple(true).number_of_values(1))
+        .arg(flag("encoding").short("E").value_name("ENCODING")
+             .takes_value(true).number_of_values(1))
        .arg(flag("fixed-strings").short("F"))
        .arg(flag("glob").short("g")
             .takes_value(true).multiple(true).number_of_values(1)
@@ -135,8 +137,8 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
        .arg(flag("files-without-match"))
        .arg(flag("with-filename").short("H"))
        .arg(flag("no-filename"))
-        .arg(flag("heading"))
-        .arg(flag("no-heading"))
+        .arg(flag("heading").overrides_with("no-heading"))
+        .arg(flag("no-heading").overrides_with("heading"))
        .arg(flag("hidden"))
        .arg(flag("ignore-file")
             .value_name("FILE").takes_value(true)
@@ -145,6 +147,8 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
        .arg(flag("max-count")
             .short("m").value_name("NUM").takes_value(true)
             .validator(validate_number))
+        .arg(flag("max-filesize")
+             .value_name("NUM+SUFFIX?").takes_value(true))
        .arg(flag("maxdepth")
             .value_name("NUM").takes_value(true)
             .validator(validate_number))
@@ -165,6 +169,9 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
             .short("j").value_name("ARG").takes_value(true)
             .validator(validate_number))
        .arg(flag("vimgrep"))
+        .arg(flag("max-columns").short("M")
+             .value_name("NUM").takes_value(true)
+             .validator(validate_number))
        .arg(flag("type-add")
             .value_name("TYPE").takes_value(true)
             .multiple(true).number_of_values(1))
@@ -249,6 +256,14 @@ lazy_static! {
              change the match color to magenta and the background color for \
              line numbers to yellow:\n\n\
              rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo.");
+        doc!(h, "encoding",
+             "Specify the text encoding of files to search.",
+             "Specify the text encoding that ripgrep will use on all files \
+              searched. The default value is 'auto', which will cause ripgrep \
+              to do a best effort automatic detection of encoding on a \
+              per-file basis. Other supported values can be found in the list \
+              of labels here: \
+              https://encoding.spec.whatwg.org/#concept-encoding-get");
        doc!(h, "fixed-strings",
             "Treat the pattern as a literal string.",
             "Treat the pattern as a literal string instead of a regular \
@@ -333,9 +348,9 @@ lazy_static! {
              provided are searched. Empty pattern lines will match all input \
              lines, and the newline is not counted as part of the pattern.");
        doc!(h, "files-with-matches",
-             "Only show the path of each file with at least one match.");
+             "Only show the paths with at least one match.");
        doc!(h, "files-without-match",
-             "Only show the path of each file that contains zero matches.");
+             "Only show the paths that contain zero matches.");
        doc!(h, "with-filename",
             "Show file name for each match.",
             "Prefix each match with the file name that contains it. This is \
@@ -371,6 +386,13 @@ lazy_static! {
        doc!(h, "max-count",
             "Limit the number of matches.",
             "Limit the number of matching lines per file searched to NUM.");
+        doc!(h, "max-filesize",
+             "Ignore files larger than NUM in size.",
+             "Ignore files larger than NUM in size. Does not ignore directories. \
+              \n\nThe input format accepts suffixes of K, M or G which \
+              correspond to kilobytes, megabytes and gigabytes. If no suffix is \
+              provided the input is treated as bytes. \
+              \n\nExample: --max-filesize 50K or --max-filesize 80M");
        doc!(h, "maxdepth",
             "Descend at most NUM directories.",
             "Limit the depth of directory traversal to NUM levels beyond \
@@ -454,6 +476,11 @@ lazy_static! {
             "Show results with every match on its own line, including \
              line numbers and column numbers. With this option, a line with \
              more than one match will be printed more than once.");
+        doc!(h, "max-columns",
+             "Don't print lines longer than this limit in bytes.",
+             "Don't print lines longer than this limit in bytes. Longer lines \
+              are omitted, and only the number of matches in that line is \
+              printed.");

        doc!(h, "type-add",
             "Add a new glob for a file type.",
--- a/src/args.rs
+++ b/src/args.rs
@@ -10,6 +10,7 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, Ordering};

 use clap;
+use encoding_rs::Encoding;
 use env_logger;
 use grep::{Grep, GrepBuilder};
 use log;
@@ -41,6 +42,7 @@ pub struct Args {
    column: bool,
    context_separator: Vec<u8>,
    count: bool,
+    encoding: Option<&'static Encoding>,
    files_with_matches: bool,
    files_without_matches: bool,
    eol: u8,
@@ -54,7 +56,9 @@ pub struct Args {
    invert_match: bool,
    line_number: bool,
    line_per_match: bool,
+    max_columns: Option<usize>,
    max_count: Option<u64>,
+    max_filesize: Option<u64>,
    maxdepth: Option<usize>,
    mmap: bool,
    no_ignore: bool,
@@ -153,7 +157,8 @@ impl Args {
            .line_per_match(self.line_per_match)
            .null(self.null)
            .path_separator(self.path_separator)
-            .with_filename(self.with_filename);
+            .with_filename(self.with_filename)
+            .max_columns(self.max_columns);
        if let Some(ref rep) = self.replace {
            p = p.replace(rep.clone());
        }
@@ -223,6 +228,7 @@ impl Args {
            .after_context(self.after_context)
            .before_context(self.before_context)
            .count(self.count)
+            .encoding(self.encoding)
            .files_with_matches(self.files_with_matches)
            .files_without_matches(self.files_without_matches)
            .eol(self.eol)
@@ -285,6 +291,7 @@ impl Args {
        wd.follow_links(self.follow);
        wd.hidden(!self.hidden);
        wd.max_depth(self.maxdepth);
+        wd.max_filesize(self.max_filesize);
        wd.overrides(self.glob_overrides.clone());
        wd.types(self.types.clone());
        wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
@@ -314,6 +321,7 @@ impl<'a> ArgMatches<'a> {
    /// configuration.
    fn to_args(&self) -> Result<Args> {
        let paths = self.paths();
+        let line_number = self.line_number(&paths);
        let mmap = try!(self.mmap(&paths));
        let with_filename = self.with_filename(&paths);
        let (before_context, after_context) = try!(self.contexts());
@@ -328,6 +336,7 @@ impl<'a> ArgMatches<'a> {
            column: self.column(),
            context_separator: self.context_separator(),
            count: self.is_present("count"),
+            encoding: try!(self.encoding()),
            files_with_matches: self.is_present("files-with-matches"),
            files_without_matches: self.is_present("files-without-match"),
            eol: b'\n',
@@ -339,9 +348,11 @@ impl<'a> ArgMatches<'a> {
            hidden: self.hidden(),
            ignore_files: self.ignore_files(),
            invert_match: self.is_present("invert-match"),
-            line_number: self.line_number(),
+            line_number: line_number,
            line_per_match: self.is_present("vimgrep"),
+            max_columns: try!(self.usize_of("max-columns")),
            max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
+            max_filesize: try!(self.max_filesize()),
            maxdepth: try!(self.usize_of("maxdepth")),
            mmap: mmap,
            no_ignore: self.no_ignore(),
@@ -434,6 +445,9 @@ impl<'a> ArgMatches<'a> {
    ///
    /// If any pattern is invalid UTF-8, then an error is returned.
    fn patterns(&self) -> Result<Vec<String>> {
+        if self.is_present("files") || self.is_present("type-list") {
+            return Ok(vec![self.empty_pattern()]);
+        }
        let mut pats = vec![];
        match self.values_of_os("regexp") {
            None => {
@@ -563,6 +577,7 @@ impl<'a> ArgMatches<'a> {
    /// will need to search.
    fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
        let (before, after) = try!(self.contexts());
+        let enc = try!(self.encoding());
        Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
            false
        } else if self.is_present("mmap") {
@@ -570,6 +585,10 @@ impl<'a> ArgMatches<'a> {
        } else if cfg!(target_os = "macos") {
            // On Mac, memory maps appear to suck. Neat.
            false
+        } else if enc.is_some() {
+            // There's no practical way to transcode a memory map that isn't
+            // isomorphic to searching over io::Read.
+            false
        } else {
            // If we're only searching a few paths and all of them are
            // files, then memory maps are probably faster.
@@ -578,13 +597,14 @@ impl<'a> ArgMatches<'a> {
    }

    /// Returns true if and only if line numbers should be shown.
-    fn line_number(&self) -> bool {
+    fn line_number(&self, paths: &[PathBuf]) -> bool {
        if self.is_present("no-line-number") || self.is_present("count") {
            false
        } else {
+            let only_stdin = paths == &[Path::new("-")];
            self.is_present("line-number")
            || self.is_present("column")
-            || atty::is(atty::Stream::Stdout)
+            || (atty::is(atty::Stream::Stdout) && !only_stdin)
            || self.is_present("pretty")
            || self.is_present("vimgrep")
        }
@@ -715,6 +735,29 @@ impl<'a> ArgMatches<'a> {
        Ok(ColorSpecs::new(&specs))
    }

+    /// Resolve the `--encoding` flag into an explicit text encoding.
+    ///
+    /// `Ok(None)` is returned when the flag is absent or set to `auto`,
+    /// which means the encoding should be automatically detected on a
+    /// per-file basis.
+    ///
+    /// Any other label that doesn't name a supported encoding results in
+    /// an error.
+    fn encoding(&self) -> Result<Option<&'static Encoding>> {
+        let label = match self.0.value_of_lossy("encoding") {
+            Some(label) => label,
+            None => return Ok(None),
+        };
+        if label == "auto" {
+            return Ok(None);
+        }
+        // Anything else must be a label that encoding_rs can resolve.
+        if let Some(enc) = Encoding::for_label(label.as_bytes()) {
+            return Ok(Some(enc));
+        }
+        Err(From::from(format!("unsupported encoding: {}", label)))
+    }
+
    /// Returns the approximate number of threads that ripgrep should use.
    fn threads(&self) -> Result<usize> {
        if self.is_present("sort-files") {
@@ -776,6 +819,31 @@ impl<'a> ArgMatches<'a> {
        btypes.build().map_err(From::from)
    }

+    /// Parses the max-filesize argument (`N`, `NK`, `NM` or `NG`) into a
+    /// byte count. A size that does not fit in a `u64` is an error.
+    fn max_filesize(&self) -> Result<Option<u64>> {
+        use regex::Regex;
+
+        let max_filesize = match self.value_of_lossy("max-filesize") {
+            Some(x) => x,
+            None => return Ok(None)
+        };
+        let re = Regex::new("^([0-9]+)([KMG])?$").unwrap();
+        let caps = try!(re.captures(&max_filesize)
+                          .ok_or("invalid format for max-filesize argument"));
+        let value = try!(caps[1].parse::<u64>().map_err(|err| err.to_string()));
+        let scale: u64 = match caps.get(2).map(|x| x.as_str()) {
+            None      => 1,
+            Some("K") => 1 << 10,
+            Some("M") => 1 << 20,
+            Some("G") => 1 << 30,
+            _ => return Err(From::from(
+                "invalid suffix for max-filesize argument")),
+        };
+        let total = value.checked_mul(scale);
+        Ok(Some(try!(total.ok_or("max-filesize argument is too large"))))
+    }
+
    /// Returns true if ignore files should be ignored.
    fn no_ignore(&self) -> bool {
        self.is_present("no-ignore")
--- a/src/decoder.rs
+++ b/src/decoder.rs
@@ -0,0 +1,461 @@
+#![allow(dead_code)]
+
+use std::cmp;
+use std::io::{self, Read};
+
+use encoding_rs::{Decoder, Encoding, UTF_8};
+
+/// The first bytes of a reader, captured so they can be checked for a BOM.
+///
+/// At most 3 bytes are captured. `len` is the number of valid bytes in
+/// `bytes`; it is less than 3 only if the reader hit EOF early.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+struct Bom {
+    bytes: [u8; 3],
+    len: usize,
+}
+
+impl Bom {
+    fn as_slice(&self) -> &[u8] {
+        &self.bytes[..self.len]
+    }
+
+    /// Decoder for the non-UTF-8 BOM at the start of a 3-byte prefix, if any.
+    fn decoder(&self) -> Option<Decoder> {
+        let prefix = self.as_slice();
+        if prefix.len() < 3 {
+            return None;
+        }
+        match Encoding::for_bom(prefix) {
+            Some((enc, _)) if enc != UTF_8 =>
+                Some(enc.new_decoder_with_bom_removal()),
+            _ => None,
+        }
+    }
+}
+
+/// BomPeeker wraps `R` and satisfies the `io::Read` interface while also
+/// providing a peek at the BOM if one exists. Peeking at the BOM does not
+/// advance the reader.
+struct BomPeeker<R> {
+    rdr: R,
+    bom: Option<Bom>,
+    nread: usize,
+}
+
+impl<R: io::Read> BomPeeker<R> {
+    /// Create a new BomPeeker.
+    ///
+    /// The first three bytes can be read using the `peek_bom` method, but
+    /// will not advance the reader.
+    fn new(rdr: R) -> BomPeeker<R> {
+        BomPeeker { rdr: rdr, bom: None, nread: 0 }
+    }
+
+    /// Peek at the first three bytes of the underlying reader without
+    /// advancing the reader provided by `BomPeeker`.
+    ///
+    /// If fewer than three bytes are available before EOF, the returned
+    /// `Bom` holds only those bytes (its `len` is less than 3).
+    fn peek_bom(&mut self) -> io::Result<Bom> {
+        if let Some(bom) = self.bom {
+            return Ok(bom);
+        }
+        // If the read below fails, later peeks return this empty BOM.
+        self.bom = Some(Bom { bytes: [0; 3], len: 0 });
+        let mut buf = [0u8; 3];
+        let bom_len = try!(read_full(&mut self.rdr, &mut buf));
+        self.bom = Some(Bom { bytes: buf, len: bom_len });
+        Ok(self.bom.unwrap())
+    }
+}
+
+impl<R: io::Read> io::Read for BomPeeker<R> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        if self.nread < 3 {
+            let bom = try!(self.peek_bom());
+            let bom = bom.as_slice();
+            if self.nread < bom.len() {
+                let rest = &bom[self.nread..];
+                let len = cmp::min(buf.len(), rest.len());
+                buf[..len].copy_from_slice(&rest[..len]);
+                self.nread += len;
+                return Ok(len);
+            }
+        }
+        let nread = try!(self.rdr.read(buf));
+        self.nread += nread;
+        Ok(nread)
+    }
+}
+
+/// Reads from `rdr` until `buf` is full or EOF is seen, returning the number
+/// of bytes read. Unlike io::Read::read_exact, hitting EOF early is not an
+/// UnexpectedEof error; the short count is returned instead.
+fn read_full<R: io::Read>(
+    mut rdr: R,
+    buf: &mut [u8],
+) -> io::Result<usize> {
+    let mut filled = 0;
+    while filled != buf.len() {
+        match rdr.read(&mut buf[filled..]) {
+            // EOF: stop and report what has been read so far.
+            Ok(0) => break,
+            Ok(n) => {
+                filled += n;
+            }
+            // An interrupted read is simply retried.
+            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
+            Err(e) => return Err(e),
+        }
+    }
+    Ok(filled)
+}
+
+/// A reader that transcodes to UTF-8. The source encoding is determined by
+/// inspecting the BOM from the stream read from `R`, if one exists. If a
+/// UTF-16 BOM exists, then the source stream is transcoded to UTF-8 with
+/// invalid UTF-16 sequences translated to the Unicode replacement character.
+/// In all other cases, the underlying reader is passed through unchanged.
+///
+/// `R` is the type of the underlying reader and `B` is the type of an internal
+/// buffer used to store the results of transcoding.
+///
+/// Note that not all methods on `io::Read` work with this implementation.
+/// For example, the `bytes` adapter method attempts to read a single byte at
+/// a time, but this implementation requires a buffer of size at least `4`. If
+/// a buffer of size less than 4 is given, then an error is returned.
+pub struct DecodeReader<R, B> {
+    /// The underlying reader, wrapped in a peeker for reading a BOM if one
+    /// exists.
+    rdr: BomPeeker<R>,
+    /// The internal buffer to store transcoded bytes before they are read by
+    /// callers.
+    buf: B,
+    /// The current position in `buf`. Subsequent reads start here.
+    pos: usize,
+    /// The number of transcoded bytes in `buf`. Subsequent reads end here.
+    buflen: usize,
+    /// Whether the next read must first inspect the BOM to pick a decoder.
+    first: bool,
+    /// Whether a "last" read has occurred. After this point, EOF will always
+    /// be returned.
+    last: bool,
+    /// The underlying text decoder derived from the BOM, if one exists.
+    decoder: Option<Decoder>,
+}
+
+impl<R: io::Read, B: AsMut<[u8]>> DecodeReader<R, B> {
+    /// Create a new transcoder that converts a source stream to valid UTF-8.
+    ///
+    /// If an encoding is specified, then it is used to transcode `rdr` to
+    /// UTF-8. Otherwise, if no encoding is specified, and if a UTF-16 BOM is
+    /// found, then the corresponding UTF-16 encoding is used to transcode
+    /// `rdr` to UTF-8. In all other cases, `rdr` is assumed to be at least
+    /// ASCII-compatible and passed through untouched.
+    ///
+    /// Errors in the encoding of `rdr` are handled with the Unicode
+    /// replacement character. If no encoding of `rdr` is specified, then
+    /// errors are not handled.
+    pub fn new(
+        rdr: R,
+        buf: B,
+        enc: Option<&'static Encoding>,
+    ) -> DecodeReader<R, B> {
+        DecodeReader {
+            rdr: BomPeeker::new(rdr),
+            buf: buf,
+            buflen: 0,
+            pos: 0,
+            first: enc.is_none(),
+            last: false,
+            decoder: enc.map(|enc| enc.new_decoder_with_bom_removal()),
+        }
+    }
+
+    /// Fill the internal buffer from the underlying reader.
+    ///
+    /// Unread bytes, if any, are first moved to the beginning of the internal
+    /// buffer and the remainder is filled. If unread bytes remain while the
+    /// buffer is filled to capacity, then an error is returned.
+    #[inline(always)] // massive perf benefit (???)
+    fn fill(&mut self) -> io::Result<()> {
+        if self.pos < self.buflen {
+            if self.buflen >= self.buf.as_mut().len() {
+                return Err(io::Error::new(
+                    io::ErrorKind::Other,
+                    "DecodeReader: internal buffer exhausted"));
+            }
+            let newlen = self.buflen - self.pos;
+            let mut tmp = Vec::with_capacity(newlen);
+            tmp.extend_from_slice(&self.buf.as_mut()[self.pos..self.buflen]);
+            self.buf.as_mut()[..newlen].copy_from_slice(&tmp);
+            self.buflen = newlen;
+        } else {
+            self.buflen = 0;
+        }
+        self.pos = 0;
+        self.buflen +=
+            try!(self.rdr.read(&mut self.buf.as_mut()[self.buflen..]));
+        Ok(())
+    }
+
+    /// Transcode the inner stream to UTF-8 in `buf`. This assumes that there
+    /// is a decoder capable of transcoding the inner stream to UTF-8. This
+    /// returns the number of bytes written to `buf`.
+    ///
+    /// When this function returns, at least one of the following things will
+    /// be true:
+    ///
+    /// 1. A non-zero number of bytes were written to `buf`.
+    /// 2. The underlying reader reached EOF.
+    /// 3. An error is returned: the internal buffer ran out of room.
+    /// 4. An I/O error occurred.
+    ///
+    /// Note that `buf` must have at least 4 bytes of space.
+    fn transcode(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        assert!(buf.len() >= 4);
+        if self.last {
+            return Ok(0);
+        }
+        if self.pos >= self.buflen {
+            try!(self.fill());
+        }
+        let mut nwrite = 0;
+        loop {
+            let (_, nin, nout, _) =
+                self.decoder.as_mut().unwrap().decode_to_utf8(
+                    &self.buf.as_mut()[self.pos..self.buflen], buf, false);
+            self.pos += nin;
+            nwrite += nout;
+            // If we've written at least one byte to the caller-provided
+            // buffer, then our mission is complete.
+            if nwrite > 0 {
+                break;
+            }
+            // Otherwise, we know that our internal buffer has insufficient
+            // data to transcode at least one char, so we attempt to refill it.
+            try!(self.fill());
+            // Quit on EOF.
+            if self.buflen == 0 {
+                self.pos = 0;
+                self.last = true;
+                let (_, _, nout, _) =
+                    self.decoder.as_mut().unwrap().decode_to_utf8(
+                        &[], buf, true);
+                return Ok(nout);
+            }
+        }
+        Ok(nwrite)
+    }
+
+    /// Peek at the start of the stream and set `decoder` to whatever
+    /// `Bom::decoder` derives from it (`None` if it derives nothing).
+    #[inline(never)] // impacts perf...
+    fn detect(&mut self) -> io::Result<()> {
+        let bom = try!(self.rdr.peek_bom());
+        self.decoder = bom.decoder();
+        Ok(())
+    }
+}
+
+impl<R: io::Read, B: AsMut<[u8]>> io::Read for DecodeReader<R, B> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        if self.first {
+            self.first = false;
+            try!(self.detect());
+        }
+        if self.decoder.is_none() {
+            return self.rdr.read(buf);
+        }
+        // When transcoding to UTF-8, we need at least 4 bytes of space to
+        // guarantee that we can write at least one codepoint. If we don't
+        // have it, we can either return `0` for the number of bytes read or
+        // return an error. Since `0` would be interpreted as a possibly
+        // premature EOF, we opt for an error.
+        if buf.len() < 4 {
+            return Err(io::Error::new(
+                io::ErrorKind::Other,
+                "DecodeReader: byte buffer must have length at least 4"));
+        }
+        self.transcode(buf)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Read;
+
+    use encoding_rs::Encoding;
+
+    use super::{Bom, BomPeeker, DecodeReader};
+
+    fn utf8(bytes: &[u8]) -> &str {
+        ::std::str::from_utf8(bytes).unwrap()
+    }
+
+    fn read_to_string<R: Read>(mut rdr: R) -> String {
+        let mut s = String::new();
+        rdr.read_to_string(&mut s).unwrap();
+        s
+    }
+
+    #[test]
+    fn peeker_empty() {
+        let buf = [];
+        let mut peeker = BomPeeker::new(&buf[..]);
+        assert_eq!(Bom { bytes: [0; 3], len: 0}, peeker.peek_bom().unwrap());
+
+        let mut tmp = [0; 100];
+        assert_eq!(0, peeker.read(&mut tmp).unwrap());
+    }
+
+    #[test]
+    fn peeker_one() {
+        let buf = [1];
+        let mut peeker = BomPeeker::new(&buf[..]);
+        assert_eq!(
+            Bom { bytes: [1, 0, 0], len: 1},
+            peeker.peek_bom().unwrap());
+
+        let mut tmp = [0; 100];
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(1, tmp[0]);
+        assert_eq!(0, peeker.read(&mut tmp).unwrap());
+    }
+
+    #[test]
+    fn peeker_two() {
+        let buf = [1, 2];
+        let mut peeker = BomPeeker::new(&buf[..]);
+        assert_eq!(
+            Bom { bytes: [1, 2, 0], len: 2},
+            peeker.peek_bom().unwrap());
+
+        let mut tmp = [0; 100];
+        assert_eq!(2, peeker.read(&mut tmp).unwrap());
+        assert_eq!(1, tmp[0]);
+        assert_eq!(2, tmp[1]);
+        assert_eq!(0, peeker.read(&mut tmp).unwrap());
+    }
+
+    #[test]
+    fn peeker_three() {
+        let buf = [1, 2, 3];
+        let mut peeker = BomPeeker::new(&buf[..]);
+        assert_eq!(
+            Bom { bytes: [1, 2, 3], len: 3},
+            peeker.peek_bom().unwrap());
+
+        let mut tmp = [0; 100];
+        assert_eq!(3, peeker.read(&mut tmp).unwrap());
+        assert_eq!(1, tmp[0]);
+        assert_eq!(2, tmp[1]);
+        assert_eq!(3, tmp[2]);
+        assert_eq!(0, peeker.read(&mut tmp).unwrap());
+    }
+
+    #[test]
+    fn peeker_four() {
+        let buf = [1, 2, 3, 4];
+        let mut peeker = BomPeeker::new(&buf[..]);
+        assert_eq!(
+            Bom { bytes: [1, 2, 3], len: 3},
+            peeker.peek_bom().unwrap());
+
+        let mut tmp = [0; 100];
+        assert_eq!(3, peeker.read(&mut tmp).unwrap());
+        assert_eq!(1, tmp[0]);
+        assert_eq!(2, tmp[1]);
+        assert_eq!(3, tmp[2]);
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(4, tmp[0]);
+        assert_eq!(0, peeker.read(&mut tmp).unwrap());
+    }
+
+    #[test]
+    fn peeker_one_at_a_time() {
+        let buf = [1, 2, 3, 4];
+        let mut peeker = BomPeeker::new(&buf[..]);
+
+        let mut tmp = [0; 1];
+        assert_eq!(0, peeker.read(&mut tmp[..0]).unwrap());
+        assert_eq!(0, tmp[0]);
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(1, tmp[0]);
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(2, tmp[0]);
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(3, tmp[0]);
+        assert_eq!(1, peeker.read(&mut tmp).unwrap());
+        assert_eq!(4, tmp[0]);
+    }
+
+    // In cases where all we have is a bom, we expect the bytes to be
+    // passed through unchanged.
+    #[test]
+    fn trans_utf16_bom() {
+        let srcbuf = vec![0xFF, 0xFE];
+        let mut dstbuf = vec![0; 8 * (1<<10)];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        let n = rdr.read(&mut dstbuf).unwrap();
+        assert_eq!(&*srcbuf, &dstbuf[..n]);
+
+        let srcbuf = vec![0xFE, 0xFF];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        let n = rdr.read(&mut dstbuf).unwrap();
+        assert_eq!(&*srcbuf, &dstbuf[..n]);
+
+        let srcbuf = vec![0xEF, 0xBB, 0xBF];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        let n = rdr.read(&mut dstbuf).unwrap();
+        assert_eq!(&*srcbuf, &dstbuf[..n]);
+    }
+
+    // Test basic UTF-16 decoding.
+    #[test]
+    fn trans_utf16_basic() {
+        let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        assert_eq!("a", read_to_string(&mut rdr));
+
+        let srcbuf = vec![0xFE, 0xFF, 0x00, 0x61];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        assert_eq!("a", read_to_string(&mut rdr));
+    }
+
+    // Test incomplete UTF-16 decoding. This ensures we see a replacement char
+    // if the stream ends with an unpaired code unit.
+    #[test]
+    fn trans_utf16_incomplete() {
+        let srcbuf = vec![0xFF, 0xFE, 0x61, 0x00, 0x00];
+        let mut rdr = DecodeReader::new(&*srcbuf, vec![0; 8 * (1<<10)], None);
+        assert_eq!("a\u{FFFD}", read_to_string(&mut rdr));
+    }
+
+    macro_rules! test_trans_simple {
+        ($name:ident, $enc:expr, $srcbytes:expr, $dst:expr) => {
+            #[test]
+            fn $name() {
+                let srcbuf = &$srcbytes[..];
+                let enc = Encoding::for_label($enc.as_bytes());
+                let mut rdr = DecodeReader::new(
+                    &*srcbuf, vec![0; 8 * (1<<10)], enc);
+                assert_eq!($dst, read_to_string(&mut rdr));
+            }
+        }
+    }
+
+    // Not exhaustive; base-level support only. Unknown labels map to `None`.
+    test_trans_simple!(trans_simple_auto, "does not exist", b"\xD0\x96", "Ж");
+    test_trans_simple!(trans_simple_utf8, "utf-8", b"\xD0\x96", "Ж");
+    test_trans_simple!(trans_simple_utf16le, "utf-16le", b"\x16\x04", "Ж");
+    test_trans_simple!(trans_simple_utf16be, "utf-16be", b"\x04\x16", "Ж");
+    test_trans_simple!(trans_simple_chinese, "chinese", b"\xA7\xA8", "Ж");
+    test_trans_simple!(trans_simple_korean, "korean", b"\xAC\xA8", "Ж");
+    test_trans_simple!(trans_simple_big5_hkscs, "big5-hkscs", b"\xC7\xFA", "Ж");
+    test_trans_simple!(trans_simple_gbk, "gbk", b"\xA7\xA8", "Ж");
+    test_trans_simple!(trans_simple_sjis, "sjis", b"\x84\x47", "Ж");
+    test_trans_simple!(trans_simple_eucjp, "euc-jp", b"\xA7\xA8", "Ж");
+    test_trans_simple!(trans_simple_latin1, "latin1", b"\xA9", "©");
+}
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,6 +2,7 @@ extern crate atty;
 extern crate bytecount;
 #[macro_use]
 extern crate clap;
+extern crate encoding_rs;
 extern crate env_logger;
 extern crate grep;
 extern crate ignore;
@@ -43,6 +44,7 @@ macro_rules! eprintln {

 mod app;
 mod args;
+mod decoder;
 mod pathutil;
 mod printer;
 mod search_buffer;
--- a/src/printer.rs
+++ b/src/printer.rs
@@ -3,12 +3,32 @@ use std::fmt;
 use std::path::Path;
 use std::str::FromStr;

-use regex::bytes::Regex;
+use regex::bytes::{Regex, Replacer, Captures};
 use termcolor::{Color, ColorSpec, ParseColorError, WriteColor};

 use pathutil::strip_prefix;
 use ignore::types::FileTypeDef;

+/// CountingReplacer implements the Replacer interface for Regex and adds
+/// one to the borrowed `count` each time `replace_append` is invoked.
+struct CountingReplacer<'r> {
+    replace: &'r [u8],
+    count: &'r mut usize,
+}
+
+impl<'r> CountingReplacer<'r> {
+    fn new(replace: &'r [u8], count: &'r mut usize) -> CountingReplacer<'r> {
+        CountingReplacer { replace: replace, count: count }
+    }
+}
+
+impl<'r> Replacer for CountingReplacer<'r> {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+        *self.count += 1;
+        caps.expand(self.replace, dst);
+    }
+}
+
 /// Printer encapsulates all output logic for searching.
 ///
 /// Note that we currently ignore all write errors. It's probably worthwhile
@@ -46,6 +66,8 @@ pub struct Printer<W> {
    colors: ColorSpecs,
    /// The separator to use for file paths. If empty, this is ignored.
    path_separator: Option<u8>,
+    /// Omit lines longer than this many bytes when printing.
+    max_columns: Option<usize>
 }

 impl<W: WriteColor> Printer<W> {
@@ -65,6 +87,7 @@ impl<W: WriteColor> Printer<W> {
            with_filename: false,
            colors: ColorSpecs::default(),
            path_separator: None,
+            max_columns: None,
        }
    }

@@ -144,6 +167,12 @@ impl<W: WriteColor> Printer<W> {
        self
    }

+    /// Configure the max. line length in bytes; longer lines are omitted.
+    pub fn max_columns(mut self, max_columns: Option<usize>) -> Printer<W> {
+        self.max_columns = max_columns;
+        self
+    }
+
    /// Returns true if and only if something has been printed.
    pub fn has_printed(&self) -> bool {
        self.has_printed
@@ -263,31 +292,57 @@ impl<W: WriteColor> Printer<W> {
            self.write(b":");
        }
        if self.replace.is_some() {
-            let line = re.replace_all(
-                &buf[start..end], &**self.replace.as_ref().unwrap());
+            let mut count = 0;
+            let line = {
+                let replacer = CountingReplacer::new(
+                    self.replace.as_ref().unwrap(), &mut count);
+                re.replace_all(&buf[start..end], replacer)
+            };
+            if self.max_columns.map_or(false, |m| line.len() > m) {
+                let _ = self.wtr.set_color(self.colors.matched());
+                let msg = format!(
+                    "[Omitted long line with {} replacements]", count);
+                self.write(msg.as_bytes());
+                let _ = self.wtr.reset();
+                self.write_eol();
+                return;
+            }
            self.write(&line);
+            if line.last() != Some(&self.eol) {
+                self.write_eol();
+            }
        } else {
            self.write_matched_line(re, &buf[start..end]);
-        }
-        if buf[start..end].last() != Some(&self.eol) {
-            self.write_eol();
+            // write_matched_line guarantees to write a newline.
        }
    }

    fn write_matched_line(&mut self, re: &Regex, buf: &[u8]) {
-        if !self.wtr.supports_color() || self.colors.matched().is_none() {
-            self.write(buf);
+        if self.max_columns.map_or(false, |m| buf.len() > m) {
+            let count = re.find_iter(buf).count();
+            let _ = self.wtr.set_color(self.colors.matched());
+            let msg = format!("[Omitted long line with {} matches]", count);
+            self.write(msg.as_bytes());
+            let _ = self.wtr.reset();
+            self.write_eol();
            return;
        }
-        let mut last_written = 0;
-        for m in re.find_iter(buf) {
-            self.write(&buf[last_written..m.start()]);
-            let _ = self.wtr.set_color(self.colors.matched());
-            self.write(&buf[m.start()..m.end()]);
-            let _ = self.wtr.reset();
-            last_written = m.end();
+        if !self.wtr.supports_color() || self.colors.matched().is_none() {
+            self.write(buf);
+        } else {
+            let mut last_written = 0;
+            for m in re.find_iter(buf) {
+                self.write(&buf[last_written..m.start()]);
+                let _ = self.wtr.set_color(self.colors.matched());
+                self.write(&buf[m.start()..m.end()]);
+                let _ = self.wtr.reset();
+                last_written = m.end();
+            }
+            self.write(&buf[last_written..]);
+        }
+        if buf.last() != Some(&self.eol) {
+            self.write_eol();
        }
-        self.write(&buf[last_written..]);
    }

    pub fn context<P: AsRef<Path>>(
@@ -312,6 +367,11 @@ impl<W: WriteColor> Printer<W> {
        if let Some(line_number) = line_number {
            self.line_number(line_number, b'-');
        }
+        if self.max_columns.map_or(false, |m| end - start > m) {
+            self.write(format!("[Omitted long context line]").as_bytes());
+            self.write_eol();
+            return;
+        }
        self.write(&buf[start..end]);
        if buf[start..end].last() != Some(&self.eol) {
            self.write_eol();
--- a/src/search_buffer.rs
+++ b/src/search_buffer.rs
@@ -113,8 +113,8 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {

    #[inline(never)]
    pub fn run(mut self) -> u64 {
-        let binary_upto = cmp::min(4096, self.buf.len());
-        if !self.opts.text && is_binary(&self.buf[..binary_upto]) {
+        let binary_upto = cmp::min(10240, self.buf.len());
+        if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
            return 0;
        }

--- a/src/search_stream.rs
+++ b/src/search_stream.rs
@@ -248,6 +248,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
    /// If enabled, search binary files as if they were text.
    pub fn text(mut self, yes: bool) -> Self {
        self.opts.text = yes;
+        self.inp.text(yes);
        self
    }

@@ -266,9 +267,6 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
            if !try!(self.fill()) {
                break;
            }
-            if !self.opts.text && self.inp.is_binary {
-                break;
-            }
            while !self.terminate() && self.inp.pos < self.inp.lastnl {
                let matched = self.grep.read_match(
                    &mut self.last_match,
@@ -501,10 +499,8 @@ pub struct InputBuffer {
    end: usize,
    /// Set to true if and only if no reads have occurred yet.
    first: bool,
-    /// Set to true if and only if the contents of buf are determined to be
-    /// "binary" (i.e., not searchable text). Note that its value may be
-    /// falsely negative *or* falsely positive. It is only a heuristic.
-    is_binary: bool,
+    /// Set to true if all binary data should be treated as if it were text.
+    text: bool,
 }

 impl InputBuffer {
@@ -532,13 +528,23 @@ impl InputBuffer {
            lastnl: 0,
            end: 0,
            first: true,
-            is_binary: false,
+            text: false,
        }
    }

    /// Set the end-of-line terminator used by this input buffer.
-    pub fn eol(&mut self, eol: u8) {
+    pub fn eol(&mut self, eol: u8) -> &mut Self {
        self.eol = eol;
+        self
+    }
+
+    /// If enabled, search binary files as if they were text.
+    ///
+    /// Note that this may cause the buffer to load the entire contents of a
+    /// file into memory.
+    pub fn text(&mut self, yes: bool) -> &mut Self {
+        self.text = yes;
+        self
    }

    /// Resets this buffer so that it may be reused with a new reader.
@@ -547,7 +553,6 @@ impl InputBuffer {
        self.lastnl = 0;
        self.end = 0;
        self.first = true;
-        self.is_binary = false;
    }

    /// Fill the contents of this buffer with the reader given. The reader
@@ -582,8 +587,10 @@ impl InputBuffer {
            }
            let n = try!(rdr.read(
                &mut self.buf[self.end..self.end + self.read_size]));
-            if self.first && is_binary(&self.buf[self.end..self.end + n]) {
-                self.is_binary = true;
+            if !self.text {
+                if is_binary(&self.buf[self.end..self.end + n], self.first) {
+                    return Ok(false);
+                }
            }
            self.first = false;
            // We assume that reading 0 bytes means we've hit EOF.
@@ -613,11 +620,11 @@ impl InputBuffer {
 ///
 /// Note that this may return both false positives and false negatives.
 #[inline(always)]
-pub fn is_binary(buf: &[u8]) -> bool {
-    if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
+pub fn is_binary(buf: &[u8], first: bool) -> bool {
+    if first && buf.len() >= 4 && &buf[0..4] == b"%PDF" {
        return true;
    }
-    memchr(b'\x00', &buf[0..cmp::min(1024, buf.len())]).is_some()
+    memchr(b'\x00', buf).is_some()
 }

 /// Count the number of lines in the given buffer.
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -2,11 +2,13 @@ use std::fs::File;
 use std::io;
 use std::path::Path;

+use encoding_rs::Encoding;
 use grep::Grep;
 use ignore::DirEntry;
 use memmap::{Mmap, Protection};
 use termcolor::WriteColor;

+use decoder::DecodeReader;
 use pathutil::strip_prefix;
 use printer::Printer;
 use search_buffer::BufferSearcher;
@@ -27,6 +29,7 @@ pub struct WorkerBuilder {
 #[derive(Clone, Debug)]
 struct Options {
    mmap: bool,
+    encoding: Option<&'static Encoding>,
    after_context: usize,
    before_context: usize,
    count: bool,
@@ -45,6 +48,7 @@ impl Default for Options {
    fn default() -> Options {
        Options {
            mmap: false,
+            encoding: None,
            after_context: 0,
            before_context: 0,
            count: false,
@@ -80,6 +84,7 @@ impl WorkerBuilder {
        Worker {
            grep: self.grep,
            inpbuf: inpbuf,
+            decodebuf: vec![0; 8 * (1<<10)],
            opts: self.opts,
        }
    }
@@ -106,6 +111,15 @@ impl WorkerBuilder {
        self
    }

+    /// Set the encoding to use to read each file.
+    ///
+    /// If the encoding is `None` (the default), then the encoding is
+    /// automatically detected on a best-effort per-file basis.
+    pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self {
+        self.opts.encoding = enc;
+        self
+    }
+
    /// If enabled, searching will print the path instead of each match.
    ///
    /// Disabled by default.
@@ -181,8 +195,9 @@ impl WorkerBuilder {
 /// Worker is responsible for executing searches on file paths, while choosing
 /// streaming search or memory map search as appropriate.
 pub struct Worker {
-    inpbuf: InputBuffer,
    grep: Grep,
+    inpbuf: InputBuffer,
+    decodebuf: Vec<u8>,
    opts: Options,
 }

@@ -241,6 +256,8 @@ impl Worker {
        path: &Path,
        rdr: R,
    ) -> Result<u64> {
+        let rdr = DecodeReader::new(
+            rdr, &mut self.decodebuf, self.opts.encoding);
        let searcher = Searcher::new(
            &mut self.inpbuf, printer, &self.grep, path, rdr);
        searcher
@@ -274,8 +291,13 @@ impl Worker {
            return self.search(printer, path, file);
        }
        let mmap = try!(Mmap::open(file, Protection::Read));
-        let searcher = BufferSearcher::new(
-            printer, &self.grep, path, unsafe { mmap.as_slice() });
+        let buf = unsafe { mmap.as_slice() };
+        if buf.len() >= 3 && Encoding::for_bom(buf).is_some() {
+            // If the memory map starts with a BOM (UTF-8 or UTF-16), fall back
+            // to the stream reader, which transcodes UTF-16 to UTF-8.
+            return self.search(printer, path, file);
+        }
+        let searcher = BufferSearcher::new(printer, &self.grep, path, buf);
        Ok(searcher
            .count(self.opts.count)
            .files_with_matches(self.opts.files_with_matches)
--- a/termcolor/Cargo.toml
+++ b/termcolor/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "termcolor"
-version = "0.3.0"  #:version
+version = "0.3.1"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 A simple cross platform library for writing colored text to a terminal.
@@ -17,4 +17,4 @@ name = "termcolor"
 bench = false

 [target.'cfg(windows)'.dependencies]
-wincolor = { version = "0.1.1", path = "../wincolor" }
+wincolor = { version = "0.1.3", path = "../wincolor" }
--- a/termcolor/README.md
+++ b/termcolor/README.md
@@ -6,9 +6,6 @@ by interacting with the Windows console. Several convenient abstractions
 are provided for use in single-threaded or multi-threaded command line
 applications.

-[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
-[![](https://img.shields.io/crates/v/wincolor.svg)](https://crates.io/crates/wincolor)
-
 [![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep)
 [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
 [![](https://img.shields.io/crates/v/termcolor.svg)](https://crates.io/crates/termcolor)
@@ -25,7 +22,7 @@ Add this to your `Cargo.toml`:

 ```toml
 [dependencies]
-termcolor = "0.1"
+termcolor = "0.3"
 ```

 and this to your crate root:
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -432,6 +432,63 @@ sherlock!(context_line_numbers, "world|attached",
    assert_eq!(lines, expected);
 });

+sherlock!(max_filesize_parse_error_length, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    cmd.arg("--max-filesize").arg("44444444444444444444");
+    wd.assert_err(&mut cmd);
+});
+
+sherlock!(max_filesize_parse_error_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    cmd.arg("--max-filesize").arg("45k");
+    wd.assert_err(&mut cmd);
+});
+
+sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    wd.remove("sherlock");
+    wd.create_size("foo", 40);
+    wd.create_size("bar", 60);
+
+    cmd.arg("--max-filesize").arg("50").arg("--files");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+foo
+";
+
+    assert_eq!(lines, expected);
+});
+
+sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    wd.remove("sherlock");
+    wd.create_size("foo", 3048);
+    wd.create_size("bar", 4100);
+
+    cmd.arg("--max-filesize").arg("4K").arg("--files");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+foo
+";
+
+    assert_eq!(lines, expected);
+});
+
+sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+    wd.remove("sherlock");
+    wd.create_size("foo", 1000000);
+    wd.create_size("bar", 1400000);
+
+    cmd.arg("--max-filesize").arg("1M").arg("--files");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+foo
+";
+
+    assert_eq!(lines, expected);
+});
+
 sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);
@@ -988,6 +1045,64 @@ clean!(regression_279, "test", ".", |wd: WorkDir, mut cmd: Command| {
    assert_eq!(lines, "");
 });

+// See: https://github.com/BurntSushi/ripgrep/issues/405
+clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create_dir("foo/bar");
+    wd.create_dir("bar/foo");
+    wd.create("foo/bar/file1.txt", "test");
+    wd.create("bar/foo/file2.txt", "test");
+    cmd.arg("-g").arg("!/foo/**");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt")));
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1
+clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Command| {
+    let sherlock =
+        b"\x84Y\x84u\x84\x82\x84|\x84\x80\x84{ \x84V\x84\x80\x84|\x84}\x84\x83";
+    wd.create_bytes("foo", &sherlock[..]);
+    cmd.arg("-Esjis");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:Шерлок Холмс\n");
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1
+clean!(feature_1_utf16_auto, "Шерлок Холмс", ".",
+|wd: WorkDir, mut cmd: Command| {
+    let sherlock =
+        b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04";
+    wd.create_bytes("foo", &sherlock[..]);
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:Шерлок Холмс\n");
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1
+clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".",
+|wd: WorkDir, mut cmd: Command| {
+    let sherlock =
+        b"\xff\xfe(\x045\x04@\x04;\x04>\x04:\x04 \x00%\x04>\x04;\x04<\x04A\x04";
+    wd.create_bytes("foo", &sherlock[..]);
+    cmd.arg("-Eutf-16le");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:Шерлок Холмс\n");
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1
+clean!(feature_1_eucjp, "Шерлок Холмс", ".",
+|wd: WorkDir, mut cmd: Command| {
+    let sherlock =
+        b"\xa7\xba\xa7\xd6\xa7\xe2\xa7\xdd\xa7\xe0\xa7\xdc \xa7\xb7\xa7\xe0\xa7\xdd\xa7\xde\xa7\xe3";
+    wd.create_bytes("foo", &sherlock[..]);
+    cmd.arg("-Eeuc-jp");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:Шерлок Холмс\n");
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/7
 sherlock!(feature_7, "-fpat", "sherlock", |wd: WorkDir, mut cmd: Command| {
    wd.create("pat", "Sherlock\nHolmes");
@@ -1199,6 +1314,36 @@ clean!(feature_109_case_sensitive_part2, "test", ".",
    wd.assert_err(&mut cmd);
 });

+// See: https://github.com/BurntSushi/ripgrep/issues/129
+clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test");
+    cmd.arg("-M26");
+
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n";
+    assert_eq!(lines, expected);
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/129
+clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create("foo", "test\nabcdefghijklmnopqrstuvwxyz");
+    cmd.arg("-M20").arg("-C1");
+
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "foo:test\nfoo-[Omitted long context line]\n";
+    assert_eq!(lines, expected);
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/129
+clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create("foo", "test\ntest abcdefghijklmnopqrstuvwxyz test");
+    cmd.arg("-M26").arg("-rfoo");
+
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n";
+    assert_eq!(lines, expected);
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/159
 clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("foo", "test\ntest");
@@ -1326,6 +1471,27 @@ fn regression_270() {
    assert_eq!(lines, path("foo:-test\n"));
 }

+// See: https://github.com/BurntSushi/ripgrep/issues/391
+#[test]
+fn regression_391() {
+    let wd = WorkDir::new("regression_391");
+    wd.create_dir(".git");
+    wd.create("lock", "");
+    wd.create("bar.py", "");
+    wd.create(".git/packed-refs", "");
+    wd.create(".git/description", "");
+
+    let mut cmd = wd.command();
+    cmd.arg("--no-ignore").arg("--hidden").arg("--follow").arg("--files")
+        .arg("--glob")
+        .arg("!{.git,node_modules,plugged}/**")
+        .arg("--glob")
+        .arg("*.{js,json,php,md,styl,scss,sass,pug,html,config,py,cpp,c,go,hs}");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "bar.py\n");
+}
+
 #[test]
 fn type_list() {
    let wd = WorkDir::new("type_list");
--- a/tests/workdir.rs
+++ b/tests/workdir.rs
@@ -46,6 +46,13 @@ impl WorkDir {
        self.create_bytes(name, contents.as_bytes());
    }

+    /// Create a new file with the given name and size.
+    pub fn create_size<P: AsRef<Path>>(&self, name: P, filesize: u64) {
+        let path = self.dir.join(name);
+        let file = nice_err(&path, File::create(&path));
+        nice_err(&path, file.set_len(filesize));
+    }
+
    /// Create a new file with the given name and contents in this directory.
    pub fn create_bytes<P: AsRef<Path>>(&self, name: P, contents: &[u8]) {
        let path = self.dir.join(name);
--- a/wincolor/COPYING
+++ b/wincolor/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
--- a/wincolor/Cargo.toml
+++ b/wincolor/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "wincolor"
-version = "0.1.2"  #:version
+version = "0.1.3"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 A simple Windows specific API for controlling text color in a Windows console.
--- a/wincolor/LICENSE-MIT
+++ b/wincolor/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/wincolor/UNLICENSE
+++ b/wincolor/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
Author	SHA1	Message	Date
Andrew Gallant	c648eadbaa	Bump and update deps.	2017-03-12 21:33:13 -04:00
Ralf Jung	d352b79294	Add new -M/--max-columns option. This permits setting the maximum line width with respect to the number of bytes in a line. Omitted lines (whether part of a match, replacement or context) are replaced with a message stating that the line was elided. Fixes #129	2017-03-12 21:21:28 -04:00
Andrew Gallant	23aec58669	pin nightly	2017-03-12 20:52:28 -04:00
Andrew Gallant	ae863bc7aa	Improve docs for --glob flag. Fixes #345.	2017-03-12 20:31:09 -04:00
Andrew Gallant	f0d3cae569	Clarify -u/--unrestricted flags. Fixes #340	2017-03-12 20:24:45 -04:00
Andrew Gallant	4ef4818130	No line numbers when searching only stdin. This changes the default behavior of ripgrep to not show line numbers when it is printing to a tty and is only searching stdin. Fixes #380 [breaking-change]	2017-03-12 20:21:40 -04:00
Andrew Gallant	8db24e1353	Stop aggressive inlining. It's not clear what exactly is happening here, but the Read implementation for text decoding appears a bit sensitive. Small perturbations in the code appear to have a nearly 100% impact on the overall speed of ripgrep when searching UTF-16 files. I haven't had the time to examine the generated code in detail, but `perf stat` seems to think that the instruction cache is performing a lot worse when the code slows down. This might mean that excessive inlining causes a different code structure that leads to less-than-optimal icache usage, but it's at best a guess. Explicitly disabling the inline for the cold path seems to help the optimizer figure out the right thing.	2017-03-12 20:21:22 -04:00
Andrew Gallant	8bbe58d623	Add support for additional text encodings. This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and Shift_JIS. (Courtesy of the `encoding_rs` crate.) Specifically, this feature enables ripgrep to search files that are encoded in an encoding other than UTF-8. The list of available encodings is tied directly to what the `encoding_rs` crate supports, which is in turn tied to the Encoding Standard. The full list of available encodings can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get This pull request also introduces the notion that text encodings can be automatically detected on a best effort basis. Currently, the only support for this is checking for a UTF-16 bom. In all other cases, a text encoding of `auto` (the default) implies a UTF-8 or ASCII compatible source encoding. When a text encoding is otherwise specified, it is unconditionally used for all files searched. Since ripgrep's regex engine is fundamentally built on top of UTF-8, this feature works by transcoding the files to be searched from their source encoding to UTF-8. This transcoding only happens when: 1. `auto` is specified and a non-UTF-8 encoding is detected. 2. A specific encoding is given by end users (including UTF-8). When transcoding occurs, errors are handled by automatically inserting the Unicode replacement character. In this case, ripgrep's output is guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they are printed). In all other cases, the source text is searched directly, which implies an assumption that it is at least ASCII compatible, but where UTF-8 is most useful. In this scenario, encoding errors are not detected. In this case, ripgrep's output will match the input exactly, byte-for-byte. This design may not be optimal in all cases, but it has some advantages: 1. In the happy path ("UTF-8 everywhere") remains happy. I have not been able to witness any performance regressions. 2. 
In the non-UTF-8 path, implementation complexity is kept relatively low. The cost here is transcoding itself. A potentially superior implementation might build decoding of any encoding into the regex engine itself. In particular, the fundamental problem with transcoding everything first is that literal optimizations are nearly negated. Future work should entail improving the user experience. For example, we might want to auto-detect more text encodings. A more elaborate UX experience might permit end users to specify multiple text encodings, although this seems hard to pull off in an ergonomic way. Fixes #1	2017-03-12 19:54:48 -04:00
Joshua Horwitz	b3fd0df94b	Fixes #394 - Added in svg to the types file	2017-03-12 19:52:01 -04:00
Andrew Gallant	c1b841e934	Add license files to each crate. Fixes #381	2017-03-12 16:57:15 -04:00
Andrew Gallant	f5ede0e319	Add scss and ejs. We add scss to the existing `css` file type and `ejs` to the existing `html` file type. Fixes #393	2017-03-12 16:51:55 -04:00
Andrew Gallant	6ecffec537	Fix test on Windows. (This is what I get for directly pushing to master.)	2017-03-12 16:07:31 -04:00
Andrew Gallant	80e91a1f1d	Fix leading slash bug when used with `!`. When writing paths like `!/foo` in gitignore files (or when using the -g/--glob flag), the presence of `!` would prevent the gitignore builder from noticing the leading slash, which causes absolute path matching to fail. Fixes #405	2017-03-12 15:51:17 -04:00
Daniel Santa Cruz	d570f78144	Add _rg.ps1 to windows zip Tested with local cargo build paths.	2017-03-09 09:45:28 -05:00
Andrew Gallant	7c37065911	update deps	2017-03-08 20:23:12 -05:00
Jean-Marie Comets	50f7a60a8d	Add "Known issues" section in README.md Also document that ctrl-c doesn't restore the termcolor. Fixes #347.	2017-03-08 10:18:19 -05:00
Marc Tiehuis	33ec988d70	Remove regex build-dependency in Cargo.toml	2017-03-08 10:17:18 -05:00
Marc Tiehuis	adff43fbb4	Remove clap validator + add max-filesize integration tests	2017-03-08 10:17:18 -05:00
Marc Tiehuis	71585f6d47	Reduce unnecessary stat calls for max_filesize	2017-03-08 10:17:18 -05:00
tiehuis	714ae82241	Add `--max-filesize` option to cli The --max-filesize option allows filtering files which are larger than the specified limit. This is potentially useful if one is attempting to search a number of large files without common file-types/suffixes. See #369.	2017-03-08 10:17:18 -05:00
tiehuis	49fd668712	Add file size exclusion to walker A maximum filesize can be specified as an argument to a `WalkBuilder`. If a file exceeds the specified size it will be ignored as part of the resulting file/directory set. The filesize limit never applies to directories.	2017-03-08 10:17:18 -05:00
Marc Tiehuis	066f97d855	Add enclosing group to alternations in globs Fixes #391.	2017-03-08 10:13:28 -05:00
David Salter	df1bf4a042	Added Chocolatey to the installation list	2017-03-01 06:41:52 -05:00
Andrew Gallant	4e8c0fc4ad	bump clap to 2.20.5 Fixes #383	2017-02-25 18:43:13 -05:00
Igor Gnatenko	da1764dfd1	update env_logger to 0.4	2017-02-25 17:46:43 -05:00
Andrew Gallant	48a8a3a691	kick travis	2017-02-24 08:41:20 -05:00
deepy	796eaab0d7	Add .log as FileType	2017-02-23 11:41:32 -05:00
Andrew Gallant	bf49448e1e	fix badges	2017-02-19 11:28:36 -05:00
Andrew Gallant	cffba53379	use termcolor 0.3, not 0.1	2017-02-19 11:27:41 -05:00
Andrew Gallant	79d40d0e20	Tweak how binary files are handled internally. This commit fixes two issues. The first issue is that if a file contained many NUL bytes without any LF bytes, then the InputBuffer would read the entire file into memory. This is not typically a problem, but if you run rg on /proc, then bad things can happen when reading virtual memory mapping files. Arguably, such files should be ignored, but we should also try to avoid exhausting memory too. We fix this by pushing the `-a/--text` flag option down into InputBuffer, so that it knows to stop immediately if it finds a NUL byte. The other issue this fixes is that binary detection is now applied to every buffer instead of just the first one. This helps avoid detecting too many files as plain text if the first parts of a binary file happen to contain no NUL bytes. This issue still persists somewhat in the memory map searcher, since we probably don't want to search the entire file upfront for NUL bytes before actually performing our search. Instead, we search the first 10KB for now. Fixes #52, Fixes #311	2017-02-18 16:20:21 -05:00
Andrew Gallant	525b278049	Don't parse regexes with --files. When the --files flag is given, ripgrep would still try to parse some of the positional arguments as regexes. Don't do that. Fixes #326	2017-02-18 15:34:54 -05:00
Andrew Gallant	16de47920c	Permit --heading to override --no-heading. @kbknapp <3 Fixes #327	2017-02-18 15:25:08 -05:00
Andrew Gallant	a114b86063	update termcolor dep	2017-02-18 15:09:25 -05:00