use std::env;
use std::io;
use std::path::{Path, PathBuf};
use std::process;
use docopt::{self, Docopt};
use env_logger;
use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
#[cfg(not(windows))]
use term;
#[cfg(windows)]
use term::WinConsole;
use atty;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::types::{FileTypeDef, Types, TypesBuilder};
use ignore;
use out::{Out, ColoredTerminal};
use printer::Printer;
#[cfg(windows)]
use terminal_win::WindowsBuffer;
use worker::{Worker, WorkerBuilder};
use Result;
const USAGE: &'static str = "
Usage: rg [options] -e PATTERN ... [<path> ...]
rg [options] <pattern> [<path> ...]
rg [options] --files [<path> ...]
rg [options] --type-list
rg [options] --help
rg [options] --version
ripgrep (rg) recursively searches your current directory for a regex pattern.
Project home page: https://github.com/BurntSushi/ripgrep
Common options:
-a, --text Search binary files as if they were text.
-c, --count Only show count of line matches for each file.
--color WHEN Whether to use coloring in match.
Valid values are never, always or auto.
[default: auto]
-e, --regexp PATTERN ... Use PATTERN to search. This option can be
provided multiple times, where all patterns
given are searched. This is also useful when
searching for a pattern that starts with a dash.
-F, --fixed-strings Treat the pattern as a literal string instead of
a regular expression.
-g, --glob GLOB ... Include or exclude files for searching that
match the given glob. This always overrides any
other ignore logic. Multiple glob flags may be
used. Globbing rules match .gitignore globs.
Precede a glob with a '!' to exclude it.
-h, --help Show this usage message.
-i, --ignore-case Case insensitive search.
Overridden by --case-sensitive.
-n, --line-number Show line numbers (1-based). This is enabled
by default at a tty.
-N, --no-line-number Suppress line numbers.
-q, --quiet Do not print anything to stdout. If a match is
found in a file, stop searching that file.
-t, --type TYPE ... Only search files matching TYPE. Multiple type
flags may be provided. Use the --type-list flag
to list all available types.
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
not-type flags may be provided.
-u, --unrestricted ... Reduce the level of 'smart' searching. A
single -u doesn't respect .gitignore (etc.)
files. Two -u flags will search hidden files
and directories. Three -u flags will search
binary files. -uu is equivalent to grep -r,
and -uuu is equivalent to grep -a -r.
-v, --invert-match Invert matching.
-w, --word-regexp Only show matches surrounded by word boundaries.
This is equivalent to putting \\b before and
after the search pattern.
Less common options:
-A, --after-context NUM
Show NUM lines after each match.
-B, --before-context NUM
Show NUM lines before each match.
-C, --context NUM
Show NUM lines before and after each match.
--column
Show column numbers (1 based) in output. This only shows the column
numbers for the first match on each line. Note that this doesn't try
to account for Unicode. One byte is equal to one column.
--context-separator ARG
The string to use when separating non-continuous context lines. Escape
sequences may be used. [default: --]
--debug
Show debug messages.
--files
Print each file that would be searched (but don't search).
-l, --files-with-matches
Only show path of each file with matches.
-H, --with-filename
Prefix each match with the file name that contains it. This is the
default when more than one file is searched.
--no-filename
Never show the filename for a match. This is the default when
one file is searched.
--heading
Show the file name above clusters of matches from each file.
This is the default mode at a tty.
--no-heading
Don't show any file name heading.
--hidden
Search hidden directories and files. (Hidden directories and files are
skipped by default.)
--ignore-file FILE ...
Specify additional ignore files for filtering file paths. Ignore files
should be in the gitignore format and are matched relative to the
current working directory. These ignore files have lower precedence
than all other ignore file types. When specifying multiple ignore
files, earlier files have lower precedence than later files.
-L, --follow
Follow symlinks.
-m, --max-count NUM
Limit the number of matching lines per file searched to NUM.
--maxdepth NUM
Descend at most NUM directories below the command line arguments.
A value of zero only searches the starting-points themselves.
--mmap
Search using memory maps when possible. This is enabled by default
when ripgrep thinks it will be faster. (Note that mmap searching
doesn't currently support the various context related options.)
--no-messages
Suppress all error messages.
--no-mmap
Never use memory maps, even when they might be faster.
--no-ignore
Don't respect ignore files (.gitignore, .ignore, etc.)
This implies --no-ignore-parent.
--no-ignore-parent
Don't respect ignore files in parent directories.
--no-ignore-vcs
Don't respect version control ignore files (e.g., .gitignore).
Note that .ignore files will continue to be respected.
--null
Whenever a file name is printed, follow it with a NUL byte.
This includes printing filenames before matches, and when printing
a list of matching files such as with --count, --files-with-matches
and --files.
-p, --pretty
Alias for --color=always --heading -n.
-r, --replace ARG
Replace every match with the string given when printing search results.
Neither this flag nor any other flag will modify your files.
Capture group indices (e.g., $5) and names (e.g., $foo) are supported
in the replacement string.
-s, --case-sensitive
Search case sensitively. This overrides --ignore-case and --smart-case.
-S, --smart-case
Search case insensitively if the pattern is all lowercase.
Search case sensitively otherwise. This is overridden by
either --case-sensitive or --ignore-case.
-j, --threads ARG
The number of threads to use. 0 means use the number of logical CPUs
(capped at 6). [default: 0]
--version
Show the version number of ripgrep and exit.
--vimgrep
Show results with every match on its own line, including line
numbers and column numbers. (With this option, a line with more
than one match of the regex will be printed more than once.)
File type management options:
--type-list
Show all supported file types and their associated globs.
--type-add ARG ...
Add a new glob for a particular file type. Only one glob can be
added at a time. Multiple --type-add flags can be provided.
Unless --type-clear is used, globs are added to any existing globs
inside of ripgrep. Note that this must be passed to every invocation of
rg. Type settings are NOT persisted.
Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN`
--type-clear TYPE ...
Clear the file type globs previously defined for TYPE. This only clears
the default type definitions that are found inside of ripgrep. Note
that this must be passed to every invocation of rg.
";
#[derive(Debug, RustcDecodable)]
pub struct RawArgs {
arg_pattern: String,
arg_path: Vec<String>,
flag_after_context: usize,
flag_before_context: usize,
flag_case_sensitive: bool,
flag_color: String,
flag_column: bool,
flag_context: usize,
flag_context_separator: String,
flag_count: bool,
flag_files_with_matches: bool,
flag_debug: bool,
flag_files: bool,
flag_follow: bool,
flag_glob: Vec<String>,
flag_heading: bool,
flag_hidden: bool,
flag_ignore_case: bool,
flag_ignore_file: Vec<String>,
flag_invert_match: bool,
flag_line_number: bool,
flag_fixed_strings: bool,
flag_max_count: Option<usize>,
flag_maxdepth: Option<usize>,
flag_mmap: bool,
flag_no_heading: bool,
flag_no_ignore: bool,
flag_no_ignore_parent: bool,
flag_no_ignore_vcs: bool,
flag_no_line_number: bool,
flag_no_messages: bool,
flag_no_mmap: bool,
flag_no_filename: bool,
flag_null: bool,
flag_pretty: bool,
flag_quiet: bool,
flag_regexp: Vec<String>,
flag_replace: Option<String>,
flag_smart_case: bool,
flag_text: bool,
flag_threads: usize,
flag_type: Vec<String>,
flag_type_not: Vec<String>,
flag_type_list: bool,
flag_type_add: Vec<String>,
flag_type_clear: Vec<String>,
flag_unrestricted: u32,
flag_vimgrep: bool,
flag_with_filename: bool,
flag_word_regexp: bool,
}
#[derive(Debug)]
pub struct Args {
paths: Vec<PathBuf>,
after_context: usize,
before_context: usize,
color: bool,
column: bool,
context_separator: Vec<u8>,
count: bool,
files_with_matches: bool,
eol: u8,
files: bool,
follow: bool,
glob_overrides: Override,
grep: Grep,
heading: bool,
hidden: bool,
ignore_case: bool,
ignore_files: Vec<PathBuf>,
invert_match: bool,
line_number: bool,
line_per_match: bool,
max_count: Option<u64>,
maxdepth: Option<usize>,
mmap: bool,
no_ignore: bool,
no_ignore_parent: bool,
no_ignore_vcs: bool,
no_messages: bool,
null: bool,
quiet: bool,
replace: Option<Vec<u8>>,
text: bool,
threads: usize,
type_list: bool,
types: Types,
with_filename: bool,
}
impl RawArgs {
fn to_args(&self) -> Result<Args> {
let paths =
if self.arg_path.is_empty() {
if atty::on_stdin()
|| self.flag_files
|| self.flag_type_list
|| !atty::stdin_is_readable() {
vec![Path::new("./").to_path_buf()]
} else {
vec![Path::new("-").to_path_buf()]
}
} else {
self.arg_path.iter().map(|p| {
Path::new(p).to_path_buf()
}).collect()
};
let (after_context, before_context) =
if self.flag_context > 0 {
(self.flag_context, self.flag_context)
} else {
(self.flag_after_context, self.flag_before_context)
};
let mmap =
if before_context > 0 || after_context > 0 || self.flag_no_mmap {
false
} else if self.flag_mmap {
true
} else if cfg!(windows) {
true
} else if cfg!(target_os = "macos") {
false
} else {
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
};
if mmap {
debug!("will try to use memory maps");
}
let glob_overrides =
if self.flag_glob.is_empty() {
Override::empty()
} else {
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
for pat in &self.flag_glob {
try!(ovr.add(pat));
}
try!(ovr.build())
};
let threads =
if self.flag_threads == 0 {
num_cpus::get()
} else {
self.flag_threads
};
let color =
if self.flag_color == "always" {
true
} else if self.flag_vimgrep {
false
} else if self.flag_color == "auto" {
atty::on_stdout() || self.flag_pretty
} else {
false
};
let mut with_filename = self.flag_with_filename;
if !with_filename {
with_filename = paths.len() > 1 || paths[0].is_dir();
}
with_filename = with_filename && !self.flag_no_filename;
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
let text = self.flag_text || self.flag_unrestricted >= 3;
let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
Path::new(p).to_path_buf()
}).collect();
let mut args = Args {
paths: paths,
after_context: after_context,
before_context: before_context,
color: color,
column: self.flag_column,
context_separator: unescape(&self.flag_context_separator),
count: self.flag_count,
files_with_matches: self.flag_files_with_matches,
eol: self.eol(),
files: self.flag_files,
follow: self.flag_follow,
glob_overrides: glob_overrides,
grep: try!(self.grep()),
heading: !self.flag_no_heading && self.flag_heading,
hidden: hidden,
ignore_case: self.flag_ignore_case,
ignore_files: ignore_files,
invert_match: self.flag_invert_match,
line_number: !self.flag_no_line_number && self.flag_line_number,
line_per_match: self.flag_vimgrep,
max_count: self.flag_max_count.map(|max| max as u64),
maxdepth: self.flag_maxdepth,
mmap: mmap,
no_ignore: no_ignore,
no_ignore_parent:
self.flag_no_ignore_parent || no_ignore,
no_ignore_vcs:
self.flag_no_ignore_vcs || no_ignore,
no_messages: self.flag_no_messages,
null: self.flag_null,
quiet: self.flag_quiet,
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
text: text,
threads: threads,
type_list: self.flag_type_list,
types: try!(self.types()),
with_filename: with_filename,
};
if atty::on_stdout() || self.flag_pretty {
if !self.flag_no_line_number && !args.count {
args.line_number = true;
}
if !self.flag_no_heading {
args.heading = true;
}
}
if self.flag_vimgrep {
args.column = true;
args.line_number = true;
}
Ok(args)
}
fn types(&self) -> Result<Types> {
let mut btypes = TypesBuilder::new();
btypes.add_defaults();
for ty in &self.flag_type_clear {
btypes.clear(ty);
}
for def in &self.flag_type_add {
try!(btypes.add_def(def));
}
for ty in &self.flag_type {
btypes.select(ty);
}
for ty in &self.flag_type_not {
btypes.negate(ty);
}
btypes.build().map_err(From::from)
}
fn pattern(&self) -> String {
if !self.flag_regexp.is_empty() {
if self.flag_fixed_strings {
self.flag_regexp.iter().cloned().map(|lit| {
self.word_pattern(regex::quote(&lit))
}).collect::<Vec<String>>().join("|")
} else {
self.flag_regexp.iter().cloned().map(|pat| {
self.word_pattern(pat)
}).collect::<Vec<String>>().join("|")
}
} else {
if self.flag_fixed_strings {
self.word_pattern(regex::quote(&self.arg_pattern))
} else {
self.word_pattern(self.arg_pattern.clone())
}
}
}
fn word_pattern(&self, s: String) -> String {
if self.flag_word_regexp {
format!(r"\b{}\b", s)
} else {
s
}
}
fn eol(&self) -> u8 {
b'\n'
}
fn grep(&self) -> Result<Grep> {
let smart =
self.flag_smart_case
&& !self.flag_ignore_case
&& !self.flag_case_sensitive;
let casei =
self.flag_ignore_case
&& !self.flag_case_sensitive;
GrepBuilder::new(&self.pattern())
.case_smart(smart)
.case_insensitive(casei)
.line_terminator(self.eol())
.build()
.map_err(From::from)
}
}
impl Args {
pub fn parse() -> Result<Args> {
let mut argv = vec![];
for arg in env::args_os() {
match arg.into_string() {
Ok(s) => argv.push(s),
Err(s) => {
errored!("Argument '{}' is not valid UTF-8. \
Use hex escape sequences to match arbitrary \
bytes in a pattern (e.g., \\xFF).",
s.to_string_lossy());
}
}
}
let mut raw: RawArgs =
Docopt::new(USAGE)
.and_then(|d| d.argv(argv).version(Some(version())).decode())
.unwrap_or_else(|e| {
match e {
docopt::Error::Version(ref v) => {
println!("ripgrep {}", v);
process::exit(0);
}
e => e.exit(),
}
});
let mut logb = env_logger::LogBuilder::new();
if raw.flag_debug {
logb.filter(None, log::LogLevelFilter::Debug);
} else {
logb.filter(None, log::LogLevelFilter::Warn);
}
if let Err(err) = logb.init() {
errored!("failed to initialize logger: {}", err);
}
if raw.flag_files {
if !raw.arg_pattern.is_empty() {
raw.arg_path.insert(0, raw.arg_pattern.clone());
}
}
raw.to_args().map_err(From::from)
}
pub fn files(&self) -> bool {
self.files
}
pub fn grep(&self) -> Grep {
self.grep.clone()
}
pub fn quiet(&self) -> bool {
self.quiet
}
pub fn printer<W: Terminal + Send>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
.column(self.column)
.context_separator(self.context_separator.clone())
.eol(self.eol)
.heading(self.heading)
.line_per_match(self.line_per_match)
.null(self.null)
.with_filename(self.with_filename);
if let Some(ref rep) = self.replace {
p = p.replace(rep.clone());
}
p
}
pub fn out(&self) -> Out {
let mut out = Out::new(self.color);
if let Some(filesep) = self.file_separator() {
out = out.file_separator(filesep);
}
out
}
pub fn file_separator(&self) -> Option<Vec<u8>> {
if self.heading && !self.count && !self.files_with_matches {
Some(b"".to_vec())
} else if self.before_context > 0 || self.after_context > 0 {
Some(self.context_separator.clone())
} else {
None
}
}
pub fn never_match(&self) -> bool {
self.max_count == Some(0)
}
#[cfg(not(windows))]
pub fn outbuf(&self) -> ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
ColoredTerminal::new(vec![], self.color)
}
#[cfg(windows)]
pub fn outbuf(&self) -> ColoredTerminal<WindowsBuffer> {
ColoredTerminal::new_buffer(self.color)
}
#[cfg(not(windows))]
pub fn stdout(
&self,
) -> ColoredTerminal<term::TerminfoTerminal<io::BufWriter<io::Stdout>>> {
ColoredTerminal::new(io::BufWriter::new(io::stdout()), self.color)
}
#[cfg(windows)]
pub fn stdout(&self) -> ColoredTerminal<WinConsole<io::Stdout>> {
ColoredTerminal::new_stdout(self.color)
}
pub fn paths(&self) -> &[PathBuf] {
&self.paths
}
pub fn is_one_path(&self) -> bool {
self.paths.len() == 1
&& (self.paths[0] == Path::new("-") || self.paths[0].is_file())
}
pub fn worker(&self) -> Worker {
WorkerBuilder::new(self.grep())
.after_context(self.after_context)
.before_context(self.before_context)
.count(self.count)
.files_with_matches(self.files_with_matches)
.eol(self.eol)
.line_number(self.line_number)
.invert_match(self.invert_match)
.max_count(self.max_count)
.mmap(self.mmap)
.quiet(self.quiet)
.text(self.text)
.build()
}
pub fn threads(&self) -> usize {
self.threads
}
pub fn type_defs(&self) -> &[FileTypeDef] {
self.types.definitions()
}
pub fn type_list(&self) -> bool {
self.type_list
}
pub fn no_messages(&self) -> bool {
self.no_messages
}
pub fn walker(&self) -> ignore::Walk {
self.walker_builder().build()
}
pub fn walker_parallel(&self) -> ignore::WalkParallel {
self.walker_builder().build_parallel()
}
fn walker_builder(&self) -> ignore::WalkBuilder {
let paths = self.paths();
let mut wd = ignore::WalkBuilder::new(&paths[0]);
for path in &paths[1..] {
wd.add(path);
}
for path in &self.ignore_files {
if let Some(err) = wd.add_ignore(path) {
if !self.no_messages {
eprintln!("{}", err);
}
}
}
wd.follow_links(self.follow);
wd.hidden(!self.hidden);
wd.max_depth(self.maxdepth);
wd.overrides(self.glob_overrides.clone());
wd.types(self.types.clone());
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
wd.ignore(!self.no_ignore);
wd.parents(!self.no_ignore_parent);
wd.threads(self.threads());
wd
}
}
fn version() -> String {
let (maj, min, pat) = (
option_env!("CARGO_PKG_VERSION_MAJOR"),
option_env!("CARGO_PKG_VERSION_MINOR"),
option_env!("CARGO_PKG_VERSION_PATCH"),
);
match (maj, min, pat) {
(Some(maj), Some(min), Some(pat)) =>
format!("{}.{}.{}", maj, min, pat),
_ => "".to_owned(),
}
}
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
Escape,
HexFirst,
HexSecond(char),
Literal,
}
fn unescape(s: &str) -> Vec<u8> {
use self::State::*;
let mut bytes = vec![];
let mut state = Literal;
for c in s.chars() {
match state {
Escape => {
match c {
'n' => { bytes.push(b'\n'); state = Literal; }
'r' => { bytes.push(b'\r'); state = Literal; }
't' => { bytes.push(b'\t'); state = Literal; }
'x' => { state = HexFirst; }
c => {
bytes.extend(&format!(r"\{}", c).into_bytes());
state = Literal;
}
}
}
HexFirst => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
state = HexSecond(c);
}
c => {
bytes.extend(&format!(r"\x{}", c).into_bytes());
state = Literal;
}
}
}
HexSecond(first) => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
let ordinal = format!("{}{}", first, c);
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
bytes.push(byte);
state = Literal;
}
c => {
let original = format!(r"\x{}{}", first, c);
bytes.extend(&original.into_bytes());
state = Literal;
}
}
}
Literal => {
match c {
'\\' => { state = Escape; }
c => { bytes.extend(c.to_string().as_bytes()); }
}
}
}
}
match state {
Escape => bytes.push(b'\\'),
HexFirst => bytes.extend(b"\\x"),
HexSecond(c) => bytes.extend(&format!("\\x{}", c).into_bytes()),
Literal => {}
}
bytes
}
#[cfg(test)]
mod tests {
use super::unescape;
fn b(bytes: &'static [u8]) -> Vec<u8> {
bytes.to_vec()
}
#[test]
fn unescape_nul() {
assert_eq!(b(b"\x00"), unescape(r"\x00"));
}
#[test]
fn unescape_nl() {
assert_eq!(b(b"\n"), unescape(r"\n"));
}
#[test]
fn unescape_tab() {
assert_eq!(b(b"\t"), unescape(r"\t"));
}
#[test]
fn unescape_carriage() {
assert_eq!(b(b"\r"), unescape(r"\r"));
}
#[test]
fn unescape_nothing_simple() {
assert_eq!(b(b"\\a"), unescape(r"\a"));
}
#[test]
fn unescape_nothing_hex0() {
assert_eq!(b(b"\\x"), unescape(r"\x"));
}
#[test]
fn unescape_nothing_hex1() {
assert_eq!(b(b"\\xz"), unescape(r"\xz"));
}
#[test]
fn unescape_nothing_hex2() {
assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
}
}