use std::{
collections::HashSet,
path::{Path, PathBuf},
};
use {
bstr::BString,
grep::printer::{ColorSpecs, SummaryKind},
};
use crate::{
flags::lowargs::{
BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice,
ContextMode, ContextSeparator, EncodingMode, EngineChoice,
FieldContextSeparator, FieldMatchSeparator, LowArgs, MmapMode, Mode,
PatternSource, SearchMode, SortMode, SortModeKind, TypeChange,
},
haystack::{Haystack, HaystackBuilder},
search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder},
};
/// The fully resolved ("high level") argument set, built from `LowArgs` by
/// `HiArgs::from_low_args`.
///
/// Fields without a comment below are copied unchanged from the `LowArgs`
/// field of the same name. Fields with a comment are computed during the
/// conversion.
#[derive(Debug)]
pub(crate) struct HiArgs {
/// Explicit/implicit binary detection, built by
/// `BinaryDetection::from_low_args`.
binary: BinaryDetection,
boundary: Option<BoundaryMode>,
buffer: BufferMode,
byte_offset: bool,
case: CaseMode,
/// The requested color choice, except that `Auto` is resolved to `Never`
/// when stdout is not a terminal.
color: ColorChoice,
/// The default color specs followed by any specs taken from `LowArgs`.
colors: grep::printer::ColorSpecs,
/// The explicit column setting, defaulting to the value of `vimgrep`.
column: bool,
context: ContextMode,
context_separator: ContextSeparator,
crlf: bool,
/// The working directory captured when the conversion `State` was created.
cwd: PathBuf,
dfa_size_limit: Option<usize>,
encoding: EncodingMode,
engine: EngineChoice,
field_context_separator: FieldContextSeparator,
field_match_separator: FieldMatchSeparator,
/// Separator emitted between the results of different files. Only set in
/// the standard search mode, and only when headings are enabled or a
/// non-zero amount of context was requested.
file_separator: Option<Vec<u8>>,
fixed_strings: bool,
follow: bool,
/// Override globs built from `LowArgs::globs` and `LowArgs::iglobs`.
globs: ignore::overrides::Override,
/// Whether headings are used. Always false in vimgrep mode; when not set
/// explicitly, enabled only if stdout is a terminal.
heading: bool,
hidden: bool,
/// Hyperlink configuration built by `take_hyperlink_config`.
hyperlink_config: grep::printer::HyperlinkConfig,
ignore_file_case_insensitive: bool,
ignore_file: Vec<PathBuf>,
include_zero: bool,
invert_match: bool,
/// Whether stdout was a terminal when the conversion `State` was created.
is_terminal_stdout: bool,
/// The explicit line number setting, or a default derived from the mode,
/// `quiet`, the terminal state, `column` and `vimgrep`.
line_number: bool,
max_columns: Option<u64>,
max_columns_preview: bool,
max_count: Option<u64>,
max_depth: Option<usize>,
max_filesize: Option<u64>,
/// Memory map strategy derived from `LowArgs::mmap` and the search paths.
mmap_choice: grep::searcher::MmapChoice,
mode: Mode,
multiline: bool,
multiline_dotall: bool,
no_ignore_dot: bool,
no_ignore_exclude: bool,
no_ignore_files: bool,
no_ignore_global: bool,
no_ignore_parent: bool,
no_ignore_vcs: bool,
no_require_git: bool,
no_unicode: bool,
null_data: bool,
one_file_system: bool,
only_matching: bool,
path_separator: Option<u8>,
/// The paths to search, built by `Paths::from_low_args`.
paths: Paths,
/// `Some(NUL)` when `LowArgs::null` is set, otherwise `None`.
path_terminator: Option<u8>,
/// The de-duplicated patterns, built by `Patterns::from_low_args`.
patterns: Patterns,
pre: Option<PathBuf>,
/// Override globs built from `LowArgs::pre_glob`.
pre_globs: ignore::overrides::Override,
quiet: bool,
/// True when `quiet` is set and no statistics are being collected.
quit_after_match: bool,
regex_size_limit: Option<usize>,
replace: Option<BString>,
search_zip: bool,
sort: Option<SortMode>,
/// A fresh statistics value when searching with `LowArgs::stats` set or in
/// JSON mode, otherwise `None`.
stats: Option<grep::printer::Stats>,
stop_on_nonmatch: bool,
/// Number of threads to use; forced to 1 when sorting or when searching a
/// single file.
threads: usize,
trim: bool,
/// File type matcher built from the defaults plus `LowArgs::type_changes`.
types: ignore::types::Types,
vimgrep: bool,
/// The explicit setting, defaulting to true in vimgrep mode or when more
/// than a single file is searched.
with_filename: bool,
}
impl HiArgs {
/// Converts low-level arguments into their resolved, high-level form.
///
/// # Panics
///
/// Panics if `low.special` is set; callers must handle special modes before
/// converting.
///
/// # Errors
///
/// Returns an error when the requested sort mode is unsupported, or when
/// building the conversion state, patterns, paths, hyperlink configuration,
/// file types or globs fails.
pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
assert_eq!(None, low.special, "special mode demands short-circuiting");
// Reject unsupported sort modes before doing any other work.
if let Some(ref sort) = low.sort {
sort.supported()?;
}
// Normalize the search mode in place so that everything computed below
// observes the adjusted mode.
match low.mode {
Mode::Search(ref mut mode) => match *mode {
// Counting matches while inverting is treated as a plain count.
SearchMode::CountMatches if low.invert_match => {
*mode = SearchMode::Count;
}
// A plain count with `only_matching` is treated as counting matches.
SearchMode::Count if low.only_matching => {
*mode = SearchMode::CountMatches;
}
_ => {}
},
_ => {}
}
// Patterns must be taken before paths: the pattern step may remove the
// first positional argument, and the path step drains whatever remains.
let mut state = State::new()?;
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
let binary = BinaryDetection::from_low_args(&state, &low);
let colors = take_color_specs(&mut state, &mut low);
let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?;
let stats = stats(&low);
let types = types(&low)?;
let globs = globs(&state, &low)?;
let pre_globs = preprocessor_globs(&state, &low)?;
// `Auto` coloring is turned off when stdout is not a terminal.
let color = match low.color {
ColorChoice::Auto if !state.is_terminal_stdout => {
ColorChoice::Never
}
_ => low.color,
};
// Column numbers default to on in vimgrep mode.
let column = low.column.unwrap_or(low.vimgrep);
// Headings are never used in vimgrep mode, and are on by default only
// when printing to a terminal.
let heading = match low.heading {
None => !low.vimgrep && state.is_terminal_stdout,
Some(false) => false,
Some(true) => !low.vimgrep,
};
let path_terminator = if low.null { Some(b'\x00') } else { None };
// Quitting after the first match is only possible when no statistics
// are being collected.
let quit_after_match = stats.is_none() && low.quiet;
// Sorting and single-file searches run on one thread. Otherwise use the
// requested count, or the available parallelism (1 if it cannot be
// determined) capped at 12.
let threads = if low.sort.is_some() || paths.is_one_file {
1
} else if let Some(threads) = low.threads {
threads
} else {
std::thread::available_parallelism().map_or(1, |n| n.get()).min(12)
};
log::debug!("using {threads} thread(s)");
let with_filename = low
.with_filename
.unwrap_or_else(|| low.vimgrep || !paths.is_one_file);
// A separator between files is only used in the standard search mode:
// an empty one with headings, or the context separator when a non-zero
// amount of context is requested.
let file_separator = match low.mode {
Mode::Search(SearchMode::Standard) => {
if heading {
Some(b"".to_vec())
} else if let ContextMode::Limited(ref limited) = low.context {
let (before, after) = limited.get();
if before > 0 || after > 0 {
low.context_separator.clone().into_bytes()
} else {
None
}
} else {
None
}
}
_ => None,
};
// When not set explicitly, line numbers are off in quiet mode, outside of
// search modes and in the summary modes, and always on for JSON.
let line_number = low.line_number.unwrap_or_else(|| {
if low.quiet {
return false;
}
let Mode::Search(ref search_mode) = low.mode else { return false };
match *search_mode {
SearchMode::FilesWithMatches
| SearchMode::FilesWithoutMatch
| SearchMode::Count
| SearchMode::CountMatches => return false,
SearchMode::JSON => return true,
SearchMode::Standard => {
// On when printing to a terminal, unless stdin is the only thing
// being searched; column and vimgrep output also turn them on.
(state.is_terminal_stdout && !paths.is_only_stdin())
|| column
|| low.vimgrep
}
}
});
let mmap_choice = {
// NOTE(review): `MmapChoice::auto` is an unsafe constructor; soundness
// presumably depends on searched files not changing while they are
// mapped. Confirm against the grep-searcher documentation.
let maybe = unsafe { grep::searcher::MmapChoice::auto() };
let never = grep::searcher::MmapChoice::never();
match low.mmap {
MmapMode::Auto => {
// Only consider memory maps when searching a handful of paths
// that are all regular files.
if paths.paths.len() <= 10
&& paths.paths.iter().all(|p| p.is_file())
{
maybe
} else {
never
}
}
MmapMode::AlwaysTryMmap => maybe,
MmapMode::Never => never,
}
};
Ok(HiArgs {
mode: low.mode,
patterns,
paths,
binary,
boundary: low.boundary,
buffer: low.buffer,
byte_offset: low.byte_offset,
case: low.case,
color,
colors,
column,
context: low.context,
context_separator: low.context_separator,
crlf: low.crlf,
cwd: state.cwd,
dfa_size_limit: low.dfa_size_limit,
encoding: low.encoding,
engine: low.engine,
field_context_separator: low.field_context_separator,
field_match_separator: low.field_match_separator,
file_separator,
fixed_strings: low.fixed_strings,
follow: low.follow,
heading,
hidden: low.hidden,
hyperlink_config,
ignore_file: low.ignore_file,
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
include_zero: low.include_zero,
invert_match: low.invert_match,
is_terminal_stdout: state.is_terminal_stdout,
line_number,
max_columns: low.max_columns,
max_columns_preview: low.max_columns_preview,
max_count: low.max_count,
max_depth: low.max_depth,
max_filesize: low.max_filesize,
mmap_choice,
multiline: low.multiline,
multiline_dotall: low.multiline_dotall,
no_ignore_dot: low.no_ignore_dot,
no_ignore_exclude: low.no_ignore_exclude,
no_ignore_files: low.no_ignore_files,
no_ignore_global: low.no_ignore_global,
no_ignore_parent: low.no_ignore_parent,
no_ignore_vcs: low.no_ignore_vcs,
no_require_git: low.no_require_git,
no_unicode: low.no_unicode,
null_data: low.null_data,
one_file_system: low.one_file_system,
only_matching: low.only_matching,
globs,
path_separator: low.path_separator,
path_terminator,
pre: low.pre,
pre_globs,
quiet: low.quiet,
quit_after_match,
regex_size_limit: low.regex_size_limit,
replace: low.replace,
search_zip: low.search_zip,
sort: low.sort,
stats,
stop_on_nonmatch: low.stop_on_nonmatch,
threads,
trim: low.trim,
types,
vimgrep: low.vimgrep,
with_filename,
})
}
/// Creates a buffer writer for stdout using the resolved color choice and
/// the configured file separator.
pub(crate) fn buffer_writer(&self) -> termcolor::BufferWriter {
    let color_choice = self.color.to_termcolor();
    let mut bufwtr = termcolor::BufferWriter::stdout(color_choice);
    bufwtr.separator(self.file_separator.clone());
    bufwtr
}
pub(crate) fn has_implicit_path(&self) -> bool {
self.paths.has_implicit_path
}
/// Creates a haystack builder whose dot-prefix stripping is enabled
/// exactly when the search path was chosen implicitly.
pub(crate) fn haystack_builder(&self) -> HaystackBuilder {
    let implicit = self.paths.has_implicit_path;
    let mut haystacks = HaystackBuilder::new();
    haystacks.strip_dot_prefix(implicit);
    haystacks
}
/// Builds the pattern matcher for the configured regex engine.
///
/// * `Default` uses only the Rust regex engine; on failure the error text
///   is passed through `suggest_other_engine`.
/// * `PCRE2` uses only PCRE2.
/// * `Auto` tries the Rust regex engine first and then PCRE2, reporting
///   both errors when neither can compile the patterns.
pub(crate) fn matcher(&self) -> anyhow::Result<PatternMatcher> {
    match self.engine {
        EngineChoice::PCRE2 => self.matcher_pcre2(),
        EngineChoice::Default => self.matcher_rust().map_err(|err| {
            anyhow::anyhow!(suggest_other_engine(err.to_string()))
        }),
        EngineChoice::Auto => {
            // Any success is returned as-is; only the error is kept.
            let rust_err = match self.matcher_rust() {
                Err(err) => err,
                built => return built,
            };
            log::debug!(
                "error building Rust regex in hybrid mode:\n{rust_err}",
            );
            let pcre_err = match self.matcher_pcre2() {
                Err(err) => err,
                built => return built,
            };
            let divider = "~".repeat(79);
            Err(anyhow::anyhow!(
                "regex could not be compiled with either the default \
                 regex engine or with PCRE2.\n\n\
                 default regex engine error:\n\
                 {divider}\n\
                 {rust_err}\n\
                 {divider}\n\n\
                 PCRE2 regex engine error:\n{pcre_err}",
            ))
        }
    }
}
/// Builds a PCRE2 matcher from the configured patterns.
///
/// When the `pcre2` feature is disabled, this always returns an error.
fn matcher_pcre2(&self) -> anyhow::Result<PatternMatcher> {
    #[cfg(feature = "pcre2")]
    {
        let mut pcre = grep::pcre2::RegexMatcherBuilder::new();
        pcre.multi_line(true).fixed_strings(self.fixed_strings);
        match self.case {
            CaseMode::Smart => {
                pcre.case_smart(true);
            }
            CaseMode::Insensitive => {
                pcre.caseless(true);
            }
            CaseMode::Sensitive => {
                pcre.caseless(false);
            }
        }
        match self.boundary {
            Some(BoundaryMode::Line) => {
                pcre.whole_line(true);
            }
            Some(BoundaryMode::Word) => {
                pcre.word(true);
            }
            None => {}
        }
        // The JIT is only requested on 64-bit targets, with a 10 MiB stack.
        if cfg!(target_pointer_width = "64") {
            pcre.jit_if_available(true);
            pcre.max_jit_stack_size(Some(10 * (1 << 20)));
        }
        if !self.no_unicode {
            pcre.utf(true);
            pcre.ucp(true);
        }
        if self.multiline {
            pcre.dotall(self.multiline_dotall);
        }
        if self.crlf {
            pcre.crlf(true);
        }
        let built = pcre.build_many(&self.patterns.patterns)?;
        Ok(PatternMatcher::PCRE2(built))
    }
    #[cfg(not(feature = "pcre2"))]
    {
        Err(anyhow::anyhow!(
            "PCRE2 is not available in this build of ripgrep"
        ))
    }
}
/// Builds a matcher backed by the Rust regex engine from the configured
/// patterns.
///
/// Build errors are passed through `suggest_multiline` and `suggest_text`
/// so that the message can carry a hint about a flag that may help.
fn matcher_rust(&self) -> anyhow::Result<PatternMatcher> {
    let mut rx = grep::regex::RegexMatcherBuilder::new();
    rx.multi_line(true);
    rx.unicode(!self.no_unicode);
    rx.octal(false);
    rx.fixed_strings(self.fixed_strings);
    match self.case {
        CaseMode::Smart => {
            rx.case_smart(true);
        }
        CaseMode::Insensitive => {
            rx.case_insensitive(true);
        }
        CaseMode::Sensitive => {
            rx.case_insensitive(false);
        }
    }
    match self.boundary {
        Some(BoundaryMode::Line) => {
            rx.whole_line(true);
        }
        Some(BoundaryMode::Word) => {
            rx.word(true);
        }
        None => {}
    }
    if !self.multiline {
        // Line oriented search: the matcher is told about the line
        // terminator, which is NUL instead of `\n` with `null_data`.
        rx.line_terminator(Some(b'\n')).dot_matches_new_line(false);
        if self.crlf {
            rx.crlf(true);
        }
        if self.null_data {
            rx.line_terminator(Some(b'\x00'));
        }
    } else {
        rx.dot_matches_new_line(self.multiline_dotall);
        if self.crlf {
            rx.crlf(true).line_terminator(None);
        }
    }
    if let Some(limit) = self.regex_size_limit {
        rx.size_limit(limit);
    }
    if let Some(limit) = self.dfa_size_limit {
        rx.dfa_size_limit(limit);
    }
    // With any binary detection active, NUL bytes are banned from patterns.
    if !self.binary.is_none() {
        rx.ban_byte(Some(b'\x00'));
    }
    rx.build_many(&self.patterns.patterns)
        .map(PatternMatcher::RustRegex)
        .map_err(|err| {
            anyhow::anyhow!(suggest_text(suggest_multiline(err.to_string())))
        })
}
/// Returns false when it is already known that nothing can match: either
/// there are no patterns and matches are not inverted, or the match limit
/// is zero.
pub(crate) fn matches_possible(&self) -> bool {
    let nothing_to_match =
        self.patterns.patterns.is_empty() && !self.invert_match;
    let zero_limit = matches!(self.max_count, Some(0));
    !nothing_to_match && !zero_limit
}
pub(crate) fn mode(&self) -> Mode {
self.mode
}
/// Creates a builder for a printer that only emits file paths, configured
/// with the color specs, hyperlink settings, path separator and path
/// terminator (a line feed when none is configured).
pub(crate) fn path_printer_builder(
    &self,
) -> grep::printer::PathPrinterBuilder {
    let terminator = self.path_terminator.unwrap_or(b'\n');
    let mut paths = grep::printer::PathPrinterBuilder::new();
    paths.color_specs(self.colors.clone());
    paths.hyperlink(self.hyperlink_config.clone());
    paths.separator(self.path_separator);
    paths.terminator(terminator);
    paths
}
/// Creates the printer for the given search mode, writing to `wtr`.
///
/// In quiet mode every search mode uses a summary printer. Otherwise the
/// JSON and standard modes get their dedicated printers and the remaining
/// modes use a summary printer of the matching kind.
pub(crate) fn printer<W: termcolor::WriteColor>(
    &self,
    search_mode: SearchMode,
    wtr: W,
) -> Printer<W> {
    let summary_kind = match (self.quiet, search_mode) {
        (true, SearchMode::FilesWithoutMatch) => {
            SummaryKind::QuietWithoutMatch
        }
        (
            true,
            SearchMode::FilesWithMatches
            | SearchMode::Count
            | SearchMode::CountMatches
            | SearchMode::JSON
            | SearchMode::Standard,
        ) => SummaryKind::QuietWithMatch,
        (false, SearchMode::JSON) => {
            return Printer::JSON(self.printer_json(wtr));
        }
        (false, SearchMode::Standard) => {
            return Printer::Standard(self.printer_standard(wtr));
        }
        (false, SearchMode::FilesWithMatches) => SummaryKind::PathWithMatch,
        (false, SearchMode::FilesWithoutMatch) => {
            SummaryKind::PathWithoutMatch
        }
        (false, SearchMode::Count) => SummaryKind::Count,
        (false, SearchMode::CountMatches) => SummaryKind::CountMatches,
    };
    Printer::Summary(self.printer_summary(wtr, summary_kind))
}
/// Creates a JSON printer writing to `wtr`, with pretty printing and
/// unconditional begin/end messages disabled and the configured
/// replacement applied.
fn printer_json<W: std::io::Write>(
    &self,
    wtr: W,
) -> grep::printer::JSON<W> {
    let mut json = grep::printer::JSONBuilder::new();
    json.pretty(false);
    json.always_begin_end(false);
    json.replacement(self.replace.clone().map(|r| r.into()));
    json.build(wtr)
}
/// Creates the standard (grep-like) printer writing to `wtr`.
///
/// The separator between files is only handed to the printer when running
/// single threaded.
fn printer_standard<W: termcolor::WriteColor>(
    &self,
    wtr: W,
) -> grep::printer::Standard<W> {
    let mut std_printer = grep::printer::StandardBuilder::new();
    std_printer
        .byte_offset(self.byte_offset)
        .color_specs(self.colors.clone())
        .column(self.column)
        .heading(self.heading)
        .hyperlink(self.hyperlink_config.clone());
    std_printer
        .max_columns_preview(self.max_columns_preview)
        .max_columns(self.max_columns)
        .only_matching(self.only_matching)
        .path(self.with_filename)
        .path_terminator(self.path_terminator);
    std_printer
        .per_match_one_line(true)
        .per_match(self.vimgrep)
        .replacement(self.replace.clone().map(|r| r.into()));
    std_printer
        .separator_context(self.context_separator.clone().into_bytes())
        .separator_field_context(
            self.field_context_separator.clone().into_bytes(),
        )
        .separator_field_match(
            self.field_match_separator.clone().into_bytes(),
        )
        .separator_path(self.path_separator);
    std_printer.stats(self.stats.is_some()).trim_ascii(self.trim);
    let single_threaded = self.threads == 1;
    if single_threaded {
        std_printer.separator_search(self.file_separator.clone());
    }
    std_printer.build(wtr)
}
/// Creates a summary printer of the given `kind` writing to `wtr`.
///
/// Zero counts are excluded unless `include_zero` is set, and `:` is used
/// as the field separator.
fn printer_summary<W: termcolor::WriteColor>(
    &self,
    wtr: W,
    kind: SummaryKind,
) -> grep::printer::Summary<W> {
    let mut summary = grep::printer::SummaryBuilder::new();
    summary
        .color_specs(self.colors.clone())
        .exclude_zero(!self.include_zero)
        .hyperlink(self.hyperlink_config.clone())
        .kind(kind);
    summary
        .path(self.with_filename)
        .path_terminator(self.path_terminator)
        .separator_field(b":".to_vec())
        .separator_path(self.path_separator)
        .stats(self.stats.is_some());
    summary.build(wtr)
}
pub(crate) fn quiet(&self) -> bool {
self.quiet
}
pub(crate) fn quit_after_match(&self) -> bool {
self.quit_after_match
}
/// Creates a search worker from the given matcher, searcher and printer.
///
/// # Errors
///
/// Returns an error when configuring the preprocessor command fails.
pub(crate) fn search_worker<W: termcolor::WriteColor>(
    &self,
    matcher: PatternMatcher,
    searcher: grep::searcher::Searcher,
    printer: Printer<W>,
) -> anyhow::Result<SearchWorker<W>> {
    let mut worker = SearchWorkerBuilder::new();
    // Setting the preprocessor is the only fallible step.
    worker.preprocessor(self.pre.clone())?;
    worker
        .preprocessor_globs(self.pre_globs.clone())
        .search_zip(self.search_zip)
        .binary_detection_explicit(self.binary.explicit.clone())
        .binary_detection_implicit(self.binary.implicit.clone());
    Ok(worker.build(matcher, searcher, printer))
}
/// Creates a searcher configured from these arguments.
///
/// The line terminator is CRLF when `crlf` is set, otherwise NUL when
/// `null_data` is set, otherwise a line feed.
pub(crate) fn searcher(&self) -> anyhow::Result<grep::searcher::Searcher> {
    use grep::matcher::LineTerminator;

    let line_term = match (self.crlf, self.null_data) {
        (true, _) => LineTerminator::crlf(),
        (false, true) => LineTerminator::byte(b'\x00'),
        (false, false) => LineTerminator::byte(b'\n'),
    };
    let mut sb = grep::searcher::SearcherBuilder::new();
    sb.max_matches(self.max_count)
        .line_terminator(line_term)
        .invert_match(self.invert_match)
        .line_number(self.line_number);
    sb.multi_line(self.multiline)
        .memory_map(self.mmap_choice.clone())
        .stop_on_nonmatch(self.stop_on_nonmatch);
    match self.context {
        ContextMode::Limited(ref limited) => {
            let (before, after) = limited.get();
            sb.before_context(before).after_context(after);
        }
        ContextMode::Passthru => {
            sb.passthru(true);
        }
    }
    match self.encoding {
        EncodingMode::Disabled => {
            sb.bom_sniffing(false);
        }
        EncodingMode::Some(ref enc) => {
            sb.encoding(Some(enc.clone()));
        }
        // Nothing to configure: the builder is left as constructed.
        EncodingMode::Auto => {}
    }
    Ok(sb.build())
}
pub(crate) fn sort<'a, I>(
&self,
haystacks: I,
) -> Box<dyn Iterator<Item = Haystack> + 'a>
where
I: Iterator<Item = Haystack> + 'a,
{
use std::{cmp::Ordering, fs::Metadata, io, time::SystemTime};
fn attach_timestamps(
haystacks: impl Iterator<Item = Haystack>,
get: impl Fn(&Metadata) -> io::Result<SystemTime>,
) -> impl Iterator<Item = (Haystack, Option<SystemTime>)> {
haystacks.map(move |s| {
let time = s.path().metadata().and_then(|m| get(&m)).ok();
(s, time)
})
}
let Some(ref sort) = self.sort else { return Box::new(haystacks) };
let mut with_timestamps: Vec<_> = match sort.kind {
SortModeKind::Path if !sort.reverse => return Box::new(haystacks),
SortModeKind::Path => {
let mut haystacks = haystacks.collect::<Vec<Haystack>>();
haystacks.sort_by(|ref h1, ref h2| {
h1.path().cmp(h2.path()).reverse()
});
return Box::new(haystacks.into_iter());
}
SortModeKind::LastModified => {
attach_timestamps(haystacks, |md| md.modified()).collect()
}
SortModeKind::LastAccessed => {
attach_timestamps(haystacks, |md| md.accessed()).collect()
}
SortModeKind::Created => {
attach_timestamps(haystacks, |md| md.created()).collect()
}
};
with_timestamps.sort_by(|(_, t1), (_, t2)| {
let ordering = match (*t1, *t2) {
(Some(t1), Some(t2)) => t1.cmp(&t2),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => Ordering::Equal,
};
if sort.reverse { ordering.reverse() } else { ordering }
});
Box::new(with_timestamps.into_iter().map(|(s, _)| s))
}
/// Returns a copy of the statistics value, or `None` when statistics are
/// not being collected.
pub(crate) fn stats(&self) -> Option<grep::printer::Stats> {
    self.stats.as_ref().cloned()
}
/// Returns a stdout writer using the resolved color choice.
///
/// Line buffering is used when requested explicitly, or in `Auto` mode
/// when stdout is a terminal; block buffering is used otherwise.
pub(crate) fn stdout(&self) -> grep::cli::StandardStream {
    let color = self.color.to_termcolor();
    let line_buffered = match self.buffer {
        BufferMode::Line => true,
        BufferMode::Block => false,
        BufferMode::Auto => self.is_terminal_stdout,
    };
    if line_buffered {
        grep::cli::stdout_buffered_line(color)
    } else {
        grep::cli::stdout_buffered_block(color)
    }
}
pub(crate) fn threads(&self) -> usize {
self.threads
}
pub(crate) fn types(&self) -> &ignore::types::Types {
&self.types
}
pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]);
for path in self.paths.paths.iter().skip(1) {
builder.add(path);
}
if !self.no_ignore_files {
for path in self.ignore_file.iter() {
if let Some(err) = builder.add_ignore(path) {
ignore_message!("{err}");
}
}
}
builder
.max_depth(self.max_depth)
.follow_links(self.follow)
.max_filesize(self.max_filesize)
.threads(self.threads)
.same_file_system(self.one_file_system)
.skip_stdout(matches!(self.mode, Mode::Search(_)))
.overrides(self.globs.clone())
.types(self.types.clone())
.hidden(!self.hidden)
.parents(!self.no_ignore_parent)
.ignore(!self.no_ignore_dot)
.git_global(!self.no_ignore_vcs && !self.no_ignore_global)
.git_ignore(!self.no_ignore_vcs)
.git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
.require_git(!self.no_require_git)
.ignore_case_insensitive(self.ignore_file_case_insensitive)
.current_dir(&self.cwd);
if !self.no_ignore_dot {
builder.add_custom_ignore_filename(".rgignore");
}
if let Some(ref sort) = self.sort {
assert_eq!(1, self.threads, "sorting implies single threaded");
if !sort.reverse && matches!(sort.kind, SortModeKind::Path) {
builder.sort_by_file_name(|a, b| a.cmp(b));
}
}
Ok(builder)
}
}
/// State shared between the individual conversion steps from low-level to
/// high-level arguments.
#[derive(Debug)]
struct State {
    /// Whether stdout is a terminal.
    is_terminal_stdout: bool,
    /// Set once stdin has been read; starts out false.
    stdin_consumed: bool,
    /// The current working directory.
    cwd: PathBuf,
}

impl State {
    /// Captures the working directory and whether stdout is a terminal.
    ///
    /// # Errors
    ///
    /// Returns an error when the working directory cannot be determined.
    fn new() -> anyhow::Result<State> {
        use std::io::IsTerminal;

        let cwd = current_dir()?;
        log::debug!("read CWD from environment: {}", cwd.display());
        let is_terminal_stdout = std::io::stdout().is_terminal();
        Ok(State { is_terminal_stdout, stdin_consumed: false, cwd })
    }
}
/// The patterns to search for.
#[derive(Debug)]
struct Patterns {
    /// The patterns, in the order they were first seen.
    patterns: Vec<String>,
}

impl Patterns {
    /// Collects the patterns from the low-level arguments.
    ///
    /// Outside of search modes no patterns are collected. When no pattern
    /// source was given, the first positional argument is removed from
    /// `low` and used as the sole pattern. Otherwise all pattern sources
    /// are drained and de-duplicated, keeping first occurrences. Reading
    /// patterns from `-` marks stdin as consumed in `state`.
    ///
    /// # Errors
    ///
    /// Returns an error when a pattern is required but missing, when the
    /// positional pattern is not valid UTF-8, when stdin was already
    /// consumed, or when reading patterns from stdin or a file fails.
    fn from_low_args(
        state: &mut State,
        low: &mut LowArgs,
    ) -> anyhow::Result<Patterns> {
        if !matches!(low.mode, Mode::Search(_)) {
            return Ok(Patterns { patterns: vec![] });
        }
        if low.patterns.is_empty() {
            anyhow::ensure!(
                !low.positional.is_empty(),
                "ripgrep requires at least one pattern to execute a search"
            );
            let pat = match low.positional.remove(0).into_string() {
                Ok(pat) => pat,
                Err(_) => anyhow::bail!("pattern given is not valid UTF-8"),
            };
            return Ok(Patterns { patterns: vec![pat] });
        }

        let mut seen = HashSet::new();
        let mut patterns = Vec::with_capacity(low.patterns.len());
        // Only patterns not seen before are cloned and recorded.
        let mut add = |pat: String| {
            if seen.contains(&pat) {
                return;
            }
            seen.insert(pat.clone());
            patterns.push(pat);
        };
        for source in low.patterns.drain(..) {
            match source {
                PatternSource::Regexp(pat) => add(pat),
                PatternSource::File(path) => {
                    let from_file = if path == Path::new("-") {
                        anyhow::ensure!(
                            !state.stdin_consumed,
                            "error reading -f/--file from stdin: stdin \
                             has already been consumed"
                        );
                        let pats = grep::cli::patterns_from_stdin()?;
                        state.stdin_consumed = true;
                        pats
                    } else {
                        grep::cli::patterns_from_path(&path)?
                    };
                    for pat in from_file {
                        add(pat);
                    }
                }
            }
        }
        Ok(Patterns { patterns })
    }
}
/// The paths to search.
#[derive(Debug)]
struct Paths {
    /// The paths to search; never empty once built by `from_low_args`.
    paths: Vec<PathBuf>,
    /// True when no path was given and one was chosen heuristically.
    has_implicit_path: bool,
    /// True when exactly one path is searched and it is either `-` or not
    /// a directory.
    is_one_file: bool,
}

impl Paths {
    /// Drains the remaining positional arguments of `low` into search
    /// paths.
    ///
    /// When no paths are given, `./` is searched if stdin is not readable,
    /// was already consumed, or the mode is not a search mode; otherwise
    /// stdin (`-`) is searched.
    ///
    /// The `&Patterns` parameter is unused. NOTE(review): it presumably
    /// exists so callers must build the patterns first, since that step may
    /// remove the first positional argument — confirm.
    ///
    /// # Errors
    ///
    /// Returns an error when `-` is given as a path after stdin has
    /// already been consumed.
    fn from_low_args(
        state: &mut State,
        _: &Patterns,
        low: &mut LowArgs,
    ) -> anyhow::Result<Paths> {
        let mut paths = Vec::with_capacity(low.positional.len());
        for osarg in low.positional.drain(..) {
            let path = PathBuf::from(osarg);
            let is_stdin = path == Path::new("-");
            if state.stdin_consumed && is_stdin {
                anyhow::bail!(
                    "error: attempted to read patterns from stdin \
                     while also searching stdin",
                );
            }
            paths.push(path);
        }
        log::debug!("number of paths given to search: {}", paths.len());
        if !paths.is_empty() {
            let is_one_file = match paths.as_slice() {
                [only] => {
                    only.as_path() == Path::new("-") || !only.is_dir()
                }
                _ => false,
            };
            log::debug!("is_one_file? {is_one_file:?}");
            return Ok(Paths { paths, has_implicit_path: false, is_one_file });
        }

        let is_readable_stdin = grep::cli::is_readable_stdin();
        let searching = matches!(low.mode, Mode::Search(_));
        let use_cwd = !is_readable_stdin || state.stdin_consumed || !searching;
        log::debug!(
            "using heuristics to determine whether to read from \
             stdin or search ./ (\
             is_readable_stdin={is_readable_stdin}, \
             stdin_consumed={stdin_consumed}, \
             mode={mode:?})",
            stdin_consumed = state.stdin_consumed,
            mode = low.mode,
        );
        let (path, is_one_file) = match use_cwd {
            true => {
                log::debug!("heuristic chose to search ./");
                (PathBuf::from("./"), false)
            }
            false => {
                log::debug!("heuristic chose to search stdin");
                (PathBuf::from("-"), true)
            }
        };
        Ok(Paths { paths: vec![path], has_implicit_path: true, is_one_file })
    }

    /// Returns true when the only path to search is stdin (`-`).
    fn is_only_stdin(&self) -> bool {
        matches!(
            self.paths.as_slice(),
            [only] if only.as_path() == Path::new("-")
        )
    }
}
/// The binary detection strategies handed to the search worker as its
/// "explicit" and "implicit" detection.
#[derive(Debug)]
struct BinaryDetection {
    /// No detection, or NUL conversion.
    explicit: grep::searcher::BinaryDetection,
    /// No detection, NUL conversion, or quitting at the first NUL byte.
    implicit: grep::searcher::BinaryDetection,
}

impl BinaryDetection {
    /// Derives both strategies from the low-level arguments.
    ///
    /// Detection is disabled entirely in `AsText` mode or with `null_data`.
    /// Otherwise the explicit strategy converts NUL bytes, and the implicit
    /// strategy converts them in `SearchAndSuppress` mode and quits on the
    /// first NUL byte in any other mode.
    fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
        use grep::searcher::BinaryDetection as Detect;

        let disabled =
            matches!(low.binary, BinaryMode::AsText) || low.null_data;
        if disabled {
            return BinaryDetection {
                explicit: Detect::none(),
                implicit: Detect::none(),
            };
        }
        let implicit = match low.binary {
            BinaryMode::SearchAndSuppress => Detect::convert(b'\x00'),
            _ => Detect::quit(b'\x00'),
        };
        BinaryDetection { explicit: Detect::convert(b'\x00'), implicit }
    }

    /// Returns true when both strategies perform no detection at all.
    pub(crate) fn is_none(&self) -> bool {
        let none = grep::searcher::BinaryDetection::none();
        !(self.explicit != none || self.implicit != none)
    }
}
/// Builds the file type matcher from the default type definitions plus the
/// type changes in `low`, applied in order.
///
/// # Errors
///
/// Returns an error when a type definition cannot be added or the final
/// matcher cannot be built.
fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
    let mut tb = ignore::types::TypesBuilder::new();
    tb.add_defaults();
    for change in low.type_changes.iter() {
        match change {
            TypeChange::Add { def } => {
                tb.add_def(def)?;
            }
            TypeChange::Clear { name } => {
                tb.clear(name);
            }
            TypeChange::Select { name } => {
                tb.select(name);
            }
            TypeChange::Negate { name } => {
                tb.negate(name);
            }
        }
    }
    let built = tb.build()?;
    Ok(built)
}
/// Builds the override matcher from `low.globs` and `low.iglobs`, rooted at
/// the working directory in `state`.
///
/// Entries of `low.globs` are case insensitive only when
/// `low.glob_case_insensitive` is set; entries of `low.iglobs` always are.
///
/// # Errors
///
/// Returns an error when a glob is invalid or the matcher cannot be built.
fn globs(
    state: &State,
    low: &LowArgs,
) -> anyhow::Result<ignore::overrides::Override> {
    let nothing_given = low.globs.is_empty() && low.iglobs.is_empty();
    if nothing_given {
        return Ok(ignore::overrides::Override::empty());
    }
    let mut overrides = ignore::overrides::OverrideBuilder::new(&state.cwd);
    if low.glob_case_insensitive {
        overrides.case_insensitive(true).unwrap();
    }
    for pattern in low.globs.iter() {
        overrides.add(pattern)?;
    }
    // Switched on only now, so it affects just the globs added below.
    overrides.case_insensitive(true).unwrap();
    for pattern in low.iglobs.iter() {
        overrides.add(pattern)?;
    }
    let built = overrides.build()?;
    Ok(built)
}
/// Builds the override matcher from `low.pre_glob`, rooted at the working
/// directory in `state`. An empty matcher is returned when there are no
/// such globs.
///
/// # Errors
///
/// Returns an error when a glob is invalid or the matcher cannot be built.
fn preprocessor_globs(
    state: &State,
    low: &LowArgs,
) -> anyhow::Result<ignore::overrides::Override> {
    use ignore::overrides::{Override, OverrideBuilder};

    if low.pre_glob.is_empty() {
        return Ok(Override::empty());
    }
    let mut overrides = OverrideBuilder::new(&state.cwd);
    for pattern in low.pre_glob.iter() {
        overrides.add(pattern)?;
    }
    let built = overrides.build()?;
    Ok(built)
}
/// Returns a fresh statistics value when statistics should be collected:
/// in JSON search mode, or in any other search mode with `low.stats` set.
fn stats(low: &LowArgs) -> Option<grep::printer::Stats> {
    let wanted = match low.mode {
        Mode::Search(SearchMode::JSON) => true,
        Mode::Search(_) => low.stats,
        _ => false,
    };
    wanted.then(grep::printer::Stats::new)
}
/// Moves the color specs out of `low` and appends them to the default
/// specs, so that the ones from `low` come last.
fn take_color_specs(_: &mut State, low: &mut LowArgs) -> ColorSpecs {
    let mut specs = grep::printer::default_color_specs();
    specs.extend(low.colors.drain(..));
    ColorSpecs::new(&specs)
}
/// Builds the hyperlink configuration, taking the hyperlink format out of
/// `low` and leaving its default value behind.
///
/// The hyperlink environment gets a host name and a WSL prefix whenever
/// they can be determined.
fn take_hyperlink_config(
    _: &mut State,
    low: &mut LowArgs,
) -> anyhow::Result<grep::printer::HyperlinkConfig> {
    let mut env = grep::printer::HyperlinkEnvironment::new();
    match hostname(low.hostname_bin.as_deref()) {
        None => {}
        Some(hostname) => {
            log::debug!(
                "found hostname for hyperlink configuration: {hostname}"
            );
            env.host(Some(hostname));
        }
    }
    match wsl_prefix() {
        None => {}
        Some(wsl_prefix) => {
            log::debug!(
                "found wsl_prefix for hyperlink configuration: {wsl_prefix}"
            );
            env.wsl_prefix(Some(wsl_prefix));
        }
    }
    let fmt = std::mem::take(&mut low.hyperlink_format);
    log::debug!("hyperlink format: {:?}", fmt.to_string());
    let config = grep::printer::HyperlinkConfig::new(env, fmt);
    Ok(config)
}
/// Returns the current working directory.
///
/// When the operating system cannot provide it, a non-empty `PWD`
/// environment variable is used instead.
///
/// # Errors
///
/// Returns an error when neither source yields a directory.
fn current_dir() -> anyhow::Result<PathBuf> {
    let err = match std::env::current_dir() {
        Ok(cwd) => return Ok(cwd),
        Err(err) => err,
    };
    match std::env::var_os("PWD") {
        Some(cwd) if !cwd.is_empty() => Ok(PathBuf::from(cwd)),
        _ => Err(anyhow::anyhow!(
            "failed to get current working directory: {err}\n\
             did your CWD get deleted?",
        )),
    }
}
/// Determines the host name.
///
/// Without `bin`, the platform host name is used. With `bin`, the command
/// is run with a null stdin and its trimmed output is the host name. If
/// the command cannot be resolved, spawned or read, or if it prints
/// nothing but whitespace, the platform host name is used instead.
fn hostname(bin: Option<&Path>) -> Option<String> {
    let bin = match bin {
        Some(bin) => bin,
        None => return platform_hostname(),
    };
    let bin = match grep::cli::resolve_binary(bin) {
        Err(err) => {
            log::debug!(
                "failed to run command '{bin:?}' to get hostname \
                 (falling back to platform hostname): {err}",
            );
            return platform_hostname();
        }
        Ok(resolved) => resolved,
    };
    let mut cmd = std::process::Command::new(&bin);
    cmd.stdin(std::process::Stdio::null());
    let rdr = match grep::cli::CommandReader::new(&mut cmd) {
        Err(err) => {
            log::debug!(
                "failed to spawn command '{bin:?}' to get \
                 hostname (falling back to platform hostname): {err}",
            );
            return platform_hostname();
        }
        Ok(rdr) => rdr,
    };
    let out = match std::io::read_to_string(rdr) {
        Err(err) => {
            log::debug!(
                "failed to read output from command '{bin:?}' to get \
                 hostname (falling back to platform hostname): {err}",
            );
            return platform_hostname();
        }
        Ok(out) => out,
    };
    match out.trim() {
        "" => {
            log::debug!(
                "output from command '{bin:?}' is empty after trimming \
                 leading and trailing whitespace (falling back to \
                 platform hostname)",
            );
            platform_hostname()
        }
        name => Some(name.to_string()),
    }
}
/// Returns the host name reported by `grep::cli::hostname`, or `None` when
/// it cannot be determined or is not valid UTF-8.
fn platform_hostname() -> Option<String> {
    let hostname_os = match grep::cli::hostname() {
        Err(err) => {
            log::debug!("could not get hostname: {}", err);
            return None;
        }
        Ok(name) => name,
    };
    match hostname_os.to_str() {
        Some(hostname) => Some(hostname.to_string()),
        None => {
            log::debug!(
                "got hostname {:?}, but it's not valid UTF-8",
                hostname_os
            );
            None
        }
    }
}
/// Returns `wsl$/<distro>` built from the `WSL_DISTRO_NAME` environment
/// variable.
///
/// Returns `None` on non-Unix targets, when the variable is unset, or when
/// its value is not valid UTF-8.
fn wsl_prefix() -> Option<String> {
    if cfg!(not(unix)) {
        return None;
    }
    let distro_os = std::env::var_os("WSL_DISTRO_NAME")?;
    match distro_os.to_str() {
        Some(distro) => Some(format!("wsl$/{distro}")),
        None => {
            log::debug!(
                "found WSL_DISTRO_NAME={:?}, but value is not UTF-8",
                distro_os
            );
            None
        }
    }
}
/// Returns `msg` with a hint to try another regex engine appended when
/// `suggest_pcre2` has one to offer, and `msg` unchanged otherwise.
fn suggest_other_engine(msg: String) -> String {
    suggest_pcre2(&msg).unwrap_or(msg)
}
/// Returns `msg` followed by a hint to use `--pcre2`, provided that the
/// `pcre2` feature is enabled and `msg` mentions backreferences or
/// look-around. Returns `None` otherwise.
fn suggest_pcre2(msg: &str) -> Option<String> {
    let pcre2_available = cfg!(feature = "pcre2");
    let mentions_unsupported =
        msg.contains("backreferences") || msg.contains("look-around");
    if !(pcre2_available && mentions_unsupported) {
        return None;
    }
    Some(format!(
        "{msg}\n\
         Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences\n\
         and look-around.",
    ))
}
/// Returns `msg` followed by a hint to enable multiline mode when `msg`
/// contains both "the literal" and "not allowed"; otherwise returns `msg`
/// unchanged.
fn suggest_multiline(msg: String) -> String {
    let literal_rejected =
        msg.contains("the literal") && msg.contains("not allowed");
    if !literal_rejected {
        return msg;
    }
    format!(
        "{msg}\n\
         Consider enabling multiline mode with the --multiline flag (or -U for short).\n\
         When multiline mode is enabled, new line characters can be matched.",
    )
}
/// Returns `msg` followed by a hint to enable text mode when `msg` reports
/// that the pattern contains `"\0"`; otherwise returns `msg` unchanged.
fn suggest_text(msg: String) -> String {
    let mentions_nul = msg.contains("pattern contains \"\\0\"");
    if !mentions_nul {
        return msg;
    }
    format!(
        "{msg}\n\
         Consider enabling text mode with the --text flag (or -a for short). Otherwise,\n\
         binary detection is enabled and matching a NUL byte is impossible.",
    )
}