use std::fs;
use std::io;
use std::io::Read;
use std::io::Write;
use std::ops::Range;
use std::path::Path;
use std::path::PathBuf;
use memchr::memchr;
use memchr::memchr_iter;
use memmap2::Mmap;
use crate::output::OutputConfig;
use crate::output::TAB_FIELD_WIDTH_STDIN;
use crate::output::write_context_line;
use crate::output::write_group_separator;
use crate::output::write_json_path;
use crate::output::write_json_summary;
use crate::output::write_line_match;
use crate::output::write_only_matching;
use crate::pattern::CompiledPattern;
/// Returns how many decimal digits are needed to print `n` (never less than 1).
#[inline]
fn num_digits(n: usize) -> usize {
    // `checked_ilog10` is `None` only for zero, which still occupies one digit.
    n.checked_ilog10().map_or(1, |log| log as usize + 1)
}
/// Size cutoff in bytes (256 KiB): files strictly larger than this are
/// memory-mapped by the readers in this file, smaller ones are read into memory.
const MMAP_THRESHOLD: u64 = 256 * 1024;
/// File contents that are either memory-mapped or borrowed from a
/// caller-supplied buffer that lives for `'a`.
enum FileDataRef<'a> {
/// Contents backed by a memory map owned by this value.
Mmap(Mmap),
/// Contents borrowed from an existing byte buffer.
Borrowed(&'a [u8]),
}
impl<'a> AsRef<[u8]> for FileDataRef<'a> {
#[inline]
fn as_ref(&self) -> &[u8] {
match self {
FileDataRef::Mmap(m) => m.as_ref(),
FileDataRef::Borrowed(b) => b,
}
}
}
// Lets a `FileDataRef` be used directly wherever a byte slice is expected.
impl std::ops::Deref for FileDataRef<'_> {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &Self::Target {
        AsRef::<[u8]>::as_ref(self)
    }
}
/// Loads the contents of `path`, reusing `buf` as storage for small files.
///
/// Files larger than `MMAP_THRESHOLD` are memory-mapped and `buf` is left
/// untouched; otherwise `buf` is cleared, filled with the file contents and
/// borrowed by the returned value.
///
/// # Errors
/// Propagates any I/O error from opening, stat-ing, mapping, advising or
/// reading the file.
fn read_file_reuse<'a>(path: &Path, buf: &'a mut Vec<u8>) -> io::Result<FileDataRef<'a>> {
    let mut handle = fs::File::open(path)?;
    let byte_len = handle.metadata()?.len();

    if byte_len <= MMAP_THRESHOLD {
        buf.clear();
        handle.read_to_end(buf)?;
        return Ok(FileDataRef::Borrowed(&buf[..]));
    }

    // NOTE(review): mapping a file is only sound while nothing truncates or
    // rewrites it underneath us; that is not enforced here — confirm callers
    // accept this.
    let mapping = unsafe { Mmap::map(&handle)? };
    #[cfg(unix)]
    mapping.advise(memmap2::Advice::Sequential)?;
    Ok(FileDataRef::Mmap(mapping))
}
/// Minimum buffer size in bytes (4 MiB) before the search or count work is
/// split across threads.
const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024;
/// Outcome of searching one input.
pub struct FileResult {
/// Path of the searched file; empty when the input came from a reader.
pub path: PathBuf,
/// One entry per selected line. In count-only mode, and for binary files,
/// the entries are placeholders whose fields are all zero/empty.
pub matches: Vec<LineMatch>,
/// Whether the input was classified as binary (it contained a NUL byte).
pub is_binary: bool,
}
/// A single selected line together with where it was found.
pub struct LineMatch {
/// Line number, starting at 1 (0 in placeholder entries).
pub line_no: u32,
/// Copy of the line's bytes, without the trailing `\n`.
pub line: Vec<u8>,
/// Byte ranges of the pattern matches, relative to the start of `line`;
/// empty when ranges were not requested.
pub match_ranges: Vec<Range<usize>>,
/// Byte offset of the start of the line within the searched buffer.
pub byte_offset: u64,
/// Length of the line in bytes.
pub line_len: u32,
}
/// Owned file contents, either memory-mapped or read into a vector.
enum FileData {
/// Contents backed by a memory map.
Mmap(Mmap),
/// Contents read into an owned buffer.
Read(Vec<u8>),
}
impl AsRef<[u8]> for FileData {
#[inline]
fn as_ref(&self) -> &[u8] {
match self {
FileData::Mmap(m) => m.as_ref(),
FileData::Read(v) => v.as_ref(),
}
}
}
// Lets a `FileData` be used directly wherever a byte slice is expected.
impl std::ops::Deref for FileData {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &Self::Target {
        AsRef::<[u8]>::as_ref(self)
    }
}
fn read_file(path: &Path) -> io::Result<FileData> {
let file = fs::File::open(path)?;
let size = file.metadata()?.len();
if size > MMAP_THRESHOLD {
let mmap = unsafe { Mmap::map(&file)? };
#[cfg(unix)]
mmap.advise(memmap2::Advice::Sequential)?;
Ok(FileData::Mmap(mmap))
} else {
Ok(FileData::Read(fs::read(path)?))
}
}
/// Classifies `data` as binary when it contains at least one NUL byte.
fn is_binary(data: &[u8]) -> bool {
    let first_nul = memchr(0, data);
    first_nul.is_some()
}
/// Removes a single trailing `\n` from `data`, if there is one.
///
/// Only one terminator is removed and `\r` is left untouched.
#[inline]
fn strip_line_terminator(data: &[u8]) -> &[u8] {
    match data {
        [head @ .., b'\n'] => head,
        _ => data,
    }
}
/// Forward-only cursor that tracks which line of `data` contains a byte position.
struct LineCursor<'a> {
/// Buffer being scanned.
data: &'a [u8],
/// Byte offset at which the current line starts.
line_start: usize,
/// Byte offset of the current line's terminating `\n`, or `data.len()` when
/// the line is not terminated.
line_end: usize,
/// Number of the current line, starting at 1.
line_no: u32,
}
impl<'a> LineCursor<'a> {
#[inline]
fn new(data: &'a [u8]) -> Self {
let line_end = match memchr(b'\n', data) {
Some(pos) => pos,
None => data.len(),
};
Self { data, line_start: 0, line_end, line_no: 1 }
}
#[inline]
fn advance_to(&mut self, pos: usize) {
while pos > self.line_end && self.line_end < self.data.len() {
self.line_start = self.line_end + 1;
self.line_no += 1;
self.line_end = match memchr(b'\n', &self.data[self.line_start..]) {
Some(rel) => self.line_start + rel,
None => self.data.len(),
};
}
}
#[inline]
fn line(&self) -> &'a [u8] {
&self.data[self.line_start..self.line_end]
}
}
/// Reports whether at least one line of `data` does NOT match `pattern`.
///
/// A single trailing `\n` is treated as the terminator of the last line. A
/// zero-byte input contains no lines at all, so it has no non-matching line.
fn has_non_matching_line(data: &[u8], pattern: &CompiledPattern) -> bool {
    // An empty input has zero lines; without this guard it would be scanned as
    // one empty line and reported as non-matching for most patterns.
    if data.is_empty() {
        return false;
    }
    let data = strip_line_terminator(data);
    let mut start = 0;
    loop {
        let end = memchr(b'\n', &data[start..]).map_or(data.len(), |pos| start + pos);
        if !pattern.is_match(&data[start..end]) {
            return true;
        }
        if end == data.len() {
            return false;
        }
        start = end + 1;
    }
}
pub fn search_file(
path: &Path,
pattern: &CompiledPattern,
invert_match: bool,
need_ranges: bool,
count_only: bool,
) -> io::Result<FileResult> {
let bytes = read_file(path)?;
let bytes: &[u8] = &bytes;
if is_binary(bytes) {
if count_only {
let count = count_matches(bytes, pattern, invert_match);
let matches = (0..count)
.map(|_| LineMatch {
line_no: 0,
line: Vec::new(),
match_ranges: Vec::new(),
byte_offset: 0,
line_len: 0,
})
.collect();
return Ok(FileResult { path: path.to_owned(), matches, is_binary: true });
}
let has_match = if invert_match { true } else { pattern.is_match(bytes) };
return Ok(FileResult {
path: path.to_owned(),
matches: if has_match {
vec![LineMatch {
line_no: 0,
line: Vec::new(),
match_ranges: Vec::new(),
byte_offset: 0,
line_len: 0,
}]
} else {
Vec::new()
},
is_binary: true,
});
}
if count_only {
let count = count_matches(bytes, pattern, invert_match);
let matches = (0..count)
.map(|_| LineMatch {
line_no: 0,
line: Vec::new(),
match_ranges: Vec::new(),
byte_offset: 0,
line_len: 0,
})
.collect();
return Ok(FileResult { path: path.to_owned(), matches, is_binary: false });
}
let matches = search_bytes(bytes, pattern, invert_match, need_ranges);
Ok(FileResult { path: path.to_owned(), matches, is_binary: false })
}
pub fn search_reader(
reader: &mut dyn Read,
pattern: &CompiledPattern,
invert_match: bool,
need_ranges: bool,
count_only: bool,
) -> io::Result<FileResult> {
let mut buf = Vec::new();
reader.read_to_end(&mut buf)?;
if count_only {
let count = count_matches(&buf, pattern, invert_match);
let matches = (0..count)
.map(|_| LineMatch {
line_no: 0,
line: Vec::new(),
match_ranges: Vec::new(),
byte_offset: 0,
line_len: 0,
})
.collect();
return Ok(FileResult { path: PathBuf::new(), matches, is_binary: false });
}
let matches = search_bytes(&buf, pattern, invert_match, need_ranges);
Ok(FileResult { path: PathBuf::new(), matches, is_binary: false })
}
pub fn search_reader_streaming(
reader: &mut dyn Read,
pattern: &CompiledPattern,
invert_match: bool,
output_config: &OutputConfig,
writer: &mut impl Write,
) -> io::Result<usize> {
let mut buf = Vec::new();
reader.read_to_end(&mut buf)?;
let number_width = TAB_FIELD_WIDTH_STDIN;
let has_context = output_config.before_context > 0 || output_config.after_context > 0;
let need_ranges = output_config.requires_match_ranges();
if has_context {
return stream_with_context(
&buf,
pattern,
invert_match,
output_config,
None,
writer,
number_width,
);
}
if output_config.only_matching {
return stream_line_by_line(
&buf,
pattern,
invert_match,
need_ranges,
output_config,
None,
writer,
number_width,
);
}
stream_line_by_line(
&buf,
pattern,
invert_match,
need_ranges,
output_config,
None,
writer,
number_width,
)
}
/// Reads `reader` to the end, searches it and writes the selected lines to
/// `writer`, passing `label` through to the output routines as the path bytes.
///
/// Context output is used when either context size in `output_config` is
/// non-zero; otherwise lines are streamed one by one. Line numbers are
/// formatted with the fixed stdin field width.
///
/// # Errors
/// Propagates I/O errors from reading `reader` or writing to `writer`.
pub fn search_reader_streaming_labeled(
    reader: &mut dyn Read,
    pattern: &CompiledPattern,
    invert_match: bool,
    output_config: &OutputConfig,
    writer: &mut impl Write,
    label: Option<&[u8]>,
) -> io::Result<usize> {
    let mut input = Vec::new();
    reader.read_to_end(&mut input)?;

    let width = TAB_FIELD_WIDTH_STDIN;
    let wants_context = !(output_config.before_context == 0 && output_config.after_context == 0);
    let need_ranges = output_config.requires_match_ranges();

    if !wants_context {
        return stream_line_by_line(
            &input,
            pattern,
            invert_match,
            need_ranges,
            output_config,
            label,
            writer,
            width,
        );
    }
    stream_with_context(&input, pattern, invert_match, output_config, label, writer, width)
}
/// Counts the lines of `data` selected by `pattern`.
///
/// With `invert_match` the non-matching lines are counted instead. A single
/// trailing `\n` terminates the last line rather than starting a new one, and
/// a zero-byte input has no lines, so its count is always 0.
///
/// When the pattern exposes a literal or prefix finder (and the match is not
/// inverted) the whole buffer is scanned for candidates and each line is
/// counted at most once; otherwise every line is tested individually.
fn count_matches(data: &[u8], pattern: &CompiledPattern, invert_match: bool) -> usize {
    // Zero bytes means zero lines; without this guard the loop below would
    // treat the empty buffer as one empty line.
    if data.is_empty() {
        return 0;
    }
    let data = strip_line_terminator(data);

    if !invert_match && let Some(finder) = pattern.literal_finder() {
        let mut count = 0;
        let mut last_line_start: Option<usize> = None;
        let mut lines = LineCursor::new(data);
        for match_pos in finder.find_iter(data) {
            lines.advance_to(match_pos);
            // Several hits on one line still count as a single line.
            if last_line_start != Some(lines.line_start) {
                count += 1;
                last_line_start = Some(lines.line_start);
            }
        }
        return count;
    }

    if !invert_match && let Some(pfx) = pattern.prefix_finder() {
        let mut count = 0;
        let mut last_line_start: Option<usize> = None;
        let mut lines = LineCursor::new(data);
        for match_pos in pfx.find_iter(data) {
            lines.advance_to(match_pos);
            if last_line_start == Some(lines.line_start) {
                continue;
            }
            last_line_start = Some(lines.line_start);
            // A prefix hit is only a candidate; confirm with the full regex.
            if pattern.regex.is_match(lines.line()) {
                count += 1;
            }
        }
        return count;
    }

    let mut count = 0;
    let mut start = 0;
    loop {
        let end = memchr(b'\n', &data[start..]).map_or(data.len(), |pos| start + pos);
        // A line is selected when its match state differs from `invert_match`.
        if pattern.is_match(&data[start..end]) != invert_match {
            count += 1;
        }
        if end == data.len() {
            break;
        }
        start = end + 1;
    }
    count
}
/// Collects every line of `data` selected by `pattern`.
///
/// A single trailing `\n` is treated as the terminator of the last line, and
/// a zero-byte input has no lines, so it yields no results. Dispatches to the
/// literal or prefix-accelerated scan when the pattern offers one and the
/// match is not inverted, and to the line-by-line scan otherwise.
fn search_bytes(
    data: &[u8],
    pattern: &CompiledPattern,
    invert_match: bool,
    need_ranges: bool,
) -> Vec<LineMatch> {
    // Zero bytes means zero lines; the line-by-line scan would otherwise see
    // one empty line and select it for `-v` or for patterns matching "".
    if data.is_empty() {
        return Vec::new();
    }
    let data = strip_line_terminator(data);
    if !invert_match && let Some(finder) = pattern.literal_finder() {
        return search_literal_whole_buffer(data, finder, need_ranges);
    }
    if !invert_match && let Some(pfx) = pattern.prefix_finder() {
        return search_prefix_accelerated(data, pfx, pattern, need_ranges);
    }
    search_bytes_line_by_line(data, pattern, invert_match, need_ranges)
}
/// Finds matching lines by first locating prefix candidates in the whole
/// buffer and then confirming each candidate line with the full regex.
///
/// Each line is examined at most once, however many candidates it contains.
/// Match ranges are collected only when `need_ranges` is set.
fn search_prefix_accelerated(
    data: &[u8],
    pfx: &memchr::memmem::Finder<'_>,
    pattern: &CompiledPattern,
    need_ranges: bool,
) -> Vec<LineMatch> {
    let mut found: Vec<LineMatch> = Vec::new();
    let mut cursor = LineCursor::new(data);
    let mut previous_start: Option<usize> = None;

    for candidate in pfx.find_iter(data) {
        cursor.advance_to(candidate);
        let line_start = cursor.line_start;
        if previous_start == Some(line_start) {
            // Same line as the previous candidate: already handled.
            continue;
        }
        previous_start = Some(line_start);

        let line = cursor.line();
        if pattern.regex.is_match(line) {
            let mut match_ranges = Vec::new();
            if need_ranges {
                match_ranges.extend(pattern.regex.find_iter(line).map(|m| m.start()..m.end()));
            }
            found.push(LineMatch {
                line_no: cursor.line_no,
                line: line.to_vec(),
                match_ranges,
                byte_offset: line_start as u64,
                line_len: line.len() as u32,
            });
        }
    }
    found
}
/// Finds lines containing the literal needle by scanning the whole buffer.
///
/// Each line is reported once even if the needle occurs several times on it.
/// With `need_ranges`, every occurrence on the line is recorded as a range
/// relative to the start of that line.
fn search_literal_whole_buffer(
    data: &[u8],
    finder: &memchr::memmem::Finder<'_>,
    need_ranges: bool,
) -> Vec<LineMatch> {
    let needle_len = finder.needle().len();
    let mut found: Vec<LineMatch> = Vec::new();
    let mut cursor = LineCursor::new(data);
    let mut previous_start: Option<usize> = None;

    for hit in finder.find_iter(data) {
        cursor.advance_to(hit);
        let line_start = cursor.line_start;
        if previous_start == Some(line_start) {
            continue;
        }
        previous_start = Some(line_start);

        let line = cursor.line();
        let mut match_ranges = Vec::new();
        if need_ranges {
            match_ranges.extend(finder.find_iter(line).map(|at| at..(at + needle_len)));
        }
        found.push(LineMatch {
            line_no: cursor.line_no,
            line: line.to_vec(),
            match_ranges,
            byte_offset: line_start as u64,
            line_len: line.len() as u32,
        });
    }
    found
}
/// Tests every `\n`-separated line of `data` against `pattern` and collects
/// the selected ones (the non-matching ones when `invert_match` is set).
///
/// Match ranges are recorded only when `need_ranges` is set and the match is
/// not inverted. Line numbers start at 1.
fn search_bytes_line_by_line(
    data: &[u8],
    pattern: &CompiledPattern,
    invert_match: bool,
    need_ranges: bool,
) -> Vec<LineMatch> {
    let want_ranges = need_ranges && !invert_match;
    let mut found = Vec::new();
    let mut offset = 0;
    let mut line_no: u32 = 1;

    loop {
        let line_end = memchr(b'\n', &data[offset..]).map_or(data.len(), |rel| offset + rel);
        let line = &data[offset..line_end];
        let line_len = line.len() as u32;

        // Selected when the match state differs from `invert_match`.
        if pattern.is_match(line) != invert_match {
            let mut match_ranges = Vec::new();
            if want_ranges {
                match_ranges.extend(pattern.regex.find_iter(line).map(|m| m.start()..m.end()));
            }
            found.push(LineMatch {
                line_no,
                line: line.to_vec(),
                match_ranges,
                byte_offset: offset as u64,
                line_len,
            });
        }

        if line_end == data.len() {
            return found;
        }
        offset = line_end + 1;
        line_no += 1;
    }
}
/// Splits `data` into at most `n` consecutive chunks of roughly equal size,
/// cutting only directly after a `\n` so that no line spans two chunks.
///
/// The chunks are non-empty, cover `data` exactly and keep its order. Every
/// chunk except possibly the last ends with `\n`. Empty input yields no
/// chunks. `n == 0` is treated like `n == 1` instead of dividing by zero.
fn split_at_newlines(data: &[u8], n: usize) -> Vec<&[u8]> {
    // Guard against `n == 0`, which would panic in the division below and
    // underflow in `n - 1`.
    let n = n.max(1);
    let chunk_size = data.len() / n;
    let mut chunks = Vec::with_capacity(n);
    let mut start = 0;
    for _ in 0..n - 1 {
        let target = start + chunk_size;
        if target >= data.len() {
            break;
        }
        // Extend the chunk to the end of the line that contains `target`.
        let boundary = match memchr(b'\n', &data[target..]) {
            Some(pos) => target + pos + 1,
            None => data.len(),
        };
        chunks.push(&data[start..boundary]);
        start = boundary;
        if start >= data.len() {
            break;
        }
    }
    if start < data.len() {
        chunks.push(&data[start..]);
    }
    chunks
}
/// Counts the selected lines of `data`, splitting the work across threads for
/// large buffers.
///
/// Falls back to `count_matches` when only one thread is available or `data`
/// is smaller than `PARALLEL_THRESHOLD`. Otherwise `data` is cut at line
/// boundaries and every chunk is counted on its own scoped thread.
///
/// # Panics
/// Panics if a worker thread panics.
fn parallel_count_matches(data: &[u8], pattern: &CompiledPattern, invert_match: bool) -> usize {
    let n = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(1);
    if n <= 1 || data.len() < PARALLEL_THRESHOLD {
        return count_matches(data, pattern, invert_match);
    }
    // Deliberately NOT stripping the final terminator here: `count_matches`
    // strips one trailing `\n` from every chunk it is given. Stripping here as
    // well removed two newlines from the last chunk and silently dropped a
    // trailing empty line for input ending in "\n\n".
    let chunks = split_at_newlines(data, n);
    std::thread::scope(|s| {
        let handles: Vec<_> = chunks
            .iter()
            .map(|chunk| s.spawn(|| count_matches(chunk, pattern, invert_match)))
            .collect();
        handles.into_iter().map(|h| h.join().unwrap()).sum()
    })
}
#[allow(clippy::too_many_arguments)]
fn parallel_search_streaming(
data: &[u8],
pattern: &CompiledPattern,
invert_match: bool,
need_ranges: bool,
config: &OutputConfig,
path_bytes: Option<&[u8]>,
writer: &mut impl Write,
number_width: usize,
) -> io::Result<usize> {
let n = std::thread::available_parallelism().map(|n| n.get()).unwrap_or(1);
if n <= 1 || data.len() < PARALLEL_THRESHOLD {
if !invert_match && let Some(finder) = pattern.literal_finder() {
return stream_literal_whole_buffer(
data,
finder,
need_ranges,
config,
path_bytes,
writer,
number_width,
);
}
return stream_line_by_line(
data,
pattern,
invert_match,
need_ranges,
config,
path_bytes,
writer,
number_width,
);
}
let data = strip_line_terminator(data);
let chunks = split_at_newlines(data, n);
type ChunkMatch<'a> = (u32, &'a [u8], Vec<Range<usize>>);
let chunk_results: Vec<Vec<ChunkMatch<'_>>> = std::thread::scope(|s| {
let handles: Vec<_> = chunks
.iter()
.map(|chunk| {
s.spawn(|| search_chunk_collect(chunk, pattern, invert_match, need_ranges))
})
.collect();
handles.into_iter().map(|h| h.join().unwrap()).collect()
});
let mut line_offsets = Vec::with_capacity(chunks.len());
let mut cumulative_lines: u32 = 0;
for (i, chunk) in chunks.iter().enumerate() {
line_offsets.push(cumulative_lines);
if i < chunks.len() - 1 {
cumulative_lines += memchr_iter(b'\n', chunk).count() as u32;
}
}
let mut total = 0;
let data_base = data.as_ptr() as usize;
for (chunk_matches, line_offset) in chunk_results.iter().zip(line_offsets.iter()) {
for (rel_line_no, line, ranges) in chunk_matches {
let byte_off = (line.as_ptr() as usize - data_base) as u64;
write_line_match(
writer,
config,
path_bytes,
rel_line_no + line_offset,
byte_off,
line,
ranges,
number_width,
)?;
total += 1;
}
}
Ok(total)
}
/// Searches one chunk and returns `(line number, line, match ranges)` for each
/// selected line, borrowing the lines from `data`.
///
/// Line numbers are relative to the chunk and start at 1. One trailing `\n`
/// is stripped from the chunk first. Literal and prefix finders are used when
/// available and the match is not inverted; otherwise each line is tested.
fn search_chunk_collect<'a>(
    data: &'a [u8],
    pattern: &CompiledPattern,
    invert_match: bool,
    need_ranges: bool,
) -> Vec<(u32, &'a [u8], Vec<Range<usize>>)> {
    let data = strip_line_terminator(data);

    if !invert_match {
        if let Some(finder) = pattern.literal_finder() {
            return collect_literal_whole_buffer(data, finder, need_ranges);
        }
        if let Some(pfx) = pattern.prefix_finder() {
            return collect_prefix_accelerated(data, pfx, pattern, need_ranges);
        }
    }

    let want_ranges = need_ranges && !invert_match;
    let mut hits = Vec::new();
    let mut offset = 0;
    let mut line_no: u32 = 1;
    loop {
        let line_end = memchr(b'\n', &data[offset..]).map_or(data.len(), |rel| offset + rel);
        let line = &data[offset..line_end];

        // Selected when the match state differs from `invert_match`.
        if pattern.is_match(line) != invert_match {
            let mut ranges = Vec::new();
            if want_ranges {
                ranges.extend(pattern.regex.find_iter(line).map(|m| m.start()..m.end()));
            }
            hits.push((line_no, line, ranges));
        }

        if line_end == data.len() {
            return hits;
        }
        offset = line_end + 1;
        line_no += 1;
    }
}
/// Borrowing variant of the prefix-accelerated search: locates prefix
/// candidates in the whole buffer, confirms each candidate line with the full
/// regex and returns `(line number, line, match ranges)` tuples.
///
/// Each line is examined at most once. Ranges are collected only when
/// `need_ranges` is set.
fn collect_prefix_accelerated<'a>(
    data: &'a [u8],
    pfx: &memchr::memmem::Finder<'_>,
    pattern: &CompiledPattern,
    need_ranges: bool,
) -> Vec<(u32, &'a [u8], Vec<Range<usize>>)> {
    let mut hits = Vec::new();
    let mut cursor = LineCursor::new(data);
    let mut previous_start: Option<usize> = None;

    for candidate in pfx.find_iter(data) {
        cursor.advance_to(candidate);
        if previous_start == Some(cursor.line_start) {
            continue;
        }
        previous_start = Some(cursor.line_start);

        let line = cursor.line();
        if pattern.regex.is_match(line) {
            let mut ranges = Vec::new();
            if need_ranges {
                ranges.extend(pattern.regex.find_iter(line).map(|m| m.start()..m.end()));
            }
            hits.push((cursor.line_no, line, ranges));
        }
    }
    hits
}
/// Borrowing variant of the literal search: scans the whole buffer for the
/// needle and returns `(line number, line, match ranges)` once per line that
/// contains it.
///
/// With `need_ranges`, every occurrence on the line is recorded relative to
/// the start of that line.
fn collect_literal_whole_buffer<'a>(
    data: &'a [u8],
    finder: &memchr::memmem::Finder<'_>,
    need_ranges: bool,
) -> Vec<(u32, &'a [u8], Vec<Range<usize>>)> {
    let needle_len = finder.needle().len();
    let mut hits = Vec::new();
    let mut cursor = LineCursor::new(data);
    let mut previous_start: Option<usize> = None;

    for hit in finder.find_iter(data) {
        cursor.advance_to(hit);
        if previous_start == Some(cursor.line_start) {
            continue;
        }
        previous_start = Some(cursor.line_start);

        let line = cursor.line();
        let mut ranges = Vec::new();
        if need_ranges {
            ranges.extend(finder.find_iter(line).map(|at| at..(at + needle_len)));
        }
        hits.push((cursor.line_no, line, ranges));
    }
    hits
}
/// Writes the count line for one file.
///
/// In JSON mode this delegates to `write_json_summary`. Otherwise, when
/// several files are searched, the path and a separator (NUL with
/// `config.null`, `:` otherwise) precede the count; with colour enabled the
/// path is coloured, and the separator too unless it is NUL. The count is
/// followed by a newline.
///
/// # Errors
/// Propagates write errors from `writer`.
fn write_count_line(
    writer: &mut impl Write,
    config: &OutputConfig,
    path: &Path,
    count: usize,
) -> io::Result<()> {
    if config.is_json() {
        return write_json_summary(writer, Some(path), count);
    }

    if config.multi_file {
        let name = path.as_os_str().as_encoded_bytes();
        let colored = config.color;
        // The NUL separator is never wrapped in colour codes.
        let colored_sep = colored && !config.null;
        let separator = if config.null { b'\0' } else { b':' };

        if colored {
            writer.write_all(crate::output::COLOR_FILENAME)?;
        }
        writer.write_all(name)?;
        if colored {
            writer.write_all(crate::output::COLOR_RESET)?;
        }

        if colored_sep {
            writer.write_all(crate::output::COLOR_SEP)?;
        }
        writer.write_all(&[separator])?;
        if colored_sep {
            writer.write_all(crate::output::COLOR_RESET)?;
        }
    }

    let mut digits = itoa::Buffer::new();
    writer.write_all(digits.format(count).as_bytes())?;
    writer.write_all(b"\n")
}
/// Writes a file name on its own line, as used by the files-with-matches and
/// files-without-match listings.
///
/// In JSON mode this delegates to `write_json_path`, passing `true` unless
/// `config.files_without_match` is set. Otherwise the path is written
/// (coloured when enabled) followed by NUL with `config.null`, or a newline.
///
/// # Errors
/// Propagates write errors from `writer`.
fn write_filename_line(
    writer: &mut impl Write,
    config: &OutputConfig,
    path: &Path,
) -> io::Result<()> {
    if config.is_json() {
        return write_json_path(writer, path, !config.files_without_match);
    }

    let name = path.as_os_str().as_encoded_bytes();
    let colored = config.color;
    if colored {
        writer.write_all(crate::output::COLOR_FILENAME)?;
    }
    writer.write_all(name)?;
    if colored {
        writer.write_all(crate::output::COLOR_RESET)?;
    }

    let terminator: &[u8] = if config.null { b"\0" } else { b"\n" };
    writer.write_all(terminator)
}
/// Searches the file at `path` and streams the results to `writer`.
///
/// Binary files (containing a NUL byte, unless `output_config.text` is set)
/// are never printed line by line: depending on the options they are skipped,
/// counted, listed by name, or reported on stderr as matching.
///
/// For text input the output mode is chosen in this order: quiet,
/// files-with-matches, files-without-match, count, context output, and
/// finally plain line output (parallel for large buffers, literal-accelerated
/// when possible).
///
/// The count line is written through `write_count_line`, the same helper the
/// binary branch uses, so JSON output and the NUL separator apply to counts
/// too.
///
/// Returns the number of selected lines (or 0/1 in the modes that only test
/// for the presence of a match).
///
/// # Errors
/// Propagates I/O errors from reading the file or writing to `writer`.
pub fn search_file_streaming(
    path: &Path,
    pattern: &CompiledPattern,
    invert_match: bool,
    output_config: &OutputConfig,
    writer: &mut impl Write,
) -> io::Result<usize> {
    let data = read_file(path)?;
    let bytes: &[u8] = &data;
    let number_width = num_digits(bytes.len());

    if !output_config.text && is_binary(bytes) {
        if output_config.ignore_binary {
            if output_config.count {
                write_count_line(writer, output_config, path, 0)?;
            } else if output_config.files_without_match {
                write_filename_line(writer, output_config, path)?;
            }
            return Ok(0);
        }
        // An inverted search on a binary file is always treated as a match.
        let has_match = invert_match || pattern.is_match(bytes);
        if output_config.quiet {
            return Ok(usize::from(has_match));
        }
        if output_config.files_without_match {
            if !has_match {
                write_filename_line(writer, output_config, path)?;
            }
            return Ok(usize::from(has_match));
        }
        if output_config.count {
            let count = count_matches(bytes, pattern, invert_match);
            write_count_line(writer, output_config, path, count)?;
            return Ok(count);
        }
        if !has_match {
            return Ok(0);
        }
        if output_config.files_with_matches {
            write_filename_line(writer, output_config, path)?;
        } else {
            eprintln!("grep: {}: binary file matches", path.display());
        }
        return Ok(1);
    }

    // Whether at least one line would be selected; evaluated lazily, only in
    // the modes that need nothing more than this yes/no answer.
    let any_selected = || {
        if invert_match {
            has_non_matching_line(bytes, pattern)
        } else {
            pattern.is_match(bytes)
        }
    };

    if output_config.quiet {
        return Ok(usize::from(any_selected()));
    }
    if output_config.files_with_matches {
        let has_match = any_selected();
        if has_match {
            write_filename_line(writer, output_config, path)?;
        }
        return Ok(usize::from(has_match));
    }
    if output_config.files_without_match {
        let has_match = any_selected();
        if !has_match {
            write_filename_line(writer, output_config, path)?;
        }
        return Ok(usize::from(has_match));
    }
    if output_config.count {
        let mut count = parallel_count_matches(bytes, pattern, invert_match);
        if output_config.max_count > 0 && count > output_config.max_count {
            count = output_config.max_count;
        }
        // NOTE(review): this replaces hand-written "\x1b[35m" / "\x1b[36m"
        // escapes; it assumes those equal COLOR_FILENAME / COLOR_SEP — confirm.
        write_count_line(writer, output_config, path, count)?;
        return Ok(count);
    }

    let need_ranges = output_config.requires_match_ranges();
    let path_bytes = if output_config.is_json() || output_config.multi_file {
        Some(path.as_os_str().as_encoded_bytes())
    } else {
        None
    };

    let has_context = output_config.before_context > 0 || output_config.after_context > 0;
    if has_context {
        return stream_with_context(
            bytes,
            pattern,
            invert_match,
            output_config,
            path_bytes,
            writer,
            number_width,
        );
    }
    if bytes.len() >= PARALLEL_THRESHOLD {
        return parallel_search_streaming(
            bytes,
            pattern,
            invert_match,
            need_ranges,
            output_config,
            path_bytes,
            writer,
            number_width,
        );
    }
    if !invert_match && let Some(finder) = pattern.literal_finder() {
        return stream_literal_whole_buffer(
            bytes,
            finder,
            need_ranges,
            output_config,
            path_bytes,
            writer,
            number_width,
        );
    }
    stream_line_by_line(
        bytes,
        pattern,
        invert_match,
        need_ranges,
        output_config,
        path_bytes,
        writer,
        number_width,
    )
}
/// Searches the file at `path` and streams the results to `writer`, reusing
/// `read_buf` as storage for small files so repeated calls avoid reallocating.
///
/// Binary files (containing a NUL byte, unless `output_config.text` is set)
/// are never printed line by line: depending on the options they are skipped,
/// counted, listed by name, or reported on stderr as matching.
///
/// For text input the output mode is chosen in this order: quiet,
/// files-with-matches, files-without-match, count, context output, and
/// finally plain line output (literal-accelerated when possible).
///
/// Returns the number of selected lines (or 0/1 in the modes that only test
/// for the presence of a match).
///
/// # Errors
/// Propagates I/O errors from reading the file or writing to `writer`.
pub fn search_file_streaming_reuse(
    path: &Path,
    pattern: &CompiledPattern,
    invert_match: bool,
    output_config: &OutputConfig,
    writer: &mut impl Write,
    read_buf: &mut Vec<u8>,
) -> io::Result<usize> {
    let data = read_file_reuse(path, read_buf)?;
    let bytes: &[u8] = &data;
    let number_width = num_digits(bytes.len());

    if !output_config.text && is_binary(bytes) {
        if output_config.ignore_binary {
            if output_config.count {
                write_count_line(writer, output_config, path, 0)?;
            } else if output_config.files_without_match {
                write_filename_line(writer, output_config, path)?;
            }
            return Ok(0);
        }
        // An inverted search on a binary file is always treated as a match.
        let has_match = invert_match || pattern.is_match(bytes);
        if output_config.quiet {
            return Ok(usize::from(has_match));
        }
        if output_config.files_without_match {
            if !has_match {
                write_filename_line(writer, output_config, path)?;
            }
            return Ok(usize::from(has_match));
        }
        if output_config.count {
            let count = count_matches(bytes, pattern, invert_match);
            write_count_line(writer, output_config, path, count)?;
            return Ok(count);
        }
        if !has_match {
            return Ok(0);
        }
        if output_config.files_with_matches {
            write_filename_line(writer, output_config, path)?;
        } else {
            eprintln!("grep: {}: binary file matches", path.display());
        }
        return Ok(1);
    }

    // Whether at least one line would be selected. For an inverted search this
    // must look for a line that does not match; the previous files-with-matches
    // test (`!is_match(bytes) || bytes.contains('\n')`) selected any file that
    // merely contained a newline.
    let any_selected = || {
        if invert_match {
            has_non_matching_line(bytes, pattern)
        } else {
            pattern.is_match(bytes)
        }
    };

    if output_config.quiet {
        return Ok(usize::from(any_selected()));
    }
    if output_config.files_with_matches {
        let has_match = any_selected();
        if has_match {
            write_filename_line(writer, output_config, path)?;
        }
        return Ok(usize::from(has_match));
    }
    if output_config.files_without_match {
        let has_match = any_selected();
        if !has_match {
            write_filename_line(writer, output_config, path)?;
        }
        return Ok(usize::from(has_match));
    }
    if output_config.count {
        let mut count = count_matches(bytes, pattern, invert_match);
        // Cap the count at `max_count`, as `search_file_streaming` does.
        if output_config.max_count > 0 && count > output_config.max_count {
            count = output_config.max_count;
        }
        write_count_line(writer, output_config, path, count)?;
        return Ok(count);
    }

    let need_ranges = output_config.requires_match_ranges();
    let path_bytes = if output_config.is_json() || output_config.multi_file {
        Some(path.as_os_str().as_encoded_bytes())
    } else {
        None
    };

    let has_context = output_config.before_context > 0 || output_config.after_context > 0;
    if has_context {
        return stream_with_context(
            bytes,
            pattern,
            invert_match,
            output_config,
            path_bytes,
            writer,
            number_width,
        );
    }
    if !invert_match && let Some(finder) = pattern.literal_finder() {
        return stream_literal_whole_buffer(
            bytes,
            finder,
            need_ranges,
            output_config,
            path_bytes,
            writer,
            number_width,
        );
    }
    stream_line_by_line(
        bytes,
        pattern,
        invert_match,
        need_ranges,
        output_config,
        path_bytes,
        writer,
        number_width,
    )
}
/// Streams every line containing the literal needle to `writer`, scanning the
/// whole buffer instead of testing line by line.
///
/// Each line is written once, in file order, as soon as its first hit is
/// found. Ranges for all occurrences on the line are gathered when
/// `need_ranges` or `config.only_matching` is set. Output stops once the
/// running count reaches `config.max_count` (0 means unlimited).
///
/// Returns the running count: one per written line, or whatever
/// `write_only_matching` reports in only-matching mode.
///
/// # Errors
/// Propagates write errors from `writer`.
fn stream_literal_whole_buffer(
    data: &[u8],
    finder: &memchr::memmem::Finder<'_>,
    need_ranges: bool,
    config: &OutputConfig,
    path_bytes: Option<&[u8]>,
    writer: &mut impl Write,
    number_width: usize,
) -> io::Result<usize> {
    let data = strip_line_terminator(data);
    let needle_len = finder.needle().len();
    let max_count = config.max_count;
    let collect_ranges = need_ranges || config.only_matching;

    let mut cursor = LineCursor::new(data);
    let mut previous_start: Option<usize> = None;
    // Reused across lines to avoid one allocation per match.
    let mut ranges: Vec<Range<usize>> = Vec::new();
    let mut emitted = 0;

    for hit in finder.find_iter(data) {
        cursor.advance_to(hit);
        if previous_start == Some(cursor.line_start) {
            // Another hit on a line that has already been written.
            continue;
        }
        previous_start = Some(cursor.line_start);

        let line = cursor.line();
        ranges.clear();
        if collect_ranges {
            ranges.extend(finder.find_iter(line).map(|at| at..(at + needle_len)));
        }

        if config.only_matching {
            emitted += write_only_matching(
                writer,
                config,
                path_bytes,
                cursor.line_no,
                cursor.line_start as u64,
                line,
                &ranges,
                number_width,
            )?;
        } else {
            write_line_match(
                writer,
                config,
                path_bytes,
                cursor.line_no,
                cursor.line_start as u64,
                line,
                &ranges,
                number_width,
            )?;
            emitted += 1;
        }

        if max_count > 0 && emitted >= max_count {
            break;
        }
    }
    Ok(emitted)
}
/// Tests every line of `data` against `pattern` and streams the selected ones
/// (the non-matching ones with `invert_match`) to `writer`.
///
/// A single trailing `\n` terminates the last line, and a zero-byte input has
/// no lines, so nothing is written for it. Match ranges are gathered when
/// `need_ranges` or `config.only_matching` is set and the match is not
/// inverted. Output stops once the running count reaches `config.max_count`
/// (0 means unlimited).
///
/// Returns the running count: one per written line, or whatever
/// `write_only_matching` reports in only-matching mode.
///
/// # Errors
/// Propagates write errors from `writer`.
#[allow(clippy::too_many_arguments)]
fn stream_line_by_line(
    data: &[u8],
    pattern: &CompiledPattern,
    invert_match: bool,
    need_ranges: bool,
    config: &OutputConfig,
    path_bytes: Option<&[u8]>,
    writer: &mut impl Write,
    number_width: usize,
) -> io::Result<usize> {
    // Zero bytes means zero lines; without this guard an empty input would be
    // printed as one empty line for `-v` or for patterns matching "".
    if data.is_empty() {
        return Ok(0);
    }
    let data = strip_line_terminator(data);
    let max_count = config.max_count;
    let emit_only_matching = config.only_matching && !invert_match;
    let want_ranges = (need_ranges || config.only_matching) && !invert_match;

    let mut count = 0;
    let mut line_no: u32 = 1;
    let mut start = 0;
    loop {
        let end = memchr(b'\n', &data[start..]).map_or(data.len(), |pos| start + pos);
        let line_bytes = &data[start..end];

        // Selected when the match state differs from `invert_match`.
        if pattern.is_match(line_bytes) != invert_match {
            let match_ranges: Vec<Range<usize>> = if want_ranges {
                pattern.regex.find_iter(line_bytes).map(|m| m.start()..m.end()).collect()
            } else {
                Vec::new()
            };
            if emit_only_matching {
                count += write_only_matching(
                    writer,
                    config,
                    path_bytes,
                    line_no,
                    start as u64,
                    line_bytes,
                    &match_ranges,
                    number_width,
                )?;
            } else {
                write_line_match(
                    writer,
                    config,
                    path_bytes,
                    line_no,
                    start as u64,
                    line_bytes,
                    &match_ranges,
                    number_width,
                )?;
                count += 1;
            }
            if max_count > 0 && count >= max_count {
                break;
            }
        }

        if end == data.len() {
            break;
        }
        start = end + 1;
        line_no += 1;
    }
    Ok(count)
}
/// Streams the selected lines of `data` together with their surrounding
/// context lines to `writer`.
///
/// Works in four passes: index all lines, decide which are selected (keeping
/// only the first `config.max_count` when that is non-zero), mark every line
/// within `before_context` / `after_context` of a selected line, then write
/// the marked lines in order with a group separator between non-adjacent runs.
///
/// A zero-byte input has no lines, so nothing is written for it. Match ranges
/// are gathered when the configuration requires them or `only_matching` is
/// set, and the match is not inverted.
///
/// Returns the running count: one per written selected line, or whatever
/// `write_only_matching` reports in only-matching mode. Context lines are not
/// counted.
///
/// # Errors
/// Propagates write errors from `writer`.
fn stream_with_context(
    data: &[u8],
    pattern: &CompiledPattern,
    invert_match: bool,
    config: &OutputConfig,
    path_bytes: Option<&[u8]>,
    writer: &mut impl Write,
    number_width: usize,
) -> io::Result<usize> {
    // Zero bytes means zero lines; without this guard an empty input would be
    // indexed as a single empty line.
    if data.is_empty() {
        return Ok(0);
    }
    let data = strip_line_terminator(data);
    let before = config.before_context;
    let after = config.after_context;
    // Same rule as `stream_line_by_line`: only-matching output needs ranges.
    let want_ranges = (config.requires_match_ranges() || config.only_matching) && !invert_match;
    let emit_only_matching = config.only_matching && !invert_match;

    // Pass 1: (line number, byte offset, bytes) for every line.
    let mut lines: Vec<(u32, u64, &[u8])> = Vec::new();
    let mut start = 0;
    let mut line_no: u32 = 1;
    loop {
        let end = memchr(b'\n', &data[start..]).map_or(data.len(), |pos| start + pos);
        lines.push((line_no, start as u64, &data[start..end]));
        if end == data.len() {
            break;
        }
        start = end + 1;
        line_no += 1;
    }

    // Pass 2: selection flags. Once `max_count` lines are selected every later
    // line is unselected, so the pattern need not be evaluated for it.
    let max_count = config.max_count;
    let mut selected_so_far = 0usize;
    let is_match: Vec<bool> = lines
        .iter()
        .map(|&(_, _, line)| {
            if max_count > 0 && selected_so_far >= max_count {
                return false;
            }
            let selected = pattern.is_match(line) != invert_match;
            if selected {
                selected_so_far += 1;
            }
            selected
        })
        .collect();

    // Pass 3: mark each selected line plus its context window.
    let mut should_print: Vec<bool> = vec![false; lines.len()];
    for (i, &selected) in is_match.iter().enumerate() {
        if selected {
            let first = i.saturating_sub(before);
            // Saturating so a huge context size cannot overflow.
            let last = i.saturating_add(after).saturating_add(1).min(lines.len());
            should_print[first..last].fill(true);
        }
    }

    // Pass 4: output.
    let mut count = 0;
    let mut last_printed: Option<usize> = None;
    for (i, &print) in should_print.iter().enumerate() {
        if !print {
            continue;
        }
        // A gap since the previously printed line starts a new group.
        if let Some(prev) = last_printed
            && i > prev + 1
        {
            write_group_separator(writer, config)?;
        }
        last_printed = Some(i);

        let (ln, byte_off, line) = lines[i];
        if !is_match[i] {
            write_context_line(writer, config, path_bytes, ln, byte_off, line, number_width)?;
            continue;
        }

        let match_ranges: Vec<Range<usize>> = if want_ranges {
            pattern.regex.find_iter(line).map(|m| m.start()..m.end()).collect()
        } else {
            Vec::new()
        };
        if emit_only_matching {
            count += write_only_matching(
                writer,
                config,
                path_bytes,
                ln,
                byte_off,
                line,
                &match_ranges,
                number_width,
            )?;
        } else {
            write_line_match(
                writer,
                config,
                path_bytes,
                ln,
                byte_off,
                line,
                &match_ranges,
                number_width,
            )?;
            count += 1;
        }
    }
    Ok(count)
}