use async_trait::async_trait;
use clap::{CommandFactory, Parser};
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
use grep_searcher::{BinaryDetection, Encoding, SearcherBuilder};
use regex::RegexBuilder;
use std::path::{Path, PathBuf};
use crate::ast::Value;
use crate::backend_walker_fs::BackendWalkerFs;
use crate::interpreter::{ExecResult, OutputData, OutputNode};
use crate::tools::builtin::grep_engine::{AccumulatorSink, ContextKind, SearchEvent};
use crate::tools::{schema_from_clap, ExecContext, ToolCtx, GlobalFlags, Tool, ToolArgs, ToolSchema, validate_against_schema};
use crate::validator::{IssueCode, ValidationIssue};
use crate::walker::{FileWalker, GlobPath, IncludeExclude, WalkOptions};
/// The `grep` builtin: searches files or stdin for a pattern.
pub struct Grep;
// Command-line surface of `grep`, declared with clap's derive API.
//
// Plain `//` comments are used here on purpose: clap's derive macro turns `///`
// doc comments into help text, which would change the generated schema.
//
// In the visible code, `execute` only reads `global` and the three context
// options from the parsed struct; the remaining switches are re-read from
// `ToolArgs` via `has_flag` / `get_string`.
#[derive(Parser, Debug)]
#[command(name = "grep", about = "Search for patterns in files or stdin")]
struct GrepArgs {
// -i: case-insensitive matching.
#[arg(short = 'i', long = "ignore-case", visible_alias = "ignore_case")]
ignore_case: bool,
// -n: prefix each output line with its line number.
#[arg(short = 'n', long = "line-number", visible_alias = "line_number")]
line_number: bool,
// -v: select the lines that do NOT match.
#[arg(short = 'v', long = "invert")]
invert: bool,
// -c: print only the number of matches.
#[arg(short = 'c', long = "count")]
count: bool,
// -o: print only the matched parts of each line.
#[arg(short = 'o', long = "only-matching", visible_alias = "only_matching")]
only_matching: bool,
// -q: no output; the exit code alone reports whether anything matched.
#[arg(short = 'q', long = "quiet")]
quiet: bool,
// -l: print only the names of inputs that contain a match.
#[arg(short = 'l', long = "files-with-matches", visible_alias = "files_with_matches")]
files_with_matches: bool,
// -w: wrap the pattern in `\b` word boundaries.
#[arg(short = 'w', long = "word-regexp", visible_alias = "word_regexp")]
word_regexp: bool,
// -r / -R: walk a directory tree instead of reading a single input.
#[arg(short = 'r', long = "recursive")]
recursive: bool,
#[arg(short = 'R')]
recursive_upper: bool,
// -U: enable multi-line matching in the regex and the searcher.
#[arg(short = 'U', long = "multiline")]
multiline: bool,
// -E: accepted by the parser; never read in the visible code.
#[arg(id = "extended_regexp", short = 'E', long = "extended-regexp", visible_alias = "extended_regexp")]
_extended: bool,
// -F: treat the pattern as a literal string (`execute` escapes it with `regex::escape`).
#[arg(id = "fixed_strings", short = 'F', long = "fixed-strings", visible_alias = "fixed_strings")]
_fixed: bool,
// -A / -B / -C: context line counts. Kept as strings here and converted to
// `usize` inside `execute`, which reports its own error message on failure.
#[arg(short = 'A', long = "after-context", visible_alias = "after_context")]
after_context: Option<String>,
#[arg(short = 'B', long = "before-context", visible_alias = "before_context")]
before_context: Option<String>,
#[arg(short = 'C', long = "context")]
context: Option<String>,
// --include / --exclude: file-name filters applied during recursive walks.
#[arg(long = "include")]
include: Option<String>,
#[arg(long = "exclude")]
exclude: Option<String>,
// --encoding: encoding label handed to the searcher.
#[arg(long = "encoding")]
encoding: Option<String>,
// --binary: "none"/"text" search through NUL bytes, "without-match" converts
// them, anything else (default "quit") stops at the first NUL byte.
#[arg(long = "binary")]
binary: Option<String>,
// Flags shared by all tools; applied to the execution context by `execute`.
#[command(flatten)]
global: GlobalFlags,
// All positional operands. `execute` takes the pattern and the file operands
// from `ToolArgs` rather than from this field.
pattern: Vec<String>,
}
#[async_trait]
impl Tool for Grep {
fn name(&self) -> &str {
"grep"
}
fn schema(&self) -> ToolSchema {
schema_from_clap(
&GrepArgs::command(),
"grep",
"Search for patterns in files or stdin",
[
("Search for pattern in file", "grep pattern file.txt"),
("Case-insensitive search", "grep -i ERROR log.txt"),
("Show line numbers", "grep -n TODO *.rs"),
("Extract matched text only", "grep -o 'https://[^\"]*' file.html"),
("Context around matches", "grep -C 2 error log.txt"),
("Recursive search", "grep -r TODO src/"),
("With file filter", "grep -rn TODO . --include='*.rs'"),
],
)
}
fn validate(&self, args: &ToolArgs) -> Vec<ValidationIssue> {
let mut issues = validate_against_schema(args, &self.schema());
let fixed = args.has_flag("F") || args.has_flag("fixed-strings");
if !fixed && let Some(pattern) = args.get_string("pattern", 0) {
if !pattern.contains("<dynamic>")
&& let Err(e) = regex::Regex::new(&pattern) {
issues.push(ValidationIssue::error(
IssueCode::InvalidRegex,
format!("grep: invalid regex pattern: {}", e),
).with_suggestion("check regex syntax at https://docs.rs/regex"));
}
}
issues
}
async fn execute(&self, mut args: ToolArgs, ctx: &mut dyn ToolCtx) -> ExecResult {
let Some(ctx) = ctx.as_any_mut().downcast_mut::<ExecContext>() else {
return ExecResult::failure(1, "internal error: kernel builtin requires ExecContext");
};
args.flagify_bool_named();
let parsed = match GrepArgs::try_parse_from(
std::iter::once("grep".to_string()).chain(args.to_argv()),
) {
Ok(p) => p,
Err(e) => return ExecResult::failure(2, format!("grep: {e}")),
};
parsed.global.apply(ctx);
let pattern = match args.get_string("pattern", 0) {
Some(p) => p,
None => return ExecResult::failure(1, "grep: missing pattern argument"),
};
let ignore_case = args.has_flag("ignore-case") || args.has_flag("i");
let line_number = args.has_flag("line-number") || args.has_flag("n");
let invert = args.has_flag("invert") || args.has_flag("v");
let count_only = args.has_flag("count") || args.has_flag("c");
let only_matching = args.has_flag("only-matching") || args.has_flag("o");
let quiet = args.has_flag("quiet") || args.has_flag("q");
let files_only = args.has_flag("files-with-matches") || args.has_flag("l");
let word_regexp = args.has_flag("word-regexp") || args.has_flag("w");
let recursive = args.has_flag("recursive") || args.has_flag("r") || args.has_flag("R");
let fixed_strings = args.has_flag("F") || args.has_flag("fixed-strings");
fn parse_context(name: &str, value: &Option<String>) -> Result<Option<usize>, String> {
match value {
None => Ok(None),
Some(s) => s.parse::<usize>().map(Some).map_err(|_| {
format!("grep: invalid {name} value {s:?} (expected a non-negative number)")
}),
}
}
let context = match parse_context("--context", &parsed.context) {
Ok(c) => c,
Err(e) => return ExecResult::failure(2, e),
};
let after_context = match parse_context("--after-context", &parsed.after_context) {
Ok(c) => c.or(context),
Err(e) => return ExecResult::failure(2, e),
};
let before_context = match parse_context("--before-context", &parsed.before_context) {
Ok(c) => c.or(context),
Err(e) => return ExecResult::failure(2, e),
};
let multiline = args.has_flag("multiline") || args.has_flag("U");
let encoding = args.get_string("encoding", usize::MAX);
let binary_mode = args
.get_string("binary", usize::MAX)
.unwrap_or_else(|| "quit".into());
let binary_detection = match binary_mode.as_str() {
"none" | "text" => BinaryDetection::none(),
"without-match" => BinaryDetection::convert(b'\x00'),
_ => BinaryDetection::quit(b'\x00'),
};
let escaped = if fixed_strings { regex::escape(&pattern) } else { pattern };
let final_pattern = if word_regexp {
format!(r"\b{}\b", escaped)
} else {
escaped
};
let regex = match RegexBuilder::new(&final_pattern)
.case_insensitive(ignore_case)
.multi_line(multiline)
.build()
{
Ok(r) => r,
Err(e) => return ExecResult::failure(1, format!("grep: invalid pattern: {}", e)),
};
let matcher = match RegexMatcherBuilder::new()
.case_insensitive(ignore_case)
.multi_line(multiline)
.build(&final_pattern)
{
Ok(m) => m,
Err(e) => return ExecResult::failure(1, format!("grep: invalid pattern: {}", e)),
};
let grep_opts = GrepOptions {
show_line_numbers: line_number,
invert,
only_matching,
before_context,
after_context,
show_filename: false, multiline,
encoding: encoding.clone(),
binary_detection,
};
if recursive {
let path = args
.get_string("path", 1)
.unwrap_or_else(|| ".".to_string());
let root = ctx.resolve_path(&path);
let mut filter = IncludeExclude::new();
if let Some(Value::String(inc)) = args.get("include", usize::MAX) {
filter.include(inc);
}
if let Some(Value::String(exc)) = args.get("exclude", usize::MAX) {
filter.exclude(exc);
}
let glob = if let Some(Value::String(inc)) = args.get("include", usize::MAX) {
GlobPath::new(&format!("**/{}", inc)).ok()
} else {
GlobPath::new("**/*").ok()
};
let options = WalkOptions {
max_depth: None,
entry_types: crate::walker::EntryTypes::files_only(),
respect_gitignore: ctx.ignore_config.auto_gitignore(),
include_hidden: false,
filter,
..WalkOptions::default()
};
let fs = BackendWalkerFs(ctx.backend.as_ref());
let mut walker = if let Some(g) = glob {
FileWalker::new(&fs, &root)
.with_pattern(g)
.with_options(options)
} else {
FileWalker::new(&fs, &root).with_options(options)
};
if let Some(ignore_filter) = ctx.build_ignore_filter(&root).await {
walker = walker.with_ignore(ignore_filter);
}
let files = match walker.collect().await {
Ok(f) => f,
Err(e) => return ExecResult::failure(1, format!("grep: {}", e)),
};
return self
.grep_multiple_files(ctx, &files, &root, &matcher, &grep_opts, quiet, files_only, count_only)
.await;
}
let file_operands: Vec<String> = args
.positional
.iter()
.skip(1)
.map(crate::interpreter::value_to_string)
.collect();
if file_operands.len() > 1 {
let root = ctx.resolve_path(".");
let resolved: Vec<PathBuf> = file_operands
.iter()
.map(|f| ctx.resolve_path(f))
.collect();
return self
.grep_multiple_files(
ctx, &resolved, &root, &matcher, &grep_opts, quiet, files_only, count_only,
)
.await;
}
let is_simple = !count_only && !quiet && !files_only && !only_matching
&& before_context.is_none() && after_context.is_none();
let can_stream = args.get_string("path", 1).is_none()
&& is_simple
&& ctx.pipe_stdin.is_some() && ctx.pipe_stdout.is_some();
if can_stream {
if let (Some(pipe_stdin), Some(pipe_stdout)) =
(ctx.pipe_stdin.take(), ctx.pipe_stdout.take())
{
return self.stream_grep(ctx, pipe_stdin, pipe_stdout, ®ex, invert, line_number).await;
}
}
if let Some(path) = args.get_string("path", 1).filter(|_| is_simple) {
let resolved = ctx.resolve_path(&path);
let quit_byte = match binary_mode.as_str() {
"none" | "text" | "without-match" => None,
_ => Some(b'\x00'),
};
let mut scanner =
GrepLineScanner::new(®ex, invert, line_number, quit_byte, Some(path.clone()));
let scan_result = ctx
.read_file_chunked(
Path::new(&resolved),
ExecContext::STREAM_CHUNK_SIZE,
|chunk| {
scanner.push(chunk);
if scanner.saw_invalid_utf8 || scanner.stopped_early {
std::ops::ControlFlow::Break(())
} else {
std::ops::ControlFlow::Continue(())
}
},
)
.await;
if let Err(e) = scan_result {
return ExecResult::failure(1, format!("grep: {}: {}", path, e));
}
scanner.finish();
if scanner.saw_invalid_utf8 {
return ExecResult::failure(
2,
format!("grep: {path}: binary data — pipe through base64/xxd or use cmp"),
);
}
if !scanner.stopped_early {
let render = scanner.into_render_result();
return if render.match_count == 0 {
ExecResult::from_output(1, render.text, "")
} else {
let headers = if line_number {
vec!["MATCH".to_string(), "LINE".to_string()]
} else {
vec!["MATCH".to_string()]
};
let output = OutputData::table(headers, render.nodes)
.with_rich_json(serde_json::Value::Array(render.rich));
ExecResult::with_output_and_text(output, render.text)
};
}
}
let (bytes, filename) = match args.get_string("path", 1) {
Some(path) => {
let resolved = ctx.resolve_path(&path);
match ctx.backend.read(Path::new(&resolved), None).await {
Ok(data) => (data, Some(path)),
Err(e) => return ExecResult::failure(1, format!("grep: {}: {}", path, e)),
}
}
None => {
let text = match ctx.read_stdin_to_text().await {
Ok(s) => s.unwrap_or_default(),
Err(e) => return ExecResult::failure(2, format!("grep: {e}")),
};
(text.into_bytes(), None)
}
};
if std::str::from_utf8(&bytes).is_err() {
let where_ = filename.as_deref().unwrap_or("(standard input)");
return ExecResult::failure(
2,
format!("grep: {where_}: binary data — pipe through base64/xxd or use cmp"),
);
}
let render = match grep_lines_structured(
&bytes,
&matcher,
&grep_opts,
filename.as_deref(),
) {
Ok(t) => t,
Err(e) => return ExecResult::failure(1, format!("grep: {e}")),
};
if quiet {
return if render.match_count > 0 {
ExecResult::success("")
} else {
ExecResult::from_output(1, "", "")
};
}
if files_only {
return if render.match_count > 0 {
if let Some(name) = filename {
ExecResult::with_output(OutputData::text(format!("{}\n", name)))
} else {
ExecResult::with_output(OutputData::text("-\n".to_string()))
}
} else {
ExecResult::from_output(1, "", "")
};
}
if count_only {
ExecResult::with_output(OutputData::text(format!("{}\n", render.match_count)))
} else if render.match_count == 0 {
ExecResult::from_output(1, render.text, "")
} else {
let headers = if grep_opts.show_line_numbers {
vec!["MATCH".to_string(), "LINE".to_string()]
} else {
vec!["MATCH".to_string()]
};
let output = OutputData::table(headers, render.nodes)
.with_rich_json(serde_json::Value::Array(render.rich));
ExecResult::with_output_and_text(output, render.text)
}
}
}
impl Grep {
async fn stream_grep(
&self,
_ctx: &mut ExecContext,
pipe_in: crate::scheduler::PipeReader,
mut pipe_out: crate::scheduler::PipeWriter,
regex: ®ex::Regex,
invert: bool,
show_line_numbers: bool,
) -> ExecResult {
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
let mut reader = BufReader::new(pipe_in);
let mut match_count = 0usize;
let mut line_num = 0usize;
let mut line_buf = String::new();
loop {
line_buf.clear();
match reader.read_line(&mut line_buf).await {
Ok(0) => break,
Ok(_) => {
line_num += 1;
let matches = regex.is_match(line_buf.trim_end_matches('\n'));
let should_output = if invert { !matches } else { matches };
if should_output {
match_count += 1;
let output = if show_line_numbers {
format!("{}:{}", line_num, line_buf)
} else {
line_buf.clone()
};
if pipe_out.write_all(output.as_bytes()).await.is_err() {
break;
}
if !output.ends_with('\n') && pipe_out.write_all(b"\n").await.is_err() {
break;
}
}
}
Err(_) => break,
}
}
drop(reader);
let _ = pipe_out.shutdown().await;
if match_count > 0 {
ExecResult::success("")
} else {
ExecResult::from_output(1, String::new(), String::new())
}
}
#[allow(clippy::too_many_arguments)]
async fn grep_multiple_files(
&self,
ctx: &mut ExecContext,
files: &[PathBuf],
root: &Path,
matcher: &RegexMatcher,
base_opts: &GrepOptions,
quiet: bool,
files_only: bool,
count_only: bool,
) -> ExecResult {
let mut total_output = String::new();
let mut total_nodes: Vec<OutputNode> = Vec::new();
let mut total_rich: Vec<serde_json::Value> = Vec::new();
let mut total_matches: usize = 0;
let mut files_with_matches = Vec::new();
let opts = GrepOptions {
show_filename: true,
..base_opts.clone()
};
for file_path in files {
let bytes = match ctx.backend.read(file_path, None).await {
Ok(data) => data,
Err(_) => continue,
};
let display_name = file_path
.strip_prefix(root)
.unwrap_or(file_path)
.to_string_lossy()
.to_string();
let render = match grep_lines_structured(&bytes, matcher, &opts, Some(&display_name)) {
Ok(t) => t,
Err(_) => continue,
};
if render.match_count > 0 {
total_matches += render.match_count;
files_with_matches.push(display_name.clone());
if !quiet && !files_only && !count_only {
total_output.push_str(&render.text);
total_nodes.extend(render.nodes);
total_rich.extend(render.rich);
}
}
}
if quiet {
return if total_matches > 0 {
ExecResult::success("")
} else {
ExecResult::from_output(1, "", "")
};
}
if files_only {
return if files_with_matches.is_empty() {
ExecResult::from_output(1, "", "")
} else {
ExecResult::with_output(OutputData::text(files_with_matches.join("\n") + "\n"))
};
}
if count_only {
ExecResult::with_output(OutputData::text(format!("{}\n", total_matches)))
} else if total_matches == 0 {
ExecResult::from_output(1, total_output, "")
} else {
let headers = if opts.show_line_numbers {
vec!["MATCH".to_string(), "FILE".to_string(), "LINE".to_string()]
} else {
vec!["MATCH".to_string(), "FILE".to_string()]
};
let output = OutputData::table(headers, total_nodes)
.with_rich_json(serde_json::Value::Array(total_rich));
ExecResult::with_output_and_text(output, total_output)
}
}
}
/// Incremental line matcher fed with raw byte chunks of a single input.
///
/// Bytes are buffered until a complete line is available; each complete line
/// is tested against `regex`, and selected lines are accumulated as plain
/// text, table nodes and rich JSON records.
struct GrepLineScanner<'r> {
/// Compiled pattern tested against every line.
regex: &'r regex::Regex,
/// When set, lines that do NOT match are selected.
invert: bool,
/// When set, text lines get an `N:` prefix and nodes get a line-number cell.
show_line_numbers: bool,
/// Value reported as `path` in the rich JSON records (`null` when absent).
path: Option<String>,
/// When set, scanning stops at the first chunk containing this byte.
quit_byte: Option<u8>,
/// Bytes received so far that do not yet form a complete line.
carry: Vec<u8>,
/// Number of bytes already removed from `carry`; base for byte offsets.
consumed: u64,
/// Number of the most recently processed line (the first line is 1).
line_number: u64,
/// Accumulated plain-text output, one selected line per row.
text: String,
/// Accumulated table rows.
nodes: Vec<OutputNode>,
/// Accumulated rich JSON records, one per selected line.
rich: Vec<serde_json::Value>,
/// Number of selected lines.
match_count: usize,
/// Set once the input is found not to be valid UTF-8.
saw_invalid_utf8: bool,
/// Set once `quit_byte` has been seen.
stopped_early: bool,
}
impl<'r> GrepLineScanner<'r> {
fn new(
regex: &'r regex::Regex,
invert: bool,
show_line_numbers: bool,
quit_byte: Option<u8>,
path: Option<String>,
) -> Self {
Self {
regex,
invert,
show_line_numbers,
path,
quit_byte,
carry: Vec::new(),
consumed: 0,
line_number: 0,
text: String::new(),
nodes: Vec::new(),
rich: Vec::new(),
match_count: 0,
saw_invalid_utf8: false,
stopped_early: false,
}
}
fn push(&mut self, chunk: &[u8]) {
if self.saw_invalid_utf8 || self.stopped_early {
return;
}
let chunk = if let Some(qb) = self.quit_byte {
if let Some(pos) = chunk.iter().position(|&b| b == qb) {
let prefix = &chunk[..pos];
self.carry.extend_from_slice(prefix);
self.drain_complete_lines();
self.stopped_early = true;
return;
}
chunk
} else {
chunk
};
self.carry.extend_from_slice(chunk);
self.drain_complete_lines();
}
fn drain_complete_lines(&mut self) {
let valid_text = match std::str::from_utf8(&self.carry) {
Ok(s) => s.to_owned(),
Err(e) if e.error_len().is_none() => {
let valid_up_to = e.valid_up_to();
match std::str::from_utf8(&self.carry[..valid_up_to]) {
Ok(s) => s.to_owned(),
Err(_) => String::new(),
}
}
Err(_) => {
self.saw_invalid_utf8 = true;
return;
}
};
let Some(last_nl) = valid_text.rfind('\n') else {
return; };
let mut local = 0usize;
for line in valid_text[..last_nl].split('\n') {
let line_abs = self.consumed + local as u64;
self.line_number += 1;
let stripped = line.strip_suffix('\r').unwrap_or(line);
self.match_line(stripped, line_abs);
local += line.len() + 1; }
self.carry.drain(..last_nl + 1);
self.consumed += (last_nl + 1) as u64;
}
fn finish(&mut self) {
if self.saw_invalid_utf8 || self.stopped_early || self.carry.is_empty() {
return;
}
match std::str::from_utf8(&self.carry) {
Ok(line) => {
let owned = line.strip_suffix('\r').unwrap_or(line).to_owned();
self.line_number += 1;
let line_abs = self.consumed;
self.match_line(&owned, line_abs);
self.consumed += self.carry.len() as u64;
self.carry.clear();
}
Err(_) => {
self.saw_invalid_utf8 = true;
}
}
}
fn match_line(&mut self, line: &str, byte_offset: u64) {
let matches = self.regex.is_match(line);
let should_output = if self.invert { !matches } else { matches };
if !should_output {
return;
}
self.match_count += 1;
if self.show_line_numbers {
self.text.push_str(&format!("{}:{}\n", self.line_number, line));
} else {
self.text.push_str(line);
self.text.push('\n');
}
let mut cells = Vec::new();
if self.show_line_numbers {
cells.push(self.line_number.to_string());
}
self.nodes
.push(OutputNode::new(line).with_cells(cells));
let submatches: Vec<serde_json::Value> = self
.regex
.find_iter(line)
.map(|m| {
serde_json::json!({
"text": m.as_str(),
"start": m.start(),
"end": m.end(),
})
})
.collect();
let path_v = match &self.path {
Some(p) => serde_json::Value::String(p.clone()),
None => serde_json::Value::Null,
};
self.rich.push(serde_json::json!({
"path": path_v,
"line_number": self.line_number,
"byte_offset": byte_offset,
"line_text": line,
"submatches": submatches,
}));
}
fn into_render_result(self) -> RenderResult {
RenderResult {
text: self.text,
nodes: self.nodes,
rich: self.rich,
match_count: self.match_count,
}
}
}
/// Search and rendering options shared by the searcher-based code paths.
#[derive(Clone)]
struct GrepOptions {
/// Prefix output lines with their line number and add a line-number cell.
show_line_numbers: bool,
/// Select the lines that do NOT match.
invert: bool,
/// Prefix output lines with the file name and add a file-name cell.
show_filename: bool,
/// Emit each submatch on its own line instead of the whole matching line.
only_matching: bool,
/// Number of context lines before each match, if requested.
before_context: Option<usize>,
/// Number of context lines after each match, if requested.
after_context: Option<usize>,
/// Enable multi-line mode in the searcher.
multiline: bool,
/// Encoding label passed to `Encoding::new`; `None` leaves the searcher default.
encoding: Option<String>,
/// Binary-detection policy handed to the searcher.
binary_detection: BinaryDetection,
}
/// Searches `input` with `matcher` and renders the resulting events.
///
/// The searcher is configured from `opts` (line numbers are always tracked).
/// `filename` is only used for rendering.
///
/// Returns `Err` with a message when the encoding label is not recognised or
/// when the search itself fails.
fn grep_lines_structured(
    input: &[u8],
    matcher: &RegexMatcher,
    opts: &GrepOptions,
    filename: Option<&str>,
) -> Result<RenderResult, String> {
    let mut builder = SearcherBuilder::new();
    builder
        .line_number(true)
        .multi_line(opts.multiline)
        .invert_match(opts.invert)
        .binary_detection(opts.binary_detection.clone());
    if let Some(lines_before) = opts.before_context {
        builder.before_context(lines_before);
    }
    if let Some(lines_after) = opts.after_context {
        builder.after_context(lines_after);
    }
    if let Some(enc_label) = opts.encoding.as_deref() {
        let enc = Encoding::new(enc_label)
            .map_err(|e| format!("invalid encoding '{enc_label}': {e}"))?;
        builder.encoding(Some(enc));
    }

    let mut sink = AccumulatorSink::new(matcher, None);
    builder
        .build()
        .search_slice(matcher, input, &mut sink)
        .map_err(|e| e.to_string())?;
    let collected = sink.into_events();
    Ok(render_events(&collected, opts, filename))
}
/// Output of one search, in the three forms the tool can emit.
struct RenderResult {
/// Plain-text rendering, one output line per row.
text: String,
/// Table rows, one per emitted match.
nodes: Vec<OutputNode>,
/// Rich JSON records with path, line number, byte offset and submatches.
rich: Vec<serde_json::Value>,
/// Number of matches counted while rendering.
match_count: usize,
}
/// Turns search events into text, table rows and rich JSON records.
///
/// Match lines use `:` as the prefix separator and context lines use `-`.
/// A context break becomes a `--` line unless nothing has been emitted yet.
/// With `only_matching` (and no inversion) every submatch of a line gets its
/// own text line and table row, while the line still counts as one match.
fn render_events(events: &[SearchEvent], opts: &GrepOptions, filename: Option<&str>) -> RenderResult {
    // The file name takes part in the output only when requested and known.
    let shown_name = if opts.show_filename { filename } else { None };

    // Text prefix: `file<sep>` and/or `line<sep>`.
    let line_prefix = |line_num: u64, sep: char| -> String {
        let mut buf = String::new();
        if let Some(name) = shown_name {
            buf.push_str(name);
            buf.push(sep);
        }
        if opts.show_line_numbers {
            buf.push_str(&format!("{line_num}{sep}"));
        }
        buf
    };
    // Extra table cells that accompany every row: file name, then line number.
    let row_cells = |line_num: u64| -> Vec<String> {
        let mut cells = Vec::new();
        if let Some(name) = shown_name {
            cells.push(name.to_string());
        }
        if opts.show_line_numbers {
            cells.push(line_num.to_string());
        }
        cells
    };

    let mut text = String::new();
    let mut nodes: Vec<OutputNode> = Vec::new();
    let mut rich: Vec<serde_json::Value> = Vec::new();
    let mut match_count: usize = 0;
    let mut emitted_any = false;

    for event in events {
        match event {
            SearchEvent::Match(m) => {
                let line_num = m.line_number.unwrap_or(0);
                let per_submatch =
                    opts.only_matching && !opts.invert && !m.submatches.is_empty();
                if per_submatch {
                    for sub in &m.submatches {
                        text.push_str(&line_prefix(line_num, ':'));
                        text.push_str(&sub.text);
                        text.push('\n');
                        nodes.push(OutputNode::new(&sub.text).with_cells(row_cells(line_num)));
                    }
                } else {
                    text.push_str(&line_prefix(line_num, ':'));
                    text.push_str(&m.line_text);
                    text.push('\n');
                    nodes.push(OutputNode::new(&m.line_text).with_cells(row_cells(line_num)));
                }
                rich.push(match_record_to_json(m, filename));
                match_count += 1;
                emitted_any = true;
            }
            SearchEvent::Context(c) => {
                // Every context kind currently renders with the same separator.
                let sep = match c.kind {
                    ContextKind::Before | ContextKind::After | ContextKind::Other => '-',
                };
                text.push_str(&line_prefix(c.line_number.unwrap_or(0), sep));
                text.push_str(&c.line_text);
                text.push('\n');
                emitted_any = true;
            }
            SearchEvent::ContextBreak if emitted_any => text.push_str("--\n"),
            SearchEvent::ContextBreak => {}
        }
    }

    RenderResult {
        text,
        nodes,
        rich,
        match_count,
    }
}
/// Builds the rich JSON record for one match.
///
/// The record's own path wins over `fallback_path`; when neither is present
/// `path` is `null`. A missing line number is also emitted as `null`.
fn match_record_to_json(
    m: &crate::tools::builtin::grep_engine::MatchRecord,
    fallback_path: Option<&str>,
) -> serde_json::Value {
    use serde_json::{Value, json};

    let path_v = match (m.path.as_ref(), fallback_path) {
        (Some(own), _) => Value::String(own.to_string_lossy().to_string()),
        (None, Some(fallback)) => Value::String(fallback.to_string()),
        (None, None) => Value::Null,
    };
    let line_number_v = m
        .line_number
        .map_or(Value::Null, |n| Value::Number(n.into()));

    let mut submatches: Vec<Value> = Vec::new();
    for sub in m.submatches.iter() {
        submatches.push(json!({
            "text": sub.text,
            "start": sub.start,
            "end": sub.end,
        }));
    }

    json!({
        "path": path_v,
        "line_number": line_number_v,
        "byte_offset": m.absolute_byte_offset,
        "line_text": m.line_text,
        "submatches": submatches,
    })
}
#[cfg(test)]
mod tests {
use super::*;
use crate::vfs::{Filesystem, MemoryFs, VfsRouter};
use std::sync::Arc;
/// Builds an execution context over an in-memory filesystem that holds the
/// two fixture files `test.txt` and `lines.txt`, mounted at `/`.
async fn make_ctx() -> ExecContext {
    let mem = MemoryFs::new();
    let fixtures = [
        ("test.txt", &b"hello world\nHELLO WORLD\nfoo bar\nbaz"[..]),
        ("lines.txt", &b"line one\nline two\nline three\nfour"[..]),
    ];
    for (name, content) in fixtures {
        mem.write(Path::new(name), content).await.unwrap();
    }
    let mut router = VfsRouter::new();
    router.mount("/", mem);
    ExecContext::new(Arc::new(router))
}
// A lowercase pattern selects exactly the one matching line of a file.
#[tokio::test]
async fn test_grep_file() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String("hello".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert_eq!(&*out, "hello world\n");
}
// `-i` matches both the lowercase and the uppercase line.
#[tokio::test]
async fn test_grep_case_insensitive() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("i".to_string());
    args.positional.extend([
        Value::String("hello".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert!(out.contains("hello world"));
    assert!(out.contains("HELLO WORLD"));
}
// `-n` prefixes every matching line with its line number.
#[tokio::test]
async fn test_grep_line_numbers() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("n".to_string());
    args.positional.extend([
        Value::String("line".into()),
        Value::String("/lines.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert!(out.contains("1:line one"));
    assert!(out.contains("2:line two"));
    assert!(out.contains("3:line three"));
}
// `-v` keeps only the line that does not contain the pattern.
#[tokio::test]
async fn test_grep_invert() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("v".to_string());
    args.positional.extend([
        Value::String("line".into()),
        Value::String("/lines.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert_eq!(&*out, "four\n");
}
// `-c` prints the number of matching lines instead of the lines.
#[tokio::test]
async fn test_grep_count() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("c".to_string());
    args.positional.extend([
        Value::String("line".into()),
        Value::String("/lines.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert_eq!(&*out, "3\n");
}
// No match is a quiet failure: exit code 1 and nothing on stderr.
#[tokio::test]
async fn test_grep_no_match() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String("xyz".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(!outcome.ok());
    assert!(outcome.err.is_empty());
    assert_eq!(outcome.code, 1);
}
// Without a file operand the pattern is applied to stdin.
#[tokio::test]
async fn test_grep_stdin() {
    let mut ctx = make_ctx().await;
    ctx.set_stdin("apple\nbanana\napricot\n".to_string());
    let mut args = ToolArgs::new();
    args.positional.push(Value::String("ap".into()));
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert!(out.contains("apple"));
    assert!(out.contains("apricot"));
    assert!(!out.contains("banana"));
}
// An anchored regex matches lines starting with "line" but not "four".
#[tokio::test]
async fn test_grep_regex() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String("^line".into()),
        Value::String("/lines.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    assert!(out.contains("line one"));
    assert!(!out.contains("four"));
}
// A pattern that does not compile is reported as "invalid pattern".
#[tokio::test]
async fn test_grep_invalid_regex() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String("[invalid".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(!outcome.ok());
    assert!(outcome.err.contains("invalid pattern"));
}
// Running without any argument fails and mentions the missing pattern.
#[tokio::test]
async fn test_grep_missing_pattern() {
    let mut ctx = make_ctx().await;
    let no_args = ToolArgs::new();
    let outcome = Grep.execute(no_args, &mut ctx).await;
    assert!(!outcome.ok());
    assert!(outcome.err.contains("pattern"));
}
// A file operand that does not exist makes the command fail.
#[tokio::test]
async fn test_grep_file_not_found() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String("pattern".into()),
        Value::String("/nonexistent".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(!outcome.ok());
}
// `-o` prints each occurrence on its own line, even within one input line.
#[tokio::test]
async fn test_grep_only_matching() {
    let mut ctx = make_ctx().await;
    ctx.set_stdin("hello world hello\nfoo bar\n".to_string());
    let mut args = ToolArgs::new();
    args.flags.insert("o".to_string());
    args.positional.push(Value::String("hello".into()));
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out();
    let rows: Vec<&str> = out.lines().collect();
    assert_eq!(rows.len(), 2);
    assert_eq!(rows[0], "hello");
    assert_eq!(rows[1], "hello");
}
// `-q` with a match succeeds and prints nothing.
#[tokio::test]
async fn test_grep_quiet_match() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("q".to_string());
    args.positional.extend([
        Value::String("hello".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    assert!(outcome.text_out().is_empty());
}
// `-q` without a match fails with exit code 1.
#[tokio::test]
async fn test_grep_quiet_no_match() {
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("q".to_string());
    args.positional.extend([
        Value::String("xyz".into()),
        Value::String("/test.txt".into()),
    ]);
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(!outcome.ok());
    assert_eq!(outcome.code, 1);
}
// With `-U` a dot-all pattern may match across line boundaries.
#[tokio::test]
async fn test_grep_multiline_flag() {
    let mut ctx = make_ctx().await;
    ctx.set_stdin("foo line\nmiddle\nbar line\n".to_string());
    let mut args = ToolArgs::new();
    args.flags.insert("U".to_string());
    args.positional.push(Value::String("(?s)foo.*bar".into()));
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(
        outcome.ok(),
        "multiline grep failed: code={} err={}",
        outcome.code,
        outcome.err
    );
    let out = outcome.text_out();
    assert!(
        out.contains("foo line"),
        "expected match crossing lines: {:?}",
        out.to_string(),
    );
}
// Without `-U` the same cross-line pattern finds nothing (exit code 1).
#[tokio::test]
async fn test_grep_no_multiline_by_default() {
    let mut ctx = make_ctx().await;
    ctx.set_stdin("foo line\nmiddle\nbar line\n".to_string());
    let mut args = ToolArgs::new();
    args.positional.push(Value::String("(?s)foo.*bar".into()));
    let outcome = Grep.execute(args, &mut ctx).await;
    assert_eq!(outcome.code, 1);
}
#[tokio::test]
async fn test_grep_binary_quit_default() {
use crate::vfs::{Filesystem, MemoryFs, VfsRouter};
use std::sync::Arc;
let mut vfs = VfsRouter::new();
let mem = MemoryFs::new();
let mut bytes = b"foo\x00bar\n".to_vec();
bytes.extend_from_slice(b"second line foo\n");
mem.write(Path::new("bin.dat"), &bytes).await.unwrap();
vfs.mount("/", mem);
let mut ctx = ExecContext::new(Arc::new(vfs));
let mut args = ToolArgs::new();
args.positional.push(Value::String("foo".into()));
args.positional.push(Value::String("/bin.dat".into()));
let result = Grep.execute(args, &mut ctx).await;
assert!(
!result.text_out().contains("second line"),
"binary quit should suppress post-NUL output, got: {:?}",
result.text_out().to_string(),
);
}
#[tokio::test]
async fn test_grep_binary_text_searches_through() {
use crate::vfs::{Filesystem, MemoryFs, VfsRouter};
use std::sync::Arc;
let mut vfs = VfsRouter::new();
let mem = MemoryFs::new();
let mut bytes = b"foo\x00bar\n".to_vec();
bytes.extend_from_slice(b"after_null foo bar\n");
mem.write(Path::new("bin.dat"), &bytes).await.unwrap();
vfs.mount("/", mem);
let mut ctx = ExecContext::new(Arc::new(vfs));
let mut args = ToolArgs::new();
args.positional.push(Value::String("foo".into()));
args.positional.push(Value::String("/bin.dat".into()));
args.named
.insert("binary".to_string(), Value::String("text".into()));
let result = Grep.execute(args, &mut ctx).await;
assert!(result.ok());
assert!(
result.text_out().contains("after_null"),
"binary=text should find post-NUL match, got: {:?}",
result.text_out().to_string(),
);
}
// The JSON output format exposes the rich record: every match carries path,
// line number, byte offset, line text and a non-empty list of submatches.
#[tokio::test]
async fn test_grep_json_rich_schema() {
    use kaish_types::output::{OutputFormat, apply_output_format};
    let mut ctx = make_ctx().await;
    let mut args = ToolArgs::new();
    args.flags.insert("n".to_string());
    args.positional.extend([
        Value::String("hello".into()),
        Value::String("/test.txt".into()),
    ]);
    let raw = Grep.execute(args, &mut ctx).await;
    let formatted = apply_output_format(raw, OutputFormat::Json);
    let parsed: serde_json::Value =
        serde_json::from_str(&formatted.text_out()).expect("valid JSON");
    let records = parsed.as_array().expect("array");
    assert!(!records.is_empty(), "expected at least one match: {parsed:#?}");

    let first = &records[0];
    let required = ["path", "line_number", "byte_offset", "line_text", "submatches"];
    for key in required {
        assert!(
            first.get(key).is_some(),
            "missing key {key:?} in rich JSON: {first:#?}",
        );
    }

    let subs = first
        .get("submatches")
        .and_then(|v| v.as_array())
        .expect("submatches array");
    assert!(!subs.is_empty(), "expected at least one submatch");
    let first_sub = &subs[0];
    assert!(first_sub.get("text").and_then(|v| v.as_str()).is_some());
    for offset_key in ["start", "end"] {
        assert!(first_sub.get(offset_key).and_then(|v| v.as_u64()).is_some());
    }
}
// Two matches whose context windows do not touch are separated by `--`.
#[tokio::test]
async fn test_grep_context_break_separator() {
    let mut ctx = make_ctx().await;
    let input = "match1\nbetween1\nbetween2\nbetween3\nbetween4\nmatch2\n";
    ctx.set_stdin(input.to_string());
    let mut args = ToolArgs::new();
    args.named.insert("context".to_string(), Value::Int(1));
    args.positional.push(Value::String("match".into()));
    let outcome = Grep.execute(args, &mut ctx).await;
    assert!(outcome.ok());
    let out = outcome.text_out().to_string();
    assert!(
        out.contains("--\n"),
        "expected context-break separator '--' in output, got:\n{out}",
    );
}
/// `-w` must select only the stdin line where `foo` stands as its own word,
/// rejecting `foobar` and `barfoo`.
#[tokio::test]
async fn test_grep_word_regexp() {
    let mut ctx = make_ctx().await;
    ctx.set_stdin(String::from("foobar\nfoo bar\nbarfoo\n"));

    let mut args = ToolArgs::new();
    args.flags.insert(String::from("w"));
    args.positional.push(Value::String("foo".into()));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    assert_eq!(&*out, "foo bar\n");
}
/// `-l` must print only the path of the matching file.
#[tokio::test]
async fn test_grep_files_with_matches() {
    let mut ctx = make_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["hello", "/test.txt"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("l"));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let listing = result.text_out();
    assert_eq!(listing.trim(), "/test.txt");
}
/// With `context=1`, a match on `two` in `/lines.txt` must be printed along
/// with the neighbouring `line one` and `line three` lines.
#[tokio::test]
async fn test_grep_context() {
    let mut ctx = make_ctx().await;

    let mut args = ToolArgs::new();
    args.named.insert(String::from("context"), Value::Int(1));
    for operand in ["two", "/lines.txt"] {
        args.positional.push(Value::String(operand.into()));
    }

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    assert!(out.contains("line one"));
    assert!(out.contains("line two"));
    assert!(out.contains("line three"));
}
/// Builds an execution context over an in-memory filesystem mounted at `/`
/// for the recursive-grep tests.
///
/// The tree holds `src/main.rs`, `src/lib.rs`, `src/lib/utils.rs` and
/// `README.md`; every file except `src/lib/utils.rs` contains `TODO`.
async fn make_recursive_ctx() -> ExecContext {
    let mem = MemoryFs::new();

    // Directories first (parents before children), then the files.
    for dir in ["src", "src/lib"] {
        mem.mkdir(Path::new(dir)).await.unwrap();
    }

    let files: [(&str, &[u8]); 4] = [
        ("src/main.rs", b"fn main() {\n // TODO: implement\n}"),
        ("src/lib.rs", b"// TODO: add modules\npub mod lib;"),
        ("src/lib/utils.rs", b"pub fn util() {\n // helper function\n}"),
        ("README.md", b"# Project\nTODO: write docs"),
    ];
    for (path, body) in files {
        mem.write(Path::new(path), body).await.unwrap();
    }

    let mut vfs = VfsRouter::new();
    vfs.mount("/", mem);
    ExecContext::new(Arc::new(vfs))
}
/// `-r` from `/` must report `TODO` hits from `main.rs`, `lib.rs` and
/// `README.md`.
#[tokio::test]
async fn test_grep_recursive() {
    let mut ctx = make_recursive_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["TODO", "/"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("r"));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    assert!(out.contains("TODO"));
    assert!(out.contains("main.rs"));
    assert!(out.contains("lib.rs"));
    assert!(out.contains("README.md"));
}
/// `-r -n` over `/src` must succeed and emit at least one `:` in its output.
#[tokio::test]
async fn test_grep_recursive_with_line_numbers() {
    let mut ctx = make_recursive_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["TODO", "/src"] {
        args.positional.push(Value::String(operand.into()));
    }
    for flag in ["r", "n"] {
        args.flags.insert(flag.to_string());
    }

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    assert!(out.contains(':'));
}
/// `-r` with `include=*.rs` must report a Rust source file and must not
/// mention `README.md`.
#[tokio::test]
async fn test_grep_recursive_include() {
    let mut ctx = make_recursive_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["TODO", "/"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("r"));
    args.named
        .insert(String::from("include"), Value::String("*.rs".into()));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    let hit_rust_source = out.contains("main.rs") || out.contains("lib.rs");
    assert!(hit_rust_source);
    assert!(!out.contains("README.md"));
}
/// `-r -l` from `/` must list at least two files, and no output line may
/// contain the matched text itself.
#[tokio::test]
async fn test_grep_recursive_files_only() {
    let mut ctx = make_recursive_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["TODO", "/"] {
        args.positional.push(Value::String(operand.into()));
    }
    for flag in ["r", "l"] {
        args.flags.insert(flag.to_string());
    }

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let text = result.text_out();
    assert!(text.lines().count() >= 2);
    for line in text.lines() {
        assert!(!line.contains("TODO"), "Output should only contain filenames");
    }
}
/// The uppercase `-R` flag must trigger a recursive search just like `-r`:
/// searching `/src` has to surface a `TODO` hit from a Rust source file.
#[tokio::test]
async fn test_grep_recursive_uppercase_r() {
    let mut ctx = make_recursive_ctx().await;

    let mut args = ToolArgs::new();
    for operand in ["TODO", "/src"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("R"));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok());

    let out = result.text_out();
    assert!(out.contains("TODO"));
    let hit_rust_source = out.contains("main.rs") || out.contains("lib.rs");
    assert!(hit_rust_source);
}
/// Builds an execution context whose root (`/`) is an in-memory filesystem
/// pre-populated with the given `(path, content)` pairs, written in order.
///
/// Panics if any write fails.
async fn make_ctx_with(files: &[(&str, &[u8])]) -> ExecContext {
    let mem = MemoryFs::new();
    for &(path, content) in files {
        mem.write(Path::new(path), content).await.unwrap();
    }

    let mut vfs = VfsRouter::new();
    vfs.mount("/", mem);
    ExecContext::new(Arc::new(vfs))
}
/// With `-F`, dots in the pattern are literal: `192.168.1.1` must match
/// itself but not `1X168Y1Z1`, which the same pattern would match as a regex.
#[tokio::test]
async fn test_grep_fixed_strings_literal_dot() {
    let fixture: &[u8] = b"192.168.1.1\n1X168Y1Z1\n10.0.0.1\n";
    let mut ctx = make_ctx_with(&[("ips.txt", fixture)]).await;

    let mut args = ToolArgs::new();
    for operand in ["192.168.1.1", "/ips.txt"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("F"));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok(), "grep -F should succeed: {}", result.err);

    let out = result.text_out();
    let found_literal = out.contains("192.168.1.1");
    let leaked_regex_hit = out.contains("1X168Y1Z1");
    assert!(found_literal, "literal match missing: {}", out);
    assert!(!leaked_regex_hit, "regex metachar leaked through -F: {}", out);
}
/// The long-form fixed-strings flag must treat `[bar]` literally rather than
/// as a character class, so `foo[bar]` matches and the `foobar` line does not.
#[tokio::test]
async fn test_grep_fixed_strings_with_metachars() {
    let fixture: &[u8] = b"foo[bar]\nfoobar\nbaz\n";
    let mut ctx = make_ctx_with(&[("code.txt", fixture)]).await;

    let mut args = ToolArgs::new();
    for operand in ["foo[bar]", "/code.txt"] {
        args.positional.push(Value::String(operand.into()));
    }
    args.flags.insert(String::from("fixed-strings"));

    let result = Grep.execute(args, &mut ctx).await;
    assert!(result.ok(), "grep --fixed_strings should succeed: {}", result.err);

    let out = result.text_out();
    assert!(out.contains("foo[bar]"));
    assert!(!out.contains("foobar\n"));
}
/// Runs `grep <pattern> /f.txt` through `Grep.execute` against an in-memory
/// file holding `content`, optionally with `-n`.
///
/// Returns the rendered text output together with the exit code.
async fn stream_grep_via_exec(content: &[u8], pattern: &str, line_numbers: bool) -> (String, i32) {
    // Fixture: a single file `f.txt` on a memory filesystem mounted at `/`.
    let backing = MemoryFs::new();
    backing.write(Path::new("f.txt"), content).await.unwrap();
    let mut router = VfsRouter::new();
    router.mount("/", backing);
    let mut ctx = ExecContext::new(Arc::new(router));

    let mut args = ToolArgs::new();
    args.positional.extend([
        Value::String(pattern.into()),
        Value::String("/f.txt".into()),
    ]);
    if line_numbers {
        args.flags.insert(String::from("n"));
    }

    let outcome = Grep.execute(args, &mut ctx).await;
    let rendered = outcome.text_out().to_string();
    (rendered, outcome.code as i32)
}
/// Renders `content` through the whole-buffer path (`grep_lines_structured`)
/// with default-ish options, to serve as the expected value in parity tests.
///
/// Only `line_numbers` and `path` vary; inversion, only-matching, context,
/// filename display and multiline are all off, and binary detection quits on
/// a NUL byte. Panics if the pattern does not compile or the search fails.
fn reference_render(content: &[u8], pattern: &str, line_numbers: bool, path: Option<&str>) -> RenderResult {
    let options = GrepOptions {
        show_line_numbers: line_numbers,
        show_filename: false,
        invert: false,
        only_matching: false,
        multiline: false,
        before_context: None,
        after_context: None,
        encoding: None,
        binary_detection: BinaryDetection::quit(b'\x00'),
    };
    let matcher = RegexMatcherBuilder::new().build(pattern).unwrap();
    grep_lines_structured(content, &matcher, &options, path).unwrap()
}
/// Feeds `content` to a `GrepLineScanner` in `chunk_size`-byte pieces and
/// returns the rendered result plus the scanner's `saw_invalid_utf8` flag.
///
/// The scanner is built non-inverted, with NUL (`0x00`) as its byte argument
/// and `path` as the optional file label. A `chunk_size` of 0 is clamped to 1
/// because `slice::chunks` panics on a zero chunk size.
///
/// Panics if `pattern` is not a valid regex.
fn scanner_render(
    content: &[u8],
    pattern: &str,
    line_numbers: bool,
    path: Option<&str>,
    chunk_size: usize,
) -> (RenderResult, bool) {
    let regex = regex::RegexBuilder::new(pattern).build().unwrap();
    // Fixed: the borrow was corrupted to `®ex` (an `&reg` HTML-entity
    // mis-decode); the scanner takes the compiled regex by reference.
    let mut scanner =
        GrepLineScanner::new(&regex, false, line_numbers, Some(b'\x00'), path.map(str::to_string));
    for chunk in content.chunks(chunk_size.max(1)) {
        scanner.push(chunk);
    }
    scanner.finish();
    // Read the flag before `into_render_result` consumes the scanner.
    let invalid = scanner.saw_invalid_utf8;
    (scanner.into_render_result(), invalid)
}
fn scanner_grep(content: &[u8], pattern: &str, invert: bool, line_numbers: bool, chunk_size: usize) -> (String, usize, bool) {
let regex = regex::RegexBuilder::new(pattern).build().unwrap();
let mut scanner = GrepLineScanner::new(®ex, invert, line_numbers, Some(b'\x00'), None);
for chunk in content.chunks(chunk_size) {
scanner.push(chunk);
}
scanner.finish();
let invalid = scanner.saw_invalid_utf8;
let render = scanner.into_render_result();
(render.text, render.match_count, invalid)
}
/// The chunked scanner must produce the same text, match count and rich
/// output as the whole-buffer reference for every chunk size from 1 up to
/// the input length, with and without line numbers.
#[test]
fn grep_scanner_matches_whole_buffer_across_every_split() {
    const LABEL: Option<&str> = Some("f.txt");

    let cases: &[(&[u8], &str)] = &[
        (b"hello world\nfoo bar\nbaz hello\n", "hello"),
        (b"line one\nline two\nline three\n", "line"),
        // Multi-byte UTF-8, so small chunks split inside a character.
        ("日本語テスト\nfoo\n日本語match\n".as_bytes(), "日本語"),
        // No trailing newline.
        (b"foo\nbar\nbaz", "bar"),
        // No match at all.
        (b"alpha\nbeta\ngamma\n", "zzz"),
        (b"only line here", "line"),
        // CRLF line endings, including a dangling `\r` at end of input.
        (b"win foo\r\nwin bar\r\nno match\r\n", "win"),
        (b"a\r\nbcd\r", "a"),
        // Empty input.
        (b"", "pattern"),
    ];

    for &(input, pattern) in cases {
        // Empty input still gets one pass, with chunk size 1.
        let largest_chunk = input.len().max(1);
        for line_numbers in [false, true] {
            let reference = reference_render(input, pattern, line_numbers, LABEL);
            for chunk_size in 1..=largest_chunk {
                let (render, invalid) =
                    scanner_render(input, pattern, line_numbers, LABEL, chunk_size);
                let ctx = format!(
                    "pattern={pattern:?} ln={line_numbers} chunk={chunk_size} input={:?}",
                    String::from_utf8_lossy(input)
                );
                assert!(!invalid, "valid UTF-8 flagged binary — {ctx}");
                assert_eq!(render.text, reference.text, "text — {ctx}");
                assert_eq!(render.match_count, reference.match_count, "match_count — {ctx}");
                assert_eq!(render.rich, reference.rich, "rich JSON — {ctx}");
            }
        }
    }
}
/// A `0xFF` byte in the input must set the scanner's `saw_invalid_utf8` flag.
/// The rendered text and match count are deliberately not inspected.
#[test]
fn grep_scanner_rejects_invalid_utf8_as_binary() {
    let binary_content: &[u8] = b"valid line\n\xff invalid\nsecond line\n";
    let (_text, _count, saw_invalid) = scanner_grep(binary_content, "line", false, false, 4);
    assert!(saw_invalid, "saw_invalid_utf8 must be set for non-UTF-8 input");
}
/// A NUL byte must not set `saw_invalid_utf8`, and a matching line that
/// appears after the NUL must not be printed.
#[test]
fn grep_scanner_quit_on_nul_is_not_an_error() {
    let content: &[u8] = b"foo line\nbar\x00baz\nsecond foo\n";
    let (text, _count, saw_invalid) = scanner_grep(content, "foo", false, false, 32);

    assert!(!saw_invalid, "NUL byte must not set saw_invalid_utf8");

    let leaked_past_nul = text.contains("second foo");
    assert!(
        !leaked_past_nul,
        "output after NUL must be suppressed: {text:?}",
    );
}
/// Grepping a file through `Grep.execute` must exit 0 on a match, print both
/// lines containing `line`, and omit the non-matching `three` line.
#[tokio::test]
async fn grep_streaming_file_parity_with_reference() {
    let content: Vec<u8> = "one line\ntwo line\nthree\n日本語 test\n".as_bytes().to_vec();
    let (streamed, code) = stream_grep_via_exec(&content, "line", false).await;

    assert_eq!(code, 0, "streaming path must return 0 on match");

    let has_first = streamed.contains("one line");
    let has_second = streamed.contains("two line");
    let has_non_match = streamed.contains("three");
    assert!(has_first, "missing 'one line': {streamed:?}");
    assert!(has_second, "missing 'two line': {streamed:?}");
    assert!(!has_non_match, "non-matching line leaked: {streamed:?}");
}
/// With `-n`, a match on the second line of a file must be prefixed `2:`.
#[tokio::test]
async fn grep_streaming_file_line_numbers() {
    let content: &[u8] = b"alpha\nbeta\ngamma\n";
    let (out, code) = stream_grep_via_exec(content, "beta", true).await;

    assert_eq!(code, 0);

    let numbered = out.contains("2:beta");
    assert!(numbered, "expected '2:beta' in output: {out:?}");
}
/// Grepping a file that contains a `0xFF` byte must fail with exit code 2
/// and an error that mentions both `binary data` and the file path.
#[tokio::test]
async fn grep_streaming_file_binary_data_error() {
    // Fixture: `/bad.txt` with an invalid UTF-8 byte on its second line.
    let content: Vec<u8> = b"good line\nbad \xff byte\nmore\n".to_vec();
    let backing = MemoryFs::new();
    backing.write(Path::new("bad.txt"), &content).await.unwrap();
    let mut router = VfsRouter::new();
    router.mount("/", backing);
    let mut ctx = ExecContext::new(Arc::new(router));

    let mut args = ToolArgs::new();
    for operand in ["line", "/bad.txt"] {
        args.positional.push(Value::String(operand.into()));
    }

    let result = Grep.execute(args, &mut ctx).await;
    assert_eq!(result.code, 2, "binary data must exit with code 2");

    let mentions_binary = result.err.contains("binary data");
    assert!(
        mentions_binary,
        "error must mention 'binary data': {:?}",
        result.err,
    );
    let names_file = result.err.contains("/bad.txt");
    assert!(
        names_file,
        "error must name the file: {:?}",
        result.err,
    );
}
/// For a file containing a NUL byte, `Grep.execute` must produce the same
/// text as the whole-buffer reference, and exit 1 when the reference found
/// no matches or 0 otherwise.
#[tokio::test]
async fn grep_nul_file_matches_whole_buffer_path() {
    let content: &[u8] = b"foo\nbar\x00baz\nfoo again\n";
    let scenarios = [("foo", false), ("bar", false), ("foo", true)];

    for (pattern, ln) in scenarios {
        let reference = reference_render(content, pattern, ln, Some("/f.txt"));
        let (text, code) = stream_grep_via_exec(content, pattern, ln).await;

        assert_eq!(text, reference.text, "pattern={pattern} ln={ln}: text");

        // Exit status 1 means "no match", 0 means "matched".
        let expected_code = i32::from(reference.match_count == 0);
        assert_eq!(code, expected_code, "pattern={pattern} ln={ln}: exit code");
    }
}
/// Test double that wraps a `MemoryFs` and logs every read it serves, so a
/// test can inspect how a file was read.
struct RecordingFs {
// Backing filesystem that all operations are delegated to.
inner: MemoryFs,
// One `(offset, limit)` entry per read call, in call order; whole-file
// reads are logged as `(None, None)`. Shared so the test keeps a handle
// after the filesystem is moved into the router.
ranges: Arc<std::sync::Mutex<Vec<(Option<u64>, Option<u64>)>>>,
}
/// `Filesystem` implementation that forwards everything to the wrapped
/// `MemoryFs`, logging the requested range of each read beforehand.
#[async_trait::async_trait]
impl crate::vfs::Filesystem for RecordingFs {
    /// Whole-file read; logged as `(None, None)`.
    async fn read(&self, path: &Path) -> std::io::Result<Vec<u8>> {
        self.record((None, None));
        self.inner.read(path).await
    }

    /// Ranged read; logs the request's `(offset, limit)`, or `(None, None)`
    /// when no range was supplied.
    async fn read_range(
        &self,
        path: &Path,
        range: Option<kaish_vfs::ReadRange>,
    ) -> std::io::Result<Vec<u8>> {
        let requested = match &range {
            Some(r) => (r.offset, r.limit),
            None => (None, None),
        };
        self.record(requested);
        self.inner.read_range(path, range).await
    }

    async fn write(&self, path: &Path, data: &[u8]) -> std::io::Result<()> {
        self.inner.write(path, data).await
    }

    async fn list(&self, path: &Path) -> std::io::Result<Vec<crate::vfs::DirEntry>> {
        self.inner.list(path).await
    }

    async fn stat(&self, path: &Path) -> std::io::Result<crate::vfs::DirEntry> {
        self.inner.stat(path).await
    }

    async fn mkdir(&self, path: &Path) -> std::io::Result<()> {
        self.inner.mkdir(path).await
    }

    async fn remove(&self, path: &Path) -> std::io::Result<()> {
        self.inner.remove(path).await
    }

    fn read_only(&self) -> bool {
        self.inner.read_only()
    }
}

impl RecordingFs {
    /// Appends one `(offset, limit)` entry to the shared read log. The lock
    /// guard is released before returning, so it is never held across an
    /// `.await`.
    fn record(&self, entry: (Option<u64>, Option<u64>)) {
        self.ranges.lock().unwrap().push(entry);
    }
}
/// Reading a file through `read_file_chunked` with a 256-byte chunk size
/// must issue several reads, each bounded to 256 bytes, and the scanner fed
/// from those chunks must still count every matching line.
#[tokio::test]
async fn grep_streams_file_in_bounded_chunks() {
    // Keep a handle on the read log; `rec` itself is moved into the router.
    let ranges = Arc::new(std::sync::Mutex::new(Vec::new()));
    let rec = RecordingFs {
        inner: MemoryFs::new(),
        ranges: ranges.clone(),
    };

    // 40 lines, each containing "line", so every line is a match.
    let mut payload = Vec::new();
    for i in 0..40usize {
        payload.extend_from_slice(format!("line number {i:04} content here\n").as_bytes());
    }
    // Written through `inner` directly, so the fixture write is not logged.
    rec.inner.write(Path::new("big.txt"), &payload).await.unwrap();

    let mut vfs = VfsRouter::new();
    vfs.mount("/", rec);
    let ctx = ExecContext::new(Arc::new(vfs));

    let regex = regex::Regex::new("line").unwrap();
    // Fixed: the borrow was corrupted to `®ex` (an `&reg` HTML-entity
    // mis-decode); the scanner takes the compiled regex by reference.
    let mut scanner =
        GrepLineScanner::new(&regex, false, false, Some(b'\x00'), Some("/big.txt".to_string()));
    ctx.read_file_chunked(Path::new("/big.txt"), 256, |c| {
        scanner.push(c);
        std::ops::ControlFlow::Continue(())
    })
    .await
    .unwrap();
    scanner.finish();

    let recs = ranges.lock().unwrap();
    assert!(
        recs.len() >= 4,
        "expected the file to be read in several chunks, got {} reads",
        recs.len()
    );
    assert!(
        recs.iter().all(|&(_, limit)| limit == Some(256)),
        "every read must be bounded to the chunk size; recorded {recs:?}",
    );
    // Release the lock guard before the final assertions.
    drop(recs);

    let render = scanner.into_render_result();
    assert_eq!(render.match_count, 40, "expected 40 matches");
}
}