Skip to main content

rgx/filter/
mod.rs

1//! `rgx filter` subcommand — live/non-interactive regex filter over stdin or a file.
2
3use std::io::{self, BufRead, BufReader, IsTerminal, Read, Write};
4use std::path::Path;
5
6use crate::config::cli::FilterArgs;
7use crate::engine::{self, EngineFlags, EngineKind};
8
9pub mod app;
10pub mod json_path;
11pub mod run;
12pub mod ui;
13pub use app::{FilterApp, Outcome};
14
/// Switches that control how a pattern is applied to the input lines.
///
/// The `Default` value leaves both switches off.
#[derive(Clone, Copy, Debug, Default)]
pub struct FilterOptions {
    /// Select the lines that do *not* match instead of the ones that do.
    pub invert: bool,
    /// Forwarded to the engine flags so matching ignores letter case.
    pub case_insensitive: bool,
}
20
21impl FilterOptions {
22    fn flags(&self) -> EngineFlags {
23        EngineFlags {
24            case_insensitive: self.case_insensitive,
25            ..EngineFlags::default()
26        }
27    }
28}
29
30/// Apply the pattern to each line. Returns the 0-indexed line numbers of every
31/// line whose match status (matches vs. invert) satisfies `options.invert`.
32///
33/// Returns `Err` if the pattern fails to compile. An empty pattern is treated
34/// as "match everything" (every line passes) so the TUI has a sensible default
35/// before the user types.
36pub fn filter_lines(
37    lines: &[String],
38    pattern: &str,
39    options: FilterOptions,
40) -> Result<Vec<usize>, String> {
41    if pattern.is_empty() {
42        // Empty pattern — every line passes iff not inverted.
43        return Ok(if options.invert {
44            Vec::new()
45        } else {
46            (0..lines.len()).collect()
47        });
48    }
49
50    let engine = engine::create_engine(EngineKind::RustRegex);
51    let compiled = engine
52        .compile(pattern, &options.flags())
53        .map_err(|e| e.to_string())?;
54
55    let mut indices = Vec::with_capacity(lines.len());
56    for (idx, line) in lines.iter().enumerate() {
57        let matched = compiled
58            .find_matches(line)
59            .map(|v| !v.is_empty())
60            .unwrap_or(false);
61        if matched != options.invert {
62            indices.push(idx);
63        }
64    }
65    Ok(indices)
66}
67
68/// Apply the pattern to the extracted string for each line. Lines whose
69/// `extracted[i]` is `None` are excluded from the match set regardless of
70/// whether the pattern is empty or `invert` is set — a missing/non-string
71/// field is not a "line" for matching purposes.
72///
73/// Returns the 0-indexed line numbers of the raw input that should be emitted
74/// (i.e. whose extracted value satisfies the pattern + invert flag).
75pub fn filter_lines_with_extracted(
76    extracted: &[Option<String>],
77    pattern: &str,
78    options: FilterOptions,
79) -> Result<Vec<usize>, String> {
80    if pattern.is_empty() {
81        // Empty pattern: every line with a present extracted value passes
82        // (iff not inverted). A None extracted value is excluded either way.
83        return Ok(extracted
84            .iter()
85            .enumerate()
86            .filter_map(|(idx, v)| {
87                if v.is_some() && !options.invert {
88                    Some(idx)
89                } else {
90                    None
91                }
92            })
93            .collect());
94    }
95
96    let engine = engine::create_engine(EngineKind::RustRegex);
97    let compiled = engine
98        .compile(pattern, &options.flags())
99        .map_err(|e| e.to_string())?;
100
101    let mut indices = Vec::with_capacity(extracted.len());
102    for (idx, slot) in extracted.iter().enumerate() {
103        let Some(s) = slot else {
104            // Missing field or parse failure — never emit.
105            continue;
106        };
107        let matched = compiled
108            .find_matches(s)
109            .map(|v| !v.is_empty())
110            .unwrap_or(false);
111        if matched != options.invert {
112            indices.push(idx);
113        }
114    }
115    Ok(indices)
116}
117
118/// Returns per-line extracted strings. `None` means the line should be excluded
119/// from matching (JSON parse failure, path miss, or non-string value). The
120/// returned vector has the same length as `lines`, so callers can index it
121/// directly alongside the raw lines.
122pub fn extract_strings(lines: &[String], path_expr: &str) -> Result<Vec<Option<String>>, String> {
123    let path = json_path::parse_path(path_expr)?;
124    let mut out = Vec::with_capacity(lines.len());
125    for line in lines {
126        let extracted = match serde_json::from_str::<serde_json::Value>(line) {
127            Ok(v) => json_path::extract(&v, &path).and_then(|v| v.as_str().map(str::to_string)),
128            Err(_) => None,
129        };
130        out.push(extracted);
131    }
132    Ok(out)
133}
134
// Process exit codes, matching grep conventions.

/// Exit code reported when at least one line was selected.
pub const EXIT_MATCH: i32 = 0;
/// Exit code reported when no line was selected.
pub const EXIT_NO_MATCH: i32 = 1;
/// Exit code reported when the command failed.
pub const EXIT_ERROR: i32 = 2;
139
/// Write the selected lines to `writer`, one per output line.
///
/// `matched` holds 0-indexed positions into `lines`; they are written in the
/// order given. When `line_number` is true each line is prefixed with its
/// 1-indexed line number followed by a colon.
///
/// # Errors
///
/// Stops at, and returns, the first write error.
///
/// # Panics
///
/// Panics if an index in `matched` is out of bounds for `lines`.
pub fn emit_matches(
    writer: &mut dyn Write,
    lines: &[String],
    matched: &[usize],
    line_number: bool,
) -> io::Result<()> {
    matched.iter().try_for_each(|&idx| {
        let text = &lines[idx];
        if line_number {
            writeln!(writer, "{}:{}", idx + 1, text)
        } else {
            writeln!(writer, "{}", text)
        }
    })
}
157
/// Write `matched_count` to `writer` as a decimal number on a line of its own.
///
/// # Errors
///
/// Returns the underlying write error, if any.
pub fn emit_count(writer: &mut dyn Write, matched_count: usize) -> io::Result<()> {
    writeln!(writer, "{matched_count}")?;
    Ok(())
}
162
/// Read all lines from either a file path or the provided reader (typically stdin).
///
/// When `file` is `Some`, that file is opened and `fallback` is ignored;
/// otherwise the lines come from `fallback`.
///
/// Each line has its terminator removed: one trailing `\n`, then at most one
/// trailing `\r` (so both `\n` and `\r\n` endings are handled, as is a lone
/// `\r` left at the very end of input). Carriage returns that belong to the
/// line's content are preserved — `a\r\r\n` yields `a\r`. A trailing empty
/// line (from a terminating newline) is not produced.
///
/// Invalid UTF-8 bytes are replaced with `U+FFFD REPLACEMENT CHARACTER` rather
/// than aborting the read — this matches `grep`'s behavior and keeps the filter
/// usable against binary-ish logs (e.g. files with stray latin-1 bytes).
///
/// `max_lines` caps the number of lines read to prevent OOM on unbounded
/// streams. Pass `0` to disable the cap. Returns `(lines, truncated)` where
/// `truncated` is `true` if the cap was reached before end-of-input.
///
/// # Errors
///
/// Returns any I/O error from opening `file` or from reading the input.
pub fn read_input(
    file: Option<&Path>,
    fallback: impl Read,
    max_lines: usize,
) -> io::Result<(Vec<String>, bool)> {
    let mut reader: Box<dyn BufRead> = match file {
        Some(path) => Box::new(BufReader::new(std::fs::File::open(path)?)),
        None => Box::new(BufReader::new(fallback)),
    };
    let mut out = Vec::new();
    let mut buf = Vec::new();
    let mut truncated = false;
    loop {
        if max_lines != 0 && out.len() >= max_lines {
            // Only flag truncation if data remains after the cap, so callers
            // don't warn about input that has exactly `max_lines` lines. This
            // peeks at the buffer instead of reading another full line: the
            // remainder may be arbitrarily long (or endless), and slurping it
            // would defeat the purpose of the cap.
            truncated = has_pending_input(&mut reader)?;
            break;
        }
        buf.clear();
        if reader.read_until(b'\n', &mut buf)? == 0 {
            break;
        }
        // Strip exactly one terminator: `\n`, then an optional preceding `\r`.
        let mut line: &[u8] = &buf;
        if let Some(rest) = line.strip_suffix(b"\n") {
            line = rest;
        }
        if let Some(rest) = line.strip_suffix(b"\r") {
            line = rest;
        }
        out.push(String::from_utf8_lossy(line).into_owned());
    }
    Ok((out, truncated))
}

/// Report whether `reader` has at least one more byte, without consuming it.
///
/// Interrupted reads are retried, mirroring what `BufRead::read_until` does.
fn has_pending_input<R: BufRead + ?Sized>(reader: &mut R) -> io::Result<bool> {
    loop {
        match reader.fill_buf() {
            Ok(pending) => return Ok(!pending.is_empty()),
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
}
213
214/// CLI entry point for `rgx filter`. Reads input, decides between non-interactive
215/// and TUI modes, and returns an exit code.
216pub fn entry(args: FilterArgs) -> i32 {
217    match run_entry(args) {
218        Ok(code) => code,
219        Err(msg) => {
220            eprintln!("rgx filter: {msg}");
221            EXIT_ERROR
222        }
223    }
224}
225
226fn run_entry(args: FilterArgs) -> Result<i32, String> {
227    let (lines, truncated) = read_input(args.file.as_deref(), io::stdin(), args.max_lines)
228        .map_err(|e| format!("reading input: {e}"))?;
229    if truncated {
230        eprintln!(
231            "rgx filter: input truncated at {} lines (use --max-lines to override)",
232            args.max_lines
233        );
234    }
235
236    let options = FilterOptions {
237        invert: args.invert,
238        case_insensitive: args.case_insensitive,
239    };
240
241    // Non-interactive paths: --count, --line-number, or a pattern was given and
242    // stdout is not a TTY (so we're being piped).
243    let has_pattern = args.pattern.as_deref().is_some_and(|p| !p.is_empty());
244    let stdout_is_tty = io::stdout().is_terminal();
245    let non_interactive = args.count || args.line_number || (has_pattern && !stdout_is_tty);
246
247    // If --json was given, resolve the per-line extracted strings up front.
248    // We do this before splitting non-interactive vs. TUI so both paths
249    // see the same view of the input.
250    let json_extracted = if let Some(path_expr) = args.json.as_deref() {
251        Some(extract_strings(&lines, path_expr).map_err(|e| format!("--json: {e}"))?)
252    } else {
253        None
254    };
255
256    if non_interactive {
257        let pattern = args.pattern.unwrap_or_default();
258        let matched = match &json_extracted {
259            Some(extracted) => filter_lines_with_extracted(extracted, &pattern, options)
260                .map_err(|e| format!("pattern: {e}"))?,
261            None => filter_lines(&lines, &pattern, options).map_err(|e| format!("pattern: {e}"))?,
262        };
263
264        let mut stdout = io::stdout().lock();
265        if args.count {
266            emit_count(&mut stdout, matched.len()).map_err(|e| format!("writing output: {e}"))?;
267        } else {
268            // Emit the raw lines regardless of --json — users still get the
269            // full JSON records back, not just the extracted fields.
270            emit_matches(&mut stdout, &lines, &matched, args.line_number)
271                .map_err(|e| format!("writing output: {e}"))?;
272        }
273        return Ok(if matched.is_empty() {
274            EXIT_NO_MATCH
275        } else {
276            EXIT_MATCH
277        });
278    }
279
280    // TUI mode.
281    let initial_pattern = args.pattern.unwrap_or_default();
282    let app = match json_extracted {
283        Some(extracted) => {
284            FilterApp::with_json_extracted(lines, extracted, &initial_pattern, options)
285        }
286        None => FilterApp::new(lines, &initial_pattern, options),
287    };
288    let (final_app, outcome) = run::run_tui(app).map_err(|e| format!("tui: {e}"))?;
289
290    match outcome {
291        Outcome::Emit => {
292            let mut stdout = io::stdout().lock();
293            emit_matches(&mut stdout, &final_app.lines, &final_app.matched, false)
294                .map_err(|e| format!("writing output: {e}"))?;
295            Ok(if final_app.matched.is_empty() {
296                EXIT_NO_MATCH
297            } else {
298                EXIT_MATCH
299            })
300        }
301        Outcome::Discard => Ok(EXIT_NO_MATCH),
302        Outcome::Pending => Ok(EXIT_ERROR),
303    }
304}