1use std::collections::HashMap;
28use std::fs;
29use std::path::{Path, PathBuf};
30
31use regex::{Regex, RegexBuilder};
32use serde::Deserialize;
33
/// Size limit, in bytes, passed to `RegexBuilder::size_limit` for every
/// regex compiled from a filter file (2 MiB).
const REGEX_SIZE_LIMIT: usize = 2 << 20;

/// Largest number of `[strip]` patterns one filter may declare; `parse_filter`
/// rejects longer lists.
const MAX_PATTERNS_PER_FILTER: usize = 256;

/// `line_max` used when a filter sets no `[truncate].line_max`.
/// `usize::MAX` is the "do not truncate lines" sentinel.
const DEFAULT_LINE_MAX: usize = usize::MAX;

/// `max_lines` used when a filter sets no `[cap].max_lines`.
/// `usize::MAX` is the "do not cap the line count" sentinel.
const DEFAULT_MAX_LINES: usize = usize::MAX;
49
/// A parsed and validated output filter, as produced by `parse_filter`.
#[derive(Debug, Clone)]
pub struct TomlFilter {
    /// Name the filter was parsed under (the file stem for filters loaded from disk).
    pub name: String,
    /// Where the definition came from.
    pub source: FilterSource,
    /// Program names this filter applies to. `parse_filter` falls back to
    /// `name` when the TOML lists none.
    pub matches: Vec<String>,
    /// Optional `[filter].description` text.
    pub description: Option<String>,
    /// Compiled `[strip].patterns`; a line matching any of them is dropped.
    pub strip: Vec<Regex>,
    /// Maximum characters per line; `usize::MAX` disables truncation.
    pub line_max: usize,
    /// Maximum number of lines kept; `usize::MAX` disables the cap.
    pub max_lines: usize,
    /// Which part of the output survives when the line cap is exceeded.
    pub keep: KeepMode,
    /// Compiled `[shortcircuit].when`. `parse_filter` only sets it together
    /// with `shortcircuit_replacement`.
    pub shortcircuit_when: Option<Regex>,
    /// Text returned in place of the output when `shortcircuit_when` matches.
    pub shortcircuit_replacement: Option<String>,
    /// Whether ANSI escape sequences are stripped before any other step.
    pub strip_ansi: bool,
}
65
/// Origin of a filter definition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterSource {
    /// Supplied to `build_registry` as an in-memory `(name, content)` pair.
    Builtin,
    /// Loaded from the `.toml` file at `path` in the user filter directory.
    User { path: PathBuf },
    /// Loaded from the `.toml` file at `path` in the project filter directory.
    Project { path: PathBuf },
}
73
/// Which lines survive when output exceeds a filter's `max_lines`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum KeepMode {
    /// Keep the first lines; the omission marker is appended after them.
    Head,
    /// Keep the last lines; the omission marker is placed before them.
    /// This is the default.
    #[default]
    Tail,
    /// Keep lines from both ends with the omission marker between them.
    Middle,
}
81
/// Set of loaded filters plus the warnings collected while loading them.
#[derive(Debug, Default, Clone)]
pub struct FilterRegistry {
    /// Match keyword -> filter. Inserting a keyword again replaces the
    /// previous filter for that keyword.
    by_match: HashMap<String, TomlFilter>,
    /// Every registered filter, including ones shadowed in `by_match`; only
    /// an entry with the same name and source is replaced.
    all: Vec<TomlFilter>,
    /// Human-readable messages for filters or directories that failed to load.
    warnings: Vec<String>,
}
97
98impl FilterRegistry {
99 pub fn lookup(&self, command: &str) -> Option<&TomlFilter> {
102 let program = program_name(command)?;
103 self.by_match.get(program)
104 }
105
106 pub fn all(&self) -> &[TomlFilter] {
108 &self.all
109 }
110
111 pub fn warnings(&self) -> &[String] {
114 &self.warnings
115 }
116}
117
118pub fn build_registry(
123 builtin_inputs: &[(&'static str, &'static str)],
124 user_dir: Option<&Path>,
125 project_dir: Option<&Path>,
126) -> FilterRegistry {
127 let mut registry = FilterRegistry::default();
128
129 for (name, content) in builtin_inputs {
131 match parse_filter(name, content, FilterSource::Builtin) {
132 Ok(filter) => insert_filter(&mut registry, filter),
133 Err(e) => registry
134 .warnings
135 .push(format!("builtin filter {name}: {e}")),
136 }
137 }
138
139 if let Some(dir) = user_dir {
141 load_dir(dir, &mut registry, |path| FilterSource::User {
142 path: path.to_path_buf(),
143 });
144 }
145
146 if let Some(dir) = project_dir {
150 load_dir(dir, &mut registry, |path| FilterSource::Project {
151 path: path.to_path_buf(),
152 });
153 }
154
155 registry
156}
157
158fn load_dir<F>(dir: &Path, registry: &mut FilterRegistry, source_for: F)
159where
160 F: Fn(&Path) -> FilterSource,
161{
162 let entries = match fs::read_dir(dir) {
163 Ok(entries) => entries,
164 Err(e) => {
165 if e.kind() != std::io::ErrorKind::NotFound {
167 registry
168 .warnings
169 .push(format!("filter dir {}: {e}", dir.display()));
170 }
171 return;
172 }
173 };
174
175 let mut paths: Vec<PathBuf> = entries
176 .filter_map(|res| res.ok())
177 .map(|entry| entry.path())
178 .filter(|path| path.extension().and_then(|s| s.to_str()) == Some("toml"))
179 .collect();
180 paths.sort();
181
182 for path in paths {
183 let content = match fs::read_to_string(&path) {
184 Ok(s) => s,
185 Err(e) => {
186 registry
187 .warnings
188 .push(format!("filter {}: read failed: {e}", path.display()));
189 continue;
190 }
191 };
192 let name = path
193 .file_stem()
194 .and_then(|s| s.to_str())
195 .unwrap_or("<unknown>")
196 .to_string();
197 let source = source_for(&path);
198 match parse_filter(&name, &content, source) {
199 Ok(filter) => insert_filter(registry, filter),
200 Err(e) => registry
201 .warnings
202 .push(format!("filter {}: {e}", path.display())),
203 }
204 }
205}
206
207fn insert_filter(registry: &mut FilterRegistry, filter: TomlFilter) {
208 for keyword in &filter.matches {
212 registry.by_match.insert(keyword.clone(), filter.clone());
213 }
214 registry
217 .all
218 .retain(|existing| !(existing.name == filter.name && existing.source == filter.source));
219 registry.all.push(filter);
220}
221
/// Serde image of a whole filter TOML document, before validation.
///
/// Each field corresponds to the top-level table of the same name; a missing
/// table deserializes to its default.
#[derive(Debug, Deserialize)]
struct RawFilter {
    /// `[filter]` table: match keywords and description.
    #[serde(default)]
    filter: RawFilterMeta,
    /// `[strip]` table: line-dropping patterns.
    #[serde(default)]
    strip: Option<RawStrip>,
    /// `[truncate]` table: per-line length limit.
    #[serde(default)]
    truncate: Option<RawTruncate>,
    /// `[cap]` table: line-count limit and which part to keep.
    #[serde(default)]
    cap: Option<RawCap>,
    /// `[shortcircuit]` table: whole-output replacement rule.
    #[serde(default)]
    shortcircuit: Option<RawShortcircuit>,
    /// `[ansi]` table: ANSI escape stripping toggle.
    #[serde(default)]
    ansi: Option<RawAnsi>,
}
237
/// Serde image of the `[filter]` table.
#[derive(Debug, Deserialize, Default)]
struct RawFilterMeta {
    /// Program names the filter applies to; empty when not specified.
    #[serde(default)]
    matches: Vec<String>,
    /// Optional free-form description.
    #[serde(default)]
    description: Option<String>,
}
245
/// Serde image of the `[strip]` table.
#[derive(Debug, Deserialize, Default)]
struct RawStrip {
    /// Uncompiled regex sources; `parse_filter` compiles and validates them.
    #[serde(default)]
    patterns: Vec<String>,
}
251
/// Serde image of the `[truncate]` table.
#[derive(Debug, Deserialize, Default)]
struct RawTruncate {
    /// Maximum characters per line; `None` when not specified.
    #[serde(default)]
    line_max: Option<usize>,
}
257
/// Serde image of the `[cap]` table.
#[derive(Debug, Deserialize, Default)]
struct RawCap {
    /// Maximum number of lines to keep; `None` when not specified.
    #[serde(default)]
    max_lines: Option<usize>,
    /// Unvalidated keep mode; `parse_filter` accepts `"head"`, `"tail"` or `"middle"`.
    #[serde(default)]
    keep: Option<String>,
}
265
/// Serde image of the `[shortcircuit]` table.
///
/// `parse_filter` requires `when` and `replacement` to be set together.
#[derive(Debug, Deserialize, Default)]
struct RawShortcircuit {
    /// Uncompiled regex source tested against the output after stripping.
    #[serde(default)]
    when: Option<String>,
    /// Text returned instead of the output when `when` matches.
    #[serde(default)]
    replacement: Option<String>,
}
273
/// Serde image of the `[ansi]` table.
#[derive(Debug, Deserialize, Default)]
struct RawAnsi {
    /// Whether to strip ANSI escapes; `parse_filter` treats `None` as `true`.
    #[serde(default)]
    strip: Option<bool>,
}
279
280pub fn parse_filter(name: &str, content: &str, source: FilterSource) -> Result<TomlFilter, String> {
283 let raw: RawFilter = toml::from_str(content).map_err(|e| format!("invalid TOML: {e}"))?;
284
285 let mut matches = raw.filter.matches;
286 if matches.is_empty() {
287 matches.push(name.to_string());
289 }
290 for keyword in &matches {
291 if keyword.is_empty() || keyword.contains(char::is_whitespace) {
292 return Err(format!("invalid match keyword {keyword:?}"));
293 }
294 }
295
296 let strip_patterns = raw.strip.unwrap_or_default().patterns;
297 if strip_patterns.len() > MAX_PATTERNS_PER_FILTER {
298 return Err(format!(
299 "too many strip patterns ({} > {MAX_PATTERNS_PER_FILTER})",
300 strip_patterns.len()
301 ));
302 }
303 let mut strip = Vec::with_capacity(strip_patterns.len());
304 for pattern in strip_patterns {
305 let regex =
306 build_regex(&pattern, true).map_err(|e| format!("strip pattern {pattern:?}: {e}"))?;
307 strip.push(regex);
308 }
309
310 let line_max = raw
311 .truncate
312 .as_ref()
313 .and_then(|t| t.line_max)
314 .unwrap_or(DEFAULT_LINE_MAX);
315
316 let cap = raw.cap.unwrap_or_default();
317 let max_lines = cap.max_lines.unwrap_or(DEFAULT_MAX_LINES);
318 let keep = match cap.keep.as_deref() {
319 None => KeepMode::default(),
320 Some("head") => KeepMode::Head,
321 Some("tail") => KeepMode::Tail,
322 Some("middle") => KeepMode::Middle,
323 Some(other) => return Err(format!("invalid cap.keep {other:?}")),
324 };
325
326 let shortcircuit = raw.shortcircuit.unwrap_or_default();
327 let (shortcircuit_when, shortcircuit_replacement) =
328 match (shortcircuit.when, shortcircuit.replacement) {
329 (Some(when), Some(replacement)) => {
330 let regex = build_regex(&when, false)
331 .map_err(|e| format!("shortcircuit.when {when:?}: {e}"))?;
332 (Some(regex), Some(replacement))
333 }
334 (Some(_), None) => return Err("shortcircuit.when set but replacement missing".into()),
335 (None, Some(_)) => return Err("shortcircuit.replacement set but when missing".into()),
336 (None, None) => (None, None),
337 };
338
339 let strip_ansi = raw.ansi.and_then(|a| a.strip).unwrap_or(true);
340
341 Ok(TomlFilter {
342 name: name.to_string(),
343 source,
344 matches,
345 description: raw.filter.description,
346 strip,
347 line_max,
348 max_lines,
349 keep,
350 shortcircuit_when,
351 shortcircuit_replacement,
352 strip_ansi,
353 })
354}
355
356fn build_regex(pattern: &str, multiline: bool) -> Result<Regex, String> {
357 RegexBuilder::new(pattern)
358 .size_limit(REGEX_SIZE_LIMIT)
359 .multi_line(multiline)
360 .build()
361 .map_err(|e| e.to_string())
362}
363
364pub fn apply_filter(filter: &TomlFilter, output: &str) -> String {
373 let stripped_ansi = if filter.strip_ansi {
374 crate::compress::generic::strip_ansi(output)
375 } else {
376 output.to_string()
377 };
378
379 let kept: Vec<&str> = stripped_ansi
381 .lines()
382 .filter(|line| !filter.strip.iter().any(|re| re.is_match(line)))
383 .collect();
384 let after_strip = kept.join("\n");
385
386 if let (Some(when), Some(replacement)) =
388 (&filter.shortcircuit_when, &filter.shortcircuit_replacement)
389 {
390 if when.is_match(&after_strip) {
391 return replacement.clone();
392 }
393 }
394
395 let truncated: Vec<String> = if filter.line_max == usize::MAX {
397 kept.iter().map(|s| (*s).to_string()).collect()
398 } else {
399 kept.iter()
400 .map(|line| truncate_line(line, filter.line_max))
401 .collect()
402 };
403
404 cap_lines(&truncated, filter.max_lines, filter.keep)
406}
407
/// Shortens `line` to at most `line_max` characters by cutting out its middle.
///
/// Lines that already fit are returned unchanged. Otherwise the result is the
/// first and last `(line_max - 3) / 2` characters (saturating at zero) joined
/// by a single `…`. Lengths are counted in `char`s, not bytes.
fn truncate_line(line: &str, line_max: usize) -> String {
    let total = line.chars().count();
    if total <= line_max {
        return line.to_owned();
    }

    let side = line_max.saturating_sub(3) / 2;

    // Byte offset just past the first `side` characters.
    let head_end = line
        .char_indices()
        .nth(side)
        .map_or(line.len(), |(offset, _)| offset);

    // Byte offset where the last `side` characters begin.
    let tail_start = if side == 0 {
        line.len()
    } else {
        line.char_indices()
            .rev()
            .nth(side - 1)
            .map_or(0, |(offset, _)| offset)
    };

    let mut out = String::with_capacity(head_end + '…'.len_utf8() + (line.len() - tail_start));
    out.push_str(&line[..head_end]);
    out.push('…');
    out.push_str(&line[tail_start..]);
    out
}
425
426fn cap_lines(lines: &[String], max_lines: usize, keep: KeepMode) -> String {
427 if lines.len() <= max_lines || max_lines == usize::MAX {
428 return lines.join("\n");
429 }
430
431 let omitted = lines.len() - max_lines;
432 let marker = format!("… ({omitted} more lines)");
433
434 match keep {
435 KeepMode::Head => {
436 let mut out: Vec<String> = lines.iter().take(max_lines).cloned().collect();
437 out.push(marker);
438 out.join("\n")
439 }
440 KeepMode::Tail => {
441 let mut out = vec![marker];
442 out.extend(lines.iter().skip(omitted).cloned());
443 out.join("\n")
444 }
445 KeepMode::Middle => {
446 let head_count = max_lines / 2;
447 let tail_count = max_lines - head_count;
448 let mut out: Vec<String> = lines.iter().take(head_count).cloned().collect();
449 out.push(marker);
450 out.extend(lines.iter().skip(lines.len() - tail_count).cloned());
451 out.join("\n")
452 }
453 }
454}
455
456pub fn program_name(command: &str) -> Option<&str> {
466 for token in command.split_whitespace() {
467 if is_env_assignment(token) {
469 continue;
470 }
471 return Some(basename(token));
473 }
474 None
475}
476
/// Reports whether `token` looks like a shell `NAME=value` assignment prefix.
///
/// The part before the first `=` must be a valid shell variable name: it is
/// non-empty, starts with an ASCII letter or `_`, and continues with ASCII
/// letters, digits or `_`. A key that starts with a digit (`1FOO=bar`) is not
/// an assignment in a shell, so such a token is treated as the program.
fn is_env_assignment(token: &str) -> bool {
    let Some((key, _value)) = token.split_once('=') else {
        return false;
    };
    let mut chars = key.chars();
    match chars.next() {
        Some(first) if first == '_' || first.is_ascii_alphabetic() => {
            chars.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        // Empty key, or a key starting with a digit or other character.
        _ => false,
    }
}
484
/// Returns the part of `token` after its last `/` or `\` separator.
///
/// A token without any separator is returned whole; a token ending in a
/// separator yields an empty string.
fn basename(token: &str) -> &str {
    match token.rfind(|c: char| c == '/' || c == '\\') {
        // Both separators are one byte wide, so `sep + 1` is a char boundary.
        Some(sep) => &token[sep + 1..],
        None => token,
    }
}
497
/// Unit tests for filter parsing, filter application, program-name
/// extraction and registry precedence.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `content` as a built-in filter named `test`, panicking on error.
    fn parse(content: &str) -> TomlFilter {
        parse_filter("test", content, FilterSource::Builtin).expect("parse")
    }

    // A filter with only `matches` gets the documented defaults.
    #[test]
    fn parses_minimal_filter() {
        let filter = parse(
            r#"
[filter]
matches = ["make"]
"#,
        );
        assert_eq!(filter.matches, vec!["make"]);
        assert_eq!(filter.line_max, usize::MAX);
        assert_eq!(filter.max_lines, usize::MAX);
        assert!(filter.strip.is_empty());
        assert!(filter.shortcircuit_when.is_none());
        assert!(filter.strip_ansi);
    }

    // With no `matches`, the filter name is the only keyword.
    #[test]
    fn filename_default_match() {
        let filter = parse_filter("ls", "", FilterSource::Builtin).expect("parse");
        assert_eq!(filter.matches, vec!["ls"]);
    }

    // Keywords containing whitespace are rejected.
    #[test]
    fn rejects_invalid_match_keyword() {
        let err = parse_filter(
            "bad",
            r#"[filter]
matches = ["has whitespace"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("invalid match keyword"), "got: {err}");
    }

    // An uncompilable strip regex fails the whole filter.
    #[test]
    fn rejects_bad_strip_regex() {
        let err = parse_filter(
            "bad",
            r#"
[filter]
matches = ["bad"]

[strip]
patterns = ["[unclosed"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("strip pattern"), "got: {err}");
    }

    // Lines matching any strip pattern are removed.
    #[test]
    fn strip_drops_matching_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^Entering directory', '^Leaving directory']
"#,
        );
        let input = "Entering directory `/tmp`\ngcc -c foo.c\nLeaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "gcc -c foo.c");
    }

    // Output that is empty after stripping triggers the replacement.
    #[test]
    fn shortcircuit_replaces_empty_after_strip() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^make\[\d+\]:.*']

[shortcircuit]
when = '\A\z'
replacement = "make: ok"
"#,
        );
        let input = "make[1]: Entering directory `/tmp`\nmake[1]: Leaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "make: ok");
    }

    // `^`/`$` in `shortcircuit.when` anchor to the whole text, not each line.
    #[test]
    fn shortcircuit_line_anchors_do_not_match_inner_blank_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[shortcircuit]
when = '^\s*$'
replacement = "ok"
"#,
        );
        let out = apply_filter(&filter, "error\n\nhint");
        assert_eq!(out, "error\n\nhint");
    }

    // `keep = "tail"` puts the marker first and keeps the last lines.
    #[test]
    fn cap_tail_keeps_last_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 3
keep = "tail"
"#,
        );
        let input = "1\n2\n3\n4\n5";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "… (2 more lines)\n3\n4\n5");
    }

    // `keep = "head"` keeps the first lines and puts the marker last.
    #[test]
    fn cap_head_keeps_first_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "head"
"#,
        );
        let input = "1\n2\n3\n4";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "1\n2\n… (2 more lines)");
    }

    // `keep = "middle"` keeps both ends with the marker between them.
    #[test]
    fn cap_middle_keeps_head_and_tail() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 4
keep = "middle"
"#,
        );
        let input = "1\n2\n3\n4\n5\n6\n7\n8";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "1\n2\n… (4 more lines)\n7\n8");
    }

    // Over-long lines are shortened; short lines pass through untouched.
    #[test]
    fn truncate_per_line() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[truncate]
line_max = 10
"#,
        );
        let input = "shortline\nthis is a very long line indeed";
        let out = apply_filter(&filter, input);
        assert!(out.contains("shortline"));
        assert!(out.contains("…"));
        // Every line must respect the limit; `any` would pass on the short line alone.
        assert!(out.lines().all(|l| l.chars().count() <= 10));
    }

    // ANSI escapes are stripped unless the filter says otherwise.
    #[test]
    fn ansi_strip_default_true() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "red text");
    }

    // `[ansi] strip = false` leaves escapes in place.
    #[test]
    fn ansi_strip_can_be_disabled() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[ansi]
strip = false
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, input);
    }

    // The short-circuit regex is evaluated after stripping, not on raw output.
    #[test]
    fn shortcircuit_runs_on_after_strip_body() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^.*$']

[shortcircuit]
when = '^$'
replacement = "ok"
"#,
        );
        assert_eq!(apply_filter(&filter, "anything\nat all"), "ok");
    }

    // Env-assignment prefixes are skipped and paths reduced to their basename.
    #[test]
    fn program_name_handles_env_and_paths() {
        assert_eq!(program_name("make build"), Some("make"));
        assert_eq!(program_name("FOO=1 BAR=2 make build"), Some("make"));
        assert_eq!(program_name("/usr/bin/cargo build"), Some("cargo"));
        assert_eq!(program_name("./node_modules/.bin/eslint ."), Some("eslint"));
        assert_eq!(program_name("FOO=bar /opt/x/y subcmd"), Some("y"));
        assert_eq!(program_name(""), None);
        assert_eq!(program_name("   "), None);
    }

    // Pins current behavior: tokens are split on whitespace without any quote
    // handling, so an unquoted path containing a space yields its first fragment.
    #[test]
    fn program_name_unquoted_windows_path() {
        assert_eq!(
            program_name(r"C:\Program Files\Git\bin\git.exe status"),
            Some("Program")
        );
    }

    // A token with an empty key before `=` is not an env assignment.
    #[test]
    fn program_name_does_not_skip_non_assignment_token_with_equals() {
        assert_eq!(program_name("=oops echo hi"), Some("=oops"));
    }

    // Lookup keys on the program name, ignoring arguments.
    #[test]
    fn registry_lookup_by_program_name() {
        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]

[strip]
patterns = ['^Entering']
"#,
            )],
            None,
            None,
        );
        let f = registry.lookup("make build foo").unwrap();
        assert_eq!(f.matches, vec!["make"]);
        assert!(matches!(f.source, FilterSource::Builtin));
    }

    // A user filter replaces a built-in with the same keyword.
    #[test]
    fn registry_user_overrides_builtin() {
        let tmp = tempfile::tempdir().unwrap();
        let user_path = tmp.path().join("make.toml");
        fs::write(
            &user_path,
            r#"[filter]
matches = ["make"]
description = "user override"
"#,
        )
        .unwrap();

        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]
description = "builtin"
"#,
            )],
            Some(tmp.path()),
            None,
        );
        let f = registry.lookup("make build").unwrap();
        assert_eq!(f.description.as_deref(), Some("user override"));
        assert!(matches!(f.source, FilterSource::User { .. }));
    }

    // A project filter replaces a user filter with the same keyword.
    #[test]
    fn registry_project_overrides_user() {
        let user_dir = tempfile::tempdir().unwrap();
        let project_dir = tempfile::tempdir().unwrap();
        fs::write(
            user_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "user"
"#,
        )
        .unwrap();
        fs::write(
            project_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "project"
"#,
        )
        .unwrap();

        let registry = build_registry(&[], Some(user_dir.path()), Some(project_dir.path()));
        let f = registry.lookup("make").unwrap();
        assert_eq!(f.description.as_deref(), Some("project"));
        assert!(matches!(f.source, FilterSource::Project { .. }));
    }

    // An unparsable file produces a warning and does not block other filters.
    #[test]
    fn bad_filter_files_warn_not_panic() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("good.toml"),
            r#"[filter]
matches = ["good"]
"#,
        )
        .unwrap();
        fs::write(tmp.path().join("bad.toml"), "not valid = toml = at all =").unwrap();

        let registry = build_registry(&[], Some(tmp.path()), None);
        assert!(registry.lookup("good").is_some());
        assert!(registry.lookup("bad").is_none());
        assert!(
            registry.warnings().iter().any(|w| w.contains("bad.toml")),
            "warnings: {:?}",
            registry.warnings()
        );
    }

    // A filter directory that does not exist is not worth a warning.
    #[test]
    fn missing_dir_does_not_warn() {
        let registry = build_registry(&[], Some(Path::new("/nonexistent/path/12345")), None);
        assert!(registry.warnings().is_empty());
    }
}