1use std::collections::HashMap;
28use std::fs;
29use std::path::{Path, PathBuf};
30
31use regex::{Regex, RegexBuilder};
32use serde::Deserialize;
33
/// Size limit (2 MiB) handed to `RegexBuilder::size_limit` for every pattern
/// compiled by `build_regex`.
const REGEX_SIZE_LIMIT: usize = 2 * 1024 * 1024;

/// Upper bound on the number of `[strip]` patterns a single filter may declare;
/// `parse_filter` rejects files that exceed it.
const MAX_PATTERNS_PER_FILTER: usize = 256;

/// `line_max` used when a filter does not set `[truncate].line_max`.
/// `apply_filter` treats `usize::MAX` as "do not truncate lines".
const DEFAULT_LINE_MAX: usize = usize::MAX;

/// `max_lines` used when a filter does not set `[cap].max_lines`.
/// With `usize::MAX`, `cap_lines` returns every line unchanged.
const DEFAULT_MAX_LINES: usize = usize::MAX;
49
/// A parsed and validated output filter, as produced by `parse_filter`.
#[derive(Debug, Clone)]
pub struct TomlFilter {
    /// Name given to `parse_filter`: the builtin's name or the TOML file stem.
    pub name: String,
    /// Where this filter was loaded from.
    pub source: FilterSource,
    /// Program-name keywords this filter is registered under for lookup.
    pub matches: Vec<String>,
    /// Optional free-form description taken from `[filter].description`.
    pub description: Option<String>,
    /// Compiled `[strip].patterns`; a line matching any of them is dropped.
    pub strip: Vec<Regex>,
    /// Maximum characters per line; `usize::MAX` disables truncation.
    pub line_max: usize,
    /// Maximum number of output lines; `usize::MAX` disables capping.
    pub max_lines: usize,
    /// Which part of the output survives when `max_lines` is exceeded.
    pub keep: KeepMode,
    /// When set and matching the post-strip output, the whole output is
    /// replaced by `shortcircuit_replacement`.
    pub shortcircuit_when: Option<Regex>,
    /// Replacement text for the short-circuit; `parse_filter` only ever sets
    /// it together with `shortcircuit_when`.
    pub shortcircuit_replacement: Option<String>,
    /// Whether ANSI escape sequences are stripped before any other step.
    pub strip_ansi: bool,
}
65
/// Origin of a filter definition.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterSource {
    /// Supplied in the `builtin_inputs` slice passed to `build_registry`.
    Builtin,
    /// Loaded from a `.toml` file in the user directory; `path` is that file.
    User { path: PathBuf },
    /// Loaded from a `.toml` file in the project directory; `path` is that file.
    Project { path: PathBuf },
}
73
/// Which lines `cap_lines` keeps when the output exceeds `max_lines`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum KeepMode {
    /// Keep the first lines; the omission marker goes last.
    Head,
    /// Keep the last lines; the omission marker goes first. This is the default.
    #[default]
    Tail,
    /// Keep lines from both ends with the omission marker between them.
    Middle,
}
81
/// Collection of loaded filters plus the warnings gathered while loading them.
#[derive(Debug, Default, Clone)]
pub struct FilterRegistry {
    /// Filter to use for each match keyword; a later insertion for the same
    /// keyword replaces the earlier one.
    by_match: HashMap<String, TomlFilter>,
    /// Every loaded filter, with at most one entry per `(name, source)` pair.
    all: Vec<TomlFilter>,
    /// Human-readable messages for filters or directories that failed to load.
    warnings: Vec<String>,
}
97
98impl FilterRegistry {
99 pub fn lookup(&self, command: &str) -> Option<&TomlFilter> {
102 let program = program_name(command)?;
103 self.by_match.get(program)
104 }
105
106 pub fn all(&self) -> &[TomlFilter] {
108 &self.all
109 }
110
111 pub fn warnings(&self) -> &[String] {
114 &self.warnings
115 }
116}
117
118pub fn build_registry(
123 builtin_inputs: &[(&'static str, &'static str)],
124 user_dir: Option<&Path>,
125 project_dir: Option<&Path>,
126) -> FilterRegistry {
127 let mut registry = FilterRegistry::default();
128
129 for (name, content) in builtin_inputs {
131 match parse_filter(name, content, FilterSource::Builtin) {
132 Ok(filter) => insert_filter(&mut registry, filter),
133 Err(e) => registry
134 .warnings
135 .push(format!("builtin filter {name}: {e}")),
136 }
137 }
138
139 if let Some(dir) = user_dir {
141 load_dir(dir, &mut registry, |path| FilterSource::User {
142 path: path.to_path_buf(),
143 });
144 }
145
146 if let Some(dir) = project_dir {
150 load_dir(dir, &mut registry, |path| FilterSource::Project {
151 path: path.to_path_buf(),
152 });
153 }
154
155 registry
156}
157
158fn load_dir<F>(dir: &Path, registry: &mut FilterRegistry, source_for: F)
159where
160 F: Fn(&Path) -> FilterSource,
161{
162 let entries = match fs::read_dir(dir) {
163 Ok(entries) => entries,
164 Err(e) => {
165 if e.kind() != std::io::ErrorKind::NotFound {
167 registry
168 .warnings
169 .push(format!("filter dir {}: {e}", dir.display()));
170 }
171 return;
172 }
173 };
174
175 let mut paths: Vec<PathBuf> = entries
176 .filter_map(|res| res.ok())
177 .map(|entry| entry.path())
178 .filter(|path| path.extension().and_then(|s| s.to_str()) == Some("toml"))
179 .collect();
180 paths.sort();
181
182 for path in paths {
183 let content = match fs::read_to_string(&path) {
184 Ok(s) => s,
185 Err(e) => {
186 registry
187 .warnings
188 .push(format!("filter {}: read failed: {e}", path.display()));
189 continue;
190 }
191 };
192 let name = path
193 .file_stem()
194 .and_then(|s| s.to_str())
195 .unwrap_or("<unknown>")
196 .to_string();
197 let source = source_for(&path);
198 match parse_filter(&name, &content, source) {
199 Ok(filter) => insert_filter(registry, filter),
200 Err(e) => registry
201 .warnings
202 .push(format!("filter {}: {e}", path.display())),
203 }
204 }
205}
206
207fn insert_filter(registry: &mut FilterRegistry, filter: TomlFilter) {
208 for keyword in &filter.matches {
212 registry.by_match.insert(keyword.clone(), filter.clone());
213 }
214 registry
217 .all
218 .retain(|existing| !(existing.name == filter.name && existing.source == filter.source));
219 registry.all.push(filter);
220}
221
/// Shape of a filter file as deserialized from TOML, before validation.
///
/// Every table is optional; `parse_filter` fills in defaults for missing ones.
#[derive(Debug, Deserialize)]
struct RawFilter {
    /// The `[filter]` table: match keywords and description.
    #[serde(default)]
    filter: RawFilterMeta,
    /// The `[strip]` table: line-dropping patterns.
    #[serde(default)]
    strip: Option<RawStrip>,
    /// The `[truncate]` table: per-line length limit.
    #[serde(default)]
    truncate: Option<RawTruncate>,
    /// The `[cap]` table: total line limit and which part to keep.
    #[serde(default)]
    cap: Option<RawCap>,
    /// The `[shortcircuit]` table: whole-output replacement rule.
    #[serde(default)]
    shortcircuit: Option<RawShortcircuit>,
    /// The `[ansi]` table: ANSI escape stripping toggle.
    #[serde(default)]
    ansi: Option<RawAnsi>,
}
237
/// Raw contents of the `[filter]` table.
#[derive(Debug, Deserialize, Default)]
struct RawFilterMeta {
    /// Program-name keywords; when empty, `parse_filter` falls back to the
    /// filter's name.
    #[serde(default)]
    matches: Vec<String>,
    /// Optional free-form description.
    #[serde(default)]
    description: Option<String>,
}
245
/// Raw contents of the `[strip]` table.
#[derive(Debug, Deserialize, Default)]
struct RawStrip {
    /// Regex sources; `parse_filter` compiles each one and rejects the filter
    /// if any fails to compile.
    #[serde(default)]
    patterns: Vec<String>,
}
251
/// Raw contents of the `[truncate]` table.
#[derive(Debug, Deserialize, Default)]
struct RawTruncate {
    /// Per-line character limit; `DEFAULT_LINE_MAX` is used when absent.
    #[serde(default)]
    line_max: Option<usize>,
}
257
/// Raw contents of the `[cap]` table.
#[derive(Debug, Deserialize, Default)]
struct RawCap {
    /// Total line limit; `DEFAULT_MAX_LINES` is used when absent.
    #[serde(default)]
    max_lines: Option<usize>,
    /// One of `"head"`, `"tail"` or `"middle"`; any other value is rejected by
    /// `parse_filter`, and an absent value selects `KeepMode::default()`.
    #[serde(default)]
    keep: Option<String>,
}
265
/// Raw contents of the `[shortcircuit]` table.
///
/// `parse_filter` requires `when` and `replacement` to be given together or
/// not at all.
#[derive(Debug, Deserialize, Default)]
struct RawShortcircuit {
    /// Regex source tested against the post-strip output.
    #[serde(default)]
    when: Option<String>,
    /// Text returned in place of the output when `when` matches.
    #[serde(default)]
    replacement: Option<String>,
}
273
/// Raw contents of the `[ansi]` table.
#[derive(Debug, Deserialize, Default)]
struct RawAnsi {
    /// Whether to strip ANSI escapes; `parse_filter` defaults this to `true`.
    #[serde(default)]
    strip: Option<bool>,
}
279
280pub fn parse_filter(name: &str, content: &str, source: FilterSource) -> Result<TomlFilter, String> {
283 let raw: RawFilter = toml::from_str(content).map_err(|e| format!("invalid TOML: {e}"))?;
284
285 let mut matches = raw.filter.matches;
286 if matches.is_empty() {
287 matches.push(name.to_string());
289 }
290 for keyword in &matches {
291 if keyword.is_empty() || keyword.contains(char::is_whitespace) {
292 return Err(format!("invalid match keyword {keyword:?}"));
293 }
294 }
295
296 let strip_patterns = raw.strip.unwrap_or_default().patterns;
297 if strip_patterns.len() > MAX_PATTERNS_PER_FILTER {
298 return Err(format!(
299 "too many strip patterns ({} > {MAX_PATTERNS_PER_FILTER})",
300 strip_patterns.len()
301 ));
302 }
303 let mut strip = Vec::with_capacity(strip_patterns.len());
304 for pattern in strip_patterns {
305 let regex =
306 build_regex(&pattern, true).map_err(|e| format!("strip pattern {pattern:?}: {e}"))?;
307 strip.push(regex);
308 }
309
310 let line_max = raw
311 .truncate
312 .as_ref()
313 .and_then(|t| t.line_max)
314 .unwrap_or(DEFAULT_LINE_MAX);
315
316 let cap = raw.cap.unwrap_or_default();
317 let max_lines = cap.max_lines.unwrap_or(DEFAULT_MAX_LINES);
318 let keep = match cap.keep.as_deref() {
319 None => KeepMode::default(),
320 Some("head") => KeepMode::Head,
321 Some("tail") => KeepMode::Tail,
322 Some("middle") => KeepMode::Middle,
323 Some(other) => return Err(format!("invalid cap.keep {other:?}")),
324 };
325
326 let shortcircuit = raw.shortcircuit.unwrap_or_default();
327 let (shortcircuit_when, shortcircuit_replacement) =
328 match (shortcircuit.when, shortcircuit.replacement) {
329 (Some(when), Some(replacement)) => {
330 let regex = build_regex(&when, false)
331 .map_err(|e| format!("shortcircuit.when {when:?}: {e}"))?;
332 (Some(regex), Some(replacement))
333 }
334 (Some(_), None) => return Err("shortcircuit.when set but replacement missing".into()),
335 (None, Some(_)) => return Err("shortcircuit.replacement set but when missing".into()),
336 (None, None) => (None, None),
337 };
338
339 let strip_ansi = raw.ansi.and_then(|a| a.strip).unwrap_or(true);
340
341 Ok(TomlFilter {
342 name: name.to_string(),
343 source,
344 matches,
345 description: raw.filter.description,
346 strip,
347 line_max,
348 max_lines,
349 keep,
350 shortcircuit_when,
351 shortcircuit_replacement,
352 strip_ansi,
353 })
354}
355
356fn build_regex(pattern: &str, multiline: bool) -> Result<Regex, String> {
357 RegexBuilder::new(pattern)
358 .size_limit(REGEX_SIZE_LIMIT)
359 .multi_line(multiline)
360 .build()
361 .map_err(|e| e.to_string())
362}
363
364pub fn apply_filter(filter: &TomlFilter, output: &str) -> String {
373 let stripped_ansi = if filter.strip_ansi {
374 crate::compress::generic::strip_ansi(output)
375 } else {
376 output.to_string()
377 };
378
379 let kept: Vec<&str> = stripped_ansi
381 .lines()
382 .filter(|line| !filter.strip.iter().any(|re| re.is_match(line)))
383 .collect();
384 let after_strip = kept.join("\n");
385
386 if let (Some(when), Some(replacement)) =
388 (&filter.shortcircuit_when, &filter.shortcircuit_replacement)
389 {
390 if when.is_match(&after_strip) {
391 return replacement.clone();
392 }
393 }
394
395 let truncated: Vec<String> = if filter.line_max == usize::MAX {
397 kept.iter().map(|s| (*s).to_string()).collect()
398 } else {
399 kept.iter()
400 .map(|line| truncate_line(line, filter.line_max))
401 .collect()
402 };
403
404 cap_lines(&truncated, filter.max_lines, filter.keep)
406}
407
/// Shortens `line` to fit `line_max` characters by cutting out its middle.
///
/// Lines that already fit are returned unchanged. Longer lines keep an equal
/// number of characters from each end, joined by a single `…`. Lengths are
/// counted in `char`s, so multi-byte text is never split mid-character.
fn truncate_line(line: &str, line_max: usize) -> String {
    let total = line.chars().count();
    if total <= line_max {
        return line.to_owned();
    }

    // NOTE(review): three columns are reserved although the marker is a single
    // `…` character — confirm this is intended.
    let side = line_max.saturating_sub(3) / 2;

    let mut shortened = String::new();
    shortened.extend(line.chars().take(side));
    shortened.push('…');
    // `total > line_max >= side`, so the subtraction cannot underflow.
    shortened.extend(line.chars().skip(total - side));
    shortened
}
425
426fn cap_lines(lines: &[String], max_lines: usize, keep: KeepMode) -> String {
427 if lines.len() <= max_lines || max_lines == usize::MAX {
428 return lines.join("\n");
429 }
430
431 if max_lines == 0 {
432 return String::new();
433 }
434
435 let content_slots = max_lines - 1;
436
437 match keep {
438 KeepMode::Head => {
439 let omitted = lines.len() - content_slots;
440 let marker = format!("… ({omitted} more lines)");
441 let mut out: Vec<String> = lines.iter().take(content_slots).cloned().collect();
442 out.push(marker);
443 out.join("\n")
444 }
445 KeepMode::Tail => {
446 let omitted = lines.len() - content_slots;
447 let marker = format!("… ({omitted} more lines)");
448 let mut out = vec![marker];
449 out.extend(lines.iter().skip(omitted).cloned());
450 out.join("\n")
451 }
452 KeepMode::Middle => {
453 let head_count = content_slots / 2;
454 let tail_count = content_slots - head_count;
455 let omitted = lines.len() - head_count - tail_count;
456 let marker = format!("… ({omitted} more lines)");
457 let mut out: Vec<String> = lines.iter().take(head_count).cloned().collect();
458 out.push(marker);
459 out.extend(lines.iter().skip(lines.len() - tail_count).cloned());
460 out.join("\n")
461 }
462 }
463}
464
465pub fn program_name(command: &str) -> Option<&str> {
475 for token in command.split_whitespace() {
476 if is_env_assignment(token) {
478 continue;
479 }
480 return Some(basename(token));
482 }
483 None
484}
485
/// Reports whether `token` has the shape of a shell `NAME=value` assignment.
///
/// `NAME` is the text before the first `=`. It must be a valid shell variable
/// name: non-empty, made only of ASCII letters, digits and underscores, and
/// not starting with a digit. A shell treats a word such as `1FOO=bar` as the
/// command itself rather than an assignment, so it must not be skipped when
/// looking for the program name. The value part may be anything, including
/// empty or containing further `=` characters.
fn is_env_assignment(token: &str) -> bool {
    let Some((key, _value)) = token.split_once('=') else {
        return false;
    };

    let mut chars = key.chars();
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty key or a key that starts with a digit or other character.
        _ => false,
    }
}
493
/// Returns the part of `token` after its last path separator.
///
/// Both `/` and `\` count as separators. A token without any separator is
/// returned whole, and one that ends in a separator yields an empty string.
fn basename(token: &str) -> &str {
    token
        .rsplit(|c: char| matches!(c, '/' | '\\'))
        .next()
        // `rsplit` always yields at least one piece; this is only a fallback.
        .unwrap_or(token)
}
506
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `content` as a builtin filter named "test", panicking on error.
    fn parse(content: &str) -> TomlFilter {
        parse_filter("test", content, FilterSource::Builtin).expect("parse")
    }

    #[test]
    fn parses_minimal_filter() {
        let filter = parse(
            r#"
[filter]
matches = ["make"]
"#,
        );
        assert_eq!(filter.matches, vec!["make"]);
        assert_eq!(filter.line_max, usize::MAX);
        assert_eq!(filter.max_lines, usize::MAX);
        assert!(filter.strip.is_empty());
        assert!(filter.shortcircuit_when.is_none());
        assert!(filter.strip_ansi);
    }

    #[test]
    fn filename_default_match() {
        // An empty file declares no keywords, so the filter name is used.
        let filter = parse_filter("ls", "", FilterSource::Builtin).expect("parse");
        assert_eq!(filter.matches, vec!["ls"]);
    }

    #[test]
    fn rejects_invalid_match_keyword() {
        let err = parse_filter(
            "bad",
            r#"[filter]
matches = ["has whitespace"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("invalid match keyword"), "got: {err}");
    }

    #[test]
    fn rejects_bad_strip_regex() {
        let err = parse_filter(
            "bad",
            r#"
[filter]
matches = ["bad"]

[strip]
patterns = ["[unclosed"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("strip pattern"), "got: {err}");
    }

    #[test]
    fn strip_drops_matching_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^Entering directory', '^Leaving directory']
"#,
        );
        let input = "Entering directory `/tmp`\ngcc -c foo.c\nLeaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "gcc -c foo.c");
    }

    #[test]
    fn shortcircuit_replaces_empty_after_strip() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^make\[\d+\]:.*']

[shortcircuit]
when = '\A\z'
replacement = "make: ok"
"#,
        );
        let input = "make[1]: Entering directory `/tmp`\nmake[1]: Leaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "make: ok");
    }

    #[test]
    fn shortcircuit_line_anchors_do_not_match_inner_blank_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[shortcircuit]
when = '^\s*$'
replacement = "ok"
"#,
        );
        let out = apply_filter(&filter, "error\n\nhint");
        assert_eq!(out, "error\n\nhint");
    }

    #[test]
    fn cap_tail_keeps_last_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 3
keep = "tail"
"#,
        );
        let input = "1\n2\n3\n4\n5";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "… (3 more lines)\n4\n5");
        assert_eq!(out.lines().count(), 3);
    }

    #[test]
    fn cap_head_keeps_first_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "head"
"#,
        );
        let input = "1\n2\n3\n4";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "1\n… (3 more lines)");
        assert_eq!(out.lines().count(), 2);
    }

    #[test]
    fn cap_middle_keeps_head_and_tail() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 4
keep = "middle"
"#,
        );
        let input = "1\n2\n3\n4\n5\n6\n7\n8";
        let out = apply_filter(&filter, input);
        // Three content slots: one for the head, two for the tail.
        assert_eq!(out, "1\n… (5 more lines)\n7\n8");
        assert_eq!(out.lines().count(), 4);
    }

    #[test]
    fn cap_zero_keeps_no_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 0
keep = "head"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out, "");
    }

    #[test]
    fn cap_one_keeps_only_marker_when_truncated() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 1
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3");
        assert_eq!(out, "… (3 more lines)");
        assert_eq!(out.lines().count(), 1);
    }

    #[test]
    fn cap_two_reserves_one_line_for_marker() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "tail"
"#,
        );
        let out = apply_filter(&filter, "1\n2\n3\n4");
        assert_eq!(out, "… (3 more lines)\n4");
        assert_eq!(out.lines().count(), 2);
    }

    #[test]
    fn truncate_per_line() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[truncate]
line_max = 10
"#,
        );
        let input = "shortline\nthis is a very long line indeed";
        let out = apply_filter(&filter, input);
        assert!(out.contains("shortline"));
        assert!(out.contains("…"));
        // Every line must respect the limit. Checking only that some line does
        // would pass on the untouched short line alone.
        assert!(out.lines().all(|l| l.chars().count() <= 10));
    }

    #[test]
    fn ansi_strip_default_true() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "red text");
    }

    #[test]
    fn ansi_strip_can_be_disabled() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[ansi]
strip = false
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, input);
    }

    #[test]
    fn shortcircuit_runs_on_after_strip_body() {
        let filter = parse(
            // Every line is stripped, so the short-circuit sees an empty body.
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^.*$']

[shortcircuit]
when = '^$'
replacement = "ok"
"#,
        );
        assert_eq!(apply_filter(&filter, "anything\nat all"), "ok");
    }

    #[test]
    fn program_name_handles_env_and_paths() {
        assert_eq!(program_name("make build"), Some("make"));
        assert_eq!(program_name("FOO=1 BAR=2 make build"), Some("make"));
        assert_eq!(program_name("/usr/bin/cargo build"), Some("cargo"));
        assert_eq!(program_name("./node_modules/.bin/eslint ."), Some("eslint"));
        assert_eq!(program_name("FOO=bar /opt/x/y subcmd"), Some("y"));
        assert_eq!(program_name(""), None);
        assert_eq!(program_name("   "), None);
    }

    #[test]
    fn program_name_unquoted_windows_path() {
        // Tokens are split on whitespace with no quote handling, so an unquoted
        // path containing a space is cut at that space.
        assert_eq!(
            program_name(r"C:\Program Files\Git\bin\git.exe status"),
            Some("Program")
        );
    }

    #[test]
    fn program_name_does_not_skip_non_assignment_token_with_equals() {
        // An empty key is not an assignment, so the token is the program.
        assert_eq!(program_name("=oops echo hi"), Some("=oops"));
    }

    #[test]
    fn registry_lookup_by_program_name() {
        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]

[strip]
patterns = ['^Entering']
"#,
            )],
            None,
            None,
        );
        let f = registry.lookup("make build foo").unwrap();
        assert_eq!(f.matches, vec!["make"]);
        assert!(matches!(f.source, FilterSource::Builtin));
    }

    #[test]
    fn registry_user_overrides_builtin() {
        let tmp = tempfile::tempdir().unwrap();
        let user_path = tmp.path().join("make.toml");
        fs::write(
            &user_path,
            r#"[filter]
matches = ["make"]
description = "user override"
"#,
        )
        .unwrap();

        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]
description = "builtin"
"#,
            )],
            Some(tmp.path()),
            None,
        );
        let f = registry.lookup("make build").unwrap();
        assert_eq!(f.description.as_deref(), Some("user override"));
        assert!(matches!(f.source, FilterSource::User { .. }));
    }

    #[test]
    fn registry_project_overrides_user() {
        let user_dir = tempfile::tempdir().unwrap();
        let project_dir = tempfile::tempdir().unwrap();
        fs::write(
            user_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "user"
"#,
        )
        .unwrap();
        fs::write(
            project_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "project"
"#,
        )
        .unwrap();

        let registry = build_registry(&[], Some(user_dir.path()), Some(project_dir.path()));
        let f = registry.lookup("make").unwrap();
        assert_eq!(f.description.as_deref(), Some("project"));
        assert!(matches!(f.source, FilterSource::Project { .. }));
    }

    #[test]
    fn bad_filter_files_warn_not_panic() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("good.toml"),
            r#"[filter]
matches = ["good"]
"#,
        )
        .unwrap();
        fs::write(tmp.path().join("bad.toml"), "not valid = toml = at all =").unwrap();

        let registry = build_registry(&[], Some(tmp.path()), None);
        assert!(registry.lookup("good").is_some());
        assert!(registry.lookup("bad").is_none());
        assert!(
            registry.warnings().iter().any(|w| w.contains("bad.toml")),
            "warnings: {:?}",
            registry.warnings()
        );
    }

    #[test]
    fn missing_dir_does_not_warn() {
        let registry = build_registry(&[], Some(Path::new("/nonexistent/path/12345")), None);
        assert!(registry.warnings().is_empty());
    }
}