1use std::collections::HashMap;
26use std::fs;
27use std::path::{Path, PathBuf};
28
29use regex::{Regex, RegexBuilder};
30use serde::Deserialize;
31
/// Size limit (2 MiB) passed to `RegexBuilder::size_limit` for every pattern
/// compiled by `build_regex`.
const REGEX_SIZE_LIMIT: usize = 2 * 1024 * 1024;

/// Maximum number of `[strip]` patterns one filter may declare; `parse_filter`
/// rejects a filter that lists more.
const MAX_PATTERNS_PER_FILTER: usize = 256;

/// `line_max` used when a filter does not set `[truncate].line_max`.
/// `apply_filter` treats `usize::MAX` as "do not truncate lines".
const DEFAULT_LINE_MAX: usize = usize::MAX;

/// `max_lines` used when a filter does not set `[cap].max_lines`.
/// `cap_lines` returns every line when the limit is `usize::MAX`.
const DEFAULT_MAX_LINES: usize = usize::MAX;
47
/// A fully validated output filter, produced by `parse_filter` from a TOML document.
#[derive(Debug, Clone)]
pub struct TomlFilter {
    /// Name the filter was parsed under (the file stem for on-disk filters).
    pub name: String,
    /// Where the filter definition came from.
    pub source: FilterSource,
    /// Program-name keywords this filter is registered under. Falls back to
    /// `[name]` when the TOML declares no `matches`.
    pub matches: Vec<String>,
    /// Optional free-form description from `[filter].description`.
    pub description: Option<String>,
    /// Compiled `[strip].patterns`; a line matching any of them is dropped.
    pub strip: Vec<Regex>,
    /// Maximum characters kept per line; `usize::MAX` disables truncation.
    pub line_max: usize,
    /// Maximum number of lines kept; `usize::MAX` disables the cap.
    pub max_lines: usize,
    /// Which part of the output survives when the line cap applies.
    pub keep: KeepMode,
    /// When this matches the body left after stripping, the whole output is
    /// replaced by `shortcircuit_replacement`. `parse_filter` only ever sets
    /// this together with the replacement.
    pub shortcircuit_when: Option<Regex>,
    /// Replacement text returned when `shortcircuit_when` matches.
    pub shortcircuit_replacement: Option<String>,
    /// Whether ANSI escape sequences are removed before any other step
    /// (defaults to `true` when `[ansi].strip` is absent).
    pub strip_ansi: bool,
}
63
/// Origin of a filter definition, as assigned by `build_registry`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterSource {
    /// Supplied through the `builtin_inputs` slice.
    Builtin,
    /// Loaded from a `.toml` file in the user directory; `path` is that file.
    User { path: PathBuf },
    /// Loaded from a `.toml` file in the project directory; `path` is that file.
    Project { path: PathBuf },
}
71
/// Which lines `cap_lines` keeps once the output exceeds `max_lines`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum KeepMode {
    /// Keep the first `max_lines` lines; the omission marker goes last.
    Head,
    /// Keep the last `max_lines` lines; the omission marker goes first.
    /// This is the default when `[cap].keep` is absent.
    #[default]
    Tail,
    /// Keep the first `max_lines / 2` lines and the remaining budget from the
    /// end, with the omission marker between them (the middle is dropped).
    Middle,
}
79
/// Collection of loaded filters, indexed by program-name keyword.
#[derive(Debug, Default, Clone)]
pub struct FilterRegistry {
    /// Keyword -> filter. A later insert for the same keyword replaces the
    /// earlier one, which is how later sources override earlier ones.
    by_match: HashMap<String, TomlFilter>,
    /// Every filter that was inserted, in insertion order (a re-insert with the
    /// same name and source replaces the previous entry).
    all: Vec<TomlFilter>,
    /// Human-readable problems collected while loading (bad files, bad dirs).
    warnings: Vec<String>,
}
95
96impl FilterRegistry {
97 pub fn lookup(&self, command: &str) -> Option<&TomlFilter> {
100 let program = program_name(command)?;
101 self.by_match.get(program)
102 }
103
104 pub fn all(&self) -> &[TomlFilter] {
106 &self.all
107 }
108
109 pub fn warnings(&self) -> &[String] {
112 &self.warnings
113 }
114}
115
116pub fn build_registry(
121 builtin_inputs: &[(&'static str, &'static str)],
122 user_dir: Option<&Path>,
123 project_dir: Option<&Path>,
124) -> FilterRegistry {
125 let mut registry = FilterRegistry::default();
126
127 for (name, content) in builtin_inputs {
129 match parse_filter(name, content, FilterSource::Builtin) {
130 Ok(filter) => insert_filter(&mut registry, filter),
131 Err(e) => registry
132 .warnings
133 .push(format!("builtin filter {name}: {e}")),
134 }
135 }
136
137 if let Some(dir) = user_dir {
139 load_dir(dir, &mut registry, |path| FilterSource::User {
140 path: path.to_path_buf(),
141 });
142 }
143
144 if let Some(dir) = project_dir {
148 load_dir(dir, &mut registry, |path| FilterSource::Project {
149 path: path.to_path_buf(),
150 });
151 }
152
153 registry
154}
155
156fn load_dir<F>(dir: &Path, registry: &mut FilterRegistry, source_for: F)
157where
158 F: Fn(&Path) -> FilterSource,
159{
160 let entries = match fs::read_dir(dir) {
161 Ok(entries) => entries,
162 Err(e) => {
163 if e.kind() != std::io::ErrorKind::NotFound {
165 registry
166 .warnings
167 .push(format!("filter dir {}: {e}", dir.display()));
168 }
169 return;
170 }
171 };
172
173 let mut paths: Vec<PathBuf> = entries
174 .filter_map(|res| res.ok())
175 .map(|entry| entry.path())
176 .filter(|path| path.extension().and_then(|s| s.to_str()) == Some("toml"))
177 .collect();
178 paths.sort();
179
180 for path in paths {
181 let content = match fs::read_to_string(&path) {
182 Ok(s) => s,
183 Err(e) => {
184 registry
185 .warnings
186 .push(format!("filter {}: read failed: {e}", path.display()));
187 continue;
188 }
189 };
190 let name = path
191 .file_stem()
192 .and_then(|s| s.to_str())
193 .unwrap_or("<unknown>")
194 .to_string();
195 let source = source_for(&path);
196 match parse_filter(&name, &content, source) {
197 Ok(filter) => insert_filter(registry, filter),
198 Err(e) => registry
199 .warnings
200 .push(format!("filter {}: {e}", path.display())),
201 }
202 }
203}
204
205fn insert_filter(registry: &mut FilterRegistry, filter: TomlFilter) {
206 for keyword in &filter.matches {
210 registry.by_match.insert(keyword.clone(), filter.clone());
211 }
212 registry
215 .all
216 .retain(|existing| !(existing.name == filter.name && existing.source == filter.source));
217 registry.all.push(filter);
218}
219
/// Top-level shape of a filter TOML document as deserialized by serde.
/// Every table is optional; `parse_filter` validates and applies defaults.
#[derive(Debug, Deserialize)]
struct RawFilter {
    /// `[filter]` table: match keywords and description.
    #[serde(default)]
    filter: RawFilterMeta,
    /// `[strip]` table: line-dropping patterns.
    #[serde(default)]
    strip: Option<RawStrip>,
    /// `[truncate]` table: per-line length limit.
    #[serde(default)]
    truncate: Option<RawTruncate>,
    /// `[cap]` table: line-count limit and which part to keep.
    #[serde(default)]
    cap: Option<RawCap>,
    /// `[shortcircuit]` table: whole-output replacement rule.
    #[serde(default)]
    shortcircuit: Option<RawShortcircuit>,
    /// `[ansi]` table: ANSI stripping toggle.
    #[serde(default)]
    ansi: Option<RawAnsi>,
}
235
/// Raw `[filter]` table.
#[derive(Debug, Deserialize, Default)]
struct RawFilterMeta {
    /// Program-name keywords; when empty, `parse_filter` uses the filter name.
    #[serde(default)]
    matches: Vec<String>,
    /// Optional free-form description.
    #[serde(default)]
    description: Option<String>,
}
243
/// Raw `[strip]` table.
#[derive(Debug, Deserialize, Default)]
struct RawStrip {
    /// Uncompiled regex sources; `parse_filter` compiles each with `build_regex`.
    #[serde(default)]
    patterns: Vec<String>,
}
249
/// Raw `[truncate]` table.
#[derive(Debug, Deserialize, Default)]
struct RawTruncate {
    /// Per-line character limit; absent means `DEFAULT_LINE_MAX`.
    #[serde(default)]
    line_max: Option<usize>,
}
255
/// Raw `[cap]` table.
#[derive(Debug, Deserialize, Default)]
struct RawCap {
    /// Line-count limit; absent means `DEFAULT_MAX_LINES`.
    #[serde(default)]
    max_lines: Option<usize>,
    /// One of `"head"`, `"tail"`, `"middle"`; any other value is rejected by
    /// `parse_filter`. Absent means `KeepMode::default()`.
    #[serde(default)]
    keep: Option<String>,
}
263
/// Raw `[shortcircuit]` table. `parse_filter` requires `when` and
/// `replacement` to be given together or not at all.
#[derive(Debug, Deserialize, Default)]
struct RawShortcircuit {
    /// Uncompiled regex tested against the output left after stripping.
    #[serde(default)]
    when: Option<String>,
    /// Text that replaces the whole output when `when` matches.
    #[serde(default)]
    replacement: Option<String>,
}
271
/// Raw `[ansi]` table.
#[derive(Debug, Deserialize, Default)]
struct RawAnsi {
    /// Whether to strip ANSI escapes; absent is treated as `true` by `parse_filter`.
    #[serde(default)]
    strip: Option<bool>,
}
277
278pub fn parse_filter(name: &str, content: &str, source: FilterSource) -> Result<TomlFilter, String> {
281 let raw: RawFilter = toml::from_str(content).map_err(|e| format!("invalid TOML: {e}"))?;
282
283 let mut matches = raw.filter.matches;
284 if matches.is_empty() {
285 matches.push(name.to_string());
287 }
288 for keyword in &matches {
289 if keyword.is_empty() || keyword.contains(char::is_whitespace) {
290 return Err(format!("invalid match keyword {keyword:?}"));
291 }
292 }
293
294 let strip_patterns = raw.strip.unwrap_or_default().patterns;
295 if strip_patterns.len() > MAX_PATTERNS_PER_FILTER {
296 return Err(format!(
297 "too many strip patterns ({} > {MAX_PATTERNS_PER_FILTER})",
298 strip_patterns.len()
299 ));
300 }
301 let mut strip = Vec::with_capacity(strip_patterns.len());
302 for pattern in strip_patterns {
303 let regex = build_regex(&pattern).map_err(|e| format!("strip pattern {pattern:?}: {e}"))?;
304 strip.push(regex);
305 }
306
307 let line_max = raw
308 .truncate
309 .as_ref()
310 .and_then(|t| t.line_max)
311 .unwrap_or(DEFAULT_LINE_MAX);
312
313 let cap = raw.cap.unwrap_or_default();
314 let max_lines = cap.max_lines.unwrap_or(DEFAULT_MAX_LINES);
315 let keep = match cap.keep.as_deref() {
316 None => KeepMode::default(),
317 Some("head") => KeepMode::Head,
318 Some("tail") => KeepMode::Tail,
319 Some("middle") => KeepMode::Middle,
320 Some(other) => return Err(format!("invalid cap.keep {other:?}")),
321 };
322
323 let shortcircuit = raw.shortcircuit.unwrap_or_default();
324 let (shortcircuit_when, shortcircuit_replacement) =
325 match (shortcircuit.when, shortcircuit.replacement) {
326 (Some(when), Some(replacement)) => {
327 let regex =
328 build_regex(&when).map_err(|e| format!("shortcircuit.when {when:?}: {e}"))?;
329 (Some(regex), Some(replacement))
330 }
331 (Some(_), None) => return Err("shortcircuit.when set but replacement missing".into()),
332 (None, Some(_)) => return Err("shortcircuit.replacement set but when missing".into()),
333 (None, None) => (None, None),
334 };
335
336 let strip_ansi = raw.ansi.and_then(|a| a.strip).unwrap_or(true);
337
338 Ok(TomlFilter {
339 name: name.to_string(),
340 source,
341 matches,
342 description: raw.filter.description,
343 strip,
344 line_max,
345 max_lines,
346 keep,
347 shortcircuit_when,
348 shortcircuit_replacement,
349 strip_ansi,
350 })
351}
352
353fn build_regex(pattern: &str) -> Result<Regex, String> {
354 RegexBuilder::new(pattern)
355 .size_limit(REGEX_SIZE_LIMIT)
356 .multi_line(true)
357 .build()
358 .map_err(|e| e.to_string())
359}
360
361pub fn apply_filter(filter: &TomlFilter, output: &str) -> String {
370 let stripped_ansi = if filter.strip_ansi {
371 crate::compress::generic::strip_ansi(output)
372 } else {
373 output.to_string()
374 };
375
376 let kept: Vec<&str> = stripped_ansi
378 .lines()
379 .filter(|line| !filter.strip.iter().any(|re| re.is_match(line)))
380 .collect();
381 let after_strip = kept.join("\n");
382
383 if let (Some(when), Some(replacement)) =
385 (&filter.shortcircuit_when, &filter.shortcircuit_replacement)
386 {
387 if when.is_match(&after_strip) {
388 return replacement.clone();
389 }
390 }
391
392 let truncated: Vec<String> = if filter.line_max == usize::MAX {
394 kept.iter().map(|s| (*s).to_string()).collect()
395 } else {
396 kept.iter()
397 .map(|line| truncate_line(line, filter.line_max))
398 .collect()
399 };
400
401 cap_lines(&truncated, filter.max_lines, filter.keep)
403}
404
/// Shortens `line` to fit `line_max` characters by cutting out its middle.
///
/// Lines that already fit are returned unchanged. Otherwise the result is the
/// first and last `(line_max - 3) / 2` characters joined by `…`. Three columns
/// are reserved for the marker even though `…` is a single character, so the
/// result can be shorter than `line_max`. Lengths are counted in `char`s, not
/// bytes, so multi-byte text is never split inside a code point.
fn truncate_line(line: &str, line_max: usize) -> String {
    let total_chars = line.chars().count();
    if total_chars <= line_max {
        return line.to_owned();
    }

    let side = line_max.saturating_sub(3) / 2;

    // Byte offset just past the first `side` characters.
    let head_end = line
        .char_indices()
        .nth(side)
        .map_or(line.len(), |(offset, _)| offset);
    // Byte offset where the last `side` characters begin.
    let tail_start = match side {
        0 => line.len(),
        _ => line
            .char_indices()
            .rev()
            .nth(side - 1)
            .map_or(0, |(offset, _)| offset),
    };

    let head = &line[..head_end];
    let tail = &line[tail_start..];
    format!("{head}…{tail}")
}
422
423fn cap_lines(lines: &[String], max_lines: usize, keep: KeepMode) -> String {
424 if lines.len() <= max_lines || max_lines == usize::MAX {
425 return lines.join("\n");
426 }
427
428 let omitted = lines.len() - max_lines;
429 let marker = format!("… ({omitted} more lines)");
430
431 match keep {
432 KeepMode::Head => {
433 let mut out: Vec<String> = lines.iter().take(max_lines).cloned().collect();
434 out.push(marker);
435 out.join("\n")
436 }
437 KeepMode::Tail => {
438 let mut out = vec![marker];
439 out.extend(lines.iter().skip(omitted).cloned());
440 out.join("\n")
441 }
442 KeepMode::Middle => {
443 let head_count = max_lines / 2;
444 let tail_count = max_lines - head_count;
445 let mut out: Vec<String> = lines.iter().take(head_count).cloned().collect();
446 out.push(marker);
447 out.extend(lines.iter().skip(lines.len() - tail_count).cloned());
448 out.join("\n")
449 }
450 }
451}
452
453pub fn program_name(command: &str) -> Option<&str> {
463 for token in command.split_whitespace() {
464 if is_env_assignment(token) {
466 continue;
467 }
468 return Some(basename(token));
470 }
471 None
472}
473
/// Reports whether `token` looks like a shell `NAME=value` environment assignment.
///
/// The part before the first `=` must be a valid shell variable name: ASCII
/// letters, digits and underscores, and not starting with a digit. A token such
/// as `1FOO=bar` is therefore treated as a program token rather than skipped
/// as an assignment.
fn is_env_assignment(token: &str) -> bool {
    let Some((key, _value)) = token.split_once('=') else {
        return false;
    };
    let mut chars = key.chars();
    match chars.next() {
        // First character: letter or underscore only.
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty key or a key starting with a digit/punctuation.
        _ => false,
    }
}
481
/// Returns the part of `token` after its last `/` or `\` separator.
///
/// Both Unix and Windows separators are recognised. A token without any
/// separator is returned unchanged; a token ending in a separator yields `""`.
fn basename(token: &str) -> &str {
    // Both separators are single-byte ASCII, so `sep + 1` is a char boundary.
    match token.rfind(|c: char| c == '/' || c == '\\') {
        Some(sep) => &token[sep + 1..],
        None => token,
    }
}
494
// Unit tests for filter parsing, filter application, program-name extraction
// and registry precedence.
#[cfg(test)]
mod tests {
    use super::*;

    // Parses `content` as a builtin filter named "test", panicking on error.
    fn parse(content: &str) -> TomlFilter {
        parse_filter("test", content, FilterSource::Builtin).expect("parse")
    }

    // A filter with only `matches` gets every documented default.
    #[test]
    fn parses_minimal_filter() {
        let filter = parse(
            r#"
[filter]
matches = ["make"]
"#,
        );
        assert_eq!(filter.matches, vec!["make"]);
        assert_eq!(filter.line_max, usize::MAX);
        assert_eq!(filter.max_lines, usize::MAX);
        assert!(filter.strip.is_empty());
        assert!(filter.shortcircuit_when.is_none());
        assert!(filter.strip_ansi);
    }

    // With no `matches` declared, the filter name becomes the only keyword.
    #[test]
    fn filename_default_match() {
        let filter = parse_filter("ls", "", FilterSource::Builtin).expect("parse");
        assert_eq!(filter.matches, vec!["ls"]);
    }

    // Keywords containing whitespace are rejected.
    #[test]
    fn rejects_invalid_match_keyword() {
        let err = parse_filter(
            "bad",
            r#"[filter]
matches = ["has whitespace"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("invalid match keyword"), "got: {err}");
    }

    // A strip pattern that does not compile fails the whole filter.
    #[test]
    fn rejects_bad_strip_regex() {
        let err = parse_filter(
            "bad",
            r#"
[filter]
matches = ["bad"]

[strip]
patterns = ["[unclosed"]
"#,
            FilterSource::Builtin,
        )
        .unwrap_err();
        assert!(err.contains("strip pattern"), "got: {err}");
    }

    // Lines matching any strip pattern are removed; the rest pass through.
    #[test]
    fn strip_drops_matching_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^Entering directory', '^Leaving directory']
"#,
        );
        let input = "Entering directory `/tmp`\ngcc -c foo.c\nLeaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "gcc -c foo.c");
    }

    // When stripping leaves nothing, `^$` matches and the replacement is returned.
    #[test]
    fn shortcircuit_replaces_empty_after_strip() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^make\[\d+\]:.*']

[shortcircuit]
when = '^$'
replacement = "make: ok"
"#,
        );
        let input = "make[1]: Entering directory `/tmp`\nmake[1]: Leaving directory `/tmp`";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "make: ok");
    }

    // Tail mode: marker first, then the last `max_lines` lines.
    #[test]
    fn cap_tail_keeps_last_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 3
keep = "tail"
"#,
        );
        let input = "1\n2\n3\n4\n5";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "… (2 more lines)\n3\n4\n5");
    }

    // Head mode: the first `max_lines` lines, then the marker.
    #[test]
    fn cap_head_keeps_first_n_lines() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 2
keep = "head"
"#,
        );
        let input = "1\n2\n3\n4";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "1\n2\n… (2 more lines)");
    }

    // Middle mode: half the budget from the start, half from the end.
    #[test]
    fn cap_middle_keeps_head_and_tail() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[cap]
max_lines = 4
keep = "middle"
"#,
        );
        let input = "1\n2\n3\n4\n5\n6\n7\n8";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "1\n2\n… (4 more lines)\n7\n8");
    }

    // Short lines are untouched; long lines gain an ellipsis.
    #[test]
    fn truncate_per_line() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[truncate]
line_max = 10
"#,
        );
        let input = "shortline\nthis is a very long line indeed";
        let out = apply_filter(&filter, input);
        assert!(out.contains("shortline"));
        assert!(out.contains("…"));
        assert!(out.lines().any(|l| l.chars().count() <= 10));
    }

    // ANSI escapes are stripped unless the filter opts out.
    #[test]
    fn ansi_strip_default_true() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, "red text");
    }

    // `[ansi] strip = false` leaves escape sequences in place.
    #[test]
    fn ansi_strip_can_be_disabled() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[ansi]
strip = false
"#,
        );
        let input = "\x1b[31mred\x1b[0m text";
        let out = apply_filter(&filter, input);
        assert_eq!(out, input);
    }

    // The shortcircuit regex sees the body after stripping, not the raw input.
    #[test]
    fn shortcircuit_runs_on_after_strip_body() {
        let filter = parse(
            r#"
[filter]
matches = ["x"]

[strip]
patterns = ['^.*$']

[shortcircuit]
when = '^$'
replacement = "ok"
"#,
        );
        assert_eq!(apply_filter(&filter, "anything\nat all"), "ok");
    }

    // Env assignments are skipped and directory prefixes removed.
    #[test]
    fn program_name_handles_env_and_paths() {
        assert_eq!(program_name("make build"), Some("make"));
        assert_eq!(program_name("FOO=1 BAR=2 make build"), Some("make"));
        assert_eq!(program_name("/usr/bin/cargo build"), Some("cargo"));
        assert_eq!(program_name("./node_modules/.bin/eslint ."), Some("eslint"));
        assert_eq!(program_name("FOO=bar /opt/x/y subcmd"), Some("y"));
        assert_eq!(program_name(""), None);
        assert_eq!(program_name(" "), None);
    }

    // Pins a known limitation: the command is split on whitespace before the
    // basename is taken, so an unquoted path containing a space yields only the
    // fragment before that space.
    #[test]
    fn program_name_unquoted_windows_path() {
        assert_eq!(
            program_name(r"C:\Program Files\Git\bin\git.exe status"),
            Some("Program")
        );
    }

    // A token with an empty key before `=` is not an env assignment, so it is
    // returned as the program.
    #[test]
    fn program_name_does_not_skip_non_assignment_token_with_equals() {
        assert_eq!(program_name("=oops echo hi"), Some("=oops"));
    }

    // Lookup resolves the filter through the command's program name.
    #[test]
    fn registry_lookup_by_program_name() {
        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]

[strip]
patterns = ['^Entering']
"#,
            )],
            None,
            None,
        );
        let f = registry.lookup("make build foo").unwrap();
        assert_eq!(f.matches, vec!["make"]);
        assert!(matches!(f.source, FilterSource::Builtin));
    }

    // A user-directory filter replaces a builtin with the same keyword.
    #[test]
    fn registry_user_overrides_builtin() {
        let tmp = tempfile::tempdir().unwrap();
        let user_path = tmp.path().join("make.toml");
        fs::write(
            &user_path,
            r#"[filter]
matches = ["make"]
description = "user override"
"#,
        )
        .unwrap();

        let registry = build_registry(
            &[(
                "make",
                r#"[filter]
matches = ["make"]
description = "builtin"
"#,
            )],
            Some(tmp.path()),
            None,
        );
        let f = registry.lookup("make build").unwrap();
        assert_eq!(f.description.as_deref(), Some("user override"));
        assert!(matches!(f.source, FilterSource::User { .. }));
    }

    // A project-directory filter replaces a user filter with the same keyword.
    #[test]
    fn registry_project_overrides_user() {
        let user_dir = tempfile::tempdir().unwrap();
        let project_dir = tempfile::tempdir().unwrap();
        fs::write(
            user_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "user"
"#,
        )
        .unwrap();
        fs::write(
            project_dir.path().join("make.toml"),
            r#"[filter]
matches = ["make"]
description = "project"
"#,
        )
        .unwrap();

        let registry = build_registry(&[], Some(user_dir.path()), Some(project_dir.path()));
        let f = registry.lookup("make").unwrap();
        assert_eq!(f.description.as_deref(), Some("project"));
        assert!(matches!(f.source, FilterSource::Project { .. }));
    }

    // An unparsable file produces a warning and does not block other filters.
    #[test]
    fn bad_filter_files_warn_not_panic() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("good.toml"),
            r#"[filter]
matches = ["good"]
"#,
        )
        .unwrap();
        fs::write(tmp.path().join("bad.toml"), "not valid = toml = at all =").unwrap();

        let registry = build_registry(&[], Some(tmp.path()), None);
        assert!(registry.lookup("good").is_some());
        assert!(registry.lookup("bad").is_none());
        assert!(
            registry.warnings().iter().any(|w| w.contains("bad.toml")),
            "warnings: {:?}",
            registry.warnings()
        );
    }

    // A directory that does not exist is treated as "no filters", silently.
    #[test]
    fn missing_dir_does_not_warn() {
        let registry = build_registry(&[], Some(Path::new("/nonexistent/path/12345")), None);
        assert!(registry.warnings().is_empty());
    }
}