Skip to main content

normalize_filter/
lib.rs

//! Filter system for the `--exclude` and `--only` flags.
//!
//! Supports:
//! - Glob patterns: `--exclude="*_test.go"`, `--only="*.rs"`
//! - Aliases: `--exclude=@tests`, `--only=@docs`
//!
//! Built-in aliases are language-aware (e.g., `@tests` includes `*_test.go` for Go and
//! `test_*.py` for Python). Config can override built-in aliases or add new ones via `[aliases]`.
9
10#[cfg(feature = "cli")]
11pub mod service;
12
13use ignore::gitignore::{Gitignore, GitignoreBuilder};
14use serde::{Deserialize, Serialize};
15use std::collections::HashMap;
16use std::path::Path;
17
18// ============================================================================
19// AliasConfig
20// ============================================================================
21
22/// Unified alias configuration for @ prefix expansion.
23/// Used for both command targets (`normalize view @todo`) and filters (`--only @tests`).
24///
25/// Example:
26/// ```toml
27/// [aliases]
28/// todo = ["TODO.md"]              # @todo → specific file
29/// config = [".normalize/config.toml"]  # overrides built-in @config
30/// vendor = ["vendor/**"]          # custom filter alias
31/// tests = []                      # disable built-in @tests
32/// ```
33#[derive(Debug, Clone, Deserialize, Serialize, Default)]
34#[cfg_attr(feature = "config", derive(schemars::JsonSchema))]
35#[serde(default)]
36pub struct AliasConfig {
37    /// Map alias names to paths/patterns. Empty array disables the alias.
38    #[serde(flatten)]
39    pub entries: HashMap<String, Vec<String>>,
40}
41
42impl AliasConfig {
43    /// Names of all built-in aliases.
44    pub fn builtin_names() -> &'static [&'static str] {
45        &["tests", "config", "build", "docs", "generated"]
46    }
47
48    /// Get values for an alias, falling back to builtins.
49    /// Returns None if alias is unknown or disabled (empty array).
50    ///
51    /// For language-aware builtins like @tests, pass detected languages.
52    pub fn get(&self, name: &str) -> Option<Vec<String>> {
53        self.get_with_languages(name, &[])
54    }
55
56    /// Get values for an alias with language context for builtins like @tests.
57    pub fn get_with_languages(&self, name: &str, languages: &[&str]) -> Option<Vec<String>> {
58        // Check user config first
59        if let Some(values) = self.entries.get(name) {
60            if values.is_empty() {
61                return None; // Disabled
62            }
63            return Some(values.clone());
64        }
65
66        // Fall back to builtins
67        Self::builtin(name, languages)
68    }
69
70    /// Built-in alias patterns.
71    fn builtin(name: &str, languages: &[&str]) -> Option<Vec<String>> {
72        let patterns: Vec<&str> = match name {
73            "tests" => {
74                let mut p: Vec<String> = vec![];
75                for lang in languages {
76                    p.extend(normalize_language_meta::test_file_globs_for_language(lang));
77                }
78                p.sort_unstable();
79                p.dedup();
80                return Some(p);
81            }
82            "config" => vec![
83                "*.toml",
84                "*.yaml",
85                "*.yml",
86                "*.json",
87                "*.ini",
88                "*.cfg",
89                ".env",
90                ".env.*",
91                "*.config.js",
92                "*.config.ts",
93            ],
94            "build" => vec![
95                "target/**",
96                "dist/**",
97                "build/**",
98                "out/**",
99                "node_modules/**",
100                ".next/**",
101                ".nuxt/**",
102                "__pycache__/**",
103                "*.pyc",
104            ],
105            "docs" => vec![
106                "*.md",
107                "*.rst",
108                "*.txt",
109                "docs/**",
110                "doc/**",
111                "README*",
112                "CHANGELOG*",
113                "LICENSE*",
114            ],
115            "generated" => vec![
116                "*.gen.*",
117                "*.generated.*",
118                "*.pb.go",
119                "*.pb.rs",
120                "*_generated.go",
121                "*_generated.rs",
122                "generated/**",
123            ],
124            _ => return None,
125        };
126        Some(patterns.into_iter().map(String::from).collect())
127    }
128}
129
130// ============================================================================
131// FilterError
132// ============================================================================
133
134/// Error returned by [`Filter::new`].
135#[derive(Debug, thiserror::Error)]
136pub enum FilterError {
137    /// The pattern is not a valid glob.
138    #[error("invalid filter pattern '{pattern}': {reason}")]
139    InvalidPattern { pattern: String, reason: String },
140    /// A bare word that looks like a language name was used instead of a glob or alias.
141    #[error("{0}")]
142    InvalidPatternHint(String),
143    /// An `@alias` name is not defined.
144    #[error("unknown alias @{0}")]
145    UnknownAlias(String),
146}
147
148impl From<FilterError> for String {
149    fn from(e: FilterError) -> String {
150        e.to_string()
151    }
152}
153
154// ============================================================================
155// Filter
156// ============================================================================
157
158/// Status of an alias (for display purposes).
159#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
160#[cfg_attr(feature = "config", derive(schemars::JsonSchema))]
161#[serde(rename_all = "lowercase")]
162pub enum AliasStatus {
163    /// Built-in alias, unmodified
164    Builtin,
165    /// Custom alias defined in config
166    Custom,
167    /// Built-in alias disabled via empty array in config
168    Disabled,
169    /// Built-in alias overridden with new patterns in config
170    Overridden,
171}
172
173impl std::fmt::Display for AliasStatus {
174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
175        match self {
176            AliasStatus::Builtin => write!(f, "builtin"),
177            AliasStatus::Custom => write!(f, "custom"),
178            AliasStatus::Disabled => write!(f, "disabled"),
179            AliasStatus::Overridden => write!(f, "overridden"),
180        }
181    }
182}
183
184/// Resolved alias information for display.
185#[derive(Debug, Clone)]
186pub struct ResolvedAlias {
187    pub name: String,
188    pub patterns: Vec<String>,
189    pub status: AliasStatus,
190}
191
/// Outcome of looking up a single `@alias`.
#[derive(Debug)]
pub enum AliasResolution {
    /// The alias expanded to these glob patterns.
    Patterns(Vec<String>),
    /// No alias with this name exists (neither built in nor configured).
    UnknownAlias(String),
    /// The alias exists but the config disabled it with an empty array.
    DisabledAlias(String),
}
202
203/// Filter engine that resolves aliases and matches paths.
204#[derive(Debug)]
205pub struct Filter {
206    /// Compiled exclude patterns
207    exclude_matcher: Option<Gitignore>,
208    /// Compiled include patterns (only mode)
209    only_matcher: Option<Gitignore>,
210    /// Warnings accumulated during construction
211    warnings: Vec<String>,
212}
213
214impl Filter {
215    /// Create a new filter from exclude/only patterns.
216    ///
217    /// Patterns starting with `@` are resolved as aliases.
218    /// Returns warnings for disabled aliases.
219    pub fn new(
220        exclude: &[String],
221        only: &[String],
222        config: &AliasConfig,
223        languages: &[&str],
224    ) -> Result<Self, FilterError> {
225        let mut warnings = Vec::new();
226
227        // Build exclude matcher
228        let exclude_matcher = if exclude.is_empty() {
229            None
230        } else {
231            let patterns = resolve_patterns(exclude, config, languages, &mut warnings)?;
232            if patterns.is_empty() {
233                None
234            } else {
235                Some(build_matcher(&patterns)?)
236            }
237        };
238
239        // Build only matcher
240        let only_matcher = if only.is_empty() {
241            None
242        } else {
243            let patterns = resolve_patterns(only, config, languages, &mut warnings)?;
244            if patterns.is_empty() {
245                None
246            } else {
247                Some(build_matcher(&patterns)?)
248            }
249        };
250
251        Ok(Self {
252            exclude_matcher,
253            only_matcher,
254            warnings,
255        })
256    }
257
258    /// Get warnings from filter construction.
259    pub fn warnings(&self) -> &[String] {
260        &self.warnings
261    }
262
263    /// Check if a path should be included.
264    ///
265    /// Returns true if the path passes the filter.
266    pub fn matches(&self, path: &Path) -> bool {
267        // If only matcher exists, path must match it
268        if let Some(ref only) = self.only_matcher
269            && !only.matched(path, false).is_ignore()
270        {
271            return false;
272        }
273
274        // If exclude matcher exists, path must not match it
275        if let Some(ref exclude) = self.exclude_matcher
276            && exclude.matched(path, false).is_ignore()
277        {
278            return false;
279        }
280
281        true
282    }
283
284    /// Check if any filters are active.
285    #[allow(dead_code)]
286    pub fn is_active(&self) -> bool {
287        self.exclude_matcher.is_some() || self.only_matcher.is_some()
288    }
289}
290
291/// Resolve patterns, expanding aliases.
292fn resolve_patterns(
293    patterns: &[String],
294    config: &AliasConfig,
295    languages: &[&str],
296    warnings: &mut Vec<String>,
297) -> Result<Vec<String>, FilterError> {
298    let mut result = Vec::new();
299
300    for pattern in patterns {
301        if let Some(alias_name) = pattern.strip_prefix('@') {
302            match resolve_alias(alias_name, config, languages) {
303                AliasResolution::Patterns(ps) => {
304                    result.extend(ps);
305                }
306                AliasResolution::UnknownAlias(name) => {
307                    return Err(FilterError::UnknownAlias(name));
308                }
309                AliasResolution::DisabledAlias(name) => {
310                    warnings.push(format!("@{} is disabled (matches nothing)", name));
311                }
312            }
313        } else if looks_like_language_name(pattern) {
314            // Bare words like "rust" or "Rust" are not valid glob patterns and will
315            // silently match nothing. Detect this early and emit a helpful error.
316            let matched_lang = languages
317                .iter()
318                .find(|l| l.eq_ignore_ascii_case(pattern))
319                .copied();
320            if let Some(lang) = matched_lang {
321                return Err(FilterError::InvalidPatternHint(format!(
322                    "'{pattern}' is not a valid pattern — use a glob like '*.ext' or an alias like '@tests' (run 'normalize aliases' to list available aliases; detected language: {lang})"
323                )));
324            } else {
325                return Err(FilterError::InvalidPatternHint(format!(
326                    "'{pattern}' is not a valid pattern — use a glob like '*.rs' or an alias like '@tests' (run 'normalize aliases' to list available aliases)"
327                )));
328            }
329        } else {
330            result.push(pattern.clone());
331        }
332    }
333
334    Ok(result)
335}
336
/// Reports whether `pattern` is a bare word that reads like a language name
/// rather than a glob.
///
/// A bare word is non-empty and made up solely of alphabetic characters, `-`
/// and `_`. That whitelist already rules out glob metacharacters (`*`, `?`,
/// `{`, `[`), the path separator (`/`) and the extension dot (`.`), as well as
/// digits and whitespace, so no separate metacharacter check is needed.
fn looks_like_language_name(pattern: &str) -> bool {
    if pattern.is_empty() {
        return false;
    }
    pattern
        .chars()
        .all(|c| matches!(c, '-' | '_') || c.is_alphabetic())
}
349
350/// Resolve a single alias name to patterns.
351fn resolve_alias(name: &str, config: &AliasConfig, languages: &[&str]) -> AliasResolution {
352    // Check if explicitly disabled
353    if let Some(patterns) = config.entries.get(name)
354        && patterns.is_empty()
355    {
356        return AliasResolution::DisabledAlias(name.to_string());
357    }
358
359    // Use unified alias lookup
360    match config.get_with_languages(name, languages) {
361        Some(patterns) => AliasResolution::Patterns(patterns),
362        None => AliasResolution::UnknownAlias(name.to_string()),
363    }
364}
365
366/// Build a gitignore-style matcher from patterns.
367fn build_matcher(patterns: &[String]) -> Result<Gitignore, FilterError> {
368    let mut builder = GitignoreBuilder::new("");
369
370    for pattern in patterns {
371        builder
372            .add_line(None, pattern)
373            .map_err(|e| FilterError::InvalidPattern {
374                pattern: pattern.clone(),
375                reason: e.to_string(),
376            })?;
377    }
378
379    builder.build().map_err(|e| FilterError::InvalidPattern {
380        pattern: String::new(),
381        reason: e.to_string(),
382    })
383}
384
385/// Get all resolved aliases for display (normalize filter aliases).
386pub fn list_aliases(config: &AliasConfig, languages: &[&str]) -> Vec<ResolvedAlias> {
387    let mut aliases = Vec::new();
388    let builtin_names = AliasConfig::builtin_names();
389
390    // Process built-in aliases
391    for &name in builtin_names {
392        if let Some(user_patterns) = config.entries.get(name) {
393            if user_patterns.is_empty() {
394                aliases.push(ResolvedAlias {
395                    name: name.to_string(),
396                    patterns: vec![],
397                    status: AliasStatus::Disabled,
398                });
399            } else {
400                aliases.push(ResolvedAlias {
401                    name: name.to_string(),
402                    patterns: user_patterns.clone(),
403                    status: AliasStatus::Overridden,
404                });
405            }
406        } else if let Some(patterns) = config.get_with_languages(name, languages) {
407            aliases.push(ResolvedAlias {
408                name: name.to_string(),
409                patterns,
410                status: AliasStatus::Builtin,
411            });
412        }
413    }
414
415    // Add custom aliases from config
416    let builtin_set: std::collections::HashSet<&str> = builtin_names.iter().copied().collect();
417    for (name, patterns) in &config.entries {
418        if !builtin_set.contains(name.as_str()) {
419            aliases.push(ResolvedAlias {
420                name: name.clone(),
421                patterns: patterns.clone(),
422                status: AliasStatus::Custom,
423            });
424        }
425    }
426
427    // Sort: built-ins first, then custom
428    aliases.sort_by(|a, b| {
429        let a_builtin = matches!(
430            a.status,
431            AliasStatus::Builtin | AliasStatus::Disabled | AliasStatus::Overridden
432        );
433        let b_builtin = matches!(
434            b.status,
435            AliasStatus::Builtin | AliasStatus::Disabled | AliasStatus::Overridden
436        );
437        match (a_builtin, b_builtin) {
438            (true, false) => std::cmp::Ordering::Less,
439            (false, true) => std::cmp::Ordering::Greater,
440            _ => a.name.cmp(&b.name),
441        }
442    });
443
444    aliases
445}
446
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned pattern list `Filter::new` expects.
    fn owned(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| (*p).to_owned()).collect()
    }

    /// A plain glob passed to `exclude` drops matching paths only.
    #[test]
    fn test_resolve_glob_pattern() {
        let config = AliasConfig::default();
        let exclude = owned(&["*.test.js"]);
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let filter = Filter::new(&exclude, &[], &config, &["javascript"]).unwrap();

        assert!(filter.is_active());
        assert!(!filter.matches(Path::new("foo.test.js")));
        assert!(filter.matches(Path::new("foo.js")));
    }

    /// The built-in `@tests` alias expands to language-specific globs.
    #[test]
    fn test_resolve_alias() {
        let config = AliasConfig::default();
        let exclude = owned(&["@tests"]);
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let filter = Filter::new(&exclude, &[], &config, &["go"]).unwrap();

        assert!(filter.is_active());
        assert!(!filter.matches(Path::new("foo_test.go")));
        assert!(filter.matches(Path::new("foo.go")));
    }

    /// An alias that is neither built in nor configured is a hard error.
    #[test]
    fn test_unknown_alias_error() {
        let config = AliasConfig::default();
        let exclude = owned(&["@unknown"]);
        let result = Filter::new(&exclude, &[], &config, &[]);

        assert!(result.is_err());
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let message = result.unwrap_err().to_string();
        assert!(message.contains("unknown alias @unknown"));
    }

    /// Excluding a disabled alias yields a warning and an inactive filter.
    #[test]
    fn test_disabled_alias_warning() {
        let mut config = AliasConfig::default();
        config.entries.insert("tests".to_string(), Vec::new());

        let exclude = owned(&["@tests"]);
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let filter = Filter::new(&exclude, &[], &config, &["Go"]).unwrap();

        assert!(!filter.is_active()); // No patterns = not active
        assert_eq!(filter.warnings().len(), 1);
        assert!(filter.warnings()[0].contains("disabled"));
    }

    /// A config entry replaces the built-in patterns instead of adding to them.
    #[test]
    fn test_config_override() {
        let mut config = AliasConfig::default();
        config
            .entries
            .insert("tests".to_string(), owned(&["my_tests/**"]));

        let exclude = owned(&["@tests"]);
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let filter = Filter::new(&exclude, &[], &config, &["Go"]).unwrap();

        assert!(filter.is_active());
        assert!(!filter.matches(Path::new("my_tests/foo.go")));
        assert!(filter.matches(Path::new("foo_test.go"))); // Built-in pattern not applied
    }

    /// `only` keeps matching paths and rejects everything else.
    #[test]
    fn test_only_mode() {
        let config = AliasConfig::default();
        let only = owned(&["*.rs"]);
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let filter = Filter::new(&[], &only, &config, &[]).unwrap();

        assert!(filter.is_active());
        assert!(filter.matches(Path::new("foo.rs")));
        assert!(!filter.matches(Path::new("foo.go")));
    }

    /// A bare word matching a detected language errors and names that language.
    #[test]
    fn test_bare_language_name_error() {
        let config = AliasConfig::default();
        let only = owned(&["rust"]);
        // "rust" looks like a language name — should error with a helpful message
        let result = Filter::new(&[], &only, &config, &["Rust"]);
        assert!(result.is_err());
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let err = result.unwrap_err().to_string();
        assert!(err.contains("'rust' is not a valid pattern"), "got: {err}");
        assert!(
            err.contains("Rust"),
            "should mention detected language, got: {err}"
        );
    }

    /// A bare word that matches no detected language still gets the generic hint.
    #[test]
    fn test_bare_language_name_no_detected_language() {
        let config = AliasConfig::default();
        let only = owned(&["python"]);
        // "python" not in detected languages — still errors with generic hint
        let result = Filter::new(&[], &only, &config, &["Rust"]);
        assert!(result.is_err());
        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("'python' is not a valid pattern"),
            "got: {err}"
        );
    }

    /// The listing classifies disabled, custom and untouched built-in aliases.
    #[test]
    fn test_list_aliases() {
        let mut config = AliasConfig::default();
        config.entries.insert("tests".to_string(), Vec::new()); // Disabled
        config
            .entries
            .insert("vendor".to_string(), owned(&["vendor/**"])); // Custom

        let aliases = list_aliases(&config, &["rust"]);

        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let tests = aliases.iter().find(|a| a.name == "tests").unwrap();
        assert_eq!(tests.status, AliasStatus::Disabled);

        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let vendor = aliases.iter().find(|a| a.name == "vendor").unwrap();
        assert_eq!(vendor.status, AliasStatus::Custom);

        // normalize-syntax-allow: rust/unwrap-in-impl - test code, panic is appropriate
        let docs = aliases.iter().find(|a| a.name == "docs").unwrap();
        assert_eq!(docs.status, AliasStatus::Builtin);
    }
}