Skip to main content

alint_core/
facts.rs

1//! Facts — cached properties of the repository evaluated once per run and
2//! referenced by `when` clauses on rules (shipping in a later commit).
3//!
4//! Each fact has an `id` and exactly one kind-specific top-level field that
5//! names its type. Example:
6//!
7//! ```yaml
8//! facts:
9//!   - id: is_rust
10//!     any_file_exists: ["Cargo.toml"]
11//!   - id: is_monorepo
12//!     all_files_exist: ["packages", "pnpm-workspace.yaml"]
13//!   - id: n_java_files
14//!     count_files: "**/*.java"
15//! ```
16//!
17//! Evaluation is declarative and cheap — facts see the walked `FileIndex`
18//! but not arbitrary filesystem state outside the repo root.
19
20use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The value a fact resolves to.
///
/// The set of shapes is deliberately small for v0.2; richer types (list,
/// map) arrive with the `when` expression language.
#[derive(Clone, Debug, PartialEq)]
pub enum FactValue {
    /// A yes/no answer.
    Bool(bool),
    /// A signed integer.
    Int(i64),
    /// Free-form text.
    String(String),
}

impl FactValue {
    /// Coerce the value to a boolean for `when` truthiness checks.
    ///
    /// * `Bool(b)` is `b` itself.
    /// * `Int(n)` is `true` unless `n` is zero (negative numbers are truthy).
    /// * `String(s)` is `true` unless `s` is empty.
    pub fn truthy(&self) -> bool {
        match self {
            Self::Bool(flag) => *flag,
            Self::Int(0) => false,
            Self::Int(_) => true,
            Self::String(text) => !text.is_empty(),
        }
    }
}
50
51/// A string or a list of strings — accepted by fact kinds whose input is
52/// glob-shaped.
53#[derive(Debug, Clone, Deserialize)]
54#[serde(untagged)]
55pub enum OneOrMany {
56    One(String),
57    Many(Vec<String>),
58}
59
60impl OneOrMany {
61    pub fn to_vec(&self) -> Vec<String> {
62        match self {
63            Self::One(s) => vec![s.clone()],
64            Self::Many(v) => v.clone(),
65        }
66    }
67}
68
69/// YAML-level declaration of a single fact.
70#[derive(Debug, Clone, Deserialize)]
71pub struct FactSpec {
72    pub id: String,
73    #[serde(flatten)]
74    pub kind: FactKind,
75}
76
77/// The closed set of built-in fact kinds. Serde dispatches via `untagged`
78/// — the first variant whose required field is present in the YAML wins.
79#[derive(Debug, Clone, Deserialize)]
80#[serde(untagged)]
81pub enum FactKind {
82    AnyFileExists {
83        any_file_exists: OneOrMany,
84    },
85    AllFilesExist {
86        all_files_exist: OneOrMany,
87    },
88    CountFiles {
89        count_files: String,
90    },
91    FileContentMatches {
92        file_content_matches: FileContentMatchesFact,
93    },
94    GitBranch {
95        git_branch: GitBranchFact,
96    },
97    Custom {
98        custom: CustomFact,
99    },
100}
101
102/// Fact-kind body for `custom`. Spawns `argv` as a child process
103/// rooted at the repo; the process's stdout (trimmed of trailing
104/// whitespace) becomes the fact's `String` value. A non-zero
105/// exit code resolves to the empty string; timeouts and spawn
106/// failures do the same. No shell is invoked — `argv` is passed
107/// to `execve` (or the platform equivalent) verbatim.
108///
109/// Security: `custom` facts are only allowed in the user's own
110/// top-level config. Any `extends:` ancestor that declares one
111/// is rejected at load time — otherwise a malicious ruleset
112/// could execute arbitrary code just by being fetched.
113#[derive(Debug, Clone, Deserialize)]
114#[serde(deny_unknown_fields)]
115pub struct CustomFact {
116    /// Program and arguments. `argv[0]` is looked up through PATH
117    /// if it's not an absolute or relative-with-separator path.
118    pub argv: Vec<String>,
119}
120
121/// Fact-kind body for `file_content_matches`. Fact evaluates
122/// truthy when at least one file in `paths` contains a regex
123/// match for `pattern`. Files that aren't valid UTF-8 are skipped.
124#[derive(Debug, Clone, Deserialize)]
125#[serde(deny_unknown_fields)]
126pub struct FileContentMatchesFact {
127    pub paths: OneOrMany,
128    pub pattern: String,
129}
130
131/// Fact-kind body for `git_branch`. Empty — the body is just
132/// `git_branch: {}` in YAML and the discriminator is the key.
133///
134/// Evaluates to the current branch name by reading `.git/HEAD`
135/// directly (no `git` binary required). Returns an empty string
136/// when the repo isn't on a named branch (detached HEAD, not a
137/// git repo at all, worktree/submodule variants, or any unusual
138/// `.git` layout we don't fully resolve). An empty string is
139/// falsy under `when:` coercion, so downstream rules naturally
140/// no-op in those cases.
141#[derive(Debug, Clone, Deserialize, Default)]
142#[serde(deny_unknown_fields)]
143pub struct GitBranchFact {}
144
145/// The resolved map from fact id to value, produced once per `Engine::run`.
146#[derive(Debug, Default, Clone)]
147pub struct FactValues(HashMap<String, FactValue>);
148
149impl FactValues {
150    pub fn new() -> Self {
151        Self::default()
152    }
153
154    pub fn insert(&mut self, id: String, v: FactValue) {
155        self.0.insert(id, v);
156    }
157
158    pub fn get(&self, id: &str) -> Option<&FactValue> {
159        self.0.get(id)
160    }
161
162    pub fn len(&self) -> usize {
163        self.0.len()
164    }
165
166    pub fn is_empty(&self) -> bool {
167        self.0.is_empty()
168    }
169
170    pub fn as_map(&self) -> &HashMap<String, FactValue> {
171        &self.0
172    }
173}
174
175/// Evaluate a whole fact list against a prebuilt `FileIndex`. Invoked by
176/// `Engine::run` before any rule evaluates.
177pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
178    let mut out = FactValues::new();
179    for spec in facts {
180        let value = evaluate_one(spec, root, index)?;
181        out.insert(spec.id.clone(), value);
182    }
183    Ok(out)
184}
185
186fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
187    match &spec.kind {
188        FactKind::AnyFileExists { any_file_exists } => {
189            let globs = any_file_exists.to_vec();
190            let scope = Scope::from_patterns(&globs)?;
191            let found = index.files().any(|e| scope.matches(&e.path));
192            Ok(FactValue::Bool(found))
193        }
194        FactKind::AllFilesExist { all_files_exist } => {
195            let globs = all_files_exist.to_vec();
196            for glob in &globs {
197                let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
198                if !index.files().any(|e| scope.matches(&e.path)) {
199                    return Ok(FactValue::Bool(false));
200                }
201            }
202            Ok(FactValue::Bool(true))
203        }
204        FactKind::CountFiles { count_files } => {
205            let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
206            let count = index.files().filter(|e| scope.matches(&e.path)).count();
207            Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
208        }
209        FactKind::FileContentMatches {
210            file_content_matches: spec,
211        } => {
212            let scope = Scope::from_patterns(&spec.paths.to_vec())?;
213            let regex = Regex::new(&spec.pattern)
214                .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
215            let any = index.files().any(|entry| {
216                if !scope.matches(&entry.path) {
217                    return false;
218                }
219                let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
220                    return false;
221                };
222                let Ok(text) = std::str::from_utf8(&bytes) else {
223                    return false;
224                };
225                regex.is_match(text)
226            });
227            Ok(FactValue::Bool(any))
228        }
229        FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
230        FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
231    }
232}
233
234/// Best-effort: spawn `argv` at `root`, capture stdout. Non-zero
235/// exit / spawn failures / unusable output → empty string.
236fn run_custom(spec: &CustomFact, root: &Path) -> String {
237    let Some((program, args)) = spec.argv.split_first() else {
238        return String::new();
239    };
240    let output = std::process::Command::new(program)
241        .args(args)
242        .current_dir(root)
243        .stdin(std::process::Stdio::null())
244        .stderr(std::process::Stdio::null())
245        .output();
246    let Ok(output) = output else {
247        return String::new();
248    };
249    if !output.status.success() {
250        return String::new();
251    }
252    match std::str::from_utf8(&output.stdout) {
253        Ok(text) => text.trim_end().to_string(),
254        Err(_) => String::new(),
255    }
256}
257
258/// Reject `custom` facts in `config`. Used by the DSL loader to
259/// enforce that only the user's top-level config can spawn
260/// processes; extended (local or remote) configs can't.
261pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
262    for f in &config.facts {
263        if matches!(f.kind, FactKind::Custom { .. }) {
264            return Err(Error::Other(format!(
265                "fact {:?}: `custom:` facts are only allowed in the user's top-level \
266                 config; declaring one in an extended config ({source}) is refused because \
267                 it would let a ruleset spawn arbitrary processes",
268                f.id
269            )));
270        }
271    }
272    Ok(())
273}
274
/// Best-effort lookup of the checked-out branch.
///
/// Reads `<root>/.git/HEAD` and, when its trimmed content has the form
/// `ref: refs/heads/<branch>`, returns `<branch>`. Anything else — detached
/// HEAD (a bare SHA), worktree pointers, a missing or unreadable file,
/// non-UTF-8 content — yields `""`, which downstream `when:` coercion treats
/// as falsy.
fn read_git_branch(root: &Path) -> String {
    let head_path = root.join(".git").join("HEAD");
    std::fs::read_to_string(head_path)
        .ok()
        .and_then(|raw| {
            raw.trim()
                .strip_prefix("ref: refs/heads/")
                .map(str::to_string)
        })
        .unwrap_or_default()
}
291
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Build an in-memory index in which every path is a 1-byte regular file.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex {
            entries: paths
                .iter()
                .map(|p| FileEntry {
                    path: PathBuf::from(p),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        }
    }

    /// Parse a YAML list of fact declarations, panicking on malformed input.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Minimal config carrying only the given facts; every other field holds
    /// the same fixed value in all tests that need a `Config`.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v =
            evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml", "src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let facts = parse("- id: has_readme\n  any_file_exists: README.md\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["README.md"])).unwrap();
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "README.md", "src/main.rs"]),
        )
        .unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml"])).unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let facts = parse("- id: n_rs\n  count_files: \"**/*.rs\"\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["a.rs", "b.rs", "src/c.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let facts = parse(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
        );
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "src/lib.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml", "README.md"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // Invalid UTF-8 byte sequence.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let facts = parse(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
        );
        let index = idx(&["blob.bin", "text.txt"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        // Non-UTF-8 is silently skipped, so `text.txt` is what matters.
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), "ref: refs/heads/feature-x\n").unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(
            tmp.path().join(".git/HEAD"),
            "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n",
        )
        .unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let facts = parse(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts =
            parse("- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn custom_empty_argv_is_empty_string() {
        let tmp = tempdir().unwrap();
        // With no program to run, nothing is spawned and the fact is "".
        let facts = parse("- id: empty\n  custom:\n    argv: []\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("empty"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        // The script writes to stdout *and* exits non-zero, so the assertion
        // only holds if captured output is discarded on failure. (A program
        // such as `false` prints nothing and could not tell the two apart.)
        let facts = parse(
            "- id: bad\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf leaked; exit 3\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let facts = parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        );
        let config = config_with(facts);
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let facts = parse("- id: plain\n  any_file_exists: x\n");
        let config = config_with(facts);
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}