Skip to main content

alint_core/
facts.rs

1//! Facts — cached properties of the repository evaluated once per run and
2//! referenced by `when` clauses on rules (shipping in a later commit).
3//!
4//! Each fact has an `id` and exactly one kind-specific top-level field that
5//! names its type. Example:
6//!
7//! ```yaml
8//! facts:
9//!   - id: is_rust
10//!     any_file_exists: ["Cargo.toml"]
11//!   - id: is_monorepo
12//!     all_files_exist: ["packages", "pnpm-workspace.yaml"]
13//!   - id: n_java_files
14//!     count_files: "**/*.java"
15//! ```
16//!
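//! Evaluation is declarative and cheap — most kinds consult only the walked
//! `FileIndex`. `file_content_matches` and `git_branch` additionally read
//! files under the repo root, and `custom` spawns a user-supplied process,
//! which is the one kind that can observe state outside the repo root.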
19
20use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The result of evaluating a single fact. The variant set is deliberately
/// small for v0.2; richer types (list, map) arrive with the `when`
/// expression language.
#[derive(Debug, Clone, PartialEq)]
pub enum FactValue {
    Bool(bool),
    Int(i64),
    String(String),
}

impl FactValue {
    /// Coerce the value to a boolean for `when` truthiness checks.
    ///
    /// * `Bool(b)` is `b` itself.
    /// * `Int(n)` is true for any non-zero `n` (negative values included).
    /// * `String(s)` is true for any non-empty `s` (whitespace counts).
    pub fn truthy(&self) -> bool {
        match *self {
            Self::Bool(flag) => flag,
            Self::Int(number) => number != 0,
            Self::String(ref text) => !text.is_empty(),
        }
    }
}
50
51/// A string or a list of strings — accepted by fact kinds whose input is
52/// glob-shaped.
53#[derive(Debug, Clone, Deserialize)]
54#[serde(untagged)]
55pub enum OneOrMany {
56    One(String),
57    Many(Vec<String>),
58}
59
60impl OneOrMany {
61    pub fn to_vec(&self) -> Vec<String> {
62        match self {
63            Self::One(s) => vec![s.clone()],
64            Self::Many(v) => v.clone(),
65        }
66    }
67}
68
/// YAML-level declaration of a single fact: an identifier plus exactly one
/// kind-specific body.
#[derive(Debug, Clone, Deserialize)]
pub struct FactSpec {
    /// Identifier of the fact. `evaluate_facts` uses it as the key under
    /// which the resolved value is stored.
    pub id: String,
    /// Kind-specific payload. Because of `#[serde(flatten)]`, the kind's
    /// discriminating key sits next to `id` in the YAML mapping rather than
    /// under a nested `kind:` key.
    #[serde(flatten)]
    pub kind: FactKind,
}
76
/// The closed set of built-in fact kinds. Serde dispatches via `untagged`
/// — the first variant whose required field is present in the YAML wins.
///
/// NOTE(review): the variants themselves do not set `deny_unknown_fields`,
/// so a spec carrying two kind keys presumably resolves silently to the
/// earlier variant in this list — confirm whether the loader rejects that.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum FactKind {
    /// `Bool`: true when at least one indexed file matches any of the globs.
    AnyFileExists {
        any_file_exists: OneOrMany,
    },
    /// `Bool`: true when every glob matches at least one indexed file.
    AllFilesExist {
        all_files_exist: OneOrMany,
    },
    /// `Int`: the number of indexed files matching the glob.
    CountFiles {
        count_files: String,
    },
    /// `Bool`: true when a file selected by `paths` contains a match for
    /// `pattern`.
    FileContentMatches {
        file_content_matches: FileContentMatchesFact,
    },
    /// `String`: the current branch name read from `.git/HEAD`, or `""`.
    GitBranch {
        git_branch: GitBranchFact,
    },
    /// `String`: trimmed stdout of a spawned process, or `""` on failure.
    Custom {
        custom: CustomFact,
    },
}
101
102impl FactKind {
103    /// The YAML-facing discriminator for the fact kind, suitable for
104    /// user-facing renderers like `alint facts` output.
105    pub fn name(&self) -> &'static str {
106        match self {
107            Self::AnyFileExists { .. } => "any_file_exists",
108            Self::AllFilesExist { .. } => "all_files_exist",
109            Self::CountFiles { .. } => "count_files",
110            Self::FileContentMatches { .. } => "file_content_matches",
111            Self::GitBranch { .. } => "git_branch",
112            Self::Custom { .. } => "custom",
113        }
114    }
115}
116
/// Fact-kind body for `custom`. Spawns `argv` as a child process
/// rooted at the repo; the process's stdout (trimmed of trailing
/// whitespace) becomes the fact's `String` value. A non-zero
/// exit code resolves to the empty string; timeouts and spawn
/// failures do the same. No shell is invoked — `argv` is passed
/// to `execve` (or the platform equivalent) verbatim.
///
/// NOTE(review): `run_custom` in this file waits on the child with
/// `Command::output()` and sets no deadline, so the "timeouts" mentioned
/// above are not enforced by the code visible here — confirm whether a
/// caller imposes one, otherwise a hanging command blocks the run.
///
/// Security: `custom` facts are only allowed in the user's own
/// top-level config. Any `extends:` ancestor that declares one
/// is rejected at load time — otherwise a malicious ruleset
/// could execute arbitrary code just by being fetched.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomFact {
    /// Program and arguments. `argv[0]` is looked up through PATH
    /// if it's not an absolute or relative-with-separator path.
    /// An empty list spawns nothing and resolves to the empty string.
    pub argv: Vec<String>,
}
135
/// Fact-kind body for `file_content_matches`. Fact evaluates
/// truthy when at least one file in `paths` contains a regex
/// match for `pattern`. Files that aren't valid UTF-8 are skipped,
/// as are files that cannot be read.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileContentMatchesFact {
    /// Glob or list of globs selecting the candidate files from the index.
    pub paths: OneOrMany,
    /// Regex source compiled with `Regex::new` at evaluation time; an
    /// invalid pattern makes fact evaluation fail with an error.
    pub pattern: String,
}
145
/// Fact-kind body for `git_branch`. Empty — the body is just
/// `git_branch: {}` in YAML and the discriminator is the key.
/// `deny_unknown_fields` rejects any key placed inside the body.
///
/// Evaluates to the current branch name by reading `.git/HEAD`
/// directly (no `git` binary required). Returns an empty string
/// when the repo isn't on a named branch (detached HEAD, not a
/// git repo at all, worktree/submodule variants, or any unusual
/// `.git` layout we don't fully resolve). An empty string is
/// falsy under `when:` coercion, so downstream rules naturally
/// no-op in those cases.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct GitBranchFact {}
159
160/// The resolved map from fact id to value, produced once per `Engine::run`.
161#[derive(Debug, Default, Clone)]
162pub struct FactValues(HashMap<String, FactValue>);
163
164impl FactValues {
165    pub fn new() -> Self {
166        Self::default()
167    }
168
169    pub fn insert(&mut self, id: String, v: FactValue) {
170        self.0.insert(id, v);
171    }
172
173    pub fn get(&self, id: &str) -> Option<&FactValue> {
174        self.0.get(id)
175    }
176
177    pub fn len(&self) -> usize {
178        self.0.len()
179    }
180
181    pub fn is_empty(&self) -> bool {
182        self.0.is_empty()
183    }
184
185    pub fn as_map(&self) -> &HashMap<String, FactValue> {
186        &self.0
187    }
188}
189
190/// Evaluate a whole fact list against a prebuilt `FileIndex`. Invoked by
191/// `Engine::run` before any rule evaluates.
192pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
193    let mut out = FactValues::new();
194    for spec in facts {
195        let value = evaluate_one(spec, root, index)?;
196        out.insert(spec.id.clone(), value);
197    }
198    Ok(out)
199}
200
201fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
202    match &spec.kind {
203        FactKind::AnyFileExists { any_file_exists } => {
204            let globs = any_file_exists.to_vec();
205            let scope = Scope::from_patterns(&globs)?;
206            let found = index.files().any(|e| scope.matches(&e.path));
207            Ok(FactValue::Bool(found))
208        }
209        FactKind::AllFilesExist { all_files_exist } => {
210            let globs = all_files_exist.to_vec();
211            for glob in &globs {
212                let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
213                if !index.files().any(|e| scope.matches(&e.path)) {
214                    return Ok(FactValue::Bool(false));
215                }
216            }
217            Ok(FactValue::Bool(true))
218        }
219        FactKind::CountFiles { count_files } => {
220            let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
221            let count = index.files().filter(|e| scope.matches(&e.path)).count();
222            Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
223        }
224        FactKind::FileContentMatches {
225            file_content_matches: spec,
226        } => {
227            let scope = Scope::from_patterns(&spec.paths.to_vec())?;
228            let regex = Regex::new(&spec.pattern)
229                .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
230            let any = index.files().any(|entry| {
231                if !scope.matches(&entry.path) {
232                    return false;
233                }
234                let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
235                    return false;
236                };
237                let Ok(text) = std::str::from_utf8(&bytes) else {
238                    return false;
239                };
240                regex.is_match(text)
241            });
242            Ok(FactValue::Bool(any))
243        }
244        FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
245        FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
246    }
247}
248
249/// Best-effort: spawn `argv` at `root`, capture stdout. Non-zero
250/// exit / spawn failures / unusable output → empty string.
251fn run_custom(spec: &CustomFact, root: &Path) -> String {
252    let Some((program, args)) = spec.argv.split_first() else {
253        return String::new();
254    };
255    let output = std::process::Command::new(program)
256        .args(args)
257        .current_dir(root)
258        .stdin(std::process::Stdio::null())
259        .stderr(std::process::Stdio::null())
260        .output();
261    let Ok(output) = output else {
262        return String::new();
263    };
264    if !output.status.success() {
265        return String::new();
266    }
267    match std::str::from_utf8(&output.stdout) {
268        Ok(text) => text.trim_end().to_string(),
269        Err(_) => String::new(),
270    }
271}
272
/// Reject `custom` facts in `config`. Used by the DSL loader to
/// enforce that only the user's top-level config can spawn
/// processes; extended (local or remote) configs can't.
///
/// `source` names the offending config and is only used in the error
/// message. Returns `Ok(())` when `config.facts` contains no `custom`
/// fact, otherwise an error for the first one found.
pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
    reject_custom_facts_in(&config.facts, source)
}
279
280/// Like [`reject_custom_facts`] but takes a bare facts slice —
281/// used by the DSL loader which does merge bookkeeping at the
282/// YAML layer before it has a full [`crate::config::Config`] to
283/// hand in.
284pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
285    for f in facts {
286        if matches!(f.kind, FactKind::Custom { .. }) {
287            return Err(Error::Other(format!(
288                "fact {:?}: `custom:` facts are only allowed in the user's top-level \
289                 config; declaring one in an extended config ({source}) is refused because \
290                 it would let a ruleset spawn arbitrary processes",
291                f.id
292            )));
293        }
294    }
295    Ok(())
296}
297
/// Best-effort lookup of the current branch name from `<root>/.git/HEAD`.
///
/// The file content is trimmed and must start with `ref: refs/heads/`; the
/// remainder is returned as the branch name. Anything else — a detached
/// HEAD holding a bare SHA, a `.git` that is not a directory, a missing or
/// unreadable file, non-UTF-8 content — yields `""`, which downstream
/// `when:` coercion treats as falsy.
fn read_git_branch(root: &Path) -> String {
    let head_path = root.join(".git").join("HEAD");
    std::fs::read_to_string(head_path)
        .ok()
        .and_then(|raw| {
            raw.trim()
                .strip_prefix("ref: refs/heads/")
                .map(str::to_string)
        })
        .unwrap_or_default()
}
314
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use std::path::PathBuf;
    use tempfile::{tempdir, TempDir};

    /// Build an in-memory index in which every path is a one-byte regular file.
    fn idx(paths: &[&str]) -> FileIndex {
        let entries = paths
            .iter()
            .map(|p| FileEntry {
                path: PathBuf::from(p),
                is_dir: false,
                size: 1,
            })
            .collect();
        FileIndex { entries }
    }

    /// Deserialize a YAML list of fact declarations.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Evaluate `yaml` against `paths` with `/` as the root — for facts that
    /// never touch the real filesystem.
    fn eval_at_root(yaml: &str, paths: &[&str]) -> FactValues {
        evaluate_facts(&parse(yaml), Path::new("/"), &idx(paths)).unwrap()
    }

    /// Evaluate `yaml` against `paths` with `dir` as the repository root.
    fn eval_in(dir: &Path, yaml: &str, paths: &[&str]) -> FactValues {
        evaluate_facts(&parse(yaml), dir, &idx(paths)).unwrap()
    }

    /// Create a temporary repository whose `.git/HEAD` holds `head`.
    fn repo_with_head(head: &str) -> TempDir {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), head).unwrap();
        tmp
    }

    /// Minimal config carrying only the given facts.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let values = eval_at_root(
            "- id: is_rust\n  any_file_exists: [Cargo.toml]\n",
            &["Cargo.toml", "src/lib.rs"],
        );
        assert_eq!(values.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let values = eval_at_root(
            "- id: is_rust\n  any_file_exists: [Cargo.toml]\n",
            &["src/lib.rs"],
        );
        assert_eq!(values.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let values = eval_at_root(
            "- id: has_readme\n  any_file_exists: README.md\n",
            &["README.md"],
        );
        assert_eq!(values.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let values = eval_at_root(
            "- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n",
            &["Cargo.toml", "README.md", "src/main.rs"],
        );
        assert_eq!(values.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let values = eval_at_root(
            "- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n",
            &["Cargo.toml"],
        );
        assert_eq!(values.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let values = eval_at_root(
            "- id: n_rs\n  count_files: \"**/*.rs\"\n",
            &["a.rs", "b.rs", "src/c.rs", "README.md"],
        );
        assert_eq!(values.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let values = eval_at_root(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
            &["Cargo.toml", "src/lib.rs", "README.md"],
        );
        assert_eq!(values.len(), 3);
        assert_eq!(values.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(values.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(values.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let values = eval_in(
            tmp.path(),
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
            &["Cargo.toml", "README.md"],
        );
        assert_eq!(values.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let values = eval_in(
            tmp.path(),
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
            &["Cargo.toml"],
        );
        assert_eq!(values.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // Invalid UTF-8 byte sequence.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let values = eval_in(
            tmp.path(),
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
            &["blob.bin", "text.txt"],
        );
        // The binary file is skipped without error, so `text.txt` decides.
        assert_eq!(values.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let repo = repo_with_head("ref: refs/heads/feature-x\n");
        let values = eval_in(repo.path(), "- id: branch\n  git_branch: {}\n", &[]);
        assert_eq!(
            values.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let repo = repo_with_head("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n");
        let values = eval_in(repo.path(), "- id: branch\n  git_branch: {}\n", &[]);
        assert_eq!(
            values.get("branch"),
            Some(&FactValue::String(String::new()))
        );
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let values = eval_in(tmp.path(), "- id: branch\n  git_branch: {}\n", &[]);
        assert_eq!(
            values.get("branch"),
            Some(&FactValue::String(String::new()))
        );
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let values = eval_in(
            tmp.path(),
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
            &[],
        );
        assert_eq!(
            values.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let values = eval_in(
            tmp.path(),
            "- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n",
            &[],
        );
        assert_eq!(values.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        // `false` exits 1; we should not see any captured output.
        let values = eval_in(
            tmp.path(),
            "- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n",
            &[],
        );
        assert_eq!(values.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let config = config_with(parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        ));
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let config = config_with(parse("- id: plain\n  any_file_exists: x\n"));
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        let truthy = [
            FactValue::Bool(true),
            FactValue::Int(1),
            FactValue::String("x".into()),
        ];
        let falsy = [
            FactValue::Bool(false),
            FactValue::Int(0),
            FactValue::String(String::new()),
        ];
        for value in &truthy {
            assert!(value.truthy());
        }
        for value in &falsy {
            assert!(!value.truthy());
        }
    }
}