1use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The resolved value of a fact: a boolean, an integer, or a string.
#[derive(Debug, Clone, PartialEq)]
pub enum FactValue {
    /// A yes/no answer.
    Bool(bool),
    /// A signed integer value.
    Int(i64),
    /// Free-form text.
    String(String),
}

impl FactValue {
    /// Coerces the value to a boolean.
    ///
    /// `Bool` is returned as-is, `Int` is truthy when non-zero, and `String`
    /// is truthy when non-empty.
    pub fn truthy(&self) -> bool {
        match *self {
            Self::Bool(flag) => flag,
            Self::Int(number) => number != 0,
            Self::String(ref text) => !text.is_empty(),
        }
    }
}
50
/// A config value that may be written either as a single string or as a
/// list of strings.
///
/// Deserialized untagged, so no wrapper key is needed in the config: a bare
/// string becomes `One`, a sequence becomes `Many`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum OneOrMany {
    /// A single string.
    One(String),
    /// A list of strings.
    Many(Vec<String>),
}
59
60impl OneOrMany {
61 pub fn to_vec(&self) -> Vec<String> {
62 match self {
63 Self::One(s) => vec![s.clone()],
64 Self::Many(v) => v.clone(),
65 }
66 }
67}
68
/// One fact declaration: an identifier plus the kind of probe that
/// produces its value.
#[derive(Debug, Clone, Deserialize)]
pub struct FactSpec {
    /// Key under which the resolved value is stored in [`FactValues`].
    pub id: String,
    /// The probe to run. Flattened, so the kind's own key (for example
    /// `count_files:`) sits beside `id` in the same mapping.
    #[serde(flatten)]
    pub kind: FactKind,
}
76
/// The supported fact kinds.
///
/// Deserialized untagged: each variant is a struct with a single field
/// named after the config key that selects it.
///
/// NOTE(review): the variants do not reject unknown fields, so a mapping
/// that carries two kind keys presumably resolves to the first variant in
/// declaration order that matches — confirm this is intended.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum FactKind {
    /// Selected by the `any_file_exists` key; holds one or more globs.
    AnyFileExists {
        any_file_exists: OneOrMany,
    },
    /// Selected by the `all_files_exist` key; holds one or more globs.
    AllFilesExist {
        all_files_exist: OneOrMany,
    },
    /// Selected by the `count_files` key; holds a single glob.
    CountFiles {
        count_files: String,
    },
    /// Selected by the `file_content_matches` key.
    FileContentMatches {
        file_content_matches: FileContentMatchesFact,
    },
    /// Selected by the `git_branch` key.
    GitBranch {
        git_branch: GitBranchFact,
    },
    /// Selected by the `custom` key.
    Custom {
        custom: CustomFact,
    },
}
101
102impl FactKind {
103 pub fn name(&self) -> &'static str {
106 match self {
107 Self::AnyFileExists { .. } => "any_file_exists",
108 Self::AllFilesExist { .. } => "all_files_exist",
109 Self::CountFiles { .. } => "count_files",
110 Self::FileContentMatches { .. } => "file_content_matches",
111 Self::GitBranch { .. } => "git_branch",
112 Self::Custom { .. } => "custom",
113 }
114 }
115
116 pub const ALL_NAMES: &'static [&'static str] = &[
125 "all_files_exist",
126 "any_file_exists",
127 "count_files",
128 "custom",
129 "file_content_matches",
130 "git_branch",
131 ];
132}
133
/// Payload of a `custom:` fact: a command whose standard output becomes
/// the fact's value. Unknown keys are rejected at parse time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomFact {
    /// Command line to run: the first element is the program, the rest are
    /// its arguments.
    pub argv: Vec<String>,
}
152
/// Payload of a `file_content_matches:` fact. Unknown keys are rejected
/// at parse time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileContentMatchesFact {
    /// Glob or globs selecting which indexed files are searched.
    pub paths: OneOrMany,
    /// Regular expression searched for in each selected file.
    pub pattern: String,
}
162
/// Payload of a `git_branch:` fact. It has no options, so it is written as
/// an empty mapping (`git_branch: {}`); any key inside it is rejected at
/// parse time.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct GitBranchFact {}
176
/// Resolved facts, keyed by fact id.
#[derive(Debug, Default, Clone)]
pub struct FactValues(HashMap<String, FactValue>);
180
181impl FactValues {
182 pub fn new() -> Self {
183 Self::default()
184 }
185
186 pub fn insert(&mut self, id: String, v: FactValue) {
187 self.0.insert(id, v);
188 }
189
190 pub fn get(&self, id: &str) -> Option<&FactValue> {
191 self.0.get(id)
192 }
193
194 pub fn len(&self) -> usize {
195 self.0.len()
196 }
197
198 pub fn is_empty(&self) -> bool {
199 self.0.is_empty()
200 }
201
202 pub fn as_map(&self) -> &HashMap<String, FactValue> {
203 &self.0
204 }
205}
206
207pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
210 let mut out = FactValues::new();
211 for spec in facts {
212 let value = evaluate_one(spec, root, index)?;
213 out.insert(spec.id.clone(), value);
214 }
215 Ok(out)
216}
217
218fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
219 match &spec.kind {
220 FactKind::AnyFileExists { any_file_exists } => {
221 let globs = any_file_exists.to_vec();
222 let scope = Scope::from_patterns(&globs)?;
223 let found = index.files().any(|e| scope.matches(&e.path, index));
224 Ok(FactValue::Bool(found))
225 }
226 FactKind::AllFilesExist { all_files_exist } => {
227 let globs = all_files_exist.to_vec();
228 for glob in &globs {
229 let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
230 if !index.files().any(|e| scope.matches(&e.path, index)) {
231 return Ok(FactValue::Bool(false));
232 }
233 }
234 Ok(FactValue::Bool(true))
235 }
236 FactKind::CountFiles { count_files } => {
237 let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
238 let count = index
239 .files()
240 .filter(|e| scope.matches(&e.path, index))
241 .count();
242 Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
243 }
244 FactKind::FileContentMatches {
245 file_content_matches: spec,
246 } => {
247 let scope = Scope::from_patterns(&spec.paths.to_vec())?;
248 let regex = Regex::new(&spec.pattern)
249 .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
250 let any = index.files().any(|entry| {
251 if !scope.matches(&entry.path, index) {
252 return false;
253 }
254 let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
255 return false;
256 };
257 let Ok(text) = std::str::from_utf8(&bytes) else {
258 return false;
259 };
260 regex.is_match(text)
261 });
262 Ok(FactValue::Bool(any))
263 }
264 FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
265 FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
266 }
267}
268
269fn run_custom(spec: &CustomFact, root: &Path) -> String {
272 let Some((program, args)) = spec.argv.split_first() else {
273 return String::new();
274 };
275 let output = std::process::Command::new(program)
276 .args(args)
277 .current_dir(root)
278 .stdin(std::process::Stdio::null())
279 .stderr(std::process::Stdio::null())
280 .output();
281 let Ok(output) = output else {
282 return String::new();
283 };
284 if !output.status.success() {
285 return String::new();
286 }
287 match std::str::from_utf8(&output.stdout) {
288 Ok(text) => text.trim_end().to_string(),
289 Err(_) => String::new(),
290 }
291}
292
/// Checks that `config` declares no `custom:` fact, by passing
/// `config.facts` to [`reject_custom_facts_in`].
///
/// `source` names where the config came from and is included in the
/// error message.
///
/// # Errors
///
/// Returns an error naming the first `custom:` fact found.
pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
    reject_custom_facts_in(&config.facts, source)
}
299
300pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
305 for f in facts {
306 if matches!(f.kind, FactKind::Custom { .. }) {
307 return Err(Error::Other(format!(
308 "fact {:?}: `custom:` facts are only allowed in the user's top-level \
309 config; declaring one in an extended config ({source}) is refused because \
310 it would let a ruleset spawn arbitrary processes",
311 f.id
312 )));
313 }
314 }
315 Ok(())
316}
317
/// Returns the branch named by `<root>/.git/HEAD`.
///
/// The file is trimmed and must start with `ref: refs/heads/`; the rest is
/// the branch name. The empty string is returned when the file cannot be
/// read or does not have that prefix (for example a detached HEAD holding
/// a commit hash).
fn read_git_branch(root: &Path) -> String {
    std::fs::read_to_string(root.join(".git").join("HEAD"))
        .ok()
        .and_then(|head| {
            head.trim()
                .strip_prefix("ref: refs/heads/")
                .map(str::to_string)
        })
        .unwrap_or_default()
}
334
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use tempfile::tempdir;

    /// Builds an in-memory index of one-byte regular files at `paths`.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Parses a YAML list of fact specs, panicking on malformed fixtures.
    ///
    /// Keys that belong to a list item must line up with `id` (two spaces
    /// after a newline); the keys of a nested payload use four spaces.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Builds a config that carries `facts` and is otherwise fixed, so the
    /// `reject_custom_facts` tests differ only in the facts they pass.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
            allow_out_of_root: crate::AllowOutOfRoot::default(),
        }
    }

    /// Guards against `FactKind::name()` and `FactKind::ALL_NAMES` drifting
    /// apart when a variant is added or renamed.
    #[test]
    fn all_names_is_sorted_unique_and_matches_name() {
        let mut sorted = FactKind::ALL_NAMES.to_vec();
        sorted.sort_unstable();
        sorted.dedup();
        assert_eq!(
            FactKind::ALL_NAMES,
            sorted.as_slice(),
            "FactKind::ALL_NAMES must be sorted and de-duplicated"
        );

        let cases = [
            ("- id: f\n  any_file_exists: x\n", "any_file_exists"),
            ("- id: f\n  all_files_exist: [x]\n", "all_files_exist"),
            ("- id: f\n  count_files: \"**/*\"\n", "count_files"),
            (
                "- id: f\n  file_content_matches:\n    paths: x\n    pattern: y\n",
                "file_content_matches",
            ),
            ("- id: f\n  git_branch: {}\n", "git_branch"),
            ("- id: f\n  custom:\n    argv: [echo, hi]\n", "custom"),
        ];
        let mut seen = std::collections::BTreeSet::new();
        for (yaml, expected) in cases {
            let specs = parse(yaml);
            assert_eq!(specs[0].kind.name(), expected, "name() drift for {yaml:?}");
            assert!(
                FactKind::ALL_NAMES.contains(&expected),
                "{expected} is produced by name() but missing from ALL_NAMES"
            );
            seen.insert(expected);
        }
        let listed: std::collections::BTreeSet<&str> =
            FactKind::ALL_NAMES.iter().copied().collect();
        assert_eq!(
            seen, listed,
            "ALL_NAMES lists a name no covered variant produces (add a case above or fix ALL_NAMES)"
        );
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v =
            evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml", "src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let facts = parse("- id: has_readme\n  any_file_exists: README.md\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["README.md"])).unwrap();
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "README.md", "src/main.rs"]),
        )
        .unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml"])).unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let facts = parse("- id: n_rs\n  count_files: \"**/*.rs\"\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["a.rs", "b.rs", "src/c.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let facts = parse(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
        );
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "src/lib.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml", "README.md"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // Not valid UTF-8: must be skipped without aborting the scan.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let facts = parse(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
        );
        let index = idx(&["blob.bin", "text.txt"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        // The text file still matches even though the binary one comes first.
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), "ref: refs/heads/feature-x\n").unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(
            tmp.path().join(".git/HEAD"),
            "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n",
        )
        .unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let facts = parse(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts =
            parse("- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let facts = parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        );
        let config = config_with(facts);
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let facts = parse("- id: plain\n  any_file_exists: x\n");
        let config = config_with(facts);
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}