1use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// Value produced by evaluating a single fact.
///
/// `Eq` is derived alongside `PartialEq` because every payload (`bool`,
/// `i64`, `String`) has total equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FactValue {
    /// Boolean result (produced by the file-existence and content-match facts).
    Bool(bool),
    /// Integer result (produced by `count_files`).
    Int(i64),
    /// Text result (produced by `git_branch` and `custom`).
    String(String),
}

impl FactValue {
    /// Coerces the value to a boolean.
    ///
    /// `Bool` yields itself, `Int` is true when non-zero, and `String` is
    /// true when non-empty.
    #[must_use]
    pub fn truthy(&self) -> bool {
        match self {
            Self::Bool(flag) => *flag,
            Self::Int(number) => *number != 0,
            Self::String(text) => !text.is_empty(),
        }
    }
}
50
51#[derive(Debug, Clone, Deserialize)]
54#[serde(untagged)]
55pub enum OneOrMany {
56 One(String),
57 Many(Vec<String>),
58}
59
60impl OneOrMany {
61 pub fn to_vec(&self) -> Vec<String> {
62 match self {
63 Self::One(s) => vec![s.clone()],
64 Self::Many(v) => v.clone(),
65 }
66 }
67}
68
69#[derive(Debug, Clone, Deserialize)]
71pub struct FactSpec {
72 pub id: String,
73 #[serde(flatten)]
74 pub kind: FactKind,
75}
76
77#[derive(Debug, Clone, Deserialize)]
80#[serde(untagged)]
81pub enum FactKind {
82 AnyFileExists {
83 any_file_exists: OneOrMany,
84 },
85 AllFilesExist {
86 all_files_exist: OneOrMany,
87 },
88 CountFiles {
89 count_files: String,
90 },
91 FileContentMatches {
92 file_content_matches: FileContentMatchesFact,
93 },
94 GitBranch {
95 git_branch: GitBranchFact,
96 },
97 Custom {
98 custom: CustomFact,
99 },
100}
101
102impl FactKind {
103 pub fn name(&self) -> &'static str {
106 match self {
107 Self::AnyFileExists { .. } => "any_file_exists",
108 Self::AllFilesExist { .. } => "all_files_exist",
109 Self::CountFiles { .. } => "count_files",
110 Self::FileContentMatches { .. } => "file_content_matches",
111 Self::GitBranch { .. } => "git_branch",
112 Self::Custom { .. } => "custom",
113 }
114 }
115}
116
117#[derive(Debug, Clone, Deserialize)]
129#[serde(deny_unknown_fields)]
130pub struct CustomFact {
131 pub argv: Vec<String>,
134}
135
136#[derive(Debug, Clone, Deserialize)]
140#[serde(deny_unknown_fields)]
141pub struct FileContentMatchesFact {
142 pub paths: OneOrMany,
143 pub pattern: String,
144}
145
146#[derive(Debug, Clone, Deserialize, Default)]
157#[serde(deny_unknown_fields)]
158pub struct GitBranchFact {}
159
160#[derive(Debug, Default, Clone)]
162pub struct FactValues(HashMap<String, FactValue>);
163
164impl FactValues {
165 pub fn new() -> Self {
166 Self::default()
167 }
168
169 pub fn insert(&mut self, id: String, v: FactValue) {
170 self.0.insert(id, v);
171 }
172
173 pub fn get(&self, id: &str) -> Option<&FactValue> {
174 self.0.get(id)
175 }
176
177 pub fn len(&self) -> usize {
178 self.0.len()
179 }
180
181 pub fn is_empty(&self) -> bool {
182 self.0.is_empty()
183 }
184
185 pub fn as_map(&self) -> &HashMap<String, FactValue> {
186 &self.0
187 }
188}
189
190pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
193 let mut out = FactValues::new();
194 for spec in facts {
195 let value = evaluate_one(spec, root, index)?;
196 out.insert(spec.id.clone(), value);
197 }
198 Ok(out)
199}
200
201fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
202 match &spec.kind {
203 FactKind::AnyFileExists { any_file_exists } => {
204 let globs = any_file_exists.to_vec();
205 let scope = Scope::from_patterns(&globs)?;
206 let found = index.files().any(|e| scope.matches(&e.path));
207 Ok(FactValue::Bool(found))
208 }
209 FactKind::AllFilesExist { all_files_exist } => {
210 let globs = all_files_exist.to_vec();
211 for glob in &globs {
212 let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
213 if !index.files().any(|e| scope.matches(&e.path)) {
214 return Ok(FactValue::Bool(false));
215 }
216 }
217 Ok(FactValue::Bool(true))
218 }
219 FactKind::CountFiles { count_files } => {
220 let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
221 let count = index.files().filter(|e| scope.matches(&e.path)).count();
222 Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
223 }
224 FactKind::FileContentMatches {
225 file_content_matches: spec,
226 } => {
227 let scope = Scope::from_patterns(&spec.paths.to_vec())?;
228 let regex = Regex::new(&spec.pattern)
229 .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
230 let any = index.files().any(|entry| {
231 if !scope.matches(&entry.path) {
232 return false;
233 }
234 let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
235 return false;
236 };
237 let Ok(text) = std::str::from_utf8(&bytes) else {
238 return false;
239 };
240 regex.is_match(text)
241 });
242 Ok(FactValue::Bool(any))
243 }
244 FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
245 FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
246 }
247}
248
249fn run_custom(spec: &CustomFact, root: &Path) -> String {
252 let Some((program, args)) = spec.argv.split_first() else {
253 return String::new();
254 };
255 let output = std::process::Command::new(program)
256 .args(args)
257 .current_dir(root)
258 .stdin(std::process::Stdio::null())
259 .stderr(std::process::Stdio::null())
260 .output();
261 let Ok(output) = output else {
262 return String::new();
263 };
264 if !output.status.success() {
265 return String::new();
266 }
267 match std::str::from_utf8(&output.stdout) {
268 Ok(text) => text.trim_end().to_string(),
269 Err(_) => String::new(),
270 }
271}
272
273pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
277 reject_custom_facts_in(&config.facts, source)
278}
279
280pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
285 for f in facts {
286 if matches!(f.kind, FactKind::Custom { .. }) {
287 return Err(Error::Other(format!(
288 "fact {:?}: `custom:` facts are only allowed in the user's top-level \
289 config; declaring one in an extended config ({source}) is refused because \
290 it would let a ruleset spawn arbitrary processes",
291 f.id
292 )));
293 }
294 }
295 Ok(())
296}
297
/// Reads the current branch name from `<root>/.git/HEAD`.
///
/// Returns the text after `ref: refs/heads/` on the trimmed file content.
/// Returns an empty string when the file cannot be read or does not start
/// with that prefix (for example a detached HEAD holding a bare commit id).
fn read_git_branch(root: &Path) -> String {
    let mut head_path = root.to_path_buf();
    head_path.push(".git");
    head_path.push("HEAD");

    std::fs::read_to_string(head_path)
        .ok()
        .and_then(|content| {
            content
                .trim()
                .strip_prefix("ref: refs/heads/")
                .map(str::to_string)
        })
        .unwrap_or_default()
}
314
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use std::path::PathBuf;
    use tempfile::tempdir;

    // NOTE: the YAML fixtures below indent mapping keys by two spaces (four
    // for nested mappings). Keys of a list item must line up with the first
    // key after `- `; a one-space indent is not valid block YAML and makes
    // `parse` panic.

    /// Builds an in-memory index containing one regular file per path.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex {
            entries: paths
                .iter()
                .map(|p| FileEntry {
                    path: PathBuf::from(p),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        }
    }

    /// Parses a YAML list of fact specs, panicking on a malformed fixture.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Wraps `facts` in a config whose other fields hold fixed placeholder values.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v =
            evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml", "src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let facts = parse("- id: has_readme\n  any_file_exists: README.md\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["README.md"])).unwrap();
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "README.md", "src/main.rs"]),
        )
        .unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml"])).unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let facts = parse("- id: n_rs\n  count_files: \"**/*.rs\"\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["a.rs", "b.rs", "src/c.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let facts = parse(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
        );
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "src/lib.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml", "README.md"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // `blob.bin` is not valid UTF-8; it must be skipped rather than
        // aborting the scan before `text.txt` is reached.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let facts = parse(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
        );
        let index = idx(&["blob.bin", "text.txt"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), "ref: refs/heads/feature-x\n").unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(
            tmp.path().join(".git/HEAD"),
            "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n",
        )
        .unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let facts = parse(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts =
            parse("- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let facts = parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        );
        let config = config_with(facts);
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let facts = parse("- id: plain\n  any_file_exists: x\n");
        let config = config_with(facts);
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}