1use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The resolved value of a single fact.
///
/// Within this module the boolean variant is produced by `any_file_exists`,
/// `all_files_exist` and `file_content_matches`, the integer variant by
/// `count_files`, and the string variant by `git_branch` and `custom`.
///
/// Every payload is totally comparable, so the type is `Eq` as well as
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FactValue {
    /// A yes/no answer.
    Bool(bool),
    /// A signed count.
    Int(i64),
    /// Free-form text; the empty string is used when nothing could be read.
    String(String),
}
38
39impl FactValue {
40 pub fn truthy(&self) -> bool {
43 match self {
44 Self::Bool(b) => *b,
45 Self::Int(n) => *n != 0,
46 Self::String(s) => !s.is_empty(),
47 }
48 }
49}
50
51#[derive(Debug, Clone, Deserialize)]
54#[serde(untagged)]
55pub enum OneOrMany {
56 One(String),
57 Many(Vec<String>),
58}
59
60impl OneOrMany {
61 pub fn to_vec(&self) -> Vec<String> {
62 match self {
63 Self::One(s) => vec![s.clone()],
64 Self::Many(v) => v.clone(),
65 }
66 }
67}
68
69#[derive(Debug, Clone, Deserialize)]
71pub struct FactSpec {
72 pub id: String,
73 #[serde(flatten)]
74 pub kind: FactKind,
75}
76
77#[derive(Debug, Clone, Deserialize)]
80#[serde(untagged)]
81pub enum FactKind {
82 AnyFileExists {
83 any_file_exists: OneOrMany,
84 },
85 AllFilesExist {
86 all_files_exist: OneOrMany,
87 },
88 CountFiles {
89 count_files: String,
90 },
91 FileContentMatches {
92 file_content_matches: FileContentMatchesFact,
93 },
94 GitBranch {
95 git_branch: GitBranchFact,
96 },
97 Custom {
98 custom: CustomFact,
99 },
100}
101
102impl FactKind {
103 pub fn name(&self) -> &'static str {
106 match self {
107 Self::AnyFileExists { .. } => "any_file_exists",
108 Self::AllFilesExist { .. } => "all_files_exist",
109 Self::CountFiles { .. } => "count_files",
110 Self::FileContentMatches { .. } => "file_content_matches",
111 Self::GitBranch { .. } => "git_branch",
112 Self::Custom { .. } => "custom",
113 }
114 }
115}
116
117#[derive(Debug, Clone, Deserialize)]
129#[serde(deny_unknown_fields)]
130pub struct CustomFact {
131 pub argv: Vec<String>,
134}
135
136#[derive(Debug, Clone, Deserialize)]
140#[serde(deny_unknown_fields)]
141pub struct FileContentMatchesFact {
142 pub paths: OneOrMany,
143 pub pattern: String,
144}
145
146#[derive(Debug, Clone, Deserialize, Default)]
157#[serde(deny_unknown_fields)]
158pub struct GitBranchFact {}
159
160#[derive(Debug, Default, Clone)]
162pub struct FactValues(HashMap<String, FactValue>);
163
164impl FactValues {
165 pub fn new() -> Self {
166 Self::default()
167 }
168
169 pub fn insert(&mut self, id: String, v: FactValue) {
170 self.0.insert(id, v);
171 }
172
173 pub fn get(&self, id: &str) -> Option<&FactValue> {
174 self.0.get(id)
175 }
176
177 pub fn len(&self) -> usize {
178 self.0.len()
179 }
180
181 pub fn is_empty(&self) -> bool {
182 self.0.is_empty()
183 }
184
185 pub fn as_map(&self) -> &HashMap<String, FactValue> {
186 &self.0
187 }
188}
189
190pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
193 let mut out = FactValues::new();
194 for spec in facts {
195 let value = evaluate_one(spec, root, index)?;
196 out.insert(spec.id.clone(), value);
197 }
198 Ok(out)
199}
200
201fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
202 match &spec.kind {
203 FactKind::AnyFileExists { any_file_exists } => {
204 let globs = any_file_exists.to_vec();
205 let scope = Scope::from_patterns(&globs)?;
206 let found = index.files().any(|e| scope.matches(&e.path, index));
207 Ok(FactValue::Bool(found))
208 }
209 FactKind::AllFilesExist { all_files_exist } => {
210 let globs = all_files_exist.to_vec();
211 for glob in &globs {
212 let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
213 if !index.files().any(|e| scope.matches(&e.path, index)) {
214 return Ok(FactValue::Bool(false));
215 }
216 }
217 Ok(FactValue::Bool(true))
218 }
219 FactKind::CountFiles { count_files } => {
220 let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
221 let count = index
222 .files()
223 .filter(|e| scope.matches(&e.path, index))
224 .count();
225 Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
226 }
227 FactKind::FileContentMatches {
228 file_content_matches: spec,
229 } => {
230 let scope = Scope::from_patterns(&spec.paths.to_vec())?;
231 let regex = Regex::new(&spec.pattern)
232 .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
233 let any = index.files().any(|entry| {
234 if !scope.matches(&entry.path, index) {
235 return false;
236 }
237 let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
238 return false;
239 };
240 let Ok(text) = std::str::from_utf8(&bytes) else {
241 return false;
242 };
243 regex.is_match(text)
244 });
245 Ok(FactValue::Bool(any))
246 }
247 FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
248 FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
249 }
250}
251
252fn run_custom(spec: &CustomFact, root: &Path) -> String {
255 let Some((program, args)) = spec.argv.split_first() else {
256 return String::new();
257 };
258 let output = std::process::Command::new(program)
259 .args(args)
260 .current_dir(root)
261 .stdin(std::process::Stdio::null())
262 .stderr(std::process::Stdio::null())
263 .output();
264 let Ok(output) = output else {
265 return String::new();
266 };
267 if !output.status.success() {
268 return String::new();
269 }
270 match std::str::from_utf8(&output.stdout) {
271 Ok(text) => text.trim_end().to_string(),
272 Err(_) => String::new(),
273 }
274}
275
276pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
280 reject_custom_facts_in(&config.facts, source)
281}
282
283pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
288 for f in facts {
289 if matches!(f.kind, FactKind::Custom { .. }) {
290 return Err(Error::Other(format!(
291 "fact {:?}: `custom:` facts are only allowed in the user's top-level \
292 config; declaring one in an extended config ({source}) is refused because \
293 it would let a ruleset spawn arbitrary processes",
294 f.id
295 )));
296 }
297 }
298 Ok(())
299}
300
/// Return the name of the branch checked out in the repository at `root`.
///
/// The name is taken from the `HEAD` file without invoking `git`. When
/// `root/.git` is a directory, `root/.git/HEAD` is read. When `root/.git` is a
/// regular file (linked worktrees and submodules), it is expected to contain
/// `gitdir: <path>` and `HEAD` is read from that directory; a relative `<path>`
/// is resolved against `root`.
///
/// The result is the empty string when there is no readable `HEAD`, when the
/// `.git` file has no `gitdir:` pointer, or when `HEAD` does not point at
/// `refs/heads/...` (for example a detached HEAD).
fn read_git_branch(root: &Path) -> String {
    let dot_git = root.join(".git");

    let git_dir = if dot_git.is_file() {
        let Ok(pointer) = std::fs::read_to_string(&dot_git) else {
            return String::new();
        };
        let Some(target) = pointer.trim().strip_prefix("gitdir:") else {
            return String::new();
        };
        // `join` keeps an absolute target as-is and anchors a relative one at
        // the directory that contains the `.git` file.
        root.join(target.trim())
    } else {
        dot_git
    };

    let Ok(content) = std::fs::read_to_string(git_dir.join("HEAD")) else {
        return String::new();
    };
    content
        .trim()
        .strip_prefix("ref: refs/heads/")
        .unwrap_or("")
        .to_string()
}
317
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use tempfile::{tempdir, TempDir};

    // YAML fixtures. Keys that belong to the same list entry must line up with
    // `id` (two spaces under the `- `), and nested option keys sit two further
    // spaces in; anything else is not a valid block mapping.
    const IS_RUST: &str = "- id: is_rust\n  any_file_exists: [Cargo.toml]\n";
    const IS_MONOREPO: &str = "- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n";
    const USES_TOKIO: &str =
        "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n";
    const BRANCH: &str = "- id: branch\n  git_branch: {}\n";

    /// Build an index of regular one-byte files from relative paths.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex::from_entries(
            paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        )
    }

    /// Parse a YAML list of fact declarations, panicking on invalid input.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Parse `yaml` and evaluate it against `root` and an index of `paths`.
    fn eval(yaml: &str, root: &Path, paths: &[&str]) -> FactValues {
        evaluate_facts(&parse(yaml), root, &idx(paths)).unwrap()
    }

    /// Create a temporary directory containing `.git/HEAD` with `head` as its
    /// content. The directory lives as long as the returned guard.
    fn repo_with_head(head: &str) -> TempDir {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), head).unwrap();
        tmp
    }

    /// A minimal config whose only interesting content is `facts`.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let v = eval(IS_RUST, Path::new("/"), &["Cargo.toml", "src/lib.rs"]);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let v = eval(IS_RUST, Path::new("/"), &["src/lib.rs"]);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let v = eval(
            "- id: has_readme\n  any_file_exists: README.md\n",
            Path::new("/"),
            &["README.md"],
        );
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let v = eval(
            IS_MONOREPO,
            Path::new("/"),
            &["Cargo.toml", "README.md", "src/main.rs"],
        );
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let v = eval(IS_MONOREPO, Path::new("/"), &["Cargo.toml"]);
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let v = eval(
            "- id: n_rs\n  count_files: \"**/*.rs\"\n",
            Path::new("/"),
            &["a.rs", "b.rs", "src/c.rs", "README.md"],
        );
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let v = eval(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
            Path::new("/"),
            &["Cargo.toml", "src/lib.rs", "README.md"],
        );
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let v = eval(USES_TOKIO, tmp.path(), &["Cargo.toml", "README.md"]);
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let v = eval(USES_TOKIO, tmp.path(), &["Cargo.toml"]);
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // Invalid UTF-8: must be skipped rather than abort the evaluation.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let v = eval(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
            tmp.path(),
            &["blob.bin", "text.txt"],
        );
        // The text file still matches even though the binary one was skipped.
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let repo = repo_with_head("ref: refs/heads/feature-x\n");
        let v = eval(BRANCH, repo.path(), &[]);
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let repo = repo_with_head("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n");
        let v = eval(BRANCH, repo.path(), &[]);
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let v = eval(BRANCH, tmp.path(), &[]);
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let v = eval(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
            tmp.path(),
            &[],
        );
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let v = eval(
            "- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n",
            tmp.path(),
            &[],
        );
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        // `/bin/false` always exits with a failure status.
        let v = eval(
            "- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n",
            tmp.path(),
            &[],
        );
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let config = config_with(parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        ));
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let config = config_with(parse("- id: plain\n  any_file_exists: x\n"));
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}