1use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The resolved value of a single fact.
///
/// Evaluation in this module produces `Bool` for existence and content checks,
/// `Int` for file counts, and `String` for the git branch and custom command output.
#[derive(Debug, Clone, PartialEq)]
pub enum FactValue {
    /// A yes/no answer.
    Bool(bool),
    /// A signed integer, e.g. a count.
    Int(i64),
    /// Free-form text; the empty string is treated as "no value" by [`FactValue::truthy`].
    String(String),
}

impl FactValue {
    /// Coerces the value to a boolean.
    ///
    /// `Bool` yields itself, `Int` is true when non-zero, and `String` is true
    /// when it contains at least one byte.
    pub fn truthy(&self) -> bool {
        match *self {
            Self::Bool(flag) => flag,
            Self::Int(number) => number != 0,
            Self::String(ref text) => !text.is_empty(),
        }
    }
}
50
/// A config value that may be written either as a single string or as a list
/// of strings.
///
/// The enum is untagged, so the shape of the input (scalar vs. sequence) is
/// what decides the variant.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum OneOrMany {
    /// A single string.
    One(String),
    /// A list of strings.
    Many(Vec<String>),
}
59
60impl OneOrMany {
61 pub fn to_vec(&self) -> Vec<String> {
62 match self {
63 Self::One(s) => vec![s.clone()],
64 Self::Many(v) => v.clone(),
65 }
66 }
67}
68
/// One fact declaration: an identifier plus the kind-specific keys.
#[derive(Debug, Clone, Deserialize)]
pub struct FactSpec {
    /// Key under which the evaluated value is stored in [`FactValues`].
    pub id: String,
    /// Kind-specific payload. Flattened, so its keys appear alongside `id`
    /// in the same mapping rather than under a nested key.
    #[serde(flatten)]
    pub kind: FactKind,
}
76
/// The kind-specific part of a fact declaration.
///
/// The enum is untagged: there is no discriminator field, and each variant is
/// recognised by the single key it carries (which matches the string returned
/// by [`FactKind::name`]).
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum FactKind {
    /// True when at least one indexed file matches any of the patterns.
    AnyFileExists {
        /// One pattern or a list of patterns.
        any_file_exists: OneOrMany,
    },
    /// True when every pattern is matched by at least one indexed file.
    AllFilesExist {
        /// One pattern or a list of patterns, each checked on its own.
        all_files_exist: OneOrMany,
    },
    /// The number of indexed files matching a single pattern.
    CountFiles {
        /// The pattern whose matches are counted.
        count_files: String,
    },
    /// True when a regex matches the text of at least one selected file.
    FileContentMatches {
        /// Which files to read and which regex to look for.
        file_content_matches: FileContentMatchesFact,
    },
    /// The current branch name, read from `.git/HEAD` under the root.
    GitBranch {
        /// Takes no options; written as an empty mapping.
        git_branch: GitBranchFact,
    },
    /// The standard output of an external command.
    Custom {
        /// The command line to run.
        custom: CustomFact,
    },
}
101
102impl FactKind {
103 pub fn name(&self) -> &'static str {
106 match self {
107 Self::AnyFileExists { .. } => "any_file_exists",
108 Self::AllFilesExist { .. } => "all_files_exist",
109 Self::CountFiles { .. } => "count_files",
110 Self::FileContentMatches { .. } => "file_content_matches",
111 Self::GitBranch { .. } => "git_branch",
112 Self::Custom { .. } => "custom",
113 }
114 }
115}
116
/// Options for a `custom` fact: an external command whose output is the value.
///
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomFact {
    /// Command line: the first element is the program, the rest are its
    /// arguments. An empty list evaluates to the empty string.
    pub argv: Vec<String>,
}
135
/// Options for a `file_content_matches` fact.
///
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileContentMatchesFact {
    /// Pattern(s) selecting which indexed files are read.
    pub paths: OneOrMany,
    /// Regular expression (compiled with the `regex` crate) searched for in
    /// each selected file's text.
    pub pattern: String,
}
145
/// Options for a `git_branch` fact.
///
/// Currently has no fields; because unknown keys are rejected, the only
/// accepted form is an empty mapping (`git_branch: {}`).
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct GitBranchFact {}
159
/// Evaluated facts, keyed by fact id.
///
/// A thin wrapper around a `HashMap`, so iteration order is unspecified.
#[derive(Debug, Default, Clone)]
pub struct FactValues(HashMap<String, FactValue>);
163
164impl FactValues {
165 pub fn new() -> Self {
166 Self::default()
167 }
168
169 pub fn insert(&mut self, id: String, v: FactValue) {
170 self.0.insert(id, v);
171 }
172
173 pub fn get(&self, id: &str) -> Option<&FactValue> {
174 self.0.get(id)
175 }
176
177 pub fn len(&self) -> usize {
178 self.0.len()
179 }
180
181 pub fn is_empty(&self) -> bool {
182 self.0.is_empty()
183 }
184
185 pub fn as_map(&self) -> &HashMap<String, FactValue> {
186 &self.0
187 }
188}
189
190pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
193 let mut out = FactValues::new();
194 for spec in facts {
195 let value = evaluate_one(spec, root, index)?;
196 out.insert(spec.id.clone(), value);
197 }
198 Ok(out)
199}
200
201fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
202 match &spec.kind {
203 FactKind::AnyFileExists { any_file_exists } => {
204 let globs = any_file_exists.to_vec();
205 let scope = Scope::from_patterns(&globs)?;
206 let found = index.files().any(|e| scope.matches(&e.path));
207 Ok(FactValue::Bool(found))
208 }
209 FactKind::AllFilesExist { all_files_exist } => {
210 let globs = all_files_exist.to_vec();
211 for glob in &globs {
212 let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
213 if !index.files().any(|e| scope.matches(&e.path)) {
214 return Ok(FactValue::Bool(false));
215 }
216 }
217 Ok(FactValue::Bool(true))
218 }
219 FactKind::CountFiles { count_files } => {
220 let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
221 let count = index.files().filter(|e| scope.matches(&e.path)).count();
222 Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
223 }
224 FactKind::FileContentMatches {
225 file_content_matches: spec,
226 } => {
227 let scope = Scope::from_patterns(&spec.paths.to_vec())?;
228 let regex = Regex::new(&spec.pattern)
229 .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
230 let any = index.files().any(|entry| {
231 if !scope.matches(&entry.path) {
232 return false;
233 }
234 let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
235 return false;
236 };
237 let Ok(text) = std::str::from_utf8(&bytes) else {
238 return false;
239 };
240 regex.is_match(text)
241 });
242 Ok(FactValue::Bool(any))
243 }
244 FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
245 FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
246 }
247}
248
249fn run_custom(spec: &CustomFact, root: &Path) -> String {
252 let Some((program, args)) = spec.argv.split_first() else {
253 return String::new();
254 };
255 let output = std::process::Command::new(program)
256 .args(args)
257 .current_dir(root)
258 .stdin(std::process::Stdio::null())
259 .stderr(std::process::Stdio::null())
260 .output();
261 let Ok(output) = output else {
262 return String::new();
263 };
264 if !output.status.success() {
265 return String::new();
266 }
267 match std::str::from_utf8(&output.stdout) {
268 Ok(text) => text.trim_end().to_string(),
269 Err(_) => String::new(),
270 }
271}
272
273pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
277 reject_custom_facts_in(&config.facts, source)
278}
279
280pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
285 for f in facts {
286 if matches!(f.kind, FactKind::Custom { .. }) {
287 return Err(Error::Other(format!(
288 "fact {:?}: `custom:` facts are only allowed in the user's top-level \
289 config; declaring one in an extended config ({source}) is refused because \
290 it would let a ruleset spawn arbitrary processes",
291 f.id
292 )));
293 }
294 }
295 Ok(())
296}
297
/// Reads the current branch name from `<root>/.git/HEAD`.
///
/// Returns the text after `ref: refs/heads/` with surrounding whitespace
/// removed. Returns an empty string when the file cannot be read or when its
/// content does not start with that prefix (for example a detached HEAD that
/// holds a bare commit hash).
fn read_git_branch(root: &Path) -> String {
    let head_path = root.join(".git").join("HEAD");
    match std::fs::read_to_string(&head_path) {
        Ok(head) => head
            .trim()
            .strip_prefix("ref: refs/heads/")
            .map(str::to_string)
            .unwrap_or_default(),
        Err(_) => String::new(),
    }
}
314
#[cfg(test)]
mod tests {
    //! Unit tests for fact parsing and evaluation.
    //!
    //! YAML fixtures indent a fact's keys by two spaces so they belong to the
    //! list item opened by `- id:`, and indent nested option keys by four so
    //! they belong to their parent key.

    use super::*;
    use crate::walker::FileEntry;
    use tempfile::tempdir;

    /// Builds an in-memory index of one-byte regular files at `paths`.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex {
            entries: paths
                .iter()
                .map(|p| FileEntry {
                    path: std::path::Path::new(p).into(),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        }
    }

    /// Parses a YAML list of fact declarations; panics on a malformed fixture.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Builds an otherwise-empty config carrying `facts`.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v =
            evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml", "src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let facts = parse("- id: has_readme\n  any_file_exists: README.md\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["README.md"])).unwrap();
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "README.md", "src/main.rs"]),
        )
        .unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml"])).unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let facts = parse("- id: n_rs\n  count_files: \"**/*.rs\"\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["a.rs", "b.rs", "src/c.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let facts = parse(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
        );
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "src/lib.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml", "README.md"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // A binary blob that is not valid UTF-8 must be skipped, not treated
        // as an error, so the text file next to it can still match.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let facts = parse(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
        );
        let index = idx(&["blob.bin", "text.txt"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), "ref: refs/heads/feature-x\n").unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(
            tmp.path().join(".git/HEAD"),
            "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n",
        )
        .unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let facts = parse(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts =
            parse("- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let facts = parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        );
        let config = config_with(facts);
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let facts = parse("- id: plain\n  any_file_exists: x\n");
        let config = config_with(facts);
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}