1use std::collections::HashMap;
21use std::path::Path;
22
23use regex::Regex;
24use serde::Deserialize;
25
26use crate::error::{Error, Result};
27use crate::scope::Scope;
28use crate::walker::FileIndex;
29
/// The resolved value of a single fact.
///
/// Every payload type is totally comparable, so the enum derives `Eq` in
/// addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FactValue {
    /// A yes/no answer (produced by the file-existence and content-match facts).
    Bool(bool),
    /// An integer answer (produced by the file-count fact).
    Int(i64),
    /// A textual answer (produced by the git-branch and custom facts).
    String(String),
}

impl FactValue {
    /// Coerces the value to a boolean.
    ///
    /// * `Bool` — the boolean itself.
    /// * `Int` — `true` for any non-zero number.
    /// * `String` — `true` for any non-empty string.
    pub fn truthy(&self) -> bool {
        match self {
            Self::Bool(b) => *b,
            Self::Int(n) => *n != 0,
            Self::String(s) => !s.is_empty(),
        }
    }
}
50
51#[derive(Debug, Clone, Deserialize)]
54#[serde(untagged)]
55pub enum OneOrMany {
56 One(String),
57 Many(Vec<String>),
58}
59
60impl OneOrMany {
61 pub fn to_vec(&self) -> Vec<String> {
62 match self {
63 Self::One(s) => vec![s.clone()],
64 Self::Many(v) => v.clone(),
65 }
66 }
67}
68
/// One fact declaration from the config.
///
/// The kind-specific key is flattened into the same mapping as `id`, so an
/// entry is written as e.g. `{ id: is_rust, any_file_exists: [Cargo.toml] }`.
#[derive(Debug, Clone, Deserialize)]
pub struct FactSpec {
    /// Name under which the evaluated value is stored in [`FactValues`].
    pub id: String,
    /// What to evaluate; selected by whichever kind key sits next to `id`.
    #[serde(flatten)]
    pub kind: FactKind,
}
76
/// The kind-specific part of a fact declaration.
///
/// Deserialized untagged: the variant is chosen by which key
/// (`any_file_exists`, `count_files`, ...) is present in the mapping.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum FactKind {
    /// Boolean: true when at least one indexed file matches any of the patterns.
    AnyFileExists {
        any_file_exists: OneOrMany,
    },
    /// Boolean: true when every pattern is matched by at least one indexed file.
    AllFilesExist {
        all_files_exist: OneOrMany,
    },
    /// Integer: the number of indexed files matching the single pattern.
    CountFiles {
        count_files: String,
    },
    /// Boolean: true when some selected file's text matches a regex.
    FileContentMatches {
        file_content_matches: FileContentMatchesFact,
    },
    /// String: the branch name read from `.git/HEAD` (empty when unavailable).
    GitBranch {
        git_branch: GitBranchFact,
    },
    /// String: the trimmed stdout of an external command (empty on failure).
    Custom {
        custom: CustomFact,
    },
}
101
/// Payload of a `custom:` fact: an external command to run.
///
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomFact {
    /// Program followed by its arguments; the first element is the executable.
    pub argv: Vec<String>,
}
120
/// Payload of a `file_content_matches:` fact.
///
/// Unknown keys are rejected at deserialization time.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct FileContentMatchesFact {
    /// Pattern(s) selecting which indexed files to inspect.
    pub paths: OneOrMany,
    /// Regular expression (compiled with `regex::Regex`) searched for in each
    /// selected file's text.
    pub pattern: String,
}
130
/// Payload of a `git_branch:` fact.
///
/// Currently carries no options; it is written as `git_branch: {}` and any
/// key inside the mapping is rejected.
#[derive(Debug, Clone, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct GitBranchFact {}
144
145#[derive(Debug, Default, Clone)]
147pub struct FactValues(HashMap<String, FactValue>);
148
149impl FactValues {
150 pub fn new() -> Self {
151 Self::default()
152 }
153
154 pub fn insert(&mut self, id: String, v: FactValue) {
155 self.0.insert(id, v);
156 }
157
158 pub fn get(&self, id: &str) -> Option<&FactValue> {
159 self.0.get(id)
160 }
161
162 pub fn len(&self) -> usize {
163 self.0.len()
164 }
165
166 pub fn is_empty(&self) -> bool {
167 self.0.is_empty()
168 }
169
170 pub fn as_map(&self) -> &HashMap<String, FactValue> {
171 &self.0
172 }
173}
174
175pub fn evaluate_facts(facts: &[FactSpec], root: &Path, index: &FileIndex) -> Result<FactValues> {
178 let mut out = FactValues::new();
179 for spec in facts {
180 let value = evaluate_one(spec, root, index)?;
181 out.insert(spec.id.clone(), value);
182 }
183 Ok(out)
184}
185
186fn evaluate_one(spec: &FactSpec, root: &Path, index: &FileIndex) -> Result<FactValue> {
187 match &spec.kind {
188 FactKind::AnyFileExists { any_file_exists } => {
189 let globs = any_file_exists.to_vec();
190 let scope = Scope::from_patterns(&globs)?;
191 let found = index.files().any(|e| scope.matches(&e.path));
192 Ok(FactValue::Bool(found))
193 }
194 FactKind::AllFilesExist { all_files_exist } => {
195 let globs = all_files_exist.to_vec();
196 for glob in &globs {
197 let scope = Scope::from_patterns(std::slice::from_ref(glob))?;
198 if !index.files().any(|e| scope.matches(&e.path)) {
199 return Ok(FactValue::Bool(false));
200 }
201 }
202 Ok(FactValue::Bool(true))
203 }
204 FactKind::CountFiles { count_files } => {
205 let scope = Scope::from_patterns(std::slice::from_ref(count_files))?;
206 let count = index.files().filter(|e| scope.matches(&e.path)).count();
207 Ok(FactValue::Int(i64::try_from(count).unwrap_or(i64::MAX)))
208 }
209 FactKind::FileContentMatches {
210 file_content_matches: spec,
211 } => {
212 let scope = Scope::from_patterns(&spec.paths.to_vec())?;
213 let regex = Regex::new(&spec.pattern)
214 .map_err(|e| Error::Other(format!("fact pattern /{}/: {e}", spec.pattern)))?;
215 let any = index.files().any(|entry| {
216 if !scope.matches(&entry.path) {
217 return false;
218 }
219 let Ok(bytes) = std::fs::read(root.join(&entry.path)) else {
220 return false;
221 };
222 let Ok(text) = std::str::from_utf8(&bytes) else {
223 return false;
224 };
225 regex.is_match(text)
226 });
227 Ok(FactValue::Bool(any))
228 }
229 FactKind::GitBranch { git_branch: _ } => Ok(FactValue::String(read_git_branch(root))),
230 FactKind::Custom { custom } => Ok(FactValue::String(run_custom(custom, root))),
231 }
232}
233
234fn run_custom(spec: &CustomFact, root: &Path) -> String {
237 let Some((program, args)) = spec.argv.split_first() else {
238 return String::new();
239 };
240 let output = std::process::Command::new(program)
241 .args(args)
242 .current_dir(root)
243 .stdin(std::process::Stdio::null())
244 .stderr(std::process::Stdio::null())
245 .output();
246 let Ok(output) = output else {
247 return String::new();
248 };
249 if !output.status.success() {
250 return String::new();
251 }
252 match std::str::from_utf8(&output.stdout) {
253 Ok(text) => text.trim_end().to_string(),
254 Err(_) => String::new(),
255 }
256}
257
258pub fn reject_custom_facts(config: &crate::config::Config, source: &str) -> Result<()> {
262 reject_custom_facts_in(&config.facts, source)
263}
264
265pub fn reject_custom_facts_in(facts: &[FactSpec], source: &str) -> Result<()> {
270 for f in facts {
271 if matches!(f.kind, FactKind::Custom { .. }) {
272 return Err(Error::Other(format!(
273 "fact {:?}: `custom:` facts are only allowed in the user's top-level \
274 config; declaring one in an extended config ({source}) is refused because \
275 it would let a ruleset spawn arbitrary processes",
276 f.id
277 )));
278 }
279 }
280 Ok(())
281}
282
/// Returns the branch named by `<root>/.git/HEAD`.
///
/// The file is read directly. When it holds a symbolic ref of the form
/// `ref: refs/heads/<name>`, `<name>` is returned. Anything else (for
/// example a bare commit hash) and any read failure yield an empty string.
fn read_git_branch(root: &Path) -> String {
    let mut head_path = root.to_path_buf();
    head_path.push(".git");
    head_path.push("HEAD");

    match std::fs::read_to_string(&head_path) {
        Ok(contents) => contents
            .trim()
            .strip_prefix("ref: refs/heads/")
            .map(str::to_string)
            .unwrap_or_default(),
        Err(_) => String::new(),
    }
}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::walker::FileEntry;
    use std::path::PathBuf;
    use tempfile::tempdir;

    /// Builds an in-memory index of non-directory entries for `paths`.
    fn idx(paths: &[&str]) -> FileIndex {
        FileIndex {
            entries: paths
                .iter()
                .map(|p| FileEntry {
                    path: PathBuf::from(p),
                    is_dir: false,
                    size: 1,
                })
                .collect(),
        }
    }

    /// Parses a YAML list of fact declarations, panicking on malformed input.
    ///
    /// Sibling keys of a `- id:` entry must sit at column 2 and nested keys
    /// deeper than their parent, otherwise the fixture is not valid YAML.
    fn parse(yaml: &str) -> Vec<FactSpec> {
        serde_yaml_ng::from_str(yaml).unwrap()
    }

    /// Builds a config whose only interesting content is `facts`, so the
    /// `Config` literal lives in exactly one place.
    fn config_with(facts: Vec<FactSpec>) -> crate::config::Config {
        crate::config::Config {
            version: 1,
            extends: Vec::new(),
            ignore: Vec::new(),
            respect_gitignore: true,
            vars: std::collections::HashMap::new(),
            facts,
            rules: Vec::new(),
            fix_size_limit: None,
            nested_configs: false,
        }
    }

    #[test]
    fn any_file_exists_true_when_match_found() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v =
            evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml", "src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn any_file_exists_false_when_no_match() {
        let facts = parse("- id: is_rust\n  any_file_exists: [Cargo.toml]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["src/lib.rs"])).unwrap();
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn any_file_exists_accepts_single_string() {
        let facts = parse("- id: has_readme\n  any_file_exists: README.md\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["README.md"])).unwrap();
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_true_when_all_match() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "README.md", "src/main.rs"]),
        )
        .unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn all_files_exist_false_when_any_missing() {
        let facts = parse("- id: is_monorepo\n  all_files_exist: [Cargo.toml, README.md]\n");
        let v = evaluate_facts(&facts, Path::new("/"), &idx(&["Cargo.toml"])).unwrap();
        assert_eq!(v.get("is_monorepo"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn count_files_returns_integer() {
        let facts = parse("- id: n_rs\n  count_files: \"**/*.rs\"\n");
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["a.rs", "b.rs", "src/c.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(3)));
    }

    #[test]
    fn multiple_facts_all_resolved() {
        let facts = parse(
            r#"
- id: is_rust
  any_file_exists: [Cargo.toml]
- id: n_rs
  count_files: "**/*.rs"
- id: has_readme
  any_file_exists: README.md
"#,
        );
        let v = evaluate_facts(
            &facts,
            Path::new("/"),
            &idx(&["Cargo.toml", "src/lib.rs", "README.md"]),
        )
        .unwrap();
        assert_eq!(v.len(), 3);
        assert_eq!(v.get("is_rust"), Some(&FactValue::Bool(true)));
        assert_eq!(v.get("n_rs"), Some(&FactValue::Int(1)));
        assert_eq!(v.get("has_readme"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_true_when_pattern_appears() {
        let tmp = tempdir().unwrap();
        std::fs::write(
            tmp.path().join("Cargo.toml"),
            "[dependencies]\ntokio = \"1\"\n",
        )
        .unwrap();
        std::fs::write(tmp.path().join("README.md"), "hello\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml", "README.md"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn file_content_matches_false_when_pattern_absent() {
        let tmp = tempdir().unwrap();
        std::fs::write(tmp.path().join("Cargo.toml"), "[dependencies]\n").unwrap();

        let facts = parse(
            "- id: uses_tokio\n  file_content_matches:\n    paths: Cargo.toml\n    pattern: tokio\n",
        );
        let index = idx(&["Cargo.toml"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        assert_eq!(v.get("uses_tokio"), Some(&FactValue::Bool(false)));
    }

    #[test]
    fn file_content_matches_skips_non_utf8_files() {
        let tmp = tempdir().unwrap();
        // Not valid UTF-8: must be skipped, not abort the evaluation.
        std::fs::write(tmp.path().join("blob.bin"), [0xFFu8, 0xFE, 0x00, 0x01]).unwrap();
        std::fs::write(
            tmp.path().join("text.txt"),
            "SPDX-License-Identifier: MIT\n",
        )
        .unwrap();

        let facts = parse(
            "- id: has_spdx\n  file_content_matches:\n    paths: [\"**/*\"]\n    pattern: SPDX\n",
        );
        let index = idx(&["blob.bin", "text.txt"]);
        let v = evaluate_facts(&facts, tmp.path(), &index).unwrap();
        // The readable text file still satisfies the fact.
        assert_eq!(v.get("has_spdx"), Some(&FactValue::Bool(true)));
    }

    #[test]
    fn git_branch_reads_refs_heads() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(tmp.path().join(".git/HEAD"), "ref: refs/heads/feature-x\n").unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("branch"),
            Some(&FactValue::String("feature-x".to_string()))
        );
    }

    #[test]
    fn git_branch_detached_head_is_empty_string() {
        let tmp = tempdir().unwrap();
        std::fs::create_dir(tmp.path().join(".git")).unwrap();
        std::fs::write(
            tmp.path().join(".git/HEAD"),
            "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef\n",
        )
        .unwrap();

        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn git_branch_missing_git_dir_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: branch\n  git_branch: {}\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("branch"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_captures_stdout_trimmed() {
        let tmp = tempdir().unwrap();
        let facts = parse(
            "- id: greeting\n  custom:\n    argv: [\"/bin/sh\", \"-c\", \"printf 'hello world\\n'\"]\n",
        );
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(
            v.get("greeting"),
            Some(&FactValue::String("hello world".to_string()))
        );
    }

    #[test]
    fn custom_unknown_program_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts =
            parse("- id: nope\n  custom:\n    argv: [\"no-such-program-alint-test-xyzzy\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("nope"), Some(&FactValue::String(String::new())));
    }

    #[cfg(unix)]
    #[test]
    fn custom_nonzero_exit_is_empty_string() {
        let tmp = tempdir().unwrap();
        let facts = parse("- id: bad\n  custom:\n    argv: [\"/bin/false\"]\n");
        let v = evaluate_facts(&facts, tmp.path(), &idx(&[])).unwrap();
        assert_eq!(v.get("bad"), Some(&FactValue::String(String::new())));
    }

    #[test]
    fn reject_custom_facts_flags_custom_but_passes_others() {
        let facts = parse(
            "- id: plain\n  any_file_exists: x\n- id: run\n  custom:\n    argv: [\"echo\"]\n",
        );
        let config = config_with(facts);
        let err = reject_custom_facts(&config, "./base.yml").unwrap_err();
        assert!(err.to_string().contains("custom"), "{err}");
        assert!(err.to_string().contains("./base.yml"), "{err}");
    }

    #[test]
    fn reject_custom_facts_ok_when_none_present() {
        let facts = parse("- id: plain\n  any_file_exists: x\n");
        let config = config_with(facts);
        assert!(reject_custom_facts(&config, "./base.yml").is_ok());
    }

    #[test]
    fn truthy_coercion() {
        assert!(FactValue::Bool(true).truthy());
        assert!(!FactValue::Bool(false).truthy());
        assert!(FactValue::Int(1).truthy());
        assert!(!FactValue::Int(0).truthy());
        assert!(FactValue::String("x".into()).truthy());
        assert!(!FactValue::String(String::new()).truthy());
    }
}