1use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12
13use harn_lexer::{Lexer, Span};
14use harn_parser::{peel_attributes, Attribute, AttributeArg, Node, Parser};
15use sha2::{Digest, Sha256};
16
17use super::executor::PredicateKind;
18use crate::flow::slice::PredicateHash;
19
/// File name that discovery looks for inside every candidate directory.
pub const INVARIANTS_FILE: &str = "invariants.harn";
22
/// One `invariants.harn` file found by [`discover_invariants`], together with
/// everything extracted from it.
#[derive(Clone, Debug)]
pub struct DiscoveredInvariantFile {
    /// Location of the file: the candidate directory joined with
    /// [`INVARIANTS_FILE`].
    pub path: PathBuf,
    /// Label for the directory relative to the discovery root: `"."` for the
    /// root itself, otherwise the path components joined with `/`.
    pub relative_dir: String,
    /// Full text of the file as read from disk.
    pub source: String,
    /// Predicates parsed out of `source`.
    pub predicates: Vec<DiscoveredPredicate>,
    /// Diagnostics from parsing, plus any added by semantic-fallback
    /// validation during discovery.
    pub diagnostics: Vec<DiscoveryDiagnostic>,
}
38
/// A single `@invariant` function found in an invariants file.
///
/// Field descriptions reflect how [`parse_invariants_source`] fills them in.
#[derive(Clone, Debug)]
pub struct DiscoveredPredicate {
    /// Name of the function declaration carrying the `@invariant` attribute.
    pub name: String,
    /// `Semantic` only when `@semantic` is present without `@deterministic`;
    /// `Deterministic` in every other case.
    pub kind: PredicateKind,
    /// Fallback predicate name taken from the `@semantic` attribute (the
    /// `fallback:` argument, or the first positional argument), if any.
    pub fallback: Option<String>,
    /// Parsed `@archivist(...)` metadata, or `None` when the attribute is absent.
    pub archivist: Option<ArchivistMetadata>,
    /// Whether the function carries a `@retroactive` attribute.
    pub retroactive: bool,
    /// `sha256:`-prefixed hex digest of the source bytes running from the
    /// earliest attribute start to the end of the declaration span.
    pub source_hash: PredicateHash,
    /// Span of the function declaration (attributes peeled off).
    pub span: Span,
}
62
/// Values read from the named arguments of an `@archivist(...)` attribute.
///
/// Arguments that are missing or have an unexpected literal type leave the
/// corresponding field at its default (empty list / `None`).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ArchivistMetadata {
    /// `evidence:` argument — a list of strings, or a single string.
    pub evidence: Vec<String>,
    /// `confidence:` argument — a float or integer literal. No range check is
    /// applied by the parser in this file.
    pub confidence: Option<f64>,
    /// `source_date:` argument — a string literal, stored verbatim.
    pub source_date: Option<String>,
    /// `coverage_examples:` argument — a list of strings, or a single string.
    pub coverage_examples: Vec<String>,
}
71
/// A problem noticed while parsing or validating an invariants file.
#[derive(Clone, Debug)]
pub struct DiscoveryDiagnostic {
    /// Whether the finding is a warning or an error.
    pub severity: DiagnosticSeverity,
    /// Human-readable description of the problem.
    pub message: String,
    /// Span of the offending declaration when known; lex and parse failures
    /// are reported with `None`.
    pub span: Option<Span>,
}
81
/// Severity attached to a [`DiscoveryDiagnostic`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Used in this file for a predicate lacking `@archivist(...)` provenance.
    Warning,
    /// Used in this file for lex/parse failures, conflicting kind attributes,
    /// and missing or unresolvable semantic fallbacks.
    Error,
}
87
88pub fn discover_invariants(root: &Path, target_dir: &Path) -> Vec<DiscoveredInvariantFile> {
98 let mut files = Vec::new();
99 let candidates = candidate_directories(root, target_dir);
100
101 for dir in candidates {
102 let path = dir.join(INVARIANTS_FILE);
103 if !path.is_file() {
104 continue;
105 }
106 let source = match std::fs::read_to_string(&path) {
107 Ok(s) => s,
108 Err(_) => continue,
109 };
110 let relative_dir = relative_dir_label(root, &dir);
111 let parsed = parse_invariants_source(&source);
112 files.push(DiscoveredInvariantFile {
113 path,
114 relative_dir,
115 source,
116 predicates: parsed.predicates,
117 diagnostics: parsed.diagnostics,
118 });
119 }
120
121 validate_semantic_fallbacks(&mut files);
122 files
123}
124
125pub fn parse_invariants_source(source: &str) -> ParsedInvariantFile {
128 let mut diagnostics = Vec::new();
129 let tokens = match Lexer::new(source).tokenize() {
130 Ok(t) => t,
131 Err(error) => {
132 diagnostics.push(DiscoveryDiagnostic {
133 severity: DiagnosticSeverity::Error,
134 message: format!("lex error: {error:?}"),
135 span: None,
136 });
137 return ParsedInvariantFile {
138 predicates: Vec::new(),
139 diagnostics,
140 };
141 }
142 };
143 let program = match Parser::new(tokens).parse() {
144 Ok(p) => p,
145 Err(error) => {
146 diagnostics.push(DiscoveryDiagnostic {
147 severity: DiagnosticSeverity::Error,
148 message: format!("parse error: {error:?}"),
149 span: None,
150 });
151 return ParsedInvariantFile {
152 predicates: Vec::new(),
153 diagnostics,
154 };
155 }
156 };
157
158 let mut predicates = Vec::new();
159 for node in &program {
160 let (attrs, inner) = peel_attributes(node);
161 let Node::FnDecl { name, .. } = &inner.node else {
162 continue;
163 };
164 let Some(predicate) =
165 predicate_from_attributes(source, name, attrs, inner.span, &mut diagnostics)
166 else {
167 continue;
168 };
169 predicates.push(predicate);
170 }
171
172 ParsedInvariantFile {
173 predicates,
174 diagnostics,
175 }
176}
177
/// Outcome of [`parse_invariants_source`] for a single source text.
#[derive(Clone, Debug, Default)]
pub struct ParsedInvariantFile {
    /// Predicates extracted from the source; empty if lexing or parsing failed.
    pub predicates: Vec<DiscoveredPredicate>,
    /// Warnings and errors collected while parsing and inspecting attributes.
    pub diagnostics: Vec<DiscoveryDiagnostic>,
}
184
185fn predicate_from_attributes(
186 source: &str,
187 name: &str,
188 attrs: &[Attribute],
189 span: Span,
190 diagnostics: &mut Vec<DiscoveryDiagnostic>,
191) -> Option<DiscoveredPredicate> {
192 let invariant = attrs.iter().find(|a| a.name == "invariant")?;
195 if !invariant.args.is_empty() {
196 return None;
197 }
198
199 let deterministic = attrs.iter().any(|a| a.name == "deterministic");
200 let semantic = attrs.iter().any(|a| a.name == "semantic");
201 let kind = match (deterministic, semantic) {
202 (true, true) => {
203 diagnostics.push(DiscoveryDiagnostic {
204 severity: DiagnosticSeverity::Error,
205 message: format!(
206 "predicate `{name}` declares both `@deterministic` and \
207 `@semantic`; pick exactly one"
208 ),
209 span: Some(span),
210 });
211 PredicateKind::Deterministic
212 }
213 (false, false) => {
214 PredicateKind::Deterministic
217 }
218 (true, false) => PredicateKind::Deterministic,
219 (false, true) => PredicateKind::Semantic,
220 };
221
222 let archivist = attrs
223 .iter()
224 .find(|a| a.name == "archivist")
225 .map(parse_archivist_attribute);
226 if archivist.is_none() {
227 diagnostics.push(DiscoveryDiagnostic {
228 severity: DiagnosticSeverity::Warning,
229 message: format!(
230 "predicate `{name}` is missing `@archivist(...)` provenance \
231 (evidence, confidence, source_date, coverage_examples)"
232 ),
233 span: Some(span),
234 });
235 }
236
237 let retroactive = attrs.iter().any(|a| a.name == "retroactive");
238 let fallback = attrs
239 .iter()
240 .find(|a| a.name == "semantic")
241 .and_then(parse_semantic_fallback);
242 if kind == PredicateKind::Semantic && fallback.is_none() {
243 diagnostics.push(DiscoveryDiagnostic {
244 severity: DiagnosticSeverity::Error,
245 message: format!(
246 "semantic predicate `{name}` must declare a deterministic fallback with \
247 `@semantic(fallback: \"predicate_name\")`"
248 ),
249 span: Some(span),
250 });
251 }
252 let source_hash = predicate_source_hash(source, attrs, span);
253
254 Some(DiscoveredPredicate {
255 name: name.to_string(),
256 kind,
257 fallback,
258 archivist,
259 retroactive,
260 source_hash,
261 span,
262 })
263}
264
265fn parse_semantic_fallback(attr: &Attribute) -> Option<String> {
266 attr.args
267 .iter()
268 .find(|arg| arg.name.as_deref() == Some("fallback"))
269 .or_else(|| attr.args.iter().find(|arg| arg.name.is_none()))
270 .and_then(identifier_or_string_arg)
271}
272
273fn validate_semantic_fallbacks(files: &mut [DiscoveredInvariantFile]) {
274 let mut visible_deterministic = BTreeMap::<String, PredicateHash>::new();
275
276 for file in files {
277 for predicate in &file.predicates {
278 if predicate.kind == PredicateKind::Deterministic {
279 visible_deterministic.insert(predicate.name.clone(), predicate.source_hash.clone());
280 }
281 }
282
283 let diagnostics = file
284 .predicates
285 .iter()
286 .filter(|predicate| predicate.kind == PredicateKind::Semantic)
287 .filter_map(|predicate| {
288 let fallback = predicate.fallback.as_ref()?;
289 if visible_deterministic.contains_key(fallback) {
290 return None;
291 }
292 Some(DiscoveryDiagnostic {
293 severity: DiagnosticSeverity::Error,
294 message: format!(
295 "semantic predicate `{}` fallback `{fallback}` must name a \
296 deterministic predicate in the same invariants.harn file or an ancestor file",
297 predicate.name
298 ),
299 span: Some(predicate.span),
300 })
301 })
302 .collect::<Vec<_>>();
303 file.diagnostics.extend(diagnostics);
304 }
305}
306
307fn predicate_source_hash(source: &str, attrs: &[Attribute], span: Span) -> PredicateHash {
308 let start = attrs
309 .iter()
310 .map(|attr| attr.span.start)
311 .min()
312 .unwrap_or(span.start)
313 .min(source.len());
314 let end = span.end.min(source.len()).max(start);
315 let bytes = &source.as_bytes()[start..end];
316 PredicateHash::new(format!("sha256:{}", hex::encode(Sha256::digest(bytes))))
317}
318
319fn parse_archivist_attribute(attr: &Attribute) -> ArchivistMetadata {
320 let mut metadata = ArchivistMetadata::default();
321 for arg in &attr.args {
322 let Some(name) = arg.name.as_deref() else {
323 continue;
324 };
325 match name {
326 "evidence" => metadata.evidence = string_list_arg(arg),
327 "confidence" => metadata.confidence = number_arg(arg),
328 "source_date" => metadata.source_date = string_arg(arg),
329 "coverage_examples" => metadata.coverage_examples = string_list_arg(arg),
330 _ => {}
331 }
332 }
333 metadata
334}
335
336fn string_arg(arg: &AttributeArg) -> Option<String> {
337 match &arg.value.node {
338 Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
339 _ => None,
340 }
341}
342
343fn identifier_or_string_arg(arg: &AttributeArg) -> Option<String> {
344 match &arg.value.node {
345 Node::Identifier(s) | Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
346 _ => None,
347 }
348}
349
350fn number_arg(arg: &AttributeArg) -> Option<f64> {
351 match &arg.value.node {
352 Node::FloatLiteral(f) => Some(*f),
353 Node::IntLiteral(i) => Some(*i as f64),
354 _ => None,
355 }
356}
357
358fn string_list_arg(arg: &AttributeArg) -> Vec<String> {
359 match &arg.value.node {
360 Node::ListLiteral(items) => items
361 .iter()
362 .filter_map(|item| match &item.node {
363 Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
364 _ => None,
365 })
366 .collect(),
367 Node::StringLiteral(s) | Node::RawStringLiteral(s) => vec![s.clone()],
368 _ => Vec::new(),
369 }
370}
371
/// Lists the directories to inspect, from `root` down to `target_dir`.
///
/// `root` is always the first entry. If `target_dir` lies under `root`, one
/// entry is added per path segment below it. A `target_dir` that is absolute
/// but outside `root` adds nothing; a relative one is interpreted relative to
/// `root`. Only ordinary name segments extend the chain — `.`, `..`, root and
/// prefix components are ignored, so the chain never climbs above `root`.
fn candidate_directories(root: &Path, target_dir: &Path) -> Vec<PathBuf> {
    use std::path::Component;

    let below_root: &Path = match target_dir.strip_prefix(root) {
        Ok(inside) => inside,
        Err(_) if target_dir.is_absolute() => Path::new(""),
        Err(_) => target_dir,
    };

    let mut chain = vec![root.to_path_buf()];
    let mut cursor = root.to_path_buf();
    for component in below_root.components() {
        if let Component::Normal(segment) = component {
            cursor.push(segment);
            chain.push(cursor.clone());
        }
    }
    chain
}
408
/// Produces a display label for `dir` relative to `root`.
///
/// The label is the ordinary name segments of the relative path joined with
/// `/`, or `"."` when there are none (for example when `dir` is `root`). If
/// `dir` is not under `root`, the segments of `dir` itself are used.
fn relative_dir_label(root: &Path, dir: &Path) -> String {
    let segments = dir
        .strip_prefix(root)
        .unwrap_or(dir)
        .components()
        .filter_map(|component| match component {
            std::path::Component::Normal(segment) => Some(segment.to_string_lossy().into_owned()),
            _ => None,
        })
        .collect::<Vec<String>>();

    if segments.is_empty() {
        return ".".to_string();
    }
    segments.join("/")
}
423
// Tests for discovery and parsing. Discovery tests build a throwaway directory
// tree with `tempfile`; parsing tests feed source text straight to
// `parse_invariants_source`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::flow::resolve_predicates;
    use std::fs;
    use tempfile::TempDir;

    /// Creates `dir` (and parents) if needed and writes `contents` to `dir/name`.
    fn write(dir: &Path, name: &str, contents: &str) {
        fs::create_dir_all(dir).unwrap();
        fs::write(dir.join(name), contents).unwrap();
    }

    /// Source for one deterministic predicate called `name` with full
    /// `@archivist` provenance, so it parses without diagnostics.
    fn sample_predicate(name: &str) -> String {
        format!(
            r#"
@invariant
@deterministic
@archivist(evidence: ["https://example.com/spec"], confidence: 0.95, source_date: "2026-04-01")
fn {name}(slice) -> bool {{
    return true
}}
"#
        )
    }

    // Files are returned root first, then toward the target; an intermediate
    // directory without an invariants file (`crates`) contributes no entry.
    #[test]
    fn discover_walks_from_root_to_leaf() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("root_check"));
        let nested = root.join("crates").join("foo");
        write(&nested, INVARIANTS_FILE, &sample_predicate("inner_check"));

        let files = discover_invariants(root, &nested);
        let labels: Vec<_> = files.iter().map(|f| f.relative_dir.clone()).collect();
        assert_eq!(labels, vec![".".to_string(), "crates/foo".to_string()]);
        assert_eq!(files[0].predicates[0].name, "root_check");
        assert_eq!(files[0].predicates[0].kind, PredicateKind::Deterministic);
        assert_eq!(files[1].predicates[0].name, "inner_check");
    }

    // `..` components in a relative target are ignored, so discovery cannot
    // leave `root`; only the root file is found.
    #[test]
    fn discover_clamps_parent_dir_traversal() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().join("repo");
        fs::create_dir_all(&root).unwrap();
        write(&root, INVARIANTS_FILE, &sample_predicate("root_check"));

        let files = discover_invariants(&root, Path::new("../../escape"));
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].relative_dir, ".");
    }

    // The named `@archivist` arguments end up in `ArchivistMetadata`, and a
    // fully annotated predicate yields no diagnostics.
    #[test]
    fn parse_picks_up_archivist_metadata() {
        let source = sample_predicate("foo");
        let parsed = parse_invariants_source(&source);
        assert!(parsed.diagnostics.is_empty(), "{:?}", parsed.diagnostics);
        let pred = &parsed.predicates[0];
        let arch = pred.archivist.as_ref().expect("archivist present");
        assert_eq!(arch.evidence, vec!["https://example.com/spec".to_string()]);
        assert_eq!(arch.confidence, Some(0.95));
        assert_eq!(arch.source_date.as_deref(), Some("2026-04-01"));
    }

    // Changing the function body changes the hash; the hash carries the
    // `sha256:` prefix.
    #[test]
    fn parse_pins_predicate_source_hash() {
        let source = sample_predicate("foo");
        let parsed = parse_invariants_source(&source);
        let original = parsed.predicates[0].source_hash.clone();

        let changed = sample_predicate("foo").replace("return true", "return false");
        let reparsed = parse_invariants_source(&changed);
        assert_ne!(reparsed.predicates[0].source_hash, original);
        assert!(original.as_str().starts_with("sha256:"));
    }

    // A predicate without `@archivist` is still returned, with a diagnostic
    // pointing out the missing provenance.
    #[test]
    fn parse_warns_when_archivist_missing() {
        let source = r#"
@invariant
@deterministic
fn missing_arch(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert_eq!(parsed.predicates.len(), 1);
        assert!(parsed
            .diagnostics
            .iter()
            .any(|d| d.message.contains("missing `@archivist(...)`")));
    }

    // Declaring both `@deterministic` and `@semantic` is an error.
    #[test]
    fn parse_errors_when_kinds_collide() {
        let source = r#"
@invariant
@deterministic
@semantic
@archivist(evidence: ["x"])
fn both_modes(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed
            .diagnostics
            .iter()
            .any(|d| d.severity == DiagnosticSeverity::Error
                && d.message.contains("pick exactly one")));
    }

    // `@semantic(fallback: "...")` sets the kind and fallback name, and
    // `@retroactive` sets the flag.
    #[test]
    fn parse_recognises_semantic_mode_and_retroactive() {
        let source = r#"
@invariant
@semantic(fallback: "fallback_check")
@retroactive
@archivist(evidence: ["https://x"], confidence: 0.5)
fn check(slice) -> bool { return true }

@invariant
@deterministic
@archivist(evidence: ["https://x"])
fn fallback_check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert_eq!(parsed.predicates.len(), 2);
        let pred = &parsed.predicates[0];
        assert_eq!(pred.kind, PredicateKind::Semantic);
        assert_eq!(pred.fallback.as_deref(), Some("fallback_check"));
        assert!(pred.retroactive);
    }

    // A bare `@semantic` with no fallback argument is an error.
    #[test]
    fn parse_errors_when_semantic_fallback_missing() {
        let source = r#"
@invariant
@semantic
@archivist(evidence: ["https://x"], confidence: 0.5)
fn check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed.diagnostics.iter().any(|d| {
            d.severity == DiagnosticSeverity::Error
                && d.message.contains("must declare a deterministic fallback")
        }));
    }

    // A fallback written as a bare identifier may refer to a deterministic
    // predicate in an ancestor file; the resolved fallback hash then equals
    // that ancestor predicate's source hash.
    #[test]
    fn discover_accepts_semantic_fallback_from_ancestor() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("root_fallback"));
        let nested = root.join("crates");
        write(
            &nested,
            INVARIANTS_FILE,
            r#"
@invariant
@semantic(fallback: root_fallback)
@archivist(evidence: ["https://x"], confidence: 0.5)
fn semantic_check(slice) -> bool { return true }
"#,
        );

        let files = discover_invariants(root, &nested);

        assert!(files
            .iter()
            .flat_map(|file| file.diagnostics.iter())
            .all(|diagnostic| diagnostic.severity != DiagnosticSeverity::Error));
        let resolved = resolve_predicates(&files);
        let semantic = resolved
            .iter()
            .find(|predicate| predicate.logical_name == "semantic_check")
            .unwrap();
        assert_eq!(
            semantic.fallback_hash,
            Some(files[0].predicates[0].source_hash.clone())
        );
    }

    // A fallback that exists only in a deeper directory is not visible to the
    // root file, so the root file receives an error.
    #[test]
    fn discover_rejects_semantic_fallback_from_descendant_only() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(
            root,
            INVARIANTS_FILE,
            r#"
@invariant
@semantic(fallback: child_fallback)
@archivist(evidence: ["https://x"], confidence: 0.5)
fn semantic_check(slice) -> bool { return true }
"#,
        );
        let nested = root.join("crates");
        write(
            &nested,
            INVARIANTS_FILE,
            &sample_predicate("child_fallback"),
        );

        let files = discover_invariants(root, &nested);

        assert!(files[0].diagnostics.iter().any(|diagnostic| {
            diagnostic.severity == DiagnosticSeverity::Error
                && diagnostic
                    .message
                    .contains("same invariants.harn file or an ancestor file")
        }));
    }

    // `@invariant(...)` with arguments is not treated as a predicate by this
    // parser, so nothing is returned for it.
    #[test]
    fn parse_skips_handler_ir_invariants() {
        let source = r#"
@invariant("fs.writes", "src/**")
fn handler_check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed.predicates.is_empty(), "{:?}", parsed.predicates);
    }

    // When a nested file re-declares a name from the root file, resolution
    // keeps both: the root entry unqualified and the nested ones prefixed
    // with `crates::`.
    #[test]
    fn resolve_predicates_keeps_ancestors_for_composition() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("shared"));
        let nested = root.join("crates");
        write(
            &nested,
            INVARIANTS_FILE,
            &format!(
                "{}{}",
                sample_predicate("shared"),
                sample_predicate("extra")
            ),
        );

        let files = discover_invariants(root, &nested);
        let resolved = resolve_predicates(&files);
        let qualified: Vec<_> = resolved.iter().map(|p| p.qualified_name.clone()).collect();
        assert!(qualified.contains(&"shared".to_string()));
        assert!(qualified.contains(&"crates::shared".to_string()));
        assert!(qualified.contains(&"crates::extra".to_string()));
    }
}