Skip to main content

harn_vm/flow/predicates/
discovery.rs

1//! Discovery and parsing of `invariants.harn` Flow predicate files.
2//!
3//! Mirrors `metadata_resolve` semantics: predicates declared in higher
4//! directories apply to all descendants. This module owns the walk + parse;
5//! hierarchy merging lives in [`super::compose`], and evaluation lives in
6//! [`super::executor`].
7//!
8//! See parent epic #571 and ticket #579 for the design rationale.
9
10use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12
13use harn_lexer::{Lexer, Span};
14use harn_parser::{peel_attributes, Attribute, AttributeArg, Node, Parser};
15use sha2::{Digest, Sha256};
16
17use super::executor::PredicateKind;
18use crate::flow::slice::PredicateHash;
19
/// Filename used for per-directory Flow invariant declarations.
///
/// [`discover_invariants`] looks for a file with exactly this name in the
/// discovery root and in every directory on the way down to the target.
pub const INVARIANTS_FILE: &str = "invariants.harn";
22
/// One `invariants.harn` file discovered on disk, with its predicates
/// already parsed into typed metadata.
///
/// Produced by [`discover_invariants`], one entry per directory level that
/// actually contains an [`INVARIANTS_FILE`].
#[derive(Clone, Debug)]
pub struct DiscoveredInvariantFile {
    /// Path to the source file: the inspected directory joined with
    /// [`INVARIANTS_FILE`].
    ///
    /// NOTE(review): discovery does not canonicalise this path, so it is
    /// absolute only when the `root` handed to discovery is absolute.
    pub path: PathBuf,
    /// Path relative to the discovery root, normalised with `/` separators.
    /// The root directory itself is labelled `"."`.
    pub relative_dir: String,
    /// Raw source — kept around so callers can render diagnostics.
    pub source: String,
    /// Predicates declared at the top level, in source order.
    pub predicates: Vec<DiscoveredPredicate>,
    /// Parse / attribute errors encountered when reading this file, plus any
    /// cross-file semantic-fallback errors appended after all files are
    /// parsed.
    pub diagnostics: Vec<DiscoveryDiagnostic>,
}
38
/// One Flow predicate declaration parsed out of an invariants file.
///
/// Only top-level functions carrying a bare `@invariant` attribute become
/// predicates; `@invariant(...)` with arguments is ignored by discovery.
#[derive(Clone, Debug)]
pub struct DiscoveredPredicate {
    /// Function name. Composition uses this name plus the source directory
    /// ancestry to identify stricter-child override lineages.
    pub name: String,
    /// `Deterministic` (default) or `Semantic`. A declaration that carries
    /// both `@deterministic` and `@semantic` is reported as an error and
    /// recorded as `Deterministic`.
    pub kind: PredicateKind,
    /// For `@semantic` predicates, the named deterministic predicate that
    /// carries the replayable enforcement path. Read from the `fallback:`
    /// argument of `@semantic(...)`, or from its first positional argument
    /// when no named one is present; identifiers and string literals are
    /// both accepted.
    pub fallback: Option<String>,
    /// Optional Archivist provenance block. `None` when the declaration has
    /// no `@archivist(...)` attribute (which also produces a warning).
    pub archivist: Option<ArchivistMetadata>,
    /// Advisory historical flag — predicates that legalise existing state
    /// rather than gate new atoms. Set by the presence of `@retroactive`.
    pub retroactive: bool,
    /// Stable content hash of the predicate declaration, including Flow
    /// attributes. Shipped slices pin this value so later predicate edits are
    /// append-only audit drift instead of retroactive blockers.
    ///
    /// Formatted as `sha256:<hex>` over the source bytes from the earliest
    /// attribute start to the end of the function declaration.
    pub source_hash: PredicateHash,
    /// Span of the function declaration in the source file (1-based).
    pub span: Span,
}
62
/// Provenance metadata pulled from `@archivist(...)`.
///
/// Every field is optional in the source: arguments that are absent, or whose
/// value has an unexpected literal type, leave the field at its `Default`.
/// Unknown argument names and positional arguments are ignored.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct ArchivistMetadata {
    /// Values of the `evidence:` argument. Accepts a list of string literals
    /// (non-string items are dropped) or a single string literal.
    pub evidence: Vec<String>,
    /// Value of the `confidence:` argument, taken from a float or integer
    /// literal. No range check is applied during discovery.
    pub confidence: Option<f64>,
    /// Value of the `source_date:` argument, stored verbatim from a string
    /// literal; it is not parsed as a date here.
    pub source_date: Option<String>,
    /// Values of the `coverage_examples:` argument, parsed the same way as
    /// `evidence`.
    pub coverage_examples: Vec<String>,
}
71
/// One diagnostic surfaced by discovery — covers both parse errors and
/// the structural attribute checks that go beyond the typechecker
/// (`@invariant` requires `@archivist`, etc.).
#[derive(Clone, Debug)]
pub struct DiscoveryDiagnostic {
    /// How serious the finding is.
    pub severity: DiagnosticSeverity,
    /// Human-readable description of the problem.
    pub message: String,
    /// Span of the offending function declaration. `None` for lex and parse
    /// failures, which are reported without a location.
    pub span: Option<Span>,
}
81
/// Severity attached to a [`DiscoveryDiagnostic`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Advisory finding; in this module it is used for a predicate that lacks
    /// `@archivist(...)` provenance.
    Warning,
    /// Hard failure: lex/parse errors, conflicting mode attributes, and
    /// missing or unresolvable semantic fallbacks.
    Error,
}
87
88/// Walk from `root` down through every component of `target_dir`,
89/// collecting `invariants.harn` at each level.
90///
91/// Returns the files in root-to-leaf order so composition can stamp source
92/// depth and evaluate ancestor/child predicates together.
93///
94/// `target_dir` is interpreted relative to `root`. Absolute paths or
95/// paths that escape `root` are silently clamped — discovery never reads
96/// files outside `root`.
97pub fn discover_invariants(root: &Path, target_dir: &Path) -> Vec<DiscoveredInvariantFile> {
98    let mut files = Vec::new();
99    let candidates = candidate_directories(root, target_dir);
100
101    for dir in candidates {
102        let path = dir.join(INVARIANTS_FILE);
103        if !path.is_file() {
104            continue;
105        }
106        let source = match std::fs::read_to_string(&path) {
107            Ok(s) => s,
108            Err(_) => continue,
109        };
110        let relative_dir = relative_dir_label(root, &dir);
111        let parsed = parse_invariants_source(&source);
112        files.push(DiscoveredInvariantFile {
113            path,
114            relative_dir,
115            source,
116            predicates: parsed.predicates,
117            diagnostics: parsed.diagnostics,
118        });
119    }
120
121    validate_semantic_fallbacks(&mut files);
122    files
123}
124
125/// Parse a single `invariants.harn` source string. Exposed publicly for
126/// tests, the LSP, and tooling that has the file contents in hand.
127pub fn parse_invariants_source(source: &str) -> ParsedInvariantFile {
128    let mut diagnostics = Vec::new();
129    let tokens = match Lexer::new(source).tokenize() {
130        Ok(t) => t,
131        Err(error) => {
132            diagnostics.push(DiscoveryDiagnostic {
133                severity: DiagnosticSeverity::Error,
134                message: format!("lex error: {error:?}"),
135                span: None,
136            });
137            return ParsedInvariantFile {
138                predicates: Vec::new(),
139                diagnostics,
140            };
141        }
142    };
143    let program = match Parser::new(tokens).parse() {
144        Ok(p) => p,
145        Err(error) => {
146            diagnostics.push(DiscoveryDiagnostic {
147                severity: DiagnosticSeverity::Error,
148                message: format!("parse error: {error:?}"),
149                span: None,
150            });
151            return ParsedInvariantFile {
152                predicates: Vec::new(),
153                diagnostics,
154            };
155        }
156    };
157
158    let mut predicates = Vec::new();
159    for node in &program {
160        let (attrs, inner) = peel_attributes(node);
161        let Node::FnDecl { name, .. } = &inner.node else {
162            continue;
163        };
164        let Some(predicate) =
165            predicate_from_attributes(source, name, attrs, inner.span, &mut diagnostics)
166        else {
167            continue;
168        };
169        predicates.push(predicate);
170    }
171
172    ParsedInvariantFile {
173        predicates,
174        diagnostics,
175    }
176}
177
/// Parsed-but-not-yet-located output of [`parse_invariants_source`].
///
/// Carries the same predicate and diagnostic lists as
/// [`DiscoveredInvariantFile`], without any path information.
#[derive(Clone, Debug, Default)]
pub struct ParsedInvariantFile {
    /// Flow predicates found in the source, in declaration order. Empty when
    /// lexing or parsing failed.
    pub predicates: Vec<DiscoveredPredicate>,
    /// Lex/parse errors and per-predicate attribute findings.
    pub diagnostics: Vec<DiscoveryDiagnostic>,
}
184
185fn predicate_from_attributes(
186    source: &str,
187    name: &str,
188    attrs: &[Attribute],
189    span: Span,
190    diagnostics: &mut Vec<DiscoveryDiagnostic>,
191) -> Option<DiscoveredPredicate> {
192    // The Flow predicate marker is a *bare* `@invariant`. Anything with
193    // arguments is the handler-IR form and is not part of Flow discovery.
194    let invariant = attrs.iter().find(|a| a.name == "invariant")?;
195    if !invariant.args.is_empty() {
196        return None;
197    }
198
199    let deterministic = attrs.iter().any(|a| a.name == "deterministic");
200    let semantic = attrs.iter().any(|a| a.name == "semantic");
201    let kind = match (deterministic, semantic) {
202        (true, true) => {
203            diagnostics.push(DiscoveryDiagnostic {
204                severity: DiagnosticSeverity::Error,
205                message: format!(
206                    "predicate `{name}` declares both `@deterministic` and \
207                     `@semantic`; pick exactly one"
208                ),
209                span: Some(span),
210            });
211            PredicateKind::Deterministic
212        }
213        (false, false) => {
214            // Default per design: predicates without an explicit mode are
215            // deterministic.
216            PredicateKind::Deterministic
217        }
218        (true, false) => PredicateKind::Deterministic,
219        (false, true) => PredicateKind::Semantic,
220    };
221
222    let archivist = attrs
223        .iter()
224        .find(|a| a.name == "archivist")
225        .map(parse_archivist_attribute);
226    if archivist.is_none() {
227        diagnostics.push(DiscoveryDiagnostic {
228            severity: DiagnosticSeverity::Warning,
229            message: format!(
230                "predicate `{name}` is missing `@archivist(...)` provenance \
231                 (evidence, confidence, source_date, coverage_examples)"
232            ),
233            span: Some(span),
234        });
235    }
236
237    let retroactive = attrs.iter().any(|a| a.name == "retroactive");
238    let fallback = attrs
239        .iter()
240        .find(|a| a.name == "semantic")
241        .and_then(parse_semantic_fallback);
242    if kind == PredicateKind::Semantic && fallback.is_none() {
243        diagnostics.push(DiscoveryDiagnostic {
244            severity: DiagnosticSeverity::Error,
245            message: format!(
246                "semantic predicate `{name}` must declare a deterministic fallback with \
247                 `@semantic(fallback: \"predicate_name\")`"
248            ),
249            span: Some(span),
250        });
251    }
252    let source_hash = predicate_source_hash(source, attrs, span);
253
254    Some(DiscoveredPredicate {
255        name: name.to_string(),
256        kind,
257        fallback,
258        archivist,
259        retroactive,
260        source_hash,
261        span,
262    })
263}
264
265fn parse_semantic_fallback(attr: &Attribute) -> Option<String> {
266    attr.args
267        .iter()
268        .find(|arg| arg.name.as_deref() == Some("fallback"))
269        .or_else(|| attr.args.iter().find(|arg| arg.name.is_none()))
270        .and_then(identifier_or_string_arg)
271}
272
273fn validate_semantic_fallbacks(files: &mut [DiscoveredInvariantFile]) {
274    let mut visible_deterministic = BTreeMap::<String, PredicateHash>::new();
275
276    for file in files {
277        for predicate in &file.predicates {
278            if predicate.kind == PredicateKind::Deterministic {
279                visible_deterministic.insert(predicate.name.clone(), predicate.source_hash.clone());
280            }
281        }
282
283        let diagnostics = file
284            .predicates
285            .iter()
286            .filter(|predicate| predicate.kind == PredicateKind::Semantic)
287            .filter_map(|predicate| {
288                let fallback = predicate.fallback.as_ref()?;
289                if visible_deterministic.contains_key(fallback) {
290                    return None;
291                }
292                Some(DiscoveryDiagnostic {
293                    severity: DiagnosticSeverity::Error,
294                    message: format!(
295                        "semantic predicate `{}` fallback `{fallback}` must name a \
296                         deterministic predicate in the same invariants.harn file or an ancestor file",
297                        predicate.name
298                    ),
299                    span: Some(predicate.span),
300                })
301            })
302            .collect::<Vec<_>>();
303        file.diagnostics.extend(diagnostics);
304    }
305}
306
307fn predicate_source_hash(source: &str, attrs: &[Attribute], span: Span) -> PredicateHash {
308    let start = attrs
309        .iter()
310        .map(|attr| attr.span.start)
311        .min()
312        .unwrap_or(span.start)
313        .min(source.len());
314    let end = span.end.min(source.len()).max(start);
315    let bytes = &source.as_bytes()[start..end];
316    PredicateHash::new(format!("sha256:{}", hex::encode(Sha256::digest(bytes))))
317}
318
319fn parse_archivist_attribute(attr: &Attribute) -> ArchivistMetadata {
320    let mut metadata = ArchivistMetadata::default();
321    for arg in &attr.args {
322        let Some(name) = arg.name.as_deref() else {
323            continue;
324        };
325        match name {
326            "evidence" => metadata.evidence = string_list_arg(arg),
327            "confidence" => metadata.confidence = number_arg(arg),
328            "source_date" => metadata.source_date = string_arg(arg),
329            "coverage_examples" => metadata.coverage_examples = string_list_arg(arg),
330            _ => {}
331        }
332    }
333    metadata
334}
335
336fn string_arg(arg: &AttributeArg) -> Option<String> {
337    match &arg.value.node {
338        Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
339        _ => None,
340    }
341}
342
343fn identifier_or_string_arg(arg: &AttributeArg) -> Option<String> {
344    match &arg.value.node {
345        Node::Identifier(s) | Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
346        _ => None,
347    }
348}
349
350fn number_arg(arg: &AttributeArg) -> Option<f64> {
351    match &arg.value.node {
352        Node::FloatLiteral(f) => Some(*f),
353        Node::IntLiteral(i) => Some(*i as f64),
354        _ => None,
355    }
356}
357
358fn string_list_arg(arg: &AttributeArg) -> Vec<String> {
359    match &arg.value.node {
360        Node::ListLiteral(items) => items
361            .iter()
362            .filter_map(|item| match &item.node {
363                Node::StringLiteral(s) | Node::RawStringLiteral(s) => Some(s.clone()),
364                _ => None,
365            })
366            .collect(),
367        Node::StringLiteral(s) | Node::RawStringLiteral(s) => vec![s.clone()],
368        _ => Vec::new(),
369    }
370}
371
/// List the directories to inspect, from `root` down to the target.
///
/// Mirrors `MetadataState::resolve`: the first entry is always `root`, and
/// each normal component of `target_dir` (taken relative to `root`) adds one
/// deeper directory. `.`, `..`, root and prefix components are dropped, so
/// the chain can never leave `root`; an absolute `target_dir` outside `root`
/// collapses to `root` alone.
fn candidate_directories(root: &Path, target_dir: &Path) -> Vec<PathBuf> {
    use std::path::Component;

    // Express the target relative to `root`. An absolute path that is not
    // under `root` contributes nothing; a relative path is used as given.
    let below_root: &Path = match target_dir.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) if target_dir.is_absolute() => Path::new(""),
        Err(_) => target_dir,
    };

    let descent = below_root
        .components()
        .filter_map(|component| match component {
            Component::Normal(name) => Some(name),
            // Anything that is not a plain name could escape `root`.
            _ => None,
        })
        .scan(root.to_path_buf(), |current, name| {
            current.push(name);
            Some(current.clone())
        });

    std::iter::once(root.to_path_buf()).chain(descent).collect()
}
408
/// Render `dir` as a `/`-separated label relative to `root`.
///
/// Only normal path components are kept (converted lossily to UTF-8). When
/// none remain, which includes `dir == root`, the label is `"."`. If `dir` is
/// not under `root`, its own normal components are used instead.
fn relative_dir_label(root: &Path, dir: &Path) -> String {
    use std::path::Component;

    let names: Vec<String> = dir
        .strip_prefix(root)
        .unwrap_or(dir)
        .components()
        .filter_map(|component| match component {
            Component::Normal(name) => Some(name.to_string_lossy().into_owned()),
            _ => None,
        })
        .collect();

    match names.as_slice() {
        [] => ".".to_string(),
        parts => parts.join("/"),
    }
}
423
// Unit tests for discovery: the filesystem walk, attribute parsing, and the
// cross-file semantic-fallback check. Filesystem tests use a fresh temporary
// directory per test.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::flow::resolve_predicates;
    use std::fs;
    use tempfile::TempDir;

    /// Create `dir` (and any missing parents) and write `contents` to
    /// `dir/name`, panicking on any I/O error.
    fn write(dir: &Path, name: &str, contents: &str) {
        fs::create_dir_all(dir).unwrap();
        fs::write(dir.join(name), contents).unwrap();
    }

    /// Source for one well-formed deterministic predicate called `name`,
    /// complete with `@archivist` provenance so it produces no diagnostics.
    fn sample_predicate(name: &str) -> String {
        format!(
            r#"
@invariant
@deterministic
@archivist(evidence: ["https://example.com/spec"], confidence: 0.95, source_date: "2026-04-01")
fn {name}(slice) -> bool {{
    return true
}}
"#
        )
    }

    // Files are returned root first, and intermediate directories without an
    // invariants file (`crates` here) are skipped.
    #[test]
    fn discover_walks_from_root_to_leaf() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("root_check"));
        let nested = root.join("crates").join("foo");
        write(&nested, INVARIANTS_FILE, &sample_predicate("inner_check"));

        let files = discover_invariants(root, &nested);
        let labels: Vec<_> = files.iter().map(|f| f.relative_dir.clone()).collect();
        assert_eq!(labels, vec![".".to_string(), "crates/foo".to_string()]);
        assert_eq!(files[0].predicates[0].name, "root_check");
        assert_eq!(files[0].predicates[0].kind, PredicateKind::Deterministic);
        assert_eq!(files[1].predicates[0].name, "inner_check");
    }

    // `..` components are dropped, so only the root's own file is found.
    #[test]
    fn discover_clamps_parent_dir_traversal() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().join("repo");
        fs::create_dir_all(&root).unwrap();
        write(&root, INVARIANTS_FILE, &sample_predicate("root_check"));

        let files = discover_invariants(&root, Path::new("../../escape"));
        assert_eq!(files.len(), 1);
        assert_eq!(files[0].relative_dir, ".");
    }

    // A fully annotated predicate parses cleanly and exposes its provenance.
    #[test]
    fn parse_picks_up_archivist_metadata() {
        let source = sample_predicate("foo");
        let parsed = parse_invariants_source(&source);
        assert!(parsed.diagnostics.is_empty(), "{:?}", parsed.diagnostics);
        let pred = &parsed.predicates[0];
        let arch = pred.archivist.as_ref().expect("archivist present");
        assert_eq!(arch.evidence, vec!["https://example.com/spec".to_string()]);
        assert_eq!(arch.confidence, Some(0.95));
        assert_eq!(arch.source_date.as_deref(), Some("2026-04-01"));
    }

    // Editing the predicate body must change its hash, and the hash carries
    // the `sha256:` scheme prefix.
    #[test]
    fn parse_pins_predicate_source_hash() {
        let source = sample_predicate("foo");
        let parsed = parse_invariants_source(&source);
        let original = parsed.predicates[0].source_hash.clone();

        let changed = sample_predicate("foo").replace("return true", "return false");
        let reparsed = parse_invariants_source(&changed);
        assert_ne!(reparsed.predicates[0].source_hash, original);
        assert!(original.as_str().starts_with("sha256:"));
    }

    // Missing provenance is reported but the predicate is still discovered.
    #[test]
    fn parse_warns_when_archivist_missing() {
        let source = r#"
@invariant
@deterministic
fn missing_arch(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert_eq!(parsed.predicates.len(), 1);
        assert!(parsed
            .diagnostics
            .iter()
            .any(|d| d.message.contains("missing `@archivist(...)`")));
    }

    // Declaring both modes on one predicate is an error.
    #[test]
    fn parse_errors_when_kinds_collide() {
        let source = r#"
@invariant
@deterministic
@semantic
@archivist(evidence: ["x"])
fn both_modes(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed
            .diagnostics
            .iter()
            .any(|d| d.severity == DiagnosticSeverity::Error
                && d.message.contains("pick exactly one")));
    }

    // `@semantic(fallback: "...")` sets the kind and fallback name, and
    // `@retroactive` sets the advisory flag.
    #[test]
    fn parse_recognises_semantic_mode_and_retroactive() {
        let source = r#"
@invariant
@semantic(fallback: "fallback_check")
@retroactive
@archivist(evidence: ["https://x"], confidence: 0.5)
fn check(slice) -> bool { return true }

@invariant
@deterministic
@archivist(evidence: ["https://x"])
fn fallback_check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert_eq!(parsed.predicates.len(), 2);
        let pred = &parsed.predicates[0];
        assert_eq!(pred.kind, PredicateKind::Semantic);
        assert_eq!(pred.fallback.as_deref(), Some("fallback_check"));
        assert!(pred.retroactive);
    }

    // A bare `@semantic` without a fallback argument is an error.
    #[test]
    fn parse_errors_when_semantic_fallback_missing() {
        let source = r#"
@invariant
@semantic
@archivist(evidence: ["https://x"], confidence: 0.5)
fn check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed.diagnostics.iter().any(|d| {
            d.severity == DiagnosticSeverity::Error
                && d.message.contains("must declare a deterministic fallback")
        }));
    }

    // A fallback declared in an ancestor file resolves without errors, and
    // composition pins the ancestor predicate's hash as the fallback hash.
    #[test]
    fn discover_accepts_semantic_fallback_from_ancestor() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("root_fallback"));
        let nested = root.join("crates");
        write(
            &nested,
            INVARIANTS_FILE,
            r#"
@invariant
@semantic(fallback: root_fallback)
@archivist(evidence: ["https://x"], confidence: 0.5)
fn semantic_check(slice) -> bool { return true }
"#,
        );

        let files = discover_invariants(root, &nested);

        assert!(files
            .iter()
            .flat_map(|file| file.diagnostics.iter())
            .all(|diagnostic| diagnostic.severity != DiagnosticSeverity::Error));
        let resolved = resolve_predicates(&files);
        let semantic = resolved
            .iter()
            .find(|predicate| predicate.logical_name == "semantic_check")
            .unwrap();
        assert_eq!(
            semantic.fallback_hash,
            Some(files[0].predicates[0].source_hash.clone())
        );
    }

    // A fallback that exists only in a descendant file is not visible from
    // the ancestor, so the error lands on the root file.
    #[test]
    fn discover_rejects_semantic_fallback_from_descendant_only() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(
            root,
            INVARIANTS_FILE,
            r#"
@invariant
@semantic(fallback: child_fallback)
@archivist(evidence: ["https://x"], confidence: 0.5)
fn semantic_check(slice) -> bool { return true }
"#,
        );
        let nested = root.join("crates");
        write(
            &nested,
            INVARIANTS_FILE,
            &sample_predicate("child_fallback"),
        );

        let files = discover_invariants(root, &nested);

        assert!(files[0].diagnostics.iter().any(|diagnostic| {
            diagnostic.severity == DiagnosticSeverity::Error
                && diagnostic
                    .message
                    .contains("same invariants.harn file or an ancestor file")
        }));
    }

    #[test]
    fn parse_skips_handler_ir_invariants() {
        // `@invariant("name", "glob")` is the harn-ir handler form; it
        // should never be treated as a Flow predicate.
        let source = r#"
@invariant("fs.writes", "src/**")
fn handler_check(slice) -> bool { return true }
"#;
        let parsed = parse_invariants_source(source);
        assert!(parsed.predicates.is_empty(), "{:?}", parsed.predicates);
    }

    // Same-named predicates at different depths are both kept, distinguished
    // by their qualified names.
    #[test]
    fn resolve_predicates_keeps_ancestors_for_composition() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        write(root, INVARIANTS_FILE, &sample_predicate("shared"));
        let nested = root.join("crates");
        // Override `shared` and add `extra`.
        write(
            &nested,
            INVARIANTS_FILE,
            &format!(
                "{}{}",
                sample_predicate("shared"),
                sample_predicate("extra")
            ),
        );

        let files = discover_invariants(root, &nested);
        let resolved = resolve_predicates(&files);
        let qualified: Vec<_> = resolved.iter().map(|p| p.qualified_name.clone()).collect();
        // Composition needs both versions so child results can tighten but
        // cannot relax ancestor verdicts.
        assert!(qualified.contains(&"shared".to_string()));
        assert!(qualified.contains(&"crates::shared".to_string()));
        // `extra` only exists in the deeper file.
        assert!(qualified.contains(&"crates::extra".to_string()));
    }
}