Skip to main content

harn_modules/
fingerprint.rs

1//! Interface fingerprints — a stable hash of a module's public surface.
2//!
3//! The fingerprint covers exactly the parts of a module that downstream
4//! importers can observe:
5//!
//! * public functions, pipelines, tools (name + signature); public skills (name only)
//! * public structs and enums, plus every type alias and interface (full shape)
8//! * `pub import` re-exports (target path + selective names)
9//!
10//! It intentionally excludes anything internal — function bodies,
11//! comments, private helpers, local variable bindings — so an edit that
12//! changes only the implementation of a public function leaves the
13//! fingerprint stable and dependents stay valid.
14//!
//! The hash is BLAKE3 over a canonical textual rendering of the surface.
//! Declarations and other order-insensitive lists are sorted in that
//! rendering, so trivial reorderings don't flip the fingerprint either.
18
19use std::fmt::Write as _;
20use std::path::Path;
21
22use harn_parser::{
23    peel_attributes, EnumVariant, InterfaceMethod, Node, Parser, SNode, ShapeField, StructField,
24    TypeExpr, TypeParam, TypedParam, Variance, WhereClause,
25};
26
27use crate::read_module_source;
28
/// Number of bytes in a fingerprint (the length of a BLAKE3 digest as used
/// by this module).
const FINGERPRINT_LEN: usize = 32;

/// The digest of a module's public surface, stored as raw bytes.
pub type Fingerprint = [u8; FINGERPRINT_LEN];
31
32/// Compute the interface fingerprint for `program` (the parsed top-level
33/// statements of a module). Returns the BLAKE3 digest of a canonical
34/// textual rendering — see the module docs for what's included.
35pub fn fingerprint_program(program: &[SNode]) -> Fingerprint {
36    let canonical = canonicalize_program(program);
37    blake3::hash(canonical.as_bytes()).into()
38}
39
40/// Convenience: parse `path` (real file or `<std>` virtual path) and
41/// fingerprint its public surface. Returns `None` when the source can't
42/// be read or doesn't lex/parse — callers can treat that as "no
43/// fingerprint" rather than erroring.
44pub fn fingerprint_file(path: &Path) -> Option<Fingerprint> {
45    let source = read_module_source(path)?;
46    fingerprint_source(&source)
47}
48
49/// Fingerprint an already-loaded source string. Lex + parse failures
50/// return `None`.
51pub fn fingerprint_source(source: &str) -> Option<Fingerprint> {
52    let mut lexer = harn_lexer::Lexer::new(source);
53    let tokens = lexer.tokenize().ok()?;
54    let program = Parser::new(tokens).parse().ok()?;
55    Some(fingerprint_program(&program))
56}
57
58/// Hex-encode a fingerprint for human-readable output (NDJSON events,
59/// logs, etc.).
60pub fn fingerprint_hex(fp: &Fingerprint) -> String {
61    let mut out = String::with_capacity(fp.len() * 2);
62    for b in fp {
63        write!(&mut out, "{b:02x}").expect("write to String is infallible");
64    }
65    out
66}
67
68fn canonicalize_program(program: &[SNode]) -> String {
69    let mut lines: Vec<String> = program.iter().filter_map(canonicalize_top_level).collect();
70    lines.sort();
71    lines.join("\n")
72}
73
74fn canonicalize_top_level(snode: &SNode) -> Option<String> {
75    let (_attrs, inner) = peel_attributes(snode);
76    match &inner.node {
77        Node::FnDecl {
78            name,
79            type_params,
80            params,
81            return_type,
82            where_clauses,
83            is_pub,
84            is_stream,
85            ..
86        } => is_pub.then(|| {
87            format!(
88                "fn{stream}:{name}{generics}({params}){ret}{wheres}",
89                stream = if *is_stream { "*" } else { "" },
90                generics = format_type_params(type_params),
91                params = format_typed_params(params),
92                ret = format_return(return_type),
93                wheres = format_where_clauses(where_clauses),
94            )
95        }),
96        Node::Pipeline {
97            name,
98            params,
99            return_type,
100            is_pub,
101            extends,
102            ..
103        } => is_pub.then(|| {
104            format!(
105                "pipeline:{name}({params}){ret}{extends}",
106                params = params.join(","),
107                ret = format_return(return_type),
108                extends = extends
109                    .as_deref()
110                    .map(|e| format!(" extends {e}"))
111                    .unwrap_or_default(),
112            )
113        }),
114        Node::ToolDecl {
115            name,
116            params,
117            return_type,
118            is_pub,
119            ..
120        } => is_pub.then(|| {
121            format!(
122                "tool:{name}({params}){ret}",
123                params = format_typed_params(params),
124                ret = format_return(return_type),
125            )
126        }),
127        Node::SkillDecl { name, is_pub, .. } => {
128            // Skill bodies are configuration that downstream importers
129            // observe by reading the resulting registry dict, but a
130            // skill is identified by its name from a typing perspective.
131            // The conservative thing is to hash the name only and let
132            // body edits propagate via runtime registration rather than
133            // type-time invalidation.
134            is_pub.then(|| format!("skill:{name}"))
135        }
136        Node::StructDecl {
137            name,
138            type_params,
139            fields,
140            is_pub,
141        } => is_pub.then(|| {
142            format!(
143                "struct:{name}{generics}{{{fields}}}",
144                generics = format_type_params(type_params),
145                fields = format_struct_fields(fields),
146            )
147        }),
148        Node::EnumDecl {
149            name,
150            type_params,
151            variants,
152            is_pub,
153        } => is_pub.then(|| {
154            format!(
155                "enum:{name}{generics}{{{variants}}}",
156                generics = format_type_params(type_params),
157                variants = format_enum_variants(variants),
158            )
159        }),
160        Node::InterfaceDecl {
161            name,
162            type_params,
163            associated_types,
164            methods,
165        } => Some(format!(
166            "interface:{name}{generics}{{assoc=[{assoc}]methods=[{methods}]}}",
167            generics = format_type_params(type_params),
168            assoc = format_associated_types(associated_types),
169            methods = format_interface_methods(methods),
170        )),
171        Node::TypeDecl {
172            name,
173            type_params,
174            type_expr,
175        } => Some(format!(
176            "type:{name}{generics}={ty}",
177            generics = format_type_params(type_params),
178            ty = format_type_expr(type_expr),
179        )),
180        Node::ImportDecl { path, is_pub } => is_pub.then(|| format!("pub_import_wildcard:{path}")),
181        Node::SelectiveImport {
182            names,
183            path,
184            is_pub,
185        } => is_pub.then(|| {
186            let mut sorted = names.clone();
187            sorted.sort();
188            format!("pub_import_selective:{path}::{}", sorted.join(","))
189        }),
190        _ => None,
191    }
192}
193
194fn format_type_params(params: &[TypeParam]) -> String {
195    if params.is_empty() {
196        return String::new();
197    }
198    let parts: Vec<String> = params
199        .iter()
200        .map(|p| {
201            let var = match p.variance {
202                Variance::Invariant => "",
203                Variance::Covariant => "out ",
204                Variance::Contravariant => "in ",
205            };
206            format!("{var}{}", p.name)
207        })
208        .collect();
209    format!("<{}>", parts.join(","))
210}
211
212fn format_typed_params(params: &[TypedParam]) -> String {
213    params
214        .iter()
215        .map(|p| {
216            let mut s = String::new();
217            if p.rest {
218                s.push_str("...");
219            }
220            s.push_str(&p.name);
221            if let Some(ty) = &p.type_expr {
222                s.push(':');
223                s.push_str(&format_type_expr(ty));
224            }
225            // Default values reference expressions whose shape we don't
226            // walk into; presence-only is enough — adding a default to
227            // a public parameter changes the callable contract.
228            if p.default_value.is_some() {
229                s.push_str("=?");
230            }
231            s
232        })
233        .collect::<Vec<_>>()
234        .join(",")
235}
236
237fn format_return(ret: &Option<TypeExpr>) -> String {
238    match ret {
239        Some(ty) => format!("->{}", format_type_expr(ty)),
240        None => String::new(),
241    }
242}
243
244fn format_where_clauses(clauses: &[WhereClause]) -> String {
245    if clauses.is_empty() {
246        return String::new();
247    }
248    let mut parts: Vec<String> = clauses
249        .iter()
250        .map(|w| format!("{}:{}", w.type_name, w.bound))
251        .collect();
252    parts.sort();
253    format!(" where {}", parts.join(","))
254}
255
256fn format_struct_fields(fields: &[StructField]) -> String {
257    let mut rendered: Vec<String> = fields
258        .iter()
259        .map(|f| {
260            let opt = if f.optional { "?" } else { "" };
261            let ty = f
262                .type_expr
263                .as_ref()
264                .map(format_type_expr)
265                .unwrap_or_default();
266            format!("{}{opt}:{ty}", f.name)
267        })
268        .collect();
269    rendered.sort();
270    rendered.join(",")
271}
272
273fn format_enum_variants(variants: &[EnumVariant]) -> String {
274    let mut rendered: Vec<String> = variants
275        .iter()
276        .map(|v| format!("{}({})", v.name, format_typed_params(&v.fields)))
277        .collect();
278    rendered.sort();
279    rendered.join(",")
280}
281
282fn format_associated_types(items: &[(String, Option<TypeExpr>)]) -> String {
283    let mut rendered: Vec<String> = items
284        .iter()
285        .map(|(name, bound)| match bound {
286            Some(ty) => format!("{name}:{}", format_type_expr(ty)),
287            None => name.clone(),
288        })
289        .collect();
290    rendered.sort();
291    rendered.join(",")
292}
293
294fn format_interface_methods(methods: &[InterfaceMethod]) -> String {
295    let mut rendered: Vec<String> = methods
296        .iter()
297        .map(|m| {
298            format!(
299                "{}{}({}){}",
300                m.name,
301                format_type_params(&m.type_params),
302                format_typed_params(&m.params),
303                format_return(&m.return_type),
304            )
305        })
306        .collect();
307    rendered.sort();
308    rendered.join(",")
309}
310
311fn format_type_expr(ty: &TypeExpr) -> String {
312    match ty {
313        TypeExpr::Named(name) => name.clone(),
314        TypeExpr::Union(parts) => {
315            let mut rendered: Vec<String> = parts.iter().map(format_type_expr).collect();
316            rendered.sort();
317            format!("({})", rendered.join("|"))
318        }
319        TypeExpr::Intersection(parts) => {
320            let mut rendered: Vec<String> = parts.iter().map(format_type_expr).collect();
321            rendered.sort();
322            format!("({})", rendered.join("&"))
323        }
324        TypeExpr::Shape(fields) => format!("{{{}}}", format_shape_fields(fields)),
325        TypeExpr::List(inner) => format!("list<{}>", format_type_expr(inner)),
326        TypeExpr::DictType(k, v) => {
327            format!("dict<{},{}>", format_type_expr(k), format_type_expr(v))
328        }
329        TypeExpr::Iter(inner) => format!("iter<{}>", format_type_expr(inner)),
330        TypeExpr::Generator(inner) => format!("Generator<{}>", format_type_expr(inner)),
331        TypeExpr::Stream(inner) => format!("Stream<{}>", format_type_expr(inner)),
332        TypeExpr::Applied { name, args } => {
333            let rendered: Vec<String> = args.iter().map(format_type_expr).collect();
334            format!("{name}<{}>", rendered.join(","))
335        }
336        TypeExpr::FnType {
337            params,
338            return_type,
339        } => {
340            let rendered: Vec<String> = params.iter().map(format_type_expr).collect();
341            format!(
342                "fn({})->{}",
343                rendered.join(","),
344                format_type_expr(return_type)
345            )
346        }
347        TypeExpr::Never => "Never".to_string(),
348        TypeExpr::LitString(s) => format!("\"{s}\""),
349        TypeExpr::LitInt(n) => n.to_string(),
350        TypeExpr::Owned(inner) => format!("owned<{}>", format_type_expr(inner)),
351    }
352}
353
354fn format_shape_fields(fields: &[ShapeField]) -> String {
355    let mut rendered: Vec<String> = fields
356        .iter()
357        .map(|f| {
358            let opt = if f.optional { "?" } else { "" };
359            format!("{}{opt}:{}", f.name, format_type_expr(&f.type_expr))
360        })
361        .collect();
362    rendered.sort();
363    rendered.join(",")
364}
365
#[cfg(test)]
mod tests {
    use super::*;

    /// Fingerprint `source`, panicking if it does not lex and parse.
    fn fp(source: &str) -> Fingerprint {
        fingerprint_source(source).expect("source parses")
    }

    /// Assert that both sources produce the same fingerprint.
    #[track_caller]
    fn assert_stable(before: &str, after: &str) {
        assert_eq!(fp(before), fp(after));
    }

    /// Assert that the two sources produce different fingerprints.
    #[track_caller]
    fn assert_flips(before: &str, after: &str) {
        assert_ne!(fp(before), fp(after));
    }

    #[test]
    fn private_body_change_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn add(a: int, b: int) -> int { a + b }\n",
            "pub fn add(a: int, b: int) -> int { let s = a + b; s }\n",
        );
    }

    #[test]
    fn private_helper_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn entry() { internal() }\nfn internal() { 1 }\n",
            "pub fn entry() { internal() }\nfn internal() { 2 }\nfn extra() { 3 }\n",
        );
    }

    #[test]
    fn reordering_public_decls_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn alpha() {}\npub fn beta() {}\n",
            "pub fn beta() {}\npub fn alpha() {}\n",
        );
    }

    #[test]
    fn changing_public_signature_flips_fingerprint() {
        assert_flips(
            "pub fn add(a: int, b: int) -> int { a + b }\n",
            "pub fn add(a: int, b: int, c: int) -> int { a + b + c }\n",
        );
    }

    #[test]
    fn changing_public_return_type_flips_fingerprint() {
        assert_flips(
            "pub fn make() -> string { \"x\" }\n",
            "pub fn make() -> int { 1 }\n",
        );
    }

    #[test]
    fn adding_pub_struct_field_flips_fingerprint() {
        assert_flips(
            "pub struct Point { x: int, y: int }\n",
            "pub struct Point { x: int, y: int, z: int }\n",
        );
    }

    #[test]
    fn pub_re_export_change_flips_fingerprint() {
        assert_flips(
            "pub import { foo } from \"./a\"\n",
            "pub import { foo, bar } from \"./a\"\n",
        );
    }

    #[test]
    fn adding_pub_decl_flips_fingerprint() {
        assert_flips(
            "pub fn alpha() {}\n",
            "pub fn alpha() {}\npub fn beta() {}\n",
        );
    }

    #[test]
    fn changing_only_non_pub_imports_does_not_flip_fingerprint() {
        // A private import changes what this module can see, not what its
        // importers can see, so it lies outside the public surface.
        assert_stable(
            "import \"./a\"\npub fn entry() {}\n",
            "import \"./b\"\npub fn entry() {}\n",
        );
    }

    #[test]
    fn hex_is_64_chars() {
        let encoded = fingerprint_hex(&fp("pub fn x() {}\n"));
        assert_eq!(encoded.len(), 64);
        assert!(encoded.chars().all(|c| c.is_ascii_hexdigit()));
    }
}
445}