Skip to main content

harn_modules/
fingerprint.rs

1//! Interface fingerprints — a stable hash of a module's public surface.
2//!
3//! The fingerprint covers exactly the parts of a module that downstream
4//! importers can observe:
5//!
6//! * public functions, pipelines, tools, skills (name + signature)
7//! * public structs, enums, type aliases, interfaces (full shape)
8//! * `pub import` re-exports (target path + selective names)
9//!
10//! It intentionally excludes anything internal — function bodies,
11//! comments, private helpers, local variable bindings — so an edit that
12//! changes only the implementation of a public function leaves the
13//! fingerprint stable and dependents stay valid.
14//!
//! The hash is BLAKE3 over a canonical textual rendering of the surface.
//! That rendering is the single source of truth for the fingerprint and is
//! alphabetized, so trivial reorderings don't flip the fingerprint either.
18
19use std::fmt::Write as _;
20use std::path::Path;
21
22use harn_parser::{
23    peel_attributes, EnumVariant, InterfaceMethod, Node, Parser, SNode, ShapeField, StructField,
24    TypeExpr, TypeParam, TypedParam, Variance, WhereClause,
25};
26
27use crate::read_module_source;
28
/// Raw interface fingerprint: the 32 bytes of a BLAKE3 digest computed
/// over a module's public surface.
pub type Fingerprint = [u8; 32];
31
32/// Compute the interface fingerprint for `program` (the parsed top-level
33/// statements of a module). Returns the BLAKE3 digest of a canonical
34/// textual rendering — see the module docs for what's included.
35pub fn fingerprint_program(program: &[SNode]) -> Fingerprint {
36    let canonical = canonicalize_program(program);
37    blake3::hash(canonical.as_bytes()).into()
38}
39
40/// Convenience: parse `path` (real file or `<std>` virtual path) and
41/// fingerprint its public surface. Returns `None` when the source can't
42/// be read or doesn't lex/parse — callers can treat that as "no
43/// fingerprint" rather than erroring.
44pub fn fingerprint_file(path: &Path) -> Option<Fingerprint> {
45    let source = read_module_source(path)?;
46    fingerprint_source(&source)
47}
48
49/// Fingerprint an already-loaded source string. Lex + parse failures
50/// return `None`.
51pub fn fingerprint_source(source: &str) -> Option<Fingerprint> {
52    let mut lexer = harn_lexer::Lexer::new(source);
53    let tokens = lexer.tokenize().ok()?;
54    let program = Parser::new(tokens).parse().ok()?;
55    Some(fingerprint_program(&program))
56}
57
58/// Hex-encode a fingerprint for human-readable output (NDJSON events,
59/// logs, etc.).
60pub fn fingerprint_hex(fp: &Fingerprint) -> String {
61    let mut out = String::with_capacity(fp.len() * 2);
62    for b in fp {
63        write!(&mut out, "{b:02x}").expect("write to String is infallible");
64    }
65    out
66}
67
68fn canonicalize_program(program: &[SNode]) -> String {
69    let mut lines: Vec<String> = program.iter().filter_map(canonicalize_top_level).collect();
70    lines.sort();
71    lines.join("\n")
72}
73
74fn canonicalize_top_level(snode: &SNode) -> Option<String> {
75    let (_attrs, inner) = peel_attributes(snode);
76    match &inner.node {
77        Node::FnDecl {
78            name,
79            type_params,
80            params,
81            return_type,
82            where_clauses,
83            is_pub,
84            is_stream,
85            ..
86        } => is_pub.then(|| {
87            format!(
88                "fn{stream}:{name}{generics}({params}){ret}{wheres}",
89                stream = if *is_stream { "*" } else { "" },
90                generics = format_type_params(type_params),
91                params = format_typed_params(params),
92                ret = format_return(return_type),
93                wheres = format_where_clauses(where_clauses),
94            )
95        }),
96        Node::Pipeline {
97            name,
98            params,
99            return_type,
100            is_pub,
101            extends,
102            ..
103        } => is_pub.then(|| {
104            format!(
105                "pipeline:{name}({params}){ret}{extends}",
106                params = params.join(","),
107                ret = format_return(return_type),
108                extends = extends
109                    .as_deref()
110                    .map(|e| format!(" extends {e}"))
111                    .unwrap_or_default(),
112            )
113        }),
114        Node::ToolDecl {
115            name,
116            params,
117            return_type,
118            is_pub,
119            ..
120        } => is_pub.then(|| {
121            format!(
122                "tool:{name}({params}){ret}",
123                params = format_typed_params(params),
124                ret = format_return(return_type),
125            )
126        }),
127        Node::SkillDecl { name, is_pub, .. } => {
128            // Skill bodies are configuration that downstream importers
129            // observe by reading the resulting registry dict, but a
130            // skill is identified by its name from a typing perspective.
131            // The conservative thing is to hash the name only and let
132            // body edits propagate via runtime registration rather than
133            // type-time invalidation.
134            is_pub.then(|| format!("skill:{name}"))
135        }
136        Node::StructDecl {
137            name,
138            type_params,
139            fields,
140            is_pub,
141        } => is_pub.then(|| {
142            format!(
143                "struct:{name}{generics}{{{fields}}}",
144                generics = format_type_params(type_params),
145                fields = format_struct_fields(fields),
146            )
147        }),
148        Node::EnumDecl {
149            name,
150            type_params,
151            variants,
152            is_pub,
153        } => is_pub.then(|| {
154            format!(
155                "enum:{name}{generics}{{{variants}}}",
156                generics = format_type_params(type_params),
157                variants = format_enum_variants(variants),
158            )
159        }),
160        Node::InterfaceDecl {
161            name,
162            type_params,
163            associated_types,
164            methods,
165        } => Some(format!(
166            "interface:{name}{generics}{{assoc=[{assoc}]methods=[{methods}]}}",
167            generics = format_type_params(type_params),
168            assoc = format_associated_types(associated_types),
169            methods = format_interface_methods(methods),
170        )),
171        Node::TypeDecl {
172            name,
173            type_params,
174            type_expr,
175            is_pub,
176        } => is_pub.then(|| {
177            format!(
178                "type:{name}{generics}={ty}",
179                generics = format_type_params(type_params),
180                ty = format_type_expr(type_expr),
181            )
182        }),
183        Node::ImportDecl { path, is_pub } => is_pub.then(|| format!("pub_import_wildcard:{path}")),
184        Node::SelectiveImport {
185            names,
186            path,
187            is_pub,
188        } => is_pub.then(|| {
189            let mut sorted = names.clone();
190            sorted.sort();
191            format!("pub_import_selective:{path}::{}", sorted.join(","))
192        }),
193        _ => None,
194    }
195}
196
197fn format_type_params(params: &[TypeParam]) -> String {
198    if params.is_empty() {
199        return String::new();
200    }
201    let parts: Vec<String> = params
202        .iter()
203        .map(|p| {
204            let var = match p.variance {
205                Variance::Invariant => "",
206                Variance::Covariant => "out ",
207                Variance::Contravariant => "in ",
208            };
209            format!("{var}{}", p.name)
210        })
211        .collect();
212    format!("<{}>", parts.join(","))
213}
214
215fn format_typed_params(params: &[TypedParam]) -> String {
216    params
217        .iter()
218        .map(|p| {
219            let mut s = String::new();
220            if p.rest {
221                s.push_str("...");
222            }
223            s.push_str(&p.name);
224            if let Some(ty) = &p.type_expr {
225                s.push(':');
226                s.push_str(&format_type_expr(ty));
227            }
228            // Default values reference expressions whose shape we don't
229            // walk into; presence-only is enough — adding a default to
230            // a public parameter changes the callable contract.
231            if p.default_value.is_some() {
232                s.push_str("=?");
233            }
234            s
235        })
236        .collect::<Vec<_>>()
237        .join(",")
238}
239
240fn format_return(ret: &Option<TypeExpr>) -> String {
241    match ret {
242        Some(ty) => format!("->{}", format_type_expr(ty)),
243        None => String::new(),
244    }
245}
246
247fn format_where_clauses(clauses: &[WhereClause]) -> String {
248    if clauses.is_empty() {
249        return String::new();
250    }
251    let mut parts: Vec<String> = clauses
252        .iter()
253        .map(|w| format!("{}:{}", w.type_name, w.bound))
254        .collect();
255    parts.sort();
256    format!(" where {}", parts.join(","))
257}
258
259fn format_struct_fields(fields: &[StructField]) -> String {
260    let mut rendered: Vec<String> = fields
261        .iter()
262        .map(|f| {
263            let opt = if f.optional { "?" } else { "" };
264            let ty = f
265                .type_expr
266                .as_ref()
267                .map(format_type_expr)
268                .unwrap_or_default();
269            format!("{}{opt}:{ty}", f.name)
270        })
271        .collect();
272    rendered.sort();
273    rendered.join(",")
274}
275
276fn format_enum_variants(variants: &[EnumVariant]) -> String {
277    let mut rendered: Vec<String> = variants
278        .iter()
279        .map(|v| format!("{}({})", v.name, format_typed_params(&v.fields)))
280        .collect();
281    rendered.sort();
282    rendered.join(",")
283}
284
285fn format_associated_types(items: &[(String, Option<TypeExpr>)]) -> String {
286    let mut rendered: Vec<String> = items
287        .iter()
288        .map(|(name, bound)| match bound {
289            Some(ty) => format!("{name}:{}", format_type_expr(ty)),
290            None => name.clone(),
291        })
292        .collect();
293    rendered.sort();
294    rendered.join(",")
295}
296
297fn format_interface_methods(methods: &[InterfaceMethod]) -> String {
298    let mut rendered: Vec<String> = methods
299        .iter()
300        .map(|m| {
301            format!(
302                "{}{}({}){}",
303                m.name,
304                format_type_params(&m.type_params),
305                format_typed_params(&m.params),
306                format_return(&m.return_type),
307            )
308        })
309        .collect();
310    rendered.sort();
311    rendered.join(",")
312}
313
314fn format_type_expr(ty: &TypeExpr) -> String {
315    match ty {
316        TypeExpr::Named(name) => name.clone(),
317        TypeExpr::Union(parts) => {
318            let mut rendered: Vec<String> = parts.iter().map(format_type_expr).collect();
319            rendered.sort();
320            format!("({})", rendered.join("|"))
321        }
322        TypeExpr::Intersection(parts) => {
323            let mut rendered: Vec<String> = parts.iter().map(format_type_expr).collect();
324            rendered.sort();
325            format!("({})", rendered.join("&"))
326        }
327        TypeExpr::Shape(fields) => format!("{{{}}}", format_shape_fields(fields)),
328        TypeExpr::OpenShape { fields, rests } => {
329            let tails: Vec<String> = rests
330                .iter()
331                .map(|r| format!("...{}", format_type_expr(r)))
332                .collect();
333            format!("{{{}|{}}}", format_shape_fields(fields), tails.join(","))
334        }
335        TypeExpr::List(inner) => format!("list<{}>", format_type_expr(inner)),
336        TypeExpr::DictType(k, v) => {
337            format!("dict<{},{}>", format_type_expr(k), format_type_expr(v))
338        }
339        TypeExpr::Iter(inner) => format!("iter<{}>", format_type_expr(inner)),
340        TypeExpr::Generator(inner) => format!("Generator<{}>", format_type_expr(inner)),
341        TypeExpr::Stream(inner) => format!("Stream<{}>", format_type_expr(inner)),
342        TypeExpr::Applied { name, args } => {
343            let rendered: Vec<String> = args.iter().map(format_type_expr).collect();
344            format!("{name}<{}>", rendered.join(","))
345        }
346        TypeExpr::FnType {
347            params,
348            return_type,
349        } => {
350            let rendered: Vec<String> = params.iter().map(format_type_expr).collect();
351            format!(
352                "fn({})->{}",
353                rendered.join(","),
354                format_type_expr(return_type)
355            )
356        }
357        TypeExpr::Never => "Never".to_string(),
358        TypeExpr::LitString(s) => format!("\"{s}\""),
359        TypeExpr::LitInt(n) => n.to_string(),
360        TypeExpr::Owned(inner) => format!("owned<{}>", format_type_expr(inner)),
361    }
362}
363
364fn format_shape_fields(fields: &[ShapeField]) -> String {
365    let mut rendered: Vec<String> = fields
366        .iter()
367        .map(|f| {
368            let opt = if f.optional { "?" } else { "" };
369            format!("{}{opt}:{}", f.name, format_type_expr(&f.type_expr))
370        })
371        .collect();
372    rendered.sort();
373    rendered.join(",")
374}
375
#[cfg(test)]
mod tests {
    use super::*;

    /// Fingerprint `source`, panicking if it does not lex and parse.
    fn fp(source: &str) -> Fingerprint {
        fingerprint_source(source).expect("source parses")
    }

    /// Assert that both sources produce the same fingerprint.
    #[track_caller]
    fn assert_stable(before: &str, after: &str) {
        assert_eq!(fp(before), fp(after));
    }

    /// Assert that the two sources produce different fingerprints.
    #[track_caller]
    fn assert_flips(before: &str, after: &str) {
        assert_ne!(fp(before), fp(after));
    }

    #[test]
    fn private_body_change_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn add(a: int, b: int) -> int { a + b }\n",
            "pub fn add(a: int, b: int) -> int { let s = a + b; s }\n",
        );
    }

    #[test]
    fn private_helper_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn entry() { internal() }\nfn internal() { 1 }\n",
            "pub fn entry() { internal() }\nfn internal() { 2 }\nfn extra() { 3 }\n",
        );
    }

    #[test]
    fn reordering_public_decls_does_not_flip_fingerprint() {
        assert_stable(
            "pub fn alpha() {}\npub fn beta() {}\n",
            "pub fn beta() {}\npub fn alpha() {}\n",
        );
    }

    #[test]
    fn changing_public_signature_flips_fingerprint() {
        assert_flips(
            "pub fn add(a: int, b: int) -> int { a + b }\n",
            "pub fn add(a: int, b: int, c: int) -> int { a + b + c }\n",
        );
    }

    #[test]
    fn changing_public_return_type_flips_fingerprint() {
        assert_flips(
            "pub fn make() -> string { \"x\" }\n",
            "pub fn make() -> int { 1 }\n",
        );
    }

    #[test]
    fn adding_pub_struct_field_flips_fingerprint() {
        assert_flips(
            "pub struct Point { x: int, y: int }\n",
            "pub struct Point { x: int, y: int, z: int }\n",
        );
    }

    #[test]
    fn pub_re_export_change_flips_fingerprint() {
        assert_flips(
            "pub import { foo } from \"./a\"\n",
            "pub import { foo, bar } from \"./a\"\n",
        );
    }

    #[test]
    fn adding_pub_decl_flips_fingerprint() {
        assert_flips(
            "pub fn alpha() {}\n",
            "pub fn alpha() {}\npub fn beta() {}\n",
        );
    }

    #[test]
    fn changing_only_non_pub_imports_does_not_flip_fingerprint() {
        // Private imports affect what _this_ module sees but not what
        // downstreams see, so they're outside the public surface.
        assert_stable(
            "import \"./a\"\npub fn entry() {}\n",
            "import \"./b\"\npub fn entry() {}\n",
        );
    }

    #[test]
    fn hex_is_64_chars() {
        let hex = fingerprint_hex(&fp("pub fn x() {}\n"));
        assert_eq!(hex.len(), 64);
        assert!(hex.bytes().all(|b| b.is_ascii_hexdigit()));
    }
}
455}