Skip to main content

icl_core/
normalizer.rs

1//! Canonical normalizer — converts ICL to deterministic canonical form
2//!
3//! The normalizer transforms an ICL contract into its canonical representation.
4//! This is the single deterministic form used for hashing, comparison, and storage.
5//!
6//! # Pipeline
7//!
8//! `ICL text → parse → AST → normalize_ast → serialize_canonical → SHA-256`
9//!
10//! # Guarantees
11//!
12//! - **Idempotent**: `normalize(normalize(x)) == normalize(x)`
13//! - **Deterministic**: same input always produces same output
14//! - **Unique**: each distinct contract has one canonical form
15//! - **Semantic preserving**: no information loss
16
17use sha2::{Digest, Sha256};
18
19use crate::parser::ast::*;
20use crate::parser::tokenizer::Span;
21use crate::Result;
22
23// ── Public API ─────────────────────────────────────────────
24
25/// Normalize ICL text to canonical form
26///
27/// Pipeline: parse → normalize AST → serialize → compute hash
28///
29/// # Guarantees
30/// - Idempotent: `normalize(normalize(x)) == normalize(x)`
31/// - Deterministic: same input always produces same output
32/// - Semantic preserving: `parse(normalize(x))` preserves all meaning
33///
34/// # Errors
35/// Returns `ParseError` for invalid input or `NormalizationError`
36/// if the contract cannot be canonicalized.
37pub fn normalize(icl: &str) -> Result<String> {
38    let ast = crate::parser::parse(icl)?;
39    let normalized = normalize_ast(ast);
40    let canonical = serialize_canonical(&normalized);
41    Ok(canonical)
42}
43
44/// Normalize a parsed AST to canonical form (sorted, expanded, hashed)
45///
46/// Steps per CORE-SPECIFICATION.md §6.1:
47/// 1. Sort state fields alphabetically
48/// 2. Sort operation parameters alphabetically
49/// 3. Sort operations by name
50/// 4. Sort string lists alphabetically
51/// 5. Expand defaults (already in AST)
52/// 6. Compute SHA-256 semantic hash
53pub fn normalize_ast(mut ast: ContractNode) -> ContractNode {
54    // ── Step 1: Sort state fields ──────────────────────
55    ast.data_semantics
56        .state
57        .sort_by(|a, b| a.name.value.cmp(&b.name.value));
58
59    // Sort Object type fields recursively
60    for field in &mut ast.data_semantics.state {
61        normalize_type_fields(&mut field.type_expr);
62    }
63
64    // ── Step 2: Sort invariants ────────────────────────
65    ast.data_semantics
66        .invariants
67        .sort_by(|a, b| a.value.cmp(&b.value));
68
69    // ── Step 3: Sort operations by name ────────────────
70    ast.behavioral_semantics
71        .operations
72        .sort_by(|a, b| a.name.value.cmp(&b.name.value));
73
74    // ── Step 4: Sort operation internals ───────────────
75    for op in &mut ast.behavioral_semantics.operations {
76        op.parameters
77            .sort_by(|a, b| a.name.value.cmp(&b.name.value));
78        for param in &mut op.parameters {
79            normalize_type_fields(&mut param.type_expr);
80        }
81        op.side_effects.sort_by(|a, b| a.value.cmp(&b.value));
82    }
83
84    // ── Step 5: Sort string lists ──────────────────────
85    ast.execution_constraints
86        .trigger_types
87        .sort_by(|a, b| a.value.cmp(&b.value));
88    ast.execution_constraints
89        .external_permissions
90        .sort_by(|a, b| a.value.cmp(&b.value));
91    ast.human_machine_contract
92        .system_commitments
93        .sort_by(|a, b| a.value.cmp(&b.value));
94    ast.human_machine_contract
95        .system_refusals
96        .sort_by(|a, b| a.value.cmp(&b.value));
97    ast.human_machine_contract
98        .user_obligations
99        .sort_by(|a, b| a.value.cmp(&b.value));
100
101    // ── Step 6: Sort extensions ────────────────────────
102    if let Some(ref mut ext) = ast.extensions {
103        ext.systems.sort_by(|a, b| a.name.value.cmp(&b.name.value));
104        for sys in &mut ext.systems {
105            sys.fields.sort_by(|a, b| a.name.value.cmp(&b.name.value));
106        }
107    }
108
109    // ── Step 7: Compute semantic hash ──────────────────
110    // Hash is computed over the canonical form *excluding* the semantic_hash field
111    let hash = compute_semantic_hash(&ast);
112    ast.identity.semantic_hash = SpannedValue::new(hash, dummy_span());
113
114    ast
115}
116
117/// Normalize a parsed Contract struct to canonical form
118pub fn normalize_contract(contract: &crate::Contract) -> Result<crate::Contract> {
119    // Round-trip: Contract → serialize to ICL text → parse → normalize → lower
120    // This ensures we use the canonical pipeline
121    let text = serialize_contract_to_icl(contract);
122    let normalized_text = normalize(&text)?;
123    crate::parser::parse_contract(&normalized_text)
124}
125
126// ── Canonical Serializer ───────────────────────────────────
127
128/// Serialize a ContractNode AST to canonical ICL text
129///
130/// Produces deterministic output with:
131/// - Fixed section order (Identity, PurposeStatement, etc.)
132/// - 2-space indentation
133/// - One field per line
134/// - No comments
135/// - Consistent formatting
136pub fn serialize_canonical(ast: &ContractNode) -> String {
137    let mut out = String::new();
138
139    out.push_str("Contract {\n");
140    serialize_identity(&mut out, &ast.identity);
141    serialize_purpose_statement(&mut out, &ast.purpose_statement);
142    serialize_data_semantics(&mut out, &ast.data_semantics);
143    serialize_behavioral_semantics(&mut out, &ast.behavioral_semantics);
144    serialize_execution_constraints(&mut out, &ast.execution_constraints);
145    serialize_human_machine_contract(&mut out, &ast.human_machine_contract);
146    out.push_str("}\n");
147
148    if let Some(ref ext) = ast.extensions {
149        out.push('\n');
150        serialize_extensions(&mut out, ext);
151    }
152
153    out
154}
155
156// ── Section serializers ────────────────────────────────────
157
158fn serialize_identity(out: &mut String, id: &IdentityNode) {
159    out.push_str("  Identity {\n");
160    write_field_str(out, 4, "created_timestamp", &id.created_timestamp.value);
161    write_field_str(out, 4, "owner", &id.owner.value);
162    write_field_str(out, 4, "semantic_hash", &id.semantic_hash.value);
163    write_field_str(out, 4, "stable_id", &id.stable_id.value);
164    write_field_int(out, 4, "version", id.version.value);
165    out.push_str("  }\n");
166}
167
168fn serialize_purpose_statement(out: &mut String, ps: &PurposeStatementNode) {
169    out.push_str("  PurposeStatement {\n");
170    write_field_float(out, 4, "confidence_level", ps.confidence_level.value);
171    write_field_str(out, 4, "intent_source", &ps.intent_source.value);
172    write_field_str(out, 4, "narrative", &ps.narrative.value);
173    out.push_str("  }\n");
174}
175
176fn serialize_data_semantics(out: &mut String, ds: &DataSemanticsNode) {
177    out.push_str("  DataSemantics {\n");
178    write_indent(out, 4);
179    out.push_str("invariants: ");
180    serialize_string_list(out, &ds.invariants);
181    out.push_str(",\n");
182    write_indent(out, 4);
183    out.push_str("state: {\n");
184    for field in &ds.state {
185        serialize_state_field(out, field, 6);
186    }
187    write_indent(out, 4);
188    out.push_str("}\n");
189    out.push_str("  }\n");
190}
191
192fn serialize_behavioral_semantics(out: &mut String, bs: &BehavioralSemanticsNode) {
193    out.push_str("  BehavioralSemantics {\n");
194    write_indent(out, 4);
195    out.push_str("operations: [\n");
196    for (i, op) in bs.operations.iter().enumerate() {
197        serialize_operation(out, op, 6);
198        if i < bs.operations.len() - 1 {
199            // Comma between operations handled by the comma after }
200        }
201    }
202    write_indent(out, 4);
203    out.push_str("]\n");
204    out.push_str("  }\n");
205}
206
207fn serialize_operation(out: &mut String, op: &OperationNode, indent: usize) {
208    write_indent(out, indent);
209    out.push_str("{\n");
210    write_field_str(out, indent + 2, "idempotence", &op.idempotence.value);
211    write_field_str(out, indent + 2, "name", &op.name.value);
212    // parameters
213    write_indent(out, indent + 2);
214    out.push_str("parameters: {\n");
215    for param in &op.parameters {
216        serialize_state_field(out, param, indent + 4);
217    }
218    write_indent(out, indent + 2);
219    out.push_str("},\n");
220    write_field_str(out, indent + 2, "postcondition", &op.postcondition.value);
221    write_field_str(out, indent + 2, "precondition", &op.precondition.value);
222    write_indent(out, indent + 2);
223    out.push_str("side_effects: ");
224    serialize_string_list(out, &op.side_effects);
225    out.push('\n');
226    write_indent(out, indent);
227    out.push_str("}\n");
228}
229
230fn serialize_execution_constraints(out: &mut String, ec: &ExecutionConstraintsNode) {
231    out.push_str("  ExecutionConstraints {\n");
232    write_indent(out, 4);
233    out.push_str("external_permissions: ");
234    serialize_string_list(out, &ec.external_permissions);
235    out.push_str(",\n");
236
237    // resource_limits
238    write_indent(out, 4);
239    out.push_str("resource_limits: {\n");
240    write_field_int(
241        out,
242        6,
243        "computation_timeout_ms",
244        ec.resource_limits.computation_timeout_ms.value,
245    );
246    write_field_int(
247        out,
248        6,
249        "max_memory_bytes",
250        ec.resource_limits.max_memory_bytes.value,
251    );
252    write_field_int(
253        out,
254        6,
255        "max_state_size_bytes",
256        ec.resource_limits.max_state_size_bytes.value,
257    );
258    write_indent(out, 4);
259    out.push_str("},\n");
260
261    write_field_str(out, 4, "sandbox_mode", &ec.sandbox_mode.value);
262    write_indent(out, 4);
263    out.push_str("trigger_types: ");
264    serialize_string_list(out, &ec.trigger_types);
265    out.push('\n');
266    out.push_str("  }\n");
267}
268
269fn serialize_human_machine_contract(out: &mut String, hmc: &HumanMachineContractNode) {
270    out.push_str("  HumanMachineContract {\n");
271    write_indent(out, 4);
272    out.push_str("system_commitments: ");
273    serialize_string_list(out, &hmc.system_commitments);
274    out.push_str(",\n");
275    write_indent(out, 4);
276    out.push_str("system_refusals: ");
277    serialize_string_list(out, &hmc.system_refusals);
278    out.push_str(",\n");
279    write_indent(out, 4);
280    out.push_str("user_obligations: ");
281    serialize_string_list(out, &hmc.user_obligations);
282    out.push('\n');
283    out.push_str("  }\n");
284}
285
286fn serialize_extensions(out: &mut String, ext: &ExtensionsNode) {
287    out.push_str("Extensions {\n");
288    for sys in &ext.systems {
289        write_indent(out, 2);
290        out.push_str(&sys.name.value);
291        out.push_str(" {\n");
292        for field in &sys.fields {
293            write_indent(out, 4);
294            out.push_str(&field.name.value);
295            out.push_str(": ");
296            serialize_literal_value(out, &field.value);
297            out.push('\n');
298        }
299        write_indent(out, 2);
300        out.push_str("}\n");
301    }
302    out.push_str("}\n");
303}
304
305// ── Field serializers ──────────────────────────────────────
306
307fn serialize_state_field(out: &mut String, field: &StateFieldNode, indent: usize) {
308    write_indent(out, indent);
309    out.push_str(&field.name.value);
310    out.push_str(": ");
311    serialize_type_expression(out, &field.type_expr);
312    if let Some(ref default) = field.default_value {
313        out.push_str(" = ");
314        serialize_literal_value(out, default);
315    }
316    out.push_str(",\n");
317}
318
319fn serialize_type_expression(out: &mut String, ty: &TypeExpression) {
320    match ty {
321        TypeExpression::Primitive(p, _) => out.push_str(&p.to_string()),
322        TypeExpression::Array(inner, _) => {
323            out.push_str("Array<");
324            serialize_type_expression(out, inner);
325            out.push('>');
326        }
327        TypeExpression::Map(k, v, _) => {
328            out.push_str("Map<");
329            serialize_type_expression(out, k);
330            out.push_str(", ");
331            serialize_type_expression(out, v);
332            out.push('>');
333        }
334        TypeExpression::Object(fields, _) => {
335            out.push_str("Object {\n");
336            // Fields already sorted during normalization
337            for f in fields {
338                // Increase indent by context — we use a fixed deeper indent
339                out.push_str("        "); // 8 spaces for nested object fields
340                out.push_str(&f.name.value);
341                out.push_str(": ");
342                serialize_type_expression(out, &f.type_expr);
343                if let Some(ref def) = f.default_value {
344                    out.push_str(" = ");
345                    serialize_literal_value(out, def);
346                }
347                out.push_str(",\n");
348            }
349            out.push_str("      }"); // Close at parent indent
350        }
351        TypeExpression::Enum(variants, _) => {
352            out.push_str("Enum[");
353            for (i, v) in variants.iter().enumerate() {
354                if i > 0 {
355                    out.push_str(", ");
356                }
357                out.push('"');
358                out.push_str(&v.value);
359                out.push('"');
360            }
361            out.push(']');
362        }
363    }
364}
365
366fn serialize_literal_value(out: &mut String, val: &LiteralValue) {
367    match val {
368        LiteralValue::String(s, _) => {
369            out.push('"');
370            out.push_str(s);
371            out.push('"');
372        }
373        LiteralValue::Integer(n, _) => out.push_str(&n.to_string()),
374        LiteralValue::Float(f, _) => {
375            // Ensure we always have a decimal point
376            let s = format!("{}", f);
377            if s.contains('.') {
378                out.push_str(&s);
379            } else {
380                out.push_str(&format!("{}.0", f));
381            }
382        }
383        LiteralValue::Boolean(b, _) => out.push_str(if *b { "true" } else { "false" }),
384        LiteralValue::Array(items, _) => {
385            out.push('[');
386            for (i, item) in items.iter().enumerate() {
387                if i > 0 {
388                    out.push_str(", ");
389                }
390                serialize_literal_value(out, item);
391            }
392            out.push(']');
393        }
394    }
395}
396
397fn serialize_string_list(out: &mut String, items: &[SpannedValue<String>]) {
398    out.push('[');
399    for (i, item) in items.iter().enumerate() {
400        if i > 0 {
401            out.push_str(", ");
402        }
403        out.push('"');
404        out.push_str(&item.value);
405        out.push('"');
406    }
407    out.push(']');
408}
409
410// ── Helpers ────────────────────────────────────────────────
411
/// Append `n` space characters to `out`.
fn write_indent(out: &mut String, n: usize) {
    out.extend(std::iter::repeat(' ').take(n));
}
417
418fn write_field_str(out: &mut String, indent: usize, name: &str, value: &str) {
419    write_indent(out, indent);
420    out.push_str(name);
421    out.push_str(": \"");
422    out.push_str(value);
423    out.push_str("\",\n");
424}
425
426fn write_field_int(out: &mut String, indent: usize, name: &str, value: i64) {
427    write_indent(out, indent);
428    out.push_str(name);
429    out.push_str(": ");
430    out.push_str(&value.to_string());
431    out.push_str(",\n");
432}
433
434fn write_field_float(out: &mut String, indent: usize, name: &str, value: f64) {
435    write_indent(out, indent);
436    out.push_str(name);
437    out.push_str(": ");
438    let s = format!("{}", value);
439    if s.contains('.') {
440        out.push_str(&s);
441    } else {
442        out.push_str(&format!("{}.0", value));
443    }
444    out.push_str(",\n");
445}
446
447fn normalize_type_fields(ty: &mut TypeExpression) {
448    match ty {
449        TypeExpression::Object(fields, _) => {
450            fields.sort_by(|a, b| a.name.value.cmp(&b.name.value));
451            for f in fields.iter_mut() {
452                normalize_type_fields(&mut f.type_expr);
453            }
454        }
455        TypeExpression::Array(inner, _) => normalize_type_fields(inner),
456        TypeExpression::Map(k, v, _) => {
457            normalize_type_fields(k);
458            normalize_type_fields(v);
459        }
460        TypeExpression::Enum(variants, _) => {
461            // Sort enum variants alphabetically for canonical form
462            variants.sort_by(|a, b| a.value.cmp(&b.value));
463        }
464        TypeExpression::Primitive(_, _) => {}
465    }
466}
467
/// Build a placeholder `Span` with line, column and offset all set to zero.
///
/// Used in this file for values the normalizer synthesizes itself (the
/// computed and placeholder semantic hashes), which have no source position.
fn dummy_span() -> Span {
    Span {
        line: 0,
        column: 0,
        offset: 0,
    }
}
475
476// ── SHA-256 Hash Computation ──────────────────────────────
477
478/// Compute SHA-256 semantic hash of a normalized AST
479///
480/// The hash is computed over the canonical serialization
481/// with the semantic_hash field set to a placeholder value.
482/// This ensures the hash doesn't include itself.
483pub fn compute_semantic_hash(ast: &ContractNode) -> String {
484    // Clone AST with a placeholder hash
485    let mut hashable = ast.clone();
486    hashable.identity.semantic_hash = SpannedValue::new(
487        "0000000000000000000000000000000000000000000000000000000000000000".to_string(),
488        dummy_span(),
489    );
490
491    let canonical = serialize_canonical(&hashable);
492    let mut hasher = Sha256::new();
493    hasher.update(canonical.as_bytes());
494    let result = hasher.finalize();
495    format!("{:x}", result)
496}
497
498// ── Contract ↔ ICL text helpers ────────────────────────────
499
500/// Serialize a Contract struct to ICL text (for round-tripping through normalizer)
501fn serialize_contract_to_icl(contract: &crate::Contract) -> String {
502    let mut out = String::new();
503    out.push_str("Contract {\n");
504
505    // Identity
506    out.push_str("  Identity {\n");
507    write_field_str(&mut out, 4, "stable_id", &contract.identity.stable_id);
508    write_field_int(&mut out, 4, "version", contract.identity.version as i64);
509    write_field_str(
510        &mut out,
511        4,
512        "created_timestamp",
513        &contract.identity.created_timestamp,
514    );
515    write_field_str(&mut out, 4, "owner", &contract.identity.owner);
516    write_field_str(
517        &mut out,
518        4,
519        "semantic_hash",
520        &contract.identity.semantic_hash,
521    );
522    out.push_str("  }\n");
523
524    // PurposeStatement
525    out.push_str("  PurposeStatement {\n");
526    write_field_str(
527        &mut out,
528        4,
529        "narrative",
530        &contract.purpose_statement.narrative,
531    );
532    write_field_str(
533        &mut out,
534        4,
535        "intent_source",
536        &contract.purpose_statement.intent_source,
537    );
538    write_field_float(
539        &mut out,
540        4,
541        "confidence_level",
542        contract.purpose_statement.confidence_level,
543    );
544    out.push_str("  }\n");
545
546    // DataSemantics — state as empty since Contract uses serde_json::Value
547    out.push_str("  DataSemantics {\n");
548    out.push_str("    state: {},\n");
549    write_indent(&mut out, 4);
550    out.push_str("invariants: [");
551    for (i, inv) in contract.data_semantics.invariants.iter().enumerate() {
552        if i > 0 {
553            out.push_str(", ");
554        }
555        out.push('"');
556        out.push_str(inv);
557        out.push('"');
558    }
559    out.push_str("]\n");
560    out.push_str("  }\n");
561
562    // BehavioralSemantics
563    out.push_str("  BehavioralSemantics {\n");
564    out.push_str("    operations: [\n");
565    for op in &contract.behavioral_semantics.operations {
566        out.push_str("      {\n");
567        write_field_str(&mut out, 8, "name", &op.name);
568        write_field_str(&mut out, 8, "precondition", &op.precondition);
569        out.push_str("        parameters: {},\n");
570        write_field_str(&mut out, 8, "postcondition", &op.postcondition);
571        write_indent(&mut out, 8);
572        out.push_str("side_effects: [");
573        for (i, se) in op.side_effects.iter().enumerate() {
574            if i > 0 {
575                out.push_str(", ");
576            }
577            out.push('"');
578            out.push_str(se);
579            out.push('"');
580        }
581        out.push_str("],\n");
582        write_field_str(&mut out, 8, "idempotence", &op.idempotence);
583        out.push_str("      }\n");
584    }
585    out.push_str("    ]\n");
586    out.push_str("  }\n");
587
588    // ExecutionConstraints
589    out.push_str("  ExecutionConstraints {\n");
590    write_indent(&mut out, 4);
591    out.push_str("trigger_types: [");
592    for (i, t) in contract
593        .execution_constraints
594        .trigger_types
595        .iter()
596        .enumerate()
597    {
598        if i > 0 {
599            out.push_str(", ");
600        }
601        out.push('"');
602        out.push_str(t);
603        out.push('"');
604    }
605    out.push_str("],\n");
606    out.push_str("    resource_limits: {\n");
607    write_field_int(
608        &mut out,
609        6,
610        "max_memory_bytes",
611        contract
612            .execution_constraints
613            .resource_limits
614            .max_memory_bytes as i64,
615    );
616    write_field_int(
617        &mut out,
618        6,
619        "computation_timeout_ms",
620        contract
621            .execution_constraints
622            .resource_limits
623            .computation_timeout_ms as i64,
624    );
625    write_field_int(
626        &mut out,
627        6,
628        "max_state_size_bytes",
629        contract
630            .execution_constraints
631            .resource_limits
632            .max_state_size_bytes as i64,
633    );
634    out.push_str("    },\n");
635    write_indent(&mut out, 4);
636    out.push_str("external_permissions: [");
637    for (i, p) in contract
638        .execution_constraints
639        .external_permissions
640        .iter()
641        .enumerate()
642    {
643        if i > 0 {
644            out.push_str(", ");
645        }
646        out.push('"');
647        out.push_str(p);
648        out.push('"');
649    }
650    out.push_str("],\n");
651    write_field_str(
652        &mut out,
653        4,
654        "sandbox_mode",
655        &contract.execution_constraints.sandbox_mode,
656    );
657    out.push_str("  }\n");
658
659    // HumanMachineContract
660    out.push_str("  HumanMachineContract {\n");
661    write_string_list(
662        &mut out,
663        4,
664        "system_commitments",
665        &contract.human_machine_contract.system_commitments,
666    );
667    write_string_list(
668        &mut out,
669        4,
670        "system_refusals",
671        &contract.human_machine_contract.system_refusals,
672    );
673    write_string_list(
674        &mut out,
675        4,
676        "user_obligations",
677        &contract.human_machine_contract.user_obligations,
678    );
679    out.push_str("  }\n");
680
681    out.push_str("}\n");
682    out
683}
684
685fn write_string_list(out: &mut String, indent: usize, name: &str, items: &[String]) {
686    write_indent(out, indent);
687    out.push_str(name);
688    out.push_str(": [");
689    for (i, item) in items.iter().enumerate() {
690        if i > 0 {
691            out.push_str(", ");
692        }
693        out.push('"');
694        out.push_str(item);
695        out.push('"');
696    }
697    out.push_str("],\n");
698}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703    use std::fs;
704    use std::path::Path;
705
    /// Shared fixture: a small contract with one state field (`value`), no
    /// operations, no invariants, one trigger type and empty commitment
    /// lists. Fields are deliberately written in non-alphabetical order and
    /// the `semantic_hash` is a short placeholder, so tests can observe the
    /// normalizer reordering fields and replacing the hash.
    const MINIMAL_CONTRACT: &str = r#"Contract {
  Identity {
    stable_id: "ic-test-001",
    version: 1,
    created_timestamp: 2026-02-01T00:00:00Z,
    owner: "test",
    semantic_hash: "0000000000000000"
  }

  PurposeStatement {
    narrative: "Minimal test contract",
    intent_source: "test",
    confidence_level: 1.0
  }

  DataSemantics {
    state: {
      value: String
    },
    invariants: []
  }

  BehavioralSemantics {
    operations: []
  }

  ExecutionConstraints {
    trigger_types: ["manual"],
    resource_limits: {
      max_memory_bytes: 1048576,
      computation_timeout_ms: 100,
      max_state_size_bytes: 1048576
    },
    external_permissions: [],
    sandbox_mode: "full_isolation"
  }

  HumanMachineContract {
    system_commitments: [],
    system_refusals: [],
    user_obligations: []
  }
}"#;
749
750    fn read_fixture(path: &str) -> String {
751        let full = Path::new(env!("CARGO_MANIFEST_DIR"))
752            .join("../../tests/fixtures")
753            .join(path);
754        fs::read_to_string(&full)
755            .unwrap_or_else(|e| panic!("Failed to read {}: {}", full.display(), e))
756    }
757
758    // ── Basic normalization ────────────────────────────
759
760    #[test]
761    fn test_normalize_minimal_contract() {
762        let result = normalize(MINIMAL_CONTRACT).unwrap();
763        assert!(result.contains("Contract {"));
764        assert!(result.contains("Identity {"));
765        assert!(result.contains("semantic_hash:"));
766    }
767
768    #[test]
769    fn test_normalize_produces_valid_icl() {
770        // Normalized output must parse successfully
771        let normalized = normalize(MINIMAL_CONTRACT).unwrap();
772        let ast = crate::parser::parse(&normalized);
773        assert!(
774            ast.is_ok(),
775            "Normalized output doesn't parse: {:?}",
776            ast.err()
777        );
778    }
779
780    // ── Sorting ────────────────────────────────────────
781
    /// State fields declared out of order must come out sorted by name.
    #[test]
    fn test_normalize_sorts_state_fields() {
        // State fields: z_field, a_field — should be sorted to a_field, z_field
        let input = r#"Contract {
  Identity {
    stable_id: "ic-sort-001",
    version: 1,
    created_timestamp: 2026-02-01T00:00:00Z,
    owner: "test",
    semantic_hash: "0000000000000000"
  }
  PurposeStatement {
    narrative: "Sort test",
    intent_source: "test",
    confidence_level: 0.5
  }
  DataSemantics {
    state: {
      z_field: String,
      a_field: Integer
    },
    invariants: []
  }
  BehavioralSemantics {
    operations: []
  }
  ExecutionConstraints {
    trigger_types: ["manual"],
    resource_limits: {
      max_memory_bytes: 1048576,
      computation_timeout_ms: 100,
      max_state_size_bytes: 1048576
    },
    external_permissions: [],
    sandbox_mode: "full_isolation"
  }
  HumanMachineContract {
    system_commitments: [],
    system_refusals: [],
    user_obligations: []
  }
}"#;
        let normalized = normalize(input).unwrap();
        // Compare byte offsets of the first occurrence of each field name.
        let a_pos = normalized.find("a_field").unwrap();
        let z_pos = normalized.find("z_field").unwrap();
        assert!(
            a_pos < z_pos,
            "a_field should come before z_field in normalized output"
        );
    }
832
    /// Operations declared as z_operation, a_operation must come out sorted
    /// by name.
    #[test]
    fn test_normalize_sorts_operations_by_name() {
        let input = r#"Contract {
  Identity {
    stable_id: "ic-sort-ops-001",
    version: 1,
    created_timestamp: 2026-02-01T00:00:00Z,
    owner: "test",
    semantic_hash: "0000000000000000"
  }
  PurposeStatement {
    narrative: "Sort ops test",
    intent_source: "test",
    confidence_level: 0.5
  }
  DataSemantics {
    state: {},
    invariants: []
  }
  BehavioralSemantics {
    operations: [
      {
        name: "z_operation",
        precondition: "none",
        parameters: {},
        postcondition: "done",
        side_effects: [],
        idempotence: "idempotent"
      },
      {
        name: "a_operation",
        precondition: "none",
        parameters: {},
        postcondition: "done",
        side_effects: [],
        idempotence: "idempotent"
      }
    ]
  }
  ExecutionConstraints {
    trigger_types: ["manual"],
    resource_limits: {
      max_memory_bytes: 1048576,
      computation_timeout_ms: 100,
      max_state_size_bytes: 1048576
    },
    external_permissions: [],
    sandbox_mode: "full_isolation"
  }
  HumanMachineContract {
    system_commitments: [],
    system_refusals: [],
    user_obligations: []
  }
}"#;
        let normalized = normalize(input).unwrap();
        // Compare byte offsets of the first occurrence of each operation name.
        let a_pos = normalized.find("a_operation").unwrap();
        let z_pos = normalized.find("z_operation").unwrap();
        assert!(a_pos < z_pos, "a_operation should come before z_operation");
    }
893
    /// Invariants, trigger types and system commitments, each declared as
    /// a `z_…` entry followed by an `a_…` entry, must come out sorted.
    #[test]
    fn test_normalize_sorts_string_lists() {
        let input = r#"Contract {
  Identity {
    stable_id: "ic-sort-lists-001",
    version: 1,
    created_timestamp: 2026-02-01T00:00:00Z,
    owner: "test",
    semantic_hash: "0000000000000000"
  }
  PurposeStatement {
    narrative: "Sort lists test",
    intent_source: "test",
    confidence_level: 0.5
  }
  DataSemantics {
    state: {},
    invariants: ["z_invariant", "a_invariant"]
  }
  BehavioralSemantics {
    operations: []
  }
  ExecutionConstraints {
    trigger_types: ["z_trigger", "a_trigger"],
    resource_limits: {
      max_memory_bytes: 1048576,
      computation_timeout_ms: 100,
      max_state_size_bytes: 1048576
    },
    external_permissions: [],
    sandbox_mode: "full_isolation"
  }
  HumanMachineContract {
    system_commitments: ["z_commit", "a_commit"],
    system_refusals: [],
    user_obligations: []
  }
}"#;
        let normalized = normalize(input).unwrap();

        // Each check compares byte offsets of the first occurrence of the
        // two entries in the normalized text.

        // Check invariants sorted
        let a_inv = normalized.find("a_invariant").unwrap();
        let z_inv = normalized.find("z_invariant").unwrap();
        assert!(a_inv < z_inv, "Invariants should be sorted");

        // Check trigger_types sorted
        let a_trig = normalized.find("a_trigger").unwrap();
        let z_trig = normalized.find("z_trigger").unwrap();
        assert!(a_trig < z_trig, "Trigger types should be sorted");

        // Check commitments sorted
        let a_com = normalized.find("a_commit").unwrap();
        let z_com = normalized.find("z_commit").unwrap();
        assert!(a_com < z_com, "Commitments should be sorted");
    }
949
950    // ── Canonical field order in Identity ──────────────
951
952    #[test]
953    fn test_normalize_identity_fields_sorted() {
954        let normalized = normalize(MINIMAL_CONTRACT).unwrap();
955        // In canonical form, Identity fields should be alphabetical:
956        // created_timestamp, owner, semantic_hash, stable_id, version
957        let ct = normalized.find("created_timestamp").unwrap();
958        let ow = normalized.find("owner").unwrap();
959        let sh = normalized.find("semantic_hash").unwrap();
960        let si = normalized.find("stable_id").unwrap();
961        let ver = normalized.find("version").unwrap();
962        assert!(ct < ow, "created_timestamp before owner");
963        assert!(ow < sh, "owner before semantic_hash");
964        assert!(sh < si, "semantic_hash before stable_id");
965        assert!(si < ver, "stable_id before version");
966    }
967
968    // ── Comment removal ────────────────────────────────
969
/// Asserts that a leading `//` comment line placed before `MINIMAL_CONTRACT`
/// does not appear in the normalized output.
#[test]
fn test_normalize_removes_comments() {
    let mut commented = String::from("// This is a comment\n");
    commented.push_str(MINIMAL_CONTRACT);

    let canonical = normalize(&commented).unwrap();
    let comment_survived = canonical.contains("// This is a comment");
    assert!(!comment_survived);
}
976
977    // ── SHA-256 hash ───────────────────────────────────
978
/// Asserts that the `semantic_hash` read back from the normalized text of
/// `MINIMAL_CONTRACT` has the shape of a SHA-256 hex digest: 64 characters,
/// all ASCII hex digits.
#[test]
fn test_normalize_computes_sha256_hash() {
    let canonical = normalize(MINIMAL_CONTRACT).unwrap();
    let reparsed = crate::parser::parse(&canonical).unwrap();
    let digest = &reparsed.identity.semantic_hash.value;

    // A 256-bit digest rendered as hex is 64 characters long.
    assert_eq!(
        digest.len(),
        64,
        "Hash should be 64 hex chars, got: {}",
        digest
    );

    let only_hex = digest.chars().all(|c| c.is_ascii_hexdigit());
    assert!(only_hex, "Hash should be hex, got: {}", digest);
}
993
/// Asserts that normalizing `MINIMAL_CONTRACT` twice yields the same
/// `semantic_hash` both times.
#[test]
fn test_normalize_hash_is_deterministic() {
    // Normalize, reparse, and pull the semantic hash out of the Identity node.
    let semantic_hash = || {
        let canonical = normalize(MINIMAL_CONTRACT).unwrap();
        let reparsed = crate::parser::parse(&canonical).unwrap();
        reparsed.identity.semantic_hash.value
    };

    let first = semantic_hash();
    let second = semantic_hash();
    assert_eq!(first, second, "Hash should be deterministic");
}
1008
/// Asserts that two contracts differing only in the `ic-test-001` /
/// `ic-test-002` substring normalize to different semantic hashes.
///
/// The second contract is derived from `MINIMAL_CONTRACT` by string
/// replacement, so the test first checks that the replacement target is
/// actually present. Without that guard, a renamed fixture id would turn the
/// replacement into a no-op and the failure would wrongly point at the hash.
#[test]
fn test_different_contracts_different_hashes() {
    assert!(
        MINIMAL_CONTRACT.contains("ic-test-001"),
        "MINIMAL_CONTRACT no longer contains \"ic-test-001\"; update this test's replacement target"
    );

    let contract_a = MINIMAL_CONTRACT;
    let contract_b = MINIMAL_CONTRACT.replace("ic-test-001", "ic-test-002");

    // Normalize, reparse, and pull the semantic hash out of the Identity node.
    let semantic_hash_of = |text: &str| {
        let canonical = normalize(text).unwrap();
        let reparsed = crate::parser::parse(&canonical).unwrap();
        reparsed.identity.semantic_hash.value
    };

    let hash_a = semantic_hash_of(contract_a);
    let hash_b = semantic_hash_of(&contract_b);
    assert_ne!(
        hash_a, hash_b,
        "Different contracts should have different hashes"
    );
}
1029
1030    // ── Idempotence proof ──────────────────────────────
1031
/// Asserts that normalizing already-normalized text of `MINIMAL_CONTRACT`
/// returns it unchanged.
#[test]
fn test_idempotence() {
    let first_pass = normalize(MINIMAL_CONTRACT).unwrap();
    // Feed the canonical text back in; the result must be byte-identical.
    let second_pass = normalize(first_pass.as_str()).unwrap();
    assert_eq!(
        first_pass, second_pass,
        "normalize(normalize(x)) must equal normalize(x)"
    );
}
1041
/// Asserts that a second normalization pass leaves the canonical text of the
/// `all-primitive-types.icl` fixture unchanged.
#[test]
fn test_idempotence_complex_contract() {
    let fixture = read_fixture("conformance/valid/all-primitive-types.icl");
    let first_pass = normalize(&fixture).unwrap();
    let second_pass = normalize(first_pass.as_str()).unwrap();
    assert_eq!(
        first_pass, second_pass,
        "Idempotence failure on complex contract"
    );
}
1049
/// Asserts that a second normalization pass leaves the canonical text of the
/// `multiple-operations.icl` fixture unchanged.
#[test]
fn test_idempotence_with_operations() {
    let fixture = read_fixture("conformance/valid/multiple-operations.icl");
    let first_pass = normalize(&fixture).unwrap();
    let second_pass = normalize(first_pass.as_str()).unwrap();
    assert_eq!(
        first_pass, second_pass,
        "Idempotence failure on contract with operations"
    );
}
1060
/// Asserts that a second normalization pass leaves the canonical text of the
/// `with-extensions.icl` fixture unchanged.
#[test]
fn test_idempotence_with_extensions() {
    let fixture = read_fixture("conformance/valid/with-extensions.icl");
    let first_pass = normalize(&fixture).unwrap();
    let second_pass = normalize(first_pass.as_str()).unwrap();
    assert_eq!(
        first_pass, second_pass,
        "Idempotence failure on contract with extensions"
    );
}
1071
1072    // ── Determinism proof (100 iterations) ─────────────
1073
/// Normalizes `MINIMAL_CONTRACT` once as a baseline, then 100 more times,
/// asserting every run reproduces the baseline exactly.
#[test]
fn test_determinism_100_iterations() {
    let baseline = normalize(MINIMAL_CONTRACT).unwrap();

    (0..100).for_each(|iteration| {
        let rerun = normalize(MINIMAL_CONTRACT).unwrap();
        assert_eq!(
            baseline, rerun,
            "Determinism failure at iteration {}",
            iteration
        );
    });
}
1083
/// Normalizes the `all-primitive-types.icl` fixture once as a baseline, then
/// 100 more times, asserting every run reproduces the baseline exactly.
#[test]
fn test_determinism_100_iterations_complex() {
    let fixture = read_fixture("conformance/valid/all-primitive-types.icl");
    let baseline = normalize(&fixture).unwrap();

    (0..100).for_each(|iteration| {
        let rerun = normalize(&fixture).unwrap();
        assert_eq!(
            baseline, rerun,
            "Determinism failure at iteration {}",
            iteration
        );
    });
}
1094
1095    // ── Semantic preservation ──────────────────────────
1096
/// Parses `MINIMAL_CONTRACT` directly and again after normalization, then
/// asserts the two ASTs agree on the fields compared below.
///
/// State fields and operations are compared by count only.
#[test]
fn test_semantic_preservation() {
    let before = crate::parser::parse(MINIMAL_CONTRACT).unwrap();
    let canonical = normalize(MINIMAL_CONTRACT).unwrap();
    let after = crate::parser::parse(&canonical).unwrap();

    // Identity: stable_id, version, and owner must carry over unchanged.
    let (id_before, id_after) = (&before.identity, &after.identity);
    assert_eq!(id_before.stable_id.value, id_after.stable_id.value);
    assert_eq!(id_before.version.value, id_after.version.value);
    assert_eq!(id_before.owner.value, id_after.owner.value);

    // PurposeStatement: narrative and confidence level.
    let (purpose_before, purpose_after) = (&before.purpose_statement, &after.purpose_statement);
    assert_eq!(
        purpose_before.narrative.value,
        purpose_after.narrative.value
    );
    assert_eq!(
        purpose_before.confidence_level.value,
        purpose_after.confidence_level.value
    );

    // DataSemantics: same number of state fields.
    let state_before = before.data_semantics.state.len();
    let state_after = after.data_semantics.state.len();
    assert_eq!(state_before, state_after);

    // BehavioralSemantics: same number of operations.
    let ops_before = before.behavioral_semantics.operations.len();
    let ops_after = after.behavioral_semantics.operations.len();
    assert_eq!(ops_before, ops_after);

    // ExecutionConstraints: sandbox mode.
    assert_eq!(
        before.execution_constraints.sandbox_mode.value,
        after.execution_constraints.sandbox_mode.value
    );
}
1146
/// Asserts that normalizing the `multiple-operations.icl` fixture keeps the
/// same number of operations and the same set of operation names.
///
/// Names are sorted before comparison, so a change in operation order is not
/// treated as a difference.
#[test]
fn test_semantic_preservation_complex() {
    let fixture = read_fixture("conformance/valid/multiple-operations.icl");
    let before = crate::parser::parse(&fixture).unwrap();
    let canonical = normalize(&fixture).unwrap();
    let after = crate::parser::parse(&canonical).unwrap();

    let ops_before = &before.behavioral_semantics.operations;
    let ops_after = &after.behavioral_semantics.operations;
    assert_eq!(ops_before.len(), ops_after.len());

    // Gather the operation names from each AST.
    let mut names_before = Vec::new();
    for op in ops_before.iter() {
        names_before.push(op.name.value.clone());
    }
    let mut names_after = Vec::new();
    for op in ops_after.iter() {
        names_after.push(op.name.value.clone());
    }

    // Sort both sides so only the set of names is compared, not the order.
    names_before.sort();
    names_after.sort();
    assert_eq!(names_before, names_after);
}
1176
1177    // ── Conformance fixtures ───────────────────────────
1178
/// Conformance: the normalized text of `valid/minimal-contract.icl` must be
/// accepted by the parser.
///
/// On failure the parse error is included in the panic message, so the reason
/// for the rejection is visible in the test output.
#[test]
fn test_normalize_conformance_valid_minimal() {
    let input = read_fixture("conformance/valid/minimal-contract.icl");
    let normalized = normalize(&input).unwrap();
    if let Err(err) = crate::parser::parse(&normalized) {
        panic!(
            "Normalized valid/minimal-contract.icl doesn't reparse: {:?}",
            err
        );
    }
}
1189
/// Conformance: the normalized text of `valid/all-primitive-types.icl` must be
/// accepted by the parser.
///
/// On failure the parse error is included in the panic message, so the reason
/// for the rejection is visible in the test output.
#[test]
fn test_normalize_conformance_valid_all_types() {
    let input = read_fixture("conformance/valid/all-primitive-types.icl");
    let normalized = normalize(&input).unwrap();
    if let Err(err) = crate::parser::parse(&normalized) {
        panic!(
            "Normalized valid/all-primitive-types.icl doesn't reparse: {:?}",
            err
        );
    }
}
1200
/// Conformance: the normalized text of `valid/composite-types.icl` must be
/// accepted by the parser.
///
/// On failure the parse error is included in the panic message, so the reason
/// for the rejection is visible in the test output.
#[test]
fn test_normalize_conformance_valid_composite() {
    let input = read_fixture("conformance/valid/composite-types.icl");
    let normalized = normalize(&input).unwrap();
    if let Err(err) = crate::parser::parse(&normalized) {
        panic!(
            "Normalized valid/composite-types.icl doesn't reparse: {:?}",
            err
        );
    }
}
1211
/// Conformance: the normalized text of `valid/multiple-operations.icl` must be
/// accepted by the parser.
///
/// On failure the parse error is included in the panic message, so the reason
/// for the rejection is visible in the test output.
#[test]
fn test_normalize_conformance_valid_operations() {
    let input = read_fixture("conformance/valid/multiple-operations.icl");
    let normalized = normalize(&input).unwrap();
    if let Err(err) = crate::parser::parse(&normalized) {
        panic!(
            "Normalized valid/multiple-operations.icl doesn't reparse: {:?}",
            err
        );
    }
}
1222
/// Conformance: the normalized text of `valid/with-extensions.icl` must be
/// accepted by the parser.
///
/// On failure the parse error is included in the panic message, so the reason
/// for the rejection is visible in the test output.
#[test]
fn test_normalize_conformance_valid_extensions() {
    let input = read_fixture("conformance/valid/with-extensions.icl");
    let normalized = normalize(&input).unwrap();
    if let Err(err) = crate::parser::parse(&normalized) {
        panic!(
            "Normalized valid/with-extensions.icl doesn't reparse: {:?}",
            err
        );
    }
}
1233}