Skip to main content

sim_codec/
prism.rs

1//! Codec Prism contract over registered codec runtimes.
2//!
3//! A Prism treats each codec surface as a view over one semantic expression:
4//! parse text or bytes, record spans and diagnostics, encode the same semantic
5//! id at an output position, and prove whether a surface round-trips without
6//! semantic loss.
7
8use sim_kernel::{Cx, EncodeOptions, EncodePosition, Expr, ReadPolicy, SourceId, Symbol};
9
10use crate::{Input, Output, decode_tree_with_codec, encode_with_codec};
11
12/// A codec-aware editor contract for one codec surface.
13pub trait CodecPrism {
14    /// Parses text into a semantic expression id, span map, and diagnostics.
15    fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse;
16
17    /// Encodes a parsed semantic id at a target output position.
18    fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode;
19
20    /// Parses, encodes, and reparses text to prove semantic identity.
21    fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip;
22}
23
24/// Runtime-backed [`CodecPrism`] for an installed codec symbol.
25#[derive(Clone, Debug, PartialEq, Eq)]
26pub struct RuntimeCodecPrism {
27    codec: Symbol,
28    surface: PrismSurface,
29}
30
31impl RuntimeCodecPrism {
32    /// Builds a prism for a general-purpose expression codec.
33    pub fn general(codec: Symbol) -> Self {
34        Self {
35            codec,
36            surface: PrismSurface::GeneralPurpose,
37        }
38    }
39
40    /// Builds a fail-closed prism for a domain codec.
41    pub fn domain(codec: Symbol, domain: impl Into<String>) -> Self {
42        Self {
43            codec,
44            surface: PrismSurface::Domain {
45                name: domain.into(),
46            },
47        }
48    }
49
50    /// Builds a prism for the binary frame codec.
51    pub fn binary(codec: Symbol) -> Self {
52        Self {
53            codec,
54            surface: PrismSurface::BinaryInspection {
55                carrier: BinaryCarrier::Bytes,
56            },
57        }
58    }
59
60    /// Builds a prism for the base64 text wrapper around binary frames.
61    pub fn binary_base64(codec: Symbol) -> Self {
62        Self {
63            codec,
64            surface: PrismSurface::BinaryInspection {
65                carrier: BinaryCarrier::Base64Text,
66            },
67        }
68    }
69
70    /// The codec symbol this prism drives.
71    pub fn codec(&self) -> &Symbol {
72        &self.codec
73    }
74
75    /// Parses raw bytes. Text codecs receive UTF-8 validation from the codec
76    /// runtime; binary codecs inspect the bytes as untrusted frame data.
77    pub fn parse_bytes(&self, cx: &mut Cx, bytes: &[u8]) -> PrismParse {
78        self.parse_input(cx, Input::Bytes(bytes.to_vec()), bytes.len())
79    }
80
81    /// Parses, encodes, and reparses raw bytes to prove semantic identity.
82    pub fn round_trip_bytes(
83        &self,
84        cx: &mut Cx,
85        bytes: &[u8],
86        position: EncodePosition,
87    ) -> RoundTrip {
88        self.round_trip_input(cx, Input::Bytes(bytes.to_vec()), bytes.len(), position)
89    }
90
91    fn parse_input(&self, cx: &mut Cx, input: Input, source_len: usize) -> PrismParse {
92        let input_kind = match &input {
93            Input::Text(_) => PrismInputKind::Text,
94            Input::Bytes(_) => PrismInputKind::Bytes,
95        };
96        let source_id = format!("codec-prism:{}", self.codec);
97        match decode_tree_with_codec(
98            cx,
99            &self.codec,
100            input.clone(),
101            ReadPolicy::default(),
102            source_id.clone(),
103        ) {
104            Ok(tree) => {
105                let semantic_id = SemanticId::from_expr(tree.expr.clone());
106                let mut span_map = Vec::new();
107                collect_spans(&tree, &mut span_map);
108                if span_map.is_empty() {
109                    span_map.push(PrismSpan {
110                        source: SourceId(source_id),
111                        start: 0,
112                        end: source_len,
113                    });
114                }
115                let diagnostics = self.surface_diagnostics(true, None);
116                PrismParse {
117                    codec: self.codec.clone(),
118                    semantic_id: Some(semantic_id),
119                    expr: Some(tree.expr),
120                    span_map,
121                    diagnostics,
122                    inspection: PrismInspection::new(input_kind, self.surface.is_executable()),
123                }
124            }
125            Err(error) => PrismParse {
126                codec: self.codec.clone(),
127                semantic_id: None,
128                expr: None,
129                span_map: Vec::new(),
130                diagnostics: self.surface_diagnostics(false, Some(error.to_string())),
131                inspection: PrismInspection::new(input_kind, self.surface.is_executable()),
132            },
133        }
134    }
135
136    fn surface_diagnostics(&self, accepted: bool, error: Option<String>) -> Vec<PrismDiagnostic> {
137        match (&self.surface, accepted, error) {
138            (PrismSurface::Domain { name }, false, Some(error)) => vec![PrismDiagnostic::error(
139                "domain-rejected",
140                format!("{name} codec rejected non-domain input: {error}"),
141            )],
142            (_, false, Some(error)) => {
143                vec![PrismDiagnostic::error("parse-error", error)]
144            }
145            _ => Vec::new(),
146        }
147    }
148
149    fn output_to_input(&self, output: &PrismOutput) -> Input {
150        match output {
151            PrismOutput::Text(text) => Input::Text(text.clone()),
152            PrismOutput::Bytes(bytes) => Input::Bytes(bytes.clone()),
153        }
154    }
155
156    fn round_trip_input(
157        &self,
158        cx: &mut Cx,
159        input: Input,
160        source_len: usize,
161        position: EncodePosition,
162    ) -> RoundTrip {
163        let parse = self.parse_input(cx, input, source_len);
164        let encode = parse
165            .semantic_id
166            .as_ref()
167            .map(|id| self.encode(cx, id, position))
168            .unwrap_or_else(|| PrismEncode {
169                codec: self.codec.clone(),
170                position,
171                output: None,
172                diagnostics: vec![PrismDiagnostic::error(
173                    "parse-missing",
174                    "parse did not produce a semantic id",
175                )],
176            });
177        let reparsed = encode.output.as_ref().map(|output| {
178            let input = self.output_to_input(output);
179            let len = output.len();
180            self.parse_input(cx, input, len)
181        });
182        let loss_report = LossReport::from_parts(&parse, &encode, reparsed.as_ref());
183        RoundTrip {
184            parse,
185            encode,
186            reparsed,
187            loss_report,
188        }
189    }
190}
191
192impl CodecPrism for RuntimeCodecPrism {
193    fn parse(&self, cx: &mut Cx, text: &str) -> PrismParse {
194        self.parse_input(cx, Input::Text(text.to_owned()), text.len())
195    }
196
197    fn encode(&self, cx: &mut Cx, id: &SemanticId, position: EncodePosition) -> PrismEncode {
198        let Some(expr) = &id.expr else {
199            return PrismEncode {
200                codec: self.codec.clone(),
201                position,
202                output: None,
203                diagnostics: vec![PrismDiagnostic::error(
204                    "semantic-id-missing",
205                    "semantic id does not carry an expression for encoding",
206                )],
207            };
208        };
209        let options = EncodeOptions {
210            position,
211            ..EncodeOptions::default()
212        };
213        match encode_with_codec(cx, &self.codec, expr, options) {
214            Ok(Output::Text(text)) => PrismEncode {
215                codec: self.codec.clone(),
216                position,
217                output: Some(PrismOutput::Text(text)),
218                diagnostics: Vec::new(),
219            },
220            Ok(Output::Bytes(bytes)) => PrismEncode {
221                codec: self.codec.clone(),
222                position,
223                output: Some(PrismOutput::Bytes(bytes)),
224                diagnostics: Vec::new(),
225            },
226            Err(error) => PrismEncode {
227                codec: self.codec.clone(),
228                position,
229                output: None,
230                diagnostics: vec![PrismDiagnostic::error("encode-error", error.to_string())],
231            },
232        }
233    }
234
235    fn round_trip(&self, cx: &mut Cx, text: &str, position: EncodePosition) -> RoundTrip {
236        self.round_trip_input(cx, Input::Text(text.to_owned()), text.len(), position)
237    }
238}
239
240/// The class of codec surface a Prism is driving.
241#[derive(Clone, Debug, PartialEq, Eq)]
242pub enum PrismSurface {
243    /// General-purpose expression codec.
244    GeneralPurpose,
245    /// Domain codec that fails closed outside `name`.
246    Domain {
247        /// The domain label shown in diagnostics.
248        name: String,
249    },
250    /// Binary frame inspection surface.
251    BinaryInspection {
252        /// How the bytes are carried.
253        carrier: BinaryCarrier,
254    },
255}
256
257impl PrismSurface {
258    fn is_executable(&self) -> bool {
259        false
260    }
261}
262
/// The carrier used for binary frame bytes at the codec boundary.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinaryCarrier {
    /// Frames travel as raw bytes.
    Bytes,
    /// Frames travel as base64 text.
    Base64Text,
}
271
/// The carrier of the input a Prism inspected.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PrismInputKind {
    /// The input was UTF-8 text.
    Text,
    /// The input was raw bytes.
    Bytes,
}
280
281/// Metadata describing how input was inspected.
282#[derive(Clone, Debug, PartialEq, Eq)]
283pub struct PrismInspection {
284    /// Input carrier type.
285    pub input: PrismInputKind,
286    /// Whether the Prism treats the input as trusted executable code.
287    pub trusted_executable: bool,
288}
289
290impl PrismInspection {
291    fn new(input: PrismInputKind, trusted_executable: bool) -> Self {
292        Self {
293            input,
294            trusted_executable,
295        }
296    }
297}
298
299/// Stable identity for a semantic expression.
300#[derive(Clone, Debug, PartialEq, Eq)]
301pub struct SemanticId {
302    /// Stable display id for comparing Prism results.
303    pub stable: String,
304    /// The expression behind the id, retained for immediate re-encoding.
305    pub expr: Option<Expr>,
306}
307
308impl SemanticId {
309    /// Builds a semantic id from an expression's canonical key.
310    pub fn from_expr(expr: Expr) -> Self {
311        let stable = format!(
312            "expr:{}",
313            stable_hash(&format!("{:?}", expr.canonical_key()))
314        );
315        Self {
316            stable,
317            expr: Some(expr),
318        }
319    }
320}
321
322/// A half-open byte span belonging to a parsed surface.
323#[derive(Clone, Debug, PartialEq, Eq)]
324pub struct PrismSpan {
325    /// Source id.
326    pub source: SourceId,
327    /// Inclusive start byte.
328    pub start: usize,
329    /// Exclusive end byte.
330    pub end: usize,
331}
332
333/// A parse diagnostic surfaced by the Prism.
334#[derive(Clone, Debug, PartialEq, Eq)]
335pub struct PrismDiagnostic {
336    /// Severity label.
337    pub severity: DiagnosticSeverity,
338    /// Stable diagnostic code.
339    pub code: String,
340    /// Human-readable diagnostic message.
341    pub message: String,
342    /// Optional source span.
343    pub span: Option<PrismSpan>,
344}
345
346impl PrismDiagnostic {
347    /// Creates an error diagnostic without a span.
348    pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
349        Self {
350            severity: DiagnosticSeverity::Error,
351            code: code.into(),
352            message: message.into(),
353            span: None,
354        }
355    }
356}
357
/// Severity of a [`PrismDiagnostic`].
///
/// This is a fieldless enum, so it derives `Copy` like the other fieldless
/// enums in this module.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// Informational diagnostic.
    Info,
    /// Warning diagnostic.
    Warning,
    /// Error diagnostic.
    Error,
}
368
369/// Parse result for one codec surface.
370#[derive(Clone, Debug, PartialEq, Eq)]
371pub struct PrismParse {
372    /// Codec symbol used for parsing.
373    pub codec: Symbol,
374    /// Semantic id, if parsing succeeded.
375    pub semantic_id: Option<SemanticId>,
376    /// Parsed expression, if parsing succeeded.
377    pub expr: Option<Expr>,
378    /// Span map over the parsed input.
379    pub span_map: Vec<PrismSpan>,
380    /// Parse diagnostics.
381    pub diagnostics: Vec<PrismDiagnostic>,
382    /// Inspection metadata.
383    pub inspection: PrismInspection,
384}
385
/// The payload produced by a Prism encode pass.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PrismOutput {
    /// Encoded text.
    Text(String),
    /// Encoded raw bytes.
    Bytes(Vec<u8>),
}

impl PrismOutput {
    /// Renders the output for display.
    ///
    /// Text is returned unchanged. Bytes are rendered as
    /// `"<count> bytes: <lowercase hex>"`, two hex digits per byte.
    pub fn display(&self) -> String {
        match self {
            Self::Text(text) => text.to_owned(),
            Self::Bytes(bytes) => {
                let hex: String = bytes.iter().map(|byte| format!("{byte:02x}")).collect();
                format!("{} bytes: {hex}", bytes.len())
            }
        }
    }

    /// Length of the output in bytes: the UTF-8 byte length for text, the
    /// byte count for raw output.
    pub fn len(&self) -> usize {
        match self {
            Self::Bytes(bytes) => bytes.len(),
            Self::Text(text) => text.len(),
        }
    }

    /// Returns `true` when the output has zero length.
    pub fn is_empty(&self) -> bool {
        match self {
            Self::Bytes(bytes) => bytes.is_empty(),
            Self::Text(text) => text.is_empty(),
        }
    }
}
424
425/// Encode result for one codec surface.
426#[derive(Clone, Debug, PartialEq, Eq)]
427pub struct PrismEncode {
428    /// Codec symbol used for encoding.
429    pub codec: Symbol,
430    /// Target output position.
431    pub position: EncodePosition,
432    /// Encoded output, if encoding succeeded.
433    pub output: Option<PrismOutput>,
434    /// Encode diagnostics.
435    pub diagnostics: Vec<PrismDiagnostic>,
436}
437
438/// Loss report for one parse/encode/reparse cycle.
439#[derive(Clone, Debug, PartialEq, Eq)]
440pub struct LossReport {
441    /// Whether the whole cycle had no diagnostics and preserved semantic id.
442    pub lossless: bool,
443    /// Whether parse and reparse produced the same semantic identity.
444    pub semantic_identity: bool,
445    /// Diagnostics collected across the cycle.
446    pub diagnostics: Vec<PrismDiagnostic>,
447}
448
449impl LossReport {
450    fn from_parts(parse: &PrismParse, encode: &PrismEncode, reparsed: Option<&PrismParse>) -> Self {
451        let semantic_identity = match (
452            parse.semantic_id.as_ref(),
453            reparsed.and_then(|parse| parse.semantic_id.as_ref()),
454        ) {
455            (Some(left), Some(right)) => left.stable == right.stable,
456            _ => false,
457        };
458        let mut diagnostics = Vec::new();
459        diagnostics.extend(parse.diagnostics.clone());
460        diagnostics.extend(encode.diagnostics.clone());
461        if let Some(reparsed) = reparsed {
462            diagnostics.extend(reparsed.diagnostics.clone());
463        }
464        if !semantic_identity {
465            diagnostics.push(PrismDiagnostic::error(
466                "semantic-identity-loss",
467                "parse and reparse semantic ids differ",
468            ));
469        }
470        Self {
471            lossless: semantic_identity && diagnostics.is_empty(),
472            semantic_identity,
473            diagnostics,
474        }
475    }
476}
477
478/// Full round-trip proof for one codec surface.
479#[derive(Clone, Debug, PartialEq, Eq)]
480pub struct RoundTrip {
481    /// Initial parse result.
482    pub parse: PrismParse,
483    /// Encode result.
484    pub encode: PrismEncode,
485    /// Parse result for the encoded output.
486    pub reparsed: Option<PrismParse>,
487    /// Loss report for the cycle.
488    pub loss_report: LossReport,
489}
490
491fn collect_spans(tree: &sim_kernel::LocatedExprTree, spans: &mut Vec<PrismSpan>) {
492    if let Some(origin) = &tree.origin {
493        spans.push(PrismSpan {
494            source: origin.source.clone(),
495            start: origin.span.start,
496            end: origin.span.end,
497        });
498    }
499    for child in &tree.children {
500        collect_spans(child, spans);
501    }
502}
503
/// Hashes `text` with 64-bit FNV-1a and returns the digest as 16 lowercase,
/// zero-padded hex digits.
fn stable_hash(text: &str) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: xor each byte into the state, then multiply by the prime.
    let digest = text.bytes().fold(OFFSET_BASIS, |state, byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    });
    format!("{digest:016x}")
}