Skip to main content

sim_codec/implementation/
runtime.rs

//! The core decoder/encoder runtime contracts.
//!
//! Defines `Input`/`Output`, the `DecodePosition`/`DecodeTarget` output-position
//! model, the `Decoder`/`Encoder` traits (with their located and tree variants),
//! and the `CodecRuntime` glue that registers a codec as a runtime object.
7
8use std::sync::Arc;
9
10use sim_kernel::{
11    ClassRef, CodecId, Cx, LocatedExpr, LocatedExprTree, Object, Result, ShapeRef, Symbol, Value,
12    WriteCx,
13};
14
15use super::limits::ReadCx;
16
/// Source material supplied to a [`Decoder`], either as text or as raw bytes.
///
/// [`Input::Text`] is the form used by text codecs and [`Input::Bytes`] the form
/// used by binary codecs. When a text codec is handed bytes, they are read as
/// UTF-8 (see [`Input::into_string`]).
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Input {
    /// Textual source.
    Text(String),
    /// Unprocessed source bytes.
    Bytes(Vec<u8>),
}
28
29impl Input {
30    /// Take the input as UTF-8 text, decoding [`Input::Bytes`] and failing closed
31    /// with a codec error if the bytes are not valid UTF-8.
32    pub fn into_string(self) -> Result<String> {
33        match self {
34            Self::Text(text) => Ok(text),
35            Self::Bytes(bytes) => {
36                String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
37                    codec: CodecId(0),
38                    message: err.to_string(),
39                })
40            }
41        }
42    }
43}
44
/// The result of running an [`Encoder`]: rendered text or rendered bytes.
///
/// [`Output::Text`] is what text codecs emit and [`Output::Bytes`] is what
/// binary codecs emit.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Output {
    /// Text rendering.
    Text(String),
    /// Byte rendering.
    Bytes(Vec<u8>),
}
55
56impl Output {
57    /// Take the output as UTF-8 text, decoding [`Output::Bytes`] and failing
58    /// closed with a codec error if the bytes are not valid UTF-8.
59    pub fn into_text(self) -> Result<String> {
60        match self {
61            Self::Text(text) => Ok(text),
62            Self::Bytes(bytes) => {
63                String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
64                    codec: CodecId(0),
65                    message: err.to_string(),
66                })
67            }
68        }
69    }
70}
71
/// Where in the syntax a decode result is headed; the decode-side counterpart
/// of the kernel's `EncodePosition`.
///
/// The codec contract is built around position: identical text may be read
/// differently depending on where the decoded value will end up. For instance,
/// a codec may lower forms to calls under [`DecodePosition::Eval`] while leaving
/// them as data in every other position (see
/// [`CodecDefaultDecode::target_for`]).
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DecodePosition {
    /// Evaluable position: the decoded result is going to be evaluated.
    Eval,
    /// Inside a quote: the decoded result is literal structure and is not
    /// evaluated.
    Quote,
    /// Plain data position.
    Data,
    /// Pattern (match/binding) position.
    Pattern,
}
90
91impl From<sim_kernel::EncodePosition> for DecodePosition {
92    fn from(position: sim_kernel::EncodePosition) -> Self {
93        match position {
94            sim_kernel::EncodePosition::Eval => Self::Eval,
95            sim_kernel::EncodePosition::Quote => Self::Quote,
96            sim_kernel::EncodePosition::Data => Self::Data,
97            sim_kernel::EncodePosition::Pattern => Self::Pattern,
98        }
99    }
100}
101
/// What a decode produces once its position is known: either inert data or an
/// evaluable term.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DecodeTarget {
    /// Produce a `Datum`, i.e. inert data.
    Datum,
    /// Produce a `Term`, i.e. an evaluable form such as a call.
    Term,
}
111
/// The rule a codec uses to pick a [`DecodeTarget`] for a given
/// [`DecodePosition`].
///
/// A data codec produces data in every position. An eval-aware codec produces a
/// term in eval position and data in all other positions.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CodecDefaultDecode {
    /// Produce a `Datum` no matter the position.
    Datum,
    /// Produce a `Term` under [`DecodePosition::Eval`] and a `Datum` elsewhere.
    TermInEvalDatumOtherwise,
}
123
124impl CodecDefaultDecode {
125    /// Resolve the [`DecodeTarget`] for `position` under this policy.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// use sim_codec::{CodecDefaultDecode, DecodePosition, DecodeTarget};
131    ///
132    /// let policy = CodecDefaultDecode::TermInEvalDatumOtherwise;
133    /// assert_eq!(policy.target_for(DecodePosition::Eval), DecodeTarget::Term);
134    /// assert_eq!(policy.target_for(DecodePosition::Data), DecodeTarget::Datum);
135    ///
136    /// // A pure data codec always yields data.
137    /// assert_eq!(
138    ///     CodecDefaultDecode::Datum.target_for(DecodePosition::Eval),
139    ///     DecodeTarget::Datum,
140    /// );
141    /// ```
142    pub fn target_for(self, position: DecodePosition) -> DecodeTarget {
143        match (self, position) {
144            (Self::TermInEvalDatumOtherwise, DecodePosition::Eval) => DecodeTarget::Term,
145            _ => DecodeTarget::Datum,
146        }
147    }
148
149    /// The stable kebab-case name of this policy, for metadata and display.
150    pub fn as_symbol_name(self) -> &'static str {
151        match self {
152            Self::Datum => "datum",
153            Self::TermInEvalDatumOtherwise => "term-in-eval-datum-otherwise",
154        }
155    }
156}
157
158/// The core decode contract: turn [`Input`] into a checked kernel `Expr`.
159///
160/// Every codec that can read implements `Decoder`. The [`ReadCx`] carries the
161/// kernel context, the codec id, the read policy, and the decode limits applied
162/// to untrusted input.
163pub trait Decoder: Send + Sync {
164    /// Decode `input` into a kernel `Expr`, charging the [`ReadCx`] budget.
165    fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr>;
166}
167
168/// A decoder that preserves source `Origin`, producing a [`LocatedExpr`].
169///
170/// Optional: [`CodecRuntime`] falls back to a plain [`Decoder`] with no origin
171/// when a codec provides none.
172pub trait LocatedDecoder: Send + Sync {
173    /// Decode `input` into a [`LocatedExpr`], attributing spans to `source_id`.
174    fn decode_located(
175        &self,
176        cx: &mut ReadCx<'_>,
177        input: Input,
178        source_id: String,
179    ) -> Result<LocatedExpr>;
180}
181
182/// A decoder that preserves the full source tree as a [`LocatedExprTree`],
183/// including trivia, for lossless round-tripping.
184pub trait TreeDecoder: Send + Sync {
185    /// Decode `input` into a [`LocatedExprTree`], attributing spans to
186    /// `source_id`.
187    fn decode_tree(
188        &self,
189        cx: &mut ReadCx<'_>,
190        input: Input,
191        source_id: String,
192    ) -> Result<LocatedExprTree>;
193}
194
195/// The core encode contract: render a kernel `Expr` to [`Output`].
196///
197/// Every codec that can write implements `Encoder`. The [`WriteCx`] carries the
198/// kernel context, the codec id, and the [`EncodeOptions`](sim_kernel::EncodeOptions)
199/// that fix the output position and fidelity.
200pub trait Encoder: Send + Sync {
201    /// Encode `expr` into [`Output`] under the context's encode options.
202    fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output>;
203}
204
205/// An encoder that consumes a [`LocatedExpr`], able to use source origin for a
206/// higher-fidelity rendering than the plain [`Encoder`].
207pub trait LocatedEncoder: Send + Sync {
208    /// Encode a [`LocatedExpr`], optionally using its origin for fidelity.
209    fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output>;
210}
211
212/// An encoder that consumes a full [`LocatedExprTree`], able to reproduce trivia
213/// and exact layout for lossless round-trips.
214pub trait TreeEncoder: Send + Sync {
215    /// Encode a [`LocatedExprTree`], reproducing layout and trivia where present.
216    fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output>;
217}
218
219#[sim_citizen_derive::non_citizen(
220    reason = "codec runtime registry handle; reconstruct by loading the codec lib and using the codec symbol",
221    kind = "handle",
222    descriptor = "core/Codec"
223)]
224/// A registered codec as a runtime object: a symbol-named bundle of optional
225/// decode/encode capabilities plus the Shapes and default-decode policy it
226/// exposes.
227///
228/// This is the value the kernel hands back from a codec lookup. Each capability
229/// is optional; the dispatch methods ([`decode`](CodecRuntime::decode),
230/// [`encode_located`](CodecRuntime::encode_located), ...) pick the richest
231/// implementation present and fall back to the plain forms otherwise, failing
232/// closed with a codec error when none is provided.
233pub struct CodecRuntime {
234    /// Stable id of this codec.
235    pub id: CodecId,
236    /// The symbol the codec is registered and looked up under.
237    pub symbol: Symbol,
238    /// Plain `Expr` decoder, if the codec can read.
239    pub decoder: Option<Arc<dyn Decoder>>,
240    /// Origin-preserving decoder, if available.
241    pub located_decoder: Option<Arc<dyn LocatedDecoder>>,
242    /// Full-tree (lossless) decoder, if available.
243    pub tree_decoder: Option<Arc<dyn TreeDecoder>>,
244    /// Plain `Expr` encoder, if the codec can write.
245    pub encoder: Option<Arc<dyn Encoder>>,
246    /// Origin-aware encoder, if available.
247    pub located_encoder: Option<Arc<dyn LocatedEncoder>>,
248    /// Full-tree (lossless) encoder, if available.
249    pub tree_encoder: Option<Arc<dyn TreeEncoder>>,
250    /// Shape describing the expressions this codec accepts/produces.
251    pub expr_shape: ShapeRef,
252    /// Shape describing this codec's options table.
253    pub options_shape: ShapeRef,
254    /// How this codec maps a [`DecodePosition`] to a [`DecodeTarget`].
255    pub default_decode: CodecDefaultDecode,
256}
257
258impl CodecRuntime {
259    /// Decode `input` with this codec's plain decoder, erroring if it has none.
260    pub fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr> {
261        let Some(decoder) = &self.decoder else {
262            return Err(sim_kernel::Error::CodecError {
263                codec: self.id,
264                message: format!("codec {} has no decoder", self.symbol),
265            });
266        };
267        decoder.decode(cx, input)
268    }
269
270    /// Encode `expr` with this codec's plain encoder, erroring if it has none.
271    pub fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output> {
272        let Some(encoder) = &self.encoder else {
273            return Err(sim_kernel::Error::CodecError {
274                codec: self.id,
275                message: format!("codec {} has no encoder", self.symbol),
276            });
277        };
278        encoder.encode(cx, expr)
279    }
280
281    /// Decode `input` preserving origin, falling back to [`decode`](Self::decode)
282    /// with no origin when the codec has no [`LocatedDecoder`].
283    pub fn decode_located(
284        &self,
285        cx: &mut ReadCx<'_>,
286        input: Input,
287        source_id: String,
288    ) -> Result<LocatedExpr> {
289        if let Some(decoder) = &self.located_decoder {
290            return decoder.decode_located(cx, input, source_id);
291        }
292        Ok(LocatedExpr {
293            expr: self.decode(cx, input)?,
294            origin: None,
295        })
296    }
297
298    /// Decode `input` into a full tree, falling back to
299    /// [`decode_located`](Self::decode_located) reconstructed recursively when
300    /// the codec has no [`TreeDecoder`].
301    pub fn decode_tree(
302        &self,
303        cx: &mut ReadCx<'_>,
304        input: Input,
305        source_id: String,
306    ) -> Result<LocatedExprTree> {
307        if let Some(decoder) = &self.tree_decoder {
308            return decoder.decode_tree(cx, input, source_id);
309        }
310        let located = self.decode_located(cx, input, source_id)?;
311        let mut tree = LocatedExprTree::from_expr_recursive(located.expr.clone());
312        tree.origin = located.origin;
313        Ok(tree)
314    }
315
316    /// Encode a [`LocatedExpr`], using the origin-aware encoder only when
317    /// lossless-origin output is requested; otherwise drop the origin and use
318    /// [`encode`](Self::encode).
319    pub fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output> {
320        if cx.options.lossless_origin
321            && let Some(encoder) = &self.located_encoder
322        {
323            return encoder.encode_located(cx, expr);
324        }
325        self.encode(cx, &expr.expr)
326    }
327
328    /// Encode a [`LocatedExprTree`], preferring the tree encoder then the
329    /// located encoder for lossless-origin output, and otherwise dropping to
330    /// [`encode`](Self::encode) on the bare expression.
331    pub fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output> {
332        if cx.options.lossless_origin {
333            if let Some(encoder) = &self.tree_encoder {
334                return encoder.encode_tree(cx, expr);
335            }
336            if let Some(encoder) = &self.located_encoder {
337                return encoder.encode_located(cx, &expr.located());
338            }
339        }
340        self.encode(cx, &expr.expr)
341    }
342}
343
344impl Object for CodecRuntime {
345    fn display(&self, _cx: &mut Cx) -> Result<String> {
346        Ok(format!("#<codec {}>", self.symbol))
347    }
348
349    fn as_any(&self) -> &dyn std::any::Any {
350        self
351    }
352}
353
354impl sim_kernel::ObjectCompat for CodecRuntime {
355    fn class(&self, cx: &mut Cx) -> Result<ClassRef> {
356        if let Some(value) = cx
357            .registry()
358            .class_by_symbol(&Symbol::qualified("core", "Codec"))
359        {
360            return Ok(value.clone());
361        }
362        cx.factory().class_stub(
363            sim_kernel::CORE_CODEC_CLASS_ID,
364            Symbol::qualified("core", "Codec"),
365        )
366    }
367    fn as_expr(&self, _cx: &mut Cx) -> Result<sim_kernel::Expr> {
368        Ok(sim_kernel::Expr::Symbol(self.symbol.clone()))
369    }
370    fn as_table(&self, cx: &mut Cx) -> Result<Value> {
371        cx.factory().table(vec![
372            (
373                Symbol::new("symbol"),
374                cx.factory().string(self.symbol.to_string())?,
375            ),
376            (
377                Symbol::new("has-decoder"),
378                cx.factory().bool(self.decoder.is_some())?,
379            ),
380            (
381                Symbol::new("has-encoder"),
382                cx.factory().bool(self.encoder.is_some())?,
383            ),
384            (
385                Symbol::new("has-located-decoder"),
386                cx.factory().bool(self.located_decoder.is_some())?,
387            ),
388            (
389                Symbol::new("has-located-encoder"),
390                cx.factory().bool(self.located_encoder.is_some())?,
391            ),
392            (
393                Symbol::new("has-tree-decoder"),
394                cx.factory().bool(self.tree_decoder.is_some())?,
395            ),
396            (
397                Symbol::new("has-tree-encoder"),
398                cx.factory().bool(self.tree_encoder.is_some())?,
399            ),
400            (
401                Symbol::new("default-decode"),
402                cx.factory()
403                    .string(self.default_decode.as_symbol_name().to_owned())?,
404            ),
405            (Symbol::new("expr-shape"), self.expr_shape.clone()),
406            (Symbol::new("options-shape"), self.options_shape.clone()),
407        ])
408    }
409}