// sim-codec 0.1.0-rc.1
// SIM workspace package for sim codec.

//! The core decoder/encoder runtime contracts.
//!
//! Defines `Input`/`Output`, the `DecodePosition`/`DecodeTarget` output-position
//! model, the `Decoder`/`Encoder` traits (with their located and tree variants),
//! and the `CodecRuntime` glue that registers a codec as a runtime
//! object.

use std::sync::Arc;

use sim_kernel::{
    ClassRef, CodecId, Cx, LocatedExpr, LocatedExprTree, Object, Result, ShapeRef, Symbol, Value,
    WriteCx,
};

use super::limits::ReadCx;

/// The source material given to a [`Decoder`]: either text or raw bytes.
///
/// [`Input::Text`] is what text codecs consume and [`Input::Bytes`] is what
/// binary codecs consume. When a text codec receives bytes, it reads them as
/// UTF-8 through [`Input::into_string`].
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Input {
    /// Source supplied as text.
    Text(String),
    /// Source supplied as raw, uninterpreted bytes.
    Bytes(Vec<u8>),
}

impl Input {
    /// Take the input as UTF-8 text, decoding [`Input::Bytes`] and failing closed
    /// with a codec error if the bytes are not valid UTF-8.
    pub fn into_string(self) -> Result<String> {
        match self {
            Self::Text(text) => Ok(text),
            Self::Bytes(bytes) => {
                String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
                    codec: CodecId(0),
                    message: err.to_string(),
                })
            }
        }
    }
}

/// What an [`Encoder`] hands back after rendering: either text or bytes.
///
/// [`Output::Text`] is what text codecs emit and [`Output::Bytes`] is what
/// binary codecs emit.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Output {
    /// Rendering delivered as text.
    Text(String),
    /// Rendering delivered as raw bytes.
    Bytes(Vec<u8>),
}

impl Output {
    /// Take the output as UTF-8 text, decoding [`Output::Bytes`] and failing
    /// closed with a codec error if the bytes are not valid UTF-8.
    pub fn into_text(self) -> Result<String> {
        match self {
            Self::Text(text) => Ok(text),
            Self::Bytes(bytes) => {
                String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
                    codec: CodecId(0),
                    message: err.to_string(),
                })
            }
        }
    }
}

/// Where the result of a decode is going to land, mirroring the kernel's
/// `EncodePosition`.
///
/// Position is central to the codec contract: the same text may be read
/// differently depending on its destination. For instance, a codec may lower
/// forms to calls in [`DecodePosition::Eval`] while keeping them as data in
/// every other position (see [`CodecDefaultDecode::target_for`]).
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DecodePosition {
    /// Evaluable position: the decoded result will be evaluated.
    Eval,
    /// Inside a quote: the decoded result is literal structure and is not
    /// evaluated.
    Quote,
    /// Plain data position.
    Data,
    /// Pattern (match/binding) position.
    Pattern,
}

impl From<sim_kernel::EncodePosition> for DecodePosition {
    fn from(position: sim_kernel::EncodePosition) -> Self {
        match position {
            sim_kernel::EncodePosition::Eval => Self::Eval,
            sim_kernel::EncodePosition::Quote => Self::Quote,
            sim_kernel::EncodePosition::Data => Self::Data,
            sim_kernel::EncodePosition::Pattern => Self::Pattern,
        }
    }
}

/// What a decode produces once its position has been settled: inert data or
/// an evaluable term.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum DecodeTarget {
    /// Produce a `Datum`, i.e. inert data.
    Datum,
    /// Produce a `Term`, i.e. an evaluable form such as a call.
    Term,
}

/// The rule a codec uses to pick a [`DecodeTarget`] for a given
/// [`DecodePosition`].
///
/// A data codec yields data in every position; an eval-aware codec yields a
/// term in eval position and data in all others.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum CodecDefaultDecode {
    /// Decode to a `Datum` no matter the position.
    Datum,
    /// Decode to a `Term` in [`DecodePosition::Eval`] and to a `Datum` in any
    /// other position.
    TermInEvalDatumOtherwise,
}

impl CodecDefaultDecode {
    /// Pick the [`DecodeTarget`] this policy assigns to `position`.
    ///
    /// Only [`CodecDefaultDecode::TermInEvalDatumOtherwise`] combined with
    /// [`DecodePosition::Eval`] yields [`DecodeTarget::Term`]; every other
    /// combination yields [`DecodeTarget::Datum`].
    ///
    /// # Examples
    ///
    /// ```
    /// use sim_codec::{CodecDefaultDecode, DecodePosition, DecodeTarget};
    ///
    /// let policy = CodecDefaultDecode::TermInEvalDatumOtherwise;
    /// assert_eq!(policy.target_for(DecodePosition::Eval), DecodeTarget::Term);
    /// assert_eq!(policy.target_for(DecodePosition::Data), DecodeTarget::Datum);
    ///
    /// // A pure data codec always yields data.
    /// assert_eq!(
    ///     CodecDefaultDecode::Datum.target_for(DecodePosition::Eval),
    ///     DecodeTarget::Datum,
    /// );
    /// ```
    pub fn target_for(self, position: DecodePosition) -> DecodeTarget {
        match self {
            Self::Datum => DecodeTarget::Datum,
            Self::TermInEvalDatumOtherwise => match position {
                DecodePosition::Eval => DecodeTarget::Term,
                DecodePosition::Quote | DecodePosition::Data | DecodePosition::Pattern => {
                    DecodeTarget::Datum
                }
            },
        }
    }

    /// The fixed kebab-case name of this policy, used for metadata and display.
    pub fn as_symbol_name(self) -> &'static str {
        match self {
            Self::TermInEvalDatumOtherwise => "term-in-eval-datum-otherwise",
            Self::Datum => "datum",
        }
    }
}

/// The core decode contract: turn [`Input`] into a checked kernel `Expr`.
///
/// Every codec that can read implements `Decoder`. The [`ReadCx`] carries the
/// kernel context, the codec id, the read policy, and the decode limits applied
/// to untrusted input.
///
/// Implementors must be `Send + Sync`; [`CodecRuntime`] holds its decoder as an
/// `Arc<dyn Decoder>`.
pub trait Decoder: Send + Sync {
    /// Decode `input` into a kernel `Expr`, charging the [`ReadCx`] budget.
    ///
    /// `input` is taken by value, so the implementation owns the text or bytes.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr>;
}

/// A decoder that preserves source `Origin`, producing a [`LocatedExpr`].
///
/// Optional: [`CodecRuntime`] falls back to a plain [`Decoder`] with no origin
/// when a codec provides none.
///
/// Implementors must be `Send + Sync`; [`CodecRuntime`] holds this capability
/// as an `Option<Arc<dyn LocatedDecoder>>`.
pub trait LocatedDecoder: Send + Sync {
    /// Decode `input` into a [`LocatedExpr`], attributing spans to `source_id`.
    ///
    /// Both `input` and `source_id` are taken by value.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn decode_located(
        &self,
        cx: &mut ReadCx<'_>,
        input: Input,
        source_id: String,
    ) -> Result<LocatedExpr>;
}

/// A decoder that preserves the full source tree as a [`LocatedExprTree`],
/// including trivia, for lossless round-tripping.
///
/// Optional: when a codec provides none, [`CodecRuntime::decode_tree`] builds
/// the tree from the result of [`CodecRuntime::decode_located`] instead.
///
/// Implementors must be `Send + Sync`; [`CodecRuntime`] holds this capability
/// as an `Option<Arc<dyn TreeDecoder>>`.
pub trait TreeDecoder: Send + Sync {
    /// Decode `input` into a [`LocatedExprTree`], attributing spans to
    /// `source_id`.
    ///
    /// Both `input` and `source_id` are taken by value.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn decode_tree(
        &self,
        cx: &mut ReadCx<'_>,
        input: Input,
        source_id: String,
    ) -> Result<LocatedExprTree>;
}

/// The core encode contract: render a kernel `Expr` to [`Output`].
///
/// Every codec that can write implements `Encoder`. The [`WriteCx`] carries the
/// kernel context, the codec id, and the [`EncodeOptions`](sim_kernel::EncodeOptions)
/// that fix the output position and fidelity.
///
/// Implementors must be `Send + Sync`; [`CodecRuntime`] holds its encoder as an
/// `Arc<dyn Encoder>`.
pub trait Encoder: Send + Sync {
    /// Encode `expr` into [`Output`] under the context's encode options.
    ///
    /// `expr` is only borrowed; the caller keeps ownership.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output>;
}

/// An encoder that consumes a [`LocatedExpr`], able to use source origin for a
/// higher-fidelity rendering than the plain [`Encoder`].
///
/// Optional: [`CodecRuntime`] only dispatches here when the write context's
/// `lossless_origin` option is set, and uses the plain [`Encoder`] otherwise.
pub trait LocatedEncoder: Send + Sync {
    /// Encode a [`LocatedExpr`], optionally using its origin for fidelity.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output>;
}

/// An encoder that consumes a full [`LocatedExprTree`], able to reproduce trivia
/// and exact layout for lossless round-trips.
///
/// Optional: [`CodecRuntime::encode_tree`] only dispatches here when the write
/// context's `lossless_origin` option is set; otherwise, or when no tree
/// encoder is present, it uses a less specific encoder.
pub trait TreeEncoder: Send + Sync {
    /// Encode a [`LocatedExprTree`], reproducing layout and trivia where present.
    ///
    /// # Errors
    ///
    /// Failure conditions are implementation-defined; the signature only
    /// promises a kernel `Result`.
    fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output>;
}

#[sim_citizen_derive::non_citizen(
    reason = "codec runtime registry handle; reconstruct by loading the codec lib and using the codec symbol",
    kind = "handle",
    descriptor = "core/Codec"
)]
/// A registered codec as a runtime object: a symbol-named bundle of optional
/// decode/encode capabilities plus the Shapes and default-decode policy it
/// exposes.
///
/// This is the value the kernel hands back from a codec lookup. Each capability
/// is optional; the dispatch methods ([`decode`](CodecRuntime::decode),
/// [`encode_located`](CodecRuntime::encode_located), ...) pick the richest
/// implementation present and fall back to the plain forms otherwise, failing
/// closed with a codec error when none is provided.
///
/// Every capability is stored as an `Option<Arc<dyn Trait>>`, and all fields
/// are public.
pub struct CodecRuntime {
    /// Stable id of this codec; used to tag the codec errors raised by
    /// [`decode`](CodecRuntime::decode) and [`encode`](CodecRuntime::encode).
    pub id: CodecId,
    /// The symbol the codec is registered and looked up under; also used when
    /// displaying the codec and in its error messages.
    pub symbol: Symbol,
    /// Plain `Expr` decoder, if the codec can read.
    pub decoder: Option<Arc<dyn Decoder>>,
    /// Origin-preserving decoder, if available.
    pub located_decoder: Option<Arc<dyn LocatedDecoder>>,
    /// Full-tree (lossless) decoder, if available.
    pub tree_decoder: Option<Arc<dyn TreeDecoder>>,
    /// Plain `Expr` encoder, if the codec can write.
    pub encoder: Option<Arc<dyn Encoder>>,
    /// Origin-aware encoder, if available; only consulted when lossless-origin
    /// output is requested.
    pub located_encoder: Option<Arc<dyn LocatedEncoder>>,
    /// Full-tree (lossless) encoder, if available; only consulted when
    /// lossless-origin output is requested.
    pub tree_encoder: Option<Arc<dyn TreeEncoder>>,
    /// Shape describing the expressions this codec accepts/produces; reported
    /// under `expr-shape` in the table view.
    pub expr_shape: ShapeRef,
    /// Shape describing this codec's options table; reported under
    /// `options-shape` in the table view.
    pub options_shape: ShapeRef,
    /// How this codec maps a [`DecodePosition`] to a [`DecodeTarget`]; reported
    /// by name under `default-decode` in the table view.
    pub default_decode: CodecDefaultDecode,
}

impl CodecRuntime {
    /// Decode `input` with this codec's plain decoder.
    ///
    /// # Errors
    ///
    /// Returns a codec error tagged with this codec's id when no decoder is
    /// registered; otherwise forwards whatever the decoder returns.
    pub fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr> {
        let Some(decoder) = &self.decoder else {
            return Err(sim_kernel::Error::CodecError {
                codec: self.id,
                message: format!("codec {} has no decoder", self.symbol),
            });
        };
        decoder.decode(cx, input)
    }

    /// Encode `expr` with this codec's plain encoder.
    ///
    /// # Errors
    ///
    /// Returns a codec error tagged with this codec's id when no encoder is
    /// registered; otherwise forwards whatever the encoder returns.
    pub fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output> {
        let Some(encoder) = &self.encoder else {
            return Err(sim_kernel::Error::CodecError {
                codec: self.id,
                message: format!("codec {} has no encoder", self.symbol),
            });
        };
        encoder.encode(cx, expr)
    }

    /// Decode `input` preserving origin.
    ///
    /// Uses the [`LocatedDecoder`] when one is registered. Otherwise falls back
    /// to [`decode`](Self::decode) and wraps the result with `origin: None`; in
    /// that fallback `source_id` is not used.
    ///
    /// # Errors
    ///
    /// Forwards the error of whichever decoder ran, including the "no decoder"
    /// codec error from [`decode`](Self::decode).
    pub fn decode_located(
        &self,
        cx: &mut ReadCx<'_>,
        input: Input,
        source_id: String,
    ) -> Result<LocatedExpr> {
        if let Some(decoder) = &self.located_decoder {
            return decoder.decode_located(cx, input, source_id);
        }
        Ok(LocatedExpr {
            expr: self.decode(cx, input)?,
            origin: None,
        })
    }

    /// Decode `input` into a full tree.
    ///
    /// Uses the [`TreeDecoder`] when one is registered. Otherwise decodes via
    /// [`decode_located`](Self::decode_located), builds the tree from the
    /// resulting expression with `LocatedExprTree::from_expr_recursive`, and
    /// copies the top-level origin onto the tree.
    ///
    /// # Errors
    ///
    /// Forwards the error of whichever decoder ran.
    pub fn decode_tree(
        &self,
        cx: &mut ReadCx<'_>,
        input: Input,
        source_id: String,
    ) -> Result<LocatedExprTree> {
        if let Some(decoder) = &self.tree_decoder {
            return decoder.decode_tree(cx, input, source_id);
        }
        let located = self.decode_located(cx, input, source_id)?;
        // `located` is owned and not used afterwards, so its expression is
        // moved into the tree instead of being cloned.
        let mut tree = LocatedExprTree::from_expr_recursive(located.expr);
        tree.origin = located.origin;
        Ok(tree)
    }

    /// Encode a [`LocatedExpr`].
    ///
    /// The origin-aware encoder is used only when the context requests
    /// lossless-origin output and a [`LocatedEncoder`] is registered. In every
    /// other case the origin is dropped and [`encode`](Self::encode) handles the
    /// bare expression.
    ///
    /// # Errors
    ///
    /// Forwards the error of whichever encoder ran, including the "no encoder"
    /// codec error from [`encode`](Self::encode).
    pub fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output> {
        if cx.options.lossless_origin
            && let Some(encoder) = &self.located_encoder
        {
            return encoder.encode_located(cx, expr);
        }
        self.encode(cx, &expr.expr)
    }

    /// Encode a [`LocatedExprTree`].
    ///
    /// When the context requests lossless-origin output, the [`TreeEncoder`] is
    /// preferred, then the [`LocatedEncoder`] applied to `expr.located()`. If
    /// neither is registered, or lossless-origin output is not requested, the
    /// bare expression goes through [`encode`](Self::encode).
    ///
    /// # Errors
    ///
    /// Forwards the error of whichever encoder ran, including the "no encoder"
    /// codec error from [`encode`](Self::encode).
    pub fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output> {
        if cx.options.lossless_origin {
            if let Some(encoder) = &self.tree_encoder {
                return encoder.encode_tree(cx, expr);
            }
            if let Some(encoder) = &self.located_encoder {
                return encoder.encode_located(cx, &expr.located());
            }
        }
        self.encode(cx, &expr.expr)
    }
}

impl Object for CodecRuntime {
    /// Render the codec as `#<codec SYMBOL>`; the context is not consulted.
    fn display(&self, _cx: &mut Cx) -> Result<String> {
        let symbol = &self.symbol;
        let rendered = format!("#<codec {symbol}>");
        Ok(rendered)
    }

    /// Return this runtime as a `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
}

impl sim_kernel::ObjectCompat for CodecRuntime {
    /// Resolve the `core/Codec` class: the registered class if the registry has
    /// one, otherwise a stub built with `CORE_CODEC_CLASS_ID`.
    fn class(&self, cx: &mut Cx) -> Result<ClassRef> {
        let codec_class = Symbol::qualified("core", "Codec");
        if let Some(registered) = cx.registry().class_by_symbol(&codec_class) {
            return Ok(registered.clone());
        }
        cx.factory()
            .class_stub(sim_kernel::CORE_CODEC_CLASS_ID, codec_class)
    }

    /// Represent the codec as an expression: the symbol it is registered under.
    fn as_expr(&self, _cx: &mut Cx) -> Result<sim_kernel::Expr> {
        let symbol = self.symbol.clone();
        Ok(sim_kernel::Expr::Symbol(symbol))
    }

    /// Describe the codec as a table.
    ///
    /// Keys, in order: `symbol`, the six `has-*` capability flags,
    /// `default-decode`, `expr-shape`, and `options-shape`.
    fn as_table(&self, cx: &mut Cx) -> Result<Value> {
        // Capability flags, in the order they appear in the table.
        let capability_flags = [
            ("has-decoder", self.decoder.is_some()),
            ("has-encoder", self.encoder.is_some()),
            ("has-located-decoder", self.located_decoder.is_some()),
            ("has-located-encoder", self.located_encoder.is_some()),
            ("has-tree-decoder", self.tree_decoder.is_some()),
            ("has-tree-encoder", self.tree_encoder.is_some()),
        ];

        let mut entries = Vec::with_capacity(capability_flags.len() + 4);
        entries.push((
            Symbol::new("symbol"),
            cx.factory().string(self.symbol.to_string())?,
        ));
        for (key, present) in capability_flags {
            entries.push((Symbol::new(key), cx.factory().bool(present)?));
        }
        entries.push((
            Symbol::new("default-decode"),
            cx.factory()
                .string(self.default_decode.as_symbol_name().to_owned())?,
        ));
        entries.push((Symbol::new("expr-shape"), self.expr_shape.clone()));
        entries.push((Symbol::new("options-shape"), self.options_shape.clone()));

        cx.factory().table(entries)
    }
}