sim_codec/implementation/runtime.rs
1//! The core decoder/encoder runtime contracts.
2//!
3//! Defines `Input`/`Output`, the `DecodePosition`/`DecodeTarget` output-position
4//! model, the `Decoder`/`Encoder` traits (with their located and tree variants),
5//! and the `CodecRuntime` glue that registers a codec as a runtime
6//! object.
7
8use std::sync::Arc;
9
10use sim_kernel::{
11 ClassRef, CodecId, Cx, LocatedExpr, LocatedExprTree, Object, Result, ShapeRef, Symbol, Value,
12 WriteCx,
13};
14
15use super::limits::ReadCx;
16
/// The raw source a [`Decoder`] reads from: either text or bytes.
///
/// [`Input::Text`] is what text codecs consume and [`Input::Bytes`] is what
/// binary codecs consume. When a text codec is handed bytes, they are read as
/// UTF-8 through [`Input::into_string`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Input {
    /// Source supplied as text.
    Text(String),
    /// Source supplied as raw bytes.
    Bytes(Vec<u8>),
}
28
29impl Input {
30 /// Take the input as UTF-8 text, decoding [`Input::Bytes`] and failing closed
31 /// with a codec error if the bytes are not valid UTF-8.
32 pub fn into_string(self) -> Result<String> {
33 match self {
34 Self::Text(text) => Ok(text),
35 Self::Bytes(bytes) => {
36 String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
37 codec: CodecId(0),
38 message: err.to_string(),
39 })
40 }
41 }
42 }
43}
44
/// What an [`Encoder`] produces: rendered text or rendered bytes.
///
/// [`Output::Text`] is what text codecs emit and [`Output::Bytes`] is what
/// binary codecs emit.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Output {
    /// Output rendered as text.
    Text(String),
    /// Output rendered as raw bytes.
    Bytes(Vec<u8>),
}
55
56impl Output {
57 /// Take the output as UTF-8 text, decoding [`Output::Bytes`] and failing
58 /// closed with a codec error if the bytes are not valid UTF-8.
59 pub fn into_text(self) -> Result<String> {
60 match self {
61 Self::Text(text) => Ok(text),
62 Self::Bytes(bytes) => {
63 String::from_utf8(bytes).map_err(|err| sim_kernel::Error::CodecError {
64 codec: CodecId(0),
65 message: err.to_string(),
66 })
67 }
68 }
69 }
70}
71
/// Where the result of a decode is going to land, the decode-side counterpart
/// of the kernel's `EncodePosition`.
///
/// Position is central to the codec contract: the same text may be read
/// differently depending on its destination. For example, a codec may lower
/// forms to calls in [`DecodePosition::Eval`] while keeping them as data in
/// every other position (see [`CodecDefaultDecode::target_for`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodePosition {
    /// Evaluable position: the decoded result will be evaluated.
    Eval,
    /// Inside a quote: the decoded result is literal structure and is not
    /// evaluated.
    Quote,
    /// Plain data position.
    Data,
    /// Pattern (match/binding) position.
    Pattern,
}
90
91impl From<sim_kernel::EncodePosition> for DecodePosition {
92 fn from(position: sim_kernel::EncodePosition) -> Self {
93 match position {
94 sim_kernel::EncodePosition::Eval => Self::Eval,
95 sim_kernel::EncodePosition::Quote => Self::Quote,
96 sim_kernel::EncodePosition::Data => Self::Data,
97 sim_kernel::EncodePosition::Pattern => Self::Pattern,
98 }
99 }
100}
101
/// The checked form a decode settles on once its position is known: either
/// inert data or an evaluable term.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DecodeTarget {
    /// Produce a `Datum`, i.e. inert data.
    Datum,
    /// Produce a `Term`, i.e. an evaluable form such as a call.
    Term,
}
111
/// The rule a codec uses to turn a [`DecodePosition`] into a
/// [`DecodeTarget`].
///
/// Data codecs produce data everywhere; eval-aware codecs produce a term in
/// eval position and data in every other position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodecDefaultDecode {
    /// Produce a `Datum` no matter what the position is.
    Datum,
    /// Produce a `Term` in [`DecodePosition::Eval`] and a `Datum` elsewhere.
    TermInEvalDatumOtherwise,
}
123
124impl CodecDefaultDecode {
125 /// Resolve the [`DecodeTarget`] for `position` under this policy.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// use sim_codec::{CodecDefaultDecode, DecodePosition, DecodeTarget};
131 ///
132 /// let policy = CodecDefaultDecode::TermInEvalDatumOtherwise;
133 /// assert_eq!(policy.target_for(DecodePosition::Eval), DecodeTarget::Term);
134 /// assert_eq!(policy.target_for(DecodePosition::Data), DecodeTarget::Datum);
135 ///
136 /// // A pure data codec always yields data.
137 /// assert_eq!(
138 /// CodecDefaultDecode::Datum.target_for(DecodePosition::Eval),
139 /// DecodeTarget::Datum,
140 /// );
141 /// ```
142 pub fn target_for(self, position: DecodePosition) -> DecodeTarget {
143 match (self, position) {
144 (Self::TermInEvalDatumOtherwise, DecodePosition::Eval) => DecodeTarget::Term,
145 _ => DecodeTarget::Datum,
146 }
147 }
148
149 /// The stable kebab-case name of this policy, for metadata and display.
150 pub fn as_symbol_name(self) -> &'static str {
151 match self {
152 Self::Datum => "datum",
153 Self::TermInEvalDatumOtherwise => "term-in-eval-datum-otherwise",
154 }
155 }
156}
157
158/// The core decode contract: turn [`Input`] into a checked kernel `Expr`.
159///
160/// Every codec that can read implements `Decoder`. The [`ReadCx`] carries the
161/// kernel context, the codec id, the read policy, and the decode limits applied
162/// to untrusted input.
163pub trait Decoder: Send + Sync {
164 /// Decode `input` into a kernel `Expr`, charging the [`ReadCx`] budget.
165 fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr>;
166}
167
168/// A decoder that preserves source `Origin`, producing a [`LocatedExpr`].
169///
170/// Optional: [`CodecRuntime`] falls back to a plain [`Decoder`] with no origin
171/// when a codec provides none.
172pub trait LocatedDecoder: Send + Sync {
173 /// Decode `input` into a [`LocatedExpr`], attributing spans to `source_id`.
174 fn decode_located(
175 &self,
176 cx: &mut ReadCx<'_>,
177 input: Input,
178 source_id: String,
179 ) -> Result<LocatedExpr>;
180}
181
182/// A decoder that preserves the full source tree as a [`LocatedExprTree`],
183/// including trivia, for lossless round-tripping.
184pub trait TreeDecoder: Send + Sync {
185 /// Decode `input` into a [`LocatedExprTree`], attributing spans to
186 /// `source_id`.
187 fn decode_tree(
188 &self,
189 cx: &mut ReadCx<'_>,
190 input: Input,
191 source_id: String,
192 ) -> Result<LocatedExprTree>;
193}
194
195/// The core encode contract: render a kernel `Expr` to [`Output`].
196///
197/// Every codec that can write implements `Encoder`. The [`WriteCx`] carries the
198/// kernel context, the codec id, and the [`EncodeOptions`](sim_kernel::EncodeOptions)
199/// that fix the output position and fidelity.
200pub trait Encoder: Send + Sync {
201 /// Encode `expr` into [`Output`] under the context's encode options.
202 fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output>;
203}
204
205/// An encoder that consumes a [`LocatedExpr`], able to use source origin for a
206/// higher-fidelity rendering than the plain [`Encoder`].
207pub trait LocatedEncoder: Send + Sync {
208 /// Encode a [`LocatedExpr`], optionally using its origin for fidelity.
209 fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output>;
210}
211
212/// An encoder that consumes a full [`LocatedExprTree`], able to reproduce trivia
213/// and exact layout for lossless round-trips.
214pub trait TreeEncoder: Send + Sync {
215 /// Encode a [`LocatedExprTree`], reproducing layout and trivia where present.
216 fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output>;
217}
218
219#[sim_citizen_derive::non_citizen(
220 reason = "codec runtime registry handle; reconstruct by loading the codec lib and using the codec symbol",
221 kind = "handle",
222 descriptor = "core/Codec"
223)]
224/// A registered codec as a runtime object: a symbol-named bundle of optional
225/// decode/encode capabilities plus the Shapes and default-decode policy it
226/// exposes.
227///
228/// This is the value the kernel hands back from a codec lookup. Each capability
229/// is optional; the dispatch methods ([`decode`](CodecRuntime::decode),
230/// [`encode_located`](CodecRuntime::encode_located), ...) pick the richest
231/// implementation present and fall back to the plain forms otherwise, failing
232/// closed with a codec error when none is provided.
233pub struct CodecRuntime {
234 /// Stable id of this codec.
235 pub id: CodecId,
236 /// The symbol the codec is registered and looked up under.
237 pub symbol: Symbol,
238 /// Plain `Expr` decoder, if the codec can read.
239 pub decoder: Option<Arc<dyn Decoder>>,
240 /// Origin-preserving decoder, if available.
241 pub located_decoder: Option<Arc<dyn LocatedDecoder>>,
242 /// Full-tree (lossless) decoder, if available.
243 pub tree_decoder: Option<Arc<dyn TreeDecoder>>,
244 /// Plain `Expr` encoder, if the codec can write.
245 pub encoder: Option<Arc<dyn Encoder>>,
246 /// Origin-aware encoder, if available.
247 pub located_encoder: Option<Arc<dyn LocatedEncoder>>,
248 /// Full-tree (lossless) encoder, if available.
249 pub tree_encoder: Option<Arc<dyn TreeEncoder>>,
250 /// Shape describing the expressions this codec accepts/produces.
251 pub expr_shape: ShapeRef,
252 /// Shape describing this codec's options table.
253 pub options_shape: ShapeRef,
254 /// How this codec maps a [`DecodePosition`] to a [`DecodeTarget`].
255 pub default_decode: CodecDefaultDecode,
256}
257
258impl CodecRuntime {
259 /// Decode `input` with this codec's plain decoder, erroring if it has none.
260 pub fn decode(&self, cx: &mut ReadCx<'_>, input: Input) -> Result<sim_kernel::Expr> {
261 let Some(decoder) = &self.decoder else {
262 return Err(sim_kernel::Error::CodecError {
263 codec: self.id,
264 message: format!("codec {} has no decoder", self.symbol),
265 });
266 };
267 decoder.decode(cx, input)
268 }
269
270 /// Encode `expr` with this codec's plain encoder, erroring if it has none.
271 pub fn encode(&self, cx: &mut WriteCx<'_>, expr: &sim_kernel::Expr) -> Result<Output> {
272 let Some(encoder) = &self.encoder else {
273 return Err(sim_kernel::Error::CodecError {
274 codec: self.id,
275 message: format!("codec {} has no encoder", self.symbol),
276 });
277 };
278 encoder.encode(cx, expr)
279 }
280
281 /// Decode `input` preserving origin, falling back to [`decode`](Self::decode)
282 /// with no origin when the codec has no [`LocatedDecoder`].
283 pub fn decode_located(
284 &self,
285 cx: &mut ReadCx<'_>,
286 input: Input,
287 source_id: String,
288 ) -> Result<LocatedExpr> {
289 if let Some(decoder) = &self.located_decoder {
290 return decoder.decode_located(cx, input, source_id);
291 }
292 Ok(LocatedExpr {
293 expr: self.decode(cx, input)?,
294 origin: None,
295 })
296 }
297
298 /// Decode `input` into a full tree, falling back to
299 /// [`decode_located`](Self::decode_located) reconstructed recursively when
300 /// the codec has no [`TreeDecoder`].
301 pub fn decode_tree(
302 &self,
303 cx: &mut ReadCx<'_>,
304 input: Input,
305 source_id: String,
306 ) -> Result<LocatedExprTree> {
307 if let Some(decoder) = &self.tree_decoder {
308 return decoder.decode_tree(cx, input, source_id);
309 }
310 let located = self.decode_located(cx, input, source_id)?;
311 let mut tree = LocatedExprTree::from_expr_recursive(located.expr.clone());
312 tree.origin = located.origin;
313 Ok(tree)
314 }
315
316 /// Encode a [`LocatedExpr`], using the origin-aware encoder only when
317 /// lossless-origin output is requested; otherwise drop the origin and use
318 /// [`encode`](Self::encode).
319 pub fn encode_located(&self, cx: &mut WriteCx<'_>, expr: &LocatedExpr) -> Result<Output> {
320 if cx.options.lossless_origin
321 && let Some(encoder) = &self.located_encoder
322 {
323 return encoder.encode_located(cx, expr);
324 }
325 self.encode(cx, &expr.expr)
326 }
327
328 /// Encode a [`LocatedExprTree`], preferring the tree encoder then the
329 /// located encoder for lossless-origin output, and otherwise dropping to
330 /// [`encode`](Self::encode) on the bare expression.
331 pub fn encode_tree(&self, cx: &mut WriteCx<'_>, expr: &LocatedExprTree) -> Result<Output> {
332 if cx.options.lossless_origin {
333 if let Some(encoder) = &self.tree_encoder {
334 return encoder.encode_tree(cx, expr);
335 }
336 if let Some(encoder) = &self.located_encoder {
337 return encoder.encode_located(cx, &expr.located());
338 }
339 }
340 self.encode(cx, &expr.expr)
341 }
342}
343
344impl Object for CodecRuntime {
345 fn display(&self, _cx: &mut Cx) -> Result<String> {
346 Ok(format!("#<codec {}>", self.symbol))
347 }
348
349 fn as_any(&self) -> &dyn std::any::Any {
350 self
351 }
352}
353
354impl sim_kernel::ObjectCompat for CodecRuntime {
355 fn class(&self, cx: &mut Cx) -> Result<ClassRef> {
356 if let Some(value) = cx
357 .registry()
358 .class_by_symbol(&Symbol::qualified("core", "Codec"))
359 {
360 return Ok(value.clone());
361 }
362 cx.factory().class_stub(
363 sim_kernel::CORE_CODEC_CLASS_ID,
364 Symbol::qualified("core", "Codec"),
365 )
366 }
367 fn as_expr(&self, _cx: &mut Cx) -> Result<sim_kernel::Expr> {
368 Ok(sim_kernel::Expr::Symbol(self.symbol.clone()))
369 }
370 fn as_table(&self, cx: &mut Cx) -> Result<Value> {
371 cx.factory().table(vec![
372 (
373 Symbol::new("symbol"),
374 cx.factory().string(self.symbol.to_string())?,
375 ),
376 (
377 Symbol::new("has-decoder"),
378 cx.factory().bool(self.decoder.is_some())?,
379 ),
380 (
381 Symbol::new("has-encoder"),
382 cx.factory().bool(self.encoder.is_some())?,
383 ),
384 (
385 Symbol::new("has-located-decoder"),
386 cx.factory().bool(self.located_decoder.is_some())?,
387 ),
388 (
389 Symbol::new("has-located-encoder"),
390 cx.factory().bool(self.located_encoder.is_some())?,
391 ),
392 (
393 Symbol::new("has-tree-decoder"),
394 cx.factory().bool(self.tree_decoder.is_some())?,
395 ),
396 (
397 Symbol::new("has-tree-encoder"),
398 cx.factory().bool(self.tree_encoder.is_some())?,
399 ),
400 (
401 Symbol::new("default-decode"),
402 cx.factory()
403 .string(self.default_decode.as_symbol_name().to_owned())?,
404 ),
405 (Symbol::new("expr-shape"), self.expr_shape.clone()),
406 (Symbol::new("options-shape"), self.options_shape.clone()),
407 ])
408 }
409}