aardvark_core/
strategy.rs

1//! Invocation strategy abstraction allowing custom adapters to participate in execution.
2
3use crate::engine::{ExecutionOutput, JsRuntime};
4use crate::error::{PyRunnerError, Result};
5use crate::invocation::FieldDescriptor;
6use crate::outcome::{ResultPayload, SharedBufferHandle};
7use crate::runtime_language::RuntimeLanguage;
8use crate::session::PySession;
9use bytes::Bytes;
10use serde::{Deserialize, Serialize};
11use serde_json::{self, Map as JsonMap, Value as JsonValue};
12use std::sync::Arc;
13use v8;
14
15/// Shared context passed to invocation strategies during a single invocation.
16pub struct InvocationContext<'a> {
17    session: &'a PySession,
18    runtime: &'a mut JsRuntime,
19    language: RuntimeLanguage,
20}
21
22impl<'a> InvocationContext<'a> {
23    pub(crate) fn new(
24        session: &'a PySession,
25        runtime: &'a mut JsRuntime,
26        language: RuntimeLanguage,
27    ) -> Self {
28        Self {
29            session,
30            runtime,
31            language,
32        }
33    }
34
35    /// Returns the prepared session, including descriptor metadata.
36    pub fn session(&self) -> &PySession {
37        self.session
38    }
39
40    /// Provides mutable access to the underlying JS runtime for advanced adapters.
41    pub fn runtime(&mut self) -> &mut JsRuntime {
42        self.runtime
43    }
44
45    /// Returns the guest language in use for this invocation.
46    pub fn language(&self) -> RuntimeLanguage {
47        self.language
48    }
49}
50
51/// Trait implemented by host-provided invocation adapters.
52///
53/// Implementations can customise how arguments are materialised, how results
54/// are captured, and which guest runtime APIs are exercised during execution.
55pub trait PyInvocationStrategy {
56    /// Human-readable identifier for telemetry.
57    fn name(&self) -> &str {
58        "unknown"
59    }
60
61    /// Hook executed before any Python code runs, while JS context is active.
62    fn pre_execute_js(&mut self, _ctx: &mut InvocationContext<'_>) -> Result<()> {
63        Ok(())
64    }
65
66    /// Hook executed inside the Python interpreter before the user entrypoint.
67    fn pre_execute_py(&mut self, _ctx: &mut InvocationContext<'_>) -> Result<()> {
68        Ok(())
69    }
70
71    /// Executes the user entrypoint and returns the raw execution output.
72    fn invoke(&mut self, ctx: &mut InvocationContext<'_>) -> Result<StrategyResult>;
73
74    /// Hook executed after the entrypoint inside Python.
75    fn post_execute_py(
76        &mut self,
77        _ctx: &mut InvocationContext<'_>,
78        _result: &StrategyResult,
79    ) -> Result<()> {
80        Ok(())
81    }
82
83    /// Hook executed after the entrypoint inside JS.
84    fn post_execute_js(
85        &mut self,
86        _ctx: &mut InvocationContext<'_>,
87        _result: &StrategyResult,
88    ) -> Result<()> {
89        Ok(())
90    }
91}
92
93/// Simple strategy that executes the entrypoint with no additional hooks.
94///
95/// For Python handlers it forwards descriptor arguments positionally. When the
96/// guest language is JavaScript it invokes the exported function with a single
97/// descriptor argument, matching the semantics of `JavaScriptInvocationStrategy`.
98#[derive(Default)]
99pub struct DefaultInvocationStrategy;
100
101impl PyInvocationStrategy for DefaultInvocationStrategy {
102    fn name(&self) -> &str {
103        "default"
104    }
105
106    fn invoke(&mut self, ctx: &mut InvocationContext<'_>) -> Result<StrategyResult> {
107        let entrypoint = ctx.session().entrypoint().to_owned();
108        let execution = ctx.runtime().run_python_entrypoint(&entrypoint)?;
109        let payload = if !execution.shared_buffers.is_empty() {
110            let buffers = execution
111                .shared_buffers
112                .iter()
113                .map(SharedBufferHandle::from_shared_buffer)
114                .collect();
115            ResultPayload::SharedBuffers(buffers)
116        } else {
117            execution
118                .result
119                .as_ref()
120                .map(|text| ResultPayload::Text(text.clone()))
121                .unwrap_or(ResultPayload::None)
122        };
123        Ok(StrategyResult { execution, payload })
124    }
125}
126
127/// Strategy that marshals inputs/outputs via JSON helpers.
128///
129/// When targeting Python, the strategy injects a temporary global containing
130/// the JSON-decoded payload. For JavaScript the payload is published to the
131/// bootstrap so the handler receives a deserialised value via the descriptor.
132#[derive(Default)]
133pub struct JsonInvocationStrategy {
134    input: Option<JsonValue>,
135}
136
137/// Strategy that executes JavaScript module exports.
138///
139/// The entrypoint must refer to an exported function (`module:export`). The
140/// runtime passes the invocation descriptor as the single argument, mirroring
141/// the default Cloudflare Workers contract.
142#[derive(Default)]
143pub struct JavaScriptInvocationStrategy;
144
145impl PyInvocationStrategy for JavaScriptInvocationStrategy {
146    fn name(&self) -> &str {
147        "javascript"
148    }
149
150    fn invoke(&mut self, ctx: &mut InvocationContext<'_>) -> Result<StrategyResult> {
151        let entrypoint = ctx.session().entrypoint().to_owned();
152        let execution = ctx.runtime().run_js_entrypoint(&entrypoint)?;
153        let payload = payload_from_execution(&execution);
154        Ok(StrategyResult { execution, payload })
155    }
156}
157
158impl JsonInvocationStrategy {
159    /// Constructs a JSON strategy with optional input payload.
160    pub fn new(input: Option<JsonValue>) -> Self {
161        Self { input }
162    }
163}
164
165impl PyInvocationStrategy for JsonInvocationStrategy {
166    fn name(&self) -> &str {
167        "json"
168    }
169
170    fn pre_execute_js(&mut self, ctx: &mut InvocationContext<'_>) -> Result<()> {
171        if ctx.language() != RuntimeLanguage::JavaScript {
172            return Ok(());
173        }
174
175        let json_string = match &self.input {
176            Some(value) => Some(serde_json::to_string(value).map_err(|err| {
177                PyRunnerError::Execution(format!("failed to encode json input: {err}"))
178            })?),
179            None => None,
180        };
181
182        ctx.runtime().with_context(|scope, _| {
183            let global = scope.get_current_context().global(scope);
184            let key = v8::String::new(scope, "__aardvarkJsonInput").ok_or_else(|| {
185                PyRunnerError::Execution("failed to allocate json input key".into())
186            })?;
187
188            let value: v8::Local<v8::Value> = if let Some(ref json) = json_string {
189                let json_str = v8::String::new(scope, json).ok_or_else(|| {
190                    PyRunnerError::Execution("failed to allocate json input payload".into())
191                })?;
192                v8::json::parse(scope, json_str).ok_or_else(|| {
193                    PyRunnerError::Execution("failed to parse json input payload".into())
194                })?
195            } else {
196                v8::undefined(scope).into()
197            };
198
199            global.set(scope, key.into(), value);
200            Ok(())
201        })
202    }
203
204    fn pre_execute_py(&mut self, ctx: &mut InvocationContext<'_>) -> Result<()> {
205        if ctx.language() != RuntimeLanguage::Python {
206            return Ok(());
207        }
208        if let Some(ref value) = self.input {
209            let encoded = serde_json::to_string(value).map_err(|err| {
210                PyRunnerError::Execution(format!("failed to encode json input: {err}"))
211            })?;
212            let safe = encoded.replace("'''", "\\'\\'\\'");
213            let script = format!(
214                "import json\n__aardvark_input = json.loads(r'''{safe}''')\n",
215                safe = safe
216            );
217            ctx.runtime().run_python_snippet(&script)?;
218        }
219        Ok(())
220    }
221
222    fn invoke(&mut self, ctx: &mut InvocationContext<'_>) -> Result<StrategyResult> {
223        let entrypoint = ctx.session().entrypoint().to_owned();
224        match ctx.language() {
225            RuntimeLanguage::Python => {
226                let mut execution = ctx.runtime().run_python_entrypoint(&entrypoint)?;
227                if execution.json.is_none() {
228                    if let Some(value) = execution
229                        .result
230                        .as_ref()
231                        .and_then(|result| serde_json::from_str::<JsonValue>(result).ok())
232                    {
233                        execution.json = Some(value);
234                    }
235                }
236                let payload = payload_from_execution(&execution);
237                Ok(StrategyResult { execution, payload })
238            }
239            RuntimeLanguage::JavaScript => {
240                let execution = ctx.runtime().run_js_entrypoint(&entrypoint)?;
241                let payload = payload_from_execution(&execution);
242                Ok(StrategyResult { execution, payload })
243            }
244        }
245    }
246}
247
248/// RawCtx input buffer descriptor provided by the host.
249#[derive(Clone, Debug)]
250pub struct RawCtxMetadata {
251    pub dtype: String,
252    pub shape: Option<Vec<usize>>,
253    pub nullable: Option<bool>,
254    pub extra: Option<JsonValue>,
255}
256
257impl RawCtxMetadata {
258    /// Create metadata with a required dtype.
259    pub fn new(dtype: impl Into<String>) -> Self {
260        let owned = dtype.into();
261        Self {
262            dtype: owned.trim().to_owned(),
263            shape: None,
264            nullable: None,
265            extra: None,
266        }
267    }
268
269    /// Attach an optional shape (e.g. `[rows, cols]`).
270    pub fn with_shape(mut self, shape: Vec<usize>) -> Self {
271        self.shape = Some(shape);
272        self
273    }
274
275    /// Mark whether the data is nullable.
276    pub fn with_nullable(mut self, nullable: bool) -> Self {
277        self.nullable = Some(nullable);
278        self
279    }
280
281    /// Merge additional metadata fields (must be an object).
282    pub fn with_extra(mut self, extra: JsonValue) -> Result<Self> {
283        if !extra.is_object() {
284            return Err(PyRunnerError::Execution(
285                "RawCtx metadata extras must be a JSON object".into(),
286            ));
287        }
288        self.extra = Some(extra);
289        Ok(self)
290    }
291
292    fn validate(&self) -> Result<()> {
293        let dtype = self.dtype.trim();
294        if dtype.is_empty() {
295            return Err(PyRunnerError::Execution(
296                "RawCtx metadata dtype cannot be empty".into(),
297            ));
298        }
299        if let Some(shape) = &self.shape {
300            if shape.is_empty() {
301                return Err(PyRunnerError::Execution(
302                    "RawCtx metadata shape cannot be empty".into(),
303                ));
304            }
305            if shape.contains(&0) {
306                return Err(PyRunnerError::Execution(
307                    "RawCtx metadata shape dimensions must be positive".into(),
308                ));
309            }
310        }
311        if let Some(extra) = &self.extra {
312            if !extra.is_object() {
313                return Err(PyRunnerError::Execution(
314                    "RawCtx metadata extras must be a JSON object".into(),
315                ));
316            }
317        }
318        Ok(())
319    }
320
321    fn to_json_value(&self) -> Result<JsonValue> {
322        self.validate()?;
323        let mut map = JsonMap::new();
324        map.insert("dtype".to_owned(), JsonValue::String(self.dtype.clone()));
325        if let Some(shape) = &self.shape {
326            let shape_values = shape.iter().map(|dim| JsonValue::from(*dim)).collect();
327            map.insert("shape".to_owned(), JsonValue::Array(shape_values));
328        }
329        if let Some(nullable) = self.nullable {
330            map.insert("nullable".to_owned(), JsonValue::Bool(nullable));
331        }
332        if let Some(extra) = &self.extra {
333            if let Some(obj) = extra.as_object() {
334                for (key, value) in obj {
335                    if matches!(key.as_str(), "dtype" | "shape" | "nullable") {
336                        continue;
337                    }
338                    map.insert(key.clone(), value.clone());
339                }
340            }
341        }
342        Ok(JsonValue::Object(map))
343    }
344}
345
346#[derive(Clone, Debug)]
347pub struct RawCtxInput {
348    pub name: String,
349    pub buffer: Bytes,
350    pub metadata: Option<RawCtxMetadata>,
351}
352
353impl RawCtxInput {
354    /// Construct a new RawCtx input buffer.
355    pub fn new(
356        name: impl Into<String>,
357        buffer: Bytes,
358        metadata: Option<RawCtxMetadata>,
359    ) -> Result<Self> {
360        if let Some(meta) = metadata.as_ref() {
361            meta.validate()?;
362        }
363        Ok(Self {
364            name: name.into(),
365            buffer,
366            metadata,
367        })
368    }
369}
370
371/// Strategy that hydrates RawCtx-style buffers into Python and collects shared-buffer results.
372#[derive(Default)]
373pub struct RawCtxInvocationStrategy {
374    inputs: Vec<RawCtxInput>,
375}
376
377impl RawCtxInvocationStrategy {
378    /// Create a RawCtx strategy with the provided input buffers.
379    pub fn new(inputs: Vec<RawCtxInput>) -> Self {
380        Self { inputs }
381    }
382
383    /// Replace the current inputs with a new set.
384    pub fn with_inputs(mut self, inputs: Vec<RawCtxInput>) -> Self {
385        self.inputs = inputs;
386        self
387    }
388
389    fn publish_inputs(&self, runtime: &mut JsRuntime) -> Result<()> {
390        publish_rawctx_inputs(runtime, &self.inputs)
391    }
392
393    fn materialize_python_views(&self, ctx: &mut InvocationContext<'_>) -> Result<()> {
394        static PRELUDE: &str = r#"
395from js import globalThis as _js
396import builtins
397try:
398    from pyodide.ffi import to_memoryview as _aardvark_to_memoryview
399except ImportError:
400    _aardvark_to_memoryview = None
401
402__aardvark_rawctx_inputs = {}
403if hasattr(_js, "__aardvarkInputBuffers"):
404    _buffers = _js.__aardvarkInputBuffers.to_py()
405    _meta_source = {}
406    if hasattr(_js, "__aardvarkInputMetadata"):
407        _meta_source = _js.__aardvarkInputMetadata.to_py()
408    _view = None
409    _memory = None
410    _meta = None
411    _candidate = None
412    for _name, _view in _buffers.items():
413        _memory = None
414        if hasattr(_view, "to_memoryview"):
415            try:
416                _memory = _view.to_memoryview()
417            except TypeError:
418                _memory = None
419        if _memory is None and _aardvark_to_memoryview is not None:
420            try:
421                _memory = _aardvark_to_memoryview(_view)
422            except TypeError:
423                _memory = None
424        if _memory is None:
425            if hasattr(_view, "to_py"):
426                _candidate = _view.to_py()
427            else:
428                _candidate = _view
429            try:
430                _memory = memoryview(_candidate)
431            except TypeError:
432                _memory = memoryview(bytearray(_candidate))
433        _meta = None
434        if isinstance(_meta_source, dict):
435            _meta = _meta_source.get(_name)
436            if hasattr(_meta, "to_py"):
437                _meta = _meta.to_py()
438        __aardvark_rawctx_inputs[_name] = {"data": _memory, "metadata": _meta}
439builtins.__aardvark_rawctx_inputs = __aardvark_rawctx_inputs
440del _js, _buffers, _view, _memory, _meta, _meta_source, _aardvark_to_memoryview, _candidate, builtins
441"#;
442        ctx.runtime().run_python_snippet(PRELUDE)
443    }
444
445    fn install_auto_wrapper(&self, ctx: &mut InvocationContext<'_>) -> Result<()> {
446        let session = ctx.session();
447        let Some(spec_json) = cached_rawctx_spec(session)? else {
448            return Ok(());
449        };
450        let safe_payload = spec_json.replace("'''", "\\'\\'\\'");
451        let script = format!(
452            "{prelude}\n",
453            prelude = RAWCTX_AUTO_WRAPPER_SNIPPET.replace("{spec_json}", &safe_payload)
454        );
455        ctx.runtime().run_python_snippet(&script)?;
456        Ok(())
457    }
458}
459
460impl PyInvocationStrategy for RawCtxInvocationStrategy {
461    fn name(&self) -> &str {
462        "rawctx"
463    }
464
465    fn pre_execute_js(&mut self, ctx: &mut InvocationContext<'_>) -> Result<()> {
466        self.publish_inputs(ctx.runtime())?;
467        self.install_js_auto_wrapper(ctx)
468    }
469
470    fn pre_execute_py(&mut self, ctx: &mut InvocationContext<'_>) -> Result<()> {
471        if ctx.language() != RuntimeLanguage::Python {
472            return Ok(());
473        }
474        self.materialize_python_views(ctx)?;
475        self.install_auto_wrapper(ctx)
476    }
477
478    fn post_execute_js(
479        &mut self,
480        ctx: &mut InvocationContext<'_>,
481        _result: &StrategyResult,
482    ) -> Result<()> {
483        clear_rawctx_inputs(ctx.runtime())
484    }
485
486    fn invoke(&mut self, ctx: &mut InvocationContext<'_>) -> Result<StrategyResult> {
487        let entrypoint = ctx.session().entrypoint().to_owned();
488        match ctx.language() {
489            RuntimeLanguage::Python => {
490                let mut execution = ctx.runtime().run_python_entrypoint(&entrypoint)?;
491                if execution.json.is_none() {
492                    if let Some(value) = execution
493                        .result
494                        .as_ref()
495                        .and_then(|result| serde_json::from_str::<JsonValue>(result).ok())
496                    {
497                        execution.json = Some(value);
498                    }
499                }
500                let payload = payload_from_execution(&execution);
501                Ok(StrategyResult { execution, payload })
502            }
503            RuntimeLanguage::JavaScript => {
504                let execution = ctx.runtime().run_js_entrypoint(&entrypoint)?;
505                let payload = payload_from_execution(&execution);
506                Ok(StrategyResult { execution, payload })
507            }
508        }
509    }
510}
511
512impl RawCtxInvocationStrategy {
513    fn install_js_auto_wrapper(&self, ctx: &mut InvocationContext<'_>) -> Result<()> {
514        if ctx.language() != RuntimeLanguage::JavaScript {
515            return Ok(());
516        }
517        let spec_json = cached_rawctx_spec(ctx.session())?;
518        let script = if let Some(spec_json) = spec_json {
519            format!("globalThis.__aardvarkSetRawctxSpec({spec_json});")
520        } else {
521            "globalThis.__aardvarkSetRawctxSpec(null);".to_string()
522        };
523        ctx.runtime()
524            .execute_script("__aardvark_rawctx_spec.js", &script)
525            .map_err(|err| {
526                PyRunnerError::Execution(format!("failed to configure rawctx auto-wrapper: {err}"))
527            })
528    }
529}
530
531/// Result produced by a strategy invocation.
532pub struct StrategyResult {
533    pub execution: ExecutionOutput,
534    pub payload: ResultPayload,
535}
536
537fn payload_from_execution(execution: &ExecutionOutput) -> ResultPayload {
538    if !execution.shared_buffers.is_empty() {
539        let buffers = execution
540            .shared_buffers
541            .iter()
542            .map(SharedBufferHandle::from_shared_buffer)
543            .collect();
544        ResultPayload::SharedBuffers(buffers)
545    } else if let Some(json) = execution.json.clone() {
546        ResultPayload::Json(json)
547    } else if let Some(text) = execution.result.clone() {
548        ResultPayload::Text(text)
549    } else {
550        ResultPayload::None
551    }
552}
553
554/// Builder that emits invocation-descriptor metadata for RawCtx inputs.
555#[derive(Clone, Debug, Default)]
556pub struct RawCtxBindingBuilder {
557    arg: Option<String>,
558    mode: Option<String>,
559    decoder: Option<String>,
560    options: Option<JsonMap<String, JsonValue>>,
561    metadata_arg: Option<String>,
562    raw_arg: Option<String>,
563    python_loader: Option<String>,
564    default: Option<JsonValue>,
565    optional: Option<bool>,
566    enabled: Option<bool>,
567    table: Option<RawCtxTableSpec>,
568}
569
570impl RawCtxBindingBuilder {
571    /// Create an empty binding builder.
572    pub fn new() -> Self {
573        Self::default()
574    }
575
576    /// Convenience constructor for keyword arguments.
577    pub fn keyword(arg: impl Into<String>) -> Self {
578        Self::new().arg(arg).mode("keyword")
579    }
580
581    /// Convenience constructor for positional arguments.
582    pub fn positional() -> Self {
583        Self::new().mode("positional")
584    }
585
586    /// Assign the argument name that should receive the decoded value.
587    pub fn arg(mut self, arg: impl Into<String>) -> Self {
588        self.arg = Some(arg.into());
589        self
590    }
591
592    /// Override the binding mode (`keyword` or `positional`).
593    pub fn mode(mut self, mode: impl Into<String>) -> Self {
594        self.mode = Some(mode.into());
595        self
596    }
597
598    /// Configure the decoder to use (`utf8`, `json`, `bytes`, `memoryview`, `float64`, etc.).
599    pub fn decoder(mut self, decoder: impl Into<String>) -> Self {
600        self.decoder = Some(decoder.into());
601        self
602    }
603
604    /// Attach decoder-specific options (stored under `options`).
605    pub fn option(mut self, key: impl Into<String>, value: JsonValue) -> Self {
606        let map = self.options.get_or_insert_with(JsonMap::new);
607        map.insert(key.into(), value);
608        self
609    }
610
611    /// Name the keyword argument that should receive metadata for the buffer.
612    pub fn metadata_arg(mut self, name: impl Into<String>) -> Self {
613        self.metadata_arg = Some(name.into());
614        self
615    }
616
617    /// Name the keyword argument that should receive the raw payload record.
618    pub fn raw_arg(mut self, name: impl Into<String>) -> Self {
619        self.raw_arg = Some(name.into());
620        self
621    }
622
623    /// Provide a Python loader expression evaluated with `buffer`, `metadata`, and `payload`.
624    pub fn python_loader(mut self, expression: impl Into<String>) -> Self {
625        self.python_loader = Some(expression.into());
626        self
627    }
628
629    /// Fallback value used when the payload is missing or the decoder returns `None`.
630    pub fn default_value(mut self, value: JsonValue) -> Self {
631        self.default = Some(value);
632        self
633    }
634
635    /// Mark the binding optional (missing payload becomes `None` instead of error).
636    pub fn optional(mut self, optional: bool) -> Self {
637        self.optional = Some(optional);
638        self
639    }
640
641    /// Enable or disable the binding.
642    pub fn enabled(mut self, enabled: bool) -> Self {
643        self.enabled = Some(enabled);
644        self
645    }
646
647    /// Attach a table schema describing a dict-of-columns structure that the shim should build.
648    pub fn table(mut self, table: RawCtxTableSpec) -> Self {
649        self.table = Some(table);
650        self
651    }
652
653    /// Serialise the builder into descriptor metadata (`serde_json::Value`).
654    pub fn build(self) -> JsonValue {
655        wrap_rawctx_metadata(build_binding_metadata(self))
656    }
657
658    /// Merge the binding into an existing metadata object (mutating it in place).
659    pub fn merge_into(self, metadata: &mut JsonValue) {
660        merge_metadata(metadata, self.build());
661    }
662}
663
664/// Declarative schema describing a tabular payload decoded from a RawCtx buffer.
665#[derive(Clone, Debug, Serialize, Deserialize, Default)]
666pub struct RawCtxTableSpec {
667    #[serde(default)]
668    columns: Vec<RawCtxTableColumnSpec>,
669    #[serde(default, skip_serializing_if = "Option::is_none")]
670    orient: Option<String>,
671}
672
673impl RawCtxTableSpec {
674    fn into_json(self) -> JsonValue {
675        serde_json::to_value(self).expect("serialize rawctx table spec")
676    }
677
678    fn validate(&self) -> Result<()> {
679        if self.columns.is_empty() {
680            return Err(PyRunnerError::Execution(
681                "rawctx table spec requires at least one column".into(),
682            ));
683        }
684        if let Some(orient) = &self.orient {
685            let lowered = orient.to_ascii_lowercase();
686            if lowered != "records" && lowered != "columns" {
687                return Err(PyRunnerError::Execution(format!(
688                    "rawctx table orient must be 'records' or 'columns' (got {orient})"
689                )));
690            }
691        }
692        for column in &self.columns {
693            if column.name.trim().is_empty() {
694                return Err(PyRunnerError::Execution(
695                    "rawctx table column name cannot be empty".into(),
696                ));
697            }
698            if let Some(options) = &column.options {
699                if !options.is_object() {
700                    return Err(PyRunnerError::Execution(format!(
701                        "rawctx table column '{}' options must be a JSON object",
702                        column.name
703                    )));
704                }
705            }
706            if let Some(dtype) = &column.dtype {
707                if dtype.trim().is_empty() {
708                    return Err(PyRunnerError::Execution(format!(
709                        "rawctx table column '{}' dtype cannot be empty",
710                        column.name
711                    )));
712                }
713            }
714            if let Some(metadata) = &column.metadata {
715                if !metadata.is_object() {
716                    return Err(PyRunnerError::Execution(format!(
717                        "rawctx table column '{}' metadata must be a JSON object",
718                        column.name
719                    )));
720                }
721            }
722            if let Some(shape) = &column.shape {
723                if shape.is_empty() {
724                    return Err(PyRunnerError::Execution(format!(
725                        "rawctx table column '{}' shape cannot be empty",
726                        column.name
727                    )));
728                }
729                if shape.contains(&0) {
730                    return Err(PyRunnerError::Execution(format!(
731                        "rawctx table column '{}' shape dimensions must be positive",
732                        column.name
733                    )));
734                }
735            }
736            if let Some(manifest) = &column.manifest {
737                if !manifest.is_object() {
738                    return Err(PyRunnerError::Execution(format!(
739                        "rawctx table column '{}' manifest hints must be a JSON object",
740                        column.name
741                    )));
742                }
743            }
744            validate_decoder_options(
745                column.decoder.as_deref(),
746                column.options.as_ref(),
747                &format!("rawctx table column '{}'", column.name),
748            )?;
749        }
750        Ok(())
751    }
752
753    /// Construct a table specification from a manifest-style JSON object.
754    pub fn from_manifest(manifest: &JsonValue) -> Result<Self> {
755        build_table_spec_from_manifest(manifest)
756    }
757}
758
759fn build_table_spec_from_manifest(manifest: &JsonValue) -> Result<RawCtxTableSpec> {
760    let manifest_obj = manifest.as_object().ok_or_else(|| {
761        PyRunnerError::Execution("rawctx table manifest must be a JSON object".into())
762    })?;
763
764    let mut builder = RawCtxTableSpecBuilder::new();
765    if let Some(orient_value) = manifest_obj.get("orient") {
766        let orient = orient_value.as_str().ok_or_else(|| {
767            PyRunnerError::Execution("rawctx table manifest orient must be a string".into())
768        })?;
769        builder = builder.orient(orient);
770    }
771
772    let columns_value = manifest_obj.get("columns").ok_or_else(|| {
773        PyRunnerError::Execution("rawctx table manifest requires a 'columns' array".into())
774    })?;
775    let columns = columns_value.as_array().ok_or_else(|| {
776        PyRunnerError::Execution("rawctx table manifest 'columns' must be an array".into())
777    })?;
778    if columns.is_empty() {
779        return Err(PyRunnerError::Execution(
780            "rawctx table manifest must describe at least one column".into(),
781        ));
782    }
783
784    for (index, column_value) in columns.iter().enumerate() {
785        let column_obj = column_value.as_object().ok_or_else(|| {
786            PyRunnerError::Execution(format!(
787                "rawctx table manifest column {index} must be a JSON object"
788            ))
789        })?;
790
791        let name_value = column_obj
792            .get("field")
793            .or_else(|| column_obj.get("name"))
794            .ok_or_else(|| {
795                PyRunnerError::Execution(format!(
796                    "rawctx table manifest column {index} requires a 'field' or 'name'"
797                ))
798            })?;
799        let name = name_value.as_str().ok_or_else(|| {
800            PyRunnerError::Execution(format!(
801                "rawctx table manifest column {index} field/name must be a string"
802            ))
803        })?;
804        if name.trim().is_empty() {
805            return Err(PyRunnerError::Execution(format!(
806                "rawctx table manifest column {index} name cannot be empty"
807            )));
808        }
809
810        let mut column = RawCtxTableColumnBuilder::new(name);
811
812        if let Some(decoder_value) = column_obj.get("decoder") {
813            let decoder = decoder_value.as_str().ok_or_else(|| {
814                PyRunnerError::Execution(format!(
815                    "rawctx table manifest column '{name}' decoder must be a string"
816                ))
817            })?;
818            column = column.decoder(decoder);
819        }
820
821        if let Some(options_value) = column_obj.get("options") {
822            let options = options_value.as_object().ok_or_else(|| {
823                PyRunnerError::Execution(format!(
824                    "rawctx table manifest column '{name}' options must be a JSON object"
825                ))
826            })?;
827            for (key, value) in options {
828                column = column.option(key.clone(), value.clone());
829            }
830        }
831
832        if let Some(default_value) = column_obj.get("default") {
833            column = column.default_value(default_value.clone());
834        }
835
836        let optional_flag = match column_obj.get("optional") {
837            Some(value) => Some(value.as_bool().ok_or_else(|| {
838                PyRunnerError::Execution(format!(
839                    "rawctx table manifest column '{name}' optional flag must be boolean"
840                ))
841            })?),
842            None => match column_obj.get("required") {
843                Some(value) => {
844                    let required = value.as_bool().ok_or_else(|| {
845                        PyRunnerError::Execution(format!(
846                            "rawctx table manifest column '{name}' required flag must be boolean"
847                        ))
848                    })?;
849                    Some(!required)
850                }
851                None => None,
852            },
853        };
854        if let Some(optional) = optional_flag {
855            column = column.optional(optional);
856        }
857
858        if let Some(dtype_value) = column_obj.get("dtype") {
859            let dtype = dtype_value.as_str().ok_or_else(|| {
860                PyRunnerError::Execution(format!(
861                    "rawctx table manifest column '{name}' dtype must be a string"
862                ))
863            })?;
864            column = column.dtype(dtype);
865        }
866
867        if let Some(nullable_value) = column_obj.get("nullable") {
868            let nullable = nullable_value.as_bool().ok_or_else(|| {
869                PyRunnerError::Execution(format!(
870                    "rawctx table manifest column '{name}' nullable must be boolean"
871                ))
872            })?;
873            column = column.nullable(nullable);
874        }
875
876        if let Some(metadata_value) = column_obj.get("metadata") {
877            if !metadata_value.is_object() {
878                return Err(PyRunnerError::Execution(format!(
879                    "rawctx table manifest column '{name}' metadata must be a JSON object"
880                )));
881            }
882            column = column.schema_metadata(metadata_value.clone());
883        }
884
885        if let Some(shape_value) = column_obj.get("shape") {
886            let shape = shape_value.as_array().ok_or_else(|| {
887                PyRunnerError::Execution(format!(
888                    "rawctx table manifest column '{name}' shape must be an array"
889                ))
890            })?;
891            if shape.is_empty() {
892                return Err(PyRunnerError::Execution(format!(
893                    "rawctx table manifest column '{name}' shape cannot be empty"
894                )));
895            }
896            let mut dims = Vec::with_capacity(shape.len());
897            for dim in shape {
898                let value = dim.as_u64().ok_or_else(|| {
899                    PyRunnerError::Execution(format!(
900                        "rawctx table manifest column '{name}' shape entries must be positive integers"
901                    ))
902                })?;
903                if value == 0 {
904                    return Err(PyRunnerError::Execution(format!(
905                        "rawctx table manifest column '{name}' shape entries must be positive"
906                    )));
907                }
908                dims.push(value as usize);
909            }
910            column = column.shape(dims);
911        }
912
913        if let Some(manifest_value) = column_obj.get("manifest") {
914            if !manifest_value.is_object() {
915                return Err(PyRunnerError::Execution(format!(
916                    "rawctx table manifest column '{name}' manifest must be a JSON object"
917                )));
918            }
919            column = column.manifest(manifest_value.clone());
920        } else if let Some(source_value) = column_obj
921            .get("source")
922            .or_else(|| column_obj.get("manifest_column"))
923        {
924            let source = source_value.as_str().ok_or_else(|| {
925                PyRunnerError::Execution(format!(
926                    "rawctx table manifest column '{name}' source must be a string"
927                ))
928            })?;
929            column = column.manifest_column(source);
930        }
931
932        builder = builder.column(column);
933    }
934
935    Ok(builder.build())
936}
937
/// Column descriptor used within a table schema.
///
/// Values of this type are produced by `RawCtxTableColumnBuilder::build`.
/// Every optional field uses `#[serde(default)]`, so it may be absent when
/// deserialising, and is omitted from the serialised form when it is `None`.
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
struct RawCtxTableColumnSpec {
    /// Column name.
    name: String,
    /// Decoder used when normalising values (e.g. `json`, `utf8`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    decoder: Option<String>,
    /// Decoder-specific options; the column builder always stores a JSON object here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    options: Option<JsonValue>,
    /// Fallback value used when column data is missing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    default: Option<JsonValue>,
    /// Whether the column is optional (missing entries use `default` or `None`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    optional: Option<bool>,
    /// Logical dtype associated with the column (e.g. `string`, `float64`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    dtype: Option<String>,
    /// Whether the column permits null values.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    nullable: Option<bool>,
    /// Additional schema metadata attached to the column.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    metadata: Option<JsonValue>,
    /// Expected shape for vector-valued columns.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    shape: Option<Vec<usize>>,
    /// Manifest-derived hints; `RawCtxTableColumnBuilder::manifest_column`
    /// stores an object of the form `{"column": <name>}` here.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    manifest: Option<JsonValue>,
}
961
/// Builder for table schemas assembled in host code.
///
/// Collects column specs and an optional orientation, then hands them to
/// `RawCtxTableSpec` in `build`.
#[derive(Clone, Debug, Default)]
pub struct RawCtxTableSpecBuilder {
    // Column specs in the order they were added.
    columns: Vec<RawCtxTableColumnSpec>,
    // Expected orientation; stays `None` unless `orient` is called.
    orient: Option<String>,
}
968
969impl RawCtxTableSpecBuilder {
970    /// Begin a new table specification.
971    pub fn new() -> Self {
972        Self::default()
973    }
974
975    /// Set the expected orientation (`records`, `columns`, ...). Defaults to `records`.
976    pub fn orient(mut self, orient: impl Into<String>) -> Self {
977        self.orient = Some(orient.into());
978        self
979    }
980
981    /// Add a column definition to the schema (chainable).
982    pub fn column(mut self, column: RawCtxTableColumnBuilder) -> Self {
983        self.columns.push(column.build());
984        self
985    }
986
987    /// Add a column definition to the schema in builder style (mutable).
988    pub fn add_column(&mut self, column: RawCtxTableColumnBuilder) -> &mut Self {
989        self.columns.push(column.build());
990        self
991    }
992
993    /// Finalise the table specification.
994    pub fn build(self) -> RawCtxTableSpec {
995        let spec = RawCtxTableSpec {
996            columns: self.columns,
997            orient: self.orient,
998        };
999        spec.validate().expect("table spec validation");
1000        spec
1001    }
1002}
1003
/// Builder for individual table columns referenced by a table schema.
///
/// Each field mirrors the same-named field of `RawCtxTableColumnSpec`; the
/// only difference is that `options` is kept as a JSON map until `build`
/// wraps it in a JSON object value.
#[derive(Clone, Debug)]
pub struct RawCtxTableColumnBuilder {
    // Column name supplied to `new`.
    name: String,
    // Decoder used when normalising values (e.g. `json`, `utf8`).
    decoder: Option<String>,
    // Decoder-specific options, created lazily by the first `option` call.
    options: Option<JsonMap<String, JsonValue>>,
    // Fallback value used when column data is missing.
    default: Option<JsonValue>,
    // Whether the column is optional.
    optional: Option<bool>,
    // Logical dtype (e.g. `string`, `float64`).
    dtype: Option<String>,
    // Whether null values are permitted.
    nullable: Option<bool>,
    // Additional schema metadata.
    metadata: Option<JsonValue>,
    // Expected shape for vector-valued columns.
    shape: Option<Vec<usize>>,
    // Manifest-derived hints; overwritten by both `manifest` and `manifest_column`.
    manifest: Option<JsonValue>,
}
1018
1019impl RawCtxTableColumnBuilder {
1020    /// Create a column builder for the specified column name.
1021    pub fn new(name: impl Into<String>) -> Self {
1022        Self {
1023            name: name.into(),
1024            decoder: None,
1025            options: None,
1026            default: None,
1027            optional: None,
1028            dtype: None,
1029            nullable: None,
1030            metadata: None,
1031            shape: None,
1032            manifest: None,
1033        }
1034    }
1035
1036    /// Convenience constructor for UTF-8 string columns.
1037    pub fn utf8(name: impl Into<String>) -> Self {
1038        Self::new(name)
1039            .decoder("utf8")
1040            .dtype("string")
1041            .nullable(false)
1042    }
1043
1044    /// Convenience constructor for raw bytes columns.
1045    pub fn bytes(name: impl Into<String>) -> Self {
1046        Self::new(name)
1047            .decoder("bytes")
1048            .dtype("bytes")
1049            .nullable(false)
1050    }
1051
1052    /// Convenience constructor for base64-encoded binary columns.
1053    pub fn base64(name: impl Into<String>) -> Self {
1054        Self::new(name)
1055            .decoder("base64")
1056            .dtype("bytes")
1057            .nullable(false)
1058            .option("as_memoryview", JsonValue::Bool(true))
1059    }
1060
1061    /// Convenience constructor for float64 columns.
1062    pub fn float64(name: impl Into<String>) -> Self {
1063        Self::new(name)
1064            .decoder("float64")
1065            .dtype("float64")
1066            .nullable(false)
1067            .option("struct_format", JsonValue::String("<d".into()))
1068    }
1069
1070    /// Convenience constructor for float32 columns.
1071    pub fn float32(name: impl Into<String>) -> Self {
1072        Self::new(name)
1073            .decoder("float32")
1074            .dtype("float32")
1075            .nullable(false)
1076            .option("struct_format", JsonValue::String("<f".into()))
1077    }
1078
1079    /// Convenience constructor for int64 columns.
1080    pub fn int64(name: impl Into<String>) -> Self {
1081        Self::new(name)
1082            .decoder("int64")
1083            .dtype("int64")
1084            .nullable(false)
1085            .option("byteorder", JsonValue::String("little".into()))
1086    }
1087
1088    /// Convenience constructor for int32 columns.
1089    pub fn int32(name: impl Into<String>) -> Self {
1090        Self::new(name)
1091            .decoder("int32")
1092            .dtype("int32")
1093            .nullable(false)
1094            .option("struct_format", JsonValue::String("<i".into()))
1095    }
1096
1097    /// Convenience constructor for boolean columns.
1098    pub fn boolean(name: impl Into<String>) -> Self {
1099        Self::new(name)
1100            .decoder("bool")
1101            .dtype("bool")
1102            .nullable(false)
1103            .option("byteorder", JsonValue::String("little".into()))
1104    }
1105
1106    /// Override the column decoder used when normalising values (`json`, `utf8`, ...).
1107    pub fn decoder(mut self, decoder: impl Into<String>) -> Self {
1108        self.decoder = Some(decoder.into());
1109        self
1110    }
1111
1112    /// Attach decoder-specific options (stored under `options`).
1113    pub fn option(mut self, key: impl Into<String>, value: JsonValue) -> Self {
1114        let map = self.options.get_or_insert_with(JsonMap::new);
1115        map.insert(key.into(), value);
1116        self
1117    }
1118
1119    /// Provide a fallback value used when column data is missing.
1120    pub fn default_value(mut self, value: JsonValue) -> Self {
1121        self.default = Some(value);
1122        self
1123    }
1124
1125    /// Mark the column optional (missing entries use `default` or `None`).
1126    pub fn optional(mut self, optional: bool) -> Self {
1127        self.optional = Some(optional);
1128        self
1129    }
1130
1131    /// Declare the logical dtype associated with the column (e.g. `string`, `float64`).
1132    pub fn dtype(mut self, dtype: impl Into<String>) -> Self {
1133        self.dtype = Some(dtype.into());
1134        self
1135    }
1136
1137    /// Indicate whether the column permits null values.
1138    pub fn nullable(mut self, nullable: bool) -> Self {
1139        self.nullable = Some(nullable);
1140        self
1141    }
1142
1143    /// Attach additional schema metadata to the column.
1144    pub fn schema_metadata(mut self, metadata: JsonValue) -> Self {
1145        self.metadata = Some(metadata);
1146        self
1147    }
1148
1149    /// Record an expected shape for vector-valued columns.
1150    pub fn shape<I>(mut self, shape: I) -> Self
1151    where
1152        I: Into<Vec<usize>>,
1153    {
1154        self.shape = Some(shape.into());
1155        self
1156    }
1157
1158    /// Associate manifest-derived hints with the column (e.g. upstream dataset name).
1159    pub fn manifest(mut self, manifest: JsonValue) -> Self {
1160        self.manifest = Some(manifest);
1161        self
1162    }
1163
1164    /// Convenience helper for binding a manifest column name.
1165    pub fn manifest_column(mut self, column: impl Into<String>) -> Self {
1166        let mut map = JsonMap::new();
1167        map.insert("column".into(), JsonValue::String(column.into()));
1168        self.manifest = Some(JsonValue::Object(map));
1169        self
1170    }
1171
1172    fn build(self) -> RawCtxTableColumnSpec {
1173        RawCtxTableColumnSpec {
1174            name: self.name,
1175            decoder: self.decoder,
1176            options: self.options.map(JsonValue::Object),
1177            default: self.default,
1178            optional: self.optional,
1179            dtype: self.dtype,
1180            nullable: self.nullable,
1181            metadata: self.metadata,
1182            shape: self.shape,
1183            manifest: self.manifest,
1184        }
1185    }
1186}
1187
/// Builder that emits descriptor metadata for RawCtx outputs / shared buffers.
///
/// `build` serialises the collected settings into a JSON value nested under
/// `aardvark.rawctx`; unset (`None`) settings are left out of that value.
#[derive(Clone, Debug)]
pub struct RawCtxPublishBuilder {
    // Shared-buffer identifier; always emitted as `publish.id`.
    id: String,
    // Publish mode override.
    mode: Option<String>,
    // Transform applied to the return value before publishing.
    transform: Option<String>,
    // Metadata emitted alongside the shared buffer.
    metadata: Option<JsonValue>,
    // Python expression executed to transform the result.
    python_transform: Option<String>,
    // What the wrapper returns; emitted under the key `return`.
    return_behavior: Option<String>,
    // Behaviour when the user returns `None`.
    when_none: Option<String>,
    // Encoding hint for the UTF-8 transform.
    encoding: Option<String>,
    // Enable/disable flag; emitted as `rawctx.enabled`, outside the `publish` object.
    enabled: Option<bool>,
}
1201
1202impl RawCtxPublishBuilder {
1203    /// Create a publish builder for the provided shared-buffer identifier.
1204    pub fn new(id: impl Into<String>) -> Self {
1205        Self {
1206            id: id.into(),
1207            mode: None,
1208            transform: None,
1209            metadata: None,
1210            python_transform: None,
1211            return_behavior: None,
1212            when_none: None,
1213            encoding: None,
1214            enabled: None,
1215        }
1216    }
1217
1218    /// Override the publish mode (defaults to `publish-buffer`).
1219    pub fn mode(mut self, mode: impl Into<String>) -> Self {
1220        self.mode = Some(mode.into());
1221        self
1222    }
1223
1224    /// Select the transform applied to the return value before publishing.
1225    pub fn transform(mut self, transform: impl Into<String>) -> Self {
1226        self.transform = Some(transform.into());
1227        self
1228    }
1229
1230    /// Attach metadata emitted alongside the shared buffer.
1231    pub fn metadata(mut self, metadata: JsonValue) -> Self {
1232        self.metadata = Some(metadata);
1233        self
1234    }
1235
1236    /// Provide a Python expression executed to transform the result (`result` in scope).
1237    pub fn python_transform(mut self, expression: impl Into<String>) -> Self {
1238        self.python_transform = Some(expression.into());
1239        self
1240    }
1241
1242    /// Control what the wrapper returns (`none`, `original`, `buffer`).
1243    pub fn return_behavior(mut self, behaviour: impl Into<String>) -> Self {
1244        self.return_behavior = Some(behaviour.into());
1245        self
1246    }
1247
1248    /// Specify behaviour when the user returns `None` (`skip`, `error`, `publish-empty`, `propagate`).
1249    pub fn when_none(mut self, mode: impl Into<String>) -> Self {
1250        self.when_none = Some(mode.into());
1251        self
1252    }
1253
1254    /// Provide an encoding hint when using the UTF-8 transform.
1255    pub fn encoding(mut self, encoding: impl Into<String>) -> Self {
1256        self.encoding = Some(encoding.into());
1257        self
1258    }
1259
1260    /// Enable/disable the publish binding.
1261    pub fn enabled(mut self, enabled: bool) -> Self {
1262        self.enabled = Some(enabled);
1263        self
1264    }
1265
1266    /// Serialise the builder into descriptor metadata (`serde_json::Value`).
1267    pub fn build(self) -> JsonValue {
1268        wrap_rawctx_metadata(build_publish_metadata(self))
1269    }
1270
1271    /// Merge the publish metadata into an existing descriptor metadata object.
1272    pub fn merge_into(self, metadata: &mut JsonValue) {
1273        merge_metadata(metadata, self.build());
1274    }
1275}
1276
1277fn build_binding_metadata(builder: RawCtxBindingBuilder) -> JsonMap<String, JsonValue> {
1278    let mut rawctx = JsonMap::new();
1279    if let Some(enabled) = builder.enabled {
1280        rawctx.insert("enabled".into(), JsonValue::Bool(enabled));
1281    }
1282    let mut binding = JsonMap::new();
1283    if let Some(arg) = builder.arg {
1284        binding.insert("arg".into(), JsonValue::String(arg));
1285    }
1286    if let Some(mode) = builder.mode {
1287        binding.insert("mode".into(), JsonValue::String(mode));
1288    }
1289    if let Some(decoder) = builder.decoder {
1290        binding.insert("decoder".into(), JsonValue::String(decoder));
1291    }
1292    if let Some(options) = builder.options {
1293        binding.insert("options".into(), JsonValue::Object(options));
1294    }
1295    if let Some(name) = builder.metadata_arg {
1296        binding.insert("metadata_arg".into(), JsonValue::String(name));
1297    }
1298    if let Some(name) = builder.raw_arg {
1299        binding.insert("raw_arg".into(), JsonValue::String(name));
1300    }
1301    if let Some(loader) = builder.python_loader {
1302        binding.insert("python_loader".into(), JsonValue::String(loader));
1303    }
1304    if let Some(default) = builder.default {
1305        binding.insert("default".into(), default);
1306    }
1307    if let Some(optional) = builder.optional {
1308        binding.insert("optional".into(), JsonValue::Bool(optional));
1309    }
1310    if let Some(table) = builder.table {
1311        table.validate().expect("rawctx table spec must be valid");
1312        binding.insert("table".into(), table.into_json());
1313    }
1314    if !binding.is_empty() {
1315        rawctx.insert("binding".into(), JsonValue::Object(binding));
1316    }
1317    rawctx
1318}
1319
1320fn build_publish_metadata(builder: RawCtxPublishBuilder) -> JsonMap<String, JsonValue> {
1321    let mut rawctx = JsonMap::new();
1322    if let Some(enabled) = builder.enabled {
1323        rawctx.insert("enabled".into(), JsonValue::Bool(enabled));
1324    }
1325    let mut publish = JsonMap::new();
1326    publish.insert("id".into(), JsonValue::String(builder.id));
1327    if let Some(mode) = builder.mode {
1328        publish.insert("mode".into(), JsonValue::String(mode));
1329    }
1330    if let Some(transform) = builder.transform {
1331        publish.insert("transform".into(), JsonValue::String(transform));
1332    }
1333    if let Some(metadata) = builder.metadata {
1334        publish.insert("metadata".into(), metadata);
1335    }
1336    if let Some(expression) = builder.python_transform {
1337        publish.insert("python_transform".into(), JsonValue::String(expression));
1338    }
1339    if let Some(behaviour) = builder.return_behavior {
1340        publish.insert("return".into(), JsonValue::String(behaviour));
1341    }
1342    if let Some(mode) = builder.when_none {
1343        publish.insert("when_none".into(), JsonValue::String(mode));
1344    }
1345    if let Some(encoding) = builder.encoding {
1346        publish.insert("encoding".into(), JsonValue::String(encoding));
1347    }
1348    rawctx.insert("publish".into(), JsonValue::Object(publish));
1349    rawctx
1350}
1351
1352fn wrap_rawctx_metadata(rawctx: JsonMap<String, JsonValue>) -> JsonValue {
1353    let mut inner = JsonMap::new();
1354    inner.insert("rawctx".into(), JsonValue::Object(rawctx));
1355    let mut outer = JsonMap::new();
1356    outer.insert("aardvark".into(), JsonValue::Object(inner));
1357    JsonValue::Object(outer)
1358}
1359
1360fn merge_metadata(target: &mut JsonValue, incoming: JsonValue) {
1361    match (target, incoming) {
1362        (JsonValue::Object(target_map), JsonValue::Object(incoming_map)) => {
1363            for (key, value) in incoming_map.into_iter() {
1364                match target_map.get_mut(&key) {
1365                    Some(existing) => merge_metadata(existing, value),
1366                    None => {
1367                        target_map.insert(key, value);
1368                    }
1369                }
1370            }
1371        }
1372        (target_value, incoming_value) => {
1373            *target_value = incoming_value;
1374        }
1375    }
1376}
1377
1378fn cached_rawctx_spec(session: &PySession) -> Result<Option<Arc<String>>> {
1379    session.rawctx_spec_json(|| {
1380        let spec = build_rawctx_auto_spec(session)?;
1381        match spec {
1382            Some(spec) => {
1383                let json = serde_json::to_string(&spec).map_err(|err| {
1384                    PyRunnerError::Execution(format!(
1385                        "failed to serialise rawctx auto-wrapper spec: {err}"
1386                    ))
1387                })?;
1388                Ok(Some(json))
1389            }
1390            None => Ok(None),
1391        }
1392    })
1393}
1394
/// Serialisable auto-wrapper specification assembled by
/// `build_rawctx_auto_spec` from the session descriptor.
#[derive(Clone, Debug, Serialize)]
pub(crate) struct RawCtxAutoSpec {
    /// Descriptor entrypoint; a spec is only built when it contains a `:`.
    entrypoint: String,
    /// Input bindings parsed from the descriptor inputs; omitted from the
    /// serialised form when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    inputs: Vec<RawCtxInputBindingSpec>,
    /// Copy of the first entry of `outputs`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    output: Option<RawCtxOutputSpec>,
    /// Output bindings parsed from the descriptor outputs; omitted from the
    /// serialised form when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    outputs: Vec<RawCtxOutputSpec>,
}
1405
/// Input binding produced by `parse_input_binding` for one descriptor field.
///
/// Optional fields are omitted from the serialised form when they are `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct RawCtxInputBindingSpec {
    /// Name of the descriptor field the binding was parsed from.
    field: String,
    /// Argument name; the parser falls back to the field name when the
    /// metadata gives none.
    #[serde(skip_serializing_if = "Option::is_none")]
    arg: Option<String>,
    /// Binding mode, lower-cased by the parser: `keyword` or `positional`.
    #[serde(skip_serializing_if = "Option::is_none")]
    mode: Option<String>,
    /// Decoder name.
    #[serde(skip_serializing_if = "Option::is_none")]
    decoder: Option<String>,
    /// Decoder options; the parser only accepts a JSON object here.
    #[serde(skip_serializing_if = "Option::is_none")]
    options: Option<JsonValue>,
    /// Value of the `metadata_arg` metadata entry.
    /// NOTE(review): presumably the argument that receives field metadata — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    metadata_arg: Option<String>,
    /// Value of the `raw_arg` metadata entry.
    /// NOTE(review): presumably the argument that receives the undecoded value — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    raw_arg: Option<String>,
    /// Value of the `python_loader` metadata entry.
    #[serde(skip_serializing_if = "Option::is_none")]
    python_loader: Option<String>,
    /// Default value, copied verbatim from the metadata (a JSON `null` is kept as-is).
    #[serde(skip_serializing_if = "Option::is_none")]
    default: Option<JsonValue>,
    /// Optional flag.
    #[serde(skip_serializing_if = "Option::is_none")]
    optional: Option<bool>,
    /// Table schema, parsed from `table` or, failing that, built from `table_manifest`.
    #[serde(skip_serializing_if = "Option::is_none")]
    table: Option<RawCtxTableSpec>,
}
1430
/// Output (publish) binding produced by `parse_output_binding` for one
/// descriptor field.
///
/// Optional fields are omitted from the serialised form when they are `None`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct RawCtxOutputSpec {
    /// Shared-buffer identifier; required by the parser.
    id: String,
    /// Publish mode, lower-cased by the parser; only `publish-buffer` is accepted.
    #[serde(skip_serializing_if = "Option::is_none")]
    mode: Option<String>,
    /// Transform applied before publishing, lower-cased by the parser
    /// (`memoryview`, `bytes`, `utf8` or `identity`).
    #[serde(skip_serializing_if = "Option::is_none")]
    transform: Option<String>,
    /// Metadata emitted alongside the shared buffer.
    #[serde(skip_serializing_if = "Option::is_none")]
    metadata: Option<JsonValue>,
    /// Python expression used to transform the result.
    #[serde(skip_serializing_if = "Option::is_none")]
    python_transform: Option<String>,
    /// What the wrapper returns (`none`, `original` or `buffer`); the parser
    /// reads it from the metadata key `return`, falling back to `return_behavior`.
    #[serde(skip_serializing_if = "Option::is_none")]
    return_behavior: Option<String>,
    /// Behaviour when the user returns `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    when_none: Option<String>,
    /// Encoding hint for the UTF-8 transform.
    #[serde(skip_serializing_if = "Option::is_none")]
    encoding: Option<String>,
}
1449
1450fn build_rawctx_auto_spec(session: &PySession) -> Result<Option<RawCtxAutoSpec>> {
1451    let descriptor = session.descriptor();
1452    let entrypoint = descriptor.entrypoint();
1453    if !entrypoint.contains(':') {
1454        return Ok(None);
1455    }
1456
1457    let mut inputs = Vec::new();
1458    for field in &descriptor.inputs {
1459        if let Some(binding) = parse_input_binding(field)? {
1460            inputs.push(binding);
1461        }
1462    }
1463
1464    let mut outputs = Vec::new();
1465    for field in &descriptor.outputs {
1466        if let Some(output) = parse_output_binding(field)? {
1467            outputs.push(output);
1468        }
1469    }
1470
1471    let primary_output = outputs.first().cloned();
1472
1473    if inputs.is_empty() && outputs.is_empty() {
1474        return Ok(None);
1475    }
1476
1477    Ok(Some(RawCtxAutoSpec {
1478        entrypoint: entrypoint.to_owned(),
1479        inputs,
1480        output: primary_output,
1481        outputs,
1482    }))
1483}
1484
1485fn parse_table_spec(value: &JsonValue) -> Result<RawCtxTableSpec> {
1486    let spec: RawCtxTableSpec = serde_json::from_value(value.clone())
1487        .map_err(|err| PyRunnerError::Execution(format!("invalid rawctx table spec: {err}")))?;
1488    spec.validate()?;
1489    Ok(spec)
1490}
1491
1492fn parse_input_binding(field: &FieldDescriptor) -> Result<Option<RawCtxInputBindingSpec>> {
1493    let metadata = match &field.metadata {
1494        Some(value) => value,
1495        None => return Ok(None),
1496    };
1497    let rawctx = match extract_rawctx_metadata(metadata) {
1498        Some(value) => value,
1499        None => return Ok(None),
1500    };
1501
1502    if matches!(
1503        rawctx
1504            .get("mode")
1505            .and_then(|value| value.as_str())
1506            .map(|mode| mode.eq_ignore_ascii_case("manual")
1507                || mode.eq_ignore_ascii_case("skip")
1508                || mode.eq_ignore_ascii_case("disabled")),
1509        Some(true)
1510    ) {
1511        return Ok(None);
1512    }
1513
1514    let enabled = rawctx
1515        .get("enabled")
1516        .and_then(|value| value.as_bool())
1517        .unwrap_or(true);
1518    if !enabled {
1519        return Ok(None);
1520    }
1521
1522    let binding = if let Some(value) = rawctx.get("binding") {
1523        value.as_object().ok_or_else(|| {
1524            PyRunnerError::Execution("rawctx binding metadata must be a JSON object".into())
1525        })?
1526    } else {
1527        rawctx
1528    };
1529
1530    let arg = match binding.get("arg") {
1531        Some(value) if value.is_null() => None,
1532        Some(value) => {
1533            let string = value.as_str().ok_or_else(|| {
1534                PyRunnerError::Execution("rawctx binding arg must be a string when provided".into())
1535            })?;
1536            Some(string.to_owned())
1537        }
1538        None => None,
1539    };
1540    let arg = arg.or_else(|| Some(field.name.clone()));
1541
1542    let mode = binding
1543        .get("mode")
1544        .map(|value| {
1545            let mode = value.as_str().ok_or_else(|| {
1546                PyRunnerError::Execution("rawctx binding mode must be a string".into())
1547            })?;
1548            let lowered = mode.to_ascii_lowercase();
1549            if lowered != "keyword" && lowered != "positional" {
1550                return Err(PyRunnerError::Execution(format!(
1551                    "unsupported rawctx binding mode '{mode}' (expected 'keyword' or 'positional')"
1552                )));
1553            }
1554            Ok(lowered)
1555        })
1556        .transpose()?;
1557
1558    if mode.as_deref() == Some("positional") && arg.is_none() {
1559        return Err(PyRunnerError::Execution(
1560            "rawctx binding cannot be positional without an argument name".into(),
1561        ));
1562    }
1563
1564    let decoder = binding
1565        .get("decoder")
1566        .map(|value| {
1567            value
1568                .as_str()
1569                .map(|s| s.to_owned())
1570                .ok_or_else(|| PyRunnerError::Execution("rawctx decoder must be a string".into()))
1571        })
1572        .transpose()?;
1573
1574    let options = match binding.get("options") {
1575        Some(value) if value.is_null() => None,
1576        Some(value) => {
1577            if value.is_object() {
1578                Some(value.clone())
1579            } else {
1580                return Err(PyRunnerError::Execution(
1581                    "rawctx binding options must be a JSON object".into(),
1582                ));
1583            }
1584        }
1585        None => None,
1586    };
1587
1588    validate_decoder_options(
1589        decoder.as_deref(),
1590        options.as_ref(),
1591        &format!("rawctx binding '{}'", field.name),
1592    )?;
1593
1594    let metadata_arg = match binding.get("metadata_arg") {
1595        Some(value) if value.is_null() => None,
1596        Some(value) => {
1597            let string = value.as_str().ok_or_else(|| {
1598                PyRunnerError::Execution(
1599                    "rawctx metadata_arg must be a string when provided".into(),
1600                )
1601            })?;
1602            Some(string.to_owned())
1603        }
1604        None => None,
1605    };
1606
1607    let raw_arg = match binding.get("raw_arg") {
1608        Some(value) if value.is_null() => None,
1609        Some(value) => {
1610            let string = value.as_str().ok_or_else(|| {
1611                PyRunnerError::Execution("rawctx raw_arg must be a string when provided".into())
1612            })?;
1613            Some(string.to_owned())
1614        }
1615        None => None,
1616    };
1617
1618    let python_loader = binding
1619        .get("python_loader")
1620        .map(|value| {
1621            value.as_str().map(|s| s.to_owned()).ok_or_else(|| {
1622                PyRunnerError::Execution("rawctx python_loader must be a string".into())
1623            })
1624        })
1625        .transpose()?;
1626
1627    let default = binding.get("default").cloned();
1628
1629    let optional = binding
1630        .get("optional")
1631        .map(|value| {
1632            value
1633                .as_bool()
1634                .ok_or_else(|| PyRunnerError::Execution("rawctx optional must be a boolean".into()))
1635        })
1636        .transpose()?;
1637
1638    let mut table = match binding.get("table") {
1639        Some(value) => Some(parse_table_spec(value)?),
1640        None => None,
1641    };
1642
1643    if table.is_none() {
1644        if let Some(manifest_value) = binding
1645            .get("table_manifest")
1646            .or_else(|| rawctx.get("table_manifest"))
1647        {
1648            table = Some(RawCtxTableSpec::from_manifest(manifest_value)?);
1649        }
1650    }
1651
1652    if arg.is_none()
1653        && metadata_arg.is_none()
1654        && raw_arg.is_none()
1655        && python_loader.is_none()
1656        && default.is_none()
1657        && table.is_none()
1658    {
1659        return Err(PyRunnerError::Execution(
1660            "rawctx binding must project at least one argument or provide a custom loader/default"
1661                .into(),
1662        ));
1663    }
1664
1665    Ok(Some(RawCtxInputBindingSpec {
1666        field: field.name.clone(),
1667        arg,
1668        mode,
1669        decoder,
1670        options,
1671        metadata_arg,
1672        raw_arg,
1673        python_loader,
1674        default,
1675        optional,
1676        table,
1677    }))
1678}
1679
1680fn parse_output_binding(field: &FieldDescriptor) -> Result<Option<RawCtxOutputSpec>> {
1681    let metadata = match &field.metadata {
1682        Some(value) => value,
1683        None => return Ok(None),
1684    };
1685    let rawctx = match extract_rawctx_metadata(metadata) {
1686        Some(value) => value,
1687        None => return Ok(None),
1688    };
1689
1690    if matches!(
1691        rawctx
1692            .get("mode")
1693            .and_then(|value| value.as_str())
1694            .map(|mode| mode.eq_ignore_ascii_case("manual")
1695                || mode.eq_ignore_ascii_case("skip")
1696                || mode.eq_ignore_ascii_case("disabled")),
1697        Some(true)
1698    ) {
1699        return Ok(None);
1700    }
1701
1702    let publish_obj = if let Some(value) = rawctx.get("publish") {
1703        value.as_object().ok_or_else(|| {
1704            PyRunnerError::Execution("rawctx publish metadata must be a JSON object".into())
1705        })?
1706    } else {
1707        rawctx
1708    };
1709
1710    let enabled = publish_obj
1711        .get("enabled")
1712        .and_then(|value| value.as_bool())
1713        .unwrap_or(true);
1714    if !enabled {
1715        return Ok(None);
1716    }
1717
1718    let mode = publish_obj
1719        .get("mode")
1720        .and_then(|value| value.as_str())
1721        .map(|mode| mode.to_ascii_lowercase());
1722
1723    let mode_value = mode.as_deref().unwrap_or("publish-buffer");
1724    if mode_value != "publish-buffer" {
1725        return Err(PyRunnerError::Execution(format!(
1726            "unsupported rawctx output mode '{mode_value}'"
1727        )));
1728    }
1729
1730    let id = publish_obj
1731        .get("id")
1732        .and_then(|value| value.as_str())
1733        .ok_or_else(|| {
1734            PyRunnerError::Execution("rawctx output publish requires an 'id' field".into())
1735        })?
1736        .to_owned();
1737
1738    let transform = publish_obj
1739        .get("transform")
1740        .and_then(|value| value.as_str())
1741        .map(|value| value.to_ascii_lowercase());
1742
1743    if let Some(ref transform_value) = transform {
1744        let supported = ["memoryview", "bytes", "utf8", "identity"];
1745        if !supported
1746            .iter()
1747            .any(|item| item.eq_ignore_ascii_case(transform_value))
1748        {
1749            return Err(PyRunnerError::Execution(format!(
1750                "unsupported rawctx output transform '{transform_value}'"
1751            )));
1752        }
1753    }
1754
1755    let python_transform = publish_obj
1756        .get("python_transform")
1757        .and_then(|value| value.as_str())
1758        .map(|value| value.to_owned());
1759
1760    let return_behavior = publish_obj
1761        .get("return")
1762        .or_else(|| publish_obj.get("return_behavior"))
1763        .and_then(|value| value.as_str())
1764        .map(|value| value.to_ascii_lowercase());
1765
1766    if let Some(ref behaviour) = return_behavior {
1767        let supported = ["none", "original", "buffer"];
1768        if !supported
1769            .iter()
1770            .any(|item| item.eq_ignore_ascii_case(behaviour))
1771        {
1772            return Err(PyRunnerError::Execution(format!(
1773                "unsupported rawctx return behaviour '{behaviour}'"
1774            )));
1775        }
1776    }
1777
1778    let when_none = publish_obj
1779        .get("when_none")
1780        .and_then(|value| value.as_str())
1781        .map(|value| value.to_ascii_lowercase());
1782
1783    if let Some(ref mode) = when_none {
1784        let supported = ["skip", "error", "publish-empty", "propagate"];
1785        if !supported.iter().any(|item| item.eq_ignore_ascii_case(mode)) {
1786            return Err(PyRunnerError::Execution(format!(
1787                "unsupported rawctx when_none behaviour '{mode}'"
1788            )));
1789        }
1790    }
1791
1792    let encoding = publish_obj
1793        .get("encoding")
1794        .and_then(|value| value.as_str())
1795        .map(|value| value.to_owned());
1796
1797    Ok(Some(RawCtxOutputSpec {
1798        id,
1799        mode: mode.filter(|m| m != "publish-buffer"),
1800        transform,
1801        metadata: publish_obj.get("metadata").cloned(),
1802        python_transform,
1803        return_behavior,
1804        when_none,
1805        encoding,
1806    }))
1807}
1808
1809fn extract_rawctx_metadata(value: &JsonValue) -> Option<&serde_json::Map<String, JsonValue>> {
1810    let object = value.as_object()?;
1811    if let Some(aardvark) = object.get("aardvark").and_then(|value| value.as_object()) {
1812        if let Some(rawctx) = aardvark.get("rawctx").and_then(|value| value.as_object()) {
1813            return Some(rawctx);
1814        }
1815    }
1816    object.get("rawctx").and_then(|value| value.as_object())
1817}
1818
1819fn validate_decoder_options(
1820    decoder: Option<&str>,
1821    options: Option<&JsonValue>,
1822    context: &str,
1823) -> Result<()> {
1824    let Some(raw_decoder) = decoder else {
1825        if let Some(value) = options {
1826            if !value.is_null() && !value.is_object() {
1827                return Err(PyRunnerError::Execution(format!(
1828                    "{context} decoder options must be a JSON object"
1829                )));
1830            }
1831        }
1832        return Ok(());
1833    };
1834
1835    let trimmed = raw_decoder.trim();
1836    if trimmed.is_empty() {
1837        return Err(PyRunnerError::Execution(format!(
1838            "{context} decoder cannot be empty"
1839        )));
1840    }
1841
1842    let decoder = trimmed.to_ascii_lowercase();
1843    let Some(options_value) = options else {
1844        return Ok(());
1845    };
1846
1847    let object = options_value.as_object().ok_or_else(|| {
1848        PyRunnerError::Execution(format!("{context} decoder options must be a JSON object"))
1849    })?;
1850
1851    if object.is_empty() {
1852        return Ok(());
1853    }
1854
1855    match decoder.as_str() {
1856        "utf8" | "string" | "json" => {
1857            if let Some(value) = object.get("encoding") {
1858                let Some(string) = value.as_str() else {
1859                    return Err(PyRunnerError::Execution(format!(
1860                        "{context} decoder option 'encoding' must be a string"
1861                    )));
1862                };
1863                if string.trim().is_empty() {
1864                    return Err(PyRunnerError::Execution(format!(
1865                        "{context} decoder option 'encoding' cannot be empty"
1866                    )));
1867                }
1868            }
1869            if let Some(value) = object.get("errors") {
1870                if value.as_str().is_none() {
1871                    return Err(PyRunnerError::Execution(format!(
1872                        "{context} decoder option 'errors' must be a string"
1873                    )));
1874                }
1875            }
1876        }
1877        "float32" | "f32" | "float64" | "f64" | "int32" | "i32" | "uint32" | "u32" => {
1878            if let Some(value) = object.get("struct_format") {
1879                let Some(string) = value.as_str() else {
1880                    return Err(PyRunnerError::Execution(format!(
1881                        "{context} decoder option 'struct_format' must be a string"
1882                    )));
1883                };
1884                let trimmed = string.trim();
1885                if trimmed.is_empty() {
1886                    return Err(PyRunnerError::Execution(format!(
1887                        "{context} decoder option 'struct_format' cannot be empty"
1888                    )));
1889                }
1890
1891                let expected = match decoder.as_str() {
1892                    "float32" | "f32" => 'f',
1893                    "float64" | "f64" => 'd',
1894                    "int32" | "i32" => 'i',
1895                    "uint32" | "u32" => 'I',
1896                    other => {
1897                        debug_assert!(matches!(
1898                            other,
1899                            "float32"
1900                                | "f32"
1901                                | "float64"
1902                                | "f64"
1903                                | "int32"
1904                                | "i32"
1905                                | "uint32"
1906                                | "u32"
1907                        ));
1908                        'f'
1909                    }
1910                };
1911                let type_char = trimmed.chars().last().unwrap();
1912                if !type_char.eq_ignore_ascii_case(&expected) {
1913                    return Err(PyRunnerError::Execution(format!(
1914                        "{context} decoder option 'struct_format' must end with '{}'",
1915                        expected
1916                    )));
1917                }
1918
1919                if trimmed.len() > type_char.len_utf8() {
1920                    let prefix = &trimmed[..trimmed.len() - type_char.len_utf8()];
1921                    if !prefix.is_empty() {
1922                        let mut chars = prefix.chars();
1923                        let first = chars.next().unwrap();
1924                        let allowed = ['<', '>', '!', '=', '@'];
1925                        if allowed.contains(&first) {
1926                            if chars.any(|c| !c.is_ascii_digit()) {
1927                                return Err(PyRunnerError::Execution(format!(
1928                                    "{context} decoder option 'struct_format' prefix must contain only digits after the byteorder flag"
1929                                )));
1930                            }
1931                        } else if !first.is_ascii_digit() || chars.any(|c| !c.is_ascii_digit()) {
1932                            return Err(PyRunnerError::Execution(format!(
1933                                "{context} decoder option 'struct_format' prefix must be digits or a byteorder flag"
1934                            )));
1935                        }
1936                    }
1937                }
1938            }
1939        }
1940        "int64" | "i64" => {
1941            if let Some(value) = object.get("byteorder") {
1942                let Some(string) = value.as_str() else {
1943                    return Err(PyRunnerError::Execution(format!(
1944                        "{context} decoder option 'byteorder' must be a string"
1945                    )));
1946                };
1947                let lowered = string.trim().to_ascii_lowercase();
1948                if lowered != "little" && lowered != "big" {
1949                    return Err(PyRunnerError::Execution(format!(
1950                        "{context} decoder option 'byteorder' must be 'little' or 'big'"
1951                    )));
1952                }
1953            }
1954            if let Some(value) = object.get("signed") {
1955                if !value.is_boolean() {
1956                    return Err(PyRunnerError::Execution(format!(
1957                        "{context} decoder option 'signed' must be a boolean"
1958                    )));
1959                }
1960            }
1961        }
1962        "bool" | "boolean" => {
1963            if let Some(value) = object.get("byteorder") {
1964                let Some(string) = value.as_str() else {
1965                    return Err(PyRunnerError::Execution(format!(
1966                        "{context} decoder option 'byteorder' must be a string"
1967                    )));
1968                };
1969                let lowered = string.trim().to_ascii_lowercase();
1970                if lowered != "little" && lowered != "big" {
1971                    return Err(PyRunnerError::Execution(format!(
1972                        "{context} decoder option 'byteorder' must be 'little' or 'big'"
1973                    )));
1974                }
1975            }
1976        }
1977        "base64" | "b64" => {
1978            if let Some(value) = object.get("altchars") {
1979                let Some(string) = value.as_str() else {
1980                    return Err(PyRunnerError::Execution(format!(
1981                        "{context} decoder option 'altchars' must be a string"
1982                    )));
1983                };
1984                if string.chars().count() != 2 {
1985                    return Err(PyRunnerError::Execution(format!(
1986                        "{context} decoder option 'altchars' must contain exactly two characters"
1987                    )));
1988                }
1989            }
1990            for key in ["validate", "as_memoryview", "as_bytearray"] {
1991                if let Some(value) = object.get(key) {
1992                    if !value.is_boolean() {
1993                        return Err(PyRunnerError::Execution(format!(
1994                            "{context} decoder option '{key}' must be a boolean"
1995                        )));
1996                    }
1997                }
1998            }
1999        }
2000        _ => {}
2001    }
2002
2003    Ok(())
2004}
2005
/// Python source that installs the automatic rawctx wrapper in the guest.
///
/// The text contains a `{spec_json}` placeholder inside `json.loads(r'''…''')`;
/// presumably the caller substitutes the serialized spec before evaluating the
/// snippet (the substitution is not part of this constant).
///
/// When executed, the snippet:
///
/// * exposes `builtins.__aardvark_output_buffer`, which calls the JS global
///   `__aardvarkAcquireOutputBuffer` and returns the result as a `memoryview`;
/// * defines decoding helpers that turn an input payload (`data` plus
///   `metadata`) into a Python value according to the binding's `decoder`,
///   `options`, `python_loader` and optional `table` spec;
/// * defines output helpers that publish the entrypoint's result through the
///   JS global `__aardvarkPublishBuffer`, honouring `transform`, `when_none`,
///   `python_transform` and `return_behavior`;
/// * if the spec names a `module:function` entrypoint and declares any inputs
///   or outputs, replaces that function on its module with a wrapper that
///   reads `builtins.__aardvark_rawctx_inputs`, builds the call arguments,
///   invokes the original target and applies the output specs.
///
/// Note that `python_loader` and `python_transform` expressions are run with
/// Python's `eval`.
const RAWCTX_AUTO_WRAPPER_SNIPPET: &str = r#"
import builtins, importlib, json

__aardvark_rawctx_spec = json.loads(r'''{spec_json}''')

def __aardvark__acquire_output_buffer(size, *, id=None, metadata=None):
    if size is None:
        raise ValueError("size is required")
    length = int(size)
    if length < 0:
        raise ValueError("size must be non-negative")
    from js import globalThis as _js
    view = _js.__aardvarkAcquireOutputBuffer(id, length, metadata)
    if hasattr(view, "to_py"):
        py_view = view.to_py()
    else:
        try:
            from pyodide.ffi import to_py

            py_view = to_py(view)
        except ImportError:
            py_view = view
    if isinstance(py_view, memoryview):
        return py_view
    return memoryview(py_view)

builtins.__aardvark_output_buffer = __aardvark__acquire_output_buffer

def __aardvark__decode_rawctx(binding, payload):
    value, metadata, raw_payload = __aardvark__decode_scalar(binding, payload)
    table_spec = binding.get("table")
    if table_spec:
        table_value, table_metadata = __aardvark__materialize_table(table_spec, value)
        value = table_value
        if table_metadata is not None:
            if metadata is None:
                metadata = table_metadata
            elif isinstance(metadata, dict) and isinstance(table_metadata, dict):
                merged = dict(metadata)
                merged.update(table_metadata)
                metadata = merged
            else:
                metadata = table_metadata
    return value, metadata, raw_payload


def __aardvark__decode_scalar(binding, payload):
    if payload is None:
        return None, None, None
    data = payload.get("data")
    metadata = payload.get("metadata")
    raw_payload = payload
    if data is None:
        return None, metadata, raw_payload
    if binding.get("python_loader"):
        namespace = {
            "buffer": data,
            "metadata": metadata,
            "payload": raw_payload,
            "memoryview": memoryview,
        }
        return eval(binding["python_loader"], {}, namespace), metadata, raw_payload
    decoder = binding.get("decoder") or "memoryview"
    options = binding.get("options") or {}
    if decoder in ("memoryview", None):
        value = data
    elif decoder == "bytes":
        value = data.tobytes()
    elif decoder in ("utf8", "string"):
        encoding = options.get("encoding", "utf-8")
        errors = options.get("errors", "strict")
        value = data.tobytes().decode(encoding, errors)
    elif decoder in ("float32", "f32"):
        import struct as _struct
        fmt = options.get("struct_format", "<f")
        value = _struct.unpack(fmt, data.tobytes())[0]
    elif decoder in ("float64", "f64"):
        import struct as _struct
        fmt = options.get("struct_format", "<d")
        value = _struct.unpack(fmt, data.tobytes())[0]
    elif decoder in ("int32", "i32"):
        import struct as _struct
        fmt = options.get("struct_format", "<i")
        value = _struct.unpack(fmt, data.tobytes())[0]
    elif decoder in ("uint32", "u32"):
        import struct as _struct
        fmt = options.get("struct_format", "<I")
        value = _struct.unpack(fmt, data.tobytes())[0]
    elif decoder in ("int64", "i64"):
        byteorder = options.get("byteorder", "little")
        signed = bool(options.get("signed", True))
        value = int.from_bytes(data.tobytes(), byteorder=byteorder, signed=signed)
    elif decoder in ("bool", "boolean"):
        byteorder = options.get("byteorder", "little")
        value = bool(int.from_bytes(data.tobytes(), byteorder=byteorder, signed=False))
    elif decoder == "json":
        import json as _json
        encoding = options.get("encoding", "utf-8")
        errors = options.get("errors", "strict")
        value = _json.loads(data.tobytes().decode(encoding, errors))
    elif decoder in ("base64", "b64"):
        import base64 as _base64
        raw_bytes = data.tobytes()
        altchars = options.get("altchars")
        if altchars is not None and not isinstance(altchars, (bytes, bytearray)):
            altchars = str(altchars).encode()
        validate = bool(options.get("validate", False))
        decoded = _base64.b64decode(raw_bytes, altchars=altchars, validate=validate)
        if options.get("as_memoryview"):
            value = memoryview(decoded)
        elif options.get("as_bytearray"):
            value = bytearray(decoded)
        else:
            value = decoded
    elif decoder in ("bytearray", "bytesarray"):
        value = bytearray(data.tobytes())
    else:
        value = data
    return value, metadata, raw_payload


def __aardvark__materialize_table(spec, value):
    if value is None:
        return None, None
    columns = spec.get("columns") or []
    orient = (spec.get("orient") or "records").lower()
    if orient not in ("records", "columns"):
        raise ValueError(f"unsupported rawctx table orientation: {orient}")
    column_schema = {}
    for column in columns:
        name = column.get("name")
        if not name:
            continue
        column_meta = {}
        if "dtype" in column:
            column_meta["dtype"] = column["dtype"]
        if "nullable" in column:
            column_meta["nullable"] = column["nullable"]
        if "metadata" in column and isinstance(column.get("metadata"), dict):
            column_meta["metadata"] = column["metadata"]
        if "shape" in column:
            column_meta["shape"] = column["shape"]
        if "manifest" in column and isinstance(column.get("manifest"), dict):
            column_meta["manifest"] = column["manifest"]
        if column_meta:
            column_schema[name] = column_meta
    table_metadata = {"orient": orient}
    if column_schema:
        table_metadata["schema"] = {"columns": column_schema}
    if orient == "records":
        if not isinstance(value, (list, tuple)):
            raise TypeError("rawctx table expects a list of record dicts")
        result = {column.get("name"): [] for column in columns}
        for record in value:
            if not isinstance(record, dict):
                raise TypeError("rawctx table records must be dictionaries")
            for column in columns:
                name = column.get("name")
                if not name:
                    continue
                if name in record:
                    result[name].append(record[name])
                elif column.get("optional") or column.get("default") is not None:
                    result[name].append(column.get("default"))
                else:
                    raise KeyError(f"rawctx table column '{name}' is required")
        __aardvark__apply_column_decoders(result, columns)
        return result, table_metadata
    # columns orient
    if not isinstance(value, dict):
        raise TypeError("rawctx table expects a dict of columns")
    result = {}
    for column in columns:
        name = column.get("name")
        if not name:
            continue
        if name in value:
            result[name] = value[name]
        elif column.get("optional") or column.get("default") is not None:
            result[name] = column.get("default")
        else:
            raise KeyError(f"rawctx table column '{name}' is required")
    __aardvark__apply_column_decoders(result, columns)
    return result, table_metadata


def __aardvark__apply_column_decoders(result, columns):
    for column in columns:
        name = column.get("name")
        if not name or name not in result:
            continue
        decoder = column.get("decoder")
        if not decoder:
            continue
        series = result[name]
        options = column.get("options") or {}
        if isinstance(series, list):
            converted = []
            for item in series:
                payload = __aardvark__prepare_decoder_payload(item, options)
                if payload is None:
                    converted.append(item)
                    continue
                value, _, _ = __aardvark__decode_scalar({"decoder": decoder, "options": options}, payload)
                converted.append(value)
            result[name] = converted
        else:
            payload = __aardvark__prepare_decoder_payload(series, options)
            if payload is None:
                continue
            value, _, _ = __aardvark__decode_scalar({"decoder": decoder, "options": options}, payload)
            result[name] = value


def __aardvark__prepare_decoder_payload(item, options):
    if isinstance(item, memoryview):
        return {"data": item, "metadata": None}
    if isinstance(item, bytes):
        return {"data": memoryview(item), "metadata": None}
    if isinstance(item, bytearray):
        return {"data": memoryview(bytes(item)), "metadata": None}
    if isinstance(item, str):
        encoding = options.get("encoding", "utf-8") if isinstance(options, dict) else "utf-8"
        return {"data": memoryview(item.encode(encoding)), "metadata": None}
    return None

def __aardvark__apply_outputs(spec, result):
    if not spec:
        return result, False
    if isinstance(spec, dict):
        return __aardvark__apply_single_output(spec, result)
    if not isinstance(spec, (list, tuple)):
        raise TypeError("rawctx outputs must be a dict or list of dicts")
    final_result = result
    handled_any = False
    for item in spec:
        if item is None:
            continue
        candidate, handled = __aardvark__apply_single_output(item, result)
        if handled:
            handled_any = True
            final_result = candidate
    return final_result, handled_any


def __aardvark__apply_single_output(spec, result):
    if not spec:
        return result, False
    if not isinstance(spec, dict):
        raise TypeError("rawctx output spec must be a dict")
    mode = spec.get("mode") or "publish-buffer"
    if mode != "publish-buffer":
        return result, False
    when_none = spec.get("when_none", "skip")
    if result is None:
        if when_none == "error":
            raise ValueError("rawctx output requires a non-None result")
        if when_none == "publish-empty":
            data_value = memoryview(b"")
        elif when_none == "propagate":
            return None, False
        else:
            return None, False
    else:
        data_value = result
    metadata = spec.get("metadata")
    if spec.get("python_transform"):
        namespace = {
            "result": result,
            "metadata": metadata,
            "memoryview": memoryview,
        }
        transformed = eval(spec["python_transform"], {}, namespace)
        if isinstance(transformed, tuple) and len(transformed) == 2:
            data_value, metadata = transformed
        else:
            data_value = transformed
    transform = spec.get("transform", "memoryview")
    if transform == "memoryview":
        if not isinstance(data_value, memoryview):
            if isinstance(data_value, (bytes, bytearray)):
                data_value = memoryview(data_value)
            else:
                try:
                    data_value = memoryview(data_value)
                except TypeError:
                    data_value = memoryview(bytes(data_value))
    elif transform == "bytes":
        if not isinstance(data_value, memoryview):
            if isinstance(data_value, (bytes, bytearray)):
                data_value = memoryview(data_value)
            else:
                try:
                    data_value = memoryview(data_value)
                except TypeError:
                    data_value = memoryview(bytes(data_value))

        try:
            cast_view = data_value.cast("B")
        except (TypeError, ValueError):
            cast_view = None

        if cast_view is not None and cast_view.contiguous:
            data_value = cast_view
        else:
            source_view = cast_view if cast_view is not None else data_value
            data_value = memoryview(source_view.tobytes())
    elif transform == "utf8":
        if not isinstance(data_value, str):
            raise TypeError("rawctx output expected str for utf8 transform")
        encoding = spec.get("encoding", "utf-8")
        data_value = memoryview(data_value.encode(encoding))
    elif transform == "identity":
        pass
    else:
        raise ValueError(f"unsupported rawctx output transform: {transform}")
    publish_id = spec.get("id")
    if not publish_id:
        raise ValueError("rawctx output publish-buffer requires an id")
    from js import globalThis as _js
    _js.__aardvarkPublishBuffer(publish_id, data_value, metadata)
    behaviour = spec.get("return_behavior") or "none"
    if behaviour == "original":
        return result, True
    if behaviour == "buffer":
        return data_value, True
    return None, True

_module_name, _, _func_name = (__aardvark_rawctx_spec.get("entrypoint") or "").partition(":")
if _module_name and _func_name:
    _inputs = __aardvark_rawctx_spec.get("inputs") or []
    _output_specs = __aardvark_rawctx_spec.get("outputs") or []
    _legacy_output_spec = __aardvark_rawctx_spec.get("output")
    if not _output_specs and _legacy_output_spec:
        _output_specs = [_legacy_output_spec]
    if _inputs or _output_specs:
        _module = importlib.import_module(_module_name)
        _target = getattr(_module, _func_name)

        def __aardvark_rawctx_wrapper(
            __aardvark_target=_target,
            __aardvark_inputs=_inputs,
            __aardvark_outputs=tuple(_output_specs),
        ):
            source = getattr(builtins, "__aardvark_rawctx_inputs", {})
            args = []
            kwargs = {}
            for binding in __aardvark_inputs:
                payload = source.get(binding["field"])
                if payload is None:
                    if "default" in binding:
                        value = binding["default"]
                        metadata = None
                        raw_payload = None
                    elif binding.get("optional"):
                        value = None
                        metadata = None
                        raw_payload = None
                    else:
                        raise KeyError(f"rawctx input '{binding['field']}' is required")
                else:
                    value, metadata, raw_payload = __aardvark__decode_rawctx(binding, payload)
                    if value is None and "default" in binding:
                        value = binding["default"]
                if binding.get("metadata_arg"):
                    kwargs[binding["metadata_arg"]] = metadata
                if binding.get("raw_arg"):
                    kwargs[binding["raw_arg"]] = payload
                arg_name = binding.get("arg")
                if arg_name is not None:
                    mode = binding.get("mode", "keyword")
                    if mode == "positional":
                        args.append(value)
                    else:
                        kwargs[arg_name] = value
            result = __aardvark_target(*args, **kwargs)
            result, _handled = __aardvark__apply_outputs(__aardvark_outputs, result)
            return result

        setattr(_module, _func_name, __aardvark_rawctx_wrapper)
        del __aardvark_rawctx_wrapper, _module, _target, _inputs, _output_specs, _legacy_output_spec

del __aardvark_rawctx_spec
"#;
2390
2391fn publish_rawctx_inputs(runtime: &mut JsRuntime, inputs: &[RawCtxInput]) -> Result<()> {
2392    runtime.with_context(|scope, _| {
2393        let global = scope.get_current_context().global(scope);
2394
2395        if let Some(clear_value) = global.get(
2396            scope,
2397            v8::String::new(scope, "__aardvarkClearInputBuffers")
2398                .ok_or_else(|| PyRunnerError::Execution("failed to allocate clear key".into()))?
2399                .into(),
2400        ) {
2401            if let Ok(clear_fn) = v8::Local::<v8::Function>::try_from(clear_value) {
2402                let _ = clear_fn.call(scope, global.into(), &[]);
2403            }
2404        }
2405
2406        let register_key = v8::String::new(scope, "__aardvarkRegisterInputBuffer")
2407            .ok_or_else(|| PyRunnerError::Execution("failed to allocate register key".into()))?;
2408        let register_value = global.get(scope, register_key.into()).ok_or_else(|| {
2409            PyRunnerError::Execution("__aardvarkRegisterInputBuffer is not defined".into())
2410        })?;
2411        let register_fn = v8::Local::<v8::Function>::try_from(register_value).map_err(|_| {
2412            PyRunnerError::Execution("__aardvarkRegisterInputBuffer is not a function".into())
2413        })?;
2414
2415        for input in inputs {
2416            let name_value = v8::String::new(scope, &input.name).ok_or_else(|| {
2417                PyRunnerError::Execution("failed to allocate input buffer name".into())
2418            })?;
2419
2420            let vec = input.buffer.clone().to_vec();
2421            let backing = v8::ArrayBuffer::new_backing_store_from_vec(vec);
2422            let shared = backing.make_shared();
2423            let array_buffer = v8::ArrayBuffer::with_backing_store(scope, &shared);
2424            let typed = v8::Uint8Array::new(scope, array_buffer, 0, input.buffer.len())
2425                .ok_or_else(|| {
2426                    PyRunnerError::Execution(
2427                        "failed to allocate Uint8Array for input buffer".into(),
2428                    )
2429                })?;
2430
2431            let metadata_value: v8::Local<v8::Value> = if let Some(meta) = &input.metadata {
2432                let meta_json = meta.to_json_value()?;
2433                let meta_str = serde_json::to_string(&meta_json).map_err(|err| {
2434                    PyRunnerError::Execution(format!("failed to serialize rawctx metadata: {err}"))
2435                })?;
2436                let meta_js_str = v8::String::new(scope, &meta_str).ok_or_else(|| {
2437                    PyRunnerError::Execution("failed to allocate metadata json string".into())
2438                })?;
2439                v8::json::parse(scope, meta_js_str).ok_or_else(|| {
2440                    PyRunnerError::Execution("failed to parse metadata JSON into JS value".into())
2441                })?
2442            } else {
2443                v8::undefined(scope).into()
2444            };
2445
2446            register_fn
2447                .call(
2448                    scope,
2449                    global.into(),
2450                    &[name_value.into(), typed.into(), metadata_value],
2451                )
2452                .ok_or_else(|| {
2453                    PyRunnerError::Execution("registering rawctx input buffer failed".into())
2454                })?;
2455        }
2456
2457        Ok(())
2458    })
2459}
2460
2461fn clear_rawctx_inputs(runtime: &mut JsRuntime) -> Result<()> {
2462    runtime.with_context(|scope, _| {
2463        let global = scope.get_current_context().global(scope);
2464        let clear_key = v8::String::new(scope, "__aardvarkClearInputBuffers")
2465            .ok_or_else(|| PyRunnerError::Execution("failed to allocate clear key".into()))?;
2466        if let Some(value) = global.get(scope, clear_key.into()) {
2467            if let Ok(clear_fn) = v8::Local::<v8::Function>::try_from(value) {
2468                let _ = clear_fn.call(scope, global.into(), &[]);
2469            }
2470        }
2471        Ok(())
2472    })
2473}