Skip to main content

shape_runtime/stdlib/
arrow_module.rs

1//! Native `arrow` module for reading Arrow IPC files.
2//!
3//! Exports: arrow.read_table, arrow.read_tables, arrow.metadata
4//!
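//! `arrow.read_table` and `arrow.metadata` require `FsRead` permission; `arrow.read_tables` is currently a surface-and-stop stub that never touches the filesystem.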
6//!
7//! W17-out-of-bundle-A-followups (2026-05-12): `arrow.read_tables`
8//! currently surfaces structured-stop. Per the C+ precedent in
9//! `phase-2d-playbook.md` §3, `Array<DataTable>` is genuinely
10//! homogeneous in `HeapKind::DataTable` — the natural Q25.A specialized
11//! variant is a `TypedArrayData::DataTable(Arc<TypedBuffer<Arc<DataTable>>>)`
12//! arm, which is out of scope for this sub-cluster (the prompt
13//! explicitly forbids new HeapKind variants and an added
14//! TypedArrayData variant would require ~40 exhaustive-match updates).
15//! The surface message names the natural follow-up sub-cluster so
16//! production callers see the structured error rather than a panic.
17//!
18//! Phase 2d Array cluster migration (historical context, 2026-05-07):
19//! ported to the typed marshal layer. `arrow.read_tables` returned
20//! `Array<DataTable>` via `ConcreteReturn::ArrayHeapValue` — each
21//! element was an `Arc<HeapValue::DataTable>`. Post-Q25.A,
22//! `build_specialized_from_heap_arcs` does not have a DataTable arm,
23//! so the marshal projection surfaces a structured error.
24//!
25//! Tests deferred — ValueWord-based test fixtures can't compile and
26//! aren't reconstructed until the shape-vm cascade provides a typed
27//! test harness, mirroring the file_ops migration in commit d716482.
28
29use crate::marshal::register_typed_fn_1;
30use crate::module_exports::ModuleExports;
31use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
32use arrow_ipc::reader::FileReader;
33use std::io::Cursor;
34use std::sync::Arc;
35
36/// Create the `arrow` module with Arrow IPC file reading functions.
37pub fn create_arrow_module() -> ModuleExports {
38    let mut module = ModuleExports::new("std::core::arrow");
39    module.description = "Arrow IPC columnar file reading".to_string();
40
41    // arrow.read_table(path: string) -> Result<DataTable, string>
42    register_typed_fn_1::<_, Arc<String>>(
43        &mut module,
44        "read_table",
45        "Read the first record batch from an Arrow IPC file",
46        "path",
47        "string",
48        ConcreteType::Result2(
49            Box::new(ConcreteType::DataTable),
50            Box::new(ConcreteType::String),
51        ),
52        |path, ctx| {
53            crate::module_exports::check_fs_permission(
54                ctx,
55                shape_abi_v1::Permission::FsRead,
56                path.as_str(),
57            )?;
58
59            let bytes = std::fs::read(path.as_str())
60                .map_err(|e| format!("arrow.read_table() failed to read '{}': {}", path, e))?;
61
62            let dt = crate::wire_conversion::datatable_from_ipc_bytes(&bytes, None, None)?;
63            Ok(TypedReturn::Ok(ConcreteReturn::DataTable(Arc::new(dt))))
64        },
65    );
66
67    // arrow.read_tables(path: string) -> Result<Array<DataTable>, string>
68    //
69    // W17-out-of-bundle-A-followups (2026-05-12): surface-and-stop. The
70    // `Array<DataTable>` return shape is genuinely homogeneous in
71    // `HeapKind::DataTable` — the natural Q25.A specialized variant is
72    // `TypedArrayData::DataTable(Arc<TypedBuffer<Arc<DataTable>>>)`, but
73    // adding a TypedArrayData variant is out of bundle-A-followups
74    // scope (prompt forbids new HeapKind variants AND a new
75    // TypedArrayData arm cascades through ~40 exhaustive matches).
76    // Body returns a structured `Err` payload so callers see the
77    // tracked follow-up rather than a marshal-layer panic.
78    register_typed_fn_1::<_, Arc<String>>(
79        &mut module,
80        "read_tables",
81        "Read all record batches from an Arrow IPC file",
82        "path",
83        "string",
84        ConcreteType::Result2(
85            Box::new(ConcreteType::ArrayHeapValue("Array<DataTable>".to_string())),
86            Box::new(ConcreteType::String),
87        ),
88        |path, _ctx| {
89            let _ = path; // suppress unused; the SURFACE response is path-independent
90            // phase-2d-hardening:(f) — arrow.read_tables surface-and-stop:
91            // Array<DataTable> needs TypedArrayData::DataTable variant
92            // (homogeneous-in-HeapKind::DataTable case per ADR-006 §2.7.24
93            // Q25.A spec list). Tracked as
94            // W17-typed-carrier-array-datatable follow-up.
95            Err(format!(
96                "arrow.method read_tables() -> SURFACE — `Array<DataTable>` needs a \
97                 typed-array-data DataTable specialized variant in ADR-006 \
98                 §2.7.24 Q25.A's spec list. Tracked as \
99                 W17-typed-carrier-array-datatable follow-up \
100                 (out of bundle-A-followups scope: new TypedArrayData arm \
101                 cascades through exhaustive matches across ~40 files). \
102                 ADR-006 §2.7.24 Q25.A."
103            ))
104        },
105    );
106
107    // arrow.metadata(path: string) -> Result<HashMap<string, string>, string>
108    register_typed_fn_1::<_, Arc<String>>(
109        &mut module,
110        "metadata",
111        "Read schema metadata from an Arrow IPC file header",
112        "path",
113        "string",
114        ConcreteType::Result2(
115            Box::new(ConcreteType::HashMapStringString),
116            Box::new(ConcreteType::String),
117        ),
118        |path, ctx| {
119            crate::module_exports::check_fs_permission(
120                ctx,
121                shape_abi_v1::Permission::FsRead,
122                path.as_str(),
123            )?;
124
125            let bytes = std::fs::read(path.as_str())
126                .map_err(|e| format!("arrow.metadata() failed to read '{}': {}", path, e))?;
127
128            let cursor = Cursor::new(bytes);
129            let reader = FileReader::try_new(cursor, None)
130                .map_err(|e| format!("arrow.metadata() invalid IPC file: {}", e))?;
131
132            let schema = reader.schema();
133            let meta = schema.metadata();
134
135            let pairs: Vec<(String, String)> = meta
136                .iter()
137                .map(|(k, v)| (k.clone(), v.clone()))
138                .collect();
139            Ok(TypedReturn::Ok(ConcreteReturn::HashMapStringString(pairs)))
140        },
141    );
142
143    module
144}