shape_runtime/stdlib/arrow_module.rs
1//! Native `arrow` module for reading Arrow IPC files.
2//!
3//! Exports: arrow.read_table, arrow.read_tables, arrow.metadata
4//!
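//! `arrow.read_table` and `arrow.metadata` require `FsRead` permission.
//! `arrow.read_tables` currently returns its structured error without
//! checking permissions or touching the filesystem.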
6//!
7//! W17-out-of-bundle-A-followups (2026-05-12): `arrow.read_tables`
8//! currently surfaces structured-stop. Per the C+ precedent in
9//! `phase-2d-playbook.md` §3, `Array<DataTable>` is genuinely
10//! homogeneous in `HeapKind::DataTable` — the natural Q25.A specialized
11//! variant is a `TypedArrayData::DataTable(Arc<TypedBuffer<Arc<DataTable>>>)`
12//! arm, which is out of scope for this sub-cluster (the prompt
13//! explicitly forbids new HeapKind variants and an added
14//! TypedArrayData variant would require ~40 exhaustive-match updates).
15//! The surface message names the natural follow-up sub-cluster so
16//! production callers see the structured error rather than a panic.
17//!
18//! Phase 2d Array cluster migration (historical context, 2026-05-07):
19//! ported to the typed marshal layer. `arrow.read_tables` returned
20//! `Array<DataTable>` via `ConcreteReturn::ArrayHeapValue` — each
21//! element was an `Arc<HeapValue::DataTable>`. Post-Q25.A,
22//! `build_specialized_from_heap_arcs` does not have a DataTable arm,
23//! so the marshal projection surfaces a structured error.
24//!
25//! Tests deferred — ValueWord-based test fixtures can't compile and
26//! aren't reconstructed until the shape-vm cascade provides a typed
27//! test harness, mirroring the file_ops migration in commit d716482.
28
29use crate::marshal::register_typed_fn_1;
30use crate::module_exports::ModuleExports;
31use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
32use arrow_ipc::reader::FileReader;
33use std::io::Cursor;
34use std::sync::Arc;
35
36/// Create the `arrow` module with Arrow IPC file reading functions.
37pub fn create_arrow_module() -> ModuleExports {
38 let mut module = ModuleExports::new("std::core::arrow");
39 module.description = "Arrow IPC columnar file reading".to_string();
40
41 // arrow.read_table(path: string) -> Result<DataTable, string>
42 register_typed_fn_1::<_, Arc<String>>(
43 &mut module,
44 "read_table",
45 "Read the first record batch from an Arrow IPC file",
46 "path",
47 "string",
48 ConcreteType::Result2(
49 Box::new(ConcreteType::DataTable),
50 Box::new(ConcreteType::String),
51 ),
52 |path, ctx| {
53 crate::module_exports::check_fs_permission(
54 ctx,
55 shape_abi_v1::Permission::FsRead,
56 path.as_str(),
57 )?;
58
59 let bytes = std::fs::read(path.as_str())
60 .map_err(|e| format!("arrow.read_table() failed to read '{}': {}", path, e))?;
61
62 let dt = crate::wire_conversion::datatable_from_ipc_bytes(&bytes, None, None)?;
63 Ok(TypedReturn::Ok(ConcreteReturn::DataTable(Arc::new(dt))))
64 },
65 );
66
67 // arrow.read_tables(path: string) -> Result<Array<DataTable>, string>
68 //
69 // W17-out-of-bundle-A-followups (2026-05-12): surface-and-stop. The
70 // `Array<DataTable>` return shape is genuinely homogeneous in
71 // `HeapKind::DataTable` — the natural Q25.A specialized variant is
72 // `TypedArrayData::DataTable(Arc<TypedBuffer<Arc<DataTable>>>)`, but
73 // adding a TypedArrayData variant is out of bundle-A-followups
74 // scope (prompt forbids new HeapKind variants AND a new
75 // TypedArrayData arm cascades through ~40 exhaustive matches).
76 // Body returns a structured `Err` payload so callers see the
77 // tracked follow-up rather than a marshal-layer panic.
78 register_typed_fn_1::<_, Arc<String>>(
79 &mut module,
80 "read_tables",
81 "Read all record batches from an Arrow IPC file",
82 "path",
83 "string",
84 ConcreteType::Result2(
85 Box::new(ConcreteType::ArrayHeapValue("Array<DataTable>".to_string())),
86 Box::new(ConcreteType::String),
87 ),
88 |path, _ctx| {
89 let _ = path; // suppress unused; the SURFACE response is path-independent
90 // phase-2d-hardening:(f) — arrow.read_tables surface-and-stop:
91 // Array<DataTable> needs TypedArrayData::DataTable variant
92 // (homogeneous-in-HeapKind::DataTable case per ADR-006 §2.7.24
93 // Q25.A spec list). Tracked as
94 // W17-typed-carrier-array-datatable follow-up.
95 Err(format!(
96 "arrow.method read_tables() -> SURFACE — `Array<DataTable>` needs a \
97 typed-array-data DataTable specialized variant in ADR-006 \
98 §2.7.24 Q25.A's spec list. Tracked as \
99 W17-typed-carrier-array-datatable follow-up \
100 (out of bundle-A-followups scope: new TypedArrayData arm \
101 cascades through exhaustive matches across ~40 files). \
102 ADR-006 §2.7.24 Q25.A."
103 ))
104 },
105 );
106
107 // arrow.metadata(path: string) -> Result<HashMap<string, string>, string>
108 register_typed_fn_1::<_, Arc<String>>(
109 &mut module,
110 "metadata",
111 "Read schema metadata from an Arrow IPC file header",
112 "path",
113 "string",
114 ConcreteType::Result2(
115 Box::new(ConcreteType::HashMapStringString),
116 Box::new(ConcreteType::String),
117 ),
118 |path, ctx| {
119 crate::module_exports::check_fs_permission(
120 ctx,
121 shape_abi_v1::Permission::FsRead,
122 path.as_str(),
123 )?;
124
125 let bytes = std::fs::read(path.as_str())
126 .map_err(|e| format!("arrow.metadata() failed to read '{}': {}", path, e))?;
127
128 let cursor = Cursor::new(bytes);
129 let reader = FileReader::try_new(cursor, None)
130 .map_err(|e| format!("arrow.metadata() invalid IPC file: {}", e))?;
131
132 let schema = reader.schema();
133 let meta = schema.metadata();
134
135 let pairs: Vec<(String, String)> = meta
136 .iter()
137 .map(|(k, v)| (k.clone(), v.clone()))
138 .collect();
139 Ok(TypedReturn::Ok(ConcreteReturn::HashMapStringString(pairs)))
140 },
141 );
142
143 module
144}