Skip to main content

vgi_rpc/
arrow_type.rs

1//! `VgiArrow` — the bridge between idiomatic Rust types and Arrow.
2//!
3//! The proc-macro layer (`vgi-rpc-macros`) generates code that maps each
4//! RPC method parameter and return value through this trait so user
5//! handler signatures stay free of Arrow types. Cross-language wire
6//! compatibility with Python `vgi_rpc` is preserved by mirroring the
7//! Arrow `DataType` choices Python's `ArrowSerializableDataclass` uses.
8//!
9//! # Implementing for your own types
10//!
11//! Use `#[derive(VgiArrow)]` from `vgi-rpc-macros` for plain structs.
12//! Hand-implement only when the wire format must diverge from
13//! Python-canonical defaults.
14
15use std::sync::Arc;
16
17use arrow_array::{
18    builder::BinaryBuilder, Array, ArrayRef, BinaryArray, BooleanArray, FixedSizeBinaryArray,
19    Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray,
20    LargeStringArray, ListArray, MapArray, StringArray, UInt16Array, UInt32Array, UInt64Array,
21    UInt8Array,
22};
23use arrow_schema::{DataType, Field};
24
25use crate::errors::{Result, RpcError};
26
/// Round-trip a Rust value through a single Arrow column.
///
/// Implementors describe the column (`arrow_data_type()`, `nullable()`,
/// `describe_name()`) and convert in both directions: `read()` extracts
/// the value stored at row `idx`, and `build_singleton()` produces a
/// 1-row array carrying `value`.
///
/// The scalar impls in this module build their array in a single step.
/// The `Vec<T>` and `Vec<(String, V)>` impls build one 1-row array per
/// element and concatenate the pieces, so they allocate per element.
pub trait VgiArrow: Sized {
    /// The Arrow `DataType` carrying values of this Rust type.
    fn arrow_data_type() -> DataType;

    /// Whether the column should be flagged nullable. The default is
    /// `false`; the `Option<T>` impl overrides it to return `true`.
    fn nullable() -> bool {
        false
    }

    /// Wire-format type name surfaced via `__describe__` metadata.
    /// Mirrors Python: `"str"`, `"int"`, `"list[int]"`, `"int | None"`.
    fn describe_name() -> String;

    /// Pull this value out of `arr` at row `idx`.
    ///
    /// # Errors
    ///
    /// Returns `RpcError::type_error` when `arr`'s concrete array type
    /// is not one the impl accepts. Several impls in this module accept
    /// more than `Self::arrow_data_type()` (for example `i64` also reads
    /// `Int32`, and `String` also reads dictionary-encoded strings).
    fn read(arr: &dyn Array, idx: usize) -> Result<Self>;

    /// Build a 1-row `ArrayRef` containing `value`.
    ///
    /// # Errors
    ///
    /// Impls return an error when `value` cannot be represented in the
    /// target Arrow type.
    fn build_singleton(value: Self) -> Result<ArrayRef>;
}
58
59/// Helper: typed downcast or `RpcError::type_error("expected …")`.
60fn as_array<'a, A: Array + 'static>(arr: &'a dyn Array, expected: &str) -> Result<&'a A> {
61    arr.as_any()
62        .downcast_ref::<A>()
63        .ok_or_else(|| RpcError::type_error(format!("expected {expected} array")))
64}
65
66// ---------------------------------------------------------------------------
67// Scalars
68// ---------------------------------------------------------------------------
69
70impl VgiArrow for String {
71    fn arrow_data_type() -> DataType {
72        DataType::Utf8
73    }
74    fn describe_name() -> String {
75        "str".into()
76    }
77    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
78        // Accept Utf8 directly, plus DictionaryArray<Int16|Int32, Utf8>
79        // — Python's enum-typed params arrive as dict-encoded strings.
80        if let Some(s) = arr.as_any().downcast_ref::<StringArray>() {
81            return Ok(s.value(idx).to_string());
82        }
83        if let Some(d) = arr
84            .as_any()
85            .downcast_ref::<arrow_array::DictionaryArray<arrow_array::types::Int16Type>>()
86        {
87            let key = d.keys().value(idx);
88            let values = as_array::<StringArray>(d.values().as_ref(), "Utf8 (dict values)")?;
89            return Ok(values.value(key as usize).to_string());
90        }
91        if let Some(d) = arr
92            .as_any()
93            .downcast_ref::<arrow_array::DictionaryArray<arrow_array::types::Int32Type>>()
94        {
95            let key = d.keys().value(idx);
96            let values = as_array::<StringArray>(d.values().as_ref(), "Utf8 (dict values)")?;
97            return Ok(values.value(key as usize).to_string());
98        }
99        Err(RpcError::type_error(
100            "expected Utf8 (or DictionaryArray<Int16|Int32, Utf8>) array",
101        ))
102    }
103    fn build_singleton(value: Self) -> Result<ArrayRef> {
104        Ok(Arc::new(StringArray::from(vec![value])))
105    }
106}
107
108impl VgiArrow for i64 {
109    fn arrow_data_type() -> DataType {
110        DataType::Int64
111    }
112    fn describe_name() -> String {
113        "int".into()
114    }
115    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
116        if let Some(a) = arr.as_any().downcast_ref::<Int64Array>() {
117            return Ok(a.value(idx));
118        }
119        if let Some(a) = arr.as_any().downcast_ref::<Int32Array>() {
120            return Ok(a.value(idx) as i64);
121        }
122        Err(RpcError::type_error("expected Int64/Int32 array"))
123    }
124    fn build_singleton(value: Self) -> Result<ArrayRef> {
125        Ok(Arc::new(Int64Array::from(vec![value])))
126    }
127}
128
129impl VgiArrow for i32 {
130    fn arrow_data_type() -> DataType {
131        DataType::Int32
132    }
133    fn describe_name() -> String {
134        "int".into()
135    }
136    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
137        if let Some(a) = arr.as_any().downcast_ref::<Int32Array>() {
138            return Ok(a.value(idx));
139        }
140        if let Some(a) = arr.as_any().downcast_ref::<Int64Array>() {
141            return Ok(a.value(idx) as i32);
142        }
143        Err(RpcError::type_error("expected Int32/Int64 array"))
144    }
145    fn build_singleton(value: Self) -> Result<ArrayRef> {
146        Ok(Arc::new(Int32Array::from(vec![value])))
147    }
148}
149
150// Smaller / unsigned integer widths. Python's `Annotated[int, ArrowType(pa.int8())]`
151// shows up on the wire as `Int8` etc.; we expose them as the matching
152// Rust primitive so user signatures are natural.
153macro_rules! impl_int_vgi {
154    ($t:ty, $arr:ty, $dt:expr) => {
155        impl VgiArrow for $t {
156            fn arrow_data_type() -> DataType {
157                $dt
158            }
159            fn describe_name() -> String {
160                "int".into()
161            }
162            fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
163                Ok(as_array::<$arr>(arr, stringify!($t))?.value(idx))
164            }
165            fn build_singleton(value: Self) -> Result<ArrayRef> {
166                Ok(Arc::new(<$arr>::from(vec![value])))
167            }
168        }
169    };
170}
171impl_int_vgi!(i8, Int8Array, DataType::Int8);
172impl_int_vgi!(i16, Int16Array, DataType::Int16);
173impl_int_vgi!(u8, UInt8Array, DataType::UInt8);
174impl_int_vgi!(u16, UInt16Array, DataType::UInt16);
175impl_int_vgi!(u32, UInt32Array, DataType::UInt32);
176impl_int_vgi!(u64, UInt64Array, DataType::UInt64);
177
178impl VgiArrow for f64 {
179    fn arrow_data_type() -> DataType {
180        DataType::Float64
181    }
182    fn describe_name() -> String {
183        "float".into()
184    }
185    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
186        if let Some(a) = arr.as_any().downcast_ref::<Float64Array>() {
187            return Ok(a.value(idx));
188        }
189        if let Some(a) = arr.as_any().downcast_ref::<Float32Array>() {
190            return Ok(a.value(idx) as f64);
191        }
192        Err(RpcError::type_error("expected Float64/Float32 array"))
193    }
194    fn build_singleton(value: Self) -> Result<ArrayRef> {
195        Ok(Arc::new(Float64Array::from(vec![value])))
196    }
197}
198
199impl VgiArrow for f32 {
200    fn arrow_data_type() -> DataType {
201        DataType::Float32
202    }
203    fn describe_name() -> String {
204        "float".into()
205    }
206    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
207        if let Some(a) = arr.as_any().downcast_ref::<Float32Array>() {
208            return Ok(a.value(idx));
209        }
210        if let Some(a) = arr.as_any().downcast_ref::<Float64Array>() {
211            return Ok(a.value(idx) as f32);
212        }
213        Err(RpcError::type_error("expected Float32/Float64 array"))
214    }
215    fn build_singleton(value: Self) -> Result<ArrayRef> {
216        Ok(Arc::new(Float32Array::from(vec![value])))
217    }
218}
219
220impl VgiArrow for bool {
221    fn arrow_data_type() -> DataType {
222        DataType::Boolean
223    }
224    fn describe_name() -> String {
225        "bool".into()
226    }
227    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
228        Ok(as_array::<BooleanArray>(arr, "Boolean")?.value(idx))
229    }
230    fn build_singleton(value: Self) -> Result<ArrayRef> {
231        Ok(Arc::new(BooleanArray::from(vec![value])))
232    }
233}
234
235// ---------------------------------------------------------------------------
236// Bytes (Binary), kept distinct from Vec<u8> via a newtype-style wrapper.
237// ---------------------------------------------------------------------------
238
/// Marks a `Vec<u8>` that should be carried as Arrow `Binary` instead
/// of `List<UInt8>`. Use it in handler signatures where the wire type
/// should be `bytes` rather than a list of bytes.
///
/// `#[derive(VgiArrow)]` does not auto-pick between the two: there is
/// no idiomatic Rust signal that a `Vec<u8>` struct field means "blob"
/// rather than "byte list", so users opt in through this wrapper.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Bytes(pub Vec<u8>);

/// Wrap an owned byte vector without copying.
impl From<Vec<u8>> for Bytes {
    fn from(raw: Vec<u8>) -> Self {
        Bytes(raw)
    }
}

/// Unwrap back into the owned byte vector without copying.
impl From<Bytes> for Vec<u8> {
    fn from(wrapped: Bytes) -> Self {
        let Bytes(raw) = wrapped;
        raw
    }
}
260
261impl VgiArrow for Bytes {
262    fn arrow_data_type() -> DataType {
263        DataType::Binary
264    }
265    fn describe_name() -> String {
266        "bytes".into()
267    }
268    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
269        Ok(Bytes(
270            as_array::<BinaryArray>(arr, "Binary")?.value(idx).to_vec(),
271        ))
272    }
273    fn build_singleton(value: Self) -> Result<ArrayRef> {
274        let mut b = BinaryBuilder::new();
275        b.append_value(value.0);
276        Ok(Arc::new(b.finish()))
277    }
278}
279
280// ---------------------------------------------------------------------------
281// Wide-binary / wide-string newtypes.
282// ---------------------------------------------------------------------------
283
/// Wire type `LargeUtf8` (string array with 64-bit offsets). User code
/// holds an ordinary `String`; the wrapper only selects the wire shape.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LargeString(pub String);

/// Wrap an owned `String` without copying.
impl From<String> for LargeString {
    fn from(text: String) -> Self {
        LargeString(text)
    }
}

/// Unwrap back into the owned `String` without copying.
impl From<LargeString> for String {
    fn from(wrapped: LargeString) -> Self {
        let LargeString(text) = wrapped;
        text
    }
}
299
300impl VgiArrow for LargeString {
301    fn arrow_data_type() -> DataType {
302        DataType::LargeUtf8
303    }
304    fn describe_name() -> String {
305        "str".into()
306    }
307    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
308        Ok(LargeString(
309            as_array::<LargeStringArray>(arr, "LargeUtf8")?
310                .value(idx)
311                .to_string(),
312        ))
313    }
314    fn build_singleton(value: Self) -> Result<ArrayRef> {
315        Ok(Arc::new(LargeStringArray::from(vec![value.0])))
316    }
317}
318
/// Wire type `LargeBinary`. User code holds an ordinary `Vec<u8>`; the
/// wrapper only selects the wire shape. See [`LargeString`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LargeBytes(pub Vec<u8>);

/// Wrap an owned byte vector without copying.
impl From<Vec<u8>> for LargeBytes {
    fn from(raw: Vec<u8>) -> Self {
        LargeBytes(raw)
    }
}

/// Unwrap back into the owned byte vector without copying.
impl From<LargeBytes> for Vec<u8> {
    fn from(wrapped: LargeBytes) -> Self {
        let LargeBytes(raw) = wrapped;
        raw
    }
}
333
334impl VgiArrow for LargeBytes {
335    fn arrow_data_type() -> DataType {
336        DataType::LargeBinary
337    }
338    fn describe_name() -> String {
339        "bytes".into()
340    }
341    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
342        Ok(LargeBytes(
343            as_array::<LargeBinaryArray>(arr, "LargeBinary")?
344                .value(idx)
345                .to_vec(),
346        ))
347    }
348    fn build_singleton(value: Self) -> Result<ArrayRef> {
349        let arr = LargeBinaryArray::from_iter_values([value.0.as_slice()]);
350        Ok(Arc::new(arr))
351    }
352}
353
/// `FixedSizeBinary(N)` wire type carried as `[u8; N]`. The const
/// generic encodes the width so the schema is fully determined.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FixedBinary<const N: usize>(pub [u8; N]);

/// Wrap a fixed-width byte array.
impl<const N: usize> From<[u8; N]> for FixedBinary<N> {
    fn from(b: [u8; N]) -> Self {
        Self(b)
    }
}

/// Unwrap back into the fixed-width byte array. Mirrors the reverse
/// conversions the other newtypes in this module provide.
impl<const N: usize> From<FixedBinary<N>> for [u8; N] {
    fn from(b: FixedBinary<N>) -> Self {
        b.0
    }
}
364
365impl<const N: usize> VgiArrow for FixedBinary<N> {
366    fn arrow_data_type() -> DataType {
367        DataType::FixedSizeBinary(N as i32)
368    }
369    fn describe_name() -> String {
370        "bytes".into()
371    }
372    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
373        let a = as_array::<FixedSizeBinaryArray>(arr, "FixedSizeBinary")?;
374        let raw = a.value(idx);
375        if raw.len() != N {
376            return Err(RpcError::type_error(format!(
377                "FixedSizeBinary width mismatch: expected {N}, got {}",
378                raw.len()
379            )));
380        }
381        let mut out = [0u8; N];
382        out.copy_from_slice(raw);
383        Ok(FixedBinary(out))
384    }
385    fn build_singleton(value: Self) -> Result<ArrayRef> {
386        let arr = FixedSizeBinaryArray::try_from_iter([value.0.as_slice()].into_iter())
387            .map_err(RpcError::from)?;
388        Ok(Arc::new(arr))
389    }
390}
391
/// Wire type `Dictionary(Int16, Utf8)`: a dictionary-encoded string.
/// User code holds an ordinary `String`; the newtype only selects the
/// schema.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DictString(pub String);

/// Wrap an owned `String` without copying.
impl From<String> for DictString {
    fn from(text: String) -> Self {
        DictString(text)
    }
}

/// Unwrap back into the owned `String` without copying.
impl From<DictString> for String {
    fn from(wrapped: DictString) -> Self {
        let DictString(text) = wrapped;
        text
    }
}
408
409impl VgiArrow for DictString {
410    fn arrow_data_type() -> DataType {
411        DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Utf8))
412    }
413    fn describe_name() -> String {
414        "str".into()
415    }
416    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
417        // Reuse the `String` reader which already accepts both plain
418        // Utf8 and DictionaryArray<Int16|Int32, Utf8>.
419        Ok(DictString(<String as VgiArrow>::read(arr, idx)?))
420    }
421    fn build_singleton(value: Self) -> Result<ArrayRef> {
422        use arrow_array::builder::StringDictionaryBuilder;
423        use arrow_array::types::Int16Type;
424        let mut b = StringDictionaryBuilder::<Int16Type>::new();
425        b.append_value(&value.0);
426        Ok(Arc::new(b.finish()))
427    }
428}
429
430// ---------------------------------------------------------------------------
431// Date / time / duration / decimal — chrono + rust_decimal backed.
432// ---------------------------------------------------------------------------
433
434use arrow_array::{
435    Date32Array, Decimal128Array, DurationMicrosecondArray, Time64MicrosecondArray,
436    TimestampMicrosecondArray,
437};
438
439const DATE32_EPOCH: chrono::NaiveDate = match chrono::NaiveDate::from_ymd_opt(1970, 1, 1) {
440    Some(d) => d,
441    None => panic!("epoch"),
442};
443
444impl VgiArrow for chrono::NaiveDate {
445    fn arrow_data_type() -> DataType {
446        DataType::Date32
447    }
448    fn describe_name() -> String {
449        "date".into()
450    }
451    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
452        let days = as_array::<Date32Array>(arr, "Date32")?.value(idx);
453        DATE32_EPOCH
454            .checked_add_signed(chrono::Duration::days(days as i64))
455            .ok_or_else(|| RpcError::value_error("date32 out of range"))
456    }
457    fn build_singleton(value: Self) -> Result<ArrayRef> {
458        let days = (value - DATE32_EPOCH).num_days() as i32;
459        Ok(Arc::new(Date32Array::from(vec![days])))
460    }
461}
462
463impl VgiArrow for chrono::NaiveDateTime {
464    fn arrow_data_type() -> DataType {
465        DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, None)
466    }
467    fn describe_name() -> String {
468        "datetime".into()
469    }
470    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
471        let micros = as_array::<TimestampMicrosecondArray>(arr, "Timestamp(us)")?.value(idx);
472        chrono::DateTime::from_timestamp_micros(micros)
473            .map(|dt| dt.naive_utc())
474            .ok_or_else(|| RpcError::value_error("timestamp out of range"))
475    }
476    fn build_singleton(value: Self) -> Result<ArrayRef> {
477        let micros = value.and_utc().timestamp_micros();
478        Ok(Arc::new(TimestampMicrosecondArray::from(vec![micros])))
479    }
480}
481
482/// UTC-tagged timestamp wire type (`Timestamp(us, tz="UTC")`). User
483/// holds a `chrono::DateTime<Utc>`.
484#[derive(Clone, Debug, PartialEq, Eq)]
485pub struct UtcTimestamp(pub chrono::DateTime<chrono::Utc>);
486
487impl From<chrono::DateTime<chrono::Utc>> for UtcTimestamp {
488    fn from(d: chrono::DateTime<chrono::Utc>) -> Self {
489        Self(d)
490    }
491}
492
493impl VgiArrow for UtcTimestamp {
494    fn arrow_data_type() -> DataType {
495        DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some("UTC".into()))
496    }
497    fn describe_name() -> String {
498        "datetime".into()
499    }
500    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
501        let micros = as_array::<TimestampMicrosecondArray>(arr, "Timestamp(us, UTC)")?.value(idx);
502        chrono::DateTime::<chrono::Utc>::from_timestamp_micros(micros)
503            .map(UtcTimestamp)
504            .ok_or_else(|| RpcError::value_error("UTC timestamp out of range"))
505    }
506    fn build_singleton(value: Self) -> Result<ArrayRef> {
507        let micros = value.0.timestamp_micros();
508        let arr = TimestampMicrosecondArray::from(vec![micros]).with_timezone("UTC");
509        Ok(Arc::new(arr))
510    }
511}
512
513impl VgiArrow for chrono::NaiveTime {
514    fn arrow_data_type() -> DataType {
515        DataType::Time64(arrow_schema::TimeUnit::Microsecond)
516    }
517    fn describe_name() -> String {
518        "time".into()
519    }
520    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
521        let micros = as_array::<Time64MicrosecondArray>(arr, "Time64(us)")?.value(idx);
522        let secs = (micros / 1_000_000) as u32;
523        let nanos = ((micros % 1_000_000) * 1_000) as u32;
524        chrono::NaiveTime::from_num_seconds_from_midnight_opt(secs, nanos)
525            .ok_or_else(|| RpcError::value_error("time-of-day out of range"))
526    }
527    fn build_singleton(value: Self) -> Result<ArrayRef> {
528        use chrono::Timelike;
529        let micros = (value.num_seconds_from_midnight() as i64) * 1_000_000
530            + (value.nanosecond() as i64) / 1_000;
531        Ok(Arc::new(Time64MicrosecondArray::from(vec![micros])))
532    }
533}
534
535impl VgiArrow for chrono::Duration {
536    fn arrow_data_type() -> DataType {
537        DataType::Duration(arrow_schema::TimeUnit::Microsecond)
538    }
539    fn describe_name() -> String {
540        "duration".into()
541    }
542    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
543        let micros = as_array::<DurationMicrosecondArray>(arr, "Duration(us)")?.value(idx);
544        Ok(chrono::Duration::microseconds(micros))
545    }
546    fn build_singleton(value: Self) -> Result<ArrayRef> {
547        let micros = value.num_microseconds().ok_or_else(|| {
548            RpcError::value_error("duration overflows microsecond representation")
549        })?;
550        Ok(Arc::new(DurationMicrosecondArray::from(vec![micros])))
551    }
552}
553
554/// Decimal128 with precision 20, scale 4 — matches the conformance
555/// schema. Other (precision, scale) combinations use additional
556/// newtypes if needed.
557#[derive(Clone, Copy, Debug, PartialEq, Eq)]
558pub struct Decimal20_4(pub rust_decimal::Decimal);
559
560impl From<rust_decimal::Decimal> for Decimal20_4 {
561    fn from(d: rust_decimal::Decimal) -> Self {
562        Self(d)
563    }
564}
565
566impl VgiArrow for Decimal20_4 {
567    fn arrow_data_type() -> DataType {
568        DataType::Decimal128(20, 4)
569    }
570    fn describe_name() -> String {
571        "Decimal".into()
572    }
573    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
574        let raw = as_array::<Decimal128Array>(arr, "Decimal128")?.value(idx);
575        // Decimal128 carries the unscaled integer; scale is in the type.
576        let mut d = rust_decimal::Decimal::from_i128_with_scale(raw, 4);
577        d.normalize_assign();
578        Ok(Decimal20_4(d))
579    }
580    fn build_singleton(value: Self) -> Result<ArrayRef> {
581        let mut d = value.0;
582        d.rescale(4);
583        let raw = d.mantissa();
584        let arr = Decimal128Array::from(vec![raw])
585            .with_precision_and_scale(20, 4)
586            .map_err(RpcError::from)?;
587        Ok(Arc::new(arr))
588    }
589}
590
591// ---------------------------------------------------------------------------
592// Option<T> — wraps any VgiArrow with nullable=true.
593// ---------------------------------------------------------------------------
594
595impl<T> VgiArrow for Option<T>
596where
597    T: VgiArrow,
598{
599    fn arrow_data_type() -> DataType {
600        T::arrow_data_type()
601    }
602    fn nullable() -> bool {
603        true
604    }
605    fn describe_name() -> String {
606        format!("{} | None", T::describe_name())
607    }
608    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
609        if arr.is_null(idx) {
610            Ok(None)
611        } else {
612            Ok(Some(T::read(arr, idx)?))
613        }
614    }
615    fn build_singleton(value: Self) -> Result<ArrayRef> {
616        match value {
617            Some(v) => T::build_singleton(v),
618            None => build_null_singleton::<T>(),
619        }
620    }
621}
622
623fn build_null_singleton<T: VgiArrow>() -> Result<ArrayRef> {
624    use arrow_array::array::new_null_array;
625    Ok(new_null_array(&T::arrow_data_type(), 1))
626}
627
// ---------------------------------------------------------------------------
// Vec<T> — list types.
//
// Handled as `List<inner>` for arbitrary VgiArrow inner types. Every
// inner type currently takes the same generic path: each element is
// built through `T::build_singleton` and the pieces are concatenated.
// Fast-path builders for common scalar inners (i64, i32, f64, f32,
// bool, String) are not implemented yet.
// ---------------------------------------------------------------------------
636
637impl<T> VgiArrow for Vec<T>
638where
639    T: VgiArrow,
640{
641    fn arrow_data_type() -> DataType {
642        DataType::List(Arc::new(Field::new("item", T::arrow_data_type(), true)))
643    }
644    fn describe_name() -> String {
645        format!("list[{}]", T::describe_name())
646    }
647    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
648        let la = as_array::<ListArray>(arr, "List")?;
649        let inner = la.value(idx);
650        let len = inner.len();
651        let mut out = Vec::with_capacity(len);
652        for i in 0..len {
653            out.push(T::read(inner.as_ref(), i)?);
654        }
655        Ok(out)
656    }
657    fn build_singleton(values: Self) -> Result<ArrayRef> {
658        // Generic path: build each element as a 1-row array via
659        // T::build_singleton, concat them into the list's inner array,
660        // and wrap in a ListArray with a single (0..len) offset pair.
661        // No specialization in V1 — fast-path scalar builders can come
662        // later when `min_specialization` stabilizes.
663        let len = values.len();
664        let mut singletons: Vec<ArrayRef> = Vec::with_capacity(len);
665        for v in values {
666            singletons.push(T::build_singleton(v)?);
667        }
668        let refs: Vec<&dyn Array> = singletons.iter().map(|a| a.as_ref()).collect();
669        let inner = if refs.is_empty() {
670            arrow_array::array::new_empty_array(&T::arrow_data_type())
671        } else {
672            arrow_select::concat::concat(&refs).map_err(RpcError::from)?
673        };
674        let offsets = arrow_buffer::OffsetBuffer::new(arrow_buffer::ScalarBuffer::from(vec![
675            0i32, len as i32,
676        ]));
677        let field = Arc::new(Field::new("item", T::arrow_data_type(), true));
678        Ok(Arc::new(ListArray::new(field, offsets, inner, None)))
679    }
680}
681
682// ---------------------------------------------------------------------------
683// Map: Vec<(K, V)>
684//
685// Mirrors the Python wire layout for `dict[K, V]`:
686// `Map(entries{keys: K, values: V (nullable)})`.
687// Only string-keyed maps are supported in V1 because that's what the
688// Python canonical's dataclass introspection emits.
689// ---------------------------------------------------------------------------
690
691/// `Vec<(String, V)>` — wire format `Map<Utf8, V>` (Python canonical).
692impl<V> VgiArrow for Vec<(String, V)>
693where
694    V: VgiArrow,
695{
696    fn arrow_data_type() -> DataType {
697        let entries = Field::new(
698            "entries",
699            DataType::Struct(
700                vec![
701                    Field::new("keys", DataType::Utf8, false),
702                    Field::new("values", V::arrow_data_type(), true),
703                ]
704                .into(),
705            ),
706            false,
707        );
708        DataType::Map(Arc::new(entries), false)
709    }
710    fn describe_name() -> String {
711        format!("dict[str, {}]", V::describe_name())
712    }
713    fn read(arr: &dyn Array, idx: usize) -> Result<Self> {
714        let m = as_array::<MapArray>(arr, "Map")?;
715        let entry = m.value(idx);
716        let keys = as_array::<StringArray>(entry.column(0).as_ref(), "Map.keys (Utf8)")?;
717        let values = entry.column(1);
718        let mut out = Vec::with_capacity(keys.len());
719        for i in 0..keys.len() {
720            let v = V::read(values.as_ref(), i)?;
721            out.push((keys.value(i).to_string(), v));
722        }
723        Ok(out)
724    }
725    fn build_singleton(entries: Self) -> Result<ArrayRef> {
726        use arrow_array::array::new_empty_array;
727        let len = entries.len();
728        let (keys, values): (Vec<String>, Vec<V>) = entries.into_iter().unzip();
729        let key_arr = Arc::new(StringArray::from(keys)) as ArrayRef;
730        let value_arr: ArrayRef = if values.is_empty() {
731            new_empty_array(&V::arrow_data_type())
732        } else {
733            let mut singletons: Vec<ArrayRef> = Vec::with_capacity(values.len());
734            for v in values {
735                singletons.push(V::build_singleton(v)?);
736            }
737            let refs: Vec<&dyn Array> = singletons.iter().map(|a| a.as_ref()).collect();
738            arrow_select::concat::concat(&refs).map_err(RpcError::from)?
739        };
740        let entries_struct = arrow_array::StructArray::from(vec![
741            (Arc::new(Field::new("keys", DataType::Utf8, false)), key_arr),
742            (
743                Arc::new(Field::new("values", V::arrow_data_type(), true)),
744                value_arr,
745            ),
746        ]);
747        let offsets = arrow_buffer::OffsetBuffer::new(arrow_buffer::ScalarBuffer::from(vec![
748            0i32, len as i32,
749        ]));
750        let entries_field = Arc::new(Field::new(
751            "entries",
752            entries_struct.data_type().clone(),
753            false,
754        ));
755        Ok(Arc::new(MapArray::new(
756            entries_field,
757            offsets,
758            entries_struct,
759            None,
760            false,
761        )))
762    }
763}
764
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_array::RecordBatch;
    use arrow_schema::Schema;

    /// Build a 1-row column from `value`, place it in a one-column
    /// `RecordBatch` whose schema is derived from `T`, and read row 0
    /// back out. Going through `RecordBatch::try_new` also checks that
    /// the built array agrees with `T::arrow_data_type()`.
    fn round_trip<T: VgiArrow + std::fmt::Debug + PartialEq>(value: T) -> T {
        let column = T::build_singleton(value).expect("build_singleton");
        let field = Field::new("v", T::arrow_data_type(), T::nullable());
        let schema = Arc::new(Schema::new(vec![field]));
        let batch = RecordBatch::try_new(schema, vec![column]).unwrap();
        T::read(batch.column(0).as_ref(), 0).expect("read")
    }

    #[test]
    fn roundtrip_string() {
        let original = String::from("hello");
        assert_eq!(round_trip(original.clone()), original);
    }

    #[test]
    fn roundtrip_i64() {
        for value in [42i64, -1] {
            assert_eq!(round_trip(value), value);
        }
    }

    #[test]
    fn roundtrip_i32() {
        let value = 7i32;
        assert_eq!(round_trip(value), value);
    }

    #[test]
    fn roundtrip_f64() {
        let value = 1.5f64;
        assert_eq!(round_trip(value), value);
    }

    #[test]
    fn roundtrip_f32() {
        let value = 2.5f32;
        assert_eq!(round_trip(value), value);
    }

    #[test]
    fn roundtrip_bool() {
        for flag in [true, false] {
            assert_eq!(round_trip(flag), flag);
        }
    }

    #[test]
    fn roundtrip_bytes() {
        let payload = Bytes(vec![1, 2, 3, 4, 5]);
        assert_eq!(round_trip(payload.clone()), payload);
    }

    #[test]
    fn roundtrip_option_some() {
        let present = Some(123i64);
        assert_eq!(round_trip(present), present);
    }

    #[test]
    fn roundtrip_option_none() {
        let absent: Option<i64> = None;
        assert_eq!(round_trip(absent), None);
    }

    #[test]
    fn roundtrip_option_string() {
        let present = Some(String::from("hello"));
        assert_eq!(round_trip(present.clone()), present);

        let absent: Option<String> = None;
        assert_eq!(round_trip(absent), None);
    }

    #[test]
    fn roundtrip_vec_i64() {
        let filled = vec![1i64, 2, 3, 4, 5];
        assert_eq!(round_trip(filled.clone()), filled);

        let empty: Vec<i64> = Vec::new();
        assert_eq!(round_trip(empty.clone()), empty);
    }

    #[test]
    fn roundtrip_vec_string() {
        let items = vec![String::from("a"), String::from("b")];
        assert_eq!(round_trip(items.clone()), items);
    }

    #[test]
    fn roundtrip_vec_f64() {
        let items = vec![1.0f64, 2.5];
        assert_eq!(round_trip(items.clone()), items);
    }

    #[test]
    fn roundtrip_vec_bool() {
        let items = vec![true, false, true];
        assert_eq!(round_trip(items.clone()), items);
    }

    #[test]
    fn roundtrip_vec_vec_i64() {
        let nested = vec![vec![1i64, 2], vec![3], vec![]];
        assert_eq!(round_trip(nested.clone()), nested);
    }

    #[test]
    fn roundtrip_map_str_i64() {
        let pairs = vec![(String::from("a"), 1i64), (String::from("b"), 2)];
        assert_eq!(round_trip(pairs.clone()), pairs);
    }

    #[test]
    fn roundtrip_map_str_str() {
        let pairs = vec![
            (String::from("k1"), String::from("v1")),
            (String::from("k2"), String::from("v2")),
        ];
        assert_eq!(round_trip(pairs.clone()), pairs);
    }

    #[test]
    fn describe_names_match_python() {
        let cases = [
            (<String as VgiArrow>::describe_name(), "str"),
            (<i64 as VgiArrow>::describe_name(), "int"),
            (<f64 as VgiArrow>::describe_name(), "float"),
            (<bool as VgiArrow>::describe_name(), "bool"),
            (<Bytes as VgiArrow>::describe_name(), "bytes"),
            (<Option<String> as VgiArrow>::describe_name(), "str | None"),
            (<Vec<i64> as VgiArrow>::describe_name(), "list[int]"),
            (
                <Vec<Vec<i64>> as VgiArrow>::describe_name(),
                "list[list[int]]",
            ),
            (
                <Vec<(String, i64)> as VgiArrow>::describe_name(),
                "dict[str, int]",
            ),
        ];
        for (actual, expected) in cases {
            assert_eq!(actual, expected);
        }
    }

    #[test]
    fn nullable_flag_only_set_for_option() {
        // Plain types are non-nullable.
        assert!(!<i64 as VgiArrow>::nullable());
        assert!(!<String as VgiArrow>::nullable());
        // Wrapping in `Option` flips the flag.
        assert!(<Option<i64> as VgiArrow>::nullable());
        assert!(<Option<Vec<i64>> as VgiArrow>::nullable());
    }
}
913}