Skip to main content

oxihuman_export/
arrow_export.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3
4//! Export columnar data in Arrow IPC text format (JSON-encoded metadata stub).
5
6#![allow(dead_code)]
7
/// Logical data type of an Arrow column.
///
/// Each variant maps to a fixed lowercase name via [`ArrowType::type_name`].
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArrowType {
    /// Named `"bool"`.
    Bool,
    /// Named `"int8"`.
    Int8,
    /// Named `"int16"`.
    Int16,
    /// Named `"int32"`.
    Int32,
    /// Named `"int64"`.
    Int64,
    /// Named `"float32"`.
    Float32,
    /// Named `"float64"`.
    Float64,
    /// Named `"utf8"`.
    Utf8,
    /// Named `"binary"`.
    Binary,
}

impl ArrowType {
    /// Return the lowercase type name used when the schema is serialised.
    ///
    /// The match is exhaustive on purpose: adding a variant without a name
    /// is a compile error rather than a silent fallback.
    #[allow(dead_code)]
    pub fn type_name(self) -> &'static str {
        match self {
            ArrowType::Bool => "bool",
            ArrowType::Int8 => "int8",
            ArrowType::Int16 => "int16",
            ArrowType::Int32 => "int32",
            ArrowType::Int64 => "int64",
            ArrowType::Float32 => "float32",
            ArrowType::Float64 => "float64",
            ArrowType::Utf8 => "utf8",
            ArrowType::Binary => "binary",
        }
    }
}
40
41/// An Arrow field.
42#[allow(dead_code)]
43#[derive(Debug, Clone)]
44pub struct ArrowField {
45    pub name: String,
46    pub arrow_type: ArrowType,
47    pub nullable: bool,
48}
49
50/// An Arrow schema.
51#[allow(dead_code)]
52#[derive(Debug, Clone, Default)]
53pub struct ArrowSchema {
54    pub fields: Vec<ArrowField>,
55}
56
/// A batch of column data, with every cell stored as a string-encoded value.
#[allow(dead_code)]
#[derive(Clone, Debug)]
pub struct ArrowBatch {
    /// Row count recorded for this batch. The struct itself does not check
    /// it against the lengths of `columns`.
    pub num_rows: usize,
    /// One inner vector per column, holding that column's cells in row order.
    pub columns: Vec<Vec<String>>,
}
64
65/// An Arrow export.
66#[allow(dead_code)]
67#[derive(Debug, Clone, Default)]
68pub struct ArrowExport {
69    pub schema: ArrowSchema,
70    pub batches: Vec<ArrowBatch>,
71}
72
73/// Create a new Arrow export.
74#[allow(dead_code)]
75pub fn new_arrow_export() -> ArrowExport {
76    ArrowExport {
77        schema: ArrowSchema { fields: Vec::new() },
78        batches: Vec::new(),
79    }
80}
81
82/// Add a field to the schema.
83#[allow(dead_code)]
84pub fn add_arrow_field(doc: &mut ArrowExport, name: &str, arrow_type: ArrowType, nullable: bool) {
85    doc.schema.fields.push(ArrowField {
86        name: name.to_string(),
87        arrow_type,
88        nullable,
89    });
90}
91
92/// Add a batch.
93#[allow(dead_code)]
94pub fn add_arrow_batch(doc: &mut ArrowExport, num_rows: usize, columns: Vec<Vec<String>>) {
95    doc.batches.push(ArrowBatch { num_rows, columns });
96}
97
98/// Return field count.
99#[allow(dead_code)]
100pub fn arrow_field_count(doc: &ArrowExport) -> usize {
101    doc.schema.fields.len()
102}
103
104/// Return total rows across all batches.
105#[allow(dead_code)]
106pub fn arrow_total_rows(doc: &ArrowExport) -> usize {
107    doc.batches.iter().map(|b| b.num_rows).sum()
108}
109
110/// Serialise the schema as JSON.
111#[allow(dead_code)]
112pub fn arrow_schema_to_json(doc: &ArrowExport) -> String {
113    let fields: Vec<String> = doc
114        .schema
115        .fields
116        .iter()
117        .map(|f| {
118            format!(
119                "{{\"name\":\"{}\",\"type\":\"{}\",\"nullable\":{}}}",
120                f.name,
121                f.arrow_type.type_name(),
122                f.nullable
123            )
124        })
125        .collect();
126    format!("{{\"fields\":[{}]}}", fields.join(","))
127}
128
129/// Serialise a batch as JSON.
130#[allow(dead_code)]
131pub fn arrow_batch_to_json(batch: &ArrowBatch, schema: &ArrowSchema) -> String {
132    let cols: Vec<String> = batch
133        .columns
134        .iter()
135        .enumerate()
136        .map(|(i, col)| {
137            let name = schema.fields.get(i).map_or("col", |f| f.name.as_str());
138            let data = col.join(",");
139            format!("{{\"name\":\"{}\",\"data\":[{}]}}", name, data)
140        })
141        .collect();
142    format!(
143        "{{\"num_rows\":{},\"columns\":[{}]}}",
144        batch.num_rows,
145        cols.join(",")
146    )
147}
148
149/// Export mesh positions as Arrow IPC JSON.
150#[allow(dead_code)]
151pub fn export_positions_arrow(positions: &[[f32; 3]]) -> String {
152    let mut doc = new_arrow_export();
153    add_arrow_field(&mut doc, "x", ArrowType::Float32, false);
154    add_arrow_field(&mut doc, "y", ArrowType::Float32, false);
155    add_arrow_field(&mut doc, "z", ArrowType::Float32, false);
156    let xs: Vec<String> = positions.iter().map(|p| format!("{:.6}", p[0])).collect();
157    let ys: Vec<String> = positions.iter().map(|p| format!("{:.6}", p[1])).collect();
158    let zs: Vec<String> = positions.iter().map(|p| format!("{:.6}", p[2])).collect();
159    let n = positions.len();
160    add_arrow_batch(&mut doc, n, vec![xs, ys, zs]);
161    let schema_json = arrow_schema_to_json(&doc);
162    let batch_json = if doc.batches.is_empty() {
163        String::from("[]")
164    } else {
165        format!("[{}]", arrow_batch_to_json(&doc.batches[0], &doc.schema))
166    };
167    format!("{{\"schema\":{},\"batches\":{}}}", schema_json, batch_json)
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a schema with a single non-nullable `float32` field.
    fn single_field_schema(name: &str) -> ArrowSchema {
        ArrowSchema {
            fields: vec![ArrowField {
                name: name.to_string(),
                arrow_type: ArrowType::Float32,
                nullable: false,
            }],
        }
    }

    #[test]
    fn test_new_arrow_export_empty() {
        let doc = new_arrow_export();
        assert_eq!(arrow_field_count(&doc), 0);
        assert_eq!(arrow_total_rows(&doc), 0);
        assert!(doc.batches.is_empty());
    }

    #[test]
    fn test_add_field() {
        let mut doc = new_arrow_export();
        add_arrow_field(&mut doc, "x", ArrowType::Float32, false);
        assert_eq!(arrow_field_count(&doc), 1);
        assert_eq!(doc.schema.fields[0].name, "x");
        assert_eq!(doc.schema.fields[0].arrow_type, ArrowType::Float32);
        assert!(!doc.schema.fields[0].nullable);
    }

    #[test]
    fn test_add_batch() {
        let mut doc = new_arrow_export();
        add_arrow_batch(&mut doc, 5, vec![vec!["1.0".to_string(); 5]]);
        assert_eq!(arrow_total_rows(&doc), 5);
        assert_eq!(doc.batches.len(), 1);
        assert_eq!(doc.batches[0].columns[0].len(), 5);
    }

    #[test]
    fn test_type_name_float32() {
        assert_eq!(ArrowType::Float32.type_name(), "float32");
    }

    #[test]
    fn test_type_name_utf8() {
        assert_eq!(ArrowType::Utf8.type_name(), "utf8");
    }

    #[test]
    fn test_schema_to_json_contains_field() {
        let mut doc = new_arrow_export();
        add_arrow_field(&mut doc, "pos_x", ArrowType::Float32, false);
        let s = arrow_schema_to_json(&doc);
        // Pin the full output so structural regressions are caught.
        assert_eq!(
            s,
            r#"{"fields":[{"name":"pos_x","type":"float32","nullable":false}]}"#
        );
    }

    #[test]
    fn test_schema_to_json_empty() {
        let doc = new_arrow_export();
        assert_eq!(arrow_schema_to_json(&doc), r#"{"fields":[]}"#);
    }

    #[test]
    fn test_batch_to_json_contains_col() {
        let schema = single_field_schema("x");
        let batch = ArrowBatch {
            num_rows: 2,
            columns: vec![vec!["1.0".to_string(), "2.0".to_string()]],
        };
        let s = arrow_batch_to_json(&batch, &schema);
        assert_eq!(
            s,
            r#"{"num_rows":2,"columns":[{"name":"x","data":[1.0,2.0]}]}"#
        );
    }

    #[test]
    fn test_batch_to_json_fallback_name() {
        // A column with no matching schema field is named "col".
        let schema = ArrowSchema::default();
        let batch = ArrowBatch {
            num_rows: 1,
            columns: vec![vec!["7".to_string()]],
        };
        let s = arrow_batch_to_json(&batch, &schema);
        assert_eq!(
            s,
            r#"{"num_rows":1,"columns":[{"name":"col","data":[7]}]}"#
        );
    }

    #[test]
    fn test_export_positions_arrow() {
        let pts = vec![[0.0f32, 0.0, 0.0], [1.0, 0.0, 0.0]];
        let s = export_positions_arrow(&pts);
        let expected = concat!(
            r#"{"schema":{"fields":["#,
            r#"{"name":"x","type":"float32","nullable":false},"#,
            r#"{"name":"y","type":"float32","nullable":false},"#,
            r#"{"name":"z","type":"float32","nullable":false}]},"#,
            r#""batches":[{"num_rows":2,"columns":["#,
            r#"{"name":"x","data":[0.000000,1.000000]},"#,
            r#"{"name":"y","data":[0.000000,0.000000]},"#,
            r#"{"name":"z","data":[0.000000,0.000000]}]}]}"#
        );
        assert_eq!(s, expected);
    }

    #[test]
    fn test_export_positions_empty() {
        let pts: Vec<[f32; 3]> = vec![];
        let s = export_positions_arrow(&pts);
        assert!(s.contains("fields"));
        // An empty input still produces one batch with zero rows.
        assert!(s.contains(r#""num_rows":0"#));
        assert!(s.contains(r#"{"name":"x","data":[]}"#));
    }

    #[test]
    fn test_total_rows_multi_batch() {
        let mut doc = new_arrow_export();
        add_arrow_batch(&mut doc, 10, vec![]);
        add_arrow_batch(&mut doc, 20, vec![]);
        assert_eq!(arrow_total_rows(&doc), 30);
    }
}