// copybook_arrow crate root (lib.rs)
1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//! Typed Arrow and Parquet output for copybook-rs
4//!
5//! Converts COBOL binary data directly to Apache Arrow columnar format,
6//! preserving COBOL type precision (Decimal128 for COMP-3/Zoned, proper int widths, etc.)
7
/// Columnar `RecordBatch` construction from decoded COBOL records.
pub mod batch_builder;
/// Per-column accumulator trait and typed column builders.
pub mod builders;
/// Direct binary → Arrow decoding (bypasses JSON intermediate).
pub mod decode_direct;
/// Arrow IPC (Feather) file writer.
pub mod ipc;
/// Arrow/Parquet output configuration (compression, edited-PIC handling).
pub mod options;
/// Apache Parquet file writer with configurable compression.
pub mod parquet_writer;
/// COBOL schema → Arrow schema conversion (type mapping).
pub mod schema_convert;
/// Streaming record-by-record Arrow output for large files.
pub mod streaming;

/// Legacy JSONL-to-Arrow API (deprecated; prefer `decode_direct`).
#[allow(deprecated)]
pub mod legacy;

// New typed API re-exports — the preferred entry points of this crate.
pub use batch_builder::RecordBatchBuilder;
pub use ipc::write_ipc;
pub use options::{ArrowOptions, Compression, EditedPicRepresentation};
pub use parquet_writer::write_parquet;
pub use schema_convert::cobol_schema_to_arrow;
pub use streaming::stream_to_batches;

// Legacy re-exports (deprecated, preserved for backward compatibility).
// The `as` renames keep the historical `ArrowWriter` / `ParquetFileWriter`
// names resolving for downstream users of the pre-typed API.
#[allow(deprecated)]
pub use legacy::{
    LegacyArrowWriter as ArrowWriter, LegacyParquetFileWriter as ParquetFileWriter,
    json_to_record_batch, json_to_schema,
};
43
// Error types
use thiserror::Error;

/// Errors that can occur during Arrow/Parquet conversion.
///
/// String-payload variants carry a human-readable description from the
/// failing stage. `Io` and `Arrow` wrap their source errors via `#[from]`,
/// so `?` converts `std::io::Error` and `arrow::error::ArrowError`
/// into this type automatically.
#[derive(Error, Debug)]
pub enum ArrowError {
    /// JSON conversion error (legacy API)
    #[error("JSON conversion error: {0}")]
    JsonConversion(String),

    /// Schema conversion error
    #[error("Schema conversion error: {0}")]
    SchemaConversion(String),

    /// Column build error
    #[error("Column build error: {0}")]
    ColumnBuild(String),

    /// Parquet write error
    #[error("Parquet write error: {0}")]
    ParquetWrite(String),

    /// IPC write error
    #[error("IPC write error: {0}")]
    IpcWrite(String),

    /// IO error
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Arrow error
    #[error("Arrow error: {0}")]
    Arrow(#[from] arrow::error::ArrowError),

    /// Codec error
    #[error("Codec error: {0}")]
    Codec(String),
}

/// Result type for Arrow operations, with the error defaulted to [`ArrowError`].
pub type Result<T> = std::result::Result<T, ArrowError>;
85
#[cfg(test)]
#[allow(clippy::unwrap_used)]
#[allow(deprecated)]
mod tests {
    use super::*;

    /// Schema inference over a flat JSON object yields one field per key.
    #[test]
    fn test_legacy_json_to_schema() {
        let record = serde_json::json!({
            "name": "John",
            "age": 30,
            "active": true
        });

        let inferred = json_to_schema(&record).unwrap();
        assert_eq!(inferred.fields().len(), 3);
    }

    /// A single JSON object converts to a one-row batch with a column per key.
    #[test]
    fn test_legacy_json_to_record_batch() {
        let record = serde_json::json!({
            "name": "John",
            "age": 30,
            "active": true
        });

        let inferred = json_to_schema(&record).unwrap();
        let batch = json_to_record_batch(&inferred, &record).unwrap();

        assert_eq!(batch.num_rows(), 1);
        assert_eq!(batch.num_columns(), 3);
    }

    /// Adding a record through the deprecated writer produces one batch.
    #[test]
    fn test_legacy_arrow_writer() {
        let record = serde_json::json!({
            "name": "John",
            "age": 30
        });

        let mut sink = ArrowWriter::new(json_to_schema(&record).unwrap());
        sink.add_json_record(&record).unwrap();

        assert_eq!(sink.batch_count(), 1);
    }

    /// Writing one record via the deprecated Parquet writer creates the file.
    #[test]
    fn test_legacy_parquet_writer_roundtrip() {
        let record = serde_json::json!({
            "name": "John",
            "age": 30,
            "active": true
        });

        let sink = ParquetFileWriter::new(json_to_schema(&record).unwrap());
        let tmp = tempfile::NamedTempFile::new().unwrap();

        sink.write_json_records(tmp.path(), std::slice::from_ref(&record))
            .unwrap();

        assert!(tmp.path().exists());
    }
}
153}