copybook_arrow/options.rs
1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Configuration options for Arrow/Parquet output
3
4/// Options for COBOL-to-Arrow conversion.
5#[derive(Debug, Clone)]
6#[allow(clippy::struct_excessive_bools)]
7pub struct ArrowOptions {
8 /// Number of records per Arrow `RecordBatch` (default: 8192)
9 pub batch_size: usize,
10 /// Flatten group fields to top-level columns (default: true)
11 pub flatten_groups: bool,
12 /// How to represent edited PIC fields in Arrow
13 pub edited_pic_as: EditedPicRepresentation,
14 /// Include FILLER fields in output (default: false)
15 pub emit_filler: bool,
16 /// Include metadata column with record info (default: false)
17 pub emit_meta: bool,
18 /// Parquet compression codec
19 pub compression: Compression,
20 /// Row group size for Parquet output
21 pub row_group_size: usize,
22 /// Embed copybook text in Parquet metadata
23 pub embed_copybook: bool,
24 /// Codepage for character conversion
25 pub codepage: copybook_codec::Codepage,
26 /// Floating-point representation for COMP-1/COMP-2 fields
27 pub float_format: copybook_codec::FloatFormat,
28}
29
/// How edited PIC fields are represented in Arrow.
///
/// The enum is a plain, fieldless selector. It derives `PartialEq`, `Eq`
/// and `Hash` so that callers can compare a configured value with `==`
/// (or `assert_eq!`) and use it as a map/set key.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum EditedPicRepresentation {
    /// Store as Decimal128 (extract the numeric value). This is the default.
    #[default]
    Decimal,
    /// Store as a Utf8 string (preserve the formatting).
    String,
}
39
/// Compression codec for Parquet files.
///
/// The enum is a plain, fieldless selector. It derives `PartialEq`, `Eq`
/// and `Hash` so that callers can compare a configured codec with `==`
/// (or `assert_eq!`) and use it as a map/set key.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub enum Compression {
    /// No compression.
    None,
    /// Snappy compression.
    Snappy,
    /// Gzip compression.
    Gzip,
    /// LZ4 compression.
    Lz4,
    /// Zstd compression. This is the default.
    #[default]
    Zstd,
}
55
56impl Default for ArrowOptions {
57 #[inline]
58 fn default() -> Self {
59 Self {
60 batch_size: 8192,
61 flatten_groups: true,
62 edited_pic_as: EditedPicRepresentation::default(),
63 emit_filler: false,
64 emit_meta: false,
65 compression: Compression::default(),
66 row_group_size: 1_000_000,
67 embed_copybook: false,
68 codepage: copybook_codec::Codepage::CP037,
69 float_format: copybook_codec::FloatFormat::IeeeBigEndian,
70 }
71 }
72}