Skip to main content

scirs2_io/columnar/
writer.rs

1//! Writer for the columnar binary format.
2//!
3//! File layout:
4//! - Magic bytes (8 bytes): "SCIRCOL\x01"
5//! - Version (u32): format version
6//! - Column count (u32)
7//! - Row count (u64)
8//! - For each column:
9//!   - Name length (u32) + name bytes
10//!   - Type tag (u8)
11//!   - Encoding type (u8)
12//!   - Data size in bytes (u64)
13//!   - Encoded data bytes
14
15use byteorder::{LittleEndian, WriteBytesExt};
16use std::fs::File;
17use std::io::{BufWriter, Write};
18use std::path::Path;
19
20use crate::error::{IoError, Result};
21
22use super::encoding::encode_column;
23use super::types::{ColumnarTable, EncodingType, COLUMNAR_MAGIC, FORMAT_VERSION};
24
25/// Options for writing columnar files
26#[derive(Debug, Clone, Default)]
27pub struct ColumnarWriteOptions {
28    /// Force a specific encoding for all columns (None = auto-detect)
29    pub encoding: Option<EncodingType>,
30}
31
32/// Write a columnar table to a file
33pub fn write_columnar<P: AsRef<Path>>(path: P, table: &ColumnarTable) -> Result<()> {
34    write_columnar_with_options(path, table, ColumnarWriteOptions::default())
35}
36
37/// Write a columnar table to a file with options
38pub fn write_columnar_with_options<P: AsRef<Path>>(
39    path: P,
40    table: &ColumnarTable,
41    options: ColumnarWriteOptions,
42) -> Result<()> {
43    let file = File::create(path).map_err(|e| IoError::FileError(e.to_string()))?;
44    let mut writer = BufWriter::new(file);
45
46    // Write magic bytes
47    writer
48        .write_all(COLUMNAR_MAGIC)
49        .map_err(|e| IoError::FileError(format!("Failed to write magic: {}", e)))?;
50
51    // Write version
52    writer
53        .write_u32::<LittleEndian>(FORMAT_VERSION)
54        .map_err(|e| IoError::FileError(format!("Failed to write version: {}", e)))?;
55
56    // Write column count
57    writer
58        .write_u32::<LittleEndian>(table.num_columns() as u32)
59        .map_err(|e| IoError::FileError(format!("Failed to write column count: {}", e)))?;
60
61    // Write row count
62    writer
63        .write_u64::<LittleEndian>(table.num_rows() as u64)
64        .map_err(|e| IoError::FileError(format!("Failed to write row count: {}", e)))?;
65
66    // Write each column
67    for col in table.columns() {
68        // Write column name
69        let name_bytes = col.name.as_bytes();
70        writer
71            .write_u32::<LittleEndian>(name_bytes.len() as u32)
72            .map_err(|e| {
73                IoError::FileError(format!("Failed to write column name length: {}", e))
74            })?;
75        writer
76            .write_all(name_bytes)
77            .map_err(|e| IoError::FileError(format!("Failed to write column name: {}", e)))?;
78
79        // Write type tag
80        writer
81            .write_u8(col.data.type_tag() as u8)
82            .map_err(|e| IoError::FileError(format!("Failed to write type tag: {}", e)))?;
83
84        // Determine encoding
85        let encoding = options.encoding.unwrap_or_else(|| col.data.best_encoding());
86
87        // Write encoding type
88        writer
89            .write_u8(encoding as u8)
90            .map_err(|e| IoError::FileError(format!("Failed to write encoding type: {}", e)))?;
91
92        // Encode data to a buffer to get the size
93        let mut data_buf = Vec::new();
94        encode_column(&mut data_buf, &col.data, encoding)?;
95
96        // Write data size
97        writer
98            .write_u64::<LittleEndian>(data_buf.len() as u64)
99            .map_err(|e| IoError::FileError(format!("Failed to write data size: {}", e)))?;
100
101        // Write data
102        writer
103            .write_all(&data_buf)
104            .map_err(|e| IoError::FileError(format!("Failed to write column data: {}", e)))?;
105    }
106
107    writer
108        .flush()
109        .map_err(|e| IoError::FileError(format!("Failed to flush writer: {}", e)))?;
110
111    Ok(())
112}