Skip to main content

zellij_sheets/
data_loader.rs

1//! Data loading module for spreadsheet files
2//!
3//! Provides functionality to load and parse spreadsheet data from various formats
4//! including CSV and Excel files.
5
6use calamine::{open_workbook_auto, Data, Reader};
7use std::io::Read;
8use std::path::Path;
9use thiserror::Error;
10
/// Errors that can occur during data loading.
///
/// The variants marked `#[from]` wrap the underlying library error so that
/// `?` can convert it into a `DataLoaderError` at the call site.
#[derive(Debug, Error)]
pub enum DataLoaderError {
    /// A filesystem or other I/O operation failed.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),

    /// `calamine` reported an error while opening or reading a workbook.
    #[error("Excel file error: {0}")]
    ExcelError(#[from] calamine::Error),

    /// The `csv` crate reported an error while reading or writing CSV data.
    #[error("CSV parsing error: {0}")]
    CsvError(#[from] csv::Error),

    /// The file cannot be handled: its extension is missing or unsupported,
    /// or its contents are unusable (for example a workbook with no sheets).
    /// The payload is a human-readable explanation.
    #[error("Invalid file format: {0}")]
    InvalidFormat(String),
}
26
/// Result type for data loading operations.
///
/// Shorthand for `std::result::Result<T, DataLoaderError>`.
pub type Result<T> = std::result::Result<T, DataLoaderError>;
29
/// Data source type for spreadsheet files.
///
/// `get_data_source` derives this from a path's file extension
/// (compared case-insensitively).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DataSource {
    /// CSV file format (`.csv`)
    Csv,
    /// Excel file format (`.xlsx`, `.xls`)
    Excel,
    /// Parquet file format (`.parquet`); the extension is recognized, but
    /// `load_data` rejects it because loading is not yet supported
    Parquet,
}
40
/// Loaded spreadsheet data: a header row plus data rows, with every cell
/// rendered as a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoadedData {
    /// Column headers. The loaders in this module trim each header and
    /// substitute `column_N` (1-based) for blank ones.
    pub headers: Vec<String>,
    /// Data rows. The loaders in this module resize every row to
    /// `headers.len()` cells.
    pub rows: Vec<Vec<String>>,
    /// Source file format
    pub source: DataSource,
}
51
52/// Load spreadsheet data from a file path
53///
54/// # Arguments
55///
56/// * `path` - Path to the spreadsheet file
57///
58/// # Returns
59///
60/// Returns `LoadedData` on success or `DataLoaderError` on failure
61pub fn load_data(path: &Path) -> Result<LoadedData> {
62    let source = get_data_source(path)?;
63    match source {
64        DataSource::Csv => load_csv(path),
65        DataSource::Excel => load_excel(path),
66        DataSource::Parquet => Err(DataLoaderError::InvalidFormat(
67            "Parquet preview is not supported in the rebuilt plugin yet".to_string(),
68        )),
69    }
70}
71
72/// Load data from a CSV file
73///
74/// # Arguments
75///
76/// * `path` - Path to the CSV file
77///
78/// # Returns
79///
80/// Returns `LoadedData` on success or `DataLoaderError` on failure
81pub fn load_csv(path: &Path) -> Result<LoadedData> {
82    load_csv_from_reader(std::fs::File::open(path)?)
83}
84
85pub fn load_csv_from_reader(reader: impl Read) -> Result<LoadedData> {
86    let mut csv_reader = csv::Reader::from_reader(reader);
87    let headers = csv_reader
88        .headers()?
89        .iter()
90        .enumerate()
91        .map(|(index, value)| normalize_header(value, index))
92        .collect::<Vec<_>>();
93
94    let mut rows = Vec::new();
95    for record in csv_reader.records() {
96        let record = record?;
97        let mut row = record.iter().map(ToOwned::to_owned).collect::<Vec<_>>();
98        row.resize(headers.len(), String::new());
99        rows.push(row);
100    }
101
102    Ok(LoadedData {
103        headers,
104        rows,
105        source: DataSource::Csv,
106    })
107}
108
109pub fn write_csv(path: &Path, data: &LoadedData) -> Result<()> {
110    let mut writer = csv::Writer::from_path(path)?;
111    writer.write_record(&data.headers)?;
112    for row in &data.rows {
113        writer.write_record(row)?;
114    }
115    writer.flush()?;
116    Ok(())
117}
118
119pub fn load_excel(path: &Path) -> Result<LoadedData> {
120    let mut workbook = open_workbook_auto(path)?;
121    let sheet_names = workbook.sheet_names().to_owned();
122    let sheet_name = sheet_names
123        .first()
124        .ok_or_else(|| DataLoaderError::InvalidFormat("Excel file has no sheets".to_string()))?;
125    let range = workbook.worksheet_range(sheet_name)?;
126    let mut rows_iter = range.rows();
127    let header_row = rows_iter
128        .next()
129        .ok_or_else(|| DataLoaderError::InvalidFormat("Excel sheet is empty".to_string()))?;
130
131    let headers = header_row
132        .iter()
133        .enumerate()
134        .map(|(index, cell)| normalize_header(&excel_cell_to_string(cell), index))
135        .collect::<Vec<_>>();
136
137    let mut rows = Vec::new();
138    for row in rows_iter {
139        let mut rendered = row.iter().map(excel_cell_to_string).collect::<Vec<_>>();
140        rendered.resize(headers.len(), String::new());
141        rows.push(rendered);
142    }
143
144    Ok(LoadedData {
145        headers,
146        rows,
147        source: DataSource::Excel,
148    })
149}
150
151pub fn get_data_source(path: &Path) -> Result<DataSource> {
152    let extension = path
153        .extension()
154        .and_then(|ext| ext.to_str())
155        .ok_or_else(|| DataLoaderError::InvalidFormat("Unknown file format".to_string()))?;
156
157    match extension.to_ascii_lowercase().as_str() {
158        "csv" => Ok(DataSource::Csv),
159        "xlsx" | "xls" => Ok(DataSource::Excel),
160        "parquet" => Ok(DataSource::Parquet),
161        _ => Err(DataLoaderError::InvalidFormat(format!(
162            "Unsupported file format: {extension}"
163        ))),
164    }
165}
166
/// Return the final component of `path` as a `String`.
///
/// Falls back to `"unknown"` when the path has no file name (for example
/// `..`) or the name is not valid UTF-8.
pub fn get_file_name(path: &Path) -> String {
    match path.file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_owned(),
        None => "unknown".to_owned(),
    }
}
173
/// Return the extension of `path` (without the leading dot) as a `String`.
///
/// Yields an empty string when the path has no extension or the extension is
/// not valid UTF-8. The original letter case is preserved.
pub fn get_file_extension(path: &Path) -> String {
    path.extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_owned)
        .unwrap_or_default()
}
180
/// Report whether something exists at `path`.
///
/// Returns `true` when metadata for `path` can be read and `false` on any
/// failure to read it.
pub fn file_exists(path: &Path) -> bool {
    std::fs::metadata(path).is_ok()
}
184
185pub fn get_file_size(path: &Path) -> Result<u64> {
186    std::fs::metadata(path)
187        .map(|meta| meta.len())
188        .map_err(DataLoaderError::IoError)
189}
190
191pub fn get_file_modification_time(path: &Path) -> Result<std::time::SystemTime> {
192    std::fs::metadata(path)
193        .and_then(|meta| meta.modified())
194        .map_err(DataLoaderError::IoError)
195}
196
/// Clean up a raw header cell.
///
/// Surrounding whitespace is trimmed. If nothing remains, a placeholder of
/// the form `column_N` is returned, where `N` is the 1-based position
/// (`index + 1`).
fn normalize_header(value: &str, index: usize) -> String {
    match value.trim() {
        "" => format!("column_{}", index + 1),
        name => name.to_owned(),
    }
}
205
206fn excel_cell_to_string(cell: &Data) -> String {
207    match cell {
208        Data::Empty => String::new(),
209        Data::String(value) | Data::DateTimeIso(value) | Data::DurationIso(value) => value.clone(),
210        Data::Int(value) => value.to_string(),
211        Data::Float(value) => value.to_string(),
212        Data::Bool(value) => value.to_string(),
213        Data::DateTime(value) => value.to_string(),
214        Data::Error(value) => format!("{value:?}"),
215    }
216}