// xls_rs/types.rs

//! Type-safe data structures for xls-rs
//!
//! This module provides strongly-typed representations of cell data
//! to improve type safety and performance over string-only representations.

use std::fmt;

/// A strongly-typed cell value that can represent different data types
///
/// This enum lets the codebase distinguish between strings, floats,
/// integers, booleans, date/times and empty cells, so callers do not have to
/// re-parse text every time a cell is read.
///
/// Equality is derived, so `Number` follows `f64` semantics: a
/// `Number(f64::NAN)` never compares equal to anything, itself included.
#[derive(Debug, Clone, PartialEq)]
pub enum CellValue {
    /// String data
    String(String),
    /// Numeric data (float)
    Number(f64),
    /// Integer data (for exact precision when needed)
    Integer(i64),
    /// Boolean data
    Boolean(bool),
    /// Date/time data (stored as timestamp)
    // NOTE(review): the epoch and unit of this timestamp are not defined in
    // this file — confirm against the code that produces `DateTime` values.
    DateTime(i64),
    /// Empty/null value
    Empty,
}

29impl CellValue {
30    /// Create a String cell value
31    pub fn string(s: impl Into<String>) -> Self {
32        CellValue::String(s.into())
33    }
34
35    /// Create a Number cell value
36    pub fn number(n: f64) -> Self {
37        CellValue::Number(n)
38    }
39
40    /// Create an Integer cell value
41    pub fn integer(i: i64) -> Self {
42        CellValue::Integer(i)
43    }
44
45    /// Create a Boolean cell value
46    pub fn boolean(b: bool) -> Self {
47        CellValue::Boolean(b)
48    }
49
50    /// Create a DateTime cell value from timestamp
51    pub fn datetime(timestamp: i64) -> Self {
52        CellValue::DateTime(timestamp)
53    }
54
55    /// Create an Empty cell value
56    pub fn empty() -> Self {
57        CellValue::Empty
58    }
59
60    /// Check if the value is empty
61    pub fn is_empty(&self) -> bool {
62        matches!(self, CellValue::Empty)
63    }
64
65    /// Check if the value is numeric (Number or Integer)
66    pub fn is_numeric(&self) -> bool {
67        matches!(self, CellValue::Number(_) | CellValue::Integer(_))
68    }
69
70    /// Get the value as a string reference
71    pub fn as_str(&self) -> Option<&str> {
72        match self {
73            CellValue::String(s) => Some(s),
74            _ => None,
75        }
76    }
77
78    /// Get the value as a number (f64)
79    ///
80    /// Returns Some(f64) for Number and Integer values, None otherwise
81    pub fn as_number(&self) -> Option<f64> {
82        match self {
83            CellValue::Number(n) => Some(*n),
84            CellValue::Integer(i) => Some(*i as f64),
85            _ => None,
86        }
87    }
88
89    /// Get the value as a boolean
90    pub fn as_bool(&self) -> Option<bool> {
91        match self {
92            CellValue::Boolean(b) => Some(*b),
93            _ => None,
94        }
95    }
96
97    /// Convert to display string
98    pub fn to_display_string(&self) -> String {
99        match self {
100            CellValue::String(s) => s.clone(),
101            CellValue::Number(n) => {
102                // Format without unnecessary decimal places
103                if n.fract() == 0.0 && n.abs() < (i64::MAX as f64) {
104                    format!("{}", *n as i64)
105                } else {
106                    format!("{}", n)
107                }
108            }
109            CellValue::Integer(i) => format!("{}", i),
110            CellValue::Boolean(b) => format!("{}", b),
111            CellValue::DateTime(ts) => format!("{}", ts),
112            CellValue::Empty => String::new(),
113        }
114    }
115
116    /// Parse a string into the most appropriate CellValue type
117    ///
118    /// Attempts to parse the string as:
119    /// 1. Empty -> Empty
120    /// 2. Boolean ("true"/"false") -> Boolean
121    /// 3. Integer -> Integer
122    /// 4. Float -> Number
123    /// 5. Otherwise -> String
124    pub fn parse(s: &str) -> Self {
125        let trimmed = s.trim();
126
127        if trimmed.is_empty() {
128            return CellValue::Empty;
129        }
130
131        // Try boolean
132        match trimmed.to_lowercase().as_str() {
133            "true" | "yes" | "1" => return CellValue::Boolean(true),
134            "false" | "no" | "0" => return CellValue::Boolean(false),
135            _ => {}
136        }
137
138        // Try integer first (more precise)
139        if let Ok(i) = trimmed.parse::<i64>() {
140            return CellValue::Integer(i);
141        }
142
143        // Try float
144        if let Ok(n) = trimmed.parse::<f64>() {
145            return CellValue::Number(n);
146        }
147
148        // Default to string
149        CellValue::String(trimmed.to_string())
150    }
151
152    /// Convert from string representation with type hint
153    pub fn from_string_with_type(s: &str, type_hint: Option<&DataType>) -> Self {
154        match type_hint {
155            Some(DataType::Integer) => s.parse::<i64>()
156                .map(CellValue::Integer)
157                .unwrap_or_else(|_| CellValue::String(s.to_string())),
158            Some(DataType::Number) => s.parse::<f64>()
159                .map(CellValue::Number)
160                .unwrap_or_else(|_| CellValue::String(s.to_string())),
161            Some(DataType::Boolean) => match s.to_lowercase().as_str() {
162                "true" | "yes" | "1" => CellValue::Boolean(true),
163                "false" | "no" | "0" => CellValue::Boolean(false),
164                _ => CellValue::String(s.to_string()),
165            },
166            Some(DataType::String) | None => CellValue::parse(s),
167            Some(DataType::DateTime) => s.parse::<i64>()
168                .map(CellValue::DateTime)
169                .unwrap_or_else(|_| CellValue::String(s.to_string())),
170        }
171    }
172}
173
174impl Default for CellValue {
175    fn default() -> Self {
176        CellValue::Empty
177    }
178}
179
180impl fmt::Display for CellValue {
181    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
182        match self {
183            CellValue::String(s) => write!(f, "{}", s),
184            CellValue::Number(n) => write!(f, "{}", n),
185            CellValue::Integer(i) => write!(f, "{}", i),
186            CellValue::Boolean(b) => write!(f, "{}", b),
187            CellValue::DateTime(ts) => write!(f, "{}", ts),
188            CellValue::Empty => Ok(()),
189        }
190    }
191}
192
193impl From<String> for CellValue {
194    fn from(s: String) -> Self {
195        CellValue::parse(&s)
196    }
197}
198
199impl From<&str> for CellValue {
200    fn from(s: &str) -> Self {
201        CellValue::parse(s)
202    }
203}
204
205impl From<f64> for CellValue {
206    fn from(n: f64) -> Self {
207        CellValue::Number(n)
208    }
209}
210
211impl From<i64> for CellValue {
212    fn from(i: i64) -> Self {
213        CellValue::Integer(i)
214    }
215}
216
217impl From<bool> for CellValue {
218    fn from(b: bool) -> Self {
219        CellValue::Boolean(b)
220    }
221}
222
/// Data type metadata for columns
///
/// There is no dedicated "empty"/"unknown" variant: `String` doubles as the
/// fallback type for columns without typed data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DataType {
    /// Text, and the fallback when nothing more specific is known
    String,
    /// Floating-point numbers
    Number,
    /// Whole numbers
    Integer,
    /// Booleans
    Boolean,
    /// Date/time values
    DateTime,
}

233impl DataType {
234    /// Detect the data type from a cell value
235    pub fn from_value(value: &CellValue) -> Self {
236        match value {
237            CellValue::String(_) => DataType::String,
238            CellValue::Number(_) => DataType::Number,
239            CellValue::Integer(_) => DataType::Integer,
240            CellValue::Boolean(_) => DataType::Boolean,
241            CellValue::DateTime(_) => DataType::DateTime,
242            CellValue::Empty => DataType::String,
243        }
244    }
245
246    /// Get the string name of the data type
247    pub fn name(&self) -> &'static str {
248        match self {
249            DataType::String => "string",
250            DataType::Number => "float",
251            DataType::Integer => "integer",
252            DataType::Boolean => "boolean",
253            DataType::DateTime => "datetime",
254        }
255    }
256}
257
258impl fmt::Display for DataType {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        write!(f, "{}", self.name())
261    }
262}
263
264/// A row of type-safe cell values
265pub type DataRow = Vec<CellValue>;
266
267/// A dataset of type-safe data
268#[derive(Debug, Clone, PartialEq)]
269pub struct DataSet {
270    /// Column names (header row)
271    pub columns: Vec<String>,
272    /// Data rows
273    pub rows: Vec<DataRow>,
274    /// Column type metadata
275    pub column_types: Vec<DataType>,
276}
277
278impl DataSet {
279    /// Create a new empty dataset
280    pub fn new() -> Self {
281        Self {
282            columns: Vec::new(),
283            rows: Vec::new(),
284            column_types: Vec::new(),
285        }
286    }
287
288    /// Create a dataset with columns but no data
289    pub fn with_columns(columns: Vec<String>) -> Self {
290        let column_types = vec![DataType::String; columns.len()];
291        Self {
292            columns,
293            rows: Vec::new(),
294            column_types,
295        }
296    }
297
298    /// Add a row to the dataset
299    pub fn push_row(&mut self, row: DataRow) {
300        // Update column types based on new data
301        for (i, cell) in row.iter().enumerate() {
302            if i < self.column_types.len() {
303                let detected = DataType::from_value(cell);
304                // Prefer more specific types
305                if std::mem::discriminant(&self.column_types[i])
306                    != std::mem::discriminant(&detected)
307                {
308                    self.column_types[i] = detected;
309                }
310            }
311        }
312        self.rows.push(row);
313    }
314
315    /// Get the number of rows
316    pub fn row_count(&self) -> usize {
317        self.rows.len()
318    }
319
320    /// Get the number of columns
321    pub fn column_count(&self) -> usize {
322        self.columns.len()
323    }
324
325    /// Check if the dataset is empty
326    pub fn is_empty(&self) -> bool {
327        self.rows.is_empty()
328    }
329
330    /// Infer column types from existing data
331    pub fn infer_types(&mut self) {
332        for col_idx in 0..self.columns.len() {
333            let mut type_count: std::collections::HashMap<DataType, usize> =
334                std::collections::HashMap::new();
335
336            for row in &self.rows {
337                if let Some(cell) = row.get(col_idx) {
338                    let dt = DataType::from_value(cell);
339                    *type_count.entry(dt).or_insert(0) += 1;
340                }
341            }
342
343            // Choose the most common non-empty type
344            let most_common = type_count
345                .iter()
346                .filter(|(dt, _)| *dt != &DataType::String)
347                .max_by_key(|(_, count)| *count)
348                .map(|(dt, _)| *dt)
349                .unwrap_or(DataType::String);
350
351            if col_idx < self.column_types.len() {
352                self.column_types[col_idx] = most_common;
353            }
354        }
355    }
356}
357
358impl Default for DataSet {
359    fn default() -> Self {
360        Self::new()
361    }
362}
363
364/// Conversion from legacy `Vec<Vec<String>>` format
365impl From<Vec<Vec<String>>> for DataSet {
366    fn from(data: Vec<Vec<String>>) -> Self {
367        if data.is_empty() {
368            return DataSet::new();
369        }
370
371        let columns = data[0].clone();
372        let mut dataset = DataSet::with_columns(columns);
373
374        for row in &data[1..] {
375            let typed_row: DataRow = row.iter().map(|s| CellValue::parse(s)).collect();
376            dataset.push_row(typed_row);
377        }
378
379        dataset.infer_types();
380        dataset
381    }
382}
383
384/// Conversion to legacy `Vec<Vec<String>>` format
385impl From<DataSet> for Vec<Vec<String>> {
386    fn from(dataset: DataSet) -> Vec<Vec<String>> {
387        let mut result = vec![dataset.columns];
388
389        for row in dataset.rows {
390            let string_row: Vec<String> =
391                row.iter().map(|v| v.to_display_string()).collect();
392            result.push(string_row);
393        }
394
395        result
396    }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// `parse` picks a type from the trimmed text.
    #[test]
    fn test_cell_value_parse() {
        assert_eq!(CellValue::parse(""), CellValue::Empty);
        assert_eq!(CellValue::parse("   "), CellValue::Empty);
        assert_eq!(CellValue::parse("true"), CellValue::Boolean(true));
        assert_eq!(CellValue::parse("false"), CellValue::Boolean(false));
        assert_eq!(CellValue::parse("42"), CellValue::Integer(42));
        assert_eq!(CellValue::parse(" -7 "), CellValue::Integer(-7));
        // 2.5 rather than 3.14: the latter trips clippy's `approx_constant`.
        assert_eq!(CellValue::parse("2.5"), CellValue::Number(2.5));
        assert_eq!(CellValue::parse("hello"), CellValue::String("hello".to_string()));
    }

    /// Only `Integer` and `Number` count as numeric.
    #[test]
    fn test_cell_value_numeric() {
        assert!(CellValue::Integer(42).is_numeric());
        assert!(CellValue::Number(2.5).is_numeric());
        assert!(!CellValue::String("42".to_string()).is_numeric());
        assert!(!CellValue::Boolean(true).is_numeric());
        assert!(!CellValue::Empty.is_numeric());
    }

    /// `as_number` widens integers and rejects non-numeric variants.
    #[test]
    fn test_cell_value_as_number() {
        assert_eq!(CellValue::Integer(42).as_number(), Some(42.0));
        assert_eq!(CellValue::Number(2.5).as_number(), Some(2.5));
        assert_eq!(CellValue::String("42".to_string()).as_number(), None);
        assert_eq!(CellValue::Empty.as_number(), None);
    }

    /// Whole-valued floats are displayed without a fractional part.
    #[test]
    fn test_cell_value_display_string() {
        assert_eq!(CellValue::Number(3.0).to_display_string(), "3");
        assert_eq!(CellValue::Number(2.5).to_display_string(), "2.5");
        assert_eq!(CellValue::Integer(-7).to_display_string(), "-7");
        assert_eq!(CellValue::Empty.to_display_string(), "");
    }

    /// Legacy string tables survive a round trip through `DataSet`.
    #[test]
    fn test_dataset_conversion() {
        let legacy = vec![
            vec!["name".to_string(), "age".to_string()],
            vec!["Alice".to_string(), "30".to_string()],
            vec!["Bob".to_string(), "25".to_string()],
        ];

        let dataset: DataSet = legacy.clone().into();
        assert_eq!(dataset.columns, vec!["name", "age"]);
        assert_eq!(dataset.row_count(), 2);
        assert_eq!(dataset.column_types, vec![DataType::String, DataType::Integer]);

        let back: Vec<Vec<String>> = dataset.into();
        assert_eq!(back, legacy);
    }
}