Skip to main content

hedl_csv/
lib.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! CSV file ↔ HEDL format bidirectional conversion.
19//!
20//! This crate provides functionality to convert between CSV files and HEDL documents.
21//! It handles both reading CSV data into HEDL structures and writing HEDL data to CSV format.
22//!
23//! # Features
24//!
25//! - **Bidirectional conversion**: Convert HEDL → CSV and CSV → HEDL
//! - **Type inference**: Automatically infer types when reading CSV (null, bool, int, float, string, references, expressions)
27//! - **Configurable**: Support for custom delimiters, quote styles, and header options
28//! - **Matrix lists**: CSV tables map naturally to HEDL matrix lists
29//! - **Error handling**: Comprehensive error reporting with context
30//!
31//! # Examples
32//!
33//! ## Converting HEDL to CSV
34//!
35//! ```no_run
36//! use hedl_core::{Document, Item, MatrixList, Node, Value};
37//! use hedl_csv::to_csv;
38//!
39//! let mut doc = Document::new((1, 0));
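//! // The schema lists every column, with the ID column first.
//! let mut list = MatrixList::new(
//!     "Person",
//!     vec!["id".to_string(), "name".to_string(), "age".to_string()],
//! );
//!
//! // Row fields hold every value, including the ID in the first position.
//! list.add_row(Node::new(
//!     "Person",
//!     "1",
//!     vec![
//!         Value::String("1".to_string().into()),
//!         Value::String("Alice".to_string().into()),
//!         Value::Int(30),
//!     ],
//! ));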
47//!
48//! doc.root.insert("people".to_string(), Item::List(list));
49//!
50//! let csv_string = to_csv(&doc).unwrap();
51//! println!("{}", csv_string);
52//! // Output:
53//! // id,name,age
54//! // 1,Alice,30
55//! ```
56//!
57//! ## Converting CSV to HEDL
58//!
59//! ```no_run
60//! use hedl_csv::from_csv;
61//!
62//! let csv_data = r#"
63//! id,name,age,active
64//! 1,Alice,30,true
65//! 2,Bob,25,false
66//! "#;
67//!
68//! let doc = from_csv(csv_data, "Person", &["name", "age", "active"]).unwrap();
69//!
//! // Access the matrix list, stored under the default key "persons"
71//! let item = doc.get("persons").unwrap();
72//! let list = item.as_list().unwrap();
73//! assert_eq!(list.rows.len(), 2);
74//! ```
75//!
76//! ## Custom Configuration
77//!
78//! ```no_run
79//! use hedl_csv::{from_csv_with_config, to_csv_with_config, FromCsvConfig, ToCsvConfig};
80//!
81//! // Reading CSV with custom delimiter
82//! let csv_data = "id\tname\tage\n1\tAlice\t30";
83//! let config = FromCsvConfig {
84//!     delimiter: b'\t',
85//!     has_headers: true,
86//!     trim: true,
87//!     ..Default::default()
88//! };
89//! let doc = from_csv_with_config(csv_data, "Person", &["name", "age"], config).unwrap();
90//!
91//! // Writing CSV without headers
92//! let config = ToCsvConfig {
93//!     include_headers: false,
94//!     ..Default::default()
95//! };
96//! let csv_string = to_csv_with_config(&doc, config).unwrap();
97//! ```
98//!
99//! ## Custom List Keys (Irregular Plurals)
100//!
101//! ```no_run
102//! use hedl_csv::{from_csv_with_config, FromCsvConfig};
103//!
104//! let csv_data = "id,name,age\n1,Alice,30\n2,Bob,25";
105//!
106//! // Use "people" instead of default "persons" for Person type
107//! let config = FromCsvConfig {
108//!     list_key: Some("people".to_string()),
109//!     ..Default::default()
110//! };
111//! let doc = from_csv_with_config(csv_data, "Person", &["name", "age"], config).unwrap();
112//!
113//! // Access using the custom plural form
114//! let list = doc.get("people").unwrap().as_list().unwrap();
115//! assert_eq!(list.rows.len(), 2);
116//! ```
117//!
118//! ## Selective List Export
119//!
120//! When a document contains multiple lists, you can export each one independently
121//! without converting the entire document:
122//!
123//! ```no_run
124//! use hedl_core::Document;
125//! use hedl_csv::to_csv_list;
126//!
127//! let doc = Document::new((1, 0));
128//! // Export only the "people" list
129//! let csv_people = to_csv_list(&doc, "people").unwrap();
130//! // Export only the "items" list
131//! let csv_items = to_csv_list(&doc, "items").unwrap();
132//! ```
133//!
134//! This is useful when you want to export specific tables from multi-list documents
135//! without exporting everything.
136//!
137//! ## Round-trip Conversion
138//!
139//! ```no_run
140//! use hedl_csv::{from_csv, to_csv};
141//!
142//! let original_csv = "id,name,age\n1,Alice,30\n2,Bob,25\n";
143//! let doc = from_csv(original_csv, "Person", &["name", "age"]).unwrap();
144//! let converted_csv = to_csv(&doc).unwrap();
145//!
146//! // The structure is preserved
147//! assert_eq!(original_csv, converted_csv);
148//! ```
149//!
150//! # Type Inference
151//!
152//! When reading CSV data, values are automatically inferred as:
153//!
154//! - Empty string or `~` → `Value::Null`
155//! - `true` or `false` → `Value::Bool`
156//! - Integer pattern → `Value::Int`
157//! - Float pattern → `Value::Float`
158//! - `@id` or `@Type:id` → `Value::Reference`
159//! - `$(expr)` → `Value::Expression`
160//! - Otherwise → `Value::String`
161//!
162//! Special float values are supported: `NaN`, `Infinity`, `-Infinity`
163
164#![cfg_attr(not(test), warn(missing_docs))]
165mod error;
166/// CSV to HEDL conversion.
167pub mod from_csv;
168mod to_csv;
169
170// Re-export public API
171pub use error::{CsvError, Result};
172pub use from_csv::{
173    from_csv,
174    from_csv_reader,
175    from_csv_reader_with_config,
176    from_csv_with_config,
177    FromCsvConfig,
178    DEFAULT_MAX_CELL_SIZE,
179    // Security limit constants
180    DEFAULT_MAX_COLUMNS,
181    DEFAULT_MAX_HEADER_SIZE,
182    DEFAULT_MAX_ROWS,
183    DEFAULT_MAX_TOTAL_SIZE,
184};
185pub use to_csv::{
186    to_csv, to_csv_list, to_csv_list_with_config, to_csv_list_writer,
187    to_csv_list_writer_with_config, to_csv_with_config, to_csv_writer, to_csv_writer_with_config,
188    ToCsvConfig,
189};
190
#[cfg(test)]
mod integration_tests {
    use super::*;
    use hedl_core::{Document, Item, MatrixList, Node, Value};
    use hedl_test::expr_value;

    /// Builds a `Value::String` from a string slice.
    fn text(raw: &str) -> Value {
        Value::String(raw.to_string().into())
    }

    /// Creates a document via `Document::new((1, 0))` and stores `list` in its
    /// root under `key`.
    fn doc_with_list(key: &str, list: MatrixList) -> Document {
        let mut doc = Document::new((1, 0));
        doc.root.insert(key.to_string(), Item::List(list));
        doc
    }

    /// Asserts that the leading fields of `row` equal `expected`, position by position.
    fn assert_fields(row: &Node, expected: &[Value]) {
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(row.fields[idx], *want);
        }
    }

    /// Asserts the row's `id` and then its leading fields.
    fn assert_row(row: &Node, id: &str, expected: &[Value]) {
        assert_eq!(&*row.id, id);
        assert_fields(row, expected);
    }

    /// Round trip HEDL → CSV → HEDL: rows survive the conversion.
    ///
    /// The ID is written as a string but is read back as `Value::Int`, because
    /// CSV import infers types from the cell text.
    #[test]
    fn test_round_trip_conversion() {
        // Per SPEC.md: MatrixList.schema includes all column names with ID first
        let columns = ["id", "name", "age", "score", "active"];
        let mut people = MatrixList::new(
            "Person",
            columns
                .iter()
                .map(|c| (*c).to_string())
                .collect::<Vec<String>>(),
        );

        // Per SPEC.md: Node.fields contains ALL values including ID (first column)
        people.add_row(Node::new(
            "Person",
            "1",
            vec![
                text("1"),
                text("Alice"),
                Value::Int(30),
                Value::Float(95.5),
                Value::Bool(true),
            ],
        ));
        people.add_row(Node::new(
            "Person",
            "2",
            vec![
                text("2"),
                text("Bob"),
                Value::Int(25),
                Value::Float(87.3),
                Value::Bool(false),
            ],
        ));

        let original = doc_with_list("people", people);

        // HEDL → CSV, then CSV → HEDL.
        let csv = to_csv(&original).unwrap();
        let restored = from_csv(&csv, "Person", &["name", "age", "score", "active"]).unwrap();

        // The re-imported list lives under the default key for `Person`.
        let item = restored.get("persons").unwrap();
        let persons = item.as_list().unwrap();
        assert_eq!(persons.rows.len(), 2);

        assert_row(
            &persons.rows[0],
            "1",
            &[
                Value::Int(1), // ID field
                text("Alice"),
                Value::Int(30),
                Value::Float(95.5),
                Value::Bool(true),
            ],
        );
        assert_row(
            &persons.rows[1],
            "2",
            &[
                Value::Int(2), // ID field
                text("Bob"),
                Value::Int(25),
                Value::Float(87.3),
                Value::Bool(false),
            ],
        );
    }

    /// A `Value::Null` cell is still `Value::Null` after export and re-import.
    #[test]
    fn test_null_values() {
        let mut items = MatrixList::new("Item", vec!["id".to_string(), "value".to_string()]);
        items.add_row(Node::new("Item", "1", vec![text("1"), Value::Null]));
        let original = doc_with_list("items", items);

        let csv = to_csv(&original).unwrap();
        let restored = from_csv(&csv, "Item", &["value"]).unwrap();

        let item = restored.get("items").unwrap();
        let reloaded = item.as_list().unwrap();
        // ID field first, then the null value.
        assert_fields(&reloaded.rows[0], &[Value::Int(1), Value::Null]);
    }

    /// Local and type-qualified references keep their id and type name across
    /// a round trip.
    #[test]
    fn test_references() {
        let mut items = MatrixList::new("Item", vec!["id".to_string(), "ref".to_string()]);

        let local_ref = Value::Reference(hedl_core::Reference::local("user1"));
        items.add_row(Node::new("Item", "1", vec![text("1"), local_ref]));

        let qualified_ref = Value::Reference(hedl_core::Reference::qualified("User", "user2"));
        items.add_row(Node::new("Item", "2", vec![text("2"), qualified_ref]));

        let original = doc_with_list("items", items);

        let csv = to_csv(&original).unwrap();
        let restored = from_csv(&csv, "Item", &["ref"]).unwrap();

        let item = restored.get("items").unwrap();
        let reloaded = item.as_list().unwrap();

        // Local reference: id only, no type name.
        let first = &reloaded.rows[0];
        assert_fields(first, &[Value::Int(1)]); // ID field
        let local = first.fields[1].as_reference().unwrap();
        assert_eq!(&*local.id, "user1");
        assert!(local.type_name.is_none());

        // Qualified reference: id plus type name.
        let second = &reloaded.rows[1];
        assert_fields(second, &[Value::Int(2)]); // ID field
        let qualified = second.fields[1].as_reference().unwrap();
        assert_eq!(&*qualified.id, "user2");
        assert_eq!(qualified.type_name.as_deref(), Some("User"));
    }

    /// One column holding an int, float, bool, string, reference and empty
    /// cell is inferred cell by cell.
    #[test]
    fn test_mixed_types() {
        let csv_data = r"
id,value
1,42
2,3.25
3,true
4,hello
5,@ref1
6,
";

        let doc = from_csv(csv_data, "Item", &["value"]).unwrap();
        let item = doc.get("items").unwrap();
        let list = item.as_list().unwrap();
        let rows = &list.rows;

        assert_eq!(rows.len(), 6);

        // Each expectation is `[ID field, inferred value]`.
        assert_fields(&rows[0], &[Value::Int(1), Value::Int(42)]);
        assert_fields(&rows[1], &[Value::Int(2), Value::Float(3.25)]);
        assert_fields(&rows[2], &[Value::Int(3), Value::Bool(true)]);
        assert_fields(&rows[3], &[Value::Int(4), text("hello")]);

        // Only the variant is checked for the reference cell.
        assert_fields(&rows[4], &[Value::Int(5)]);
        assert!(matches!(rows[4].fields[1], Value::Reference(_)));

        // The trailing empty cell becomes null.
        assert_fields(&rows[5], &[Value::Int(6), Value::Null]);
    }

    /// Expressions are written as `$(...)` and parse back to the same value.
    #[test]
    fn test_expressions() {
        let mut items = MatrixList::new("Item", vec!["id".to_string(), "expr".to_string()]);
        items.add_row(Node::new(
            "Item",
            "1",
            vec![text("1"), expr_value("add(x, y)")],
        ));
        let original = doc_with_list("items", items);

        let csv = to_csv(&original).unwrap();
        assert!(csv.contains("$(add(x, y))"));

        let restored = from_csv(&csv, "Item", &["expr"]).unwrap();
        let item = restored.get("items").unwrap();
        let reloaded = item.as_list().unwrap();

        // ID field first, then the expression.
        assert_fields(&reloaded.rows[0], &[Value::Int(1), expr_value("add(x, y)")]);
    }
}