hedl_cli/commands/convert.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
//! Conversion commands - HEDL format interoperability
//!
//! This module provides bidirectional conversion between HEDL and popular data formats:
//! - JSON (compact and pretty-printed)
//! - YAML
//! - XML (compact and pretty-printed)
//! - CSV
//! - Parquet
//! - TOON
//!
//! All conversions preserve data fidelity where possible, with format-specific
//! optimizations and configurations.
29
30use super::{read_file, write_output};
31use crate::error::CliError;
32use hedl_c14n::canonicalize;
33use hedl_core::parse;
34use hedl_csv::{from_csv as csv_to_hedl, to_csv_with_config, ToCsvConfig};
35use hedl_json::{from_json as json_to_hedl, to_json_value, FromJsonConfig, ToJsonConfig};
36use hedl_parquet::{from_parquet as parquet_to_hedl, to_parquet as hedl_to_parquet};
37use hedl_toon::{hedl_to_toon, toon_to_hedl};
38use hedl_xml::{from_xml as xml_to_hedl, to_xml as hedl_to_xml, FromXmlConfig, ToXmlConfig};
39use hedl_yaml::{from_yaml as yaml_to_hedl, to_yaml as hedl_to_yaml, FromYamlConfig, ToYamlConfig};
40use std::path::Path;
41
42// JSON conversion
43
44/// Convert a HEDL file to JSON format.
45///
46/// Parses a HEDL file and converts it to JSON, with options for metadata inclusion
47/// and pretty-printing.
48///
49/// # Arguments
50///
51/// * `file` - Path to the HEDL file to convert
52/// * `output` - Optional output file path. If `None`, writes to stdout
53/// * `metadata` - If `true`, includes HEDL-specific metadata in the JSON output
54/// * `pretty` - If `true`, pretty-prints the JSON with indentation
55///
56/// # Returns
57///
58/// Returns `Ok(())` on success.
59///
60/// # Errors
61///
62/// Returns `Err` if:
63/// - The file cannot be read
64/// - The file contains syntax errors
65/// - JSON conversion fails
66/// - Output writing fails
67///
68/// # Examples
69///
70/// ```no_run
71/// use hedl_cli::commands::to_json;
72///
73/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
74/// // Convert to compact JSON on stdout
75/// to_json("data.hedl", None, false, false)?;
76///
77/// // Convert to pretty JSON with metadata
78/// to_json("data.hedl", Some("output.json"), true, true)?;
79/// # Ok(())
80/// # }
81/// ```
82pub fn to_json(
83 file: &str,
84 output: Option<&str>,
85 metadata: bool,
86 pretty: bool,
87) -> Result<(), CliError> {
88 let content = read_file(file)?;
89
90 let doc =
91 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
92
93 let config = ToJsonConfig {
94 include_metadata: metadata,
95 ..Default::default()
96 };
97
98 let value = to_json_value(&doc, &config)
99 .map_err(|e| CliError::json_conversion(format!("JSON conversion error: {e}")))?;
100 let output_str = if pretty {
101 serde_json::to_string_pretty(&value)
102 .map_err(|e| CliError::json_conversion(format!("JSON format error: {e}")))?
103 } else {
104 serde_json::to_string(&value)
105 .map_err(|e| CliError::json_conversion(format!("JSON format error: {e}")))?
106 };
107
108 write_output(&output_str, output)
109}
110
111/// Convert a JSON file to HEDL format.
112///
113/// Parses a JSON file and converts it to canonical HEDL format.
114///
115/// # Arguments
116///
117/// * `file` - Path to the JSON file to convert
118/// * `output` - Optional output file path. If `None`, writes to stdout
119///
120/// # Returns
121///
122/// Returns `Ok(())` on success.
123///
124/// # Errors
125///
126/// Returns `Err` if:
127/// - The file cannot be read
128/// - The JSON is malformed
129/// - JSON-to-HEDL conversion fails
130/// - HEDL canonicalization fails
131/// - Output writing fails
132///
133/// # Examples
134///
135/// ```no_run
136/// use hedl_cli::commands::from_json;
137///
138/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
139/// // Convert JSON to HEDL on stdout
140/// from_json("data.json", None)?;
141///
142/// // Convert JSON to HEDL file
143/// from_json("data.json", Some("output.hedl"))?;
144/// # Ok(())
145/// # }
146/// ```
147pub fn from_json(file: &str, output: Option<&str>) -> Result<(), CliError> {
148 let content = read_file(file)?;
149
150 // FK relationships are automatically detected and nested
151 let doc = json_to_hedl(&content, &FromJsonConfig::default())
152 .map_err(|e| CliError::json_conversion(format!("JSON conversion error: {e}")))?;
153
154 let hedl = canonicalize(&doc)
155 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
156
157 write_output(&hedl, output)
158}
159
160// YAML conversion
161
162/// Convert a HEDL file to YAML format.
163///
164/// Parses a HEDL file and converts it to YAML format.
165///
166/// # Arguments
167///
168/// * `file` - Path to the HEDL file to convert
169/// * `output` - Optional output file path. If `None`, writes to stdout
170///
171/// # Returns
172///
173/// Returns `Ok(())` on success.
174///
175/// # Errors
176///
177/// Returns `Err` if:
178/// - The file cannot be read
179/// - The file contains syntax errors
180/// - YAML conversion fails
181/// - Output writing fails
182///
183/// # Examples
184///
185/// ```no_run
186/// use hedl_cli::commands::to_yaml;
187///
188/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
189/// // Convert to YAML on stdout
190/// to_yaml("data.hedl", None)?;
191///
192/// // Convert to YAML file
193/// to_yaml("data.hedl", Some("output.yaml"))?;
194/// # Ok(())
195/// # }
196/// ```
197pub fn to_yaml(file: &str, output: Option<&str>) -> Result<(), CliError> {
198 let content = read_file(file)?;
199
200 let doc =
201 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
202
203 let config = ToYamlConfig::default();
204 let yaml = hedl_to_yaml(&doc, &config)
205 .map_err(|e| CliError::yaml_conversion(format!("YAML conversion error: {e}")))?;
206
207 write_output(&yaml, output)
208}
209
210/// Convert a YAML file to HEDL format.
211///
212/// Parses a YAML file and converts it to canonical HEDL format.
213///
214/// # Arguments
215///
216/// * `file` - Path to the YAML file to convert
217/// * `output` - Optional output file path. If `None`, writes to stdout
218///
219/// # Returns
220///
221/// Returns `Ok(())` on success.
222///
223/// # Errors
224///
225/// Returns `Err` if:
226/// - The file cannot be read
227/// - The YAML is malformed
228/// - YAML-to-HEDL conversion fails
229/// - HEDL canonicalization fails
230/// - Output writing fails
231///
232/// # Examples
233///
234/// ```no_run
235/// use hedl_cli::commands::from_yaml;
236///
237/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
238/// // Convert YAML to HEDL on stdout
239/// from_yaml("data.yaml", None)?;
240///
241/// // Convert YAML to HEDL file
242/// from_yaml("data.yml", Some("output.hedl"))?;
243/// # Ok(())
244/// # }
245/// ```
246pub fn from_yaml(file: &str, output: Option<&str>) -> Result<(), CliError> {
247 let content = read_file(file)?;
248
249 let config = FromYamlConfig::default();
250 let doc = yaml_to_hedl(&content, &config)
251 .map_err(|e| CliError::yaml_conversion(format!("YAML conversion error: {e}")))?;
252
253 let hedl = canonicalize(&doc)
254 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
255
256 write_output(&hedl, output)
257}
258
259// XML conversion
260
261/// Convert a HEDL file to XML format.
262///
263/// Parses a HEDL file and converts it to XML, with optional pretty-printing.
264///
265/// # Arguments
266///
267/// * `file` - Path to the HEDL file to convert
268/// * `output` - Optional output file path. If `None`, writes to stdout
269/// * `pretty` - If `true`, pretty-prints the XML with indentation
270///
271/// # Returns
272///
273/// Returns `Ok(())` on success.
274///
275/// # Errors
276///
277/// Returns `Err` if:
278/// - The file cannot be read
279/// - The file contains syntax errors
280/// - XML conversion fails
281/// - Output writing fails
282///
283/// # Examples
284///
285/// ```no_run
286/// use hedl_cli::commands::to_xml;
287///
288/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
289/// // Convert to compact XML on stdout
290/// to_xml("data.hedl", None, false)?;
291///
292/// // Convert to pretty XML file
293/// to_xml("data.hedl", Some("output.xml"), true)?;
294/// # Ok(())
295/// # }
296/// ```
297pub fn to_xml(file: &str, output: Option<&str>, pretty: bool) -> Result<(), CliError> {
298 let content = read_file(file)?;
299
300 let doc =
301 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
302
303 let config = ToXmlConfig {
304 pretty,
305 include_metadata: true, // Enable for roundtrip fidelity
306 ..Default::default()
307 };
308 let xml = hedl_to_xml(&doc, &config)
309 .map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))?;
310
311 write_output(&xml, output)
312}
313
314/// Convert an XML file to HEDL format.
315///
316/// Parses an XML file and converts it to canonical HEDL format.
317///
318/// # Arguments
319///
320/// * `file` - Path to the XML file to convert
321/// * `output` - Optional output file path. If `None`, writes to stdout
322///
323/// # Returns
324///
325/// Returns `Ok(())` on success.
326///
327/// # Errors
328///
329/// Returns `Err` if:
330/// - The file cannot be read
331/// - The XML is malformed
332/// - XML-to-HEDL conversion fails
333/// - HEDL canonicalization fails
334/// - Output writing fails
335///
336/// # Examples
337///
338/// ```no_run
339/// use hedl_cli::commands::from_xml;
340///
341/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
342/// // Convert XML to HEDL on stdout
343/// from_xml("data.xml", None)?;
344///
345/// // Convert XML to HEDL file
346/// from_xml("data.xml", Some("output.hedl"))?;
347/// # Ok(())
348/// # }
349/// ```
350pub fn from_xml(file: &str, output: Option<&str>) -> Result<(), CliError> {
351 let content = read_file(file)?;
352
353 let config = FromXmlConfig::default();
354 let doc = xml_to_hedl(&content, &config)
355 .map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))?;
356
357 let hedl = canonicalize(&doc)
358 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
359
360 write_output(&hedl, output)
361}
362
363// CSV conversion
364
365/// Convert a HEDL file to CSV format.
366///
367/// Parses a HEDL file and converts it to CSV format. Expects the HEDL file to contain
368/// a matrix list that can be represented as a table.
369///
370/// # Arguments
371///
372/// * `file` - Path to the HEDL file to convert
373/// * `output` - Optional output file path. If `None`, writes to stdout
374/// * `include_headers` - If `true`, includes header row with column names (default: `true`)
375///
376/// # Returns
377///
378/// Returns `Ok(())` on success.
379///
380/// # Errors
381///
382/// Returns `Err` if:
383/// - The file cannot be read
384/// - The file contains syntax errors
385/// - The HEDL structure is not compatible with CSV (e.g., nested structures)
386/// - CSV conversion fails
387/// - Output writing fails
388///
389/// # Examples
390///
391/// ```no_run
392/// use hedl_cli::commands::to_csv;
393///
394/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
395/// // Convert to CSV on stdout with headers
396/// to_csv("data.hedl", None, true)?;
397///
398/// // Convert to CSV without headers (useful for appending)
399/// to_csv("data.hedl", Some("output.csv"), false)?;
400/// # Ok(())
401/// # }
402/// ```
403pub fn to_csv(file: &str, output: Option<&str>, include_headers: bool) -> Result<(), CliError> {
404 let content = read_file(file)?;
405
406 let doc =
407 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
408
409 // Use to_csv_with_config to respect user's header preference
410 let config = ToCsvConfig {
411 include_headers,
412 ..Default::default()
413 };
414 let csv = to_csv_with_config(&doc, config)
415 .map_err(|e| CliError::csv_conversion(format!("CSV conversion error: {e}")))?;
416
417 write_output(&csv, output)
418}
419
420/// Convert a CSV file to HEDL format.
421///
422/// Parses a CSV file and converts it to canonical HEDL format. The first row is assumed
423/// to be the header row containing column names.
424///
425/// # Arguments
426///
427/// * `file` - Path to the CSV file to convert
428/// * `output` - Optional output file path. If `None`, writes to stdout
429/// * `type_name` - The type name to use for the HEDL matrix list (must be alphanumeric)
430///
431/// # Returns
432///
433/// Returns `Ok(())` on success.
434///
435/// # Errors
436///
437/// Returns `Err` if:
438/// - The file cannot be read
439/// - The CSV is malformed or empty
440/// - The type name is invalid (must be alphanumeric with underscores)
441/// - CSV-to-HEDL conversion fails
442/// - HEDL canonicalization fails
443/// - Output writing fails
444///
445/// # Examples
446///
447/// ```no_run
448/// use hedl_cli::commands::from_csv;
449///
450/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
451/// // Convert CSV to HEDL on stdout with type name "Person"
452/// from_csv("people.csv", None, "Person")?;
453///
454/// // Convert CSV to HEDL file
455/// from_csv("data.csv", Some("output.hedl"), "Record")?;
456///
457/// // Invalid type name will fail
458/// let result = from_csv("data.csv", None, "Invalid-Name!");
459/// assert!(result.is_err());
460/// # Ok(())
461/// # }
462/// ```
463///
464/// # Security
465///
466/// The type name is validated to prevent injection attacks. Only alphanumeric
467/// characters and underscores are allowed.
468pub fn from_csv(file: &str, output: Option<&str>, type_name: &str) -> Result<(), CliError> {
469 let content = read_file(file)?;
470
471 // Validate type_name to prevent injection
472 if !type_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
473 return Err(CliError::invalid_input(
474 "Type name must be alphanumeric (with underscores allowed)",
475 ));
476 }
477
478 // Infer column names from header row
479 let first_line = content
480 .lines()
481 .next()
482 .ok_or_else(|| CliError::invalid_input("CSV file is empty or has no header row"))?;
483 let columns: Vec<&str> = first_line.split(',').skip(1).collect(); // Skip ID column
484
485 let doc = csv_to_hedl(&content, type_name, &columns)
486 .map_err(|e| CliError::csv_conversion(format!("CSV conversion error: {e}")))?;
487
488 let hedl = canonicalize(&doc)
489 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
490
491 write_output(&hedl, output)
492}
493
494// Parquet conversion
495
496/// Convert a HEDL file to Parquet format.
497///
498/// Parses a HEDL file and converts it to Apache Parquet columnar format. This is ideal
499/// for analytical workloads and integration with data processing frameworks.
500///
501/// # Arguments
502///
503/// * `file` - Path to the HEDL file to convert
504/// * `output` - Output Parquet file path (required, cannot write to stdout)
505///
506/// # Returns
507///
508/// Returns `Ok(())` on success.
509///
510/// # Errors
511///
512/// Returns `Err` if:
513/// - The file cannot be read
514/// - The file contains syntax errors
515/// - The HEDL structure is not compatible with Parquet
516/// - Parquet conversion fails
517/// - Output file cannot be written
518///
519/// # Examples
520///
521/// ```no_run
522/// use hedl_cli::commands::to_parquet;
523///
524/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
525/// // Convert to Parquet file
526/// to_parquet("data.hedl", "output.parquet")?;
527/// # Ok(())
528/// # }
529/// ```
530///
531/// # Note
532///
533/// Parquet requires a file path for output; it cannot write to stdout due to
534/// the binary columnar format.
535pub fn to_parquet(file: &str, output: &str) -> Result<(), CliError> {
536 let content = read_file(file)?;
537
538 let doc =
539 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
540
541 hedl_to_parquet(&doc, Path::new(output))
542 .map_err(|e| CliError::parquet_conversion(format!("Parquet conversion error: {e}")))?;
543
544 Ok(())
545}
546
547/// Convert a Parquet file to HEDL format.
548///
549/// Reads an Apache Parquet file and converts it to canonical HEDL format.
550///
551/// # Arguments
552///
553/// * `file` - Path to the Parquet file to convert
554/// * `output` - Optional output file path. If `None`, writes to stdout
555///
556/// # Returns
557///
558/// Returns `Ok(())` on success.
559///
560/// # Errors
561///
562/// Returns `Err` if:
563/// - The file cannot be read
564/// - The Parquet file is malformed or unsupported
565/// - Parquet-to-HEDL conversion fails
566/// - HEDL canonicalization fails
567/// - Output writing fails
568///
569/// # Examples
570///
571/// ```no_run
572/// use hedl_cli::commands::from_parquet;
573///
574/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
575/// // Convert Parquet to HEDL on stdout
576/// from_parquet("data.parquet", None)?;
577///
578/// // Convert Parquet to HEDL file
579/// from_parquet("data.parquet", Some("output.hedl"))?;
580/// # Ok(())
581/// # }
582/// ```
583pub fn from_parquet(file: &str, output: Option<&str>) -> Result<(), CliError> {
584 let doc = parquet_to_hedl(Path::new(file))
585 .map_err(|e| CliError::parquet_conversion(format!("Parquet conversion error: {e}")))?;
586
587 let hedl = canonicalize(&doc)
588 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
589
590 write_output(&hedl, output)
591}
592
593// TOON conversion
594
595/// Convert a HEDL file to TOON format.
596///
597/// Parses a HEDL file and converts it to TOON format.
598///
599/// # Arguments
600///
601/// * `file` - Path to the HEDL file to convert
602/// * `output` - Optional output file path. If `None`, writes to stdout
603///
604/// # Returns
605///
606/// Returns `Ok(())` on success.
607///
608/// # Errors
609///
610/// Returns `Err` if:
611/// - The file cannot be read
612/// - The file contains syntax errors
613/// - TOON conversion fails
614/// - Output writing fails
615///
616/// # Examples
617///
618/// ```no_run
619/// use hedl_cli::commands::to_toon;
620///
621/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
622/// // Convert to TOON on stdout
623/// to_toon("data.hedl", None)?;
624///
625/// // Convert to TOON file
626/// to_toon("data.hedl", Some("output.toon"))?;
627/// # Ok(())
628/// # }
629/// ```
630pub fn to_toon(file: &str, output: Option<&str>) -> Result<(), CliError> {
631 let content = read_file(file)?;
632
633 let doc =
634 parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
635
636 let toon = hedl_to_toon(&doc)
637 .map_err(|e| CliError::invalid_input(format!("TOON conversion error: {e}")))?;
638
639 write_output(&toon, output)
640}
641
642/// Convert a TOON file to HEDL format.
643///
644/// Parses a TOON file and converts it to HEDL format.
645///
646/// # Arguments
647///
648/// * `file` - Path to the TOON file to convert
649/// * `output` - Optional output file path. If `None`, writes to stdout
650///
651/// # Returns
652///
653/// Returns `Ok(())` on success.
654///
655/// # Errors
656///
657/// Returns `Err` if:
658/// - The file cannot be read
659/// - The file contains syntax errors
660/// - HEDL generation fails
661/// - Output writing fails
662///
663/// # Examples
664///
665/// ```no_run
666/// use hedl_cli::commands::from_toon;
667///
668/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
669/// // Convert TOON to HEDL on stdout
670/// from_toon("data.toon", None)?;
671///
672/// // Convert TOON to HEDL file
673/// from_toon("data.toon", Some("output.hedl"))?;
674/// # Ok(())
675/// # }
676/// ```
677pub fn from_toon(file: &str, output: Option<&str>) -> Result<(), CliError> {
678 let content = read_file(file)?;
679
680 let doc =
681 toon_to_hedl(&content).map_err(|e| CliError::parse(format!("TOON parse error: {e}")))?;
682
683 let hedl = canonicalize(&doc)
684 .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
685
686 write_output(&hedl, output)
687}