hedl_cli/commands/
convert.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Conversion commands - HEDL format interoperability
19//!
20//! This module provides bidirectional conversion between HEDL and popular data formats:
21//! - JSON (compact and pretty-printed)
22//! - YAML
23//! - XML (compact and pretty-printed)
24//! - CSV
//! - Parquet
//! - TOON
26//!
27//! All conversions preserve data fidelity where possible, with format-specific
28//! optimizations and configurations.
29
30use super::{read_file, write_output};
31use crate::error::CliError;
32use hedl_c14n::canonicalize;
33use hedl_core::parse;
34use hedl_csv::{from_csv as csv_to_hedl, to_csv_with_config, ToCsvConfig};
35use hedl_json::{from_json as json_to_hedl, to_json_value, FromJsonConfig, ToJsonConfig};
36use hedl_parquet::{from_parquet as parquet_to_hedl, to_parquet as hedl_to_parquet};
37use hedl_toon::{hedl_to_toon, toon_to_hedl};
38use hedl_xml::{from_xml as xml_to_hedl, to_xml as hedl_to_xml, FromXmlConfig, ToXmlConfig};
39use hedl_yaml::{from_yaml as yaml_to_hedl, to_yaml as hedl_to_yaml, FromYamlConfig, ToYamlConfig};
40use std::path::Path;
41
42// JSON conversion
43
44/// Convert a HEDL file to JSON format.
45///
46/// Parses a HEDL file and converts it to JSON, with options for metadata inclusion
47/// and pretty-printing.
48///
49/// # Arguments
50///
51/// * `file` - Path to the HEDL file to convert
52/// * `output` - Optional output file path. If `None`, writes to stdout
53/// * `metadata` - If `true`, includes HEDL-specific metadata in the JSON output
54/// * `pretty` - If `true`, pretty-prints the JSON with indentation
55///
56/// # Returns
57///
58/// Returns `Ok(())` on success.
59///
60/// # Errors
61///
62/// Returns `Err` if:
63/// - The file cannot be read
64/// - The file contains syntax errors
65/// - JSON conversion fails
66/// - Output writing fails
67///
68/// # Examples
69///
70/// ```no_run
71/// use hedl_cli::commands::to_json;
72///
73/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
74/// // Convert to compact JSON on stdout
75/// to_json("data.hedl", None, false, false)?;
76///
77/// // Convert to pretty JSON with metadata
78/// to_json("data.hedl", Some("output.json"), true, true)?;
79/// # Ok(())
80/// # }
81/// ```
82pub fn to_json(
83    file: &str,
84    output: Option<&str>,
85    metadata: bool,
86    pretty: bool,
87) -> Result<(), CliError> {
88    let content = read_file(file)?;
89
90    let doc =
91        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
92
93    let config = ToJsonConfig {
94        include_metadata: metadata,
95        ..Default::default()
96    };
97
98    let value = to_json_value(&doc, &config)
99        .map_err(|e| CliError::json_conversion(format!("JSON conversion error: {e}")))?;
100    let output_str = if pretty {
101        serde_json::to_string_pretty(&value)
102            .map_err(|e| CliError::json_conversion(format!("JSON format error: {e}")))?
103    } else {
104        serde_json::to_string(&value)
105            .map_err(|e| CliError::json_conversion(format!("JSON format error: {e}")))?
106    };
107
108    write_output(&output_str, output)
109}
110
111/// Convert a JSON file to HEDL format.
112///
113/// Parses a JSON file and converts it to canonical HEDL format.
114///
115/// # Arguments
116///
117/// * `file` - Path to the JSON file to convert
118/// * `output` - Optional output file path. If `None`, writes to stdout
119///
120/// # Returns
121///
122/// Returns `Ok(())` on success.
123///
124/// # Errors
125///
126/// Returns `Err` if:
127/// - The file cannot be read
128/// - The JSON is malformed
129/// - JSON-to-HEDL conversion fails
130/// - HEDL canonicalization fails
131/// - Output writing fails
132///
133/// # Examples
134///
135/// ```no_run
136/// use hedl_cli::commands::from_json;
137///
138/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
139/// // Convert JSON to HEDL on stdout
140/// from_json("data.json", None)?;
141///
142/// // Convert JSON to HEDL file
143/// from_json("data.json", Some("output.hedl"))?;
144/// # Ok(())
145/// # }
146/// ```
147pub fn from_json(file: &str, output: Option<&str>) -> Result<(), CliError> {
148    let content = read_file(file)?;
149
150    let config = FromJsonConfig::default();
151    let doc = json_to_hedl(&content, &config)
152        .map_err(|e| CliError::json_conversion(format!("JSON conversion error: {e}")))?;
153
154    let hedl = canonicalize(&doc)
155        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
156
157    write_output(&hedl, output)
158}
159
160// YAML conversion
161
162/// Convert a HEDL file to YAML format.
163///
164/// Parses a HEDL file and converts it to YAML format.
165///
166/// # Arguments
167///
168/// * `file` - Path to the HEDL file to convert
169/// * `output` - Optional output file path. If `None`, writes to stdout
170///
171/// # Returns
172///
173/// Returns `Ok(())` on success.
174///
175/// # Errors
176///
177/// Returns `Err` if:
178/// - The file cannot be read
179/// - The file contains syntax errors
180/// - YAML conversion fails
181/// - Output writing fails
182///
183/// # Examples
184///
185/// ```no_run
186/// use hedl_cli::commands::to_yaml;
187///
188/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
189/// // Convert to YAML on stdout
190/// to_yaml("data.hedl", None)?;
191///
192/// // Convert to YAML file
193/// to_yaml("data.hedl", Some("output.yaml"))?;
194/// # Ok(())
195/// # }
196/// ```
197pub fn to_yaml(file: &str, output: Option<&str>) -> Result<(), CliError> {
198    let content = read_file(file)?;
199
200    let doc =
201        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
202
203    let config = ToYamlConfig::default();
204    let yaml = hedl_to_yaml(&doc, &config)
205        .map_err(|e| CliError::yaml_conversion(format!("YAML conversion error: {e}")))?;
206
207    write_output(&yaml, output)
208}
209
210/// Convert a YAML file to HEDL format.
211///
212/// Parses a YAML file and converts it to canonical HEDL format.
213///
214/// # Arguments
215///
216/// * `file` - Path to the YAML file to convert
217/// * `output` - Optional output file path. If `None`, writes to stdout
218///
219/// # Returns
220///
221/// Returns `Ok(())` on success.
222///
223/// # Errors
224///
225/// Returns `Err` if:
226/// - The file cannot be read
227/// - The YAML is malformed
228/// - YAML-to-HEDL conversion fails
229/// - HEDL canonicalization fails
230/// - Output writing fails
231///
232/// # Examples
233///
234/// ```no_run
235/// use hedl_cli::commands::from_yaml;
236///
237/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
238/// // Convert YAML to HEDL on stdout
239/// from_yaml("data.yaml", None)?;
240///
241/// // Convert YAML to HEDL file
242/// from_yaml("data.yml", Some("output.hedl"))?;
243/// # Ok(())
244/// # }
245/// ```
246pub fn from_yaml(file: &str, output: Option<&str>) -> Result<(), CliError> {
247    let content = read_file(file)?;
248
249    let config = FromYamlConfig::default();
250    let doc = yaml_to_hedl(&content, &config)
251        .map_err(|e| CliError::yaml_conversion(format!("YAML conversion error: {e}")))?;
252
253    let hedl = canonicalize(&doc)
254        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
255
256    write_output(&hedl, output)
257}
258
259// XML conversion
260
261/// Convert a HEDL file to XML format.
262///
263/// Parses a HEDL file and converts it to XML, with optional pretty-printing.
264///
265/// # Arguments
266///
267/// * `file` - Path to the HEDL file to convert
268/// * `output` - Optional output file path. If `None`, writes to stdout
269/// * `pretty` - If `true`, pretty-prints the XML with indentation
270///
271/// # Returns
272///
273/// Returns `Ok(())` on success.
274///
275/// # Errors
276///
277/// Returns `Err` if:
278/// - The file cannot be read
279/// - The file contains syntax errors
280/// - XML conversion fails
281/// - Output writing fails
282///
283/// # Examples
284///
285/// ```no_run
286/// use hedl_cli::commands::to_xml;
287///
288/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
289/// // Convert to compact XML on stdout
290/// to_xml("data.hedl", None, false)?;
291///
292/// // Convert to pretty XML file
293/// to_xml("data.hedl", Some("output.xml"), true)?;
294/// # Ok(())
295/// # }
296/// ```
297pub fn to_xml(file: &str, output: Option<&str>, pretty: bool) -> Result<(), CliError> {
298    let content = read_file(file)?;
299
300    let doc =
301        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
302
303    let config = ToXmlConfig {
304        pretty,
305        ..Default::default()
306    };
307    let xml = hedl_to_xml(&doc, &config)
308        .map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))?;
309
310    write_output(&xml, output)
311}
312
313/// Convert an XML file to HEDL format.
314///
315/// Parses an XML file and converts it to canonical HEDL format.
316///
317/// # Arguments
318///
319/// * `file` - Path to the XML file to convert
320/// * `output` - Optional output file path. If `None`, writes to stdout
321///
322/// # Returns
323///
324/// Returns `Ok(())` on success.
325///
326/// # Errors
327///
328/// Returns `Err` if:
329/// - The file cannot be read
330/// - The XML is malformed
331/// - XML-to-HEDL conversion fails
332/// - HEDL canonicalization fails
333/// - Output writing fails
334///
335/// # Examples
336///
337/// ```no_run
338/// use hedl_cli::commands::from_xml;
339///
340/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
341/// // Convert XML to HEDL on stdout
342/// from_xml("data.xml", None)?;
343///
344/// // Convert XML to HEDL file
345/// from_xml("data.xml", Some("output.hedl"))?;
346/// # Ok(())
347/// # }
348/// ```
349pub fn from_xml(file: &str, output: Option<&str>) -> Result<(), CliError> {
350    let content = read_file(file)?;
351
352    let config = FromXmlConfig::default();
353    let doc = xml_to_hedl(&content, &config)
354        .map_err(|e| CliError::xml_conversion(format!("XML conversion error: {e}")))?;
355
356    let hedl = canonicalize(&doc)
357        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
358
359    write_output(&hedl, output)
360}
361
362// CSV conversion
363
364/// Convert a HEDL file to CSV format.
365///
366/// Parses a HEDL file and converts it to CSV format. Expects the HEDL file to contain
367/// a matrix list that can be represented as a table.
368///
369/// # Arguments
370///
371/// * `file` - Path to the HEDL file to convert
372/// * `output` - Optional output file path. If `None`, writes to stdout
373/// * `include_headers` - If `true`, includes header row with column names (default: `true`)
374///
375/// # Returns
376///
377/// Returns `Ok(())` on success.
378///
379/// # Errors
380///
381/// Returns `Err` if:
382/// - The file cannot be read
383/// - The file contains syntax errors
384/// - The HEDL structure is not compatible with CSV (e.g., nested structures)
385/// - CSV conversion fails
386/// - Output writing fails
387///
388/// # Examples
389///
390/// ```no_run
391/// use hedl_cli::commands::to_csv;
392///
393/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
394/// // Convert to CSV on stdout with headers
395/// to_csv("data.hedl", None, true)?;
396///
397/// // Convert to CSV without headers (useful for appending)
398/// to_csv("data.hedl", Some("output.csv"), false)?;
399/// # Ok(())
400/// # }
401/// ```
402pub fn to_csv(file: &str, output: Option<&str>, include_headers: bool) -> Result<(), CliError> {
403    let content = read_file(file)?;
404
405    let doc =
406        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
407
408    // Use to_csv_with_config to respect user's header preference
409    let config = ToCsvConfig {
410        include_headers,
411        ..Default::default()
412    };
413    let csv = to_csv_with_config(&doc, config)
414        .map_err(|e| CliError::csv_conversion(format!("CSV conversion error: {e}")))?;
415
416    write_output(&csv, output)
417}
418
419/// Convert a CSV file to HEDL format.
420///
421/// Parses a CSV file and converts it to canonical HEDL format. The first row is assumed
422/// to be the header row containing column names.
423///
424/// # Arguments
425///
426/// * `file` - Path to the CSV file to convert
427/// * `output` - Optional output file path. If `None`, writes to stdout
428/// * `type_name` - The type name to use for the HEDL matrix list (must be alphanumeric)
429///
430/// # Returns
431///
432/// Returns `Ok(())` on success.
433///
434/// # Errors
435///
436/// Returns `Err` if:
437/// - The file cannot be read
438/// - The CSV is malformed or empty
439/// - The type name is invalid (must be alphanumeric with underscores)
440/// - CSV-to-HEDL conversion fails
441/// - HEDL canonicalization fails
442/// - Output writing fails
443///
444/// # Examples
445///
446/// ```no_run
447/// use hedl_cli::commands::from_csv;
448///
449/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
450/// // Convert CSV to HEDL on stdout with type name "Person"
451/// from_csv("people.csv", None, "Person")?;
452///
453/// // Convert CSV to HEDL file
454/// from_csv("data.csv", Some("output.hedl"), "Record")?;
455///
456/// // Invalid type name will fail
457/// let result = from_csv("data.csv", None, "Invalid-Name!");
458/// assert!(result.is_err());
459/// # Ok(())
460/// # }
461/// ```
462///
463/// # Security
464///
465/// The type name is validated to prevent injection attacks. Only alphanumeric
466/// characters and underscores are allowed.
467pub fn from_csv(file: &str, output: Option<&str>, type_name: &str) -> Result<(), CliError> {
468    let content = read_file(file)?;
469
470    // Validate type_name to prevent injection
471    if !type_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
472        return Err(CliError::invalid_input(
473            "Type name must be alphanumeric (with underscores allowed)",
474        ));
475    }
476
477    // Infer column names from header row
478    let first_line = content
479        .lines()
480        .next()
481        .ok_or_else(|| CliError::invalid_input("CSV file is empty or has no header row"))?;
482    let columns: Vec<&str> = first_line.split(',').skip(1).collect(); // Skip ID column
483
484    let doc = csv_to_hedl(&content, type_name, &columns)
485        .map_err(|e| CliError::csv_conversion(format!("CSV conversion error: {e}")))?;
486
487    let hedl = canonicalize(&doc)
488        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
489
490    write_output(&hedl, output)
491}
492
493// Parquet conversion
494
495/// Convert a HEDL file to Parquet format.
496///
497/// Parses a HEDL file and converts it to Apache Parquet columnar format. This is ideal
498/// for analytical workloads and integration with data processing frameworks.
499///
500/// # Arguments
501///
502/// * `file` - Path to the HEDL file to convert
503/// * `output` - Output Parquet file path (required, cannot write to stdout)
504///
505/// # Returns
506///
507/// Returns `Ok(())` on success.
508///
509/// # Errors
510///
511/// Returns `Err` if:
512/// - The file cannot be read
513/// - The file contains syntax errors
514/// - The HEDL structure is not compatible with Parquet
515/// - Parquet conversion fails
516/// - Output file cannot be written
517///
518/// # Examples
519///
520/// ```no_run
521/// use hedl_cli::commands::to_parquet;
522///
523/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
524/// // Convert to Parquet file
525/// to_parquet("data.hedl", "output.parquet")?;
526/// # Ok(())
527/// # }
528/// ```
529///
530/// # Note
531///
532/// Parquet requires a file path for output; it cannot write to stdout due to
533/// the binary columnar format.
534pub fn to_parquet(file: &str, output: &str) -> Result<(), CliError> {
535    let content = read_file(file)?;
536
537    let doc =
538        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
539
540    hedl_to_parquet(&doc, Path::new(output))
541        .map_err(|e| CliError::parquet_conversion(format!("Parquet conversion error: {e}")))?;
542
543    Ok(())
544}
545
546/// Convert a Parquet file to HEDL format.
547///
548/// Reads an Apache Parquet file and converts it to canonical HEDL format.
549///
550/// # Arguments
551///
552/// * `file` - Path to the Parquet file to convert
553/// * `output` - Optional output file path. If `None`, writes to stdout
554///
555/// # Returns
556///
557/// Returns `Ok(())` on success.
558///
559/// # Errors
560///
561/// Returns `Err` if:
562/// - The file cannot be read
563/// - The Parquet file is malformed or unsupported
564/// - Parquet-to-HEDL conversion fails
565/// - HEDL canonicalization fails
566/// - Output writing fails
567///
568/// # Examples
569///
570/// ```no_run
571/// use hedl_cli::commands::from_parquet;
572///
573/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
574/// // Convert Parquet to HEDL on stdout
575/// from_parquet("data.parquet", None)?;
576///
577/// // Convert Parquet to HEDL file
578/// from_parquet("data.parquet", Some("output.hedl"))?;
579/// # Ok(())
580/// # }
581/// ```
582pub fn from_parquet(file: &str, output: Option<&str>) -> Result<(), CliError> {
583    let doc = parquet_to_hedl(Path::new(file))
584        .map_err(|e| CliError::parquet_conversion(format!("Parquet conversion error: {e}")))?;
585
586    let hedl = canonicalize(&doc)
587        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
588
589    write_output(&hedl, output)
590}
591
592// TOON conversion
593
594/// Convert a HEDL file to TOON format.
595///
596/// Parses a HEDL file and converts it to TOON format.
597///
598/// # Arguments
599///
600/// * `file` - Path to the HEDL file to convert
601/// * `output` - Optional output file path. If `None`, writes to stdout
602///
603/// # Returns
604///
605/// Returns `Ok(())` on success.
606///
607/// # Errors
608///
609/// Returns `Err` if:
610/// - The file cannot be read
611/// - The file contains syntax errors
612/// - TOON conversion fails
613/// - Output writing fails
614///
615/// # Examples
616///
617/// ```no_run
618/// use hedl_cli::commands::to_toon;
619///
620/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
621/// // Convert to TOON on stdout
622/// to_toon("data.hedl", None)?;
623///
624/// // Convert to TOON file
625/// to_toon("data.hedl", Some("output.toon"))?;
626/// # Ok(())
627/// # }
628/// ```
629pub fn to_toon(file: &str, output: Option<&str>) -> Result<(), CliError> {
630    let content = read_file(file)?;
631
632    let doc =
633        parse(content.as_bytes()).map_err(|e| CliError::parse(format!("Parse error: {e}")))?;
634
635    let toon = hedl_to_toon(&doc)
636        .map_err(|e| CliError::invalid_input(format!("TOON conversion error: {e}")))?;
637
638    write_output(&toon, output)
639}
640
641/// Convert a TOON file to HEDL format.
642///
643/// Parses a TOON file and converts it to HEDL format.
644///
645/// # Arguments
646///
647/// * `file` - Path to the TOON file to convert
648/// * `output` - Optional output file path. If `None`, writes to stdout
649///
650/// # Returns
651///
652/// Returns `Ok(())` on success.
653///
654/// # Errors
655///
656/// Returns `Err` if:
657/// - The file cannot be read
658/// - The file contains syntax errors
659/// - HEDL generation fails
660/// - Output writing fails
661///
662/// # Examples
663///
664/// ```no_run
665/// use hedl_cli::commands::from_toon;
666///
667/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
668/// // Convert TOON to HEDL on stdout
669/// from_toon("data.toon", None)?;
670///
671/// // Convert TOON to HEDL file
672/// from_toon("data.toon", Some("output.hedl"))?;
673/// # Ok(())
674/// # }
675/// ```
676pub fn from_toon(file: &str, output: Option<&str>) -> Result<(), CliError> {
677    let content = read_file(file)?;
678
679    let doc =
680        toon_to_hedl(&content).map_err(|e| CliError::parse(format!("TOON parse error: {e}")))?;
681
682    let hedl = canonicalize(&doc)
683        .map_err(|e| CliError::canonicalization(format!("HEDL generation error: {e}")))?;
684
685    write_output(&hedl, output)
686}