hedl_c14n/lib.rs
1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! HEDL Canonicalization
19//!
20//! Provides deterministic output generation for HEDL documents.
21//! Canonical output ensures stable hashing, diffing, and round-trips.
22//!
23//! # Overview
24//!
25//! This crate implements the canonical serialization format for HEDL documents,
26//! as specified in SPEC.md Section 13.2. Canonicalization ensures:
27//!
28//! - **Deterministic output**: Same document always produces same output
29//! - **Idempotency**: `canonicalize(canonicalize(x)) == canonicalize(x)`
30//! - **Round-trip preservation**: Parsing canonical output preserves semantics
31//! - **Stable hashing**: Enables content-addressable storage and diffing
32//!
33//! # Features
34//!
35//! - Minimal or always-quote string formatting strategies
36//! - Legacy ditto support for pre-v2.0 documents
37//! - Proper escaping of quotes and control characters
38//! - Alphabetically sorted keys, aliases, and struct declarations
39//! - Count hints in STRUCT directives for performance optimization
40//! - Security: Recursion depth limits prevent stack overflow `DoS` attacks
41//!
42//! # Examples
43//!
44//! ```no_run
45//! use hedl_c14n::{canonicalize, CanonicalConfig, CanonicalConfigBuilder, QuotingStrategy};
46//! use hedl_core::Document;
47//!
48//! # fn example(doc: Document) -> Result<(), hedl_core::HedlError> {
49//! // Simple canonicalization with defaults
50//! let output = canonicalize(&doc)?;
51//!
52//! // Custom configuration using fluent API
53//! let config = CanonicalConfig::new()
54//! .with_quoting(QuotingStrategy::Always)
55//! .with_ditto(false);
56//! let output = hedl_c14n::canonicalize_with_config(&doc, &config)?;
57//!
58//! // Custom configuration using builder pattern
59//! let config = CanonicalConfig::builder()
60//! .quoting(QuotingStrategy::Always)
61//! .use_ditto(false)
62//! .sort_keys(true)
63//! .build();
64//! let output = hedl_c14n::canonicalize_with_config(&doc, &config)?;
65//! # Ok(())
66//! # }
67//! ```
68//!
69//! # Security
70//!
71//! This crate implements protection against denial-of-service attacks:
72//!
73//! - **Recursion depth limit**: Maximum nesting depth of 1000 levels prevents stack overflow
74//! - **Proper escaping**: All special characters are escaped to prevent injection attacks
75//! - **Type safety**: Rust's type system prevents memory safety issues
76//!
77//! # Performance
78//!
79//! Several optimizations are implemented:
80//!
81//! - **P0**: Direct `BTreeMap` iteration eliminates key cloning (1.15x speedup, 10-15% fewer allocations)
82//! - **P1**: Pre-allocated output buffer (1.2-1.3x speedup)
83//! - **P1**: Cell buffer reuse across rows (1.05-1.1x speedup for large matrices)
84//! - **Count hints**: `add_count_hints()` function to automatically add count hints to matrix lists
85
86#![cfg_attr(not(test), warn(missing_docs))]
87mod config;
88mod count_hints;
89mod ditto;
90mod writer;
91
92pub use config::{CanonicalConfig, CanonicalConfigBuilder, QuotingStrategy};
93pub use count_hints::add_count_hints;
94pub use ditto::can_use_ditto;
95pub use writer::CanonicalWriter;
96
97use hedl_core::{Document, HedlError};
98
99/// Canonicalize a HEDL document to a string.
100///
101/// Uses default configuration with minimal quoting, ditto optimization enabled,
102/// and STRUCT directives in header (per SPEC.md Section 13.2).
103///
104/// # Arguments
105///
106/// * `doc` - The HEDL document to canonicalize
107///
108/// # Returns
109///
110/// Canonical string representation of the document, or an error if writing fails.
111///
112/// # Errors
113///
114/// Returns `HedlError::Syntax` if:
115/// - Writing to output buffer fails (extremely rare)
116/// - Document nesting exceeds maximum depth of 1000 levels
117///
118/// # Examples
119///
120/// ```no_run
121/// use hedl_c14n::canonicalize;
122/// use hedl_core::Document;
123///
124/// # fn example(doc: Document) -> Result<(), hedl_core::HedlError> {
125/// let canonical_output = canonicalize(&doc)?;
126/// println!("{}", canonical_output);
127/// # Ok(())
128/// # }
129/// ```
130///
131/// # Security
132///
133/// - Protected against stack overflow via recursion depth limit
134/// - All special characters properly escaped
135/// - No unsafe code
136pub fn canonicalize(doc: &Document) -> Result<String, HedlError> {
137 canonicalize_with_config(doc, &CanonicalConfig::default())
138}
139
140/// Canonicalize a HEDL document with custom configuration.
141///
142/// Allows fine-grained control over output format, including quoting strategy,
143/// ditto optimization, and schema placement.
144///
145/// # Arguments
146///
147/// * `doc` - The HEDL document to canonicalize
148/// * `config` - Configuration controlling output format
149///
150/// # Returns
151///
152/// Canonical string representation according to configuration, or an error if writing fails.
153///
154/// # Errors
155///
156/// Returns `HedlError::Syntax` if:
157/// - Writing to output buffer fails
158/// - Document nesting exceeds maximum depth of 1000 levels
159///
160/// # Examples
161///
162/// ```no_run
163/// use hedl_c14n::{canonicalize_with_config, CanonicalConfig, QuotingStrategy};
164/// use hedl_core::Document;
165///
166/// # fn example(doc: Document) -> Result<(), hedl_core::HedlError> {
167/// let config = CanonicalConfig::new()
168/// .with_quoting(QuotingStrategy::Always)
169/// .with_ditto(false)
170/// .with_sort_keys(true)
171/// .with_inline_schemas(true);
172/// let output = canonicalize_with_config(&doc, &config)?;
173/// # Ok(())
174/// # }
175/// ```
176///
177/// # Performance
178///
179/// Pre-allocates 4KB output buffer to minimize reallocations for typical documents.
180pub fn canonicalize_with_config(
181 doc: &Document,
182 config: &CanonicalConfig,
183) -> Result<String, HedlError> {
184 let mut writer = CanonicalWriter::new(config.clone());
185 writer.write_document(doc)
186}