Skip to main content

hedl_stream/
lib.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Streaming HEDL Parser
19//!
20//! This crate provides a streaming, memory-efficient parser for HEDL documents.
21//! Instead of loading the entire document into memory, it yields events or nodes
22//! one at a time, making it suitable for processing multi-GB files.
23//!
24//! # Features
25//!
26//! - **Memory Efficient**: Process files larger than available RAM
27//! - **Iterator-based**: Standard Rust iterator interface (sync)
28//! - **Async Support**: Non-blocking I/O with tokio (optional; enable the `async` feature)
29//! - **Event-driven**: Optional SAX-like event callbacks
30//! - **Timeout Protection**: Abort parsing that runs too long on malicious/untrusted input
31//! - **Compatible**: Works with `hedl-parquet` and `hedl-neo4j` for streaming export
32//!
33//! # Sync vs Async
34//!
35//! ## Synchronous API (default)
36//!
37//! Use the synchronous API for:
38//! - Processing local files
39//! - Single-threaded batch processing
40//! - Simpler code without async complexity
41//! - CPU-bound workloads with minimal I/O wait
42//!
43//! ```rust,no_run
44//! use hedl_stream::{StreamingParser, NodeEvent};
45//! use std::io::BufReader;
46//! use std::fs::File;
47//!
48//! let file = File::open("large-dataset.hedl").unwrap();
49//! let reader = BufReader::new(file);
50//!
51//! let parser = StreamingParser::new(reader).unwrap();
52//!
53//! for event in parser {
54//!     match event {
55//!         Ok(NodeEvent::Node(node)) => {
56//!             println!("{}:{}", node.type_name, node.id);
57//!         }
58//!         Ok(NodeEvent::ListStart { type_name, .. }) => {
59//!             println!("List started: {}", type_name);
60//!         }
61//!         Err(e) => {
62//!             eprintln!("Error: {}", e);
63//!             break;
64//!         }
65//!         _ => {}
66//!     }
67//! }
68//! ```
69//!
70//! ## Asynchronous API (feature = "async")
71//!
72//! Use the asynchronous API for:
73//! - Processing network streams or pipes
74//! - High-concurrency scenarios (many parallel streams)
75//! - Integration with async web servers or frameworks
76//! - Non-blocking I/O in async runtime contexts
77//!
78//! ```rust,no_run
79//! # #[cfg(feature = "async")]
80//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
81//! use hedl_stream::{AsyncStreamingParser, NodeEvent};
82//! use tokio::fs::File;
83//!
84//! let file = File::open("large-dataset.hedl").await?;
85//! let mut parser = AsyncStreamingParser::new(file).await?;
86//!
87//! while let Some(event) = parser.next_event().await? {
88//!     match event {
89//!         NodeEvent::Node(node) => {
90//!             println!("{}:{}", node.type_name, node.id);
91//!         }
92//!         NodeEvent::ListStart { type_name, .. } => {
93//!             println!("List started: {}", type_name);
94//!         }
95//!         _ => {}
96//!     }
97//! }
98//! # Ok(())
99//! # }
100//! ```
101//!
102//! # Timeout Protection for Untrusted Input
103//!
104//! When parsing untrusted input, configure a timeout so that parsing is aborted instead of running indefinitely:
105//!
106//! ```rust,no_run
107//! use hedl_stream::{StreamingParser, StreamingParserConfig};
108//! use std::time::Duration;
109//! use std::io::Cursor;
110//!
111//! let config = StreamingParserConfig {
112//!     timeout: Some(Duration::from_secs(10)),
113//!     ..Default::default()
114//! };
115//!
116//! let untrusted_input = "..."; // Input from untrusted source
117//! let parser = StreamingParser::with_config(
118//!     Cursor::new(untrusted_input),
119//!     config
120//! ).unwrap();
121//!
122//! // Parser will return StreamError::Timeout if parsing exceeds 10 seconds
123//! for event in parser {
124//!     // Process events...
125//!     # break;
126//! }
127//! ```
128
129#![cfg_attr(not(test), warn(missing_docs))]
130mod buffer_config;
131mod buffer_pool;
132mod error;
133mod event;
134mod parser;
135mod reader;
136
137#[cfg(feature = "compression")]
138/// Compression support for streaming HEDL.
139pub mod compression;
140
141#[cfg(feature = "async")]
142mod async_parser;
143#[cfg(feature = "async")]
144mod async_reader;
145
146pub use buffer_config::BufferSizeHint;
147pub use buffer_pool::{BufferPool, MemoryLimits};
148pub use error::{StreamError, StreamResult};
149pub use event::{HeaderInfo, NodeEvent, NodeInfo};
150pub use parser::{StreamingParser, StreamingParserConfig};
151pub use reader::LineReader;
152
153#[cfg(feature = "compression")]
154pub use compression::CompressionWriter;
155#[cfg(feature = "compression")]
156pub use compression::{CompressionFormat, CompressionReader};
157
158#[cfg(feature = "async")]
159pub use async_parser::AsyncStreamingParser;
160#[cfg(feature = "async")]
161pub use async_reader::AsyncLineReader;
162
163/// Re-export core types for convenience.
164pub use hedl_core::{Reference, Value};