edifact_rs/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3//! `edifact-rs` — zero-copy EDIFACT tokenizer, parser, writer, serde traits,
4//! validation engine, and extensible directory support.
5//!
6//! `edifact-rs` is the main entry point of this workspace. The core parsing,
7//! writing, and validation infrastructure is always available. Custom directory
8//! validators can be implemented by downstream crates or generated through
9//! external build tooling.
10//!
11//! # Quick start
12//! ```
13//! use edifact_rs::from_bytes;
14//! let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'UNZ+0+1'";
15//! let segments: Vec<_> = from_bytes(input).collect::<Result<_, _>>().unwrap();
16//! assert_eq!(segments[0].tag, "UNB");
17//! ```
18//!
19//! # Crate features
20//!
21//! - `derive` (enabled by default): re-exports the derive macros from
22//! `edifact-rs-derive`.
23//! - `diagnostics` (disabled by default): enables rich diagnostic output via `miette`.
24//! When enabled, errors implement `miette::Diagnostic` for enhanced error reporting.
25//! This feature adds an optional dependency and has no impact on parsing performance.
26//!
27//! The crate is expected to compile both with defaults and with
28//! `--no-default-features` for consumers who only want the core parsing and
29//! writing functionality.
30//!
31//! ## Feature matrix workflows
32//!
33//! - default features:
34//! `cargo test -p edifact-rs`
35//! - no default features:
36//! `cargo test -p edifact-rs --no-default-features`
37//! - all features:
38//! `cargo test -p edifact-rs --all-features`
39//!
40//! # Diagnostic Feature
41//!
42//! When the `diagnostics` feature is enabled, [`EdifactError`] gains additional
43//! traits and methods that enable rich, human-readable error output:
44//!
45//! ```text
46//! Error: invalid delimiter byte 0xAB at offset 42
47//!
48//! ╭─ input.edi:2:3
49//! │
50//! 2 │ UNB+UNOA:1+....[invalid]...
51//! │ ^^^ invalid byte here
52//! │
53//! Error Code: E002
54//! Help: The byte 0xAB is not a valid delimiter. Check UNA configuration
55//! ```
56//!
57//! This feature is useful for CLI tools and error reporting, but is not required
58//! for applications that handle errors programmatically.
59//!
60//! # Parse And Text Contracts
61//!
62//! Parsing in `edifact-rs` is strict and deterministic:
63//!
64//! - Segment and element text must decode as UTF-8 (`E003` on failure).
65//! - Release characters must escape exactly one following byte.
66//! A trailing `?` at end-of-input is rejected (`E019`).
67//! - Malformed delimiters and truncated segments are reported with stable
68//! error codes rather than panicking.
69//!
70//! These contracts apply to both slice-based parsing (`from_bytes`) and
71//! reader-based parsing (`from_reader`).
72//!
73//! ```
74//! use edifact_rs::from_reader;
75//! use std::io::Cursor;
76//!
77//! let input = b"UNA:;.? 'BGM;220;test?;value'";
78//! let segments = from_reader(Cursor::new(&input[..])).unwrap();
79//! assert_eq!(segments.len(), 1);
80//! assert_eq!(segments[0].tag, "BGM");
81//! assert_eq!(segments[0].elements[0].components[0], "220");
82//! assert_eq!(segments[0].elements[1].components[0], "test;value");
83//! ```
84//!
85//! # Validation Quick Start
86//!
87//! The `Validator` trait and `ValidationContext` provide a flexible framework
88//! for building custom validators. Users can generate validators from official
89//! UNECE sources or implement their own.
90//!
91//! See the [`Validator`] trait documentation and the `cookbook_fixture_validation.rs`
92//! example for details on creating custom validators.
93//!
94//! # Custom Profile Packs
95//!
96//! `ProfileRulePack` is the extension point for downstream MIG/profile crates.
97//! Packs can be authored with public APIs only and plugged into a
98//! [`ValidationContext`]:
99//!
100//! ```
101//! use edifact_rs::{
102//! from_bytes, ProfileRulePack, ValidationContext, ValidationIssue, ValidationSeverity,
103//! };
104//!
105//! let segments: Vec<_> = from_bytes(b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO123+9'UNT+3+1'")
106//! .collect::<Result<_, _>>()?;
107//!
108//! let pack = ProfileRulePack::new("ORDERS-DEMO")
109//! .for_message_type("ORDERS")
110//! .with_stateless_rule_fn(|segments| {
111//! let bgm = segments.iter().find(|segment| segment.tag == "BGM")?;
112//! let document_code = bgm.get_element(0)?.get_component(0)?;
113//! (document_code == "220").then(|| {
114//! ValidationIssue::new(
115//! ValidationSeverity::Warning,
116//! "demo pack rejects BGM 220 for illustration",
117//! )
118//! .with_rule_id("DEMO-P001")
119//! .with_segment("BGM")
120//! .with_element_index(0)
121//! })
122//! });
123//!
124//! let report = ValidationContext::builder()
125//! .with_profile_pack(pack)
126//! .build()
127//! .validate_lenient(&segments);
128//!
129//! assert!(report.has_warnings());
130//! let partner_report = report.filter_by_rule_prefix("DEMO-");
131//! assert!(partner_report.total_issues() >= 1);
132//! # Ok::<(), edifact_rs::EdifactError>(())
133//! ```
134//!
135//! # Async Usage
136//!
137//! `edifact-rs` does not provide a native `async` API. All parsing is
138//! synchronous and driven by the standard `std::io::Read` / `std::io::BufRead`
139//! traits. The recommended integration pattern with async runtimes is:
140//!
141//! 1. Use your async runtime's read utilities to read the entire message into a
142//! `Vec<u8>` (e.g. `tokio::io::AsyncReadExt::read_to_end`).
143//! 2. Parse the in-memory slice with [`from_bytes`].
144//!
145//! ```rust,no_run
146//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
147//! // With tokio:
148//! // let mut buf = Vec::new();
149//! // reader.read_to_end(&mut buf).await?;
150//! // let segments: Vec<_> = edifact_rs::from_bytes(&buf).collect::<Result<_, _>>()?;
151//! # Ok(())
152//! # }
153//! ```
154//!
155//! A native zero-copy streaming async API is tracked as a future roadmap item.
156// ── core modules ──────────────────────────────────────────────────────────────
157pub mod directory_validator;
158pub(crate) mod envelope;
159/// Error types and validation reporting primitives.
160pub(crate) mod error;
161pub mod group;
162/// Core zero-copy and owned EDIFACT data model types.
163pub(crate) mod model;
164pub(crate) mod parser;
165pub(crate) mod tokenizer;
166pub(crate) mod validator;
167pub(crate) mod writer;
168
169// ── typed serialization layer ─────────────────────────────────────────────────
170pub mod de;
171pub(crate) mod event;
172pub mod ser;
173
174// ── flat re-exports: core ─────────────────────────────────────────────────────
175pub use envelope::{
176 InterchangeEnvelope, MessageEnvelope, MessageIdentifier, parse_unh, validate_envelope,
177};
178pub use error::{EdifactError, IoError, ValidationIssue, ValidationReport, ValidationSeverity};
179pub use group::{GroupDef, SegmentGroup, group_segments};
180pub use model::{
181 BorrowedElement, BorrowedSegment, Element, OwnedElement, OwnedSegment, Segment, Span,
182};
183pub use parser::{
184 Parser, ReaderConfig, from_bufread, from_bufread_stream, from_bufread_stream_with_config,
185 from_reader_with_config,
186};
187pub use tokenizer::{ServiceStringAdvice, Tokenizer};
188pub use validator::{
189 EnvelopeValidator, ProfileRule, ProfileRulePack, ValidationContext, ValidationContextBuilder,
190 ValidationLayer, ValidationRuleContext, Validator, validate_each,
191};
192pub use writer::Writer;
193
194// ── flat re-exports: serde ────────────────────────────────────────────────────
195
196/// User-facing deserialization API.
197pub use de::{
198 CompositeElement, DispatchedMessage, EdifactCompositeDeserialize, EdifactDeserialize,
199 EdifactSegmentTag, MessageDispatch, MessageWindow, MessageWindowsIter, MessageWindowsSliceIter,
200 OwnedMessageWindow, SegmentAccessor, deserialize, deserialize_all_from_reader,
201 deserialize_all_streaming, deserialize_first_from_reader, deserialize_first_streaming,
202 deserialize_messages_bytes, deserialize_messages_from_reader, deserialize_str,
203 groups_are_contiguous_by_qualifier, message_windows_bytes, message_windows_from_reader,
204};
205
206/// Alias for [`message_windows_bytes`] — a more discoverable entry-point for
207/// window-based message parsing.
208///
209/// Splits a byte slice into [`MessageWindow`] views, one per UNH/UNT envelope,
210/// enabling parallel or lazy per-message processing without copying data.
211///
212/// # Example
213/// ```rust,ignore
214/// use edifact_rs::from_bytes_windows;
215/// let windows: Vec<_> = from_bytes_windows(input).collect();
216/// ```
217pub use de::message_windows_bytes as from_bytes_windows;
218
219// ── Proc-macro support ─────────────────────────────────────────────────────────
220/// Private implementation helpers used by code generated from `#[derive(EdifactDeserialize)]`.
221///
222/// **This module is not part of the public API.** Names, signatures, and
223/// existence of items inside `__private` may change in any release without a
224/// semver bump. Do not depend on this module directly.
225#[doc(hidden)]
226pub mod __private {
227 pub use super::de::{
228 composite_element, contiguous_groups_by_qualifier, element_str, find_qualified_segment,
229 find_qualified_segment_owned, find_segment, find_segment_owned, find_segment_typed,
230 find_segments_iter, find_segments_typed, get_components_iter, optional_component,
231 optional_element, qualifier_matches_pattern, required_component, required_element,
232 };
233}
234pub use directory_validator::{
235 DirectoryValidator, DirectoryValidatorBuilder, ElementRef, OwnedElementRef, OwnedSegmentDef,
236 SegmentDefinition, Status,
237};
238#[cfg(feature = "derive")]
239#[cfg_attr(docsrs, doc(cfg(feature = "derive")))]
240pub use edifact_rs_derive::{EdifactDeserialize, EdifactSerialize};
241pub use event::{EdifactEvent, EventEmitter, OwnedEdifactEvent, VecEmitter, WriterEmitter};
242pub use ser::{
243 DecimalFloat, DecimalFloatDisplay, EdifactCompositeSerialize, EdifactSerialize, to_bytes,
244 to_edifact_string,
245};
246
247// ── core free functions ───────────────────────────────────────────────────────
248
249use std::io::{Read, Write};
250
251/// Iterator returned by [`from_bytes`].
252pub struct FromBytesIter<'a> {
253 parser: Option<parser::Parser<'a>>,
254 pending_error: Option<EdifactError>,
255 /// Remaining segment allowance (`None` = unlimited).
256 segments_remaining: Option<usize>,
257 /// Maximum byte budget (`None` = unlimited).
258 bytes_remaining: Option<u64>,
259 /// Byte offset of the start of the current parse position (approximated
260 /// as the sum of previously yielded segment spans — the borrowed tokenizer
261 /// does not expose a byte counter, so we track it from `Segment::span`).
262 bytes_consumed: u64,
263}
264
265/// Iterator returned by [`from_reader_iter`].
266pub struct FromReaderIter<R: Read> {
267 inner: parser::OwnedSegmentStream<std::io::BufReader<R>>,
268}
269
270impl<R: Read> Iterator for FromReaderIter<R> {
271 type Item = Result<OwnedSegment, EdifactError>;
272
273 fn next(&mut self) -> Option<Self::Item> {
274 self.inner.next()
275 }
276}
277
278impl<'a> Iterator for FromBytesIter<'a> {
279 type Item = Result<Segment<'a>, EdifactError>;
280
281 fn next(&mut self) -> Option<Self::Item> {
282 if let Some(err) = self.pending_error.take() {
283 return Some(Err(err));
284 }
285 // max_segments guard
286 if let Some(ref mut remaining) = self.segments_remaining {
287 if *remaining == 0 {
288 self.parser = None;
289 return None;
290 }
291 }
292 // max_input_bytes guard
293 if let Some(max) = self.bytes_remaining {
294 if self.bytes_consumed >= max {
295 self.parser = None;
296 return None;
297 }
298 }
299 let item = self.parser.as_mut()?.next();
300 if let Some(Ok(ref seg)) = item {
301 // Decrement segment allowance
302 if let Some(ref mut remaining) = self.segments_remaining {
303 *remaining = remaining.saturating_sub(1);
304 }
305 // Update byte counter from segment span and eagerly stop if exhausted
306 self.bytes_consumed = self.bytes_consumed.saturating_add(seg.span.len() as u64);
307 if let Some(max) = self.bytes_remaining {
308 if self.bytes_consumed >= max {
309 self.parser = None;
310 }
311 }
312 }
313 item
314 }
315}
316
317/// Parse `input` bytes into an iterator of [`Segment`]s.
318///
319/// Borrows directly from `input` — zero allocation for segment data.
320///
321/// # Segment-size limit
322///
323/// Applies a default 64 KiB per-segment limit, matching the reader-based path.
324/// Use [`from_bytes_with_config`] to override.
325pub fn from_bytes(input: &[u8]) -> FromBytesIter<'_> {
326 from_bytes_with_config(input, parser::ReaderConfig::default())
327}
328
329/// Parse `input` bytes into an iterator of [`Segment`]s with explicit configuration.
330///
331/// All three [`ReaderConfig`] limits are enforced:
332/// - `max_segment_bytes`: returns [`EdifactError::SegmentTooLong`] if a single segment
333/// exceeds the threshold.
334/// - `max_segments`: stops the iterator after this many segments have been yielded.
335/// - `max_input_bytes`: stops the iterator once this many bytes have been consumed
336/// (byte count is approximated from segment spans; the last segment that pushes
337/// consumption over the threshold is still returned).
338///
339/// Pass `ReaderConfig::default()` to use the default 64 KiB per-segment limit with
340/// no segment-count or byte-budget cap.
341///
342/// # Example
343///
344/// ```
345/// use edifact_rs::{ReaderConfig, from_bytes_with_config};
346///
347/// let cfg = ReaderConfig::default().max_segment_bytes(128);
348/// let result: Result<Vec<_>, _> = from_bytes_with_config(b"BGM+220+1+9'", cfg).collect();
349/// assert!(result.is_ok());
350/// ```
351pub fn from_bytes_with_config<'a>(
352 input: &'a [u8],
353 config: parser::ReaderConfig,
354) -> FromBytesIter<'a> {
355 let segments_remaining = config.max_segments;
356 let bytes_remaining = config.max_input_bytes;
357 match tokenizer::ServiceStringAdvice::from_bytes_strict(input) {
358 Ok(ssa) => {
359 let t = tokenizer::Tokenizer::with_limit(input, ssa, config.max_segment_bytes);
360 FromBytesIter {
361 parser: Some(parser::Parser::new(t)),
362 pending_error: None,
363 segments_remaining,
364 bytes_remaining,
365 bytes_consumed: 0,
366 }
367 }
368 Err(error) => FromBytesIter {
369 parser: None,
370 pending_error: Some(error),
371 segments_remaining,
372 bytes_remaining,
373 bytes_consumed: 0,
374 },
375 }
376}
377
378/// Parse a reader into owned segments.
379///
380/// # Errors
381///
382/// Returns an error if the input contains malformed EDIFACT syntax,
383/// invalid UTF-8 segment text, dangling release sequences, or underlying I/O failures.
384pub fn from_reader<R: Read>(reader: R) -> Result<Vec<OwnedSegment>, EdifactError> {
385 parser::from_reader(reader)
386}
387
388/// Parse `input` bytes eagerly into an iterator of [`OwnedSegment`]s.
389///
390/// Unlike [`from_bytes`] (which yields borrowed [`Segment`]s tied to the input
391/// lifetime), every segment returned here is fully owned. This is convenient
392/// when you need to store or return segments without retaining a reference to
393/// the original byte slice.
394///
395/// # Example
396///
397/// ```
398/// let segs: Vec<edifact_rs::OwnedSegment> = edifact_rs::from_bytes_owned(b"BGM+220+1+9'")
399/// .collect::<Result<_, _>>()
400/// .unwrap();
401/// assert_eq!(segs[0].tag, "BGM");
402/// ```
403pub fn from_bytes_owned(
404 input: &[u8],
405) -> impl Iterator<Item = Result<OwnedSegment, EdifactError>> + '_ {
406 from_bytes(input).map(|r| r.map(OwnedSegment::from))
407}
408
409/// Parse a reader into owned segments as a streaming iterator.
410///
411/// This keeps memory bounded by yielding segments incrementally instead of
412/// materializing the full interchange up front.
413pub fn from_reader_iter<R: Read>(reader: R) -> FromReaderIter<R> {
414 FromReaderIter {
415 inner: parser::from_reader_stream(reader),
416 }
417}
418
419/// Serialize `segments` to an [`std::io::Write`] implementation.
420///
421/// # Errors
422///
423/// Returns an error if writing fails or if segment serialization fails.
424pub fn to_writer<'a, 'b, W, I>(w: W, segments: I) -> Result<(), EdifactError>
425where
426 'b: 'a,
427 W: Write,
428 I: IntoIterator<Item = &'a Segment<'b>>,
429{
430 let mut wr = writer::Writer::new(w);
431 for seg in segments {
432 wr.write_segment(seg)?;
433 }
434 wr.finish().map(|_| ())
435}
436
437/// Serialize `segments` to an owned `Vec<u8>`.
438///
439/// # Errors
440///
441/// Returns an error if serialization fails.
442pub fn segments_to_bytes<'a, 'b, I>(segments: I) -> Result<Vec<u8>, EdifactError>
443where
444 'b: 'a,
445 I: IntoIterator<Item = &'a Segment<'b>>,
446{
447 let mut buf = Vec::new();
448 to_writer(&mut buf, segments)?;
449 Ok(buf)
450}
451
#[cfg(test)]
mod tests {
    use super::*;

    /// Slice-based parsing must surface an invalid UNA header as
    /// `EdifactError::InvalidUna` instead of yielding segments.
    #[test]
    fn from_bytes_rejects_invalid_una() {
        // The UNA header below reuses `:` for the first two service characters.
        let outcome: Result<Vec<_>, _> = from_bytes(b"UNA::.? 'BGM:220'").collect();
        let err = outcome.expect_err("invalid UNA should fail slice parsing");
        assert!(matches!(err, EdifactError::InvalidUna));
    }
}