Skip to main content

edifact_rs/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2
3//! `edifact-rs` — zero-copy EDIFACT tokenizer, parser, writer, serde traits,
4//! validation engine, and extensible directory support.
5//!
6//! `edifact-rs` is the main entry point of this workspace. The core parsing,
7//! writing, and validation infrastructure is always available. Custom directory
8//! validators can be implemented by downstream crates or generated through
9//! external build tooling.
10//!
11//! # Quick start
12//! ```
13//! use edifact_rs::from_bytes;
14//! let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'UNZ+0+1'";
15//! let segments: Vec<_> = from_bytes(input).collect::<Result<_, _>>().unwrap();
16//! assert_eq!(segments[0].tag, "UNB");
17//! ```
18//!
19//! # Crate features
20//!
21//! - `derive` (enabled by default): re-exports the derive macros from
22//!   `edifact-rs-derive`.
23//! - `diagnostics` (disabled by default): enables rich diagnostic output via `miette`.
24//!   When enabled, errors implement `miette::Diagnostic` for enhanced error reporting.
25//!   This feature adds an optional dependency and has no impact on parsing performance.
26//!
27//! The crate is expected to compile both with defaults and with
28//! `--no-default-features` for consumers who only want the core parsing and
29//! writing functionality.
30//!
31//! ## Feature matrix workflows
32//!
33//! - default features:
34//!   `cargo test -p edifact-rs`
35//! - no default features:
36//!   `cargo test -p edifact-rs --no-default-features`
37//! - all features:
38//!   `cargo test -p edifact-rs --all-features`
39//!
40//! # Diagnostic Feature
41//!
42//! When the `diagnostics` feature is enabled, [`EdifactError`] gains additional
43//! traits and methods that enable rich, human-readable error output:
44//!
45//! ```text
46//! Error: invalid delimiter byte 0xAB at offset 42
47//!
48//!  ╭─ input.edi:2:3
49//!  │
50//!  2 │ UNB+UNOA:1+....[invalid]...
51//!  │         ^^^ invalid byte here
52//!  │
53//! Error Code: E002
54//! Help: The byte 0xAB is not a valid delimiter. Check UNA configuration
55//! ```
56//!
57//! This feature is useful for CLI tools and error reporting, but is not required
58//! for applications that handle errors programmatically.
59//!
60//! # Parse And Text Contracts
61//!
62//! Parsing in `edifact-rs` is strict and deterministic:
63//!
64//! - Segment and element text must decode as UTF-8 (`E003` on failure).
65//! - Release characters must escape exactly one following byte.
66//!   A trailing `?` at end-of-input is rejected (`E019`).
67//! - Malformed delimiters and truncated segments are reported with stable
68//!   error codes rather than panicking.
69//!
70//! These contracts apply to both slice-based parsing (`from_bytes`) and
71//! reader-based parsing (`from_reader`).
72//!
73//! ```
74//! use edifact_rs::from_reader_collect;
75//! use std::io::Cursor;
76//!
77//! let input = b"UNA:;.? 'BGM;220;test?;value'";
78//! let segments = from_reader_collect(Cursor::new(&input[..])).unwrap();
79//! assert_eq!(segments.len(), 1);
80//! assert_eq!(segments[0].tag, "BGM");
81//! assert_eq!(segments[0].element_str(0), Some("220"));
82//! assert_eq!(segments[0].element_str(1), Some("test;value"));
83//! ```
84//!
85//! # Validation Quick Start
86//!
87//! The `Validator` trait and `ValidationContext` provide a flexible framework
88//! for building custom validators. Users can generate validators from official
89//! UNECE sources or implement their own.
90//!
91//! See the [`Validator`] trait documentation and the `cookbook_fixture_validation.rs`
92//! example for details on creating custom validators.
93//!
94//! # Custom Profile Packs
95//!
96//! `ProfileRulePack` is the extension point for downstream MIG/profile crates.
97//! Packs can be authored with public APIs only and plugged into a
98//! [`ValidationContext`]:
99//!
100//! ```
101//! use edifact_rs::{
102//!     from_bytes, ProfileRulePack, ValidationContext, ValidationIssue, ValidationSeverity,
103//! };
104//!
105//! let segments: Vec<_> = from_bytes(b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO123+9'UNT+3+1'")
106//!     .collect::<Result<_, _>>()?;
107//!
108//! let pack = ProfileRulePack::new("ORDERS-DEMO")
109//!     .for_message_type("ORDERS")
110//!     .with_stateless_rule_fn(|segments, issues| {
111//!         if let Some(bgm) = segments.iter().find(|segment| segment.tag == "BGM") {
112//!             if let Some(code) = bgm.get_element(0).and_then(|e| e.get_component(0)) {
113//!                 if code == "220" {
114//!                     issues.push(
115//!                         ValidationIssue::new(
116//!                             ValidationSeverity::Warning,
117//!                             "demo pack rejects BGM 220 for illustration",
118//!                         )
119//!                         .with_rule_id("DEMO-P001")
120//!                         .with_segment("BGM")
121//!                         .with_element_index(0),
122//!                     );
123//!                 }
124//!             }
125//!         }
126//!     });
127//!
128//! let report = ValidationContext::builder()
129//!     .with_profile_pack(pack)
130//!     .build()
131//!     .validate_lenient(&segments);
132//!
133//! assert!(report.has_warnings());
134//! let partner_report = report.filter_by_rule_prefix("DEMO-");
135//! assert!(partner_report.total_issues() >= 1);
136//! # Ok::<(), edifact_rs::EdifactError>(())
137//! ```
138//!
139//! # Async Usage
140//!
141//! `edifact-rs` does not provide a native `async` API.  All parsing is
142//! synchronous and driven by the standard `std::io::Read` / `std::io::BufRead`
143//! traits.  The recommended integration pattern with async runtimes is:
144//!
145//! 1. Use your async runtime's read utilities to read the entire message into a
146//!    `Vec<u8>` (e.g. `tokio::io::AsyncReadExt::read_to_end`).
147//! 2. Parse the in-memory slice with [`from_bytes`].
148//!
149//! ```rust,no_run
150//! # async fn example() -> Result<(), Box<dyn std::error::Error>> {
151//! // With tokio:
152//! // let mut buf = Vec::new();
153//! // reader.read_to_end(&mut buf).await?;
154//! // let segments: Vec<_> = edifact_rs::from_bytes(&buf).collect::<Result<_, _>>()?;
155//! # Ok(())
156//! # }
157//! ```
158//!
159//! A native zero-copy streaming async API is tracked as a future roadmap item.
160// ── core modules ──────────────────────────────────────────────────────────────
161pub mod directory_validator;
162pub(crate) mod envelope;
163/// Error types and validation reporting primitives.
164pub(crate) mod error;
165pub mod group;
166/// Core zero-copy and owned EDIFACT data model types.
167pub(crate) mod model;
168pub(crate) mod parser;
169pub(crate) mod tokenizer;
170pub(crate) mod validator;
171pub(crate) mod writer;
172
173// ── typed serialization layer ─────────────────────────────────────────────────
174pub mod de;
175pub(crate) mod event;
176pub mod ser;
177
178// ── flat re-exports: core ─────────────────────────────────────────────────────
179pub use envelope::{
180    InterchangeEnvelope, MessageEnvelope, MessageIdentifier, parse_unh, validate_envelope,
181    validate_envelope_lenient,
182};
183pub use error::{EdifactError, IoError, ValidationIssue, ValidationReport, ValidationSeverity};
184pub use group::{
185    GroupDef, SegmentGroup, SegmentGroupIndexed, group_segments, group_segments_indexed,
186};
187pub use model::{
188    BorrowedElement, BorrowedSegment, Element, OwnedElement, OwnedSegment, Segment, Span,
189};
190pub use parser::{
191    Parser, ReaderConfig, from_bufread, from_bufread_stream, from_bufread_stream_with_config,
192    from_reader_with_config,
193};
194pub use tokenizer::{ServiceStringAdvice, Tokenizer};
195pub use validator::{
196    EnvelopeValidator, ProfileRule, ProfileRulePack, ValidationContext, ValidationContextBuilder,
197    ValidationLayer, ValidationRuleContext, Validator, validate_each,
198};
199pub use writer::Writer;
200
201// ── flat re-exports: serde ────────────────────────────────────────────────────
202
203/// User-facing deserialization API.
204pub use de::{
205    CompositeElement, DispatchedMessage, EdifactCompositeDeserialize, EdifactDeserialize,
206    EdifactSegmentTag, MessageDispatch, MessageWindow, MessageWindowsIter, MessageWindowsSliceIter,
207    OwnedMessageWindow, SegmentAccessor, deserialize, deserialize_all_from_reader,
208    deserialize_all_streaming, deserialize_first_from_reader, deserialize_first_streaming,
209    deserialize_messages_bytes, deserialize_messages_from_reader, deserialize_str, element_str,
210    find_qualified_segment, find_segment, groups_are_contiguous_by_qualifier,
211    message_windows_from_reader, optional_element, required_element,
212};
213
214/// Splits a byte slice into [`MessageWindow`] views, one per `UNH`/`UNT` envelope,
215/// enabling parallel or lazy per-message processing without copying data.
216///
217/// # Example
218/// ```rust,ignore
219/// use edifact_rs::from_bytes_windows;
220/// let windows: Vec<_> = from_bytes_windows(input).collect();
221/// ```
222pub use de::message_windows_bytes as from_bytes_windows;
223
224// ── Proc-macro support ─────────────────────────────────────────────────────────
225
226/// Segment-navigation helpers for working with parsed EDIFACT segments.
227///
228/// These functions cover the most common patterns when extracting data from
229/// a parsed `&[Segment<'_>]` or `&[OwnedSegment]` slice.
230///
231/// ## Segment lookup
232///
233/// - [`find_segment`] — locate the first segment with a given tag.
234/// - [`find_qualified_segment`] — locate a segment by tag *and* qualifier (element 0).
235/// - [`helpers::find_qualified_segment_owned`] — owned-segment variant.
236/// - [`helpers::find_segment_owned`] — owned-segment variant of `find_segment`.
237/// - [`helpers::find_segment_typed`] — find a segment matching an `EdifactSegmentTag` implementor.
238/// - [`helpers::find_segments_typed`] — iterate all segments matching a tag type.
239/// - [`helpers::find_segments_iter`] — iterate all segments matching a tag string.
240///
241/// ## Element and component access
242///
243/// - [`element_str`] — extract the raw string value of an element.
244/// - [`required_element`] — extract a mandatory element, returning an error when absent.
245/// - [`optional_element`] — extract an optional element as `Option<&str>`.
246/// - [`helpers::required_component`] — extract a mandatory component within a composite element.
247/// - [`helpers::optional_component`] — extract an optional component within a composite element.
248/// - [`helpers::get_components_iter`] — iterate over the components of a composite element.
249/// - [`helpers::composite_element`] — retrieve a composite element as a [`crate::CompositeElement`].
250///
251/// ## Pattern matching
252///
253/// - [`helpers::qualifier_matches_pattern`] — test whether a qualifier value matches a
254///   wildcard pattern (e.g. `"E01*"` matches `"E010"`, `"E011"`, …).
255///
256/// ## Groups
257///
258/// - [`helpers::contiguous_groups_by_qualifier`] — collect contiguous groups of segments
259///   sharing the same qualifier value into a `Vec<Vec<…>>`.
260///
261/// # Example
262///
263/// ```rust,ignore
264/// use edifact_rs::helpers::{find_segment, required_element};
265///
266/// let bgm = find_segment(segments, "BGM").ok_or(/* … */)?;
267/// let doc_code = required_element(bgm, 0)?;
268/// ```
269pub mod helpers {
270    pub use crate::de::{
271        composite_element, contiguous_groups_by_qualifier, element_str, find_qualified_segment,
272        find_qualified_segment_owned, find_segment, find_segment_owned, find_segment_typed,
273        find_segments_iter, find_segments_typed, get_components_iter, optional_component,
274        optional_element, qualifier_matches_pattern, required_component, required_element,
275    };
276}
277pub use directory_validator::{
278    DirectoryValidator, DirectoryValidatorBuilder, ElementRef, OwnedElementRef, OwnedSegmentDef,
279    SegmentDefinition, Status,
280};
281#[cfg(feature = "derive")]
282#[cfg_attr(docsrs, doc(cfg(feature = "derive")))]
283pub use edifact_rs_derive::{EdifactDeserialize, EdifactSerialize};
284pub use event::{EdifactEvent, EventEmitter, OwnedEdifactEvent, VecEmitter, WriterEmitter};
285pub use ser::{
286    DecimalFloat, DecimalFloatDisplay, EdifactCompositeSerialize, EdifactSerialize, to_bytes,
287    to_edifact_string,
288};
289
290// ── core free functions ───────────────────────────────────────────────────────
291
292use std::io::{Read, Write};
293
294/// Iterator returned by [`from_bytes`].
295pub struct FromBytesIter<'a> {
296    parser: Option<parser::Parser<'a>>,
297    pending_error: Option<EdifactError>,
298    /// Remaining segment allowance (`None` = unlimited).
299    segments_remaining: Option<usize>,
300    /// Maximum byte budget (`None` = unlimited).
301    bytes_remaining: Option<u64>,
302    /// Byte offset of the start of the current parse position (approximated
303    /// as the sum of previously yielded segment spans — the borrowed tokenizer
304    /// does not expose a byte counter, so we track it from `Segment::span`).
305    bytes_consumed: u64,
306}
307
308/// Iterator returned by [`from_reader_iter`].
309pub struct FromReaderIter<R: Read> {
310    inner: parser::OwnedSegmentStream<std::io::BufReader<R>>,
311}
312
313impl<R: Read> Iterator for FromReaderIter<R> {
314    type Item = Result<OwnedSegment, EdifactError>;
315
316    fn next(&mut self) -> Option<Self::Item> {
317        self.inner.next()
318    }
319}
320
321impl<'a> Iterator for FromBytesIter<'a> {
322    type Item = Result<Segment<'a>, EdifactError>;
323
324    fn next(&mut self) -> Option<Self::Item> {
325        if let Some(err) = self.pending_error.take() {
326            return Some(Err(err));
327        }
328        // max_segments guard
329        if let Some(ref mut remaining) = self.segments_remaining {
330            if *remaining == 0 {
331                self.parser = None;
332                return None;
333            }
334        }
335        // max_input_bytes guard
336        if let Some(max) = self.bytes_remaining {
337            if self.bytes_consumed >= max {
338                self.parser = None;
339                return None;
340            }
341        }
342        let item = self.parser.as_mut()?.next();
343        if let Some(Ok(ref seg)) = item {
344            // Decrement segment allowance
345            if let Some(ref mut remaining) = self.segments_remaining {
346                *remaining = remaining.saturating_sub(1);
347            }
348            // Update byte counter from segment span and eagerly stop if exhausted
349            self.bytes_consumed = self.bytes_consumed.saturating_add(seg.span.len() as u64);
350            if let Some(max) = self.bytes_remaining {
351                if self.bytes_consumed >= max {
352                    self.parser = None;
353                }
354            }
355        }
356        item
357    }
358}
359
360/// Parse `input` bytes into an iterator of [`Segment`]s.
361///
362/// Borrows directly from `input` — zero allocation for segment data.
363///
364/// # Segment-size limit
365///
366/// Applies a default 64 KiB per-segment limit, matching the reader-based path.
367/// Use [`from_bytes_with_config`] to override.
368pub fn from_bytes(input: &[u8]) -> FromBytesIter<'_> {
369    from_bytes_with_config(input, parser::ReaderConfig::default())
370}
371
372/// Parse `input` bytes into an iterator of [`Segment`]s with explicit configuration.
373///
374/// All three [`ReaderConfig`] limits are enforced:
375/// - `max_segment_bytes`: returns [`EdifactError::SegmentTooLong`] if a single segment
376///   exceeds the threshold.
377/// - `max_segments`: stops the iterator after this many segments have been yielded.
378/// - `max_input_bytes`: stops the iterator once this many bytes have been consumed
379///   (byte count is approximated from segment spans; the last segment that pushes
380///   consumption over the threshold is still returned).
381///
382/// Pass `ReaderConfig::default()` to use the default 64 KiB per-segment limit with
383/// no segment-count or byte-budget cap.
384///
385/// # Example
386///
387/// ```
388/// use edifact_rs::{ReaderConfig, from_bytes_with_config};
389///
390/// let cfg = ReaderConfig::default().max_segment_bytes(128);
391/// let result: Result<Vec<_>, _> = from_bytes_with_config(b"BGM+220+1+9'", cfg).collect();
392/// assert!(result.is_ok());
393/// ```
394pub fn from_bytes_with_config<'a>(
395    input: &'a [u8],
396    config: parser::ReaderConfig,
397) -> FromBytesIter<'a> {
398    let segments_remaining = config.max_segments;
399    let bytes_remaining = config.max_input_bytes;
400    match tokenizer::ServiceStringAdvice::from_bytes_strict(input) {
401        Ok(ssa) => {
402            let t = tokenizer::Tokenizer::with_limit(input, ssa, config.max_segment_bytes);
403            FromBytesIter {
404                parser: Some(parser::Parser::new(t)),
405                pending_error: None,
406                segments_remaining,
407                bytes_remaining,
408                bytes_consumed: 0,
409            }
410        }
411        Err(error) => FromBytesIter {
412            parser: None,
413            pending_error: Some(error),
414            segments_remaining,
415            bytes_remaining,
416            bytes_consumed: 0,
417        },
418    }
419}
420
421/// Parse a reader into a lazy iterator of [`OwnedSegment`]s.
422///
423/// Returns a [`FromReaderIter`] that parses and yields segments on demand,
424/// keeping memory bounded. Use [`from_reader_collect`] to eagerly materialise
425/// all segments into a `Vec`.
426///
427/// # Errors
428///
429/// Each `next()` call yields `Some(Ok(segment))` for a successfully parsed
430/// segment, `Some(Err(EdifactError))` for a parse or I/O failure, and `None`
431/// when the end of the stream has been reached.
432pub fn from_reader<R: Read>(reader: R) -> FromReaderIter<R> {
433    from_reader_iter(reader)
434}
435
436/// Parse a reader into an owned `Vec` of all segments.
437///
438/// Eagerly collects the full interchange into memory. If you only need a
439/// subset of segments, prefer [`from_reader`] (lazy iterator) to avoid
440/// unnecessary allocations.
441///
442/// # Errors
443///
444/// Returns an error if the input contains malformed EDIFACT syntax,
445/// invalid UTF-8 segment text, dangling release sequences, or underlying I/O failures.
446pub fn from_reader_collect<R: Read>(reader: R) -> Result<Vec<OwnedSegment>, EdifactError> {
447    parser::from_reader(reader)
448}
449
450/// Parse `input` bytes eagerly into an iterator of [`OwnedSegment`]s.
451///
452/// Unlike [`from_bytes`] (which yields borrowed [`Segment`]s tied to the input
453/// lifetime), every segment returned here is fully owned.  This is convenient
454/// when you need to store or return segments without retaining a reference to
455/// the original byte slice.
456///
457/// # Example
458///
459/// ```
460/// let segs: Vec<edifact_rs::OwnedSegment> = edifact_rs::from_bytes_owned(b"BGM+220+1+9'")
461///     .collect::<Result<_, _>>()
462///     .unwrap();
463/// assert_eq!(segs[0].tag, "BGM");
464/// ```
465pub fn from_bytes_owned(
466    input: &[u8],
467) -> impl Iterator<Item = Result<OwnedSegment, EdifactError>> + '_ {
468    from_bytes(input).map(|r| r.map(OwnedSegment::from))
469}
470
471/// Parse `input` bytes eagerly into an iterator of [`OwnedSegment`]s with a
472/// custom [`ReaderConfig`].
473///
474/// Identical to [`from_bytes_owned`] but applies the limits and settings from
475/// `config` (e.g. `max_segment_bytes`, `max_segments`, `max_input_bytes`).
476///
477/// # Example
478///
479/// ```
480/// use edifact_rs::ReaderConfig;
481/// let config = ReaderConfig::default().max_segments(10);
482/// let segs: Vec<edifact_rs::OwnedSegment> = edifact_rs::from_bytes_owned_with_config(
483///     b"BGM+220+1+9'",
484///     config,
485/// )
486/// .collect::<Result<_, _>>()
487/// .unwrap();
488/// assert_eq!(segs[0].tag, "BGM");
489/// ```
490pub fn from_bytes_owned_with_config(
491    input: &[u8],
492    config: ReaderConfig,
493) -> impl Iterator<Item = Result<OwnedSegment, EdifactError>> + '_ {
494    from_bytes_with_config(input, config).map(|r| r.map(OwnedSegment::from))
495}
496
497/// Parse a reader into owned segments as a streaming iterator.
498///
499/// This keeps memory bounded by yielding segments incrementally instead of
500/// materializing the full interchange up front.
501pub fn from_reader_iter<R: Read>(reader: R) -> FromReaderIter<R> {
502    FromReaderIter {
503        inner: parser::from_reader_stream(reader),
504    }
505}
506
507/// Serialize `segments` to an [`std::io::Write`] implementation.
508///
509/// # Errors
510///
511/// Returns an error if writing fails or if segment serialization fails.
512pub fn to_writer<'a, 'b, W, I>(w: W, segments: I) -> Result<(), EdifactError>
513where
514    'b: 'a,
515    W: Write,
516    I: IntoIterator<Item = &'a Segment<'b>>,
517{
518    let mut wr = writer::Writer::new(w);
519    for seg in segments {
520        wr.write_segment(seg)?;
521    }
522    wr.finish().map(|_| ())
523}
524
525/// Serialize `segments` to an owned `Vec<u8>`.
526///
527/// # Errors
528///
529/// Returns an error if serialization fails.
530pub fn segments_to_bytes<'a, 'b, I>(segments: I) -> Result<Vec<u8>, EdifactError>
531where
532    'b: 'a,
533    I: IntoIterator<Item = &'a Segment<'b>>,
534{
535    let mut buf = Vec::new();
536    to_writer(&mut buf, segments)?;
537    Ok(buf)
538}
539
540/// Serialize a slice of [`OwnedSegment`]s to an owned `Vec<u8>`.
541///
542/// Convenience wrapper around [`to_writer`] that accepts owned segments
543/// directly.  Each segment is converted to its borrowed form on the fly
544/// and written immediately — no intermediate `Vec<Segment<'_>>` is
545/// allocated, so peak memory stays proportional to one segment at a time
546/// rather than the full slice.
547///
548/// # Errors
549///
550/// Returns an error if serialization fails.
551pub fn segments_to_bytes_owned(segments: &[OwnedSegment]) -> Result<Vec<u8>, EdifactError> {
552    let mut buf = Vec::new();
553    let mut wr = writer::Writer::new(&mut buf);
554    for seg in segments {
555        wr.write_segment(&seg.as_borrowed())?;
556    }
557    wr.finish()?;
558    Ok(buf)
559}
560
561/// Validate the envelope structure of an owned-segment slice.
562///
563/// Convenience wrapper around [`validate_envelope`] that accepts
564/// `&[OwnedSegment]` directly, avoiding a manual `.as_borrowed()` conversion.
565///
566/// # Errors
567///
568/// Returns an error if the envelope is structurally invalid.
569pub fn validate_envelope_owned(segments: &[OwnedSegment]) -> Result<(), EdifactError> {
570    let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
571    envelope::validate_envelope(&borrowed).map(|_| ())
572}
573
574/// Lenient envelope validation over owned segments — collects all errors.
575///
576/// Convenience wrapper around [`validate_envelope_lenient`] that accepts
577/// `&[OwnedSegment]` directly.  Returns an empty `Vec` when the envelope is valid.
578pub fn validate_envelope_lenient_owned(segments: &[OwnedSegment]) -> Vec<EdifactError> {
579    let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
580    envelope::validate_envelope_lenient(&borrowed)
581}
582
#[cfg(test)]
mod tests {
    use super::*;

    /// The UNA header below repeats `:` for its first two service characters;
    /// slice parsing must surface that as `EdifactError::InvalidUna`.
    #[test]
    fn from_bytes_rejects_invalid_una() {
        let outcome: Result<Vec<_>, _> = from_bytes(b"UNA::.? 'BGM:220'").collect();
        let err = outcome.expect_err("invalid UNA should fail slice parsing");
        assert!(matches!(err, EdifactError::InvalidUna));
    }
}