Skip to main content

edifact_rs/
writer.rs

1//! EDIFACT writer — serializes [`Segment`]s to wire format.
2
3use crate::{error::EdifactError, model::Segment, tokenizer::ServiceStringAdvice};
4use std::io::Write;
5
6/// Streaming EDIFACT writer.
7///
8/// Wraps any [`Write`] implementation and serializes segments one at a time.
9/// Call [`Writer::finish`] to flush and get the underlying writer back.
10pub struct Writer<W: Write> {
11    inner: W,
12    ssa: ServiceStringAdvice,
13    /// Running count of segments written.  `u64` to prevent silent overflow on
14    /// pathological inputs (a `u32` would wrap after ~4 billion segments).
15    segment_count: u64,
16}
17
18impl<W: Write> Writer<W> {
19    /// Create a new writer with default EDIFACT delimiters.
20    pub fn new(inner: W) -> Self {
21        Self {
22            inner,
23            ssa: ServiceStringAdvice::default(),
24            segment_count: 0,
25        }
26    }
27
28    /// Create a writer with custom delimiters and write a UNA segment first.
29    pub fn with_una(mut inner: W, ssa: ServiceStringAdvice) -> Result<Self, EdifactError> {
30        // EDIFACT syntax requires all delimiter bytes to be ASCII (0x00–0x7F).
31        // Non-ASCII bytes would bisect multi-byte UTF-8 sequences in data values.
32        if [ssa.component_sep, ssa.element_sep, ssa.decimal_mark, ssa.release_char, ssa.segment_term]
33            .iter()
34            .any(|&b| b > 0x7F)
35        {
36            return Err(EdifactError::InvalidUna);
37        }
38        // UNA: component_sep, element_sep, decimal_mark, release_char, space, segment_term
39        let una = [
40            b'U',
41            b'N',
42            b'A',
43            ssa.component_sep,
44            ssa.element_sep,
45            ssa.decimal_mark,
46            ssa.release_char,
47            b' ',
48            ssa.segment_term,
49        ];
50        inner.write_all(&una)?;
51        Ok(Self {
52            inner,
53            ssa,
54            segment_count: 0,
55        })
56    }
57
58    /// Write a single segment.
59    pub fn write_segment(&mut self, seg: &Segment<'_>) -> Result<(), EdifactError> {
60        // Tag
61        self.inner.write_all(seg.tag.as_bytes())?;
62
63        for element in &seg.elements {
64            // Element separator
65            self.inner.write_all(&[self.ssa.element_sep])?;
66            let mut first_component = true;
67            for component in &element.components {
68                if !first_component {
69                    self.inner.write_all(&[self.ssa.component_sep])?;
70                }
71                first_component = false;
72                self.write_escaped(component)?;
73            }
74        }
75
76        // Segment terminator
77        self.inner.write_all(&[self.ssa.segment_term])?;
78        self.segment_count += 1;
79        Ok(())
80    }
81
82    /// Write a raw segment from tag + element string slices.
83    ///
84    /// Each element string is split on the **active component-separator byte** from the
85    /// configured [`ServiceStringAdvice`][crate::ServiceStringAdvice] to identify component
86    /// boundaries.  The default component separator is `:` (0x3A), but this can differ when a
87    /// non-default `UNA` string was used to construct the writer.
88    ///
89    /// # Delimiter dependency
90    ///
91    /// Callers that embed the literal `:` character in element strings rely on `:` being
92    /// the component separator.  When the writer uses a non-default delimiter set, `:` will
93    /// **not** be treated as a component boundary and the segment will be written incorrectly.
94    ///
95    /// **UTF-8 safety**: EDIFACT syntax requires all delimiter bytes to be single-byte ASCII
96    /// characters (values 0x00–0x7F).  Non-ASCII delimiter bytes would bisect multi-byte UTF-8
97    /// sequences in data values and produce malformed output.  All fields of
98    /// [`ServiceStringAdvice`][crate::ServiceStringAdvice] must therefore hold ASCII byte values.
99    ///
100    /// To produce correct output regardless of the active delimiter, prefer
101    /// [`Self::write_segment_parts`] which accepts pre-split component slices.
102    pub fn write_raw(&mut self, tag: &str, elements: &[&str]) -> Result<(), EdifactError> {
103        self.inner.write_all(tag.as_bytes())?;
104        let comp_sep = self.ssa.component_sep;
105        for el in elements {
106            self.inner.write_all(&[self.ssa.element_sep])?;
107            // Byte-level split: EDIFACT delimiters are always single bytes.
108            let mut parts = el.as_bytes().split(|&b| b == comp_sep);
109            if let Some(first) = parts.next() {
110                // SAFETY: input is valid UTF-8 and we split on a single-byte delimiter,
111                // so each part remains a valid UTF-8 slice.
112                self.write_escaped(std::str::from_utf8(first).map_err(|_| EdifactError::InvalidUtf8)?)?;
113            }
114            for part in parts {
115                self.inner.write_all(&[comp_sep])?;
116                self.write_escaped(std::str::from_utf8(part).map_err(|_| EdifactError::InvalidUtf8)?)?;
117            }
118        }
119        self.inner.write_all(&[self.ssa.segment_term])?;
120        self.segment_count += 1;
121        Ok(())
122    }
123
124    /// Write a segment from a tag and pre-split element/component data.
125    ///
126    /// `elements` is a slice of elements; each element is a sequence of component strings.
127    /// This avoids the lifetime constraints of [`Self::write_segment`] when building
128    /// segments from runtime-owned data (e.g. inside [`crate::WriterEmitter`]).
129    pub fn write_segment_parts<E>(
130        &mut self,
131        tag: &str,
132        elements: &[E],
133    ) -> Result<(), EdifactError>
134    where
135        E: AsRef<[String]>,
136    {
137        self.inner.write_all(tag.as_bytes())?;
138        for element in elements {
139            self.inner.write_all(&[self.ssa.element_sep])?;
140            let mut first = true;
141            for comp in element.as_ref() {
142                if !first {
143                    self.inner.write_all(&[self.ssa.component_sep])?;
144                }
145                first = false;
146                self.write_escaped(comp.as_str())?;
147            }
148        }
149        self.inner.write_all(&[self.ssa.segment_term])?;
150        self.segment_count += 1;
151        Ok(())
152    }
153
154    /// Flush and return the underlying writer.
155    pub fn finish(mut self) -> Result<W, EdifactError> {
156        self.inner.flush()?;
157        Ok(self.inner)
158    }
159
160    /// Returns the total number of segments written so far.
161    pub fn segment_count(&self) -> u64 {
162        self.segment_count
163    }
164
165    /// Write only the segment tag bytes — no element separator or terminator.
166    ///
167    /// Used by [`crate::WriterEmitter`] for eager, zero-allocation event writing.
168    #[inline]
169    pub(crate) fn write_tag_only(&mut self, tag: &str) -> Result<(), EdifactError> {
170        self.inner.write_all(tag.as_bytes())?;
171        Ok(())
172    }
173
174    /// Write one element separator byte.
175    #[inline]
176    pub(crate) fn write_element_sep(&mut self) -> Result<(), EdifactError> {
177        self.inner.write_all(&[self.ssa.element_sep])?;
178        Ok(())
179    }
180
181    /// Write one component separator byte.
182    #[inline]
183    pub(crate) fn write_component_sep(&mut self) -> Result<(), EdifactError> {
184        self.inner.write_all(&[self.ssa.component_sep])?;
185        Ok(())
186    }
187
188    /// Write the segment terminator and increment the internal segment counter.
189    #[inline]
190    pub(crate) fn write_segment_term_and_count(&mut self) -> Result<(), EdifactError> {
191        self.inner.write_all(&[self.ssa.segment_term])?;
192        self.segment_count += 1;
193        Ok(())
194    }
195
196    /// Write a value, escaping any delimiter characters.
197    pub(crate) fn write_escaped(&mut self, value: &str) -> Result<(), EdifactError> {
198        let (elem, comp, release, term) = (
199            self.ssa.element_sep,
200            self.ssa.component_sep,
201            self.ssa.release_char,
202            self.ssa.segment_term,
203        );
204        let bytes = value.as_bytes();
205        let mut pos = 0;
206        while pos < bytes.len() {
207            // Find next byte that needs escaping
208            let end = bytes[pos..]
209                .iter()
210                .position(|&b| b == elem || b == comp || b == release || b == term)
211                .map(|r| pos + r)
212                .unwrap_or(bytes.len());
213            if end > pos {
214                self.inner.write_all(&bytes[pos..end])?;
215            }
216            if end < bytes.len() {
217                self.inner.write_all(&[release, bytes[end]])?;
218                pos = end + 1;
219            } else {
220                break;
221            }
222        }
223        Ok(())
224    }
225}
226
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Element;

    /// A plain two-element segment serializes with the default `+` / `'` delimiters.
    #[test]
    fn write_and_parse_simple_segment() {
        let order_number = Element::of(&["ORDER123"]);
        let doc_code = Element::of(&["220"]);
        let segs: Vec<Segment<'static>> = vec![Segment::new("BGM", vec![doc_code, order_number])];

        let bytes = crate::segments_to_bytes(&segs).unwrap();
        let s = std::str::from_utf8(&bytes).unwrap();

        assert!(s.starts_with("BGM+220+ORDER123'"));
    }

    /// Delimiter characters inside a data value are prefixed with the release character.
    #[test]
    fn release_char_escaped() {
        let free_text = Element::of(&["value+with+delimiters"]);
        let segs: Vec<Segment<'static>> = vec![Segment::new("FTX", vec![free_text])];

        let bytes = crate::segments_to_bytes(&segs).unwrap();
        let s = std::str::from_utf8(&bytes).unwrap();

        // Each literal `+` inside the value has to come out as `?+`.
        assert!(s.contains("?+"), "escape missing: {s}");
    }

    /// Serialized segments can be parsed back and keep their tags and first element.
    #[test]
    fn round_trip_preserves_values() {
        let interchange_header = Segment::new(
            "UNB",
            vec![
                Element::of(&["UNOA", "1"]),
                Element::of(&["SENDER"]),
                Element::of(&["RECEIVER"]),
            ],
        );
        let interchange_trailer =
            Segment::new("UNZ", vec![Element::of(&["0"]), Element::of(&["1"])]);
        let segs: Vec<Segment<'static>> = vec![interchange_header, interchange_trailer];

        let bytes = crate::segments_to_bytes(&segs).unwrap();
        let rt: Vec<crate::OwnedSegment> =
            crate::parser::from_reader(std::io::Cursor::new(&bytes))
                .expect("round-trip parse failed");

        assert_eq!(rt[0].tag, "UNB");
        assert_eq!(rt[0].as_borrowed().element_str(0), Some("UNOA"));
        assert_eq!(rt[1].tag, "UNZ");
    }

    /// `Writer::with_una` must honour the configured delimiters everywhere, and the
    /// delimiter-agnostic `write_segment_parts` API must emit the configured component
    /// separator even when the UNA differs from the default.
    #[test]
    fn with_una_non_default_delimiters() {
        use crate::tokenizer::ServiceStringAdvice;

        // Non-default set: elem_sep=!  comp_sep=|  seg_term=~  esc=?  dec_mark=,
        let ssa = ServiceStringAdvice {
            element_sep: b'!',
            component_sep: b'|',
            segment_term: b'~',
            release_char: b'?',
            decimal_mark: b',',
        };

        let mut writer = Writer::with_una(Vec::new(), ssa).expect("writer creation failed");

        // Components are handed over pre-split, so no `:` is baked into the input.
        let elements = [
            vec![String::from("220"), String::from("SUB1")],
            vec![String::from("PO1")],
        ];
        writer
            .write_segment_parts("BGM", &elements)
            .expect("write failed");

        let out = writer.finish().expect("finish failed");
        let s = std::str::from_utf8(&out).unwrap();

        // The output starts with the UNA header emitted by `with_una`; the segment follows.
        assert!(s.contains("BGM"), "BGM segment missing: {s}");

        // Restrict the segment-level checks to the text from `BGM` onwards so the UNA
        // header bytes cannot satisfy them by accident.
        let after_una = match s.find("BGM") {
            Some(start) => &s[start..],
            None => s,
        };
        assert!(after_una.contains('!'), "missing element sep in segment: {after_una}");
        assert!(after_una.contains('|'), "missing component sep in segment: {after_una}");
        assert!(after_una.ends_with('~'), "missing segment term in segment: {after_una}");

        // No value in this segment carries a decimal, so the mark is only in the UNA header.
        assert!(s.contains(','), "missing decimal mark in UNA: {s}");

        // None of the default delimiters may appear anywhere.
        assert!(!s.contains('+'), "default element sep leaked: {s}");
        assert!(!s.contains(':'), "default component sep leaked: {s}");
        // The default terminator `'` must not show up after the UNA header either.
        assert!(!after_una.contains('\''), "default segment term leaked after UNA: {after_una}");
    }
}