Skip to main content

edifact_rs/
writer.rs

1//! EDIFACT writer — serializes [`Segment`]s to wire format.
2
3use crate::{error::EdifactError, model::Segment, tokenizer::ServiceStringAdvice};
4use std::io::Write;
5
6/// Streaming EDIFACT writer.
7///
8/// Wraps any [`Write`] implementation and serializes segments one at a time.
9/// Call [`Writer::finish`] to flush and get the underlying writer back.
10pub struct Writer<W: Write> {
11    inner: W,
12    ssa: ServiceStringAdvice,
13    segment_count: u32,
14}
15
16impl<W: Write> Writer<W> {
17    /// Create a new writer with default EDIFACT delimiters.
18    pub fn new(inner: W) -> Self {
19        Self {
20            inner,
21            ssa: ServiceStringAdvice::default(),
22            segment_count: 0,
23        }
24    }
25
26    /// Create a writer with custom delimiters and write a UNA segment first.
27    pub fn with_una(mut inner: W, ssa: ServiceStringAdvice) -> Result<Self, EdifactError> {
28        // EDIFACT syntax requires all delimiter bytes to be ASCII (0x00–0x7F).
29        // Non-ASCII bytes would bisect multi-byte UTF-8 sequences in data values.
30        if [ssa.component_sep, ssa.element_sep, ssa.decimal_mark, ssa.release_char, ssa.segment_term]
31            .iter()
32            .any(|&b| b > 0x7F)
33        {
34            return Err(EdifactError::InvalidUna);
35        }
36        // UNA: component_sep, element_sep, decimal_mark, release_char, space, segment_term
37        let una = [
38            b'U',
39            b'N',
40            b'A',
41            ssa.component_sep,
42            ssa.element_sep,
43            ssa.decimal_mark,
44            ssa.release_char,
45            b' ',
46            ssa.segment_term,
47        ];
48        inner.write_all(&una)?;
49        Ok(Self {
50            inner,
51            ssa,
52            segment_count: 0,
53        })
54    }
55
56    /// Write a single segment.
57    pub fn write_segment(&mut self, seg: &Segment<'_>) -> Result<(), EdifactError> {
58        // Tag
59        self.inner.write_all(seg.tag.as_bytes())?;
60
61        for element in &seg.elements {
62            // Element separator
63            self.inner.write_all(&[self.ssa.element_sep])?;
64            let mut first_component = true;
65            for component in &element.components {
66                if !first_component {
67                    self.inner.write_all(&[self.ssa.component_sep])?;
68                }
69                first_component = false;
70                self.write_escaped(component)?;
71            }
72        }
73
74        // Segment terminator
75        self.inner.write_all(&[self.ssa.segment_term])?;
76        self.segment_count += 1;
77        Ok(())
78    }
79
80    /// Write a raw segment from tag + element string slices.
81    ///
82    /// Each element string is split on the **active component-separator byte** from the
83    /// configured [`ServiceStringAdvice`][crate::ServiceStringAdvice] to identify component
84    /// boundaries.  The default component separator is `:` (0x3A), but this can differ when a
85    /// non-default `UNA` string was used to construct the writer.
86    ///
87    /// # Delimiter dependency
88    ///
89    /// Callers that embed the literal `:` character in element strings rely on `:` being
90    /// the component separator.  When the writer uses a non-default delimiter set, `:` will
91    /// **not** be treated as a component boundary and the segment will be written incorrectly.
92    ///
93    /// **UTF-8 safety**: EDIFACT syntax requires all delimiter bytes to be single-byte ASCII
94    /// characters (values 0x00–0x7F).  Non-ASCII delimiter bytes would bisect multi-byte UTF-8
95    /// sequences in data values and produce malformed output.  All fields of
96    /// [`ServiceStringAdvice`][crate::ServiceStringAdvice] must therefore hold ASCII byte values.
97    ///
98    /// To produce correct output regardless of the active delimiter, prefer
99    /// [`Self::write_segment_parts`] which accepts pre-split component slices.
100    pub fn write_raw(&mut self, tag: &str, elements: &[&str]) -> Result<(), EdifactError> {
101        self.inner.write_all(tag.as_bytes())?;
102        let comp_sep = self.ssa.component_sep;
103        for el in elements {
104            self.inner.write_all(&[self.ssa.element_sep])?;
105            // Byte-level split: EDIFACT delimiters are always single bytes.
106            let mut parts = el.as_bytes().split(|&b| b == comp_sep);
107            if let Some(first) = parts.next() {
108                // SAFETY: input is valid UTF-8 and we split on a single-byte delimiter,
109                // so each part remains a valid UTF-8 slice.
110                self.write_escaped(std::str::from_utf8(first).map_err(|_| EdifactError::InvalidUtf8)?)?;
111            }
112            for part in parts {
113                self.inner.write_all(&[comp_sep])?;
114                self.write_escaped(std::str::from_utf8(part).map_err(|_| EdifactError::InvalidUtf8)?)?;
115            }
116        }
117        self.inner.write_all(&[self.ssa.segment_term])?;
118        self.segment_count += 1;
119        Ok(())
120    }
121
122    /// Write a segment from a tag and pre-split element/component data.
123    ///
124    /// `elements` is a slice of elements; each element is a sequence of component strings.
125    /// This avoids the lifetime constraints of [`Self::write_segment`] when building
126    /// segments from runtime-owned data (e.g. inside [`crate::WriterEmitter`]).
127    pub fn write_segment_parts<E>(
128        &mut self,
129        tag: &str,
130        elements: &[E],
131    ) -> Result<(), EdifactError>
132    where
133        E: AsRef<[String]>,
134    {
135        self.inner.write_all(tag.as_bytes())?;
136        for element in elements {
137            self.inner.write_all(&[self.ssa.element_sep])?;
138            let mut first = true;
139            for comp in element.as_ref() {
140                if !first {
141                    self.inner.write_all(&[self.ssa.component_sep])?;
142                }
143                first = false;
144                self.write_escaped(comp.as_str())?;
145            }
146        }
147        self.inner.write_all(&[self.ssa.segment_term])?;
148        self.segment_count += 1;
149        Ok(())
150    }
151
152    /// Flush and return the underlying writer.
153    pub fn finish(mut self) -> Result<W, EdifactError> {
154        self.inner.flush()?;
155        Ok(self.inner)
156    }
157
158    /// Number of segments written so far.
159    pub fn segment_count(&self) -> u32 {
160        self.segment_count
161    }
162
163    /// Write only the segment tag bytes — no element separator or terminator.
164    ///
165    /// Used by [`crate::WriterEmitter`] for eager, zero-allocation event writing.
166    #[inline]
167    pub(crate) fn write_tag_only(&mut self, tag: &str) -> Result<(), EdifactError> {
168        self.inner.write_all(tag.as_bytes())?;
169        Ok(())
170    }
171
172    /// Write one element separator byte.
173    #[inline]
174    pub(crate) fn write_element_sep(&mut self) -> Result<(), EdifactError> {
175        self.inner.write_all(&[self.ssa.element_sep])?;
176        Ok(())
177    }
178
179    /// Write one component separator byte.
180    #[inline]
181    pub(crate) fn write_component_sep(&mut self) -> Result<(), EdifactError> {
182        self.inner.write_all(&[self.ssa.component_sep])?;
183        Ok(())
184    }
185
186    /// Write the segment terminator and increment the internal segment counter.
187    #[inline]
188    pub(crate) fn write_segment_term_and_count(&mut self) -> Result<(), EdifactError> {
189        self.inner.write_all(&[self.ssa.segment_term])?;
190        self.segment_count += 1;
191        Ok(())
192    }
193
194    /// Write a value, escaping any delimiter characters.
195    pub(crate) fn write_escaped(&mut self, value: &str) -> Result<(), EdifactError> {
196        let (elem, comp, release, term) = (
197            self.ssa.element_sep,
198            self.ssa.component_sep,
199            self.ssa.release_char,
200            self.ssa.segment_term,
201        );
202        let bytes = value.as_bytes();
203        let mut pos = 0;
204        while pos < bytes.len() {
205            // Find next byte that needs escaping
206            let end = bytes[pos..]
207                .iter()
208                .position(|&b| b == elem || b == comp || b == release || b == term)
209                .map(|r| pos + r)
210                .unwrap_or(bytes.len());
211            if end > pos {
212                self.inner.write_all(&bytes[pos..end])?;
213            }
214            if end < bytes.len() {
215                self.inner.write_all(&[release, bytes[end]])?;
216                pos = end + 1;
217            } else {
218                break;
219            }
220        }
221        Ok(())
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Element;

    /// A plain two-element segment is serialized with the default delimiters.
    #[test]
    fn write_and_parse_simple_segment() {
        let order = Segment::new(
            "BGM",
            vec![Element::of(&["220"]), Element::of(&["ORDER123"])],
        );
        let segs: Vec<Segment<'static>> = vec![order];
        let encoded = crate::segments_to_bytes(&segs).unwrap();
        let s = std::str::from_utf8(&encoded).unwrap();
        assert!(s.starts_with("BGM+220+ORDER123'"));
    }

    /// Delimiter characters inside a value are prefixed with the release character.
    #[test]
    fn release_char_escaped() {
        let free_text = Segment::new("FTX", vec![Element::of(&["value+with+delimiters"])]);
        let segs: Vec<Segment<'static>> = vec![free_text];
        let encoded = crate::segments_to_bytes(&segs).unwrap();
        let s = std::str::from_utf8(&encoded).unwrap();
        // Each `+` inside the value has to come out as `?+`.
        assert!(s.contains("?+"), "escape missing: {s}");
    }

    /// Serialized output can be parsed back and yields the same tags and values.
    #[test]
    fn round_trip_preserves_values() {
        let header = Segment::new(
            "UNB",
            vec![
                Element::of(&["UNOA", "1"]),
                Element::of(&["SENDER"]),
                Element::of(&["RECEIVER"]),
            ],
        );
        let trailer = Segment::new("UNZ", vec![Element::of(&["0"]), Element::of(&["1"])]);
        let segs: Vec<Segment<'static>> = vec![header, trailer];

        let encoded = crate::segments_to_bytes(&segs).unwrap();
        let parsed: Vec<crate::OwnedSegment> =
            crate::parser::from_reader(std::io::Cursor::new(&encoded))
                .expect("round-trip parse failed");

        assert_eq!(parsed[0].tag, "UNB");
        assert_eq!(parsed[0].as_borrowed().element_str(0), Some("UNOA"));
        assert_eq!(parsed[1].tag, "UNZ");
    }

    /// `Writer::with_una` must apply the configured delimiters everywhere, and
    /// `write_segment_parts` (which takes pre-split components) must emit the
    /// configured component separator even when the UNA is non-default.
    #[test]
    fn with_una_non_default_delimiters() {
        use crate::tokenizer::ServiceStringAdvice;

        // Non-default UNA: comp_sep=|  elem_sep=!  esc=?  dec_mark=,  seg_term=~
        let ssa = ServiceStringAdvice {
            component_sep: b'|',
            element_sep: b'!',
            release_char: b'?',
            decimal_mark: b',',
            segment_term: b'~',
        };

        let mut writer = Writer::with_una(Vec::new(), ssa).expect("writer creation failed");

        // Components are handed over pre-split, so no literal `:` is involved.
        let parts = [
            vec!["220".to_owned(), "SUB1".to_owned()],
            vec!["PO1".to_owned()],
        ];
        writer
            .write_segment_parts("BGM", &parts)
            .expect("write failed");

        let out = writer.finish().expect("finish failed");
        let s = std::str::from_utf8(&out).unwrap();

        // The UNA header comes first; the segment itself must use `!`, `|` and `~`.
        assert!(s.contains("BGM"), "BGM segment missing: {s}");
        // Look only at the text from the tag onwards so the UNA header bytes
        // cannot satisfy the segment-level assertions.
        let after_una = s.find("BGM").map_or(s, |start| &s[start..]);
        assert!(after_una.contains('!'), "missing element sep in segment: {after_una}");
        assert!(after_una.contains('|'), "missing component sep in segment: {after_una}");
        assert!(after_una.ends_with('~'), "missing segment term in segment: {after_una}");
        // The segment carries no decimal values, so the mark can only come from the UNA header.
        assert!(s.contains(','), "missing decimal mark in UNA: {s}");
        assert!(!s.contains('+'), "default element sep leaked: {s}");
        assert!(!s.contains(':'), "default component sep leaked: {s}");
        // `'` is the default terminator and must not appear once `~` is configured.
        assert!(!after_una.contains('\''), "default segment term leaked after UNA: {after_una}");
    }
}