Skip to main content

edifact_rs/
writer.rs

1//! EDIFACT writer — serializes [`Segment`]s to wire format.
2
3use crate::{error::EdifactError, model::Segment, tokenizer::ServiceStringAdvice};
4use std::io::Write;
5
6/// Streaming EDIFACT writer.
7///
8/// Wraps any [`Write`] implementation and serializes segments one at a time.
9/// Call [`Writer::finish`] to flush and get the underlying writer back.
10pub struct Writer<W: Write> {
11    inner: W,
12    ssa: ServiceStringAdvice,
13    segment_count: u32,
14}
15
16impl<W: Write> Writer<W> {
17    /// Create a new writer with default EDIFACT delimiters.
18    pub fn new(inner: W) -> Self {
19        Self {
20            inner,
21            ssa: ServiceStringAdvice::default(),
22            segment_count: 0,
23        }
24    }
25
26    /// Create a writer with custom delimiters and write a UNA segment first.
27    pub fn with_una(mut inner: W, ssa: ServiceStringAdvice) -> Result<Self, EdifactError> {
28        // UNA: component_sep, element_sep, decimal_mark('.'), release_char, space, segment_term
29        let una = [
30            b'U',
31            b'N',
32            b'A',
33            ssa.component_sep,
34            ssa.element_sep,
35            b'.',
36            ssa.release_char,
37            b' ',
38            ssa.segment_term,
39        ];
40        inner.write_all(&una)?;
41        Ok(Self {
42            inner,
43            ssa,
44            segment_count: 0,
45        })
46    }
47
48    /// Write a single segment.
49    pub fn write_segment(&mut self, seg: &Segment<'_>) -> Result<(), EdifactError> {
50        // Tag
51        self.inner.write_all(seg.tag.as_bytes())?;
52
53        for element in &seg.elements {
54            // Element separator
55            self.inner.write_all(&[self.ssa.element_sep])?;
56            let mut first_component = true;
57            for component in &element.components {
58                if !first_component {
59                    self.inner.write_all(&[self.ssa.component_sep])?;
60                }
61                first_component = false;
62                self.write_escaped(component)?;
63            }
64        }
65
66        // Segment terminator
67        self.inner.write_all(&[self.ssa.segment_term])?;
68        self.segment_count += 1;
69        Ok(())
70    }
71
72    /// Write a raw segment from tag + element string slices.
73    ///
74    /// Each element string may contain `:` to separate components.
75    pub fn write_raw(&mut self, tag: &str, elements: &[&str]) -> Result<(), EdifactError> {
76        self.inner.write_all(tag.as_bytes())?;
77        let comp_sep = self.ssa.component_sep;
78        for el in elements {
79            self.inner.write_all(&[self.ssa.element_sep])?;
80            // Byte-level split: EDIFACT delimiters are always single bytes.
81            let mut parts = el.as_bytes().split(|&b| b == comp_sep);
82            if let Some(first) = parts.next() {
83                // SAFETY: input is valid UTF-8 and we split on a single-byte delimiter,
84                // so each part remains a valid UTF-8 slice.
85                self.write_escaped(std::str::from_utf8(first).map_err(|_| EdifactError::InvalidUtf8)?)?;
86            }
87            for part in parts {
88                self.inner.write_all(&[comp_sep])?;
89                self.write_escaped(std::str::from_utf8(part).map_err(|_| EdifactError::InvalidUtf8)?)?;
90            }
91        }
92        self.inner.write_all(&[self.ssa.segment_term])?;
93        self.segment_count += 1;
94        Ok(())
95    }
96
97    /// Write a segment from a tag and pre-split element/component data.
98    ///
99    /// `elements` is a slice of elements; each element is a sequence of component strings.
100    /// This avoids the lifetime constraints of [`Self::write_segment`] when building
101    /// segments from runtime-owned data (e.g. inside [`crate::WriterEmitter`]).
102    pub fn write_segment_parts<E>(
103        &mut self,
104        tag: &str,
105        elements: &[E],
106    ) -> Result<(), EdifactError>
107    where
108        E: AsRef<[String]>,
109    {
110        self.inner.write_all(tag.as_bytes())?;
111        for element in elements {
112            self.inner.write_all(&[self.ssa.element_sep])?;
113            let mut first = true;
114            for comp in element.as_ref() {
115                if !first {
116                    self.inner.write_all(&[self.ssa.component_sep])?;
117                }
118                first = false;
119                self.write_escaped(comp.as_str())?;
120            }
121        }
122        self.inner.write_all(&[self.ssa.segment_term])?;
123        self.segment_count += 1;
124        Ok(())
125    }
126
127    /// Flush and return the underlying writer.
128    pub fn finish(mut self) -> Result<W, EdifactError> {
129        self.inner.flush()?;
130        Ok(self.inner)
131    }
132
133    /// Number of segments written so far.
134    pub fn segment_count(&self) -> u32 {
135        self.segment_count
136    }
137
138    /// Write only the segment tag bytes — no element separator or terminator.
139    ///
140    /// Used by [`crate::WriterEmitter`] for eager, zero-allocation event writing.
141    #[inline]
142    pub(crate) fn write_tag_only(&mut self, tag: &str) -> Result<(), EdifactError> {
143        self.inner.write_all(tag.as_bytes())?;
144        Ok(())
145    }
146
147    /// Write one element separator byte.
148    #[inline]
149    pub(crate) fn write_element_sep(&mut self) -> Result<(), EdifactError> {
150        self.inner.write_all(&[self.ssa.element_sep])?;
151        Ok(())
152    }
153
154    /// Write one component separator byte.
155    #[inline]
156    pub(crate) fn write_component_sep(&mut self) -> Result<(), EdifactError> {
157        self.inner.write_all(&[self.ssa.component_sep])?;
158        Ok(())
159    }
160
161    /// Write the segment terminator and increment the internal segment counter.
162    #[inline]
163    pub(crate) fn write_segment_term_and_count(&mut self) -> Result<(), EdifactError> {
164        self.inner.write_all(&[self.ssa.segment_term])?;
165        self.segment_count += 1;
166        Ok(())
167    }
168
169    /// Write a value, escaping any delimiter characters.
170    pub(crate) fn write_escaped(&mut self, value: &str) -> Result<(), EdifactError> {
171        let (elem, comp, release, term) = (
172            self.ssa.element_sep,
173            self.ssa.component_sep,
174            self.ssa.release_char,
175            self.ssa.segment_term,
176        );
177        let bytes = value.as_bytes();
178        let mut pos = 0;
179        while pos < bytes.len() {
180            // Find next byte that needs escaping
181            let end = bytes[pos..]
182                .iter()
183                .position(|&b| b == elem || b == comp || b == release || b == term)
184                .map(|r| pos + r)
185                .unwrap_or(bytes.len());
186            if end > pos {
187                self.inner.write_all(&bytes[pos..end])?;
188            }
189            if end < bytes.len() {
190                self.inner.write_all(&[release, bytes[end]])?;
191                pos = end + 1;
192            } else {
193                break;
194            }
195        }
196        Ok(())
197    }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Element;

    /// A plain segment comes out as tag, `+`-separated elements and a `'` terminator.
    #[test]
    fn write_and_parse_simple_segment() {
        let bgm = Segment::new(
            "BGM",
            vec![Element::of(&["220"]), Element::of(&["ORDER123"])],
        );
        let input: Vec<Segment<'static>> = vec![bgm];

        let encoded = crate::to_bytes(&input).unwrap();
        let text = std::str::from_utf8(&encoded).unwrap();

        assert!(text.starts_with("BGM+220+ORDER123'"));
    }

    /// Delimiter bytes inside a value are prefixed with the release character.
    #[test]
    fn release_char_escaped() {
        let ftx = Segment::new("FTX", vec![Element::of(&["value+with+delimiters"])]);
        let input: Vec<Segment<'static>> = vec![ftx];

        let encoded = crate::to_bytes(&input).unwrap();
        let s = std::str::from_utf8(&encoded).unwrap();

        // Each literal `+` in the value has to appear as `?+` on the wire.
        assert!(s.contains("?+"), "escape missing: {s}");
    }

    /// Serialized output can be parsed back into segments with the same tags.
    #[test]
    fn round_trip_preserves_values() {
        let unb = Segment::new(
            "UNB",
            vec![
                Element::of(&["UNOA", "1"]),
                Element::of(&["SENDER"]),
                Element::of(&["RECEIVER"]),
            ],
        );
        let unz = Segment::new("UNZ", vec![Element::of(&["0"]), Element::of(&["1"])]);
        let input: Vec<Segment<'static>> = vec![unb, unz];

        let encoded = crate::to_bytes(&input).unwrap();
        let parsed: Vec<crate::OwnedSegment> =
            crate::parser::from_reader(std::io::Cursor::new(&encoded))
                .expect("round-trip parse failed");

        assert_eq!(parsed[0].tag, "UNB");
        assert_eq!(parsed[0].as_borrowed().element_str(0), Some("UNOA"));
        assert_eq!(parsed[1].tag, "UNZ");
    }
}