// edifact_primitives/segment.rs

use crate::SegmentPosition;

3/// A parsed EDIFACT segment that borrows from the input buffer.
4///
5/// Zero-copy design: all string data references the original input,
6/// avoiding allocations during the parsing hot path.
7///
8/// # Structure
9///
10/// An EDIFACT segment like `NAD+Z04+9900123000002:500` has:
11/// - `id`: `"NAD"`
12/// - `elements[0]`: `["Z04"]` (simple element)
13/// - `elements[1]`: `["9900123000002", "500"]` (composite element with 2 components)
14#[derive(Debug, Clone)]
15pub struct RawSegment<'a> {
16    /// Segment identifier (e.g., "NAD", "LOC", "DTM").
17    pub id: &'a str,
18    /// Elements, where each element is a vector of component strings.
19    /// `elements[i][j]` = component `j` of element `i`.
20    pub elements: Vec<Vec<&'a str>>,
21    /// Position metadata for this segment.
22    pub position: SegmentPosition,
23}
24
25impl<'a> RawSegment<'a> {
26    /// Creates a new RawSegment.
27    pub fn new(id: &'a str, elements: Vec<Vec<&'a str>>, position: SegmentPosition) -> Self {
28        Self {
29            id,
30            elements,
31            position,
32        }
33    }
34
35    /// Returns the number of elements (excluding the segment ID).
36    pub fn element_count(&self) -> usize {
37        self.elements.len()
38    }
39
40    /// Gets the first component of element at `index`, or empty string if missing.
41    ///
42    /// This is a convenience method for accessing simple (non-composite) elements.
43    pub fn get_element(&self, index: usize) -> &str {
44        self.elements
45            .get(index)
46            .and_then(|e| e.first())
47            .copied()
48            .unwrap_or("")
49    }
50
51    /// Gets a specific component within an element, or empty string if missing.
52    ///
53    /// `element_index` is the 0-based element position.
54    /// `component_index` is the 0-based component position within that element.
55    pub fn get_component(&self, element_index: usize, component_index: usize) -> &str {
56        self.elements
57            .get(element_index)
58            .and_then(|e| e.get(component_index))
59            .copied()
60            .unwrap_or("")
61    }
62
63    /// Returns all components of element at `index`, or empty slice if missing.
64    pub fn get_components(&self, element_index: usize) -> &[&'a str] {
65        self.elements
66            .get(element_index)
67            .map_or(&[], |e| e.as_slice())
68    }
69
70    /// Checks if the segment has the given ID (case-insensitive).
71    pub fn is(&self, segment_id: &str) -> bool {
72        self.id.eq_ignore_ascii_case(segment_id)
73    }
74
75    /// Reconstruct the raw segment string (without terminator) using the given delimiters.
76    ///
77    /// This produces `ID+elem1:comp1:comp2+elem2` format (without the trailing terminator).
78    pub fn to_raw_string(&self, delimiters: &crate::EdifactDelimiters) -> String {
79        let elem_sep = delimiters.element as char;
80        let comp_sep = delimiters.component as char;
81
82        let mut result = self.id.to_string();
83
84        for element in &self.elements {
85            result.push(elem_sep);
86            // Preserve ALL components including trailing empty ones for roundtrip fidelity.
87            // E.g. CAV+SA::::' must keep the trailing colons.
88            for (j, component) in element.iter().enumerate() {
89                if j > 0 {
90                    result.push(comp_sep);
91                }
92                result.push_str(component);
93            }
94        }
95
96        // Trim trailing empty elements (trailing element separators)
97        while result.ends_with(elem_sep) {
98            result.pop();
99        }
100
101        result
102    }
103}
104
105impl<'a> std::fmt::Display for RawSegment<'a> {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        write!(f, "{}", self.id)?;
108        for element in &self.elements {
109            write!(f, "+")?;
110            for (j, component) in element.iter().enumerate() {
111                if j > 0 {
112                    write!(f, ":")?;
113                }
114                write!(f, "{component}")?;
115            }
116        }
117        Ok(())
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an arbitrary position; the tests below do not depend on its values.
    fn make_position() -> SegmentPosition {
        SegmentPosition::new(1, 0, 1)
    }

    #[test]
    fn test_raw_segment_simple() {
        let seg = RawSegment::new(
            "UNH",
            vec![vec!["00001"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]],
            make_position(),
        );
        assert_eq!(seg.id, "UNH");
        assert_eq!(seg.element_count(), 2);
        assert_eq!(seg.get_element(0), "00001");
        assert_eq!(seg.get_component(1, 0), "UTILMD");
        assert_eq!(seg.get_component(1, 4), "S2.1");
    }

    #[test]
    fn test_raw_segment_get_element_out_of_bounds() {
        let seg = RawSegment::new("BGM", vec![vec!["E03"]], make_position());
        assert_eq!(seg.get_element(0), "E03");
        assert_eq!(seg.get_element(1), "");
        assert_eq!(seg.get_element(99), "");
    }

    #[test]
    fn test_raw_segment_get_component_out_of_bounds() {
        let seg = RawSegment::new("NAD", vec![vec!["Z04", "123"]], make_position());
        assert_eq!(seg.get_component(0, 0), "Z04");
        assert_eq!(seg.get_component(0, 1), "123");
        assert_eq!(seg.get_component(0, 2), "");
        assert_eq!(seg.get_component(1, 0), "");
    }

    #[test]
    fn test_raw_segment_display() {
        let seg = RawSegment::new(
            "NAD",
            vec![vec!["Z04"], vec!["9900123000002", "500"]],
            make_position(),
        );
        assert_eq!(seg.to_string(), "NAD+Z04+9900123000002:500");
    }

    #[test]
    fn test_raw_segment_display_no_elements() {
        let seg = RawSegment::new("UNA", vec![], make_position());
        assert_eq!(seg.to_string(), "UNA");
    }

    #[test]
    fn test_raw_segment_is_case_insensitive() {
        let seg = RawSegment::new("NAD", vec![], make_position());
        assert!(seg.is("NAD"));
        assert!(seg.is("nad"));
        assert!(seg.is("Nad"));
        assert!(!seg.is("LOC"));
    }

    #[test]
    fn test_raw_segment_get_components() {
        let seg = RawSegment::new(
            "DTM",
            vec![vec!["137", "202501010000+01", "303"]],
            make_position(),
        );
        let components = seg.get_components(0);
        assert_eq!(components, &["137", "202501010000+01", "303"]);
        assert!(seg.get_components(1).is_empty());
    }

    #[test]
    fn test_raw_segment_zero_copy_lifetime() {
        let input = String::from("NAD+Z04+9900123000002:500");
        let seg = RawSegment::new(
            &input[0..3],
            vec![vec![&input[4..7]], vec![&input[8..21], &input[22..25]]],
            make_position(),
        );
        // Verify that the segment borrows from the input
        assert_eq!(seg.id, "NAD");
        assert_eq!(seg.get_element(0), "Z04");
        assert_eq!(seg.get_component(1, 0), "9900123000002");
        assert_eq!(seg.get_component(1, 1), "500");
    }

    #[test]
    fn test_raw_segment_clone() {
        let seg = RawSegment::new("LOC", vec![vec!["Z16", "DE00014545768"]], make_position());
        let cloned = seg.clone();
        assert_eq!(seg.id, cloned.id);
        assert_eq!(seg.elements, cloned.elements);
        assert_eq!(seg.position, cloned.position);
    }

    #[test]
    fn test_raw_segment_to_raw_string() {
        let seg = RawSegment::new(
            "LOC",
            vec![vec!["Z16"], vec!["DE00014545768S0000000000000003054"]],
            make_position(),
        );
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(
            seg.to_raw_string(&delimiters),
            "LOC+Z16+DE00014545768S0000000000000003054"
        );
    }

    #[test]
    fn test_raw_segment_to_raw_string_composite() {
        let seg = RawSegment::new(
            "DTM",
            vec![vec!["137", "202507011330", "303"]],
            make_position(),
        );
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(seg.to_raw_string(&delimiters), "DTM+137:202507011330:303");
    }

    #[test]
    fn test_raw_segment_to_raw_string_no_elements() {
        let seg = RawSegment::new("UNA", vec![], make_position());
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(seg.to_raw_string(&delimiters), "UNA");
    }

    #[test]
    fn test_raw_segment_to_raw_string_empty_middle_element() {
        // Segment like "CCI+Z30++Z07" where element[1] is empty but not trailing,
        // so its separator must be kept.
        let seg = RawSegment::new(
            "CCI",
            vec![vec!["Z30"], vec![""], vec!["Z07"]],
            make_position(),
        );
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(seg.to_raw_string(&delimiters), "CCI+Z30++Z07");
    }

    #[test]
    fn test_raw_segment_to_raw_string_trailing_empty_components() {
        // Trailing empty components inside an element must survive: "CAV+SA::::"
        let seg = RawSegment::new(
            "CAV",
            vec![vec!["SA", "", "", "", ""]],
            make_position(),
        );
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(seg.to_raw_string(&delimiters), "CAV+SA::::");
    }

    #[test]
    fn test_raw_segment_to_raw_string_trailing_empty_elements() {
        // Trailing empty elements should be trimmed
        let seg = RawSegment::new(
            "BGM",
            vec![vec!["E03"], vec![""], vec![""]],
            make_position(),
        );
        let delimiters = crate::EdifactDelimiters::default();
        assert_eq!(seg.to_raw_string(&delimiters), "BGM+E03");
    }
}
276}