Skip to main content

hl7v2_parser/
lib.rs

1//! HL7 v2 message parser.
2//!
3//! This crate provides parsing functionality for HL7 v2 messages,
4//! including:
5//! - Message parsing from raw bytes
6//! - Batch message handling (FHS/BHS/BTS/FTS)
7//! - MLLP-framed message parsing
8//! - Path-based field access (re-exported from hl7v2-query)
9//!
10//! # Memory Efficiency
11//!
12//! This parser uses a "zero-allocation where possible" approach rather than true zero-copy.
13//! Parsed messages own their data via `Vec<u8>`, which provides:
14//!
15//! - Safe lifetime management without complex borrow checker patterns
16//! - Ergonomic API that doesn't require managing input lifetimes
17//! - Ability to modify and re-serialize messages
18//!
19//! For memory-constrained environments or very large messages, consider using
20//! [`hl7v2_stream`](../hl7v2_stream/index.html) which provides an event-based
21//! streaming parser with bounded memory usage.
22//!
23//! # Example
24//!
25//! ```
26//! use hl7v2_parser::parse;
27//!
28//! let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01|ABC123|P|2.5.1\rPID|1||123456^^^HOSP^MR||Doe^John\r";
29//! let message = parse(hl7).unwrap();
30//!
31//! assert_eq!(message.segments.len(), 2);
32//! ```
33
34use hl7v2_escape::unescape_text;
35use hl7v2_model::*;
36
37// Re-export query functionality from hl7v2-query for backward compatibility
38pub use hl7v2_query::{get, get_presence};
39
40/// Parse HL7 v2 message from bytes.
41///
42/// This is the primary entry point for parsing HL7 messages.
43///
44/// # Arguments
45///
46/// * `bytes` - The raw HL7 message bytes
47///
48/// # Returns
49///
50/// The parsed `Message`, or an error if parsing fails
51///
52/// # Example
53///
54/// ```
55/// use hl7v2_parser::parse;
56///
57/// let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01|ABC123|P|2.5.1\rPID|1||123456^^^HOSP^MR||Doe^John\r";
58/// let message = parse(hl7).unwrap();
59/// assert_eq!(message.segments.len(), 2);
60/// ```
61pub fn parse(bytes: &[u8]) -> Result<Message, Error> {
62    // Convert bytes to string
63    let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
64
65    // Split into lines (segments)
66    let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
67
68    if lines.is_empty() {
69        return Err(Error::InvalidSegmentId);
70    }
71
72    // First segment must be MSH
73    if !lines[0].starts_with("MSH") {
74        return Err(Error::InvalidSegmentId);
75    }
76
77    // Parse delimiters from MSH segment
78    let delims = Delims::parse_from_msh(lines[0]).map_err(|e| Error::ParseError {
79        segment_id: "MSH".to_string(),
80        field_index: 0,
81        source: Box::new(e),
82    })?;
83
84    // Parse all segments
85    let mut segments = Vec::new();
86    for line in lines {
87        let segment = parse_segment(line, &delims).map_err(|e| Error::ParseError {
88            segment_id: if line.len() >= 3 {
89                line[..3].to_string()
90            } else {
91                line.to_string()
92            },
93            field_index: 0,
94            source: Box::new(e),
95        })?;
96        segments.push(segment);
97    }
98
99    // Extract charset information from MSH-18 if present
100    let charsets = extract_charsets(&segments);
101
102    Ok(Message {
103        delims,
104        segments,
105        charsets,
106    })
107}
108
109/// Parse HL7 v2 message from MLLP framed bytes.
110///
111/// This function first removes the MLLP framing and then parses the message.
112///
113/// # Arguments
114///
115/// * `bytes` - The MLLP-framed HL7 message bytes
116///
117/// # Returns
118///
119/// The parsed `Message`, or an error if parsing fails
120///
121/// # Example
122///
123/// ```
124/// use hl7v2_parser::parse_mllp;
125/// use hl7v2_mllp::wrap_mllp;
126///
127/// let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01|ABC123|P|2.5.1\r";
128/// let framed = wrap_mllp(hl7);
129/// let message = parse_mllp(&framed).unwrap();
130/// assert_eq!(message.segments.len(), 1);
131/// ```
132pub fn parse_mllp(bytes: &[u8]) -> Result<Message, Error> {
133    let hl7_content = hl7v2_mllp::unwrap_mllp(bytes).map_err(|e| Error::Framing(e.to_string()))?;
134    parse(hl7_content)
135}
136
137/// Parse HL7 v2 batch from bytes.
138///
139/// # Arguments
140///
141/// * `bytes` - The raw HL7 batch bytes
142///
143/// # Returns
144///
145/// The parsed `Batch`, or an error if parsing fails
146pub fn parse_batch(bytes: &[u8]) -> Result<Batch, Error> {
147    // Convert bytes to string
148    let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
149
150    // Split into lines (segments)
151    let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
152
153    if lines.is_empty() {
154        return Err(Error::InvalidSegmentId);
155    }
156
157    // Check if this is a batch (starts with BHS) or regular message (starts with MSH)
158    let first_line = lines[0];
159    if first_line.starts_with("BHS") {
160        parse_batch_with_header(&lines)
161    } else if first_line.starts_with("MSH") {
162        // This is a single message, wrap it in a batch
163        let message = parse(bytes)?;
164        Ok(Batch {
165            header: None,
166            messages: vec![message],
167            trailer: None,
168        })
169    } else {
170        Err(Error::InvalidSegmentId)
171    }
172}
173
174/// Parse HL7 v2 file batch from bytes.
175///
176/// # Arguments
177///
178/// * `bytes` - The raw HL7 file batch bytes
179///
180/// # Returns
181///
182/// The parsed `FileBatch`, or an error if parsing fails
183pub fn parse_file_batch(bytes: &[u8]) -> Result<FileBatch, Error> {
184    // Convert bytes to string
185    let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
186
187    // Split into lines (segments)
188    let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
189
190    if lines.is_empty() {
191        return Err(Error::InvalidSegmentId);
192    }
193
194    // Check if this is a file batch (starts with FHS)
195    let first_line = lines[0];
196    if first_line.starts_with("FHS") {
197        parse_file_batch_with_header(&lines)
198    } else if first_line.starts_with("BHS") || first_line.starts_with("MSH") {
199        // This is a batch or single message, wrap it in a file batch
200        let batch_data = parse_batch(bytes)?;
201        Ok(FileBatch {
202            header: None,
203            batches: vec![batch_data],
204            trailer: None,
205        })
206    } else {
207        Err(Error::InvalidSegmentId)
208    }
209}
210
211// ============================================================================
212// Internal parsing functions
213// ============================================================================
214
215/// Parse a single segment
216fn parse_segment(line: &str, delims: &Delims) -> Result<Segment, Error> {
217    if line.len() < 3 {
218        return Err(Error::InvalidSegmentId);
219    }
220
221    // Parse segment ID
222    let id_bytes = &line.as_bytes()[0..3];
223    let mut id = [0u8; 3];
224    id.copy_from_slice(id_bytes);
225
226    // Ensure segment ID is all uppercase ASCII letters or digits
227    for &byte in &id {
228        if !(byte.is_ascii_uppercase() || byte.is_ascii_digit()) {
229            return Err(Error::InvalidSegmentId);
230        }
231    }
232
233    // Parse fields
234    let fields_str = if line.len() > 4 {
235        &line[4..] // Skip segment ID and field separator
236    } else {
237        ""
238    };
239
240    let mut fields = parse_fields(fields_str, delims).map_err(|e| Error::ParseError {
241        segment_id: String::from_utf8_lossy(&id).to_string(),
242        field_index: 0,
243        source: Box::new(e),
244    })?;
245
246    // Special handling for MSH segment
247    if &id == b"MSH" {
248        // MSH-2 (the encoding characters) should be treated as a single atomic value
249        if !fields.is_empty() {
250            let encoding_chars =
251                String::from_iter([delims.comp, delims.rep, delims.esc, delims.sub]);
252
253            let encoding_field = Field {
254                reps: vec![Rep {
255                    comps: vec![Comp {
256                        subs: vec![Atom::Text(encoding_chars)],
257                    }],
258                }],
259            };
260            // Replace the first field with the corrected encoding field
261            fields[0] = encoding_field;
262        }
263        Ok(Segment { id, fields })
264    } else {
265        Ok(Segment { id, fields })
266    }
267}
268
269/// Parse fields from a segment
270fn parse_fields(fields_str: &str, delims: &Delims) -> Result<Vec<Field>, Error> {
271    if fields_str.is_empty() {
272        return Ok(vec![]);
273    }
274
275    // Count fields first to pre-allocate the vector
276    let field_count = fields_str.matches(delims.field).count() + 1;
277    let mut fields = Vec::with_capacity(field_count);
278
279    // Use split iterator directly instead of collecting into intermediate vector
280    for (i, field_str) in fields_str.split(delims.field).enumerate() {
281        let field = parse_field(field_str, delims).map_err(|e| Error::ParseError {
282            segment_id: "UNKNOWN".to_string(),
283            field_index: i,
284            source: Box::new(e),
285        })?;
286        fields.push(field);
287    }
288
289    Ok(fields)
290}
291
292/// Parse a single field
293fn parse_field(field_str: &str, delims: &Delims) -> Result<Field, Error> {
294    // Validate field format
295    if field_str.contains('\n') || field_str.contains('\r') {
296        return Err(Error::InvalidFieldFormat {
297            details: "Field contains invalid line break characters".to_string(),
298        });
299    }
300
301    // Count repetitions first to pre-allocate the vector
302    let rep_count = field_str.matches(delims.rep).count() + 1;
303    let mut reps = Vec::with_capacity(rep_count);
304
305    for (i, rep_str) in field_str.split(delims.rep).enumerate() {
306        let rep = parse_rep(rep_str, delims).map_err(|e| match e {
307            Error::InvalidRepFormat { .. } => e,
308            _ => Error::InvalidRepFormat {
309                details: format!("Repetition {}: {}", i, e),
310            },
311        })?;
312        reps.push(rep);
313    }
314
315    Ok(Field { reps })
316}
317
318/// Parse a repetition
319fn parse_rep(rep_str: &str, delims: &Delims) -> Result<Rep, Error> {
320    // Handle NULL value
321    if rep_str == "\"\"" {
322        return Ok(Rep {
323            comps: vec![Comp {
324                subs: vec![Atom::Null],
325            }],
326        });
327    }
328
329    // Validate repetition format
330    if rep_str.contains('\n') || rep_str.contains('\r') {
331        return Err(Error::InvalidRepFormat {
332            details: "Repetition contains invalid line break characters".to_string(),
333        });
334    }
335
336    // Count components first to pre-allocate the vector
337    let comp_count = rep_str.matches(delims.comp).count() + 1;
338    let mut comps = Vec::with_capacity(comp_count);
339
340    for (i, comp_str) in rep_str.split(delims.comp).enumerate() {
341        let comp = parse_comp(comp_str, delims).map_err(|e| match e {
342            Error::InvalidCompFormat { .. } => e,
343            _ => Error::InvalidCompFormat {
344                details: format!("Component {}: {}", i, e),
345            },
346        })?;
347        comps.push(comp);
348    }
349
350    Ok(Rep { comps })
351}
352
353/// Parse a component
354fn parse_comp(comp_str: &str, delims: &Delims) -> Result<Comp, Error> {
355    // Validate component format
356    if comp_str.contains('\n') || comp_str.contains('\r') {
357        return Err(Error::InvalidCompFormat {
358            details: "Component contains invalid line break characters".to_string(),
359        });
360    }
361
362    // Count subcomponents first to pre-allocate the vector
363    let sub_count = comp_str.matches(delims.sub).count() + 1;
364    let mut subs = Vec::with_capacity(sub_count);
365
366    for (i, sub_str) in comp_str.split(delims.sub).enumerate() {
367        let atom = parse_atom(sub_str, delims).map_err(|e| match e {
368            Error::InvalidSubcompFormat { .. } => e,
369            _ => Error::InvalidSubcompFormat {
370                details: format!("Subcomponent {}: {}", i, e),
371            },
372        })?;
373        subs.push(atom);
374    }
375
376    Ok(Comp { subs })
377}
378
379/// Parse an atom (unescaped text or NULL)
380fn parse_atom(atom_str: &str, delims: &Delims) -> Result<Atom, Error> {
381    // Handle NULL value
382    if atom_str == "\"\"" {
383        return Ok(Atom::Null);
384    }
385
386    // Validate atom format
387    if atom_str.contains('\n') || atom_str.contains('\r') {
388        return Err(Error::InvalidSubcompFormat {
389            details: "Subcomponent contains invalid line break characters".to_string(),
390        });
391    }
392
393    // Unescape the text
394    let unescaped = unescape_text(atom_str, delims)?;
395    Ok(Atom::Text(unescaped))
396}
397
398/// Extract character sets from MSH-18 field
399fn extract_charsets(segments: &[Segment]) -> Vec<String> {
400    // Look for the MSH segment (should be the first one)
401    if let Some(msh_segment) = segments.first()
402        && &msh_segment.id == b"MSH"
403    {
404        // MSH-18 is parsed field index 17
405        if msh_segment.fields.len() > 17 {
406            let field_18 = &msh_segment.fields[17];
407
408            if !field_18.reps.is_empty() {
409                let rep = &field_18.reps[0];
410
411                let mut charsets = Vec::new();
412                for comp in &rep.comps {
413                    if !comp.subs.is_empty() {
414                        match &comp.subs[0] {
415                            Atom::Text(text) => {
416                                if !text.is_empty() {
417                                    charsets.push(text.clone());
418                                }
419                            }
420                            Atom::Null => continue,
421                        }
422                    }
423                }
424
425                return charsets;
426            }
427        }
428    }
429    vec![]
430}
431
432/// Parse a batch that starts with BHS
433fn parse_batch_with_header(lines: &[&str]) -> Result<Batch, Error> {
434    if !lines[0].starts_with("BHS") {
435        return Err(Error::InvalidBatchHeader {
436            details: "Batch must start with BHS segment".to_string(),
437        });
438    }
439
440    // Parse delimiters from the first MSH segment we find
441    let delims = find_and_parse_delimiters(lines).map_err(|e| Error::BatchParseError {
442        details: format!("Failed to parse delimiters: {}", e),
443    })?;
444
445    let mut header = None;
446    let mut messages = Vec::new();
447    let mut trailer = None;
448    let mut current_message_lines = Vec::new();
449
450    for &line in lines {
451        if line.starts_with("BHS") {
452            let bhs_segment =
453                parse_segment(line, &delims).map_err(|e| Error::InvalidBatchHeader {
454                    details: format!("Failed to parse BHS segment: {}", e),
455                })?;
456            header = Some(bhs_segment);
457        } else if line.starts_with("BTS") {
458            let bts_segment =
459                parse_segment(line, &delims).map_err(|e| Error::InvalidBatchTrailer {
460                    details: format!("Failed to parse BTS segment: {}", e),
461                })?;
462            trailer = Some(bts_segment);
463        } else if line.starts_with("MSH") {
464            if !current_message_lines.is_empty() {
465                let message_text = current_message_lines.to_vec().join("\r");
466                let message =
467                    parse(message_text.as_bytes()).map_err(|e| Error::BatchParseError {
468                        details: format!("Failed to parse message in batch: {}", e),
469                    })?;
470                messages.push(message);
471                current_message_lines.clear();
472            }
473            current_message_lines.push(line);
474        } else {
475            current_message_lines.push(line);
476        }
477    }
478
479    if !current_message_lines.is_empty() {
480        let message_text = current_message_lines.to_vec().join("\r");
481        let message = parse(message_text.as_bytes()).map_err(|e| Error::BatchParseError {
482            details: format!("Failed to parse final message in batch: {}", e),
483        })?;
484        messages.push(message);
485    }
486
487    Ok(Batch {
488        header,
489        messages,
490        trailer,
491    })
492}
493
494/// Parse a file batch that starts with FHS
495fn parse_file_batch_with_header(lines: &[&str]) -> Result<FileBatch, Error> {
496    if !lines[0].starts_with("FHS") {
497        return Err(Error::InvalidBatchHeader {
498            details: "File batch must start with FHS segment".to_string(),
499        });
500    }
501
502    let delims = find_and_parse_delimiters(lines).map_err(|e| Error::BatchParseError {
503        details: format!("Failed to parse delimiters: {}", e),
504    })?;
505
506    let mut header = None;
507    let mut batches = Vec::new();
508    let mut trailer = None;
509    let mut current_batch_lines = Vec::new();
510
511    for &line in lines {
512        if line.starts_with("FHS") {
513            let fhs_segment =
514                parse_segment(line, &delims).map_err(|e| Error::InvalidBatchHeader {
515                    details: format!("Failed to parse FHS segment: {}", e),
516                })?;
517            header = Some(fhs_segment);
518        } else if line.starts_with("FTS") {
519            let fts_segment =
520                parse_segment(line, &delims).map_err(|e| Error::InvalidBatchTrailer {
521                    details: format!("Failed to parse FTS segment: {}", e),
522                })?;
523            trailer = Some(fts_segment);
524        } else if line.starts_with("BHS") {
525            if !current_batch_lines.is_empty() {
526                let batch_text = current_batch_lines.to_vec().join("\r");
527                match parse_batch(batch_text.as_bytes()) {
528                    Ok(batch) => batches.push(batch),
529                    Err(e) => {
530                        let message = parse(batch_text.as_bytes()).map_err(|_| e)?;
531                        batches.push(Batch {
532                            header: None,
533                            messages: vec![message],
534                            trailer: None,
535                        });
536                    }
537                }
538                current_batch_lines.clear();
539            }
540            current_batch_lines.push(line);
541        } else {
542            current_batch_lines.push(line);
543        }
544    }
545
546    if !current_batch_lines.is_empty() {
547        let batch_text = current_batch_lines.to_vec().join("\r");
548        match parse_batch(batch_text.as_bytes()) {
549            Ok(batch) => batches.push(batch),
550            Err(e) => {
551                let message = parse(batch_text.as_bytes()).map_err(|_| e)?;
552                batches.push(Batch {
553                    header: None,
554                    messages: vec![message],
555                    trailer: None,
556                });
557            }
558        }
559    }
560
561    Ok(FileBatch {
562        header,
563        batches,
564        trailer,
565    })
566}
567
568/// Find and parse delimiters from the first MSH segment in the lines
569fn find_and_parse_delimiters(lines: &[&str]) -> Result<Delims, Error> {
570    for line in lines {
571        if line.starts_with("MSH") {
572            return Delims::parse_from_msh(line);
573        }
574    }
575    Ok(Delims::default())
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Full ADT^A01 message: MSH followed by one PID segment.
    const ADT_WITH_PID: &[u8] = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01^ADT_A01|ABC123|P|2.5.1\rPID|1||123456^^^HOSP^MR||Doe^John\r";

    /// The same MSH segment on its own.
    const ADT_MSH_ONLY: &[u8] = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01^ADT_A01|ABC123|P|2.5.1\r";

    #[test]
    fn test_parse_simple_message() {
        let message = parse(ADT_WITH_PID).unwrap();

        // Delimiters are taken from MSH-1 / MSH-2
        let delims = &message.delims;
        assert_eq!(delims.field, '|');
        assert_eq!(delims.comp, '^');
        assert_eq!(delims.rep, '~');
        assert_eq!(delims.esc, '\\');
        assert_eq!(delims.sub, '&');

        // Both segments are present, in order
        assert_eq!(message.segments.len(), 2);
        assert_eq!(&message.segments[0].id, b"MSH");
        assert_eq!(&message.segments[1].id, b"PID");
    }

    #[test]
    fn test_get_simple_field() {
        let message = parse(ADT_WITH_PID).unwrap();

        // PID.5.1 is the patient's last name, PID.5.2 the first name
        assert_eq!(get(&message, "PID.5.1"), Some("Doe"));
        assert_eq!(get(&message, "PID.5.2"), Some("John"));
    }

    #[test]
    fn test_get_msh_fields() {
        let message = parse(ADT_MSH_ONLY).unwrap();

        // MSH.3: sending application
        assert_eq!(get(&message, "MSH.3"), Some("SendingApp"));

        // MSH.9: message type, component by component
        assert_eq!(get(&message, "MSH.9.1"), Some("ADT"));
        assert_eq!(get(&message, "MSH.9.2"), Some("A01"));
    }

    #[test]
    fn test_get_with_repetitions() {
        let message = parse(
            b"MSH|^~\\&|SendingApp|SendingFac\rPID|1||123456^^^HOSP^MR||Doe^John~Smith^Jane\r",
        )
        .unwrap();

        // Without an index the first repetition is addressed
        assert_eq!(get(&message, "PID.5.1"), Some("Doe"));
        assert_eq!(get(&message, "PID.5.2"), Some("John"));

        // Explicitly address the second repetition
        assert_eq!(get(&message, "PID.5[2].1"), Some("Smith"));
        assert_eq!(get(&message, "PID.5[2].2"), Some("Jane"));
    }

    #[test]
    fn test_parse_mllp() {
        let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01|ABC123|P|2.5.1\r";
        let framed = hl7v2_mllp::wrap_mllp(hl7);

        let message = parse_mllp(&framed).unwrap();

        assert_eq!(message.segments.len(), 1);
    }

    #[test]
    fn test_presence_semantics() {
        let message =
            parse(b"MSH|^~\\&|SendingApp|SendingFac\rPID|1||123456^^^HOSP^MR||Doe^John|||\r")
                .unwrap();

        // A populated field reports its value
        let Presence::Value(val) = get_presence(&message, "PID.5.1") else {
            panic!("Expected Value");
        };
        assert_eq!(val, "Doe");

        // A field that exists but has no content is Empty
        assert!(
            matches!(get_presence(&message, "PID.8.1"), Presence::Empty),
            "Expected Empty"
        );

        // A field beyond the end of the segment is Missing
        assert!(
            matches!(get_presence(&message, "PID.50.1"), Presence::Missing),
            "Expected Missing"
        );
    }
}
671
672// Comprehensive test suite modules
673#[cfg(test)]
674pub mod comprehensive_tests;