1use hl7v2_escape::unescape_text;
35use hl7v2_model::*;
36
37pub use hl7v2_query::{get, get_presence};
39
40pub fn parse(bytes: &[u8]) -> Result<Message, Error> {
62 let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
64
65 let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
67
68 if lines.is_empty() {
69 return Err(Error::InvalidSegmentId);
70 }
71
72 if !lines[0].starts_with("MSH") {
74 return Err(Error::InvalidSegmentId);
75 }
76
77 let delims = Delims::parse_from_msh(lines[0]).map_err(|e| Error::ParseError {
79 segment_id: "MSH".to_string(),
80 field_index: 0,
81 source: Box::new(e),
82 })?;
83
84 let mut segments = Vec::new();
86 for line in lines {
87 let segment = parse_segment(line, &delims).map_err(|e| Error::ParseError {
88 segment_id: if line.len() >= 3 {
89 line[..3].to_string()
90 } else {
91 line.to_string()
92 },
93 field_index: 0,
94 source: Box::new(e),
95 })?;
96 segments.push(segment);
97 }
98
99 let charsets = extract_charsets(&segments);
101
102 Ok(Message {
103 delims,
104 segments,
105 charsets,
106 })
107}
108
109pub fn parse_mllp(bytes: &[u8]) -> Result<Message, Error> {
133 let hl7_content = hl7v2_mllp::unwrap_mllp(bytes).map_err(|e| Error::Framing(e.to_string()))?;
134 parse(hl7_content)
135}
136
137pub fn parse_batch(bytes: &[u8]) -> Result<Batch, Error> {
147 let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
149
150 let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
152
153 if lines.is_empty() {
154 return Err(Error::InvalidSegmentId);
155 }
156
157 let first_line = lines[0];
159 if first_line.starts_with("BHS") {
160 parse_batch_with_header(&lines)
161 } else if first_line.starts_with("MSH") {
162 let message = parse(bytes)?;
164 Ok(Batch {
165 header: None,
166 messages: vec![message],
167 trailer: None,
168 })
169 } else {
170 Err(Error::InvalidSegmentId)
171 }
172}
173
174pub fn parse_file_batch(bytes: &[u8]) -> Result<FileBatch, Error> {
184 let text = std::str::from_utf8(bytes).map_err(|_| Error::InvalidCharset)?;
186
187 let lines: Vec<&str> = text.split('\r').filter(|line| !line.is_empty()).collect();
189
190 if lines.is_empty() {
191 return Err(Error::InvalidSegmentId);
192 }
193
194 let first_line = lines[0];
196 if first_line.starts_with("FHS") {
197 parse_file_batch_with_header(&lines)
198 } else if first_line.starts_with("BHS") || first_line.starts_with("MSH") {
199 let batch_data = parse_batch(bytes)?;
201 Ok(FileBatch {
202 header: None,
203 batches: vec![batch_data],
204 trailer: None,
205 })
206 } else {
207 Err(Error::InvalidSegmentId)
208 }
209}
210
211fn parse_segment(line: &str, delims: &Delims) -> Result<Segment, Error> {
217 if line.len() < 3 {
218 return Err(Error::InvalidSegmentId);
219 }
220
221 let id_bytes = &line.as_bytes()[0..3];
223 let mut id = [0u8; 3];
224 id.copy_from_slice(id_bytes);
225
226 for &byte in &id {
228 if !(byte.is_ascii_uppercase() || byte.is_ascii_digit()) {
229 return Err(Error::InvalidSegmentId);
230 }
231 }
232
233 let fields_str = if line.len() > 4 {
235 &line[4..] } else {
237 ""
238 };
239
240 let mut fields = parse_fields(fields_str, delims).map_err(|e| Error::ParseError {
241 segment_id: String::from_utf8_lossy(&id).to_string(),
242 field_index: 0,
243 source: Box::new(e),
244 })?;
245
246 if &id == b"MSH" {
248 if !fields.is_empty() {
250 let encoding_chars =
251 String::from_iter([delims.comp, delims.rep, delims.esc, delims.sub]);
252
253 let encoding_field = Field {
254 reps: vec![Rep {
255 comps: vec![Comp {
256 subs: vec![Atom::Text(encoding_chars)],
257 }],
258 }],
259 };
260 fields[0] = encoding_field;
262 }
263 Ok(Segment { id, fields })
264 } else {
265 Ok(Segment { id, fields })
266 }
267}
268
269fn parse_fields(fields_str: &str, delims: &Delims) -> Result<Vec<Field>, Error> {
271 if fields_str.is_empty() {
272 return Ok(vec![]);
273 }
274
275 let field_count = fields_str.matches(delims.field).count() + 1;
277 let mut fields = Vec::with_capacity(field_count);
278
279 for (i, field_str) in fields_str.split(delims.field).enumerate() {
281 let field = parse_field(field_str, delims).map_err(|e| Error::ParseError {
282 segment_id: "UNKNOWN".to_string(),
283 field_index: i,
284 source: Box::new(e),
285 })?;
286 fields.push(field);
287 }
288
289 Ok(fields)
290}
291
292fn parse_field(field_str: &str, delims: &Delims) -> Result<Field, Error> {
294 if field_str.contains('\n') || field_str.contains('\r') {
296 return Err(Error::InvalidFieldFormat {
297 details: "Field contains invalid line break characters".to_string(),
298 });
299 }
300
301 let rep_count = field_str.matches(delims.rep).count() + 1;
303 let mut reps = Vec::with_capacity(rep_count);
304
305 for (i, rep_str) in field_str.split(delims.rep).enumerate() {
306 let rep = parse_rep(rep_str, delims).map_err(|e| match e {
307 Error::InvalidRepFormat { .. } => e,
308 _ => Error::InvalidRepFormat {
309 details: format!("Repetition {}: {}", i, e),
310 },
311 })?;
312 reps.push(rep);
313 }
314
315 Ok(Field { reps })
316}
317
318fn parse_rep(rep_str: &str, delims: &Delims) -> Result<Rep, Error> {
320 if rep_str == "\"\"" {
322 return Ok(Rep {
323 comps: vec![Comp {
324 subs: vec![Atom::Null],
325 }],
326 });
327 }
328
329 if rep_str.contains('\n') || rep_str.contains('\r') {
331 return Err(Error::InvalidRepFormat {
332 details: "Repetition contains invalid line break characters".to_string(),
333 });
334 }
335
336 let comp_count = rep_str.matches(delims.comp).count() + 1;
338 let mut comps = Vec::with_capacity(comp_count);
339
340 for (i, comp_str) in rep_str.split(delims.comp).enumerate() {
341 let comp = parse_comp(comp_str, delims).map_err(|e| match e {
342 Error::InvalidCompFormat { .. } => e,
343 _ => Error::InvalidCompFormat {
344 details: format!("Component {}: {}", i, e),
345 },
346 })?;
347 comps.push(comp);
348 }
349
350 Ok(Rep { comps })
351}
352
353fn parse_comp(comp_str: &str, delims: &Delims) -> Result<Comp, Error> {
355 if comp_str.contains('\n') || comp_str.contains('\r') {
357 return Err(Error::InvalidCompFormat {
358 details: "Component contains invalid line break characters".to_string(),
359 });
360 }
361
362 let sub_count = comp_str.matches(delims.sub).count() + 1;
364 let mut subs = Vec::with_capacity(sub_count);
365
366 for (i, sub_str) in comp_str.split(delims.sub).enumerate() {
367 let atom = parse_atom(sub_str, delims).map_err(|e| match e {
368 Error::InvalidSubcompFormat { .. } => e,
369 _ => Error::InvalidSubcompFormat {
370 details: format!("Subcomponent {}: {}", i, e),
371 },
372 })?;
373 subs.push(atom);
374 }
375
376 Ok(Comp { subs })
377}
378
379fn parse_atom(atom_str: &str, delims: &Delims) -> Result<Atom, Error> {
381 if atom_str == "\"\"" {
383 return Ok(Atom::Null);
384 }
385
386 if atom_str.contains('\n') || atom_str.contains('\r') {
388 return Err(Error::InvalidSubcompFormat {
389 details: "Subcomponent contains invalid line break characters".to_string(),
390 });
391 }
392
393 let unescaped = unescape_text(atom_str, delims)?;
395 Ok(Atom::Text(unescaped))
396}
397
398fn extract_charsets(segments: &[Segment]) -> Vec<String> {
400 if let Some(msh_segment) = segments.first()
402 && &msh_segment.id == b"MSH"
403 {
404 if msh_segment.fields.len() > 17 {
406 let field_18 = &msh_segment.fields[17];
407
408 if !field_18.reps.is_empty() {
409 let rep = &field_18.reps[0];
410
411 let mut charsets = Vec::new();
412 for comp in &rep.comps {
413 if !comp.subs.is_empty() {
414 match &comp.subs[0] {
415 Atom::Text(text) => {
416 if !text.is_empty() {
417 charsets.push(text.clone());
418 }
419 }
420 Atom::Null => continue,
421 }
422 }
423 }
424
425 return charsets;
426 }
427 }
428 }
429 vec![]
430}
431
432fn parse_batch_with_header(lines: &[&str]) -> Result<Batch, Error> {
434 if !lines[0].starts_with("BHS") {
435 return Err(Error::InvalidBatchHeader {
436 details: "Batch must start with BHS segment".to_string(),
437 });
438 }
439
440 let delims = find_and_parse_delimiters(lines).map_err(|e| Error::BatchParseError {
442 details: format!("Failed to parse delimiters: {}", e),
443 })?;
444
445 let mut header = None;
446 let mut messages = Vec::new();
447 let mut trailer = None;
448 let mut current_message_lines = Vec::new();
449
450 for &line in lines {
451 if line.starts_with("BHS") {
452 let bhs_segment =
453 parse_segment(line, &delims).map_err(|e| Error::InvalidBatchHeader {
454 details: format!("Failed to parse BHS segment: {}", e),
455 })?;
456 header = Some(bhs_segment);
457 } else if line.starts_with("BTS") {
458 let bts_segment =
459 parse_segment(line, &delims).map_err(|e| Error::InvalidBatchTrailer {
460 details: format!("Failed to parse BTS segment: {}", e),
461 })?;
462 trailer = Some(bts_segment);
463 } else if line.starts_with("MSH") {
464 if !current_message_lines.is_empty() {
465 let message_text = current_message_lines.to_vec().join("\r");
466 let message =
467 parse(message_text.as_bytes()).map_err(|e| Error::BatchParseError {
468 details: format!("Failed to parse message in batch: {}", e),
469 })?;
470 messages.push(message);
471 current_message_lines.clear();
472 }
473 current_message_lines.push(line);
474 } else {
475 current_message_lines.push(line);
476 }
477 }
478
479 if !current_message_lines.is_empty() {
480 let message_text = current_message_lines.to_vec().join("\r");
481 let message = parse(message_text.as_bytes()).map_err(|e| Error::BatchParseError {
482 details: format!("Failed to parse final message in batch: {}", e),
483 })?;
484 messages.push(message);
485 }
486
487 Ok(Batch {
488 header,
489 messages,
490 trailer,
491 })
492}
493
494fn parse_file_batch_with_header(lines: &[&str]) -> Result<FileBatch, Error> {
496 if !lines[0].starts_with("FHS") {
497 return Err(Error::InvalidBatchHeader {
498 details: "File batch must start with FHS segment".to_string(),
499 });
500 }
501
502 let delims = find_and_parse_delimiters(lines).map_err(|e| Error::BatchParseError {
503 details: format!("Failed to parse delimiters: {}", e),
504 })?;
505
506 let mut header = None;
507 let mut batches = Vec::new();
508 let mut trailer = None;
509 let mut current_batch_lines = Vec::new();
510
511 for &line in lines {
512 if line.starts_with("FHS") {
513 let fhs_segment =
514 parse_segment(line, &delims).map_err(|e| Error::InvalidBatchHeader {
515 details: format!("Failed to parse FHS segment: {}", e),
516 })?;
517 header = Some(fhs_segment);
518 } else if line.starts_with("FTS") {
519 let fts_segment =
520 parse_segment(line, &delims).map_err(|e| Error::InvalidBatchTrailer {
521 details: format!("Failed to parse FTS segment: {}", e),
522 })?;
523 trailer = Some(fts_segment);
524 } else if line.starts_with("BHS") {
525 if !current_batch_lines.is_empty() {
526 let batch_text = current_batch_lines.to_vec().join("\r");
527 match parse_batch(batch_text.as_bytes()) {
528 Ok(batch) => batches.push(batch),
529 Err(e) => {
530 let message = parse(batch_text.as_bytes()).map_err(|_| e)?;
531 batches.push(Batch {
532 header: None,
533 messages: vec![message],
534 trailer: None,
535 });
536 }
537 }
538 current_batch_lines.clear();
539 }
540 current_batch_lines.push(line);
541 } else {
542 current_batch_lines.push(line);
543 }
544 }
545
546 if !current_batch_lines.is_empty() {
547 let batch_text = current_batch_lines.to_vec().join("\r");
548 match parse_batch(batch_text.as_bytes()) {
549 Ok(batch) => batches.push(batch),
550 Err(e) => {
551 let message = parse(batch_text.as_bytes()).map_err(|_| e)?;
552 batches.push(Batch {
553 header: None,
554 messages: vec![message],
555 trailer: None,
556 });
557 }
558 }
559 }
560
561 Ok(FileBatch {
562 header,
563 batches,
564 trailer,
565 })
566}
567
568fn find_and_parse_delimiters(lines: &[&str]) -> Result<Delims, Error> {
570 for line in lines {
571 if line.starts_with("MSH") {
572 return Delims::parse_from_msh(line);
573 }
574 }
575 Ok(Delims::default())
576}
577
#[cfg(test)]
mod tests {
    use super::*;

    /// Message consisting of an MSH segment followed by one PID segment.
    const MSH_AND_PID: &[u8] = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01^ADT_A01|ABC123|P|2.5.1\rPID|1||123456^^^HOSP^MR||Doe^John\r";

    /// The delimiters and the segment ids are read from a two-segment message.
    #[test]
    fn test_parse_simple_message() {
        let message = parse(MSH_AND_PID).unwrap();

        let delims = &message.delims;
        assert_eq!(delims.field, '|');
        assert_eq!(delims.comp, '^');
        assert_eq!(delims.rep, '~');
        assert_eq!(delims.esc, '\\');
        assert_eq!(delims.sub, '&');

        assert_eq!(message.segments.len(), 2);
        let ids: Vec<&[u8; 3]> = message.segments.iter().map(|segment| &segment.id).collect();
        assert_eq!(ids, [b"MSH", b"PID"]);
    }

    /// Components of a plain field are addressable by path.
    #[test]
    fn test_get_simple_field() {
        let message = parse(MSH_AND_PID).unwrap();

        for (path, expected) in [("PID.5.1", "Doe"), ("PID.5.2", "John")] {
            assert_eq!(get(&message, path), Some(expected));
        }
    }

    /// MSH fields are addressable by their HL7 field numbers.
    #[test]
    fn test_get_msh_fields() {
        let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01^ADT_A01|ABC123|P|2.5.1\r";
        let message = parse(hl7).unwrap();

        for (path, expected) in [
            ("MSH.3", "SendingApp"),
            ("MSH.9.1", "ADT"),
            ("MSH.9.2", "A01"),
        ] {
            assert_eq!(get(&message, path), Some(expected));
        }
    }

    /// Without an index the first repetition is returned; `[2]` selects the
    /// second one.
    #[test]
    fn test_get_with_repetitions() {
        let hl7 =
            b"MSH|^~\\&|SendingApp|SendingFac\rPID|1||123456^^^HOSP^MR||Doe^John~Smith^Jane\r";
        let message = parse(hl7).unwrap();

        for (path, expected) in [
            ("PID.5.1", "Doe"),
            ("PID.5.2", "John"),
            ("PID.5[2].1", "Smith"),
            ("PID.5[2].2", "Jane"),
        ] {
            assert_eq!(get(&message, path), Some(expected));
        }
    }

    /// A message wrapped by `wrap_mllp` parses via `parse_mllp`.
    #[test]
    fn test_parse_mllp() {
        let hl7 = b"MSH|^~\\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20250128152312||ADT^A01|ABC123|P|2.5.1\r";
        let framed = hl7v2_mllp::wrap_mllp(hl7);

        let message = parse_mllp(&framed).unwrap();

        assert_eq!(message.segments.len(), 1);
    }

    /// Populated, empty and absent fields are reported as distinct states.
    #[test]
    fn test_presence_semantics() {
        let hl7 = b"MSH|^~\\&|SendingApp|SendingFac\rPID|1||123456^^^HOSP^MR||Doe^John|||\r";
        let message = parse(hl7).unwrap();

        let Presence::Value(val) = get_presence(&message, "PID.5.1") else {
            panic!("Expected Value");
        };
        assert_eq!(val, "Doe");

        if !matches!(get_presence(&message, "PID.8.1"), Presence::Empty) {
            panic!("Expected Empty");
        }

        if !matches!(get_presence(&message, "PID.50.1"), Presence::Missing) {
            panic!("Expected Missing");
        }
    }
}
671
672#[cfg(test)]
674pub mod comprehensive_tests;