1extern crate core;
16
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use std::fmt::{self, Display};
20use std::num::ParseIntError;
21use std::str::FromStr;
22use std::{error, result};
23
/// Error returned whenever a record, or one of its fields, holds a value the
/// parser does not recognise.
#[derive(Debug)]
pub struct ValueError;

impl Display for ValueError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("Received an unexpected value")
    }
}

// The default `Error` methods are used as-is; no `source` is overridden.
impl error::Error for ValueError {}
35
/// File-local result alias: every failure is returned as a boxed
/// `dyn Error` trait object, so differing error types can be mixed with `?`.
type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
38
/// Breaks a raw record into its pipe-separated (`|`) fields.
///
/// Empty fields are kept, so a trailing `|` yields a final empty string.
fn split_text(text: &str) -> Vec<&str> {
    let mut fields = Vec::new();
    fields.extend(text.split('|'));
    fields
}
44
45fn label_mmi_parts(parts: Vec<&str>) -> Result<HashMap<&str, &str>> {
48 if parts.len() != 10 {
49 println!(
50 "Record is not of the right length, expected 10 pipe-separated components, found {}",
51 parts.len()
52 );
53 return Err(Box::new(ValueError));
54 }
55 let mut map = HashMap::new();
56 map.insert("id", parts[0]);
57 map.insert("mmi", parts[1]);
58 map.insert("score", parts[2]);
59 map.insert("name", parts[3]);
60 map.insert("cui", parts[4]);
61 map.insert("semantic_types", parts[5]);
62 map.insert("triggers", parts[6]);
63 map.insert("location", parts[7]);
64 map.insert("positional_info", parts[8]);
65 map.insert("tree_codes", parts[9]);
66 Ok(map)
67}
68
/// Turns a bracketed, comma-separated list such as `[euka,helalo]` into its
/// individual entries.
///
/// All leading `[` and trailing `]` characters are removed before splitting;
/// the entries themselves are not trimmed.
fn parse_semantic_types(semantic_types: &str) -> Vec<String> {
    semantic_types
        .trim_start_matches('[')
        .trim_end_matches(']')
        .split(',')
        .map(String::from)
        .collect()
}
75
/// Location tag attached to a record or to a single trigger.
///
/// Values are produced by the `FromStr` implementation, which matches the
/// upper-cased input against the literals noted on each variant.
// NOTE(review): TI / AB / TX presumably stand for title, abstract and text —
// confirm against the format specification.
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub enum Location {
    /// Parsed from `"TI"`.
    TI,
    /// Parsed from `"AB"`.
    AB,
    /// Parsed from `"TX"`.
    TX,
    /// Parsed from the combined tag `"TI;AB"`.
    Tiab,
}
84
85impl FromStr for Location {
86 type Err = ValueError;
87 fn from_str(s: &str) -> std::result::Result<Location, ValueError> {
89 match s.to_uppercase().as_str() {
90 "TI" => Ok(Location::TI),
91 "AB" => Ok(Location::AB),
92 "TX" => Ok(Location::TX),
93 "TI;AB" => Ok(Location::Tiab),
94 _ => Err(ValueError),
95 }
96 }
97}
98
/// Splits a semicolon-separated list of tree codes.
///
/// Returns `None` when the field is empty, otherwise every `;`-separated
/// piece (empty pieces included) as an owned string.
fn parse_tree_codes(codes: &str) -> Option<Vec<String>> {
    match codes {
        "" => None,
        listed => Some(listed.split(';').map(String::from).collect()),
    }
}
108
/// Splits `x` on every occurrence of `pattern` that lies outside double
/// quotes.
///
/// A `"` toggles the "inside quotes" state and is kept in the output. The
/// text after the last separator is included whenever it is non-empty, so a
/// trailing separator does not produce an empty final part and an empty input
/// produces no parts at all.
fn split_with_quote_context(x: &str, pattern: char) -> Vec<String> {
    let mut is_in_quotes = false;
    let mut start_position = 0;
    let mut parts: Vec<String> = Vec::new();
    // `char_indices` yields byte offsets, which is what slicing requires;
    // counting characters instead mis-slices (or panics on) non-ASCII text.
    for (i, c) in x.char_indices() {
        if c == '"' {
            is_in_quotes = !is_in_quotes;
        } else if c == pattern && !is_in_quotes {
            parts.push(x[start_position..i].to_string());
            start_position = i + c.len_utf8();
        }
    }
    // Flush the tail after the loop so it is kept regardless of which
    // character ends the input (including a closing quote).
    if start_position < x.len() {
        parts.push(x[start_position..].to_string());
    }
    parts
}
132
/// One trigger entry of an MMI record.
///
/// In the raw record a trigger is a dash-separated sextuple; the fields below
/// are listed in the order they appear in that sextuple.
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub struct Trigger {
    /// First component, with double quotes removed.
    pub name: String,
    /// Second component, parsed as a [`Location`].
    pub loc: Location,
    /// Third component, parsed as an integer.
    pub loc_position: i32,
    /// Fourth component, with double quotes removed.
    pub text: String,
    /// Fifth component, with double quotes removed.
    pub part_of_speech: String,
    /// Sixth component: `true` for `1`, `false` for `0`.
    pub negation: bool,
}
149
150fn parse_bool(x: &str) -> Result<bool> {
155 match x {
156 "1" => Ok(true),
157 "0" => Ok(false),
158 _ => {
159 println!("Unexpected boolean: {}", x);
160 Err(Box::new(ValueError))
161 }
162 }
163}
164
165impl Trigger {
166 pub fn new(
168 n: &str,
169 loc: &str,
170 loc_pos: &str,
171 t: &str,
172 part_of_speech: &str,
173 neg: bool,
174 ) -> Trigger {
175 Trigger {
176 name: n.replace('\"', ""),
177 loc: Location::from_str(loc).expect("unable to parse Location"),
178 loc_position: loc_pos
179 .parse::<i32>()
180 .expect("unable to parse integer from location"),
181 text: t.replace('\"', ""),
182 part_of_speech: part_of_speech.replace('\"', ""),
183 negation: neg,
184 }
185 }
186}
187
188fn parse_triggers(info: &str) -> Result<Vec<Trigger>> {
190 let mut triggers: Vec<Trigger> = Vec::new();
191 let trigger_list = split_with_quote_context(info, ',');
192 for t in trigger_list {
193 let clean = t.trim_start_matches('[').trim_end_matches(']');
194 let parts = split_with_quote_context(clean, '-');
195 if parts.len() != 6 {
196 println!(
197 "Trigger format does not make sense, expected sextuple (6), got {} parts instead.",
198 &parts.len()
199 );
200 return Err(Box::new(ValueError));
201 } else {
202 let negation = parse_bool(&parts[5])?;
204 let trigger = Trigger::new(
205 &parts[0], &parts[1], &parts[2], &parts[3], &parts[4], negation,
206 );
207 triggers.push(trigger)
208 }
209 }
210 Ok(triggers)
211}
212
/// Splits `x` on every comma that lies outside square brackets.
///
/// Both `[` and `]` toggle the "inside brackets" state and are kept in the
/// output. The text after the last separator is included whenever it is
/// non-empty, whether or not it ends with `]`; a trailing comma does not
/// produce an empty final part and an empty input produces no parts.
fn split_with_bracket_context(x: &str) -> Vec<String> {
    let mut is_in_brackets = false;
    let mut start_position = 0;
    let mut parts: Vec<String> = Vec::new();
    // `char_indices` yields byte offsets, which is what slicing requires;
    // counting characters instead mis-slices (or panics on) non-ASCII text.
    for (i, c) in x.char_indices() {
        match c {
            '[' | ']' => is_in_brackets = !is_in_brackets,
            ',' if !is_in_brackets => {
                parts.push(x[start_position..i].to_string());
                // A comma is one byte long in UTF-8.
                start_position = i + 1;
            }
            _ => {}
        }
    }
    // Flush the tail after the loop so a final unbracketed item is not lost.
    if start_position < x.len() {
        parts.push(x[start_position..].to_string());
    }
    parts
}
236
/// Parses a slash-separated list of integers, optionally wrapped in square
/// brackets (e.g. `[4061/10]`).
///
/// # Errors
///
/// Returns the first [`ParseIntError`] encountered; an empty piece is an
/// error as well.
fn parse_bracketed_info(x: &str) -> result::Result<Vec<i32>, ParseIntError> {
    let inner = x.trim_start_matches('[').trim_end_matches(']');
    let mut numbers = Vec::new();
    for piece in inner.split('/') {
        numbers.push(piece.parse::<i32>()?);
    }
    Ok(numbers)
}
247
/// Shape of a record's positional-information field, as decided by
/// `categorize_positional_info` from the brackets and commas it contains.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum PositionalInfoType {
    /// No commas at all, e.g. `117/5;122/4`.
    A,
    /// Commas but no brackets, e.g. `117/5,122/4,113/2`.
    B,
    /// Brackets with commas only outside them, e.g. `[122/4],[117/6]`.
    C,
    /// Brackets with commas both inside and outside them,
    /// e.g. `[4061/10,4075/11],[4061/10,4075/11]`.
    D,
}
256
/// Scans a positional-information string and reports which structural
/// features it contains.
///
/// Returns `(has_brackets, has_comma_inside_brackets,
/// has_comma_outside_brackets)`.
fn tag_pos_info(x: &str) -> (bool, bool, bool) {
    let mut saw_bracket = false;
    let mut comma_inside = false;
    let mut comma_outside = false;
    let mut bracket_open = false;
    for symbol in x.chars() {
        match symbol {
            '[' => {
                saw_bracket = true;
                bracket_open = true;
            }
            ']' => bracket_open = false,
            ',' if bracket_open => comma_inside = true,
            ',' => comma_outside = true,
            _ => {}
        }
    }
    (saw_bracket, comma_inside, comma_outside)
}
284
285fn categorize_positional_info(
288 has_brackets: bool,
289 has_comma_inside_brackets: bool,
290 has_comma_outside_brackets: bool,
291) -> Result<PositionalInfoType> {
292 if !has_comma_outside_brackets && !has_comma_inside_brackets {
293 Ok(PositionalInfoType::A)
294 } else if !has_brackets && has_comma_outside_brackets {
295 Ok(PositionalInfoType::B)
296 } else if has_brackets && has_comma_outside_brackets && !has_comma_inside_brackets {
297 Ok(PositionalInfoType::C)
298 } else if has_brackets && has_comma_outside_brackets && has_comma_inside_brackets {
299 Ok(PositionalInfoType::D)
300 } else {
301 println!("could not parse positional information.");
302 Err(Box::new(ValueError))
303 }
304}
305
/// A single `start/length` pair taken from a positional-information field.
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub struct Position {
    /// Number before the `/`.
    pub start: i32,
    /// Number after the `/`.
    pub length: i32,
    /// Shape of the positional-information field this pair was read from.
    pub case: PositionalInfoType,
}
316
317impl Position {
318 pub fn new(start: i32, length: i32, case: PositionalInfoType) -> Position {
320 Position {
321 start,
322 length,
323 case,
324 }
325 }
326}
327
328pub fn check_parts(parts: &[&str]) -> Result<()> {
330 if parts.len() != 2 {
331 return Err(Box::new(ValueError));
332 }
333 Ok(())
334}
335
336pub fn parse_position_parts(position_str: &str, case: PositionalInfoType) -> Result<Position> {
337 let parts = position_str.split('/').collect::<Vec<&str>>();
338 check_parts(&parts)?;
339 let p1 = parts[0].parse::<i32>()?;
340 let p2 = parts[1].parse::<i32>()?;
341 Ok(Position::new(p1, p2, case))
342}
343
344fn parse_positional_info(info: &str) -> Result<Vec<Position>> {
346 let tags = tag_pos_info(info);
347 let category = categorize_positional_info(tags.0, tags.1, tags.2)?;
348 let mut positions: Vec<Position> = Vec::new();
349 match category {
350 PositionalInfoType::A => {
351 for section in info.split(';') {
352 let p = parse_position_parts(section, PositionalInfoType::A)?;
353 positions.push(p);
354 }
355 Ok(positions)
356 }
357 PositionalInfoType::B => {
358 for section in info.split(';') {
359 for subsection in section.split(',') {
360 let p = parse_position_parts(subsection, PositionalInfoType::B)?;
361 positions.push(p);
362 }
363 }
364 Ok(positions)
365 }
366 PositionalInfoType::C => {
367 for section in info.split(';') {
368 for subsection in section.split(',') {
369 let parts = parse_bracketed_info(subsection)?;
370 let p = Position::new(parts[0], parts[1], PositionalInfoType::C);
371 positions.push(p);
372 }
373 }
374 Ok(positions)
375 }
376 PositionalInfoType::D => {
377 for section in info.split(';') {
378 for subsection in split_with_bracket_context(section) {
379 for underground in subsection.split(',') {
380 let parts = parse_bracketed_info(underground)?;
381 let p = Position::new(parts[0], parts[1], PositionalInfoType::D);
382 positions.push(p);
383 }
384 }
385 }
386 Ok(positions)
387 }
388 }
389}
390
/// One fully parsed MMI record.
///
/// Fields are listed in the order they appear in the pipe-separated record
/// (see `label_mmi_parts`).
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct MmiOutput {
    /// First field, kept as text.
    pub id: String,
    /// Second field (the record-type marker), kept as text.
    pub mmi: String,
    /// Third field, parsed as a floating-point number.
    pub score: f64,
    /// Fourth field, kept as text.
    pub name: String,
    /// Fifth field, kept as text.
    pub cui: String,
    /// Sixth field: entries of the bracketed, comma-separated list.
    pub semantic_types: Vec<String>,
    /// Seventh field: the parsed trigger list.
    pub triggers: Vec<Trigger>,
    /// Eighth field, parsed as a [`Location`].
    pub location: Location,
    /// Ninth field: every `start/length` pair found.
    pub positional_info: Vec<Position>,
    /// Tenth field: `;`-separated codes, or `None` when the field is empty.
    pub tree_codes: Option<Vec<String>>,
}
416
417impl MmiOutput {
418 pub fn assemble(parts: HashMap<&str, &str>) -> Result<Self> {
426 let id = parts["id"].to_string();
429 let mmi = parts["mmi"].to_string();
430 let score = parts["score"].parse::<f64>()?;
431 let name = parts["name"].to_string();
432 let cui = parts["cui"].to_string();
433 let source_sem_types = parts["semantic_types"].to_string();
434 let semantic_types = parse_semantic_types(&source_sem_types);
435 let source_triggers = parts["triggers"].to_string();
436 let triggers = parse_triggers(&source_triggers)?;
437 let source_location = parts["location"].to_string();
438 let location = Location::from_str(&source_location)?;
439 let source_positions = parts["positional_info"].to_string();
440 let positional_info = parse_positional_info(&source_positions)?;
441 let source_tree_codes = parts["tree_codes"].to_string();
442 let tree_codes = parse_tree_codes(&source_tree_codes);
443 let mmi_output = MmiOutput {
444 id,
445 mmi,
446 score,
447 name,
448 cui,
449 semantic_types,
450 triggers,
451 location,
452 positional_info,
453 tree_codes,
454 };
455 Ok(mmi_output)
456 }
457}
458
/// Record-type marker of an abbreviation record.
// NOTE(review): the meaning of AA / UA is not visible in this file —
// confirm against the format specification.
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub enum AbbreviationType {
    /// Parsed from `"AA"`.
    AA,
    /// Parsed from `"UA"`.
    UA,
}
467
468impl FromStr for AbbreviationType {
469 type Err = ValueError;
470 fn from_str(s: &str) -> std::result::Result<AbbreviationType, ValueError> {
472 match s.to_uppercase().as_str() {
473 "AA" => Ok(AbbreviationType::AA),
474 "UA" => Ok(AbbreviationType::UA),
475 _ => Err(ValueError),
476 }
477 }
478}
479
/// The `start:length` pair found in the last field of an abbreviation record.
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub struct AaPosInfo {
    /// Number before the `:`.
    pub start: i32,
    /// Number after the `:`.
    pub length: i32,
}
486
487impl AaPosInfo {
488 pub fn new(s: i32, l: i32) -> Self {
490 AaPosInfo {
491 start: s,
492 length: l,
493 }
494 }
495}
496
/// One fully parsed abbreviation (AA/UA) record.
///
/// Fields are listed in the order they appear in the pipe-separated record
/// (see `label_aa_parts`).
#[derive(PartialEq, Eq, Debug, Serialize, Deserialize)]
pub struct AaOutput {
    /// First field, kept as text.
    pub id: String,
    /// Second field, parsed as an [`AbbreviationType`].
    pub abbreviation_type: AbbreviationType,
    /// Third field, kept as text.
    pub short_form: String,
    /// Fourth field, kept as text.
    pub long_form: String,
    /// Fifth field, parsed as an integer.
    pub short_token_count: i32,
    /// Sixth field, parsed as an integer.
    pub short_character_count: i32,
    /// Seventh field, parsed as an integer.
    pub long_token_count: i32,
    /// Eighth field, parsed as an integer.
    pub long_character_count: i32,
    /// Ninth field: the `start:length` pair.
    pub positional_info: AaPosInfo,
}
520
521impl AaOutput {
522 pub fn assemble(parts: HashMap<&str, &str>) -> Result<Self> {
530 let id = parts["id"].to_string();
533 let abbreviation_type = AbbreviationType::from_str(parts["abbreviation_type"])?;
534 let short_form = parts["short_form"].to_string();
535 let long_form = parts["long_form"].to_string();
536 let short_token_count = parts["short_token_count"].parse::<i32>()?;
537 let short_character_count = parts["short_character_count"].parse::<i32>()?;
538 let long_token_count = parts["long_token_count"].parse::<i32>()?;
539 let long_character_count = parts["long_character_count"].parse::<i32>()?;
540 let position_parts = parts["positional_info"].split(':').collect::<Vec<&str>>();
541 let pp1 = position_parts[0].parse::<i32>()?;
542 let pp2 = position_parts[1].parse::<i32>()?;
543 let positional_info = AaPosInfo::new(pp1, pp2);
544 let aa_output = AaOutput {
545 id,
546 abbreviation_type,
547 short_form,
548 long_form,
549 short_token_count,
550 short_character_count,
551 long_token_count,
552 long_character_count,
553 positional_info,
554 };
555 Ok(aa_output)
556 }
557}
558
559pub fn label_aa_parts(parts: Vec<&str>) -> Result<HashMap<&str, &str>> {
561 if parts.len() != 9 {
562 return Err(Box::new(ValueError));
563 }
564 let mut map: HashMap<&str, &str> = HashMap::new();
565 map.insert("id", parts[0]);
566 map.insert("abbreviation_type", parts[1]);
567 map.insert("short_form", parts[2]);
568 map.insert("long_form", parts[3]);
569 map.insert("short_token_count", parts[4]);
570 map.insert("short_character_count", parts[5]);
571 map.insert("long_token_count", parts[6]);
572 map.insert("long_character_count", parts[7]);
573 map.insert("positional_info", parts[8]);
574 Ok(map)
575}
576
/// Result of parsing one record with `parse_record`.
#[derive(Serialize, Deserialize, Debug)]
pub enum Output {
    /// A record whose second field is `MMI`.
    MMI(MmiOutput),
    /// A record whose second field is `AA` or `UA`.
    AA(AaOutput),
}
582
583pub fn parse_record(text: &str) -> Result<Output> {
619 let parts = split_text(text);
620 if parts.len() != 10 && parts.len() != 9 {
622 return Err(Box::new(ValueError));
623 }
624 match parts[1].to_ascii_uppercase().as_str() {
625 "MMI" => {
626 let fields = label_mmi_parts(parts)?;
627 let output = MmiOutput::assemble(fields)?;
628 Ok(Output::MMI(output))
629 }
630 "AA" | "UA" => {
631 let fields = label_aa_parts(parts)?;
632 let output = AaOutput::assemble(fields)?;
633 Ok(Output::AA(output))
634 }
635 _ => Err(Box::new(ValueError)),
636 }
637}
638
/// Unit tests for the record parser and its helpers.
#[cfg(test)]
mod tests {
    use core::panic;

    use super::*;

    /// Only `"1"` and `"0"` are valid flags.
    #[test]
    fn test_parse_bool() {
        assert!(parse_bool("1").unwrap());
        assert!(!parse_bool("0").unwrap());
        assert!(parse_bool("2").is_err());
    }

    /// Commas inside brackets must not split the groups.
    #[test]
    fn test_split_with_bracket_context() {
        let s1 = "[4061/10,4075/11],[4061/10,4075/11]";
        let r1 = split_with_bracket_context(s1);
        assert_eq!(r1, vec!["[4061/10,4075/11]", "[4061/10,4075/11]"])
    }

    /// Every positional-information shape is tagged and categorized as
    /// expected, and the one inconsistent flag combination is rejected.
    #[test]
    fn test_pos_info_categorization() {
        let s1 = "[4061/10,4075/11],[4061/10,4075/11]";
        let r1 = tag_pos_info(s1);
        let cat = categorize_positional_info(r1.0, r1.1, r1.2);

        assert_eq!(r1, (true, true, true));
        assert_eq!(cat.unwrap(), PositionalInfoType::D);

        let s1 = "117/5;122/4";
        let r1 = tag_pos_info(s1);
        let cat = categorize_positional_info(r1.0, r1.1, r1.2);

        assert_eq!(r1, (false, false, false));
        assert_eq!(cat.unwrap(), PositionalInfoType::A);

        let s1 = "117/5";
        let r1 = tag_pos_info(s1);
        let cat = categorize_positional_info(r1.0, r1.1, r1.2);

        assert_eq!(r1, (false, false, false));
        assert_eq!(cat.unwrap(), PositionalInfoType::A);

        let s1 = "117/5,122/4,113/2";
        let r1 = tag_pos_info(s1);
        let cat = categorize_positional_info(r1.0, r1.1, r1.2);

        assert_eq!(r1, (false, false, true));
        assert_eq!(cat.unwrap(), PositionalInfoType::B);

        let s1 = "[122/4],[117/6]";
        let r1 = tag_pos_info(s1);
        let cat = categorize_positional_info(r1.0, r1.1, r1.2);

        assert_eq!(r1, (true, false, true));
        assert_eq!(cat.unwrap(), PositionalInfoType::C);

        let r1 = categorize_positional_info(true, true, false);
        assert!(r1.is_err());
    }

    /// Separators inside double quotes must be ignored at both levels.
    #[test]
    fn test_quote_splitter() {
        let sample = "[\"Drug, NOS\"-tx-33-\"medicine\"-noun-0,\"Drug, NOS\"-tx-31-\"medicine\"-noun-0,\"Drug - NOS\"-tx-29-\"medication\"-noun-0,\"Drug, NOS\"-tx-5-\"drug\"-noun-0]";
        let r = split_with_quote_context(sample, ',');
        assert_eq!(r.len(), 4);
        for x in r {
            let r2 = split_with_quote_context(&x, '-');
            assert_eq!(6, r2.len());
        }
    }

    /// A trailing pipe yields a final empty field.
    #[test]
    fn test_split_text() {
        let sample = "24119710|MMI|637.30|Isopoda|C0598806|[euka]|";
        assert_eq!(
            split_text(sample),
            ["24119710", "MMI", "637.30", "Isopoda", "C0598806", "[euka]", ""]
        );
    }

    /// Ten fields are labeled in order; any other count is rejected.
    #[test]
    fn test_name_parts() {
        let sample = "24119710|MMI|637.30|Isopoda|C0598806|[euka]|[\"Isopod\"-ab-1-\"isopod\"-adj-0,\"Isopoda\"-ti-1-\"Isopoda\"-noun-0]|TI;AB|228/6;136/7|B01.050.500.131.365.400";
        let split = split_text(sample);
        assert_eq!(label_mmi_parts(split).unwrap(), {
            let mut map = HashMap::new();
            map.insert("id", "24119710");
            map.insert("mmi", "MMI");
            map.insert("score", "637.30");
            map.insert("name", "Isopoda");
            map.insert("cui", "C0598806");
            map.insert("semantic_types", "[euka]");
            map.insert(
                "triggers",
                "[\"Isopod\"-ab-1-\"isopod\"-adj-0,\"Isopoda\"-ti-1-\"Isopoda\"-noun-0]",
            );
            map.insert("location", "TI;AB");
            map.insert("positional_info", "228/6;136/7");
            map.insert("tree_codes", "B01.050.500.131.365.400");
            map
        });
        let split = split_text(sample);
        assert!(label_mmi_parts(split[0..5].to_vec()).is_err());
    }

    #[test]
    fn test_parse_semantic_types() {
        let sample = "[euka,helalo]";
        assert_eq!(parse_semantic_types(sample), ["euka", "helalo"]);
    }

    /// Every tag parses, and lower-case input is accepted as-is.
    #[test]
    fn test_location() {
        // Passed in lower case on purpose: `from_str` does the normalizing.
        let sample = "ti";
        assert_eq!(Location::from_str(sample).unwrap(), Location::TI);
        let sample = "AB";
        assert_eq!(Location::from_str(sample).unwrap(), Location::AB);
        let sample = "TX";
        assert_eq!(Location::from_str(sample).unwrap(), Location::TX);
        let sample = "TI;AB";
        assert_eq!(Location::from_str(sample).unwrap(), Location::Tiab);
    }

    /// An unknown tag is reported as an error.
    #[test]
    fn test_invalid_location() {
        let sample = "BG";
        assert!(Location::from_str(sample).is_err());
    }

    #[test]
    fn test_parse_tree_codes() {
        let sample = "";
        assert_eq!(parse_tree_codes(sample), None);
        let sample = "B01.050.500.131.365.400";
        assert_eq!(
            parse_tree_codes(sample),
            Some(vec![String::from("B01.050.500.131.365.400")])
        );
        let sample = "B01.050.500.131.365.400;B01.050.500.131.365.400";
        assert_eq!(
            parse_tree_codes(sample),
            Some(vec![
                "B01.050.500.131.365.400".to_string(),
                "B01.050.500.131.365.400".to_string()
            ])
        );
    }

    /// One sample per positional-information shape (A, D, B, C).
    #[test]
    fn test_parse_positional_info() {
        let sample = "228/6;136/7";
        assert_eq!(
            parse_positional_info(sample).unwrap(),
            vec![
                Position::new(228, 6, PositionalInfoType::A),
                Position::new(136, 7, PositionalInfoType::A)
            ]
        );
        let s1 = "[4061/10,4075/11],[4061/10,4075/11]";
        assert_eq!(
            parse_positional_info(s1).unwrap(),
            vec![
                Position::new(4061, 10, PositionalInfoType::D),
                Position::new(4075, 11, PositionalInfoType::D),
                Position::new(4061, 10, PositionalInfoType::D),
                Position::new(4075, 11, PositionalInfoType::D),
            ]
        );
        let s1 = "7059/5,7073/5";
        assert_eq!(
            parse_positional_info(s1).unwrap(),
            vec![
                Position::new(7059, 5, PositionalInfoType::B),
                Position::new(7073, 5, PositionalInfoType::B),
            ]
        );
        let s1 = "[1351/8],[1437/8]";
        assert_eq!(
            parse_positional_info(s1).unwrap(),
            vec![
                Position::new(1351, 8, PositionalInfoType::C),
                Position::new(1437, 8, PositionalInfoType::C),
            ]
        );
    }

    /// The constructor parses the location case-insensitively.
    #[test]
    fn test_new_trigger() {
        let t = ("hi", "tI;aB", "124", "fun times", "testing stuff", true);
        let tt = Trigger::new(t.0, t.1, t.2, t.3, t.4, t.5);
        let actual_tt = Trigger {
            name: String::from("hi"),
            loc: Location::Tiab,
            loc_position: 124,
            text: "fun times".to_string(),
            part_of_speech: "testing stuff".to_string(),
            negation: true,
        };
        assert_eq!(tt, actual_tt);
    }

    /// A complete sextuple parses; a truncated one is rejected.
    #[test]
    fn test_parse_triggers() {
        let sample = "[\"Crustacea\"-ti-1-\"Crustacea\"-noun-0]";
        let result = parse_triggers(sample);
        assert_eq!(
            result.unwrap(),
            [Trigger {
                name: "Crustacea".to_string(),
                loc: Location::TI,
                loc_position: 1,
                text: "Crustacea".to_string(),
                part_of_speech: "noun".to_string(),
                negation: false
            }]
        );
        let s2 = "[\"Crustacea\"-ti-1-\"Crustacea\"-noun";
        assert!(parse_triggers(s2).is_err());
    }

    /// Assembling from an already labeled map.
    #[test]
    fn test_new_mmi() {
        let mut map = HashMap::new();
        map.insert("id", "24119710");
        map.insert("mmi", "MMI");
        map.insert("score", "637.30");
        map.insert("name", "Isopoda");
        map.insert("cui", "C0598806");
        map.insert("semantic_types", "[euka]");
        map.insert(
            "triggers",
            "[\"Isopod\"-ab-1-\"isopod\"-adj-0,\"Isopoda\"-ti-1-\"Isopoda\"-noun-0]",
        );
        map.insert("location", "TI;AB");
        map.insert("positional_info", "228/6;136/7");
        map.insert("tree_codes", "B01.050.500.131.365.400");
        let expected = MmiOutput {
            id: "24119710".to_string(),
            mmi: "MMI".to_string(),
            score: 637.30,
            name: "Isopoda".to_string(),
            cui: "C0598806".to_string(),
            semantic_types: vec!["euka".to_string()],
            triggers: vec![
                Trigger {
                    name: "Isopod".to_string(),
                    loc: Location::AB,
                    loc_position: 1,
                    text: "isopod".to_string(),
                    part_of_speech: "adj".to_string(),
                    negation: false,
                },
                Trigger {
                    name: "Isopoda".to_string(),
                    loc: Location::TI,
                    loc_position: 1,
                    text: "Isopoda".to_string(),
                    part_of_speech: "noun".to_string(),
                    negation: false,
                },
            ],
            location: Location::Tiab,
            positional_info: vec![
                Position {
                    start: 228,
                    length: 6,
                    case: PositionalInfoType::A,
                },
                Position {
                    start: 136,
                    length: 7,
                    case: PositionalInfoType::A,
                },
            ],
            tree_codes: Some(vec!["B01.050.500.131.365.400".to_string()]),
        };
        assert_eq!(expected, MmiOutput::assemble(map).unwrap());
    }

    /// End-to-end parse of a raw MMI record.
    #[test]
    fn test_parse_mmi_for_mmi() {
        let s1 = "3124119710|MMI|637.30|Isopoda|C0598806|[euka]|[\"Isopod\"-ab-1-\"isopod\"-adj-0,\"Isopoda\"-ti-1-\"Isopoda\"-noun-0]|TI;AB|228/6;136/7|B01.050.500.131.365.400";
        let expected = MmiOutput {
            id: "3124119710".to_string(),
            mmi: "MMI".to_string(),
            score: 637.3,
            name: "Isopoda".to_string(),
            cui: "C0598806".to_string(),
            semantic_types: vec!["euka".to_string()],
            triggers: vec![
                Trigger {
                    name: "Isopod".to_string(),
                    loc: Location::AB,
                    loc_position: 1,
                    text: "isopod".to_string(),
                    part_of_speech: "adj".to_string(),
                    negation: false,
                },
                Trigger {
                    name: "Isopoda".to_string(),
                    loc: Location::TI,
                    loc_position: 1,
                    text: "Isopoda".to_string(),
                    part_of_speech: "noun".to_string(),
                    negation: false,
                },
            ],
            location: Location::Tiab,
            positional_info: vec![
                Position {
                    start: 228,
                    length: 6,
                    case: PositionalInfoType::A,
                },
                Position {
                    start: 136,
                    length: 7,
                    case: PositionalInfoType::A,
                },
            ],
            tree_codes: Some(vec!["B01.050.500.131.365.400".to_string()]),
        };
        let parsed = match parse_record(s1).unwrap() {
            Output::MMI(x) => x,
            _ => panic!("stuff"),
        };
        assert_eq!(parsed, expected);
    }

    /// End-to-end parse of a raw abbreviation record, checked field by field
    /// rather than merely printed.
    #[test]
    fn test_parse_mmi_for_aa() {
        let s1 = "23074487|AA|FY|fiscal years|1|2|3|12|9362:2";
        let parsed = match parse_record(s1).unwrap() {
            Output::AA(x) => x,
            _ => panic!("stuff"),
        };
        let expected = AaOutput {
            id: "23074487".to_string(),
            abbreviation_type: AbbreviationType::AA,
            short_form: "FY".to_string(),
            long_form: "fiscal years".to_string(),
            short_token_count: 1,
            short_character_count: 2,
            long_token_count: 3,
            long_character_count: 12,
            positional_info: AaPosInfo::new(9362, 2),
        };
        assert_eq!(parsed, expected);
    }

    /// A record with the wrong number of fields is an error, not a panic.
    #[test]
    fn test_panic_parse_mmi() {
        let s1 = "asda|fake|other stuff|";
        assert!(parse_record(s1).is_err());
    }

    #[test]
    fn test_abbreviation_type() {
        assert_eq!(
            AbbreviationType::AA,
            AbbreviationType::from_str("AA").unwrap()
        );
        assert_eq!(
            AbbreviationType::UA,
            AbbreviationType::from_str("UA").unwrap()
        );
        assert!(AbbreviationType::from_str("asfnkjsanf").is_err())
    }

    #[test]
    fn test_parse_bracketed_info() {
        let t = parse_bracketed_info("[12/hi]");
        assert!(t.is_err());
    }

    #[test]
    fn test_check_parts() {
        assert!(check_parts(&["hi", "bye"]).is_ok());
        assert!(check_parts(&["hi", "bye", "see ya"]).is_err());
    }

    #[test]
    fn test_label_aa_parts() {
        let sample = vec!["hi", "by", "se", "yA", "later", "alligator"];
        assert!(label_aa_parts(sample).is_err());
    }

    #[test]
    fn test_parse_record_fail() {
        assert!(parse_record("hi").is_err());
    }
}