use crate::edi_parse_error::EdiParseError;
/// Tokens for a whole document: one [`SegmentTokens`] entry per segment, in input order.
pub type DocumentTokens<'a> = Vec<SegmentTokens<'a>>;
/// A list of string slices borrowed from the input for `'a`; in [`DocumentTokens`] each
/// entry holds the elements of one segment.
pub type SegmentTokens<'a> = Vec<&'a str>;
/// Splits a raw EDI document into segments, and each segment into its elements.
///
/// The element, sub-element and segment delimiters are read from byte offsets
/// 103, 104 and 105 of `input` (the tail of the ISA header). The input is then
/// split on the segment delimiter; each piece is trimmed of surrounding
/// whitespace, empty pieces are dropped, and every remaining segment is split on
/// the element delimiter. Sub-elements are not split here; the sub-element
/// delimiter is only validated.
///
/// All returned slices borrow from `input`.
///
/// # Errors
///
/// Fails through `edi_assert!` when:
/// * `input` is shorter than 106 bytes,
/// * any of the three delimiter bytes is not ASCII,
/// * any two of the three delimiters are equal.
pub fn tokenize(input: &str) -> Result<DocumentTokens, EdiParseError> {
    edi_assert!(
        input.len() >= 106,
        "input not long enough to contain ISA header delimiters"
    );

    // Read the delimiters as bytes instead of slicing the `&str`: a byte-range
    // slice panics when an offset falls inside a multi-byte character, and a
    // multi-byte character inside the range would yield fewer than three chars.
    // Malformed input must surface as an error, not a panic.
    let delimiter_bytes = &input.as_bytes()[103..106];
    edi_assert!(
        delimiter_bytes.is_ascii(),
        "ISA header delimiters must be single-byte ASCII characters"
    );
    let element_delimiter = char::from(delimiter_bytes[0]);
    let sub_element_delimiter = char::from(delimiter_bytes[1]);
    let segment_delimiter = char::from(delimiter_bytes[2]);

    edi_assert!(
        element_delimiter != sub_element_delimiter,
        "element and subelement delimiters cannot be the same"
    );
    edi_assert!(
        sub_element_delimiter != segment_delimiter,
        "subelement and segment delimiters cannot be the same"
    );
    edi_assert!(
        element_delimiter != segment_delimiter,
        "element and segment delimiters cannot be the same"
    );

    // One lazy pass: no intermediate vector of raw segments is needed.
    let tokens: DocumentTokens = input
        .split(segment_delimiter)
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .map(|segment| segment.split(element_delimiter).collect())
        .collect();
    Ok(tokens)
}
/// Tokenizes a small, complete interchange and checks the segment count and the
/// number of elements in the ISA segment.
#[test]
fn basic_segment_tokenize() {
    // `tokenize` reads the delimiters from byte offsets 103..106, so the ISA
    // header must keep its fixed-width layout (106 bytes in total). The padded
    // fields are produced with explicit widths so the layout cannot be lost to
    // whitespace normalisation of this source file.
    let isa_header = format!(
        "ISA*00*{:<10}*00*{:<10}*ZZ*{:<15}*14*{:<15}*020226*1534*U*00401*000000001*0*T*>~",
        "", "", "SENDERISA", "0073268795005"
    );
    assert_eq!(isa_header.len(), 106);

    let test_input = [
        isa_header.as_str(),
        "GS*PO*SENDERGS*007326879*20020226*1534*1*X*004010~",
        "ST*850*000000001~",
        "BEG*00*SA*A99999-01**19970214~",
        "REF*VR*54321~",
        "ITD*01*3*1**15**16~",
        "DTM*002*19971219~",
        "DTM*002*19971219~",
        "SE*35*000000001~",
        "GE*1*1~",
        "IEA*1*000000001~",
    ]
    .join("\n");

    let tokens = tokenize(&test_input).unwrap();
    // Eleven segments: ISA, GS, ST, BEG, REF, ITD, DTM, DTM, SE, GE, IEA.
    assert_eq!(tokens.len(), 11);
    // The segment tag plus sixteen ISA elements.
    assert_eq!(tokens[0].len(), 17);
}
/// An input shorter than 106 bytes cannot contain the header delimiters, so
/// tokenizing it must yield an error.
#[test]
fn fail_to_tokenize_no_header() {
    let truncated = "00* *ZZ*SENDERISA *14*0073268795005 *020226*1534*U*00401*000000001*0*T";
    let outcome = tokenize(truncated);
    assert!(outcome.is_err());
}
/// A header whose three delimiter bytes are identical (`~~~`) must be rejected.
#[test]
fn fail_same_delimiters() {
    // The delimiters are read from byte offsets 103..106, so the header has to
    // keep its fixed-width fields for `~~~` to land on those offsets; otherwise
    // the test would exercise unrelated bytes from the following segment.
    let isa_header = format!(
        "ISA*00*{:<10}*00*{:<10}*ZZ*{:<15}*14*{:<15}*020226*1534*U*00401*000000001*0*T~~~",
        "", "", "SENDERISA", "0073268795005"
    );
    assert_eq!(isa_header.len(), 106);

    let test_input = [
        isa_header.as_str(),
        "GS*PO*SENDERGS*007326879*20020226*1534*1*X*004010~",
        "ST*850*000000001~",
    ]
    .join("\n");

    assert!(tokenize(&test_input).is_err());
}