use deunicode::deunicode_char;
use pest::Parser;
use crate::MT940Parser;
use crate::Rule;
/// Runs the full sanitizing pipeline over a raw statement.
///
/// The steps are applied in this order:
/// 1. [`to_swift_charset`] replaces characters the SWIFT grammar rejects,
/// 2. [`strip_stuff_between_messages`] removes lines sitting between messages,
/// 3. [`strip_excess_tag86_lines`] trims overly long `:86:` fields.
///
/// Returns the sanitized text as a new `String`.
pub fn sanitize(s: &str) -> String {
    strip_excess_tag86_lines(&strip_stuff_between_messages(&to_swift_charset(s)))
}
/// Converts every character of `s` into something the SWIFT grammar accepts.
///
/// For each character:
/// * if it parses as `Rule::swift_char`, it is kept unchanged;
/// * otherwise it is transliterated (`ä` maps to `a`, everything else goes
///   through `deunicode_char`, falling back to `.` when there is no mapping);
/// * if the transliteration does not parse as `Rule::swift_chars`, a single
///   `.` is emitted instead.
///
/// A transliteration may be longer than one character (e.g. `ß` becomes `ss`),
/// so the result can be longer than the input.
pub fn to_swift_charset(s: &str) -> String {
    let mut converted = String::with_capacity(s.len());
    for c in s.chars() {
        let original = c.to_string();
        if MT940Parser::parse(Rule::swift_char, &original).is_ok() {
            converted.push_str(&original);
            continue;
        }

        // Not a valid SWIFT character: try an ASCII transliteration.
        let replacement = match c {
            'ä' => "a",
            other => deunicode_char(other).unwrap_or("."),
        };
        if MT940Parser::parse(Rule::swift_chars, replacement).is_ok() {
            converted.push_str(replacement);
        } else {
            converted.push('.');
        }
    }
    converted
}
/// Removes lines that sit between messages.
///
/// A line counts as "tagged" when it parses as `Rule::field`. Two kinds of
/// lines are dropped:
///
/// * untagged lines directly preceding a `:20:` line, walking backwards until
///   the previous tagged line (or the start of the input);
/// * everything after the last tagged line, unless the last tag is `86`
///   (presumably because trailing lines cannot be told apart from the
///   continuation of a multi-line `:86:` field).
///
/// If no line is tagged at all, every line except the first is removed.
///
/// Every remaining line is terminated with `\r\n` in the returned string,
/// regardless of the line endings used in `s`.
///
/// # Panics
///
/// Panics if a line parses as `Rule::field` but the parse tree does not have
/// the expected nesting (assumed to be field > tag > tag number — verify
/// against the grammar).
pub fn strip_stuff_between_messages(s: &str) -> String {
    let lines: Vec<&str> = s.lines().collect();

    // Per-line flags replace the repeated linear `Vec::contains` lookups.
    let mut is_tagged = vec![false; lines.len()];
    let mut tag_20_lines = Vec::new();
    let mut last_tagged_line: Option<usize> = None;
    let mut last_tag = "20";
    for (i, line) in lines.iter().copied().enumerate() {
        if let Ok(mut parsed) = MT940Parser::parse(Rule::field, line) {
            // Descend to the innermost pair, whose text is the tag number.
            last_tag = parsed
                .next()
                .unwrap()
                .into_inner()
                .next()
                .unwrap()
                .into_inner()
                .next()
                .unwrap()
                .as_str();
            if last_tag == "20" {
                tag_20_lines.push(i);
            }
            is_tagged[i] = true;
            last_tagged_line = Some(i);
        }
    }

    let mut delete = vec![false; lines.len()];

    // Walk backwards from each `:20:` line and drop everything up to the
    // previous tagged line.
    for tag_20_index in tag_20_lines {
        for i in (0..tag_20_index).rev() {
            if is_tagged[i] {
                break;
            }
            delete[i] = true;
        }
    }

    // Drop trailing lines unless they may belong to a final `:86:` field.
    if last_tag != "86" {
        // Without any tagged line this starts at index 1, keeping line 0.
        let first_trailing = last_tagged_line.map_or(1, |i| i + 1);
        for flag in delete.iter_mut().skip(first_trailing) {
            *flag = true;
        }
    }

    let mut output = String::with_capacity(s.len());
    for (line, deleted) in lines.into_iter().zip(delete) {
        if !deleted {
            output.push_str(line);
            output.push_str("\r\n");
        }
    }
    output
}
/// Trims `:86:` fields that span too many lines.
///
/// A `:86:` field consists of the line starting with `:86:` plus every
/// following line up to (not including) the next line that starts with `:`.
/// Only the first five of those continuation lines are kept; any further
/// ones are dropped. Lines outside `:86:` fields are never removed.
///
/// Every remaining line is terminated with `\r\n` in the returned string,
/// regardless of the line endings used in `input`.
pub fn strip_excess_tag86_lines(input: &str) -> String {
    // Continuation lines kept after the `:86:` line itself.
    const MAX_CONTINUATION_LINES: usize = 5;

    let mut output = String::with_capacity(input.len());
    // `Some(n)` while inside a `:86:` field, `n` being the number of
    // continuation lines seen so far; `None` everywhere else.
    let mut continuation_count: Option<usize> = None;

    for line in input.lines() {
        let keep = if line.starts_with(":86:") {
            continuation_count = Some(0);
            true
        } else if line.starts_with(':') {
            // Any other tag ends the current `:86:` field.
            continuation_count = None;
            true
        } else if let Some(seen) = continuation_count.as_mut() {
            let within_limit = *seen < MAX_CONTINUATION_LINES;
            *seen += 1;
            within_limit
        } else {
            true
        };

        if keep {
            output.push_str(line);
            output.push_str("\r\n");
        }
    }
    output
}
// Unit and property tests for the sanitizing helpers defined in this file.
#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use proptest::{prop_assert, proptest};
use rstest::rstest;
use super::*;
// Property: for any input matching the regex `.+`, the converted text must
// parse as `Rule::swift_chars`.
proptest! {
#[test]
fn to_swift_charset_no_parsing_failure_after_conversion(input in r".+") {
let result = to_swift_charset(&input);
let parsed = MT940Parser::parse(Rule::swift_chars, &result);
prop_assert!(parsed.is_ok());
}
}
// Accented letters are transliterated to ASCII; `ß` expands to `ss`.
#[test]
fn to_swift_charset_sanitize_sentence() {
let input = "hällö waß íst lös";
let result = to_swift_charset(input);
let expected = "hallo wass ist los";
assert_eq!(result, expected);
}
// Per-character conversions. `=` and `!` are expected to be replaced by `.`.
#[rstest]
#[case("ä", "a")]
#[case("ö", "o")]
#[case("ú", "u")]
#[case("é", "e")]
#[case("å", "a")]
#[case("á", "a")]
#[case("ß", "ss")]
// NOTE(review): this repeats the "ú" case listed above.
#[case("ú", "u")]
#[case("ó", "o")]
#[case("í", "i")]
#[case("ë", "e")]
#[case("=", ".")]
#[case("!", ".")]
fn to_swift_charset_special_char_conversions(#[case] input: &str, #[case] expected: &str) {
let result = to_swift_charset(input);
assert_eq!(result, expected);
}
// Lines between the last field of one message and the next `:20:` are
// removed, as are the lines after the final field; continuation lines inside
// a `:86:` field stay.
#[test]
fn strip_stuff_between_messages_success() {
let input = "\
:86:asdasdads\r\n\
------\r\n\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok\r\n\
:64:end of message\r\n\
stuff between messages\r\n\
should be removed\r\n\
:20:aaaaa\r\n\
:64:some more\r\n\
ö»»«»«äää\r\n\
:20:lolab\r\n\
:86:zzzz\r\n\
:64:asda\r\n\
--\r\n\
";
let expected = "\
:86:asdasdads\r\n\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok\r\n\
:64:end of message\r\n\
:20:aaaaa\r\n\
:64:some more\r\n\
:20:lolab\r\n\
:86:zzzz\r\n\
:64:asda\r\n\
";
let result = strip_stuff_between_messages(input);
assert_eq!(result, expected);
}
// When the last tagged line is `:86:`, trailing lines (here `--`) are kept.
#[test]
fn strip_stuff_between_messages_last_is_86() {
let input = "\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok\r\n\
--\r\n\
";
let expected = "\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok\r\n\
--\r\n\
";
let result = strip_stuff_between_messages(input);
assert_eq!(result, expected);
}
// Each `:86:` field keeps at most five continuation lines; further ones are
// dropped. Text that merely contains something tag-like (e.g. `20:10:43`)
// is still treated as a continuation line.
#[test]
fn excess_tag86_are_stripped() {
let input = "\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok even with date that looks like a tag 20:10:43\r\n\
but not when\r\n\
it is way too many\r\n\
lines\r\n\
in fact i shouldnt be here\r\n\
and i shouldnt either\r\n\
:62F:C123EUR321,98\r\n\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
but not many lines\r\n\
:62F:C123EUR321,98\r\n\
:20:vvvvv\r\n\
:86:hi there\r\n\
a very multi lined string\r\n\
here is ok even with date that looks like a tag 20:86:43\r\n\
but not when\r\n\
it is way too many\r\n\
lines\r\n\
in fact i shouldnt be here\r\n\
and i shouldnt either\r\n\
and i certainly aint supposed to be here as well\r\n\
:62F:C321EUR123,98\r\n\
";
let expected = "\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
here is ok even with date that looks like a tag 20:10:43\r\n\
but not when\r\n\
it is way too many\r\n\
lines\r\n\
:62F:C123EUR321,98\r\n\
:20:vvvvv\r\n\
:86:hello\r\n\
multi line string\r\n\
but not many lines\r\n\
:62F:C123EUR321,98\r\n\
:20:vvvvv\r\n\
:86:hi there\r\n\
a very multi lined string\r\n\
here is ok even with date that looks like a tag 20:86:43\r\n\
but not when\r\n\
it is way too many\r\n\
lines\r\n\
:62F:C321EUR123,98\r\n\
";
let result = strip_excess_tag86_lines(input);
assert_eq!(result, expected);
}
}