/// Strategy that decides where one block of text ends and the next begins.
///
/// The default is [`BlockDelimiter::DoubleLineGeneric`].
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub enum BlockDelimiter {
    /// Blocks are separated by an empty line, i.e. two consecutive line
    /// endings (`"\n\n"` or `"\r\n\r\n"`, depending on the text).
    #[default]
    DoubleLineGeneric,
    /// Blocks are separated by this literal string.
    Delimiter(String),
    /// Blocks are separated by a pattern / regex.
    ///
    /// Not implemented yet: resolving the delimiters for this variant
    /// currently hits a `todo!` and panics.
    Pattern(String),
}
/// Resolves the concrete separators used to split a text.
///
/// Returns `(line_delimiter, block_delimiter)`:
///
/// * the line delimiter is `"\r\n"` when `crlf` is `true`, otherwise `"\n"`;
/// * the block delimiter is two line delimiters in a row for
///   [`BlockDelimiter::DoubleLineGeneric`], or a copy of the user-supplied
///   string for [`BlockDelimiter::Delimiter`].
///
/// # Panics
///
/// Panics for [`BlockDelimiter::Pattern`], which is not implemented yet.
fn delimiters(crlf: bool, block_delimiter: &BlockDelimiter) -> (String, String) {
    let newline = if crlf { "\r\n" } else { "\n" };
    let block_separator = match block_delimiter {
        // An empty line between blocks is simply two line endings back to back.
        BlockDelimiter::DoubleLineGeneric => newline.repeat(2),
        BlockDelimiter::Delimiter(custom) => custom.clone(),
        BlockDelimiter::Pattern(_) => todo!("Pattern / Regex not implemented yet"),
    };
    (newline.to_owned(), block_separator)
}
/// Extension methods for splitting a text into blocks of lines.
///
/// A *block* is a run of lines separated from its neighbours by the chosen
/// [`BlockDelimiter`]. Line endings are detected from the text itself: if it
/// contains a `'\r'` anywhere, lines are split on `"\r\n"`, otherwise on
/// `"\n"`.
///
/// Every method trims surrounding whitespace from the whole text and from
/// each block before splitting, and returns an empty `Vec` when the text is
/// empty or consists only of whitespace.
///
/// # Panics
///
/// Every method panics when called with [`BlockDelimiter::Pattern`] on
/// non-blank text, because that variant is not implemented yet.
pub trait TextBlocks: AsRef<str> + Sized {
    /// Splits the text into blocks, each block being the list of its lines.
    ///
    /// The returned string slices borrow from `self`.
    fn as_blocks(&self, block_delimiter: &BlockDelimiter) -> Vec<Vec<&str>> {
        let (text, line_delimiter, block_delimiter) =
            match trimmed_with_delimiters(self.as_ref(), block_delimiter) {
                Some(parts) => parts,
                None => return Vec::new(),
            };
        text.split(block_delimiter.as_str())
            .map(|block| block.trim().split(line_delimiter.as_str()).collect())
            .collect()
    }

    /// Splits the text into blocks and runs `line_parser` on every line.
    ///
    /// Returns one `Vec` of parsed lines per block, in input order.
    fn block_parse_lines<INNER, LP>(
        &self,
        block_delimiter: &BlockDelimiter,
        line_parser: LP,
    ) -> Vec<Vec<INNER>>
    where
        LP: Fn(&str) -> INNER,
    {
        let (text, line_delimiter, block_delimiter) =
            match trimmed_with_delimiters(self.as_ref(), block_delimiter) {
                Some(parts) => parts,
                None => return Vec::new(),
            };
        text.split(block_delimiter.as_str())
            .map(|block| {
                block
                    .trim()
                    .split(line_delimiter.as_str())
                    .map(&line_parser)
                    .collect()
            })
            .collect()
    }

    /// Splits the text into blocks, parses every line with `line_parser` and
    /// then folds each block's parsed lines into one value with
    /// `block_parser`.
    ///
    /// Returns one `BLOCK` per block, in input order.
    fn block_parse<INNER, BLOCK, LP, BP>(
        &self,
        block_delimiter: &BlockDelimiter,
        line_parser: LP,
        block_parser: BP,
    ) -> Vec<BLOCK>
    where
        LP: Fn(&str) -> INNER,
        BP: Fn(Vec<INNER>) -> BLOCK,
    {
        let (text, line_delimiter, block_delimiter) =
            match trimmed_with_delimiters(self.as_ref(), block_delimiter) {
                Some(parts) => parts,
                None => return Vec::new(),
            };
        text.split(block_delimiter.as_str())
            .map(|block| {
                // Trim each block exactly like the sibling methods do, so a
                // delimiter sitting on its own line does not leave empty
                // first/last lines for `line_parser` to choke on.
                let lines: Vec<INNER> = block
                    .trim()
                    .split(line_delimiter.as_str())
                    .map(&line_parser)
                    .collect();
                block_parser(lines)
            })
            .collect()
    }
}

/// Trims `text` and resolves the separators it should be split on.
///
/// Returns `None` when `text` is empty or whitespace-only, so callers can
/// report "no blocks" instead of a single block holding one empty line.
/// Otherwise returns `(trimmed_text, line_delimiter, block_delimiter)`.
fn trimmed_with_delimiters<'a>(
    text: &'a str,
    block_delimiter: &BlockDelimiter,
) -> Option<(&'a str, String, String)> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return None;
    }
    // Line-ending detection looks at the untrimmed text, as it always has.
    let (line_delimiter, block_delimiter) = delimiters(text.contains('\r'), block_delimiter);
    Some((trimmed, line_delimiter, block_delimiter))
}
/// Blanket implementation: every sized type that can be viewed as a `str`
/// gets the [`TextBlocks`] methods for free.
impl<T: AsRef<str>> TextBlocks for T {}
#[cfg(test)]
mod tests {
    use super::*;

    /// Five blank-line separated groups of integers shared by the parsing tests.
    const INT_EXAMPLE: &str = "1000\n2000\n3000\n\n4000\n\n5000\n6000\n\n7000\n8000\n9000\n\n10000";

    /// Blocks that every letter-based fixture below is expected to split into.
    fn letter_blocks() -> Vec<Vec<&'static str>> {
        vec![
            vec!["abc"],
            vec!["a", "b", "c"],
            vec!["ab", "ac"],
            vec!["a", "a", "a", "a"],
            vec!["b"],
        ]
    }

    /// `INT_EXAMPLE` parsed line by line, grouped per block.
    fn int_blocks() -> Vec<Vec<u32>> {
        vec![
            vec![1000, 2000, 3000],
            vec![4000],
            vec![5000, 6000],
            vec![7000, 8000, 9000],
            vec![10000],
        ]
    }

    /// Line parser used by all numeric tests; panics on malformed input.
    fn to_u32(line: &str) -> u32 {
        line.parse::<u32>().unwrap()
    }

    /// Asserts that `text` splits into the single block `["abc"]`.
    fn assert_single_abc(text: &str) {
        let delimiter = BlockDelimiter::default();
        assert_eq!(text.as_blocks(&delimiter), [["abc"]]);
    }

    #[test]
    fn test_block_split() {
        let text = "abc\n\na\nb\nc\n\nab\nac\n\na\na\na\na\n\nb";
        let actual = text.as_blocks(&BlockDelimiter::default());
        assert_eq!(actual, letter_blocks());
    }

    #[test]
    fn test_block_split_crlf() {
        let text = "abc\r\n\r\na\r\nb\r\nc\r\n\r\nab\r\nac\r\n\r\na\r\na\r\na\r\na\r\n\r\nb";
        let actual = text.as_blocks(&BlockDelimiter::default());
        assert_eq!(actual, letter_blocks());
    }

    #[test]
    fn test_string_delimiter() {
        let stars = BlockDelimiter::Delimiter("***".to_string());
        let text = "abc\n***\na\nb\nc\n***\nab\nac\n***\na\na\na\na\n***\nb";
        let actual = text.as_blocks(&stars);
        assert_eq!(actual, letter_blocks());
    }

    #[test]
    fn test_block_split_empty() {
        let delimiter = BlockDelimiter::default();
        let no_blocks: Vec<Vec<&str>> = Vec::new();
        assert_eq!(String::new().as_blocks(&delimiter), no_blocks);
        assert_eq!("".as_blocks(&delimiter), no_blocks);
    }

    #[test]
    fn test_block_split_single() {
        assert_single_abc("abc");
    }

    #[test]
    fn test_block_split_single_with_newline() {
        assert_single_abc("abc\n");
    }

    #[test]
    fn test_block_split_single_with_newline_and_empty() {
        assert_single_abc("abc\n\n");
    }

    #[test]
    fn test_parse_lines_int() {
        let delimiter = BlockDelimiter::default();
        let actual = INT_EXAMPLE.block_parse_lines(&delimiter, to_u32);
        assert_eq!(actual, int_blocks());
    }

    #[test]
    fn test_parse_lines_empty() {
        let delimiter = BlockDelimiter::default();
        let actual = String::new().block_parse_lines(&delimiter, to_u32);
        assert_eq!(actual, Vec::<Vec<u32>>::new());
    }

    #[test]
    fn test_parse_blocks_empty() {
        let delimiter = BlockDelimiter::default();
        let actual = "".block_parse(&delimiter, to_u32, |block| block);
        assert_eq!(actual, Vec::<Vec<u32>>::new());
    }

    #[test]
    fn test_parse_blocks_non_reduced() {
        let delimiter = BlockDelimiter::default();

        // Identity block parser: blocks come back untouched.
        let untouched = INT_EXAMPLE.block_parse(&delimiter, to_u32, |block| block);
        assert_eq!(untouched, int_blocks());

        // Reversing block parser: same blocks, each one back to front.
        let reversed = INT_EXAMPLE.block_parse(&delimiter, to_u32, |block| {
            block.iter().rev().copied().collect::<Vec<u32>>()
        });
        let derived: Vec<Vec<u32>> = int_blocks()
            .iter()
            .map(|block| block.iter().rev().copied().collect())
            .collect();
        assert_eq!(reversed, derived);

        // Spelled out once more so a bug in the derivation above cannot hide.
        let spelled_out = vec![
            vec![3000, 2000, 1000],
            vec![4000],
            vec![6000, 5000],
            vec![9000, 8000, 7000],
            vec![10000],
        ];
        assert_eq!(reversed, spelled_out);
    }

    #[test]
    fn test_parse_blocks_reduced() {
        let delimiter = BlockDelimiter::default();
        // Each block is reduced to the spread between its largest and smallest value.
        let spreads = INT_EXAMPLE.block_parse(&delimiter, to_u32, |block| {
            let largest = block.iter().max().unwrap();
            let smallest = block.iter().min().unwrap();
            largest - smallest
        });
        assert_eq!(spreads, vec![2000, 0, 1000, 2000, 0]);
    }
}