collective_utils/
discretize.rs

1use once_cell::sync::Lazy;
2use regex::Regex;
3
4pub fn string(input: &str) -> Vec<&str> {
5    let mut result = Vec::new();
6
7    static WORD: Lazy<Regex> = Lazy::new(|| Regex::new(r".{1,4000}\s?").unwrap());
8
9    WORD.find_iter(input).for_each(|m| {
10        result.push(m.as_str());
11    });
12
13    if let Some((idx, _)) = input.char_indices().nth(2000) {
14        let input = &input[idx..];
15        WORD.find_iter(input).for_each(|m| {
16            result.push(m.as_str());
17        });
18    }
19
20    result
21}
22
#[cfg(test)]
mod tests {
    use super::string;

    /// An input shorter than one chunk comes back as a single, unchanged chunk.
    #[test]
    fn test_discretize_markdown() {
        let chunks = string("Hello, world!");
        let expected = vec!["Hello, world!"];
        assert_eq!(expected, chunks);
    }
}