textblocks/
lib.rs

1//! A simple crate for parsing text blocks.
2//! Can be used to parse text files with blocks of data separated by blank lines.
3//! Works well with \n or \r\n line endings.
4//!
5//! Contains the `TextBlocks` trait which adds the methods `as_blocks`, `block_parse_lines` and `block_parse` to `str` and `String`.
6//!
7//! # Install
8//!
9//! Run the following command in your project directory:
10//!
11//! ```bash
12//! cargo add textblocks
13//! ```
14//!
15//! Or add the following to your `Cargo.toml`:
16//!
17//! ```toml
18//! [dependencies]
19//! textblocks = "0.1.0"
20//! ```
21//!
22//! Check the [crates.io](https://crates.io/crates/textblocks) page for the latest version.
23//!
24//! # Usage
25//!
26//! To parse text into blocks, you need to provide a block delimiter, a line parser and a block parser.
27//!
28//! - The *block delimiter* is a string that separates blocks. The default is a blank line (double newline), but you can use any string.
29//!   - `BlockDelimiter::DoubleLineGeneric` (the default) will use `"\r\n\r\n"` if the string contains `"\r\n"` newlines, otherwise `"\n\n"`.
30//!   - `BlockDelimiter::Delimiter(s)` will use `s` (a `String`) as the delimiter.
//! - The *line parser* is any function or closure that takes a `&str` and returns a value of type `T`. The final result will be a `Vec<Vec<T>>`.
//!   You can use the `block_parse_lines` method if you don't need a block parser and only want to parse the lines.
//! - The *block parser* is any function or closure that takes a `Vec<T>` (the parsed lines of one block) and returns a value of type `U`. The final result will be a `Vec<U>`.
34//!
35//! # Examples
36//!
37//! - Parse a block into a vector of lines
38//!
39//! > [!IMPORTANT]
40//! > This will allocate a vector of vectors of `&str`. If you want to avoid these allocations, use `block_parse_lines` or `block_parse`.
//! > In that case, a vector will only be allocated for the requested result type.
42//!
43//! ```rust
44//! use textblocks::*;
45//! let s = "100\n200\n\n300\n400\n\n500\n600";
46//! let block_delimiter = BlockDelimiter::DoubleLineGeneric;
47//! assert_eq!(s.as_blocks(&block_delimiter), vec![vec!["100", "200"], vec!["300", "400"], vec!["500", "600"]]);
48//! assert_eq!(s.as_blocks(&block_delimiter), [["100", "200"], ["300", "400"], ["500", "600"]]);
49//! ```
50//!
51//! - Parse a block into a vector of lines, where each line is parsed into a number (u32)
52//!
53//! ```rust
54//! use textblocks::*;
55//! let s = "100\n200\n\n300\n400\n\n500\n600";
56//! let block_delimiter = BlockDelimiter::DoubleLineGeneric;
57//! let result = s.block_parse_lines(&block_delimiter,|line| line.parse::<u32>().unwrap());
58//! assert_eq!(result, [[100, 200], [300, 400], [500, 600]]);
59//! ```
60//!
61//! - Parse a block into a vector of lines, where each line is parsed into a number (u32), and then sum the numbers
62//!
63//! ```rust
64//! use textblocks::*;
65//! let s = "100\n200\n\n300\n400\n\n500\n600";
66//! let block_delimiter = BlockDelimiter::DoubleLineGeneric;
67//! let result = s.block_parse(
68//!     &block_delimiter,
69//!     |line| line.parse::<u32>().unwrap(),
70//!     |block| block.iter().sum::<u32>()
71//! );
72//! assert_eq!(result, [300, 700, 1100]);
73//! ```
74
/// Selects how text is cut into blocks.
///
/// - [`BlockDelimiter::DoubleLineGeneric`] (the default) splits on a blank line:
///   `"\r\n\r\n"` when the text uses `"\r\n"` line endings, otherwise `"\n\n"`.
/// - [`BlockDelimiter::Delimiter`] splits on the given string, used as is.
/// - [`BlockDelimiter::Pattern`] is reserved for regex patterns and is not implemented yet.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub enum BlockDelimiter {
    /// A double line delimiter, "\r\n\r\n" if the string contains "\r\n", otherwise "\n\n".
    #[default]
    DoubleLineGeneric,
    /// A custom delimiter string.
    Delimiter(String),
    /// A regex pattern. Not implemented yet: selecting it makes the delimiter lookup
    /// panic via `todo!`.
    Pattern(String),
}
89
90fn delimiters(crlf: bool, block_delimiter: &BlockDelimiter) -> (String, String) {
91    let line_delimiter = if crlf { "\r\n" } else { "\n" }.to_owned();
92    let block_delimiter = match (block_delimiter, crlf) {
93        (BlockDelimiter::Pattern(_), _) => todo!("Pattern / Regex not implemented yet"),
94        (BlockDelimiter::DoubleLineGeneric, true) => "\r\n\r\n".to_owned(),
95        (BlockDelimiter::DoubleLineGeneric, false) => "\n\n".to_owned(),
96        (BlockDelimiter::Delimiter(d), _) => d.clone(),
97    };
98    (line_delimiter, block_delimiter)
99}
100
101pub trait TextBlocks: AsRef<str> + Sized
102where
103    Self: AsRef<str> + Sized,
104{
105    /// Parse a string into blocks, where a block is a vector of lines.
106    /// Blocks are separated by a blank line. Works well with \n or \r\n line endings.
107    ///
108    /// # Example
109    /// ```rust
110    /// use textblocks::*;
111    /// let s = "100\n200\n\n300\n400\n\n500\n600";
112    /// let block_delimiter = BlockDelimiter::DoubleLineGeneric;
113    /// assert_eq!(s.as_blocks(&block_delimiter), vec![vec!["100", "200"], vec!["300", "400"], vec!["500", "600"]]);
114    /// ```
115    fn as_blocks(&self, block_delimiter: &BlockDelimiter) -> Vec<Vec<&str>> {
116        let s = self.as_ref();
117        let (line_delimiter, block_delimiter) = delimiters(s.contains('\r'), block_delimiter);
118        if s.is_empty() {
119            return vec![];
120        }
121        s.trim()
122            .split(&block_delimiter)
123            .map(|x| x.trim().split(&line_delimiter).collect())
124            .collect()
125    }
126
127    /// Parse a block into a vector of lines, where each line is parsed into a type T, using the provided line parser.
128    /// If some lines cannot be parsed, make sure to use a type that can handle that (e.g. `Option<T>` or `Result<T, E>`)
129    /// and then use `filter_map` to remove the lines that could not be parsed.
130    ///
131    /// # Example
132    /// ```rust
133    /// use textblocks::*;
134    /// let s = "100\n200\n\n300\n400\n\n500\n600";
135    /// let block_delimiter = BlockDelimiter::DoubleLineGeneric;
136    /// let result = s.block_parse_lines(&block_delimiter, |line| line.parse::<u32>().unwrap());
137    /// assert_eq!(result, vec![vec![100, 200], vec![300, 400], vec![500, 600]]);
138    /// ```
139    fn block_parse_lines<INNER, LP>(
140        &self,
141        block_delimiter: &BlockDelimiter,
142        line_parser: LP,
143    ) -> Vec<Vec<INNER>>
144    where
145        LP: Fn(&str) -> INNER,
146    {
147        let s = self.as_ref();
148        let (line_delimiter, block_delimiter) = delimiters(s.contains('\r'), block_delimiter);
149        if s.is_empty() {
150            return vec![];
151        }
152        #[allow(clippy::redundant_closure)]
153        // The line_parser function cannot be used as it doesn't implement Copy
154        s.trim()
155            .split(&block_delimiter)
156            .map(|x| {
157                x.trim()
158                    .split(&line_delimiter)
159                    .map(|line| line_parser(line))
160                    .collect()
161            })
162            .collect()
163    }
164
165    /// Parse a block using the provided block parser. Blocks may be reduced to a single value, or parsed into a vector,
166    /// using the provided block parser. Similar to `parse_lines`, if some blocks cannot be parsed, make sure to use a type
167    /// that can handle that (e.g. `Option<T>` or `Result<T, E>`) and then use `filter_map` to remove the blocks that could not be parsed.
168    ///
169    /// # Example
170    /// ```rust
171    /// use textblocks::*;
172    /// let s = "abcde\nwow\n\n11111\n22222\n33333";
173    /// let block_delimiter = BlockDelimiter::DoubleLineGeneric;
174    /// let result = s.block_parse(
175    ///    &block_delimiter,
176    ///    |line| line.chars().next().unwrap(),
177    ///    |block| block.iter().collect::<String>(),
178    /// );
179    /// assert_eq!(result, vec!["aw", "123"]);
180    /// ```
181    fn block_parse<INNER, BLOCK, LP, BP>(
182        &self,
183        block_delimiter: &BlockDelimiter,
184        line_parser: LP,
185        block_parser: BP,
186    ) -> Vec<BLOCK>
187    where
188        LP: Fn(&str) -> INNER,
189        BP: Fn(Vec<INNER>) -> BLOCK,
190    {
191        let s = self.as_ref();
192        let (line_delimiter, block_delimiter) = delimiters(s.contains('\r'), block_delimiter);
193        if s.is_empty() {
194            return vec![];
195        }
196        #[allow(clippy::redundant_closure)]
197        // The line_parser function cannot be used as it doesn't implement Copy
198        s.trim()
199            .split(&block_delimiter)
200            .map(|block| {
201                block
202                    .split(&line_delimiter)
203                    .map(|line| line_parser(line))
204                    .collect()
205            })
206            .map(block_parser)
207            .collect()
208    }
209}
210
211impl<T> TextBlocks for T where T: AsRef<str> + Sized {}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Five blocks of integers separated by blank lines.
    const INT_EXAMPLE: &str = "1000\n2000\n3000\n\n4000\n\n5000\n6000\n\n7000\n8000\n9000\n\n10000";

    /// The blocks every letter fixture below is expected to split into.
    fn letter_blocks() -> Vec<Vec<&'static str>> {
        vec![
            vec!["abc"],
            vec!["a", "b", "c"],
            vec!["ab", "ac"],
            vec!["a", "a", "a", "a"],
            vec!["b"],
        ]
    }

    /// The blocks `INT_EXAMPLE` is expected to parse into.
    fn int_blocks() -> Vec<Vec<u32>> {
        vec![
            vec![1000, 2000, 3000],
            vec![4000],
            vec![5000, 6000],
            vec![7000, 8000, 9000],
            vec![10000],
        ]
    }

    /// Line parser shared by the integer tests.
    fn parse_u32(line: &str) -> u32 {
        line.parse::<u32>().unwrap()
    }

    #[test]
    fn test_block_split() {
        let delimiter = BlockDelimiter::default();
        let text = "abc\n\na\nb\nc\n\nab\nac\n\na\na\na\na\n\nb";
        assert_eq!(text.as_blocks(&delimiter), letter_blocks());
    }

    #[test]
    fn test_block_split_crlf() {
        let delimiter = BlockDelimiter::default();
        let text = "abc\r\n\r\na\r\nb\r\nc\r\n\r\nab\r\nac\r\n\r\na\r\na\r\na\r\na\r\n\r\nb";
        assert_eq!(text.as_blocks(&delimiter), letter_blocks());
    }

    #[test]
    fn test_string_delimiter() {
        let delimiter = BlockDelimiter::Delimiter("***".to_string());
        let text = "abc\n***\na\nb\nc\n***\nab\nac\n***\na\na\na\na\n***\nb";
        assert_eq!(text.as_blocks(&delimiter), letter_blocks());
    }

    #[test]
    fn test_block_split_empty() {
        let delimiter = BlockDelimiter::default();
        let no_blocks: Vec<Vec<&str>> = Vec::new();
        assert_eq!(String::new().as_blocks(&delimiter), no_blocks);
        assert_eq!("".as_blocks(&delimiter), no_blocks);
    }

    #[test]
    fn test_block_split_single() {
        let delimiter = BlockDelimiter::default();
        assert_eq!("abc".as_blocks(&delimiter), [["abc"]]);
    }

    #[test]
    fn test_block_split_single_with_newline() {
        let delimiter = BlockDelimiter::default();
        assert_eq!("abc\n".as_blocks(&delimiter), [["abc"]]);
    }

    #[test]
    fn test_block_split_single_with_newline_and_empty() {
        let delimiter = BlockDelimiter::default();
        assert_eq!("abc\n\n".as_blocks(&delimiter), [["abc"]]);
    }

    #[test]
    fn test_parse_lines_int() {
        let delimiter = BlockDelimiter::default();
        let parsed = INT_EXAMPLE.block_parse_lines(&delimiter, parse_u32);
        assert_eq!(parsed, int_blocks());
    }

    #[test]
    fn test_parse_lines_empty() {
        let delimiter = BlockDelimiter::default();
        let parsed = String::new().block_parse_lines(&delimiter, parse_u32);
        assert_eq!(parsed, Vec::<Vec<u32>>::new());
    }

    #[test]
    fn test_parse_blocks_empty() {
        let delimiter = BlockDelimiter::default();
        let parsed = "".block_parse(&delimiter, parse_u32, |block| block);
        assert_eq!(parsed, Vec::<Vec<u32>>::new());
    }

    #[test]
    fn test_parse_blocks_non_reduced() {
        let delimiter = BlockDelimiter::default();
        let forward = int_blocks();

        // Identity block parser: blocks come back exactly as parsed.
        let parsed = INT_EXAMPLE.block_parse(&delimiter, parse_u32, |block| block);
        assert_eq!(parsed, forward);

        // Reversing block parser, checked against both a derived and a literal expectation.
        let reversed = INT_EXAMPLE.block_parse(&delimiter, parse_u32, |block| {
            block.into_iter().rev().collect::<Vec<u32>>()
        });
        let mirrored: Vec<Vec<u32>> = forward
            .iter()
            .map(|block| {
                let mut flipped = block.clone();
                flipped.reverse();
                flipped
            })
            .collect();
        assert_eq!(reversed, mirrored);
        assert_eq!(
            reversed,
            vec![
                vec![3000, 2000, 1000],
                vec![4000],
                vec![6000, 5000],
                vec![9000, 8000, 7000],
                vec![10000],
            ]
        );
    }

    #[test]
    fn test_parse_blocks_reduced() {
        let delimiter = BlockDelimiter::default();
        // Each block is reduced to the spread between its largest and smallest value.
        let spreads = INT_EXAMPLE.block_parse(&delimiter, parse_u32, |block| {
            let highest = block.iter().max().unwrap();
            let lowest = block.iter().min().unwrap();
            highest - lowest
        });
        assert_eq!(spreads, vec![2000, 0, 1000, 2000, 0]);
    }
}