split_paragraphs/
lib.rs

//! A crate that provides paragraph iteration for strings.
//!
//! This crate extends [`str`] with the ability to iterate over paragraphs via the [`SplitParagraphs`] trait.
//! A paragraph is defined as one or more consecutive lines that each contain at least one
//! non-whitespace character. Paragraphs are separated by one or more blank lines, where a
//! blank line is a line that is empty or contains only whitespace.
//!
//! # Example
//! ```
//! use split_paragraphs::SplitParagraphs;
//!
//! let text = "foo\r\nbar\n\nbaz\r";
//! let mut paragraphs = text.paragraphs();
//!
//! assert_eq!(paragraphs.next(), Some("foo\r\nbar"));
//! assert_eq!(paragraphs.next(), Some("baz\r"));
//! assert_eq!(paragraphs.next(), None);
//! ```
17
18#![no_std]
19#![deny(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
20#![allow(clippy::cast_sign_loss)]
21
22use core::iter::FusedIterator;
23use core::slice;
24use core::str::{from_utf8_unchecked, Lines};
25
26/// Trait extending [`str`] with [`paragraphs`].
27///
28/// [`paragraphs`]: SplitParagraphs::paragraphs
29pub trait SplitParagraphs {
30    /// Returns an iterator over paragraphs of a string, as string slices.
31    ///
32    /// A paragraph consists of one or more lines containing non-whitespace characters,
33    /// separated by empty lines or lines containing only whitespace.
34    ///
35    /// Paragraphs always contain at least one line with at least one non-whitespace
36    /// character.
37    ///
38    /// Paragraphs never contain empty lines or whitespace-only lines.
39    ///
40    /// Paragraphs support line endings that are either newlines (`\n`) or
41    /// carriage return followed by line feed (`\r\n`).
42    ///
43    /// Line terminators between paragraphs are not included in the returned slices.
44    ///
45    /// Line terminators within paragraphs are preserved in their original form.
46    ///
47    /// Handling of line endings matches [`lines`]. See its documentation for more details.
48    ///
49    /// # Examples
50    ///
51    /// Basic usage:
52    ///
53    /// ```
54    /// # use split_paragraphs::SplitParagraphs;
55    /// let text = "foo\r\nbar\n\nbaz\r";
56    /// let mut paragraphs = text.paragraphs();
57    ///
58    /// assert_eq!(Some("foo\r\nbar"), paragraphs.next());
59    /// // Trailing carriage return is included in the last paragraph
60    /// assert_eq!(Some("baz\r"), paragraphs.next());
61    ///
62    /// assert_eq!(None, paragraphs.next());
63    /// ```
64    ///
65    /// The final paragraph does not require any ending:
66    ///
67    /// ```
68    /// # use split_paragraphs::SplitParagraphs;
69    /// let text = "\n\n\nfoo\nbar\n\r\nbaz";
70    /// let mut paragraphs = text.paragraphs();
71    ///
72    /// assert_eq!(Some("foo\nbar"), paragraphs.next());
73    /// assert_eq!(Some("baz"), paragraphs.next());
74    ///
75    /// assert_eq!(None, paragraphs.next());
76    /// ```
77    ///
78    /// [`paragraphs`]: SplitParagraphs::paragraphs
79    /// [`lines`]: str::lines
80    fn paragraphs(&self) -> Paragraphs;
81}
82
/// Iterator that yields each paragraph of a string as a borrowed string slice.
///
/// Values of this type are obtained by calling [`paragraphs`] on a [`str`],
/// which requires the [`SplitParagraphs`] trait to be in scope.
/// Refer to that method's documentation for the exact paragraph rules.
///
/// [`paragraphs`]: SplitParagraphs::paragraphs
#[derive(Debug, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Paragraphs<'a> {
    /// Line iterator over the part of the source string not yet consumed.
    lines: Lines<'a>,
}
95
96impl SplitParagraphs for str {
97    #[inline]
98    fn paragraphs(&self) -> Paragraphs {
99        Paragraphs {
100            lines: self.lines(),
101        }
102    }
103}
104
105impl<'a> Iterator for Paragraphs<'a> {
106    type Item = &'a str;
107
108    #[inline]
109    fn size_hint(&self) -> (usize, Option<usize>) {
110        (0, self.lines.size_hint().1.map(|n| (n + 1) / 2))
111    }
112
113    #[inline]
114    fn next(&mut self) -> Option<Self::Item> {
115        let first_line = self.lines.next()?;
116
117        let first_non_empty_line = if first_line.trim().is_empty() {
118            loop {
119                let line = self.lines.next()?;
120                if !line.trim().is_empty() {
121                    break line;
122                }
123            }
124        } else {
125            first_line
126        };
127
128        let mut last_non_empty_line = first_non_empty_line;
129        loop {
130            let Some(line) = self.lines.next() else {
131                break;
132            };
133            if line.trim().is_empty() {
134                break;
135            }
136            last_non_empty_line = line;
137        }
138
139        let result: &str = unsafe {
140            from_utf8_unchecked(slice::from_raw_parts(
141                first_non_empty_line.as_ptr(),
142                (last_non_empty_line
143                    .as_ptr()
144                    .offset_from(first_non_empty_line.as_ptr()) as usize)
145                    .unchecked_add(last_non_empty_line.len()),
146            ))
147        };
148
149        Some(result)
150    }
151}
152
153impl DoubleEndedIterator for Paragraphs<'_> {
154    #[inline]
155    fn next_back(&mut self) -> Option<Self::Item> {
156        let last_line = self.lines.next_back()?;
157
158        let last_non_empty_line = if last_line.trim().is_empty() {
159            loop {
160                let line = self.lines.next_back()?;
161                if !line.trim().is_empty() {
162                    break line;
163                }
164            }
165        } else {
166            last_line
167        };
168
169        let mut first_non_empty_line = last_non_empty_line;
170        loop {
171            let Some(line) = self.lines.next_back() else {
172                break;
173            };
174            if line.trim().is_empty() {
175                break;
176            }
177            first_non_empty_line = line;
178        }
179
180        let result: &str = unsafe {
181            from_utf8_unchecked(slice::from_raw_parts(
182                first_non_empty_line.as_ptr(),
183                (last_non_empty_line
184                    .as_ptr()
185                    .offset_from(first_non_empty_line.as_ptr()) as usize)
186                    .unchecked_add(last_non_empty_line.len()),
187            ))
188        };
189
190        Some(result)
191    }
192}
193
// `next` and `next_back` return `None` as soon as the underlying `Lines`
// iterator runs out, and `Lines` is itself fused, so once `Paragraphs` has
// returned `None` it keeps returning `None`.
impl FusedIterator for Paragraphs<'_> {}