split_paragraphs/lib.rs
1//! A crate that provides paragraph iteration for strings.
2//!
3//! This crate extends [`str`] with the ability to iterate over paragraphs via the [`SplitParagraphs`] trait.
//! A paragraph is defined as one or more consecutive lines that each contain at least one non-whitespace character; paragraphs are separated by one or more blank (empty or whitespace-only) lines.
5//!
6//! # Example
7//! ```
8//! use split_paragraphs::SplitParagraphs;
9//!
10//! let text = "foo\r\nbar\n\nbaz\r";
11//! let mut paragraphs = text.paragraphs();
12//!
13//! assert_eq!(paragraphs.next(), Some("foo\r\nbar"));
14//! assert_eq!(paragraphs.next(), Some("baz\r"));
15//! assert_eq!(paragraphs.next(), None);
16//! ```
17
18#![no_std]
19#![deny(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
20#![allow(clippy::cast_sign_loss)]
21
22use core::iter::FusedIterator;
23use core::slice;
24use core::str::{from_utf8_unchecked, Lines};
25
26/// Trait extending [`str`] with [`paragraphs`].
27///
28/// [`paragraphs`]: SplitParagraphs::paragraphs
29pub trait SplitParagraphs {
30 /// Returns an iterator over paragraphs of a string, as string slices.
31 ///
32 /// A paragraph consists of one or more lines containing non-whitespace characters,
33 /// separated by empty lines or lines containing only whitespace.
34 ///
35 /// Paragraphs always contain at least one line with at least one non-whitespace
36 /// character.
37 ///
38 /// Paragraphs never contain empty lines or whitespace-only lines.
39 ///
40 /// Paragraphs support line endings that are either newlines (`\n`) or
41 /// carriage return followed by line feed (`\r\n`).
42 ///
43 /// Line terminators between paragraphs are not included in the returned slices.
44 ///
45 /// Line terminators within paragraphs are preserved in their original form.
46 ///
47 /// Handling of line endings matches [`lines`]. See its documentation for more details.
48 ///
49 /// # Examples
50 ///
51 /// Basic usage:
52 ///
53 /// ```
54 /// # use split_paragraphs::SplitParagraphs;
55 /// let text = "foo\r\nbar\n\nbaz\r";
56 /// let mut paragraphs = text.paragraphs();
57 ///
58 /// assert_eq!(Some("foo\r\nbar"), paragraphs.next());
59 /// // Trailing carriage return is included in the last paragraph
60 /// assert_eq!(Some("baz\r"), paragraphs.next());
61 ///
62 /// assert_eq!(None, paragraphs.next());
63 /// ```
64 ///
65 /// The final paragraph does not require any ending:
66 ///
67 /// ```
68 /// # use split_paragraphs::SplitParagraphs;
69 /// let text = "\n\n\nfoo\nbar\n\r\nbaz";
70 /// let mut paragraphs = text.paragraphs();
71 ///
72 /// assert_eq!(Some("foo\nbar"), paragraphs.next());
73 /// assert_eq!(Some("baz"), paragraphs.next());
74 ///
75 /// assert_eq!(None, paragraphs.next());
76 /// ```
77 ///
78 /// [`paragraphs`]: SplitParagraphs::paragraphs
79 /// [`lines`]: str::lines
80 fn paragraphs(&self) -> Paragraphs;
81}
82
83/// An iterator over the paragraphs of a string, as string slices.
84///
85/// This struct is created with the [`paragraphs`] method on [`str`] via
86/// the [`SplitParagraphs`] trait.
87/// See its documentation for more.
88///
89/// [`paragraphs`]: SplitParagraphs::paragraphs
90#[must_use = "iterators are lazy and do nothing unless consumed"]
91#[derive(Clone, Debug)]
92pub struct Paragraphs<'a> {
93 lines: Lines<'a>,
94}
95
96impl SplitParagraphs for str {
97 #[inline]
98 fn paragraphs(&self) -> Paragraphs {
99 Paragraphs {
100 lines: self.lines(),
101 }
102 }
103}
104
105impl<'a> Iterator for Paragraphs<'a> {
106 type Item = &'a str;
107
108 #[inline]
109 fn size_hint(&self) -> (usize, Option<usize>) {
110 (0, self.lines.size_hint().1.map(|n| (n + 1) / 2))
111 }
112
113 #[inline]
114 fn next(&mut self) -> Option<Self::Item> {
115 let first_line = self.lines.next()?;
116
117 let first_non_empty_line = if first_line.trim().is_empty() {
118 loop {
119 let line = self.lines.next()?;
120 if !line.trim().is_empty() {
121 break line;
122 }
123 }
124 } else {
125 first_line
126 };
127
128 let mut last_non_empty_line = first_non_empty_line;
129 loop {
130 let Some(line) = self.lines.next() else {
131 break;
132 };
133 if line.trim().is_empty() {
134 break;
135 }
136 last_non_empty_line = line;
137 }
138
139 let result: &str = unsafe {
140 from_utf8_unchecked(slice::from_raw_parts(
141 first_non_empty_line.as_ptr(),
142 (last_non_empty_line
143 .as_ptr()
144 .offset_from(first_non_empty_line.as_ptr()) as usize)
145 .unchecked_add(last_non_empty_line.len()),
146 ))
147 };
148
149 Some(result)
150 }
151}
152
153impl DoubleEndedIterator for Paragraphs<'_> {
154 #[inline]
155 fn next_back(&mut self) -> Option<Self::Item> {
156 let last_line = self.lines.next_back()?;
157
158 let last_non_empty_line = if last_line.trim().is_empty() {
159 loop {
160 let line = self.lines.next_back()?;
161 if !line.trim().is_empty() {
162 break line;
163 }
164 }
165 } else {
166 last_line
167 };
168
169 let mut first_non_empty_line = last_non_empty_line;
170 loop {
171 let Some(line) = self.lines.next_back() else {
172 break;
173 };
174 if line.trim().is_empty() {
175 break;
176 }
177 first_non_empty_line = line;
178 }
179
180 let result: &str = unsafe {
181 from_utf8_unchecked(slice::from_raw_parts(
182 first_non_empty_line.as_ptr(),
183 (last_non_empty_line
184 .as_ptr()
185 .offset_from(first_non_empty_line.as_ptr()) as usize)
186 .unchecked_add(last_non_empty_line.len()),
187 ))
188 };
189
190 Some(result)
191 }
192}
193
194impl FusedIterator for Paragraphs<'_> {}