docspec_core/skip_empty_blocks.rs
1#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
2
3use crate::{Event, EventSource, Result};
4
5/// A streaming `EventSource` adapter that suppresses empty `Heading`, `BlockQuote`, and
6/// `Paragraph` Start/End pairs from the wrapped source.
7///
8/// The adapter uses a **1-event look-back** (hold-1) algorithm. When it sees a candidate
9/// skippable `Start*` event (`StartHeading`, `StartBlockQuote`, or `StartParagraph`), it
10/// buffers that event and peeks the next event from the inner source. If the next event is
11/// the matching `End*` (i.e., the block is empty), both events are dropped and the adapter
12/// keeps draining iteratively until it finds an event to emit. If the next event is something
13/// else, the buffered `Start*` is emitted immediately and the "something else" is stashed as
14/// `pending` for the next call. Memory is O(1) — at most two `Event` values are held at any
15/// time. Stack is O(1) — the implementation is a single `loop` with no recursion, so an
16/// arbitrarily long run of empty blocks consumes constant stack regardless of input size.
17///
18/// # Skip Set
19///
20/// Exactly three Start/End pairs are matched:
21///
22/// - `Event::StartHeading { .. }` ↔ `Event::EndHeading`
23/// - `Event::StartBlockQuote { .. }` ↔ `Event::EndBlockQuote`
24/// - `Event::StartParagraph { .. }` ↔ `Event::EndParagraph`
25///
26/// No other variants are suppressed. Empty `StartTable`, `StartOrderedListItem`, and all
27/// other containers pass through unchanged.
28///
29/// # Asymmetric API
30///
31/// `docspec-cli` and `docspec-http` apply this filter automatically by default.
32/// Library users opt in by wrapping their `EventSource` explicitly:
33/// `SkipEmptyBlocks::new(my_reader)`.
34///
35/// # Known Limitations
36///
37/// **No cascading**: an outer container that becomes empty *because* its inner contents were
38/// filtered is preserved. Example: `StartBlockQuote → StartParagraph → EndParagraph → EndBlockQuote`
39/// produces `StartBlockQuote → EndBlockQuote` (the inner empty paragraph pair is dropped, but the
40/// outer block quote is preserved). A subsequent pass would be needed to suppress the outer.
41///
42/// **Empty table cells**: an empty table cell containing only `StartParagraph → EndParagraph`
43/// will have the inner pair dropped, leaving the cell with no child events. The cell itself is
44/// preserved (table cells are not in the skip set).
45///
46/// **Fail-fast**: if the inner source returns `Err` while a `Start*` is buffered, the error
47/// propagates immediately and the buffered `Start*` is dropped silently. The stream is considered
48/// terminated; no partial recovery is attempted.
49///
50/// # Example
51///
52/// ```
53/// use docspec_core::{Event, EventSource, Result, SkipEmptyBlocks};
54///
55/// struct Replay {
56/// events: std::vec::IntoIter<Event>,
57/// }
58/// impl Replay {
59/// fn new(events: Vec<Event>) -> Self {
60/// Self { events: events.into_iter() }
61/// }
62/// }
63/// impl EventSource for Replay {
64/// fn next_event(&mut self) -> Result<Option<Event>> {
65/// Ok(self.events.next())
66/// }
67/// }
68///
69/// // An empty heading followed by a heading with text:
70/// let inner = Replay::new(vec![
71/// Event::StartHeading { id: None, level: 1 },
72/// Event::EndHeading,
73/// Event::StartHeading { id: None, level: 2 },
74/// Event::Text { content: String::from("Hello") },
75/// Event::EndHeading,
76/// ]);
77/// let mut filtered = SkipEmptyBlocks::new(inner);
78///
79/// // The empty H1 is dropped; the H2 with text passes through.
80/// assert_eq!(
81/// filtered.next_event().unwrap(),
82/// Some(Event::StartHeading { id: None, level: 2 }),
83/// );
84/// assert_eq!(
85/// filtered.next_event().unwrap(),
86/// Some(Event::Text { content: String::from("Hello") }),
87/// );
88/// assert_eq!(filtered.next_event().unwrap(), Some(Event::EndHeading));
89/// assert_eq!(filtered.next_event().unwrap(), None);
90/// ```
91pub struct SkipEmptyBlocks<S: EventSource> {
92 inner: S,
93 /// Holds a candidate skippable `Start*` (`StartHeading`, `StartBlockQuote`, or
94 /// `StartParagraph`) while we peek the next event to decide drop-or-flush.
95 buffered: Option<Event>,
96 /// Holds a non-skippable event that arrived while we were flushing the previous
97 /// `buffered` `Start*`. The next call to `next_event` returns this before pulling
98 /// from `inner` again.
99 pending: Option<Event>,
100}
101
102impl<S: EventSource> SkipEmptyBlocks<S> {
103 /// Wraps the given source so that empty `Heading`, `BlockQuote`, and `Paragraph`
104 /// Start/End pairs are suppressed in the emitted stream.
105 #[inline]
106 pub fn new(inner: S) -> Self {
107 Self {
108 inner,
109 buffered: None,
110 pending: None,
111 }
112 }
113}
114
115impl<S: EventSource> EventSource for SkipEmptyBlocks<S> {
116 #[inline]
117 fn next_event(&mut self) -> Result<Option<Event>> {
118 loop {
119 // 1. Drain `pending` first (already decided to emit on a previous call).
120 if let Some(pending) = self.pending.take() {
121 return Ok(Some(pending));
122 }
123 // 2. If a buffered `Start*` exists, peek next from inner.
124 // NOTE: `?` propagation here means an `Err` from inner while a
125 // `Start*` is buffered surfaces IMMEDIATELY on this same call; the
126 // buffered `Start*` is dropped. This is intentional and matches the
127 // project's "Fail Fast" principle (MANIFESTO.md).
128 if let Some(buffered) = self.buffered.take() {
129 match self.inner.next_event()? {
130 Some(next) if is_matching_end(&buffered, &next) => {
131 // Empty block detected: drop BOTH and keep draining iteratively.
132 continue;
133 }
134 Some(next) if is_skippable_start(&next) => {
135 // Emit `buffered` now; the new `Start*` becomes the new buffer.
136 self.buffered = Some(next);
137 return Ok(Some(buffered));
138 }
139 Some(next) => {
140 // Emit `buffered` now; stash `next` as pending for the next call.
141 self.pending = Some(next);
142 return Ok(Some(buffered));
143 }
144 None => {
145 // Truncated stream: emit buffered `Start*`; subsequent call returns None.
146 return Ok(Some(buffered));
147 }
148 }
149 }
150 // 3. No buffer, no pending. Pull from inner.
151 match self.inner.next_event()? {
152 Some(event) if is_skippable_start(&event) => {
153 self.buffered = Some(event);
154 // Loop iterates: re-enter the buffered branch above to peek the next event.
155 }
156 other => return Ok(other),
157 }
158 }
159 }
160}
161
162// Returns true if `event` is a candidate start event for empty-block suppression.
163fn is_skippable_start(event: &Event) -> bool {
164 matches!(
165 event,
166 Event::StartHeading { .. } | Event::StartBlockQuote { .. } | Event::StartParagraph { .. }
167 )
168}
169
170// Returns true if `end` is the matching close event for the given `start`.
171fn is_matching_end(start: &Event, end: &Event) -> bool {
172 matches!(
173 (start, end),
174 (Event::StartHeading { .. }, Event::EndHeading)
175 | (Event::StartBlockQuote { .. }, Event::EndBlockQuote)
176 | (Event::StartParagraph { .. }, Event::EndParagraph)
177 )
178}
179
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Event, EventSource, Result};

    /// Minimal in-memory `EventSource` for testing.
    ///
    /// Replays a fixed list of events. When built with `with_terminal_error`, it returns
    /// a single `Err` once the list is exhausted and `Ok(None)` on every call after that.
    struct Replay {
        events: alloc::vec::IntoIter<Event>,
        error_at_end: bool,
    }

    impl Replay {
        /// Replays `events`, then reports end-of-stream.
        fn new(events: alloc::vec::Vec<Event>) -> Self {
            Self {
                events: events.into_iter(),
                error_at_end: false,
            }
        }

        /// Replays `events`, then fails once, then reports end-of-stream.
        fn with_terminal_error(events: alloc::vec::Vec<Event>) -> Self {
            Self {
                events: events.into_iter(),
                error_at_end: true,
            }
        }
    }

    impl EventSource for Replay {
        fn next_event(&mut self) -> Result<Option<Event>> {
            match self.events.next() {
                Some(event) => Ok(Some(event)),
                None if self.error_at_end => {
                    // Fail exactly once so callers can observe behavior after the error.
                    self.error_at_end = false;
                    Err(crate::Error::Other {
                        message: "simulated".into(),
                    })
                }
                None => Ok(None),
            }
        }
    }

    /// Pulls every event out of `src`, panicking on the first error.
    fn drain<S: EventSource>(mut src: S) -> alloc::vec::Vec<Event> {
        let mut out = alloc::vec::Vec::new();
        while let Some(e) = src.next_event().expect("unexpected error") {
            out.push(e);
        }
        out
    }

    /// Runs `events` through a `SkipEmptyBlocks` filter and collects the output.
    fn filter_all(events: alloc::vec::Vec<Event>) -> alloc::vec::Vec<Event> {
        drain(SkipEmptyBlocks::new(Replay::new(events)))
    }

    /// A `StartParagraph` with no alignment and no id.
    fn start_paragraph() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    /// A `Text` event carrying `content`.
    fn text(content: &str) -> Event {
        Event::Text {
            content: content.into(),
        }
    }

    #[test]
    fn empty_heading_is_dropped() {
        let output = filter_all(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndHeading,
        ]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn empty_block_quote_is_dropped() {
        let output = filter_all(alloc::vec![
            Event::StartBlockQuote { id: None },
            Event::EndBlockQuote,
        ]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn empty_paragraph_is_dropped() {
        let output = filter_all(alloc::vec![start_paragraph(), Event::EndParagraph]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn non_empty_heading_is_preserved() {
        let input = alloc::vec![
            Event::StartHeading { id: None, level: 2 },
            text("h"),
            Event::EndHeading,
        ];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn non_empty_paragraph_is_preserved() {
        let input = alloc::vec![start_paragraph(), text("p"), Event::EndParagraph];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn non_empty_block_quote_is_preserved() {
        // Also exercises the "skippable Start* followed by another skippable Start*" path.
        let input = alloc::vec![
            Event::StartBlockQuote { id: None },
            start_paragraph(),
            text("q"),
            Event::EndParagraph,
            Event::EndBlockQuote,
        ];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn consecutive_empty_headings_all_dropped() {
        let output = filter_all(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndHeading,
            Event::StartHeading { id: None, level: 2 },
            Event::EndHeading,
            Event::StartHeading { id: None, level: 3 },
            Event::EndHeading,
        ]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn empty_then_nonempty_heading() {
        let output = filter_all(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndHeading,
            Event::StartHeading { id: None, level: 2 },
            text("x"),
            Event::EndHeading,
        ]);
        assert_eq!(
            output,
            alloc::vec![
                Event::StartHeading { id: None, level: 2 },
                text("x"),
                Event::EndHeading,
            ]
        );
    }

    #[test]
    fn empty_heading_with_id_is_still_dropped() {
        // Proves the `{ .. }` wildcard in is_skippable_start ignores all fields.
        let output = filter_all(alloc::vec![
            Event::StartHeading {
                id: Some("anchor".into()),
                level: 1
            },
            Event::EndHeading,
        ]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn empty_paragraph_with_alignment_is_still_dropped() {
        // Proves alignment field is ignored by the wildcard pattern.
        let output = filter_all(alloc::vec![
            Event::StartParagraph {
                alignment: Some(crate::TextAlignment::Center),
                id: None
            },
            Event::EndParagraph,
        ]);
        assert_eq!(output, alloc::vec![]);
    }

    #[test]
    fn nested_empty_blockquote_containing_empty_paragraph_preserves_outer() {
        // Documents the no-cascading limitation: the inner empty paragraph is dropped,
        // but the outer block quote (now empty) is preserved. One-event look-ahead is
        // the contract.
        let output = filter_all(alloc::vec![
            Event::StartBlockQuote { id: None },
            start_paragraph(),
            Event::EndParagraph,
            Event::EndBlockQuote,
        ]);
        assert_eq!(
            output,
            alloc::vec![Event::StartBlockQuote { id: None }, Event::EndBlockQuote]
        );
    }

    #[test]
    fn non_skippable_kinds_pass_through_unchanged() {
        let input = alloc::vec![Event::StartTable { id: None }, Event::EndTable];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn empty_document_passes_through() {
        // StartDocument and EndDocument are NOT in the skip set.
        let input = alloc::vec![
            Event::StartDocument {
                id: None,
                language: None,
                metadata: None
            },
            Event::EndDocument,
        ];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn empty_block_between_content_is_dropped() {
        // An empty pair surrounded by ordinary events disappears without disturbing
        // its neighbours.
        let output = filter_all(alloc::vec![
            text("a"),
            start_paragraph(),
            Event::EndParagraph,
            text("b"),
        ]);
        assert_eq!(output, alloc::vec![text("a"), text("b")]);
    }

    #[test]
    fn mismatched_end_is_not_treated_as_empty_block() {
        // A skippable Start* followed by the End* of a *different* kind is not an empty
        // block: both events must pass through in order.
        let input = alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndParagraph,
        ];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn truncated_stream_emits_buffered_start() {
        // The inner source ends right after a skippable Start*: the held Start* is
        // flushed, and the following call reports end-of-stream.
        let input = alloc::vec![Event::StartHeading { id: None, level: 1 }];
        assert_eq!(filter_all(input.clone()), input);
    }

    #[test]
    fn error_from_inner_propagates_when_no_buffer() {
        // When there is no buffered Start*, an Err from the inner source propagates directly.
        let replay = Replay::with_terminal_error(alloc::vec![]);
        let mut filter = SkipEmptyBlocks::new(replay);
        assert!(filter.next_event().is_err());
    }

    #[test]
    fn error_while_start_buffered_surfaces_immediately() {
        // Fail-fast contract (MANIFESTO.md): when the inner source returns Err
        // while a Start* is buffered, the error propagates immediately and the
        // buffered Start* is dropped silently. The stream is considered terminated.
        let replay =
            Replay::with_terminal_error(alloc::vec![Event::StartHeading { id: None, level: 1 }]);
        let mut filter = SkipEmptyBlocks::new(replay);
        // First call: buffers StartHeading, calls inner (gets Err), propagates Err.
        assert!(filter.next_event().is_err());
        // Second call: no buffer, no pending, inner also returns Ok(None).
        assert_eq!(filter.next_event().unwrap(), None);
    }

    #[test]
    fn pending_event_is_emitted_before_later_error() {
        // Events that were already pulled as look-ahead are delivered before a later
        // inner error surfaces: the Start* and the stashed Text both come out first.
        let replay = Replay::with_terminal_error(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            text("x"),
        ]);
        let mut filter = SkipEmptyBlocks::new(replay);
        assert_eq!(
            filter.next_event().unwrap(),
            Some(Event::StartHeading { id: None, level: 1 })
        );
        assert_eq!(filter.next_event().unwrap(), Some(text("x")));
        assert!(filter.next_event().is_err());
    }

    #[test]
    fn send_sync_compile_assertion() {
        // Compile-time check only: fails to build if the adapter stops being Send + Sync.
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<SkipEmptyBlocks<Replay>>();
    }

    #[test]
    fn many_consecutive_empty_blocks_do_not_blow_stack() {
        // Stack-safety regression: the previous recursive implementation grew the call
        // stack by ~2 frames per empty block, so a long run could overflow Rust's default
        // 8 MiB main-thread / 2 MiB test-thread stack. The iterative `loop` form must
        // drain an arbitrarily long run in O(1) stack. 100_000 empties is well above the
        // overflow threshold of the old code and still completes in milliseconds.
        const N: usize = 100_000;
        let mut events = alloc::vec::Vec::with_capacity(N * 2);
        for _ in 0..N {
            events.push(Event::StartHeading { id: None, level: 1 });
            events.push(Event::EndHeading);
        }
        assert_eq!(filter_all(events), alloc::vec![]);
    }
}