Skip to main content

docspec_core/
skip_empty_blocks.rs

1#![cfg_attr(test, allow(clippy::unwrap_used, clippy::expect_used))]
2
3use crate::{Event, EventSource, Result};
4
5/// A streaming `EventSource` adapter that suppresses empty `Heading`, `BlockQuote`, and
6/// `Paragraph` Start/End pairs from the wrapped source.
7///
8/// The adapter uses a **1-event look-back** (hold-1) algorithm. When it sees a candidate
9/// skippable `Start*` event (`StartHeading`, `StartBlockQuote`, or `StartParagraph`), it
10/// buffers that event and peeks the next event from the inner source. If the next event is
11/// the matching `End*` (i.e., the block is empty), both events are dropped and the adapter
12/// keeps draining iteratively until it finds an event to emit. If the next event is something
13/// else, the buffered `Start*` is emitted immediately and the "something else" is stashed as
14/// `pending` for the next call. Memory is O(1) — at most two `Event` values are held at any
15/// time. Stack is O(1) — the implementation is a single `loop` with no recursion, so an
16/// arbitrarily long run of empty blocks consumes constant stack regardless of input size.
17///
18/// # Skip Set
19///
20/// Exactly three Start/End pairs are matched:
21///
22/// - `Event::StartHeading { .. }` ↔ `Event::EndHeading`
23/// - `Event::StartBlockQuote { .. }` ↔ `Event::EndBlockQuote`
24/// - `Event::StartParagraph { .. }` ↔ `Event::EndParagraph`
25///
26/// No other variants are suppressed. Empty `StartTable`, `StartOrderedListItem`, and all
27/// other containers pass through unchanged.
28///
29/// # Asymmetric API
30///
31/// `docspec-cli` and `docspec-http` apply this filter automatically by default.
32/// Library users opt in by wrapping their `EventSource` explicitly:
33/// `SkipEmptyBlocks::new(my_reader)`.
34///
35/// # Known Limitations
36///
37/// **No cascading**: an outer container that becomes empty *because* its inner contents were
38/// filtered is preserved. Example: `StartBlockQuote → StartParagraph → EndParagraph → EndBlockQuote`
39/// produces `StartBlockQuote → EndBlockQuote` (the inner empty paragraph pair is dropped, but the
40/// outer block quote is preserved). A subsequent pass would be needed to suppress the outer.
41///
42/// **Empty table cells**: an empty table cell containing only `StartParagraph → EndParagraph`
43/// will have the inner pair dropped, leaving the cell with no child events. The cell itself is
44/// preserved (table cells are not in the skip set).
45///
46/// **Fail-fast**: if the inner source returns `Err` while a `Start*` is buffered, the error
47/// propagates immediately and the buffered `Start*` is dropped silently. The stream is considered
48/// terminated; no partial recovery is attempted.
49///
50/// # Example
51///
52/// ```
53/// use docspec_core::{Event, EventSource, Result, SkipEmptyBlocks};
54///
55/// struct Replay {
56///     events: std::vec::IntoIter<Event>,
57/// }
58/// impl Replay {
59///     fn new(events: Vec<Event>) -> Self {
60///         Self { events: events.into_iter() }
61///     }
62/// }
63/// impl EventSource for Replay {
64///     fn next_event(&mut self) -> Result<Option<Event>> {
65///         Ok(self.events.next())
66///     }
67/// }
68///
69/// // An empty heading followed by a heading with text:
70/// let inner = Replay::new(vec![
71///     Event::StartHeading { id: None, level: 1 },
72///     Event::EndHeading,
73///     Event::StartHeading { id: None, level: 2 },
74///     Event::Text { content: String::from("Hello") },
75///     Event::EndHeading,
76/// ]);
77/// let mut filtered = SkipEmptyBlocks::new(inner);
78///
79/// // The empty H1 is dropped; the H2 with text passes through.
80/// assert_eq!(
81///     filtered.next_event().unwrap(),
82///     Some(Event::StartHeading { id: None, level: 2 }),
83/// );
84/// assert_eq!(
85///     filtered.next_event().unwrap(),
86///     Some(Event::Text { content: String::from("Hello") }),
87/// );
88/// assert_eq!(filtered.next_event().unwrap(), Some(Event::EndHeading));
89/// assert_eq!(filtered.next_event().unwrap(), None);
90/// ```
91pub struct SkipEmptyBlocks<S: EventSource> {
92    inner: S,
93    /// Holds a candidate skippable `Start*` (`StartHeading`, `StartBlockQuote`, or
94    /// `StartParagraph`) while we peek the next event to decide drop-or-flush.
95    buffered: Option<Event>,
96    /// Holds a non-skippable event that arrived while we were flushing the previous
97    /// `buffered` `Start*`. The next call to `next_event` returns this before pulling
98    /// from `inner` again.
99    pending: Option<Event>,
100}
101
102impl<S: EventSource> SkipEmptyBlocks<S> {
103    /// Wraps the given source so that empty `Heading`, `BlockQuote`, and `Paragraph`
104    /// Start/End pairs are suppressed in the emitted stream.
105    #[inline]
106    pub fn new(inner: S) -> Self {
107        Self {
108            inner,
109            buffered: None,
110            pending: None,
111        }
112    }
113}
114
115impl<S: EventSource> EventSource for SkipEmptyBlocks<S> {
116    #[inline]
117    fn next_event(&mut self) -> Result<Option<Event>> {
118        loop {
119            // 1. Drain `pending` first (already decided to emit on a previous call).
120            if let Some(pending) = self.pending.take() {
121                return Ok(Some(pending));
122            }
123            // 2. If a buffered `Start*` exists, peek next from inner.
124            //    NOTE: `?` propagation here means an `Err` from inner while a
125            //    `Start*` is buffered surfaces IMMEDIATELY on this same call; the
126            //    buffered `Start*` is dropped. This is intentional and matches the
127            //    project's "Fail Fast" principle (MANIFESTO.md).
128            if let Some(buffered) = self.buffered.take() {
129                match self.inner.next_event()? {
130                    Some(next) if is_matching_end(&buffered, &next) => {
131                        // Empty block detected: drop BOTH and keep draining iteratively.
132                        continue;
133                    }
134                    Some(next) if is_skippable_start(&next) => {
135                        // Emit `buffered` now; the new `Start*` becomes the new buffer.
136                        self.buffered = Some(next);
137                        return Ok(Some(buffered));
138                    }
139                    Some(next) => {
140                        // Emit `buffered` now; stash `next` as pending for the next call.
141                        self.pending = Some(next);
142                        return Ok(Some(buffered));
143                    }
144                    None => {
145                        // Truncated stream: emit buffered `Start*`; subsequent call returns None.
146                        return Ok(Some(buffered));
147                    }
148                }
149            }
150            // 3. No buffer, no pending. Pull from inner.
151            match self.inner.next_event()? {
152                Some(event) if is_skippable_start(&event) => {
153                    self.buffered = Some(event);
154                    // Loop iterates: re-enter the buffered branch above to peek the next event.
155                }
156                other => return Ok(other),
157            }
158        }
159    }
160}
161
162// Returns true if `event` is a candidate start event for empty-block suppression.
163fn is_skippable_start(event: &Event) -> bool {
164    matches!(
165        event,
166        Event::StartHeading { .. } | Event::StartBlockQuote { .. } | Event::StartParagraph { .. }
167    )
168}
169
170// Returns true if `end` is the matching close event for the given `start`.
171fn is_matching_end(start: &Event, end: &Event) -> bool {
172    matches!(
173        (start, end),
174        (Event::StartHeading { .. }, Event::EndHeading)
175            | (Event::StartBlockQuote { .. }, Event::EndBlockQuote)
176            | (Event::StartParagraph { .. }, Event::EndParagraph)
177    )
178}
179
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Event, EventSource, Result};

    /// Owned event list used throughout these tests.
    type Events = alloc::vec::Vec<Event>;

    /// Minimal in-memory `EventSource` for testing.
    ///
    /// Replays a fixed list of events. When `error_at_end` is set, the first poll after
    /// the list is exhausted returns a simulated error; every later poll returns `Ok(None)`.
    struct Replay {
        events: alloc::vec::IntoIter<Event>,
        error_at_end: bool,
    }

    impl Replay {
        /// Source that yields `events` and then ends cleanly.
        fn new(events: Events) -> Self {
            Self {
                events: events.into_iter(),
                error_at_end: false,
            }
        }

        /// Source that yields `events` and then fails once before ending.
        fn with_terminal_error(events: Events) -> Self {
            Self {
                events: events.into_iter(),
                error_at_end: true,
            }
        }
    }

    impl EventSource for Replay {
        fn next_event(&mut self) -> Result<Option<Event>> {
            match self.events.next() {
                Some(event) => Ok(Some(event)),
                None if self.error_at_end => {
                    // One-shot: clear the flag so the following poll reports a clean end.
                    self.error_at_end = false;
                    Err(crate::Error::Other {
                        message: "simulated".into(),
                    })
                }
                None => Ok(None),
            }
        }
    }

    /// Pulls every event out of `src`, panicking on the first error.
    fn drain<S: EventSource>(mut src: S) -> Events {
        let mut out = Events::new();
        while let Some(event) = src.next_event().expect("unexpected error") {
            out.push(event);
        }
        out
    }

    /// Runs `events` through a fresh `SkipEmptyBlocks` and collects everything it emits.
    fn filtered(events: Events) -> Events {
        drain(SkipEmptyBlocks::new(Replay::new(events)))
    }

    /// Asserts that the filter emits nothing at all for `events`.
    #[track_caller]
    fn assert_all_dropped(events: Events) {
        assert_eq!(filtered(events), Events::new());
    }

    /// Asserts that the filter emits `events` exactly as given.
    #[track_caller]
    fn assert_unchanged(events: Events) {
        let expected = events.clone();
        assert_eq!(filtered(events), expected);
    }

    /// A `StartParagraph` with no alignment and no id.
    fn plain_paragraph_start() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    /// A `Text` event carrying `content`.
    fn text(content: &'static str) -> Event {
        Event::Text {
            content: content.into(),
        }
    }

    #[test]
    fn empty_heading_is_dropped() {
        assert_all_dropped(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndHeading,
        ]);
    }

    #[test]
    fn empty_block_quote_is_dropped() {
        assert_all_dropped(alloc::vec![
            Event::StartBlockQuote { id: None },
            Event::EndBlockQuote,
        ]);
    }

    #[test]
    fn empty_paragraph_is_dropped() {
        assert_all_dropped(alloc::vec![plain_paragraph_start(), Event::EndParagraph]);
    }

    #[test]
    fn non_empty_heading_is_preserved() {
        assert_unchanged(alloc::vec![
            Event::StartHeading { id: None, level: 2 },
            text("h"),
            Event::EndHeading,
        ]);
    }

    #[test]
    fn non_empty_paragraph_is_preserved() {
        assert_unchanged(alloc::vec![
            plain_paragraph_start(),
            text("p"),
            Event::EndParagraph,
        ]);
    }

    #[test]
    fn non_empty_block_quote_is_preserved() {
        // Also exercises the "buffered Start* followed by another skippable Start*" path.
        assert_unchanged(alloc::vec![
            Event::StartBlockQuote { id: None },
            plain_paragraph_start(),
            text("q"),
            Event::EndParagraph,
            Event::EndBlockQuote,
        ]);
    }

    #[test]
    fn consecutive_empty_headings_all_dropped() {
        let mut events = Events::new();
        for level in 1..=3 {
            events.push(Event::StartHeading { id: None, level });
            events.push(Event::EndHeading);
        }
        assert_all_dropped(events);
    }

    #[test]
    fn empty_then_nonempty_heading() {
        let surviving = alloc::vec![
            Event::StartHeading { id: None, level: 2 },
            text("x"),
            Event::EndHeading,
        ];
        let mut input = alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndHeading,
        ];
        input.extend(surviving.iter().cloned());
        assert_eq!(filtered(input), surviving);
    }

    #[test]
    fn empty_heading_with_id_is_still_dropped() {
        // Proves the `{ .. }` wildcard in is_skippable_start ignores all fields.
        assert_all_dropped(alloc::vec![
            Event::StartHeading {
                id: Some("anchor".into()),
                level: 1
            },
            Event::EndHeading,
        ]);
    }

    #[test]
    fn empty_paragraph_with_alignment_is_still_dropped() {
        // Proves alignment field is ignored by the wildcard pattern.
        assert_all_dropped(alloc::vec![
            Event::StartParagraph {
                alignment: Some(crate::TextAlignment::Center),
                id: None
            },
            Event::EndParagraph,
        ]);
    }

    #[test]
    fn nested_empty_blockquote_containing_empty_paragraph_preserves_outer() {
        // Documents the no-cascading limitation: the inner empty paragraph is dropped,
        // but the outer block quote (now empty) is preserved. Lookback-1 is the contract.
        let input = alloc::vec![
            Event::StartBlockQuote { id: None },
            plain_paragraph_start(),
            Event::EndParagraph,
            Event::EndBlockQuote,
        ];
        assert_eq!(
            filtered(input),
            alloc::vec![Event::StartBlockQuote { id: None }, Event::EndBlockQuote]
        );
    }

    #[test]
    fn non_skippable_kinds_pass_through_unchanged() {
        assert_unchanged(alloc::vec![Event::StartTable { id: None }, Event::EndTable]);
    }

    #[test]
    fn empty_document_passes_through() {
        // StartDocument and EndDocument are NOT in the skip set.
        assert_unchanged(alloc::vec![
            Event::StartDocument {
                id: None,
                language: None,
                metadata: None
            },
            Event::EndDocument,
        ]);
    }

    #[test]
    fn truncated_stream_flushes_buffered_start() {
        // The inner source ends while a Start* is buffered: the Start* must still be
        // emitted, and the following call reports end-of-stream.
        assert_unchanged(alloc::vec![Event::StartHeading { id: None, level: 1 }]);
    }

    #[test]
    fn mismatched_end_passes_through_unchanged() {
        // An End* that does not pair with the buffered Start* is not an empty block;
        // both events are emitted in their original order.
        assert_unchanged(alloc::vec![
            Event::StartHeading { id: None, level: 1 },
            Event::EndParagraph,
        ]);
    }

    #[test]
    fn error_from_inner_propagates_when_no_buffer() {
        // When there is no buffered Start*, an Err from the inner source propagates directly.
        let mut filter = SkipEmptyBlocks::new(Replay::with_terminal_error(Events::new()));
        assert!(filter.next_event().is_err());
    }

    #[test]
    fn error_while_start_buffered_surfaces_immediately() {
        // Fail-fast contract (MANIFESTO.md): when the inner source returns Err
        // while a Start* is buffered, the error propagates immediately and the
        // buffered Start* is dropped silently. The stream is considered terminated.
        let source =
            Replay::with_terminal_error(alloc::vec![Event::StartHeading { id: None, level: 1 }]);
        let mut filter = SkipEmptyBlocks::new(source);
        // First call: buffers StartHeading, calls inner (gets Err), propagates Err.
        assert!(filter.next_event().is_err());
        // Second call: no buffer, no pending, inner also returns Ok(None).
        assert_eq!(filter.next_event().unwrap(), None);
    }

    #[test]
    fn send_sync_compile_assertion() {
        // Compile-time check only: the adapter is Send + Sync when its source is.
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<SkipEmptyBlocks<Replay>>();
    }

    #[test]
    fn many_consecutive_empty_blocks_do_not_blow_stack() {
        // Stack-safety regression: the previous recursive implementation grew the call
        // stack by ~2 frames per empty block, so a long run could overflow Rust's default
        // 8 MiB main-thread / 2 MiB test-thread stack. The iterative `loop` form must
        // drain an arbitrarily long run in O(1) stack. 100_000 empties is well above the
        // overflow threshold of the old code and still completes in milliseconds.
        const N: usize = 100_000;
        let mut events = Events::with_capacity(N * 2);
        for _ in 0..N {
            events.push(Event::StartHeading { id: None, level: 1 });
            events.push(Event::EndHeading);
        }
        assert_all_dropped(events);
    }
}