Skip to main content

rumdl_lib/utils/
blockquote.rs

//! Blockquote-related utilities for rumdl.
//!
//! Provides functions for working with blockquote-prefixed lines, including
//! calculating effective indentation within a blockquote context.
5
/// Components of a line's blockquote prefix, borrowed from the parsed line.
///
/// Both compact (`>> text`) and spaced (`> > text`) nesting are recognised.
/// `prefix` keeps at most one space or tab after the final marker; any further
/// whitespace stays at the start of `content`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParsedBlockquotePrefix<'a> {
    /// Whitespace that precedes the first `>` marker.
    pub indent: &'a str,
    /// Start of the line through the final `>` plus, if present, one space/tab after it.
    pub prefix: &'a str,
    /// Remainder of the line following `prefix` (may start with extra spaces/tabs).
    pub content: &'a str,
    /// How many `>` markers open the line.
    pub nesting_level: usize,
    /// The whole whitespace run after the final `>`, including the blank counted in `prefix`.
    pub spaces_after_marker: &'a str,
}
24
/// Returns `true` when `byte` is an ASCII space or a horizontal tab.
#[inline]
fn is_space_or_tab(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t')
}
29
30/// Parse a blockquote prefix from a line.
31///
32/// Returns `None` when the line does not start (after leading indentation) with
33/// a blockquote marker.
34#[inline]
35pub fn parse_blockquote_prefix(line: &str) -> Option<ParsedBlockquotePrefix<'_>> {
36    let bytes = line.as_bytes();
37    let mut pos = 0;
38
39    // Leading indentation before the first marker.
40    while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
41        pos += 1;
42    }
43    let indent_end = pos;
44
45    if pos >= bytes.len() || bytes[pos] != b'>' {
46        return None;
47    }
48
49    let mut nesting_level = 0;
50    let mut prefix_end = pos;
51    let mut spaces_after_marker_start = pos;
52    let mut spaces_after_marker_end = pos;
53
54    loop {
55        if pos >= bytes.len() || bytes[pos] != b'>' {
56            break;
57        }
58
59        nesting_level += 1;
60        pos += 1; // past '>'
61        let marker_end = pos;
62
63        // Include at most one optional space/tab in the preserved prefix.
64        if pos < bytes.len() && is_space_or_tab(bytes[pos]) {
65            pos += 1;
66        }
67        let content_start_candidate = pos;
68
69        // Consume any additional whitespace.
70        while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
71            pos += 1;
72        }
73
74        // If another marker follows, all consumed whitespace belongs to nesting.
75        if pos < bytes.len() && bytes[pos] == b'>' {
76            continue;
77        }
78
79        // Final marker.
80        prefix_end = content_start_candidate;
81        spaces_after_marker_start = marker_end;
82        spaces_after_marker_end = pos;
83        break;
84    }
85
86    Some(ParsedBlockquotePrefix {
87        indent: &line[..indent_end],
88        prefix: &line[..prefix_end],
89        content: &line[prefix_end..],
90        nesting_level,
91        spaces_after_marker: &line[spaces_after_marker_start..spaces_after_marker_end],
92    })
93}
94
/// Compute a line's indentation as seen from inside a blockquote.
///
/// For a quoted line the interesting indent is the whitespace that follows the
/// blockquote markers, not the whitespace at the very start of the line. This
/// function measures that whitespace, in bytes, after dropping the markers and
/// the single optional space that belongs to the final marker.
///
/// # Arguments
///
/// * `line_content` - The full line, including any blockquote markers
/// * `expected_bq_level` - The blockquote nesting level the line must have (0 means
///   "not in a blockquote")
/// * `fallback_indent` - Value returned when `expected_bq_level` is 0 or the line's
///   nesting level differs from it
///
/// # Returns
///
/// - `fallback_indent` when `expected_bq_level` is 0
/// - `fallback_indent` when the line's marker count differs from `expected_bq_level`
/// - otherwise the amount of leading whitespace after the markers, not counting the
///   one space directly after the final `>`
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::blockquote::effective_indent_in_blockquote;
///
/// // Regular line (no blockquote context)
/// assert_eq!(effective_indent_in_blockquote("   text", 0, 3), 3);
///
/// // Blockquote with 2 spaces after marker (first space consumed as marker syntax)
/// assert_eq!(effective_indent_in_blockquote(">  text", 1, 0), 1);
///
/// // Nested blockquote with 3 spaces after last marker (first space consumed as marker syntax)
/// assert_eq!(effective_indent_in_blockquote("> >   text", 2, 0), 2);
///
/// // Mismatched blockquote level - returns fallback
/// assert_eq!(effective_indent_in_blockquote("> text", 2, 5), 5);
/// ```
pub fn effective_indent_in_blockquote(line_content: &str, expected_bq_level: usize, fallback_indent: usize) -> usize {
    if expected_bq_level == 0 {
        return fallback_indent;
    }

    // Walk the leading run of `>` and whitespace once, counting markers and
    // remembering where the last one ends. Covers "> > text" and ">> text" alike.
    let mut marker_count = 0;
    let mut after_last_marker = 0;
    for (idx, ch) in line_content.char_indices() {
        if ch == '>' {
            marker_count += 1;
            after_last_marker = idx + 1; // '>' is a single byte
        } else if !ch.is_whitespace() {
            break;
        }
    }

    if marker_count != expected_bq_level {
        return fallback_indent;
    }

    // Only a literal space right after the final marker is treated as marker syntax.
    let quoted = &line_content[after_last_marker..];
    let quoted = quoted.strip_prefix(' ').unwrap_or(quoted);
    quoted.len() - quoted.trim_start().len()
}
168
/// Count the `>` markers in the leading run of markers and whitespace.
///
/// Works for both compact (`>>text`) and spaced (`> > text`) nesting; counting
/// stops at the first character that is neither `>` nor whitespace.
fn count_blockquote_level(line_content: &str) -> usize {
    // Byte offset where the leading marker/whitespace run ends.
    let run_end = line_content
        .find(|ch: char| ch != '>' && !ch.is_whitespace())
        .unwrap_or(line_content.len());

    line_content[..run_end].matches('>').count()
}
179
180/// Extract the content after blockquote markers.
181///
182/// Returns the portion of the line after all blockquote markers and the
183/// optional space following the last marker.
184///
185/// # Examples
186///
187/// ```
188/// use rumdl_lib::utils::blockquote::content_after_blockquote;
189///
190/// assert_eq!(content_after_blockquote("> text", 1), "text");
191/// assert_eq!(content_after_blockquote(">  indented", 1), " indented");
192/// assert_eq!(content_after_blockquote("> > nested", 2), "nested");
193/// assert_eq!(content_after_blockquote("no quote", 0), "no quote");
194/// ```
195pub fn content_after_blockquote(line_content: &str, expected_bq_level: usize) -> &str {
196    if expected_bq_level == 0 {
197        return line_content;
198    }
199
200    // First, verify the line has the expected blockquote level
201    let actual_level = count_blockquote_level(line_content);
202    if actual_level != expected_bq_level {
203        return line_content;
204    }
205
206    let mut pos = 0;
207    let mut found_markers = 0;
208    for c in line_content.chars() {
209        pos += c.len_utf8();
210        if c == '>' {
211            found_markers += 1;
212            if found_markers == expected_bq_level {
213                // Skip optional space after final >
214                if line_content.get(pos..pos + 1) == Some(" ") {
215                    pos += 1;
216                }
217                break;
218            }
219        }
220    }
221
222    &line_content[pos..]
223}
224
225/// Strip all blockquote markers from a line, returning the content after them.
226///
227/// Unlike [`content_after_blockquote`], this does not require knowing the
228/// expected nesting level — it strips all `>` markers and their trailing spaces.
229/// Returns the original line unchanged if it has no blockquote markers.
230///
231/// # Examples
232///
233/// ```
234/// use rumdl_lib::utils::blockquote::strip_blockquote_prefix;
235///
236/// assert_eq!(strip_blockquote_prefix("> text"), "text");
237/// assert_eq!(strip_blockquote_prefix("> > nested"), "nested");
238/// assert_eq!(strip_blockquote_prefix(">>compact"), "compact");
239/// assert_eq!(strip_blockquote_prefix("  > indented"), "indented");
240/// assert_eq!(strip_blockquote_prefix("no quote"), "no quote");
241/// ```
242pub fn strip_blockquote_prefix(line: &str) -> &str {
243    match parse_blockquote_prefix(line) {
244        Some(parsed) => parsed.content,
245        None => line,
246    }
247}
248
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `effective_indent_in_blockquote` over `(line, level, fallback, expected)` cases.
    fn check_effective_indent(cases: &[(&str, usize, usize, usize)]) {
        for &(line, level, fallback, expected) in cases {
            assert_eq!(effective_indent_in_blockquote(line, level, fallback), expected);
        }
    }

    /// Runs `count_blockquote_level` over `(line, expected)` cases.
    fn check_level(cases: &[(&str, usize)]) {
        for &(line, expected) in cases {
            assert_eq!(count_blockquote_level(line), expected);
        }
    }

    /// Runs `content_after_blockquote` over `(line, level, expected)` cases.
    fn check_content(cases: &[(&str, usize, &str)]) {
        for &(line, level, expected) in cases {
            assert_eq!(content_after_blockquote(line, level), expected);
        }
    }

    // ==========================================================================
    // parse_blockquote_prefix tests
    // ==========================================================================

    #[test]
    fn test_parse_blockquote_prefix_compact_nested() {
        let parsed = parse_blockquote_prefix(">> text").expect("should parse compact nested blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "",
                prefix: ">> ",
                content: "text",
                nesting_level: 2,
                spaces_after_marker: " ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_spaced_nested() {
        let parsed = parse_blockquote_prefix("> >  text").expect("should parse spaced nested blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "",
                prefix: "> > ",
                content: " text",
                nesting_level: 2,
                spaces_after_marker: "  ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_with_indent() {
        let parsed = parse_blockquote_prefix("   > quote").expect("should parse indented blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "   ",
                prefix: "   > ",
                content: "quote",
                nesting_level: 1,
                spaces_after_marker: " ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_non_blockquote() {
        for line in ["plain text", "  plain text"] {
            assert!(parse_blockquote_prefix(line).is_none());
        }
    }

    // ==========================================================================
    // effective_indent_in_blockquote tests
    // ==========================================================================

    #[test]
    fn test_effective_indent_no_blockquote_context() {
        // A level of 0 always yields the fallback, whatever the line looks like.
        check_effective_indent(&[("text", 0, 0, 0), ("   text", 0, 3, 3), ("> text", 0, 5, 5)]);
    }

    #[test]
    fn test_effective_indent_single_level_blockquote() {
        // One marker followed by a growing number of spaces.
        check_effective_indent(&[
            ("> text", 1, 99, 0),
            (">  text", 1, 99, 1),
            (">   text", 1, 99, 2),
            (">    text", 1, 99, 3),
        ]);
    }

    #[test]
    fn test_effective_indent_no_space_after_marker() {
        // Content glued directly to the marker has no indent.
        check_effective_indent(&[(">text", 1, 99, 0), (">>text", 2, 99, 0)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_compact() {
        // Compact nesting: >> text, >>  text, >>   text
        check_effective_indent(&[(">> text", 2, 99, 0), (">>  text", 2, 99, 1), (">>   text", 2, 99, 2)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_spaced() {
        // Spaced nesting: > > text, > >  text, > >   text
        check_effective_indent(&[
            ("> > text", 2, 99, 0),
            ("> >  text", 2, 99, 1),
            ("> >   text", 2, 99, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_mismatched_level() {
        // A nesting level other than the expected one yields the fallback.
        check_effective_indent(&[("> text", 2, 42, 42), (">> text", 1, 42, 42), ("text", 1, 42, 42)]);
    }

    #[test]
    fn test_effective_indent_empty_blockquote() {
        // Blockquote lines that carry no content.
        check_effective_indent(&[(">", 1, 99, 0), ("> ", 1, 99, 0), (">  ", 1, 99, 1)]);
    }

    #[test]
    fn test_effective_indent_issue_268_case() {
        // Pattern from issue #268: ">   text" must report 2 spaces of indent
        // (list continuation).
        check_effective_indent(&[
            (">   Opening the app", 1, 0, 2),
            (">   [**See preview here!**](https://example.com)", 1, 0, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_triple_nested() {
        // Three levels, spaced and compact.
        check_effective_indent(&[
            ("> > > text", 3, 99, 0),
            ("> > >  text", 3, 99, 1),
            (">>> text", 3, 99, 0),
            (">>>  text", 3, 99, 1),
        ]);
    }

    // ==========================================================================
    // count_blockquote_level tests
    // ==========================================================================

    #[test]
    fn test_count_blockquote_level_none() {
        check_level(&[("regular text", 0), ("   indented text", 0), ("", 0)]);
    }

    #[test]
    fn test_count_blockquote_level_single() {
        check_level(&[("> text", 1), (">text", 1), (">", 1)]);
    }

    #[test]
    fn test_count_blockquote_level_nested() {
        check_level(&[(">> text", 2), ("> > text", 2), (">>> text", 3), ("> > > text", 3)]);
    }

    // ==========================================================================
    // content_after_blockquote tests
    // ==========================================================================

    #[test]
    fn test_content_after_blockquote_no_quote() {
        check_content(&[("text", 0, "text"), ("   indented", 0, "   indented")]);
    }

    #[test]
    fn test_content_after_blockquote_single() {
        check_content(&[("> text", 1, "text"), (">text", 1, "text"), (">  indented", 1, " indented")]);
    }

    #[test]
    fn test_content_after_blockquote_nested() {
        check_content(&[
            (">> text", 2, "text"),
            ("> > text", 2, "text"),
            ("> >  indented", 2, " indented"),
        ]);
    }

    #[test]
    fn test_content_after_blockquote_mismatched_level() {
        // A level mismatch returns the original line untouched.
        check_content(&[("> text", 2, "> text"), (">> text", 1, ">> text")]);
    }
}