Skip to main content

rumdl_lib/utils/
blockquote.rs

1//! Blockquote-related utilities for rumdl.
2//!
3//! Provides functions for working with blockquote-prefixed lines, including
4//! calculating effective indentation within blockquote context.
5
/// The pieces of a line that begins with one or more blockquote markers.
///
/// Produced by [`parse_blockquote_prefix`]. Both compact (`>> text`) and
/// spaced (`> > text`) nesting are recognised. Every field borrows from the
/// parsed line; nothing is copied.
///
/// Only the first space/tab following the last `>` is folded into `prefix`.
/// Any further whitespace stays at the front of `content`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParsedBlockquotePrefix<'a> {
    /// Spaces/tabs that precede the first `>`.
    pub indent: &'a str,
    /// Everything from the start of the line through the last `>` plus, if
    /// present, a single space/tab directly after it (so it includes `indent`).
    pub prefix: &'a str,
    /// The remainder of the line after `prefix`; it can start with extra
    /// spaces/tabs that were not absorbed into `prefix`.
    pub content: &'a str,
    /// How many `>` markers open the line.
    pub nesting_level: usize,
    /// The full run of spaces/tabs after the last `>`, i.e. the optional
    /// separator that `prefix` keeps plus whatever `content` starts with.
    pub spaces_after_marker: &'a str,
}
24
/// Returns `true` when `byte` is an ASCII space or a horizontal tab.
#[inline]
fn is_space_or_tab(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t')
}
29
30/// Parse a blockquote prefix from a line.
31///
32/// Returns `None` when the line does not start (after leading indentation) with
33/// a blockquote marker.
34#[inline]
35pub fn parse_blockquote_prefix(line: &str) -> Option<ParsedBlockquotePrefix<'_>> {
36    let bytes = line.as_bytes();
37    let mut pos = 0;
38
39    // Leading indentation before the first marker.
40    while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
41        pos += 1;
42    }
43    let indent_end = pos;
44
45    if pos >= bytes.len() || bytes[pos] != b'>' {
46        return None;
47    }
48
49    let mut nesting_level = 0;
50    let mut prefix_end = pos;
51    let mut spaces_after_marker_start = pos;
52    let mut spaces_after_marker_end = pos;
53
54    loop {
55        if pos >= bytes.len() || bytes[pos] != b'>' {
56            break;
57        }
58
59        nesting_level += 1;
60        pos += 1; // past '>'
61        let marker_end = pos;
62
63        // Include at most one optional space/tab in the preserved prefix.
64        if pos < bytes.len() && is_space_or_tab(bytes[pos]) {
65            pos += 1;
66        }
67        let content_start_candidate = pos;
68
69        // Consume any additional whitespace.
70        while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
71            pos += 1;
72        }
73
74        // If another marker follows, all consumed whitespace belongs to nesting.
75        if pos < bytes.len() && bytes[pos] == b'>' {
76            continue;
77        }
78
79        // Final marker.
80        prefix_end = content_start_candidate;
81        spaces_after_marker_start = marker_end;
82        spaces_after_marker_end = pos;
83        break;
84    }
85
86    Some(ParsedBlockquotePrefix {
87        indent: &line[..indent_end],
88        prefix: &line[..prefix_end],
89        content: &line[prefix_end..],
90        nesting_level,
91        spaces_after_marker: &line[spaces_after_marker_start..spaces_after_marker_end],
92    })
93}
94
/// Calculate the effective indentation of a line within a blockquote context.
///
/// For a line inside a blockquote, the whitespace in front of the `>` markers
/// says nothing about how the content is indented; the meaningful indent is the
/// whitespace *after* the markers. One ASCII space directly after the final `>`
/// is treated as the optional marker separator and is **not** counted.
///
/// # Arguments
///
/// * `line_content` - The full line content including any blockquote markers
/// * `expected_bq_level` - The blockquote nesting level to match (0 for no blockquote)
/// * `fallback_indent` - The indent to return if blockquote levels don't match or if
///   `expected_bq_level` is 0
///
/// # Returns
///
/// The effective indentation:
/// - If `expected_bq_level` is 0: returns `fallback_indent`
/// - If the line's blockquote level matches `expected_bq_level`: returns the number of
///   bytes of leading whitespace that remain after the markers and the optional
///   separator space have been stripped
/// - If blockquote levels don't match: returns `fallback_indent`
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::blockquote::effective_indent_in_blockquote;
///
/// // Regular line (no blockquote context)
/// assert_eq!(effective_indent_in_blockquote("   text", 0, 3), 3);
///
/// // Blockquote line with 2 spaces after marker: the first is the separator
/// assert_eq!(effective_indent_in_blockquote(">  text", 1, 0), 1);
///
/// // Nested blockquote with 3 spaces after markers: the first is the separator
/// assert_eq!(effective_indent_in_blockquote("> >   text", 2, 0), 2);
///
/// // Mismatched blockquote level - returns fallback
/// assert_eq!(effective_indent_in_blockquote("> text", 2, 5), 5);
/// ```
pub fn effective_indent_in_blockquote(line_content: &str, expected_bq_level: usize, fallback_indent: usize) -> usize {
    if expected_bq_level == 0 {
        return fallback_indent;
    }

    // Walk the leading run of `>` and whitespace once, counting markers and
    // remembering where the last one ends. Markers can be separated by
    // whitespace ("> > text") or written compactly (">> text").
    let mut line_bq_level = 0;
    let mut markers_end = 0;
    for (idx, c) in line_content.char_indices() {
        if c == '>' {
            line_bq_level += 1;
            markers_end = idx + c.len_utf8();
        } else if !c.is_whitespace() {
            break;
        }
    }

    if line_bq_level != expected_bq_level {
        return fallback_indent;
    }

    // Drop the optional single space after the final `>`, then measure what
    // whitespace is left in front of the content.
    let after_markers = &line_content[markers_end..];
    let after_bq = after_markers.strip_prefix(' ').unwrap_or(after_markers);
    after_bq.len() - after_bq.trim_start().len()
}
168
/// Count the blockquote markers (`>`) that open a line.
///
/// Scanning stops at the first character that is neither `>` nor whitespace,
/// so compact (`>>text`) and spaced (`> > text`) nesting give the same result,
/// and a `>` appearing later in the text is not counted.
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::blockquote::count_blockquote_level;
///
/// assert_eq!(count_blockquote_level("regular text"), 0);
/// assert_eq!(count_blockquote_level("> quoted"), 1);
/// assert_eq!(count_blockquote_level(">> nested"), 2);
/// assert_eq!(count_blockquote_level("> > spaced nested"), 2);
/// ```
pub fn count_blockquote_level(line_content: &str) -> usize {
    let mut level = 0;
    for ch in line_content.chars() {
        if ch == '>' {
            level += 1;
        } else if !ch.is_whitespace() {
            // First real content character: the marker run is over.
            break;
        }
    }
    level
}
190
191/// Extract the content after blockquote markers.
192///
193/// Returns the portion of the line after all blockquote markers and the
194/// optional space following the last marker.
195///
196/// # Examples
197///
198/// ```
199/// use rumdl_lib::utils::blockquote::content_after_blockquote;
200///
201/// assert_eq!(content_after_blockquote("> text", 1), "text");
202/// assert_eq!(content_after_blockquote(">  indented", 1), " indented");
203/// assert_eq!(content_after_blockquote("> > nested", 2), "nested");
204/// assert_eq!(content_after_blockquote("no quote", 0), "no quote");
205/// ```
206pub fn content_after_blockquote(line_content: &str, expected_bq_level: usize) -> &str {
207    if expected_bq_level == 0 {
208        return line_content;
209    }
210
211    // First, verify the line has the expected blockquote level
212    let actual_level = count_blockquote_level(line_content);
213    if actual_level != expected_bq_level {
214        return line_content;
215    }
216
217    let mut pos = 0;
218    let mut found_markers = 0;
219    for c in line_content.chars() {
220        pos += c.len_utf8();
221        if c == '>' {
222            found_markers += 1;
223            if found_markers == expected_bq_level {
224                // Skip optional space after final >
225                if line_content.get(pos..pos + 1) == Some(" ") {
226                    pos += 1;
227                }
228                break;
229            }
230        }
231    }
232
233    &line_content[pos..]
234}
235
236/// Strip all blockquote markers from a line, returning the content after them.
237///
238/// Unlike [`content_after_blockquote`], this does not require knowing the
239/// expected nesting level — it strips all `>` markers and their trailing spaces.
240/// Returns the original line unchanged if it has no blockquote markers.
241///
242/// # Examples
243///
244/// ```
245/// use rumdl_lib::utils::blockquote::strip_blockquote_prefix;
246///
247/// assert_eq!(strip_blockquote_prefix("> text"), "text");
248/// assert_eq!(strip_blockquote_prefix("> > nested"), "nested");
249/// assert_eq!(strip_blockquote_prefix(">>compact"), "compact");
250/// assert_eq!(strip_blockquote_prefix("  > indented"), "indented");
251/// assert_eq!(strip_blockquote_prefix("no quote"), "no quote");
252/// ```
253pub fn strip_blockquote_prefix(line: &str) -> &str {
254    match parse_blockquote_prefix(line) {
255        Some(parsed) => parsed.content,
256        None => line,
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    // ==========================================================================
    // Shared table-driven helpers
    // ==========================================================================

    /// Parses `line` (panicking with `failure_msg` if it is not a blockquote
    /// line) and compares every field against `want`.
    fn assert_parsed(line: &str, failure_msg: &str, want: ParsedBlockquotePrefix<'_>) {
        let got = parse_blockquote_prefix(line).expect(failure_msg);
        assert_eq!(got, want, "line={:?}", line);
    }

    /// Rows are `(line, expected_bq_level, fallback_indent, wanted_indent)`.
    fn check_effective_indent(cases: &[(&str, usize, usize, usize)]) {
        for &(line, level, fallback, want) in cases {
            assert_eq!(
                effective_indent_in_blockquote(line, level, fallback),
                want,
                "line={:?} level={} fallback={}",
                line,
                level,
                fallback
            );
        }
    }

    /// Rows are `(line, wanted_level)`.
    fn check_level(cases: &[(&str, usize)]) {
        for &(line, want) in cases {
            assert_eq!(count_blockquote_level(line), want, "line={:?}", line);
        }
    }

    /// Rows are `(line, expected_bq_level, wanted_content)`.
    fn check_content(cases: &[(&str, usize, &str)]) {
        for &(line, level, want) in cases {
            assert_eq!(
                content_after_blockquote(line, level),
                want,
                "line={:?} level={}",
                line,
                level
            );
        }
    }

    // ==========================================================================
    // parse_blockquote_prefix tests
    // ==========================================================================

    #[test]
    fn test_parse_blockquote_prefix_compact_nested() {
        assert_parsed(
            ">> text",
            "should parse compact nested blockquote",
            ParsedBlockquotePrefix {
                indent: "",
                prefix: ">> ",
                content: "text",
                nesting_level: 2,
                spaces_after_marker: " ",
            },
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_spaced_nested() {
        assert_parsed(
            "> >  text",
            "should parse spaced nested blockquote",
            ParsedBlockquotePrefix {
                indent: "",
                prefix: "> > ",
                content: " text",
                nesting_level: 2,
                spaces_after_marker: "  ",
            },
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_with_indent() {
        assert_parsed(
            "   > quote",
            "should parse indented blockquote",
            ParsedBlockquotePrefix {
                indent: "   ",
                prefix: "   > ",
                content: "quote",
                nesting_level: 1,
                spaces_after_marker: " ",
            },
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_non_blockquote() {
        for line in ["plain text", "  plain text"].iter() {
            assert!(parse_blockquote_prefix(line).is_none(), "line={:?}", line);
        }
    }

    // ==========================================================================
    // effective_indent_in_blockquote tests
    // ==========================================================================

    #[test]
    fn test_effective_indent_no_blockquote_context() {
        // A zero expected level always yields the fallback.
        check_effective_indent(&[("text", 0, 0, 0), ("   text", 0, 3, 3), ("> text", 0, 5, 5)]);
    }

    #[test]
    fn test_effective_indent_single_level_blockquote() {
        // One marker followed by a growing number of spaces.
        check_effective_indent(&[
            ("> text", 1, 99, 0),
            (">  text", 1, 99, 1),
            (">   text", 1, 99, 2),
            (">    text", 1, 99, 3),
        ]);
    }

    #[test]
    fn test_effective_indent_no_space_after_marker() {
        // Content glued to the marker has no effective indent.
        check_effective_indent(&[(">text", 1, 99, 0), (">>text", 2, 99, 0)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_compact() {
        // Compact nesting: `>> text`, `>>  text`, `>>   text`.
        check_effective_indent(&[(">> text", 2, 99, 0), (">>  text", 2, 99, 1), (">>   text", 2, 99, 2)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_spaced() {
        // Spaced nesting: `> > text`, `> >  text`, `> >   text`.
        check_effective_indent(&[
            ("> > text", 2, 99, 0),
            ("> >  text", 2, 99, 1),
            ("> >   text", 2, 99, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_mismatched_level() {
        // A level other than the expected one yields the fallback.
        check_effective_indent(&[("> text", 2, 42, 42), (">> text", 1, 42, 42), ("text", 1, 42, 42)]);
    }

    #[test]
    fn test_effective_indent_empty_blockquote() {
        // Blockquote lines without any content.
        check_effective_indent(&[(">", 1, 99, 0), ("> ", 1, 99, 0), (">  ", 1, 99, 1)]);
    }

    #[test]
    fn test_effective_indent_issue_268_case() {
        // The pattern from issue #268: `>   text` must report 2 spaces of
        // indent (list continuation).
        check_effective_indent(&[
            (">   Opening the app", 1, 0, 2),
            (">   [**See preview here!**](https://example.com)", 1, 0, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_triple_nested() {
        // Three levels, spaced and compact.
        check_effective_indent(&[
            ("> > > text", 3, 99, 0),
            ("> > >  text", 3, 99, 1),
            (">>> text", 3, 99, 0),
            (">>>  text", 3, 99, 1),
        ]);
    }

    // ==========================================================================
    // count_blockquote_level tests
    // ==========================================================================

    #[test]
    fn test_count_blockquote_level_none() {
        check_level(&[("regular text", 0), ("   indented text", 0), ("", 0)]);
    }

    #[test]
    fn test_count_blockquote_level_single() {
        check_level(&[("> text", 1), (">text", 1), (">", 1)]);
    }

    #[test]
    fn test_count_blockquote_level_nested() {
        check_level(&[(">> text", 2), ("> > text", 2), (">>> text", 3), ("> > > text", 3)]);
    }

    // ==========================================================================
    // content_after_blockquote tests
    // ==========================================================================

    #[test]
    fn test_content_after_blockquote_no_quote() {
        check_content(&[("text", 0, "text"), ("   indented", 0, "   indented")]);
    }

    #[test]
    fn test_content_after_blockquote_single() {
        check_content(&[("> text", 1, "text"), (">text", 1, "text"), (">  indented", 1, " indented")]);
    }

    #[test]
    fn test_content_after_blockquote_nested() {
        check_content(&[
            (">> text", 2, "text"),
            ("> > text", 2, "text"),
            ("> >  indented", 2, " indented"),
        ]);
    }

    #[test]
    fn test_content_after_blockquote_mismatched_level() {
        // On a level mismatch the line comes back untouched.
        check_content(&[("> text", 2, "> text"), (">> text", 1, ">> text")]);
    }
}