Skip to main content

rumdl_lib/utils/
blockquote.rs

1//! Blockquote-related utilities for rumdl.
2//!
3//! Provides functions for working with blockquote-prefixed lines, including
4//! calculating effective indentation within blockquote context.
5
/// The pieces of a line that begins with one or more blockquote markers.
///
/// Nested quotes may be written compactly (`>> text`) or with whitespace
/// between the markers (`> > text`). Only the first space/tab following the
/// last marker is kept in `prefix`; any further whitespace stays at the start
/// of `content`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParsedBlockquotePrefix<'a> {
    /// Spaces/tabs that precede the first `>`.
    pub indent: &'a str,
    /// Everything from the start of the line through the last `>` and, when
    /// present, the single space/tab right after it.
    pub prefix: &'a str,
    /// The remainder of the line following `prefix`; it can still start with
    /// extra spaces/tabs.
    pub content: &'a str,
    /// How many `>` markers open the line.
    pub nesting_level: usize,
    /// The complete run of spaces/tabs after the last `>`, including the one
    /// that is also part of `prefix`.
    pub spaces_after_marker: &'a str,
}
24
/// Returns `true` when `byte` is an ASCII space or a horizontal tab.
#[inline]
fn is_space_or_tab(byte: u8) -> bool {
    matches!(byte, b' ' | b'\t')
}
29
30/// Parse a blockquote prefix from a line.
31///
32/// Returns `None` when the line does not start (after leading indentation) with
33/// a blockquote marker.
34#[inline]
35pub fn parse_blockquote_prefix(line: &str) -> Option<ParsedBlockquotePrefix<'_>> {
36    let bytes = line.as_bytes();
37    let mut pos = 0;
38
39    // Leading indentation before the first marker.
40    while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
41        pos += 1;
42    }
43    let indent_end = pos;
44
45    if pos >= bytes.len() || bytes[pos] != b'>' {
46        return None;
47    }
48
49    let mut nesting_level = 0;
50    let mut prefix_end = pos;
51    let mut spaces_after_marker_start = pos;
52    let mut spaces_after_marker_end = pos;
53
54    loop {
55        if pos >= bytes.len() || bytes[pos] != b'>' {
56            break;
57        }
58
59        nesting_level += 1;
60        pos += 1; // past '>'
61        let marker_end = pos;
62
63        // Include at most one optional space/tab in the preserved prefix.
64        if pos < bytes.len() && is_space_or_tab(bytes[pos]) {
65            pos += 1;
66        }
67        let content_start_candidate = pos;
68
69        // Consume any additional whitespace.
70        while pos < bytes.len() && is_space_or_tab(bytes[pos]) {
71            pos += 1;
72        }
73
74        // If another marker follows, all consumed whitespace belongs to nesting.
75        if pos < bytes.len() && bytes[pos] == b'>' {
76            continue;
77        }
78
79        // Final marker.
80        prefix_end = content_start_candidate;
81        spaces_after_marker_start = marker_end;
82        spaces_after_marker_end = pos;
83        break;
84    }
85
86    Some(ParsedBlockquotePrefix {
87        indent: &line[..indent_end],
88        prefix: &line[..prefix_end],
89        content: &line[prefix_end..],
90        nesting_level,
91        spaces_after_marker: &line[spaces_after_marker_start..spaces_after_marker_end],
92    })
93}
94
/// Calculate the effective indentation of a line within a blockquote context.
///
/// For a line inside a blockquote the semantically meaningful indent is the
/// whitespace that follows the blockquote markers, not the whitespace at the
/// very start of the line. One space directly after the final `>` is treated
/// as the marker's separator and is *not* counted; only a space is skipped
/// this way, so a tab in that position counts as indentation.
///
/// # Arguments
///
/// * `line_content` - The full line content including any blockquote markers
/// * `expected_bq_level` - The blockquote nesting level to match (0 for no blockquote)
/// * `fallback_indent` - The indent to return if blockquote levels don't match or if
///   `expected_bq_level` is 0
///
/// # Returns
///
/// The effective indentation:
/// - If `expected_bq_level` is 0: returns `fallback_indent`
/// - If line's blockquote level matches `expected_bq_level`: returns the number of
///   bytes of leading whitespace left after stripping the markers and the
///   optional separator space
/// - If blockquote levels don't match: returns `fallback_indent`
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::blockquote::effective_indent_in_blockquote;
///
/// // Regular line (no blockquote context): the fallback is returned unchanged
/// assert_eq!(effective_indent_in_blockquote("   text", 0, 3), 3);
///
/// // The first space after the marker is the separator and is not counted
/// assert_eq!(effective_indent_in_blockquote(">  text", 1, 0), 1);
///
/// // Separator space plus 2 spaces of indent
/// assert_eq!(effective_indent_in_blockquote(">   text", 1, 0), 2);
///
/// // Nested blockquote: separator space plus 3 spaces of indent
/// assert_eq!(effective_indent_in_blockquote("> >    text", 2, 0), 3);
///
/// // Mismatched blockquote level - returns fallback
/// assert_eq!(effective_indent_in_blockquote("> text", 2, 5), 5);
/// ```
pub fn effective_indent_in_blockquote(line_content: &str, expected_bq_level: usize, fallback_indent: usize) -> usize {
    if expected_bq_level == 0 {
        return fallback_indent;
    }

    // Count blockquote markers at the start of the line.
    // Markers can be separated by whitespace: "> > text" or ">> text".
    let line_bq_level = line_content
        .chars()
        .take_while(|c| *c == '>' || c.is_whitespace())
        .filter(|&c| c == '>')
        .count();

    if line_bq_level != expected_bq_level {
        return fallback_indent;
    }

    // Byte offset just past the final marker. `line_bq_level` is at least 1
    // here and that many `>` were counted above, so the lookup always succeeds;
    // the fallback only keeps the expression total.
    let after_markers = line_content
        .match_indices('>')
        .nth(line_bq_level - 1)
        .map_or(line_content.len(), |(idx, marker)| idx + marker.len());

    // Drop the optional single separator space after the final `>`.
    let rest = &line_content[after_markers..];
    let rest = rest.strip_prefix(' ').unwrap_or(rest);

    // Whatever leading whitespace remains is the indent inside the quote.
    rest.len() - rest.trim_start().len()
}
168
/// Return how many `>` markers open the line.
///
/// Whitespace before and between markers is skipped, so both the compact
/// (`>>text`) and the spaced (`> > text`) forms are recognised. Counting stops
/// at the first character that is neither `>` nor whitespace.
///
/// # Examples
///
/// ```
/// use rumdl_lib::utils::blockquote::count_blockquote_level;
///
/// assert_eq!(count_blockquote_level("regular text"), 0);
/// assert_eq!(count_blockquote_level("> quoted"), 1);
/// assert_eq!(count_blockquote_level(">> nested"), 2);
/// assert_eq!(count_blockquote_level("> > spaced nested"), 2);
/// ```
pub fn count_blockquote_level(line_content: &str) -> usize {
    let mut markers = 0;
    for ch in line_content.chars() {
        if ch == '>' {
            markers += 1;
        } else if !ch.is_whitespace() {
            // First real content character: the marker run is over.
            break;
        }
    }
    markers
}
190
191/// Extract the content after blockquote markers.
192///
193/// Returns the portion of the line after all blockquote markers and the
194/// optional space following the last marker.
195///
196/// # Examples
197///
198/// ```
199/// use rumdl_lib::utils::blockquote::content_after_blockquote;
200///
201/// assert_eq!(content_after_blockquote("> text", 1), "text");
202/// assert_eq!(content_after_blockquote(">  indented", 1), " indented");
203/// assert_eq!(content_after_blockquote("> > nested", 2), "nested");
204/// assert_eq!(content_after_blockquote("no quote", 0), "no quote");
205/// ```
206pub fn content_after_blockquote(line_content: &str, expected_bq_level: usize) -> &str {
207    if expected_bq_level == 0 {
208        return line_content;
209    }
210
211    // First, verify the line has the expected blockquote level
212    let actual_level = count_blockquote_level(line_content);
213    if actual_level != expected_bq_level {
214        return line_content;
215    }
216
217    let mut pos = 0;
218    let mut found_markers = 0;
219    for c in line_content.chars() {
220        pos += c.len_utf8();
221        if c == '>' {
222            found_markers += 1;
223            if found_markers == expected_bq_level {
224                // Skip optional space after final >
225                if line_content.get(pos..pos + 1) == Some(" ") {
226                    pos += 1;
227                }
228                break;
229            }
230        }
231    }
232
233    &line_content[pos..]
234}
235
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `effective_indent_in_blockquote` over `(line, level, fallback, expected)` rows.
    fn check_effective_indent(cases: &[(&str, usize, usize, usize)]) {
        for &(line, level, fallback, expected) in cases {
            assert_eq!(
                effective_indent_in_blockquote(line, level, fallback),
                expected,
                "line: {:?}, level: {}, fallback: {}",
                line,
                level,
                fallback
            );
        }
    }

    /// Runs `count_blockquote_level` over `(line, expected)` rows.
    fn check_level(cases: &[(&str, usize)]) {
        for &(line, expected) in cases {
            assert_eq!(count_blockquote_level(line), expected, "line: {:?}", line);
        }
    }

    /// Runs `content_after_blockquote` over `(line, level, expected)` rows.
    fn check_content(cases: &[(&str, usize, &str)]) {
        for &(line, level, expected) in cases {
            assert_eq!(
                content_after_blockquote(line, level),
                expected,
                "line: {:?}, level: {}",
                line,
                level
            );
        }
    }

    // --------------------------------------------------------------------------
    // parse_blockquote_prefix
    // --------------------------------------------------------------------------

    #[test]
    fn test_parse_blockquote_prefix_compact_nested() {
        let parsed = parse_blockquote_prefix(">> text").expect("should parse compact nested blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "",
                prefix: ">> ",
                content: "text",
                nesting_level: 2,
                spaces_after_marker: " ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_spaced_nested() {
        let parsed = parse_blockquote_prefix("> >  text").expect("should parse spaced nested blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "",
                prefix: "> > ",
                content: " text",
                nesting_level: 2,
                spaces_after_marker: "  ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_with_indent() {
        let parsed = parse_blockquote_prefix("   > quote").expect("should parse indented blockquote");
        assert_eq!(
            parsed,
            ParsedBlockquotePrefix {
                indent: "   ",
                prefix: "   > ",
                content: "quote",
                nesting_level: 1,
                spaces_after_marker: " ",
            }
        );
    }

    #[test]
    fn test_parse_blockquote_prefix_non_blockquote() {
        for line in ["plain text", "  plain text"] {
            assert!(parse_blockquote_prefix(line).is_none());
        }
    }

    // --------------------------------------------------------------------------
    // effective_indent_in_blockquote
    // --------------------------------------------------------------------------

    #[test]
    fn test_effective_indent_no_blockquote_context() {
        // A level of 0 means "not in a blockquote": the fallback always wins.
        check_effective_indent(&[("text", 0, 0, 0), ("   text", 0, 3, 3), ("> text", 0, 5, 5)]);
    }

    #[test]
    fn test_effective_indent_single_level_blockquote() {
        // One marker followed by an increasing amount of whitespace.
        check_effective_indent(&[
            ("> text", 1, 99, 0),
            (">  text", 1, 99, 1),
            (">   text", 1, 99, 2),
            (">    text", 1, 99, 3),
        ]);
    }

    #[test]
    fn test_effective_indent_no_space_after_marker() {
        // Content glued directly to the marker has no indent at all.
        check_effective_indent(&[(">text", 1, 99, 0), (">>text", 2, 99, 0)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_compact() {
        // Adjacent markers: >> text, >>  text, >>   text
        check_effective_indent(&[(">> text", 2, 99, 0), (">>  text", 2, 99, 1), (">>   text", 2, 99, 2)]);
    }

    #[test]
    fn test_effective_indent_nested_blockquote_spaced() {
        // Markers separated by a space: > > text, > >  text, > >   text
        check_effective_indent(&[
            ("> > text", 2, 99, 0),
            ("> >  text", 2, 99, 1),
            ("> >   text", 2, 99, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_mismatched_level() {
        // The line's depth differs from the expected one, so the fallback is used.
        check_effective_indent(&[("> text", 2, 42, 42), (">> text", 1, 42, 42), ("text", 1, 42, 42)]);
    }

    #[test]
    fn test_effective_indent_empty_blockquote() {
        // Blockquote lines without any content.
        check_effective_indent(&[(">", 1, 99, 0), ("> ", 1, 99, 0), (">  ", 1, 99, 1)]);
    }

    #[test]
    fn test_effective_indent_issue_268_case() {
        // Pattern reported in issue #268: ">   text" must yield an indent of 2
        // (list continuation inside a blockquote).
        check_effective_indent(&[
            (">   Opening the app", 1, 0, 2),
            (">   [**See preview here!**](https://example.com)", 1, 0, 2),
        ]);
    }

    #[test]
    fn test_effective_indent_triple_nested() {
        // Three levels deep, spaced and compact.
        check_effective_indent(&[
            ("> > > text", 3, 99, 0),
            ("> > >  text", 3, 99, 1),
            (">>> text", 3, 99, 0),
            (">>>  text", 3, 99, 1),
        ]);
    }

    // --------------------------------------------------------------------------
    // count_blockquote_level
    // --------------------------------------------------------------------------

    #[test]
    fn test_count_blockquote_level_none() {
        check_level(&[("regular text", 0), ("   indented text", 0), ("", 0)]);
    }

    #[test]
    fn test_count_blockquote_level_single() {
        check_level(&[("> text", 1), (">text", 1), (">", 1)]);
    }

    #[test]
    fn test_count_blockquote_level_nested() {
        check_level(&[(">> text", 2), ("> > text", 2), (">>> text", 3), ("> > > text", 3)]);
    }

    // --------------------------------------------------------------------------
    // content_after_blockquote
    // --------------------------------------------------------------------------

    #[test]
    fn test_content_after_blockquote_no_quote() {
        check_content(&[("text", 0, "text"), ("   indented", 0, "   indented")]);
    }

    #[test]
    fn test_content_after_blockquote_single() {
        check_content(&[("> text", 1, "text"), (">text", 1, "text"), (">  indented", 1, " indented")]);
    }

    #[test]
    fn test_content_after_blockquote_nested() {
        check_content(&[
            (">> text", 2, "text"),
            ("> > text", 2, "text"),
            ("> >  indented", 2, " indented"),
        ]);
    }

    #[test]
    fn test_content_after_blockquote_mismatched_level() {
        // A depth mismatch leaves the line exactly as it was.
        check_content(&[("> text", 2, "> text"), (">> text", 1, ">> text")]);
    }
}