Skip to main content

panache_parser/parser/utils/
marker_utils.rs

//! Consolidated utilities for parsing block markers (blockquotes, lists, definitions).
//!
//! This module provides common functionality for parsing markers that follow
//! similar patterns: optional leading spaces, marker character(s), optional trailing space.
5
/// Whitespace details for a single blockquote marker (`>`).
///
/// Records the spacing around one `>` so the exact marker text can be
/// preserved by lossless parsing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct BlockQuoteMarkerInfo {
    /// Number of spaces before the `>` (at most 3 when produced by
    /// `parse_blockquote_marker_info`).
    pub leading_spaces: usize,
    /// Whether a single space directly follows the `>`.
    pub has_trailing_space: bool,
}
12
/// Recognise a blockquote marker at the start of `line`.
///
/// A marker is zero to three spaces followed by `>`. A single space directly
/// after the `>` is treated as part of the marker and excluded from the content.
///
/// Returns `Some((marker_end_byte, content_start_byte))`, where
/// `marker_end_byte` is the offset just past the `>` and `content_start_byte`
/// is the offset just past the optional trailing space. Returns `None` when
/// the line does not begin with a marker.
pub(crate) fn try_parse_blockquote_marker(line: &str) -> Option<(usize, usize)> {
    let bytes = line.as_bytes();

    // Indentation in front of the marker: leading spaces, capped at three.
    let indent = bytes.iter().take(3).take_while(|&&b| b == b' ').count();

    // Anything other than `>` here (including end of line) means no marker.
    if bytes.get(indent) != Some(&b'>') {
        return None;
    }

    let marker_end = indent + 1;
    let content_start = match bytes.get(marker_end) {
        Some(&b' ') => marker_end + 1,
        _ => marker_end,
    };

    Some((marker_end, content_start))
}
41
42/// Count how many blockquote levels a line has, returning (depth, remaining_content).
43pub(crate) fn count_blockquote_markers(line: &str) -> (usize, &str) {
44    let mut depth = 0;
45    let mut remaining = line;
46
47    while let Some((_, content_start)) = try_parse_blockquote_marker(remaining) {
48        depth += 1;
49        remaining = &remaining[content_start..];
50    }
51
52    (depth, remaining)
53}
54
55/// Parse all blockquote markers from a line and return detailed info about each.
56/// Returns Vec of BlockQuoteMarkerInfo for each marker found.
57/// This is useful for lossless parsing where we need to preserve exact whitespace.
58pub(crate) fn parse_blockquote_marker_info(line: &str) -> Vec<BlockQuoteMarkerInfo> {
59    let mut markers = Vec::new();
60    let mut remaining = line;
61
62    loop {
63        let bytes = remaining.as_bytes();
64        let mut i = 0;
65
66        // Count leading whitespace (up to 3 spaces before >)
67        let mut spaces = 0;
68        while i < bytes.len() && bytes[i] == b' ' && spaces < 3 {
69            spaces += 1;
70            i += 1;
71        }
72
73        // Check if there's a > marker
74        if i >= bytes.len() || bytes[i] != b'>' {
75            break;
76        }
77        i += 1; // skip '>'
78
79        // Check for optional space after >
80        let has_trailing_space = i < bytes.len() && bytes[i] == b' ';
81        if has_trailing_space {
82            i += 1;
83        }
84
85        markers.push(BlockQuoteMarkerInfo {
86            leading_spaces: spaces,
87            has_trailing_space,
88        });
89        remaining = &remaining[i..];
90    }
91
92    markers
93}
94
/// Unit tests for the blockquote marker helpers.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_simple_marker() {
        assert_eq!(try_parse_blockquote_marker("> text"), Some((1, 2)));
    }

    #[test]
    fn test_marker_no_space() {
        assert_eq!(try_parse_blockquote_marker(">text"), Some((1, 1)));
    }

    #[test]
    fn test_marker_with_leading_spaces() {
        assert_eq!(try_parse_blockquote_marker("   > text"), Some((4, 5)));
    }

    #[test]
    fn test_four_spaces_not_blockquote() {
        assert_eq!(try_parse_blockquote_marker("    > text"), None);
    }

    #[test]
    fn test_marker_only() {
        // A bare `>` with nothing after it is still a marker.
        assert_eq!(try_parse_blockquote_marker(">"), Some((1, 1)));
    }

    #[test]
    fn test_no_marker() {
        assert_eq!(try_parse_blockquote_marker(""), None);
        assert_eq!(try_parse_blockquote_marker("   "), None);
        assert_eq!(try_parse_blockquote_marker("text"), None);
    }

    #[test]
    fn test_count_nested() {
        let (depth, content) = count_blockquote_markers("> > > nested");
        assert_eq!(depth, 3);
        assert_eq!(content, "nested");
    }

    #[test]
    fn test_count_nested_no_spaces() {
        assert_eq!(count_blockquote_markers(">>text"), (2, "text"));
    }

    #[test]
    fn test_count_no_markers() {
        assert_eq!(count_blockquote_markers("plain text"), (0, "plain text"));
        assert_eq!(count_blockquote_markers(""), (0, ""));
    }

    #[test]
    fn test_count_indented_inner_marker() {
        // Only one space after `>` belongs to the marker; the next space is
        // leading indentation of the inner marker.
        assert_eq!(count_blockquote_markers(">  > x"), (2, "x"));
    }

    #[test]
    fn test_parse_marker_info_single() {
        let markers = parse_blockquote_marker_info("> text");
        assert_eq!(markers.len(), 1);
        assert_eq!(markers[0].leading_spaces, 0);
        assert!(markers[0].has_trailing_space);
    }

    #[test]
    fn test_parse_marker_info_nested() {
        let markers = parse_blockquote_marker_info("> > > nested");
        assert_eq!(markers.len(), 3);
        assert_eq!(markers[0].leading_spaces, 0);
        assert_eq!(markers[1].leading_spaces, 0);
        assert_eq!(markers[2].leading_spaces, 0);
        assert!(markers.iter().all(|m| m.has_trailing_space));
    }

    #[test]
    fn test_parse_marker_info_with_leading_spaces() {
        let markers = parse_blockquote_marker_info("  > text");
        assert_eq!(markers.len(), 1);
        assert_eq!(markers[0].leading_spaces, 2);
        assert!(markers[0].has_trailing_space);
    }

    #[test]
    fn test_parse_marker_info_no_trailing_space() {
        let markers = parse_blockquote_marker_info(">>text");
        assert_eq!(markers.len(), 2);
        assert!(markers
            .iter()
            .all(|m| m.leading_spaces == 0 && !m.has_trailing_space));
    }

    #[test]
    fn test_parse_marker_info_indented_inner_marker() {
        let markers = parse_blockquote_marker_info(">  > x");
        assert_eq!(
            markers,
            vec![
                BlockQuoteMarkerInfo {
                    leading_spaces: 0,
                    has_trailing_space: true,
                },
                BlockQuoteMarkerInfo {
                    leading_spaces: 1,
                    has_trailing_space: true,
                },
            ]
        );
    }

    #[test]
    fn test_parse_marker_info_none() {
        assert!(parse_blockquote_marker_info("").is_empty());
        assert!(parse_blockquote_marker_info("plain").is_empty());
        assert!(parse_blockquote_marker_info("    > text").is_empty());
    }
}