mdbook_validator/
parser.rs

1//! Markdown parsing and code block extraction
2
/// Splits the info string of a fenced code block into its recognised parts.
///
/// The info string is tokenised on whitespace and a
/// `(language, validator, skip)` tuple is returned:
///
/// - `language` is the first token, or an empty string when there are none.
/// - `validator` is the value of the first `validator=<name>` token. It is
///   `None` when no such token exists or when that first value is empty.
/// - `skip` is `true` when any token is exactly `skip`.
///
/// # Examples
///
/// - `"sql validator=sqlite"` → `("sql", Some("sqlite"), false)`
/// - `"rust"` → `("rust", None, false)`
/// - `"sql validator=osquery skip"` → `("sql", Some("osquery"), true)`
#[must_use]
pub fn parse_info_string(info: &str) -> (String, Option<String>, bool) {
    // Each lookup walks the tokens independently, so hand out a fresh
    // iterator per use instead of collecting into a vector.
    let tokens = || info.split_whitespace();

    let language = tokens().next().unwrap_or_default().to_owned();

    // Only the first `validator=` token is considered; if its value is empty
    // the result is `None` even when a later token carries a value.
    let validator = tokens()
        .find_map(|token| token.strip_prefix("validator="))
        .filter(|name| !name.is_empty())
        .map(str::to_owned);

    let skip = tokens().any(|token| token == "skip");

    (language, validator, skip)
}
27
/// Result of extracting markers from code block content.
///
/// Each marker field is `None` when the corresponding block was not found.
/// The type derives `PartialEq`/`Eq` so that whole results can be compared
/// directly (for example with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtractedMarkers {
    /// Setup content from `<!--SETUP-->` marker
    pub setup: Option<String>,
    /// Assertions from `<!--ASSERT-->` marker
    pub assertions: Option<String>,
    /// Expected output from `<!--EXPECT-->` marker
    pub expect: Option<String>,
    /// The visible content (with all markers removed)
    pub visible_content: String,
}
40
41impl ExtractedMarkers {
42    /// Get content for validation (with `@@` prefix stripped but lines kept).
43    ///
44    /// This returns `visible_content` with the `@@` prefix removed from each line,
45    /// but the line content is preserved (unlike output which removes entire lines).
46    #[must_use]
47    pub fn validation_content(&self) -> String {
48        strip_double_at_prefix(&self.visible_content)
49    }
50}
51
52/// Extracts markers from code block content.
53///
54/// Parses `<!--SETUP-->`, `<!--ASSERT-->`, and `<!--EXPECT-->` blocks,
55/// returning their content and the remaining visible content.
56#[must_use]
57pub fn extract_markers(content: &str) -> ExtractedMarkers {
58    let mut result = ExtractedMarkers::default();
59    let mut remaining = content.to_owned();
60
61    // Extract SETUP block
62    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--SETUP") {
63        result.setup = Some(inner);
64        remaining = format!("{before}{after}");
65    }
66
67    // Extract ASSERT block
68    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--ASSERT") {
69        result.assertions = Some(inner);
70        remaining = format!("{before}{after}");
71    }
72
73    // Extract EXPECT block
74    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--EXPECT") {
75        result.expect = Some(inner);
76        remaining = format!("{before}{after}");
77    }
78
79    // Trim leading/trailing whitespace from visible content
80    remaining.trim().clone_into(&mut result.visible_content);
81
82    result
83}
84
/// Removes a leading `@@` from every line, keeping the rest of the line.
///
/// `@@` lines are still meant to be validated; only the `@@` marker itself is
/// not part of the syntax under validation. At most one `@@` is removed per
/// line, and `@@` anywhere other than the start of a line is left alone.
///
/// The input is walked with [`str::lines`] and re-joined with `\n`, so `\r\n`
/// line endings come out as `\n` and a trailing newline is not reproduced.
///
/// # Examples
///
/// - `"@@SELECT 'hidden';\nSELECT 'visible';"` → `"SELECT 'hidden';\nSELECT 'visible';"`
/// - `"@@\nvisible"` → `"\nvisible"` (empty @@ line becomes empty line)
#[must_use]
pub fn strip_double_at_prefix(content: &str) -> String {
    let mut stripped = String::with_capacity(content.len());

    for (index, line) in content.lines().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index > 0 {
            stripped.push('\n');
        }
        stripped.push_str(line.strip_prefix("@@").unwrap_or(line));
    }

    stripped
}
102
/// Locates the first block opened by `marker` and closed by `-->`.
///
/// Returns `(before, inner_content, after)` where:
///
/// - `before` is everything preceding the marker,
/// - `inner_content` is the text between the end of the marker's line and the
///   first following `-->`, with surrounding whitespace trimmed,
/// - `after` is everything following that `-->`.
///
/// Anything on the marker's own line after the marker is discarded. Returns
/// `None` when the marker is absent, when no newline follows it, or when no
/// `-->` appears after the marker's line.
fn extract_marker_block(content: &str, marker: &str) -> Option<(String, String, String)> {
    const TERMINATOR: &str = "-->";

    let marker_pos = content.find(marker)?;
    let (before, from_marker) = content.split_at(marker_pos);

    // The block body starts on the line after the marker.
    let (_marker_line, body_and_rest) = from_marker.split_once('\n')?;
    let (inner, after) = body_and_rest.split_once(TERMINATOR)?;

    Some((before.to_owned(), inner.trim().to_owned(), after.to_owned()))
}
118
/// Unit tests for info-string parsing, marker extraction and `@@` stripping.
#[cfg(test)]
mod tests {
    use super::*;

    // ==================== parse_info_string tests ====================

    #[test]
    fn parse_info_string_language_only() {
        let (lang, validator, skip) = parse_info_string("sql");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None);
        assert!(!skip);
    }

    #[test]
    fn parse_info_string_with_validator() {
        let (lang, validator, skip) = parse_info_string("sql validator=sqlite");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(!skip);
    }

    #[test]
    fn parse_info_string_with_skip() {
        let (lang, validator, skip) = parse_info_string("sql validator=osquery skip");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("osquery".to_owned()));
        assert!(skip);
    }

    #[test]
    fn parse_info_string_skip_without_validator() {
        let (lang, validator, skip) = parse_info_string("bash skip");
        assert_eq!(lang, "bash");
        assert_eq!(validator, None);
        assert!(skip);
    }

    #[test]
    fn parse_info_string_empty() {
        let (lang, validator, skip) = parse_info_string("");
        assert_eq!(lang, "");
        assert_eq!(validator, None);
        assert!(!skip);
    }

    #[test]
    fn parse_info_string_extra_whitespace() {
        let (lang, validator, skip) = parse_info_string("  sql   validator=sqlite   skip  ");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(skip);
    }

    #[test]
    fn parse_info_string_empty_validator_ignored() {
        let (lang, validator, skip) = parse_info_string("sql validator=");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None); // Empty validator is filtered out
        assert!(!skip);
    }

    #[test]
    fn parse_info_string_multiple_validators_takes_first() {
        let (lang, validator, skip) = parse_info_string("sql validator=first validator=second");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("first".to_owned()));
        assert!(!skip);
    }

    // ==================== extract_markers tests ====================

    #[test]
    fn extract_markers_setup_only() {
        let content = "<!--SETUP\nCREATE TABLE test;\n-->\nSELECT * FROM test;";
        let result = extract_markers(content);
        assert_eq!(result.setup, Some("CREATE TABLE test;".to_owned()));
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM test;");
    }

    #[test]
    fn extract_markers_assert_only() {
        let content = "SELECT * FROM test;\n<!--ASSERT\nrows >= 1\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, Some("rows >= 1".to_owned()));
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM test;");
    }

    #[test]
    fn extract_markers_expect_only() {
        let content = "SELECT 1;\n<!--EXPECT\n[{\"1\": 1}]\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, Some("[{\"1\": 1}]".to_owned()));
        assert_eq!(result.visible_content, "SELECT 1;");
    }

    #[test]
    fn extract_markers_all_three() {
        let content = "<!--SETUP\nCREATE TABLE t;\n-->\nSELECT * FROM t;\n<!--ASSERT\nrows = 0\n-->\n<!--EXPECT\n[]\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, Some("CREATE TABLE t;".to_owned()));
        assert_eq!(result.assertions, Some("rows = 0".to_owned()));
        assert_eq!(result.expect, Some("[]".to_owned()));
        assert_eq!(result.visible_content, "SELECT * FROM t;");
    }

    #[test]
    fn extract_markers_none() {
        let content = "SELECT * FROM users;";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM users;");
    }

    #[test]
    fn extract_markers_multiline_setup() {
        let content = "<!--SETUP\nCREATE TABLE t (id INT);\nINSERT INTO t VALUES (1);\nINSERT INTO t VALUES (2);\n-->\nSELECT * FROM t;";
        let result = extract_markers(content);
        // Pin the exact inner text: every line is kept, in order, with the
        // original line breaks and no surrounding whitespace.
        assert_eq!(
            result.setup.as_deref(),
            Some("CREATE TABLE t (id INT);\nINSERT INTO t VALUES (1);\nINSERT INTO t VALUES (2);")
        );
        assert_eq!(result.visible_content, "SELECT * FROM t;");
    }

    #[test]
    fn extract_markers_multiline_assertions() {
        let content = "SELECT * FROM t;\n<!--ASSERT\nrows >= 1\ncontains \"foo\"\n-->";
        let result = extract_markers(content);
        // Pin the exact inner text rather than only checking for substrings.
        assert_eq!(
            result.assertions.as_deref(),
            Some("rows >= 1\ncontains \"foo\"")
        );
        assert_eq!(result.visible_content, "SELECT * FROM t;");
    }

    #[test]
    fn extract_markers_preserves_visible_content_order() {
        let content = "-- First line\n<!--SETUP\nsetup;\n-->\n-- Second line\nSELECT 1;";
        let result = extract_markers(content);
        assert_eq!(result.setup.as_deref(), Some("setup;"));

        let visible = result.visible_content.as_str();
        let first = visible.find("First line");
        let second = visible.find("Second line");
        let select = visible.find("SELECT 1");

        // All three pieces must be present...
        assert!(first.is_some());
        assert!(second.is_some());
        assert!(select.is_some());
        // ...and must appear in their original relative order.
        assert!(first < second);
        assert!(second < select);
        // The extracted block itself must not leak into the visible content.
        assert!(!visible.contains("setup;"));
        assert!(!visible.contains("<!--SETUP"));
    }

    // ==================== strip_double_at_prefix tests ====================

    #[test]
    fn strip_double_at_prefix_strips_prefix() {
        let content = "@@SELECT 'hidden';\nSELECT 'visible';";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "SELECT 'hidden';\nSELECT 'visible';");
    }

    #[test]
    fn strip_double_at_prefix_preserves_lines_without_prefix() {
        let content = "SELECT 'visible';\nSELECT 'also visible';";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, content);
    }

    #[test]
    fn strip_double_at_prefix_empty_at_line() {
        // @@ alone becomes empty line
        let content = "@@\nvisible";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "\nvisible");
    }

    #[test]
    fn strip_double_at_prefix_at_in_middle_unchanged() {
        // @@ in middle of line is NOT stripped (must be at start)
        let content = "line with @@ in middle";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, content);
    }

    #[test]
    fn strip_double_at_prefix_multiple_at_lines() {
        let content = "@@first\n@@second\nvisible\n@@third";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "first\nsecond\nvisible\nthird");
    }

    #[test]
    fn strip_double_at_prefix_only_at_lines() {
        let content = "@@line1\n@@line2";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "line1\nline2");
    }

    #[test]
    fn strip_double_at_prefix_double_prefix_strips_one() {
        // @@@@foo should become @@foo (only one @@ prefix stripped per line)
        // This is intentional: if user writes @@@@, they want @@ in validation content
        let content = "@@@@foo";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "@@foo");
    }

    #[test]
    fn strip_double_at_prefix_mixed_leading_and_middle() {
        // Only leading @@ should be stripped, @@ in middle of line stays
        let content = "@@first line\nline with @@ middle\n@@another hidden";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "first line\nline with @@ middle\nanother hidden");
    }

    // ==================== validation_content tests ====================

    #[test]
    fn extracted_markers_validation_content_strips_at_prefix() {
        let content = "@@SELECT 'hidden';\nSELECT 'visible';";
        let markers = extract_markers(content);
        assert_eq!(
            markers.validation_content(),
            "SELECT 'hidden';\nSELECT 'visible';"
        );
    }
}
343        );
344    }
345}