mdbook_validator/
parser.rs

1//! Markdown parsing and code block extraction
2
/// Splits a fenced code block's info string into its recognised pieces.
///
/// The info string is tokenised on whitespace and the result is returned as a
/// `(language, validator, skip, hidden)` tuple:
///
/// - `language` is the first token, or an empty string when there are no tokens.
/// - `validator` is the value of the first `validator=<name>` token; an empty
///   value (`validator=`) yields `None`.
/// - `skip` / `hidden` are `true` when a token equal to `skip` / `hidden` is
///   present anywhere in the info string.
///
/// # Examples
///
/// - `"sql validator=sqlite"` → `("sql", Some("sqlite"), false, false)`
/// - `"rust"` → `("rust", None, false, false)`
/// - `"sql validator=osquery skip"` → `("sql", Some("osquery"), true, false)`
/// - `"sql validator=sqlite hidden"` → `("sql", Some("sqlite"), false, true)`
#[must_use]
pub fn parse_info_string(info: &str) -> (String, Option<String>, bool, bool) {
    let mut language = String::new();
    // Holds the raw value of the first `validator=` token, even when empty,
    // so that later `validator=` tokens never override it.
    let mut first_validator: Option<&str> = None;
    let mut skip = false;
    let mut hidden = false;

    for (index, token) in info.split_whitespace().enumerate() {
        if index == 0 {
            language = token.to_owned();
        }

        match token {
            "skip" => skip = true,
            "hidden" => hidden = true,
            _ => {
                if first_validator.is_none() {
                    first_validator = token.strip_prefix("validator=");
                }
            }
        }
    }

    // An empty validator name is treated the same as no validator at all.
    let validator = first_validator
        .filter(|name| !name.is_empty())
        .map(str::to_owned);

    (language, validator, skip, hidden)
}
29
/// Result of extracting markers from code block content.
///
/// Built by `extract_markers`: each optional field holds the trimmed text
/// found inside the corresponding marker block, and is `None` when no such
/// block was extracted from the input.
#[derive(Debug, Clone, Default)]
pub struct ExtractedMarkers {
    /// Setup content taken from the `<!--SETUP ... -->` block, if any
    pub setup: Option<String>,
    /// Assertions taken from the `<!--ASSERT ... -->` block, if any
    pub assertions: Option<String>,
    /// Expected output taken from the `<!--EXPECT ... -->` block, if any
    pub expect: Option<String>,
    /// The visible content (extracted marker blocks removed, then trimmed of
    /// leading/trailing whitespace)
    pub visible_content: String,
}
42
43impl ExtractedMarkers {
44    /// Get content for validation (with `@@` prefix stripped but lines kept).
45    ///
46    /// This returns `visible_content` with the `@@` prefix removed from each line,
47    /// but the line content is preserved (unlike output which removes entire lines).
48    #[must_use]
49    pub fn validation_content(&self) -> String {
50        strip_double_at_prefix(&self.visible_content)
51    }
52}
53
54/// Extracts markers from code block content.
55///
56/// Parses `<!--SETUP-->`, `<!--ASSERT-->`, and `<!--EXPECT-->` blocks,
57/// returning their content and the remaining visible content.
58#[must_use]
59pub fn extract_markers(content: &str) -> ExtractedMarkers {
60    let mut result = ExtractedMarkers::default();
61    let mut remaining = content.to_owned();
62
63    // Extract SETUP block
64    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--SETUP") {
65        result.setup = Some(inner);
66        remaining = format!("{before}{after}");
67    }
68
69    // Extract ASSERT block
70    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--ASSERT") {
71        result.assertions = Some(inner);
72        remaining = format!("{before}{after}");
73    }
74
75    // Extract EXPECT block
76    if let Some((before, inner, after)) = extract_marker_block(&remaining, "<!--EXPECT") {
77        result.expect = Some(inner);
78        remaining = format!("{before}{after}");
79    }
80
81    // Trim leading/trailing whitespace from visible content
82    remaining.trim().clone_into(&mut result.visible_content);
83
84    result
85}
86
/// Removes a leading `@@` from every line, leaving the rest of the line intact.
///
/// Validation needs to see the text of `@@` lines, but the `@@` itself is only
/// a marker and is not part of the syntax being validated. At most one `@@` is
/// removed per line, and only when it is at the very start of the line.
///
/// Lines are split with [`str::lines`] and re-joined with `\n`, so a trailing
/// line break in the input does not appear in the output.
///
/// # Examples
///
/// - `"@@SELECT 'hidden';\nSELECT 'visible';"` → `"SELECT 'hidden';\nSELECT 'visible';"`
/// - `"@@\nvisible"` → `"\nvisible"` (a bare `@@` line becomes an empty line)
#[must_use]
pub fn strip_double_at_prefix(content: &str) -> String {
    let mut stripped = String::with_capacity(content.len());

    for (index, line) in content.lines().enumerate() {
        // Separator goes between lines only, never after the last one.
        if index > 0 {
            stripped.push('\n');
        }

        match line.strip_prefix("@@") {
            Some(rest) => stripped.push_str(rest),
            None => stripped.push_str(line),
        }
    }

    stripped
}
104
/// Extracts the first block opened by `marker` and closed by `-->`.
///
/// Two layouts are recognised:
///
/// - **Multi-line** — the first `-->` after the marker sits on a later line.
///   The rest of the opening line is discarded and the block content is
///   everything from the following line up to that `-->`.
/// - **Single-line** — the first `-->` after the marker comes before the next
///   line break (or no line break follows at all). The block content is the
///   text between the marker and that `-->`. Searching from the end of the
///   marker, rather than from the next line, keeps such a block from running
///   on to an unrelated later `-->` and swallowing the text in between.
///
/// Returns `Some((before, inner, after))` where `before` is the text preceding
/// the marker, `inner` is the block content with surrounding whitespace
/// trimmed, and `after` is the text following the closing `-->`. Returns
/// `None` when `marker` does not occur or no `-->` follows it.
fn extract_marker_block(content: &str, marker: &str) -> Option<(String, String, String)> {
    const TERMINATOR: &str = "-->";

    let start = content.find(marker)?;
    // Everything after the marker text itself; all offsets below are relative to it.
    let tail = &content[start + marker.len()..];

    let close = tail.find(TERMINATOR)?;
    let inner_start = match tail.find('\n') {
        // Terminator is on a later line: skip the remainder of the opening line.
        Some(newline) if newline < close => newline + 1,
        // Terminator is on the opening line (or there is no line break).
        _ => 0,
    };

    let before = &content[..start];
    let inner = tail[inner_start..close].trim();
    let after = &tail[close + TERMINATOR.len()..];

    Some((before.to_owned(), inner.to_owned(), after.to_owned()))
}
120
// Unit tests for the parsing helpers in this module. Each banner-delimited
// section below exercises one function; the assertions pin the exact output
// for representative and edge-case inputs.
#[cfg(test)]
mod tests {
    use super::*;

    // ==================== parse_info_string tests ====================

    #[test]
    fn parse_info_string_language_only() {
        let (lang, validator, skip, hidden) = parse_info_string("sql");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None);
        assert!(!skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_with_validator() {
        let (lang, validator, skip, hidden) = parse_info_string("sql validator=sqlite");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(!skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_with_skip() {
        let (lang, validator, skip, hidden) = parse_info_string("sql validator=osquery skip");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("osquery".to_owned()));
        assert!(skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_skip_without_validator() {
        let (lang, validator, skip, hidden) = parse_info_string("bash skip");
        assert_eq!(lang, "bash");
        assert_eq!(validator, None);
        assert!(skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_empty() {
        // No tokens at all: language falls back to the empty string
        let (lang, validator, skip, hidden) = parse_info_string("");
        assert_eq!(lang, "");
        assert_eq!(validator, None);
        assert!(!skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_extra_whitespace() {
        // Leading, trailing and repeated whitespace must not affect tokenisation
        let (lang, validator, skip, hidden) =
            parse_info_string("  sql   validator=sqlite   skip  ");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_empty_validator_ignored() {
        let (lang, validator, skip, hidden) = parse_info_string("sql validator=");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None); // Empty validator is filtered out
        assert!(!skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_multiple_validators_takes_first() {
        let (lang, validator, skip, hidden) =
            parse_info_string("sql validator=first validator=second");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("first".to_owned()));
        assert!(!skip);
        assert!(!hidden);
    }

    // ==================== hidden attribute tests ====================

    #[test]
    fn parse_info_string_with_hidden() {
        let (lang, validator, skip, hidden) = parse_info_string("sql validator=sqlite hidden");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(!skip);
        assert!(hidden);
    }

    #[test]
    fn parse_info_string_hidden_order_independent() {
        // `hidden` may appear before the validator token
        let (lang, validator, skip, hidden) = parse_info_string("sql hidden validator=sqlite");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(!skip);
        assert!(hidden);
    }

    #[test]
    fn parse_info_string_hidden_without_validator() {
        let (lang, validator, skip, hidden) = parse_info_string("bash hidden");
        assert_eq!(lang, "bash");
        assert_eq!(validator, None);
        assert!(!skip);
        assert!(hidden);
    }

    #[test]
    fn parse_info_string_skip_only() {
        let (lang, validator, skip, hidden) = parse_info_string("sql skip");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None);
        assert!(skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_neither_skip_nor_hidden() {
        let (lang, validator, skip, hidden) = parse_info_string("sql");
        assert_eq!(lang, "sql");
        assert_eq!(validator, None);
        assert!(!skip);
        assert!(!hidden);
    }

    #[test]
    fn parse_info_string_both_skip_and_hidden() {
        // Parser returns both flags; mutual exclusivity checked at higher level
        let (lang, validator, skip, hidden) = parse_info_string("sql validator=sqlite skip hidden");
        assert_eq!(lang, "sql");
        assert_eq!(validator, Some("sqlite".to_owned()));
        assert!(skip);
        assert!(hidden);
    }

    // ==================== extract_markers tests ====================

    #[test]
    fn extract_markers_setup_only() {
        let content = "<!--SETUP\nCREATE TABLE test;\n-->\nSELECT * FROM test;";
        let result = extract_markers(content);
        assert_eq!(result.setup, Some("CREATE TABLE test;".to_owned()));
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM test;");
    }

    #[test]
    fn extract_markers_assert_only() {
        let content = "SELECT * FROM test;\n<!--ASSERT\nrows >= 1\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, Some("rows >= 1".to_owned()));
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM test;");
    }

    #[test]
    fn extract_markers_expect_only() {
        let content = "SELECT 1;\n<!--EXPECT\n[{\"1\": 1}]\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, Some("[{\"1\": 1}]".to_owned()));
        assert_eq!(result.visible_content, "SELECT 1;");
    }

    #[test]
    fn extract_markers_all_three() {
        let content = "<!--SETUP\nCREATE TABLE t;\n-->\nSELECT * FROM t;\n<!--ASSERT\nrows = 0\n-->\n<!--EXPECT\n[]\n-->";
        let result = extract_markers(content);
        assert_eq!(result.setup, Some("CREATE TABLE t;".to_owned()));
        assert_eq!(result.assertions, Some("rows = 0".to_owned()));
        assert_eq!(result.expect, Some("[]".to_owned()));
        assert_eq!(result.visible_content, "SELECT * FROM t;");
    }

    #[test]
    fn extract_markers_none() {
        // Content without markers passes through unchanged
        let content = "SELECT * FROM users;";
        let result = extract_markers(content);
        assert_eq!(result.setup, None);
        assert_eq!(result.assertions, None);
        assert_eq!(result.expect, None);
        assert_eq!(result.visible_content, "SELECT * FROM users;");
    }

    #[test]
    fn extract_markers_multiline_setup() {
        let content = "<!--SETUP\nCREATE TABLE t (id INT);\nINSERT INTO t VALUES (1);\nINSERT INTO t VALUES (2);\n-->\nSELECT * FROM t;";
        let result = extract_markers(content);
        assert!(result.setup.is_some());
        let setup = result.setup.unwrap();
        assert!(setup.contains("CREATE TABLE"));
        assert!(setup.contains("INSERT INTO t VALUES (1)"));
        assert!(setup.contains("INSERT INTO t VALUES (2)"));
    }

    #[test]
    fn extract_markers_multiline_assertions() {
        let content = "SELECT * FROM t;\n<!--ASSERT\nrows >= 1\ncontains \"foo\"\n-->";
        let result = extract_markers(content);
        assert!(result.assertions.is_some());
        let assertions = result.assertions.unwrap();
        assert!(assertions.contains("rows >= 1"));
        assert!(assertions.contains("contains \"foo\""));
    }

    #[test]
    fn extract_markers_preserves_visible_content_order() {
        // Text on both sides of an extracted block must survive in the visible content
        let content = "-- First line\n<!--SETUP\nsetup;\n-->\n-- Second line\nSELECT 1;";
        let result = extract_markers(content);
        assert!(result.visible_content.contains("First line"));
        assert!(result.visible_content.contains("Second line"));
        assert!(result.visible_content.contains("SELECT 1"));
    }

    // ==================== strip_double_at_prefix tests ====================

    #[test]
    fn strip_double_at_prefix_strips_prefix() {
        let content = "@@SELECT 'hidden';\nSELECT 'visible';";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "SELECT 'hidden';\nSELECT 'visible';");
    }

    #[test]
    fn strip_double_at_prefix_preserves_lines_without_prefix() {
        let content = "SELECT 'visible';\nSELECT 'also visible';";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, content);
    }

    #[test]
    fn strip_double_at_prefix_empty_at_line() {
        // @@ alone becomes empty line
        let content = "@@\nvisible";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "\nvisible");
    }

    #[test]
    fn strip_double_at_prefix_at_in_middle_unchanged() {
        // @@ in middle of line is NOT stripped (must be at start)
        let content = "line with @@ in middle";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, content);
    }

    #[test]
    fn strip_double_at_prefix_multiple_at_lines() {
        let content = "@@first\n@@second\nvisible\n@@third";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "first\nsecond\nvisible\nthird");
    }

    #[test]
    fn strip_double_at_prefix_only_at_lines() {
        let content = "@@line1\n@@line2";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "line1\nline2");
    }

    #[test]
    fn strip_double_at_prefix_double_prefix_strips_one() {
        // @@@@foo should become @@foo (only one @@ prefix stripped per line)
        // This is intentional: if user writes @@@@, they want @@ in validation content
        let content = "@@@@foo";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "@@foo");
    }

    #[test]
    fn strip_double_at_prefix_mixed_leading_and_middle() {
        // Only leading @@ should be stripped, @@ in middle of line stays
        let content = "@@first line\nline with @@ middle\n@@another hidden";
        let result = strip_double_at_prefix(content);
        assert_eq!(result, "first line\nline with @@ middle\nanother hidden");
    }

    // ==================== validation_content tests ====================

    #[test]
    fn extracted_markers_validation_content_strips_at_prefix() {
        // End-to-end: marker extraction followed by @@ prefix stripping
        let content = "@@SELECT 'hidden';\nSELECT 'visible';";
        let markers = extract_markers(content);
        assert_eq!(
            markers.validation_content(),
            "SELECT 'hidden';\nSELECT 'visible';"
        );
    }
}