mdbook_validator/
preprocessor.rs

1//! mdBook preprocessor implementation
2//!
3//! Bridges the synchronous mdBook Preprocessor trait to async container validation.
4
5use tracing::{debug, info, trace};
6
// Default exec commands for validators when not configured.
// `get_exec_command` picks one of these by validator name when the validator
// config carries no `exec_command`.
/// Default command for the validator named `sqlite`.
const DEFAULT_EXEC_SQLITE: &str = "sqlite3 -json /tmp/test.db";
/// Default command for the validator named `osquery`.
const DEFAULT_EXEC_OSQUERY: &str = "osqueryi --json";
/// Default command for every other validator name.
const DEFAULT_EXEC_FALLBACK: &str = "cat";
11
12use std::collections::hash_map::Entry;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17use mdbook_preprocessor::book::{Book, BookItem, Chapter};
18use mdbook_preprocessor::errors::Error;
19use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
20use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
21
22use crate::command::RealCommandRunner;
23use crate::config::{Config, ValidatorConfig};
24use crate::container::ValidatorContainer;
25use crate::error::ValidatorError;
26use crate::host_validator;
27use crate::parser::{extract_markers, parse_info_string, ExtractedMarkers};
28use crate::transpiler::strip_markers;
29
/// The mdbook-validator preprocessor.
///
/// This is a field-less unit type, so it is trivially `Copy` and can be
/// constructed in `const` contexts.
#[derive(Debug, Clone, Copy)]
pub struct ValidatorPreprocessor;

impl ValidatorPreprocessor {
    /// Create a new preprocessor instance.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}

impl Default for ValidatorPreprocessor {
    /// Equivalent to [`ValidatorPreprocessor::new`].
    fn default() -> Self {
        Self::new()
    }
}
46
47impl Preprocessor for ValidatorPreprocessor {
48    fn name(&self) -> &'static str {
49        "validator"
50    }
51
52    fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
53        // Parse config from book.toml
54        let config = Config::from_context(ctx)
55            .map_err(|e| Error::msg(format!("Failed to parse config: {e}")))?;
56
57        // Create tokio runtime for async->sync bridge
58        let rt = tokio::runtime::Builder::new_current_thread()
59            .enable_all()
60            .build()
61            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
62
63        rt.block_on(async {
64            self.run_async_with_config(&mut book, &config, &ctx.root)
65                .await
66        })?;
67
68        Ok(book)
69    }
70
71    fn supports_renderer(&self, renderer: &str) -> Result<bool, anyhow::Error> {
72        // Support all renderers - we validate and strip markers,
73        // producing valid markdown for any output format
74        let _ = renderer;
75        Ok(true)
76    }
77}
78
79impl ValidatorPreprocessor {
80    /// Process a book with a custom validator script.
81    ///
82    /// This is primarily for testing different validator behaviors.
83    /// Uses the default Alpine container with the provided script.
84    pub fn process_book_with_script(
85        &self,
86        mut book: Book,
87        validator_script: &[u8],
88    ) -> Result<Book, Error> {
89        let rt = tokio::runtime::Builder::new_current_thread()
90            .enable_all()
91            .build()
92            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
93
94        rt.block_on(async {
95            self.run_async_with_script(&mut book, validator_script)
96                .await
97        })?;
98
99        Ok(book)
100    }
101
102    /// Process a book with explicit config (for testing).
103    ///
104    /// Allows testing with a custom config without needing a full `PreprocessorContext`.
105    pub fn process_book_with_config(
106        &self,
107        mut book: Book,
108        config: &Config,
109        book_root: &Path,
110    ) -> Result<Book, Error> {
111        let rt = tokio::runtime::Builder::new_current_thread()
112            .enable_all()
113            .build()
114            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
115
116        rt.block_on(async {
117            self.run_async_with_config(&mut book, config, book_root)
118                .await
119        })?;
120
121        Ok(book)
122    }
123
124    /// Run with explicit config - starts per-validator containers.
125    async fn run_async_with_config(
126        &self,
127        book: &mut Book,
128        config: &Config,
129        book_root: &Path,
130    ) -> Result<(), Error> {
131        // Cache started containers by validator name
132        let mut containers: HashMap<String, ValidatorContainer> = HashMap::new();
133
134        for item in &mut book.items {
135            self.process_book_item_with_config(item, config, book_root, &mut containers)
136                .await?;
137        }
138
139        Ok(())
140    }
141
142    /// Run with default script (for testing without config).
143    async fn run_async_with_script(
144        &self,
145        book: &mut Book,
146        validator_script: &[u8],
147    ) -> Result<(), Error> {
148        let container = ValidatorContainer::start(validator_script)
149            .await
150            .map_err(|e| Error::msg(format!("Failed to start container: {e}")))?;
151
152        for item in &mut book.items {
153            self.process_book_item(item, &container).await?;
154        }
155
156        Ok(())
157    }
158
159    async fn process_book_item(
160        &self,
161        item: &mut BookItem,
162        container: &ValidatorContainer,
163    ) -> Result<(), Error> {
164        if let BookItem::Chapter(chapter) = item {
165            self.process_chapter(chapter, container).await?;
166
167            // Process sub-items recursively
168            for sub_item in &mut chapter.sub_items {
169                Box::pin(self.process_book_item(sub_item, container)).await?;
170            }
171        }
172        Ok(())
173    }
174
175    async fn process_book_item_with_config(
176        &self,
177        item: &mut BookItem,
178        config: &Config,
179        book_root: &Path,
180        containers: &mut HashMap<String, ValidatorContainer>,
181    ) -> Result<(), Error> {
182        if let BookItem::Chapter(chapter) = item {
183            self.process_chapter_with_config(chapter, config, book_root, containers)
184                .await?;
185
186            // Process sub-items recursively
187            for sub_item in &mut chapter.sub_items {
188                Box::pin(
189                    self.process_book_item_with_config(sub_item, config, book_root, containers),
190                )
191                .await?;
192            }
193        }
194        Ok(())
195    }
196
197    async fn process_chapter(
198        &self,
199        chapter: &mut Chapter,
200        container: &ValidatorContainer,
201    ) -> Result<(), Error> {
202        if chapter.content.is_empty() {
203            return Ok(());
204        }
205
206        // Collect all code blocks that need validation
207        let blocks = Self::find_validator_blocks(&chapter.content);
208
209        if blocks.is_empty() {
210            return Ok(());
211        }
212
213        // Validate each block
214        for block in &blocks {
215            if block.skip {
216                continue;
217            }
218
219            let validation_content = block.markers.validation_content();
220            let result = container
221                .exec_with_env(
222                    block.markers.setup.as_deref(),
223                    &validation_content,
224                    block.markers.assertions.as_deref(),
225                    block.markers.expect.as_deref(),
226                )
227                .await
228                .map_err(|e| {
229                    Error::msg(format!(
230                        "Validation exec failed in '{}': {}",
231                        chapter.name, e
232                    ))
233                })?;
234
235            if result.exit_code != 0 {
236                let mut error_msg = format!(
237                    "Validation failed in '{}' (exit code {}):\n\nCode:\n{}\n",
238                    chapter.name, result.exit_code, block.markers.visible_content
239                );
240                if !result.stderr.is_empty() {
241                    let _ = write!(error_msg, "\nValidator stderr:\n{}", result.stderr);
242                }
243                if !result.stdout.is_empty() {
244                    let _ = write!(error_msg, "\nValidator stdout:\n{}", result.stdout);
245                }
246                return Err(Error::msg(error_msg));
247            }
248        }
249
250        // All validations passed - strip markers from chapter content
251        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
252
253        Ok(())
254    }
255
256    async fn process_chapter_with_config(
257        &self,
258        chapter: &mut Chapter,
259        config: &Config,
260        book_root: &Path,
261        containers: &mut HashMap<String, ValidatorContainer>,
262    ) -> Result<(), Error> {
263        if chapter.content.is_empty() {
264            return Ok(());
265        }
266
267        // Collect all code blocks that need validation
268        let blocks = Self::find_validator_blocks(&chapter.content);
269
270        if blocks.is_empty() {
271            return Ok(());
272        }
273
274        info!(chapter = %chapter.name, blocks = blocks.len(), "Validating");
275
276        // Check for mutually exclusive attributes (fail fast)
277        for block in &blocks {
278            if block.skip && block.hidden {
279                return Err(Error::new(ValidatorError::MutuallyExclusiveAttributes));
280            }
281        }
282
283        // Validate each block using configured validator
284        for (idx, block) in blocks.iter().enumerate() {
285            if block.skip {
286                debug!(block = idx + 1, validator = %block.validator_name, "Skipping (skip=true)");
287                continue;
288            }
289
290            debug!(block = idx + 1, validator = %block.validator_name, "Validating block");
291
292            // Get validator config
293            let validator_config = config.get_validator(&block.validator_name).map_err(|e| {
294                Error::msg(format!(
295                    "Unknown validator '{}': {}",
296                    block.validator_name, e
297                ))
298            })?;
299
300            // Get or start container for this validator
301            let container = self
302                .get_or_start_container(&block.validator_name, config, book_root, containers)
303                .await?;
304
305            // Use host-based validation: run query in container, validate on host
306            self.validate_block_host_based(
307                container,
308                validator_config,
309                block,
310                &chapter.name,
311                book_root,
312            )
313            .await?;
314        }
315
316        // All validations passed - strip markers from chapter content
317        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
318
319        info!(chapter = %chapter.name, "✓ Passed");
320
321        Ok(())
322    }
323
324    /// Validate a code block using host-based validation.
325    ///
326    /// This runs the query in the container and validates the output on the host.
327    async fn validate_block_host_based(
328        &self,
329        container: &ValidatorContainer,
330        validator_config: &ValidatorConfig,
331        block: &ValidatorBlock,
332        chapter_name: &str,
333        book_root: &Path,
334    ) -> Result<(), Error> {
335        // 0. Verify validator script exists first (fail fast before container work)
336        let script_path = book_root.join(&validator_config.script);
337        if !script_path.exists() {
338            return Err(Error::msg(format!(
339                "Failed to read validator script '{}': file not found",
340                script_path.display()
341            )));
342        }
343
344        debug!(script = %script_path.display(), "Using validator script");
345
346        // Get exec command (use defaults if not configured)
347        let exec_cmd = Self::get_exec_command(&block.validator_name, validator_config);
348        debug!(exec_command = %exec_cmd, "Container exec command");
349
350        // 1. Run setup script in container (if any)
351        // SETUP content IS the shell command - run directly via sh -c
352        if let Some(setup) = &block.markers.setup {
353            let setup_script = setup.trim();
354            if !setup_script.is_empty() {
355                debug!("Running SETUP script");
356                trace!(setup = %setup_script, "SETUP content");
357                let setup_result = container
358                    .exec_raw(&["sh", "-c", setup_script])
359                    .await
360                    .map_err(|e| Error::msg(format!("Setup exec failed: {e}")))?;
361
362                if setup_result.exit_code != 0 {
363                    #[allow(clippy::cast_possible_truncation)]
364                    return Err(ValidatorError::SetupFailed {
365                        exit_code: setup_result.exit_code as i32,
366                        message: format!(
367                            "in '{}' (validator: {}):\n\nScript:\n{}\n\nError:\n{}",
368                            chapter_name, block.validator_name, setup_script, setup_result.stderr
369                        ),
370                    }
371                    .into());
372                }
373            }
374        }
375
376        // 2. Run query in container, get JSON output
377        // Content is passed via stdin to avoid shell injection
378        // Use validation_content() to strip @@ prefix (but keep line content)
379        let query_sql = block.markers.validation_content();
380        let query_sql = query_sql.trim();
381        if query_sql.is_empty() {
382            return Err(Error::msg(format!(
383                "Validation failed in '{}' (validator: {}): Query content is empty",
384                chapter_name, block.validator_name
385            )));
386        }
387
388        debug!("Executing query in container");
389        trace!(query = %query_sql, "Query content");
390
391        // Pass content via stdin (secure) instead of shell interpolation (vulnerable)
392        let query_result = container
393            .exec_with_stdin(&["sh", "-c", &exec_cmd], query_sql)
394            .await
395            .map_err(|e| Error::msg(format!("Query exec failed: {e}")))?;
396
397        trace!(exit_code = query_result.exit_code, stdout = %query_result.stdout, stderr = %query_result.stderr, "Query result");
398
399        if query_result.exit_code != 0 {
400            return Err(Error::msg(format!(
401                "Query failed in '{}' (validator: {}):\n\nSQL:\n{}\n\nError:\n{}",
402                chapter_name, block.validator_name, query_sql, query_result.stderr
403            )));
404        }
405
406        // 3. Validate JSON output on host using validator script
407        // (script_path already validated at the start of this function)
408        let script_path_str = script_path
409            .to_str()
410            .ok_or_else(|| Error::msg(format!("Invalid script path: {}", script_path.display())))?;
411
412        debug!("Running host validator");
413        let validation_result = host_validator::run_validator(
414            &RealCommandRunner,
415            script_path_str,
416            &query_result.stdout,
417            block.markers.assertions.as_deref(),
418            block.markers.expect.as_deref(),
419            Some(&query_result.stderr), // Pass container stderr for warning detection
420        )
421        .map_err(|e| {
422            Error::msg(format!(
423                "Host validator failed in '{}' (validator: {}): {}",
424                chapter_name, block.validator_name, e
425            ))
426        })?;
427
428        trace!(exit_code = validation_result.exit_code, stdout = %validation_result.stdout, stderr = %validation_result.stderr, "Validator result");
429
430        if validation_result.exit_code != 0 {
431            let mut error_msg = format!(
432                "in '{}' (validator: {}):\n\nCode:\n{}\n",
433                chapter_name, block.validator_name, block.markers.visible_content
434            );
435            if !validation_result.stderr.is_empty() {
436                let _ = write!(
437                    error_msg,
438                    "\nValidator stderr:\n{}",
439                    validation_result.stderr
440                );
441            }
442            if !validation_result.stdout.is_empty() {
443                let _ = write!(
444                    error_msg,
445                    "\nValidator stdout:\n{}",
446                    validation_result.stdout
447                );
448            }
449            return Err(ValidatorError::ValidationFailed {
450                exit_code: validation_result.exit_code,
451                message: error_msg,
452            }
453            .into());
454        }
455
456        Ok(())
457    }
458
459    /// Get exec command for a validator.
460    ///
461    /// Uses configured command if available, otherwise uses defaults based on validator name.
462    fn get_exec_command(validator_name: &str, config: &ValidatorConfig) -> String {
463        config
464            .exec_command
465            .clone()
466            .unwrap_or_else(|| match validator_name {
467                "sqlite" => DEFAULT_EXEC_SQLITE.to_owned(),
468                "osquery" => DEFAULT_EXEC_OSQUERY.to_owned(),
469                _ => DEFAULT_EXEC_FALLBACK.to_owned(),
470            })
471    }
472
473    /// Get an existing container or start a new one for the given validator.
474    async fn get_or_start_container<'a>(
475        &self,
476        validator_name: &str,
477        config: &Config,
478        book_root: &Path,
479        containers: &'a mut HashMap<String, ValidatorContainer>,
480    ) -> Result<&'a ValidatorContainer, Error> {
481        match containers.entry(validator_name.to_owned()) {
482            Entry::Occupied(entry) => Ok(entry.into_mut()),
483            Entry::Vacant(entry) => {
484                // Look up validator config
485                let validator_config = config.get_validator(validator_name).map_err(|e| {
486                    Error::msg(format!("Unknown validator '{validator_name}': {e}"))
487                })?;
488
489                // Validate config values
490                validator_config.validate(validator_name)?;
491
492                // Resolve and validate fixtures_dir if configured
493                let mount = if let Some(ref fixtures_dir) = config.fixtures_dir {
494                    // Resolve relative path from book_root
495                    let fixtures_path = if fixtures_dir.is_absolute() {
496                        fixtures_dir.clone()
497                    } else {
498                        book_root.join(fixtures_dir)
499                    };
500
501                    // Validate fixtures_dir exists and is a directory
502                    if !fixtures_path.exists() {
503                        return Err(Error::msg(format!(
504                            "fixtures_dir '{}' does not exist",
505                            fixtures_path.display()
506                        )));
507                    }
508                    if !fixtures_path.is_dir() {
509                        return Err(Error::msg(format!(
510                            "fixtures_dir '{}' is not a directory",
511                            fixtures_path.display()
512                        )));
513                    }
514
515                    Some((fixtures_path, "/fixtures"))
516                } else {
517                    None
518                };
519
520                // Start the container with optional mount
521                let container = ValidatorContainer::start_raw_with_mount(
522                    &validator_config.container,
523                    mount.as_ref().map(|(p, c)| (p.as_path(), *c)),
524                )
525                .await
526                .map_err(|e| {
527                    Error::msg(format!(
528                        "Failed to start container '{}': {}",
529                        validator_config.container, e
530                    ))
531                })?;
532
533                Ok(entry.insert(container))
534            }
535        }
536    }
537
538    /// Find all code blocks with `validator=` attribute
539    fn find_validator_blocks(content: &str) -> Vec<ValidatorBlock> {
540        let mut blocks = Vec::new();
541        let parser = Parser::new(content);
542
543        let mut in_code_block = false;
544        let mut current_info = String::new();
545        let mut current_content = String::new();
546
547        for event in parser {
548            match event {
549                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
550                    in_code_block = true;
551                    current_info = info.to_string();
552                    current_content.clear();
553                }
554                Event::Text(text) if in_code_block => {
555                    current_content.push_str(&text);
556                }
557                Event::End(TagEnd::CodeBlock) if in_code_block => {
558                    in_code_block = false;
559
560                    let (_language, validator, skip, hidden) = parse_info_string(&current_info);
561
562                    // Only process blocks with validator= attribute
563                    if let Some(validator_name) = validator {
564                        // Handle empty validator= as "no validator"
565                        if !validator_name.is_empty() {
566                            let markers = extract_markers(&current_content);
567                            blocks.push(ValidatorBlock {
568                                validator_name,
569                                markers,
570                                skip,
571                                hidden,
572                            });
573                        }
574                    }
575                }
576                _ => {}
577            }
578        }
579
580        blocks
581    }
582
583    /// Strip all validation markers from chapter content, preserving code block structure.
584    ///
585    /// Uses span-based editing to surgically modify only code block contents,
586    /// preserving ALL other markdown formatting (lists, links, emphasis, etc.).
587    ///
588    /// If a code block has the `hidden` attribute, the entire fence is removed from output.
589    fn strip_markers_from_chapter(content: &str) -> String {
590        use std::ops::Range;
591
592        // Represents an edit to apply to the source
593        enum Edit {
594            /// Replace a range with new content (for stripping markers)
595            Replace {
596                range: Range<usize>,
597                content: String,
598            },
599            /// Delete a range entirely (for hidden blocks)
600            Delete { range: Range<usize> },
601        }
602
603        let mut edits: Vec<Edit> = Vec::new();
604        let parser = Parser::new(content).into_offset_iter();
605
606        let mut current_block_start: Option<usize> = None;
607        let mut current_hidden = false;
608        let mut current_has_validator = false;
609        let mut current_content_range: Option<Range<usize>> = None;
610
611        for (event, range) in parser {
612            match &event {
613                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
614                    let (_language, validator, _skip, hidden) = parse_info_string(info);
615                    current_hidden = hidden;
616                    current_has_validator = validator.is_some();
617                    current_block_start = Some(range.start);
618                    current_content_range = None;
619                }
620                Event::Text(_) if current_block_start.is_some() => {
621                    // Track the content range within the code block
622                    current_content_range = Some(range);
623                }
624                Event::End(TagEnd::CodeBlock) if current_block_start.is_some() => {
625                    let Some(block_start) = current_block_start.take() else {
626                        unreachable!("current_block_start must be Some here")
627                    };
628
629                    if current_hidden {
630                        // Delete the entire code block (including surrounding whitespace)
631                        // Find the start of the line containing the opening fence
632                        let line_start = content[..block_start].rfind('\n').map_or(0, |i| i + 1);
633                        // Find the end of the line containing the closing fence
634                        let line_end = content[range.end..]
635                            .find('\n')
636                            .map_or(range.end, |i| range.end + i + 1);
637
638                        edits.push(Edit::Delete {
639                            range: line_start..line_end,
640                        });
641                    } else if current_has_validator {
642                        // Strip markers from the content, but preserve the fence
643                        if let Some(content_range) = current_content_range.take() {
644                            let original_content = &content[content_range.clone()];
645                            let stripped = strip_markers(original_content);
646                            let trimmed = stripped.trim();
647                            if trimmed != original_content.trim() {
648                                // Only create an edit if content actually changed
649                                edits.push(Edit::Replace {
650                                    range: content_range,
651                                    content: format!("{trimmed}\n"),
652                                });
653                            }
654                        }
655                    }
656
657                    current_hidden = false;
658                    current_has_validator = false;
659                }
660                _ => {}
661            }
662        }
663
664        // Apply edits from end to start to preserve byte offsets
665        edits.sort_by(|a, b| {
666            let a_start = match a {
667                Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
668            };
669            let b_start = match b {
670                Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
671            };
672            b_start.cmp(&a_start) // Reverse order (end to start)
673        });
674
675        let mut result = content.to_owned();
676        for edit in edits {
677            match edit {
678                Edit::Replace { range, content } => {
679                    result.replace_range(range, &content);
680                }
681                Edit::Delete { range } => {
682                    result.replace_range(range, "");
683                }
684            }
685        }
686
687        // Clean up any excessive blank lines left by deletions
688        Self::normalize_blank_lines(&result)
689    }
690
691    /// Normalize blank lines: collapse 3+ consecutive newlines to 2, trim edges
692    fn normalize_blank_lines(content: &str) -> String {
693        let mut result = String::with_capacity(content.len());
694        let mut consecutive_newlines = 0;
695
696        for ch in content.chars() {
697            if ch == '\n' {
698                consecutive_newlines += 1;
699                if consecutive_newlines <= 2 {
700                    result.push(ch);
701                }
702            } else {
703                consecutive_newlines = 0;
704                result.push(ch);
705            }
706        }
707
708        result.trim().to_owned()
709    }
710}
711
/// A code block that requires validation.
///
/// One value is produced by `ValidatorPreprocessor::find_validator_blocks` for
/// each fenced block whose info string has a non-empty `validator=` attribute.
struct ValidatorBlock {
    /// Name of the validator (e.g., "osquery", "sqlite")
    validator_name: String,
    /// Extracted markers from the code block
    markers: ExtractedMarkers,
    /// Whether to skip validation.
    /// Setting this together with `hidden` is rejected by
    /// `process_chapter_with_config`.
    skip: bool,
    /// Whether to hide the block from output (but still validate)
    hidden: bool,
}
723
724#[cfg(test)]
725#[allow(clippy::needless_raw_string_hashes)]
726mod tests {
727    use super::*;
728
729    // ==================== strip_markers_from_chapter hidden block tests ====================
730
731    #[test]
732    fn strip_markers_from_chapter_removes_hidden_block() {
733        let content = r#"Some text
734
735```sql validator=sqlite hidden
736SELECT 1;
737```
738
739More text"#;
740        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
741        // Hidden block should be completely removed
742        assert!(!result.contains("SELECT 1"));
743        assert!(!result.contains("```sql"));
744        assert!(result.contains("Some text"));
745        assert!(result.contains("More text"));
746    }
747
748    #[test]
749    fn strip_markers_from_chapter_keeps_non_hidden_block() {
750        let content = r#"Some text
751
752```sql validator=sqlite
753SELECT 1;
754```
755
756More text"#;
757        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
758        // Non-hidden block should be kept (with markers stripped)
759        assert!(result.contains("SELECT 1"));
760        assert!(result.contains("```sql"));
761        assert!(result.contains("Some text"));
762        assert!(result.contains("More text"));
763    }
764
765    #[test]
766    fn strip_markers_from_chapter_mixed_hidden_and_non_hidden() {
767        let content = r#"Start
768
769```sql validator=sqlite hidden
770HIDDEN QUERY;
771```
772
773Middle
774
775```sql validator=sqlite
776VISIBLE QUERY;
777```
778
779End"#;
780        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
781        // Hidden block removed, non-hidden kept
782        assert!(!result.contains("HIDDEN QUERY"));
783        assert!(result.contains("VISIBLE QUERY"));
784        assert!(result.contains("Start"));
785        assert!(result.contains("Middle"));
786        assert!(result.contains("End"));
787    }
788
789    #[test]
790    fn strip_markers_from_chapter_adjacent_hidden_blocks() {
791        let content = r#"Start
792
793```sql validator=sqlite hidden
794HIDDEN 1;
795```
796
797```sql validator=sqlite hidden
798HIDDEN 2;
799```
800
801End"#;
802        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
803        // Both hidden blocks should be removed
804        assert!(!result.contains("HIDDEN 1"));
805        assert!(!result.contains("HIDDEN 2"));
806        assert!(result.contains("Start"));
807        assert!(result.contains("End"));
808    }
809
810    #[test]
811    fn strip_markers_from_chapter_hidden_block_at_start() {
812        let content = r#"```sql validator=sqlite hidden
813HIDDEN;
814```
815
816Visible content"#;
817        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
818        // Hidden block at start should not leave leading whitespace
819        assert!(!result.contains("HIDDEN"));
820        assert!(result.contains("Visible content"));
821        // Should not start with blank lines
822        assert!(!result.starts_with('\n'));
823    }
824
825    #[test]
826    fn strip_markers_from_chapter_hidden_block_at_end() {
827        let content = r#"Visible content
828
829```sql validator=sqlite hidden
830HIDDEN;
831```"#;
832        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
833        // Hidden block at end should not leave trailing whitespace
834        assert!(!result.contains("HIDDEN"));
835        assert!(result.contains("Visible content"));
836        // Should not end with excessive blank lines
837        assert!(!result.ends_with("\n\n"));
838    }
839
840    #[test]
841    fn strip_markers_from_chapter_only_hidden_block() {
842        let content = r#"```sql validator=sqlite hidden
843HIDDEN;
844```"#;
845        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
846        // Single hidden block should result in empty output
847        assert!(!result.contains("HIDDEN"));
848        assert!(result.is_empty() || result.trim().is_empty());
849    }
850
851    #[test]
852    fn strip_markers_from_chapter_hidden_with_markers() {
853        let content = r#"Text
854
855```sql validator=sqlite hidden
856<!--SETUP
857CREATE TABLE t;
858-->
859SELECT * FROM t;
860<!--ASSERT
861rows >= 1
862-->
863```
864
865More text"#;
866        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
867        // Hidden block with markers should be completely removed
868        assert!(!result.contains("SETUP"));
869        assert!(!result.contains("ASSERT"));
870        assert!(!result.contains("CREATE TABLE"));
871        assert!(!result.contains("SELECT"));
872        assert!(result.contains("Text"));
873        assert!(result.contains("More text"));
874    }
875
876    // ==================== Regression tests for markdown preservation ====================
877    // These tests ensure that strip_markers_from_chapter preserves all markdown formatting
878    // that exists OUTSIDE of code blocks with validator= attributes.
879
880    #[test]
881    fn strip_markers_preserves_lists() {
882        let content = r#"# Chapter
883
884Some text:
885
886- Item one
887- Item two
888- Item three
889
890### Next Section
891
892More text."#;
893        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
894        // Lists must be preserved exactly
895        assert!(
896            result.contains("- Item one"),
897            "List items must be preserved"
898        );
899        assert!(
900            result.contains("- Item two"),
901            "List items must be preserved"
902        );
903        assert!(
904            result.contains("- Item three"),
905            "List items must be preserved"
906        );
907        assert!(
908            result.contains("### Next Section"),
909            "Headings must be preserved"
910        );
911    }
912
913    #[test]
914    fn strip_markers_preserves_lists_with_code_block() {
915        let content = r#"# Chapter
916
917Some text:
918
919- Item one
920- Item two
921- Item three
922
923```sql validator=sqlite
924<!--SETUP
925CREATE TABLE t;
926-->
927SELECT 1;
928```
929
930### Next Section
931
932More text."#;
933        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
934        // Lists must be preserved
935        assert!(
936            result.contains("- Item one"),
937            "List items must be preserved"
938        );
939        assert!(
940            result.contains("- Item two"),
941            "List items must be preserved"
942        );
943        assert!(
944            result.contains("- Item three"),
945            "List items must be preserved"
946        );
947        // Code block content stripped of markers but preserved
948        assert!(result.contains("SELECT 1"), "Code block content preserved");
949        assert!(!result.contains("SETUP"), "Markers stripped");
950        assert!(!result.contains("CREATE TABLE"), "Setup content stripped");
951        // Headings preserved
952        assert!(
953            result.contains("### Next Section"),
954            "Headings must be preserved"
955        );
956    }
957
958    #[test]
959    fn strip_markers_preserves_numbered_lists() {
960        let content = r#"Steps:
961
9621. First step
9632. Second step
9643. Third step
965
966Done."#;
967        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
968        assert!(
969            result.contains("1. First step"),
970            "Numbered lists must be preserved"
971        );
972        assert!(
973            result.contains("2. Second step"),
974            "Numbered lists must be preserved"
975        );
976        assert!(
977            result.contains("3. Third step"),
978            "Numbered lists must be preserved"
979        );
980    }
981
982    #[test]
983    fn strip_markers_preserves_blockquotes() {
984        let content = r#"Quote:
985
986> This is a blockquote
987> with multiple lines
988
989End."#;
990        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
991        assert!(
992            result.contains("> This is a blockquote"),
993            "Blockquotes must be preserved"
994        );
995    }
996
997    #[test]
998    fn strip_markers_preserves_links() {
999        let content = r#"See [the documentation](https://example.com) for details.
1000
1001And [another link](https://other.com)."#;
1002        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1003        assert!(
1004            result.contains("[the documentation](https://example.com)"),
1005            "Links must be preserved"
1006        );
1007        assert!(
1008            result.contains("[another link](https://other.com)"),
1009            "Links must be preserved"
1010        );
1011    }
1012
1013    #[test]
1014    fn strip_markers_preserves_inline_code() {
1015        let content = r#"Use the `SELECT` statement to query data.
1016
1017Also `INSERT` works."#;
1018        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1019        assert!(result.contains("`SELECT`"), "Inline code must be preserved");
1020        assert!(result.contains("`INSERT`"), "Inline code must be preserved");
1021    }
1022
1023    #[test]
1024    fn strip_markers_preserves_emphasis() {
1025        let content = r#"This is *italic* and **bold** text.
1026
1027Also _underscores_ and __double__."#;
1028        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1029        assert!(result.contains("*italic*"), "Italic must be preserved");
1030        assert!(result.contains("**bold**"), "Bold must be preserved");
1031    }
1032
1033    #[test]
1034    fn strip_markers_preserves_tables() {
1035        let content = r#"| Column A | Column B |
1036|----------|----------|
1037| Value 1  | Value 2  |
1038| Value 3  | Value 4  |"#;
1039        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1040        assert!(
1041            result.contains("| Column A | Column B |"),
1042            "Tables must be preserved"
1043        );
1044        assert!(
1045            result.contains("| Value 1  | Value 2  |"),
1046            "Table rows must be preserved"
1047        );
1048    }
1049
1050    #[test]
1051    fn strip_markers_preserves_code_blocks_without_validator() {
1052        let content = r#"Regular code:
1053
1054```python
1055def hello():
1056    print("world")
1057```
1058
1059End."#;
1060        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1061        assert!(result.contains("```python"), "Code fence must be preserved");
1062        assert!(
1063            result.contains("def hello():"),
1064            "Code content must be preserved"
1065        );
1066        assert!(
1067            result.contains("print(\"world\")"),
1068            "Code content must be preserved"
1069        );
1070    }
1071
1072    #[test]
1073    fn strip_markers_complex_document() {
1074        // This tests a realistic document with mixed content
1075        let content = r#"# Getting Started
1076
1077Welcome to the guide. Here's what you'll learn:
1078
1079- How to query data
1080- How to filter results
1081- How to join tables
1082
1083## Basic Queries
1084
1085First, let's set up our database:
1086
1087```sql validator=sqlite hidden
1088<!--SETUP
1089CREATE TABLE users (id INTEGER, name TEXT);
1090INSERT INTO users VALUES (1, 'Alice'), (2, 'Bob');
1091-->
1092SELECT 'setup complete';
1093```
1094
1095Now run a simple query:
1096
1097```sql validator=sqlite
1098SELECT * FROM users;
1099<!--ASSERT
1100rows >= 1
1101-->
1102```
1103
1104> **Note**: The query above returns all users.
1105
1106See [SQL documentation](https://sqlite.org) for more.
1107
1108### Summary
1109
11101. We created a table
11112. We queried the data
11123. We verified the results
1113
1114Done!"#;
1115        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1116
1117        // Lists preserved
1118        assert!(
1119            result.contains("- How to query data"),
1120            "Bullet lists preserved"
1121        );
1122        assert!(
1123            result.contains("1. We created a table"),
1124            "Numbered lists preserved"
1125        );
1126
1127        // Hidden block removed
1128        assert!(
1129            !result.contains("CREATE TABLE users"),
1130            "Hidden block content removed"
1131        );
1132        assert!(
1133            !result.contains("INSERT INTO users"),
1134            "Hidden block content removed"
1135        );
1136
1137        // Visible code block preserved (without markers)
1138        assert!(
1139            result.contains("SELECT * FROM users"),
1140            "Visible query preserved"
1141        );
1142        assert!(!result.contains("ASSERT"), "Markers stripped");
1143
1144        // Blockquote preserved
1145        assert!(result.contains("> **Note**"), "Blockquote preserved");
1146
1147        // Link preserved
1148        assert!(
1149            result.contains("[SQL documentation](https://sqlite.org)"),
1150            "Link preserved"
1151        );
1152
1153        // Headings preserved
1154        assert!(result.contains("## Basic Queries"), "H2 preserved");
1155        assert!(result.contains("### Summary"), "H3 preserved");
1156    }
1157
1158    #[test]
1159    fn strip_markers_preserves_headings_with_links() {
1160        // Regression test: headings containing links were being corrupted
1161        let content = r#"# Introduction
1162
1163Some intro text.
1164
1165### [Configuration Guide](https://example.com/config)
1166
1167This section explains configuration.
1168
1169### [API Reference](https://example.com/api)
1170
1171API docs here.
1172
1173```sql validator=sqlite
1174SELECT 1;
1175```
1176
1177### [Advanced Topics](https://example.com/advanced)
1178
1179More content."#;
1180        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1181
1182        // Headings with links must be preserved exactly
1183        assert!(
1184            result.contains("### [Configuration Guide](https://example.com/config)"),
1185            "Heading with link must be preserved"
1186        );
1187        assert!(
1188            result.contains("### [API Reference](https://example.com/api)"),
1189            "Heading with link must be preserved"
1190        );
1191        assert!(
1192            result.contains("### [Advanced Topics](https://example.com/advanced)"),
1193            "Heading with link must be preserved"
1194        );
1195        // Code block still processed
1196        assert!(result.contains("SELECT 1"), "Code block content preserved");
1197    }
1198
1199    #[test]
1200    fn strip_markers_preserves_paths_with_wildcards() {
1201        // Regression test: paths with * were being parsed as emphasis
1202        let content = r#"# File Patterns
1203
1204Match all files in a directory:
1205
1206- `/etc/osquery/*`
1207- `/var/log/*.log`
1208- `C:\Users\*\AppData`
1209
1210You can also use `/some/path/**/*.json` for recursive matching.
1211
1212```sql validator=sqlite
1213SELECT 1;
1214```
1215
1216The path `/tmp/*` is commonly used."#;
1217        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1218
1219        // Paths with wildcards must be preserved exactly
1220        assert!(
1221            result.contains("/etc/osquery/*"),
1222            "Path with wildcard must be preserved"
1223        );
1224        assert!(
1225            result.contains("/var/log/*.log"),
1226            "Path with wildcard must be preserved"
1227        );
1228        assert!(
1229            result.contains(r"C:\Users\*\AppData"),
1230            "Windows path with wildcard must be preserved"
1231        );
1232        assert!(
1233            result.contains("/some/path/**/*.json"),
1234            "Recursive glob must be preserved"
1235        );
1236        assert!(
1237            result.contains("/tmp/*"),
1238            "Inline path with wildcard must be preserved"
1239        );
1240    }
1241
1242    #[test]
1243    fn strip_markers_preserves_inline_code_with_special_chars() {
1244        // Regression test: inline code with special characters
1245        let content = r#"# Code Examples
1246
1247Use `SELECT * FROM users` to get all users.
1248
1249The command `rm -rf /tmp/*` removes temp files.
1250
1251Run `echo $HOME` to print home directory.
1252
1253Use `git log --oneline | head -10` for recent commits.
1254
1255The regex `\d+\.\d+` matches decimals.
1256
1257```sql validator=sqlite
1258SELECT 1;
1259```
1260
1261Also try `jq '.[] | .name'` for JSON parsing."#;
1262        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1263
1264        // Inline code must be preserved exactly
1265        assert!(
1266            result.contains("`SELECT * FROM users`"),
1267            "Inline code with * must be preserved"
1268        );
1269        assert!(
1270            result.contains("`rm -rf /tmp/*`"),
1271            "Inline code with path must be preserved"
1272        );
1273        assert!(
1274            result.contains("`echo $HOME`"),
1275            "Inline code with $ must be preserved"
1276        );
1277        assert!(
1278            result.contains("`git log --oneline | head -10`"),
1279            "Inline code with pipe must be preserved"
1280        );
1281        assert!(
1282            result.contains(r"`\d+\.\d+`"),
1283            "Inline code with backslashes must be preserved"
1284        );
1285        assert!(
1286            result.contains("`jq '.[] | .name'`"),
1287            "Inline code with quotes must be preserved"
1288        );
1289    }
1290
1291    #[test]
1292    fn strip_markers_preserves_asterisks_in_text() {
1293        // Regression test: asterisks in regular text (not emphasis)
1294        let content = r#"# Wildcards
1295
1296The pattern `*` matches everything.
1297
1298File paths like /etc/* are common.
1299
1300Use * for wildcards and ** for recursive.
1301
1302Math: 5 * 3 = 15
1303
1304```sql validator=sqlite
1305SELECT 1;
1306```
1307
1308Done."#;
1309        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1310
1311        // Asterisks in various contexts
1312        assert!(
1313            result.contains("The pattern `*` matches everything"),
1314            "Backtick asterisk preserved"
1315        );
1316        assert!(result.contains("/etc/*"), "Path asterisk preserved");
1317        assert!(result.contains("5 * 3 = 15"), "Math asterisk preserved");
1318    }
1319
1320    #[test]
1321    fn strip_markers_preserves_complex_inline_formatting() {
1322        // Test various inline formatting combinations
1323        let content = r#"# Formatting Test
1324
1325This has **bold** and *italic* text.
1326
1327This has `code with **asterisks**` inside.
1328
1329This has [link with `code`](https://example.com).
1330
1331This has **bold with `code` inside**.
1332
1333```sql validator=sqlite
1334SELECT 1;
1335```
1336
1337End."#;
1338        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1339
1340        assert!(result.contains("**bold**"), "Bold preserved");
1341        assert!(result.contains("*italic*"), "Italic preserved");
1342        assert!(
1343            result.contains("`code with **asterisks**`"),
1344            "Code with asterisks preserved"
1345        );
1346        assert!(
1347            result.contains("[link with `code`](https://example.com)"),
1348            "Link with code preserved"
1349        );
1350        assert!(
1351            result.contains("**bold with `code` inside**"),
1352            "Bold with code preserved"
1353        );
1354    }
1355}