mdbook_validator/
preprocessor.rs

1//! mdBook preprocessor implementation
2//!
3//! Bridges the synchronous mdBook Preprocessor trait to async container validation.
4
5use tracing::{debug, info, trace};
6
// Default exec commands for validators when not configured.
// `get_exec_command` picks one of these by validator name when the validator's
// `exec_command` setting is absent.
/// Default command for the validator named `sqlite`.
const DEFAULT_EXEC_SQLITE: &str = "sqlite3 -json /tmp/test.db";
/// Default command for the validator named `osquery`.
const DEFAULT_EXEC_OSQUERY: &str = "osqueryi --json";
/// Default command for any other validator name.
const DEFAULT_EXEC_FALLBACK: &str = "cat";
11
12use std::collections::hash_map::Entry;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17use mdbook_preprocessor::book::{Book, BookItem, Chapter};
18use mdbook_preprocessor::errors::Error;
19use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
20use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
21
22use crate::command::RealCommandRunner;
23use crate::config::{Config, ValidatorConfig};
24use crate::container::ValidatorContainer;
25use crate::error::ValidatorError;
26use crate::host_validator;
27use crate::parser::{extract_markers, parse_info_string, ExtractedMarkers};
28use crate::transpiler::strip_markers;
29
/// The mdbook-validator preprocessor.
///
/// This is a stateless unit type: all configuration is read on each run, so
/// the value is trivially copyable and every instance is interchangeable.
/// `Default` is derived and yields the same value as [`ValidatorPreprocessor::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct ValidatorPreprocessor;

impl ValidatorPreprocessor {
    /// Create a new preprocessor instance.
    ///
    /// `const` so the instance can also be built in constant contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
46
47impl Preprocessor for ValidatorPreprocessor {
48    fn name(&self) -> &'static str {
49        "validator"
50    }
51
52    fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
53        // Parse config from book.toml
54        let config = Config::from_context(ctx)
55            .map_err(|e| Error::msg(format!("Failed to parse config: {e}")))?;
56
57        // Create tokio runtime for async->sync bridge
58        let rt = tokio::runtime::Builder::new_current_thread()
59            .enable_all()
60            .build()
61            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
62
63        rt.block_on(async {
64            self.run_async_with_config(&mut book, &config, &ctx.root)
65                .await
66        })?;
67
68        Ok(book)
69    }
70
71    fn supports_renderer(&self, renderer: &str) -> Result<bool, anyhow::Error> {
72        // Support all renderers - we validate and strip markers,
73        // producing valid markdown for any output format
74        let _ = renderer;
75        Ok(true)
76    }
77}
78
79impl ValidatorPreprocessor {
80    /// Process a book with a custom validator script.
81    ///
82    /// This is primarily for testing different validator behaviors.
83    /// Uses the default Alpine container with the provided script.
84    pub fn process_book_with_script(
85        &self,
86        mut book: Book,
87        validator_script: &[u8],
88    ) -> Result<Book, Error> {
89        let rt = tokio::runtime::Builder::new_current_thread()
90            .enable_all()
91            .build()
92            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
93
94        rt.block_on(async {
95            self.run_async_with_script(&mut book, validator_script)
96                .await
97        })?;
98
99        Ok(book)
100    }
101
102    /// Process a book with explicit config (for testing).
103    ///
104    /// Allows testing with a custom config without needing a full `PreprocessorContext`.
105    pub fn process_book_with_config(
106        &self,
107        mut book: Book,
108        config: &Config,
109        book_root: &Path,
110    ) -> Result<Book, Error> {
111        let rt = tokio::runtime::Builder::new_current_thread()
112            .enable_all()
113            .build()
114            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
115
116        rt.block_on(async {
117            self.run_async_with_config(&mut book, config, book_root)
118                .await
119        })?;
120
121        Ok(book)
122    }
123
124    /// Run with explicit config - starts per-validator containers.
125    async fn run_async_with_config(
126        &self,
127        book: &mut Book,
128        config: &Config,
129        book_root: &Path,
130    ) -> Result<(), Error> {
131        // Cache started containers by validator name
132        let mut containers: HashMap<String, ValidatorContainer> = HashMap::new();
133
134        for item in &mut book.items {
135            self.process_book_item_with_config(item, config, book_root, &mut containers)
136                .await?;
137        }
138
139        Ok(())
140    }
141
142    /// Run with default script (for testing without config).
143    async fn run_async_with_script(
144        &self,
145        book: &mut Book,
146        validator_script: &[u8],
147    ) -> Result<(), Error> {
148        let container = ValidatorContainer::start(validator_script)
149            .await
150            .map_err(|e| Error::msg(format!("Failed to start container: {e}")))?;
151
152        for item in &mut book.items {
153            self.process_book_item(item, &container).await?;
154        }
155
156        Ok(())
157    }
158
159    async fn process_book_item(
160        &self,
161        item: &mut BookItem,
162        container: &ValidatorContainer,
163    ) -> Result<(), Error> {
164        if let BookItem::Chapter(chapter) = item {
165            self.process_chapter(chapter, container).await?;
166
167            // Process sub-items recursively
168            for sub_item in &mut chapter.sub_items {
169                Box::pin(self.process_book_item(sub_item, container)).await?;
170            }
171        }
172        Ok(())
173    }
174
175    async fn process_book_item_with_config(
176        &self,
177        item: &mut BookItem,
178        config: &Config,
179        book_root: &Path,
180        containers: &mut HashMap<String, ValidatorContainer>,
181    ) -> Result<(), Error> {
182        if let BookItem::Chapter(chapter) = item {
183            self.process_chapter_with_config(chapter, config, book_root, containers)
184                .await?;
185
186            // Process sub-items recursively
187            for sub_item in &mut chapter.sub_items {
188                Box::pin(
189                    self.process_book_item_with_config(sub_item, config, book_root, containers),
190                )
191                .await?;
192            }
193        }
194        Ok(())
195    }
196
197    async fn process_chapter(
198        &self,
199        chapter: &mut Chapter,
200        container: &ValidatorContainer,
201    ) -> Result<(), Error> {
202        if chapter.content.is_empty() {
203            return Ok(());
204        }
205
206        // Collect all code blocks that need validation
207        let blocks = Self::find_validator_blocks(&chapter.content);
208
209        if blocks.is_empty() {
210            return Ok(());
211        }
212
213        // Validate each block
214        for block in &blocks {
215            if block.skip {
216                continue;
217            }
218
219            let validation_content = block.markers.validation_content();
220            let result = container
221                .exec_with_env(
222                    block.markers.setup.as_deref(),
223                    &validation_content,
224                    block.markers.assertions.as_deref(),
225                    block.markers.expect.as_deref(),
226                )
227                .await
228                .map_err(|e| {
229                    Error::msg(format!(
230                        "Validation exec failed in '{}': {}",
231                        chapter.name, e
232                    ))
233                })?;
234
235            if result.exit_code != 0 {
236                let mut error_msg = format!(
237                    "Validation failed in '{}' (exit code {}):\n\nCode:\n{}\n",
238                    chapter.name, result.exit_code, block.markers.visible_content
239                );
240                if !result.stderr.is_empty() {
241                    let _ = write!(error_msg, "\nValidator stderr:\n{}", result.stderr);
242                }
243                if !result.stdout.is_empty() {
244                    let _ = write!(error_msg, "\nValidator stdout:\n{}", result.stdout);
245                }
246                return Err(Error::msg(error_msg));
247            }
248        }
249
250        // All validations passed - strip markers from chapter content
251        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
252
253        Ok(())
254    }
255
256    async fn process_chapter_with_config(
257        &self,
258        chapter: &mut Chapter,
259        config: &Config,
260        book_root: &Path,
261        containers: &mut HashMap<String, ValidatorContainer>,
262    ) -> Result<(), Error> {
263        if chapter.content.is_empty() {
264            return Ok(());
265        }
266
267        // Collect all code blocks that need validation
268        let blocks = Self::find_validator_blocks(&chapter.content);
269
270        if blocks.is_empty() {
271            return Ok(());
272        }
273
274        info!(chapter = %chapter.name, blocks = blocks.len(), "Validating");
275
276        // Check for mutually exclusive attributes (fail fast)
277        for block in &blocks {
278            if block.skip && block.hidden {
279                return Err(Error::new(ValidatorError::MutuallyExclusiveAttributes));
280            }
281        }
282
283        // Validate each block using configured validator
284        for (idx, block) in blocks.iter().enumerate() {
285            if block.skip {
286                debug!(block = idx + 1, validator = %block.validator_name, "Skipping (skip=true)");
287                continue;
288            }
289
290            debug!(block = idx + 1, validator = %block.validator_name, "Validating block");
291
292            // Get validator config
293            let validator_config = config.get_validator(&block.validator_name).map_err(|e| {
294                Error::msg(format!(
295                    "Unknown validator '{}': {}",
296                    block.validator_name, e
297                ))
298            })?;
299
300            // Get or start container for this validator
301            let container = self
302                .get_or_start_container(&block.validator_name, config, book_root, containers)
303                .await?;
304
305            // Use host-based validation: run query in container, validate on host
306            self.validate_block_host_based(
307                container,
308                validator_config,
309                block,
310                &chapter.name,
311                book_root,
312            )
313            .await?;
314        }
315
316        // All validations passed - strip markers from chapter content
317        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
318
319        info!(chapter = %chapter.name, "✓ Passed");
320
321        Ok(())
322    }
323
324    /// Validate a code block using host-based validation.
325    ///
326    /// This runs the query in the container and validates the output on the host.
327    async fn validate_block_host_based(
328        &self,
329        container: &ValidatorContainer,
330        validator_config: &ValidatorConfig,
331        block: &ValidatorBlock,
332        chapter_name: &str,
333        book_root: &Path,
334    ) -> Result<(), Error> {
335        // 0. Verify validator script exists first (fail fast before container work)
336        let script_path = book_root.join(&validator_config.script);
337        if !script_path.exists() {
338            return Err(Error::msg(format!(
339                "Failed to read validator script '{}': file not found",
340                script_path.display()
341            )));
342        }
343
344        debug!(script = %script_path.display(), "Using validator script");
345
346        // Get exec command (use defaults if not configured)
347        let exec_cmd = Self::get_exec_command(&block.validator_name, validator_config);
348        debug!(exec_command = %exec_cmd, "Container exec command");
349
350        // 1. Run setup script in container (if any)
351        // SETUP content IS the shell command - run directly via sh -c
352        if let Some(setup) = &block.markers.setup {
353            let setup_script = setup.trim();
354            if !setup_script.is_empty() {
355                debug!("Running SETUP script");
356                trace!(setup = %setup_script, "SETUP content");
357                let setup_result = container
358                    .exec_raw(&["sh", "-c", setup_script])
359                    .await
360                    .map_err(|e| Error::msg(format!("Setup exec failed: {e}")))?;
361
362                if setup_result.exit_code != 0 {
363                    #[allow(clippy::cast_possible_truncation)]
364                    return Err(ValidatorError::SetupFailed {
365                        exit_code: setup_result.exit_code as i32,
366                        message: format!(
367                            "in '{}' (validator: {}):\n\nScript:\n{}\n\nError:\n{}",
368                            chapter_name, block.validator_name, setup_script, setup_result.stderr
369                        ),
370                    }
371                    .into());
372                }
373            }
374        }
375
376        // 2. Run query in container, get JSON output
377        // Content is passed via stdin to avoid shell injection
378        // Use validation_content() to strip @@ prefix (but keep line content)
379        let query_sql = block.markers.validation_content();
380        let query_sql = query_sql.trim();
381        if query_sql.is_empty() {
382            return Err(Error::msg(format!(
383                "Validation failed in '{}' (validator: {}): Query content is empty",
384                chapter_name, block.validator_name
385            )));
386        }
387
388        debug!("Executing query in container");
389        trace!(query = %query_sql, "Query content");
390
391        // Pass content via stdin (secure) instead of shell interpolation (vulnerable)
392        let query_result = container
393            .exec_with_stdin(&["sh", "-c", &exec_cmd], query_sql)
394            .await
395            .map_err(|e| Error::msg(format!("Query exec failed: {e}")))?;
396
397        trace!(exit_code = query_result.exit_code, stdout = %query_result.stdout, stderr = %query_result.stderr, "Query result");
398
399        if query_result.exit_code != 0 {
400            return Err(Error::msg(format!(
401                "Query failed in '{}' (validator: {}):\n\nSQL:\n{}\n\nError:\n{}",
402                chapter_name, block.validator_name, query_sql, query_result.stderr
403            )));
404        }
405
406        // 3. Validate JSON output on host using validator script
407        // (script_path already validated at the start of this function)
408        let script_path_str = script_path
409            .to_str()
410            .ok_or_else(|| Error::msg(format!("Invalid script path: {}", script_path.display())))?;
411
412        debug!("Running host validator");
413        let validation_result = host_validator::run_validator(
414            &RealCommandRunner,
415            script_path_str,
416            &query_result.stdout,
417            block.markers.assertions.as_deref(),
418            block.markers.expect.as_deref(),
419            Some(&query_result.stderr), // Pass container stderr for warning detection
420        )
421        .map_err(|e| {
422            Error::msg(format!(
423                "Host validator failed in '{}' (validator: {}): {}",
424                chapter_name, block.validator_name, e
425            ))
426        })?;
427
428        trace!(exit_code = validation_result.exit_code, stdout = %validation_result.stdout, stderr = %validation_result.stderr, "Validator result");
429
430        if validation_result.exit_code != 0 {
431            let mut error_msg = format!(
432                "in '{}' (validator: {}):\n\nCode:\n{}\n",
433                chapter_name, block.validator_name, block.markers.visible_content
434            );
435            if !validation_result.stderr.is_empty() {
436                let _ = write!(
437                    error_msg,
438                    "\nValidator stderr:\n{}",
439                    validation_result.stderr
440                );
441            }
442            if !validation_result.stdout.is_empty() {
443                let _ = write!(
444                    error_msg,
445                    "\nValidator stdout:\n{}",
446                    validation_result.stdout
447                );
448            }
449            return Err(ValidatorError::ValidationFailed {
450                exit_code: validation_result.exit_code,
451                message: error_msg,
452            }
453            .into());
454        }
455
456        Ok(())
457    }
458
459    /// Get exec command for a validator.
460    ///
461    /// Uses configured command if available, otherwise uses defaults based on validator name.
462    fn get_exec_command(validator_name: &str, config: &ValidatorConfig) -> String {
463        config
464            .exec_command
465            .clone()
466            .unwrap_or_else(|| match validator_name {
467                "sqlite" => DEFAULT_EXEC_SQLITE.to_owned(),
468                "osquery" => DEFAULT_EXEC_OSQUERY.to_owned(),
469                _ => DEFAULT_EXEC_FALLBACK.to_owned(),
470            })
471    }
472
473    /// Get an existing container or start a new one for the given validator.
474    async fn get_or_start_container<'a>(
475        &self,
476        validator_name: &str,
477        config: &Config,
478        book_root: &Path,
479        containers: &'a mut HashMap<String, ValidatorContainer>,
480    ) -> Result<&'a ValidatorContainer, Error> {
481        match containers.entry(validator_name.to_owned()) {
482            Entry::Occupied(entry) => Ok(entry.into_mut()),
483            Entry::Vacant(entry) => {
484                // Look up validator config
485                let validator_config = config.get_validator(validator_name).map_err(|e| {
486                    Error::msg(format!("Unknown validator '{validator_name}': {e}"))
487                })?;
488
489                // Validate config values
490                validator_config.validate(validator_name)?;
491
492                // Resolve and validate fixtures_dir if configured
493                let mount = if let Some(ref fixtures_dir) = config.fixtures_dir {
494                    // Resolve relative path from book_root
495                    let fixtures_path = if fixtures_dir.is_absolute() {
496                        fixtures_dir.clone()
497                    } else {
498                        book_root.join(fixtures_dir)
499                    };
500
501                    // Validate fixtures_dir exists and is a directory
502                    if !fixtures_path.exists() {
503                        return Err(Error::msg(format!(
504                            "fixtures_dir '{}' does not exist",
505                            fixtures_path.display()
506                        )));
507                    }
508                    if !fixtures_path.is_dir() {
509                        return Err(Error::msg(format!(
510                            "fixtures_dir '{}' is not a directory",
511                            fixtures_path.display()
512                        )));
513                    }
514
515                    // Canonicalize to resolve symlinks (Docker requires real paths)
516                    let fixtures_path = fixtures_path.canonicalize().map_err(|e| {
517                        Error::msg(format!(
518                            "fixtures_dir '{}' could not be canonicalized: {}",
519                            fixtures_path.display(),
520                            e
521                        ))
522                    })?;
523
524                    Some((fixtures_path, "/fixtures"))
525                } else {
526                    None
527                };
528
529                // Start the container with optional mount
530                let container = ValidatorContainer::start_raw_with_mount(
531                    &validator_config.container,
532                    mount.as_ref().map(|(p, c)| (p.as_path(), *c)),
533                )
534                .await
535                .map_err(|e| {
536                    Error::msg(format!(
537                        "Failed to start container '{}': {}",
538                        validator_config.container, e
539                    ))
540                })?;
541
542                Ok(entry.insert(container))
543            }
544        }
545    }
546
547    /// Find all code blocks with `validator=` attribute
548    fn find_validator_blocks(content: &str) -> Vec<ValidatorBlock> {
549        let mut blocks = Vec::new();
550        let parser = Parser::new(content);
551
552        let mut in_code_block = false;
553        let mut current_info = String::new();
554        let mut current_content = String::new();
555
556        for event in parser {
557            match event {
558                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
559                    in_code_block = true;
560                    current_info = info.to_string();
561                    current_content.clear();
562                }
563                Event::Text(text) if in_code_block => {
564                    current_content.push_str(&text);
565                }
566                Event::End(TagEnd::CodeBlock) if in_code_block => {
567                    in_code_block = false;
568
569                    let (_language, validator, skip, hidden) = parse_info_string(&current_info);
570
571                    // Only process blocks with validator= attribute
572                    if let Some(validator_name) = validator {
573                        // Handle empty validator= as "no validator"
574                        if !validator_name.is_empty() {
575                            let markers = extract_markers(&current_content);
576                            blocks.push(ValidatorBlock {
577                                validator_name,
578                                markers,
579                                skip,
580                                hidden,
581                            });
582                        }
583                    }
584                }
585                _ => {}
586            }
587        }
588
589        blocks
590    }
591
592    /// Strip all validation markers from chapter content, preserving code block structure.
593    ///
594    /// Uses span-based editing to surgically modify only code block contents,
595    /// preserving ALL other markdown formatting (lists, links, emphasis, etc.).
596    ///
597    /// If a code block has the `hidden` attribute, the entire fence is removed from output.
598    fn strip_markers_from_chapter(content: &str) -> String {
599        use std::ops::Range;
600
601        // Represents an edit to apply to the source
602        enum Edit {
603            /// Replace a range with new content (for stripping markers)
604            Replace {
605                range: Range<usize>,
606                content: String,
607            },
608            /// Delete a range entirely (for hidden blocks)
609            Delete { range: Range<usize> },
610        }
611
612        let mut edits: Vec<Edit> = Vec::new();
613        let parser = Parser::new(content).into_offset_iter();
614
615        let mut current_block_start: Option<usize> = None;
616        let mut current_hidden = false;
617        let mut current_has_validator = false;
618        let mut current_content_range: Option<Range<usize>> = None;
619
620        for (event, range) in parser {
621            match &event {
622                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
623                    let (_language, validator, _skip, hidden) = parse_info_string(info);
624                    current_hidden = hidden;
625                    current_has_validator = validator.is_some();
626                    current_block_start = Some(range.start);
627                    current_content_range = None;
628                }
629                Event::Text(_) if current_block_start.is_some() => {
630                    // Track the content range within the code block
631                    current_content_range = Some(range);
632                }
633                Event::End(TagEnd::CodeBlock) if current_block_start.is_some() => {
634                    let Some(block_start) = current_block_start.take() else {
635                        unreachable!("current_block_start must be Some here")
636                    };
637
638                    if current_hidden {
639                        // Delete the entire code block (including surrounding whitespace)
640                        // Find the start of the line containing the opening fence
641                        let line_start = content[..block_start].rfind('\n').map_or(0, |i| i + 1);
642                        // Find the end of the line containing the closing fence
643                        let line_end = content[range.end..]
644                            .find('\n')
645                            .map_or(range.end, |i| range.end + i + 1);
646
647                        edits.push(Edit::Delete {
648                            range: line_start..line_end,
649                        });
650                    } else if current_has_validator {
651                        // Strip markers from the content, but preserve the fence
652                        if let Some(content_range) = current_content_range.take() {
653                            let original_content = &content[content_range.clone()];
654                            let stripped = strip_markers(original_content);
655                            let trimmed = stripped.trim();
656                            if trimmed != original_content.trim() {
657                                // Only create an edit if content actually changed
658                                edits.push(Edit::Replace {
659                                    range: content_range,
660                                    content: format!("{trimmed}\n"),
661                                });
662                            }
663                        }
664                    }
665
666                    current_hidden = false;
667                    current_has_validator = false;
668                }
669                _ => {}
670            }
671        }
672
673        // Apply edits from end to start to preserve byte offsets
674        edits.sort_by(|a, b| {
675            let a_start = match a {
676                Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
677            };
678            let b_start = match b {
679                Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
680            };
681            b_start.cmp(&a_start) // Reverse order (end to start)
682        });
683
684        let mut result = content.to_owned();
685        for edit in edits {
686            match edit {
687                Edit::Replace { range, content } => {
688                    result.replace_range(range, &content);
689                }
690                Edit::Delete { range } => {
691                    result.replace_range(range, "");
692                }
693            }
694        }
695
696        // Clean up any excessive blank lines left by deletions
697        Self::normalize_blank_lines(&result)
698    }
699
700    /// Normalize blank lines: collapse 3+ consecutive newlines to 2, trim edges
701    fn normalize_blank_lines(content: &str) -> String {
702        let mut result = String::with_capacity(content.len());
703        let mut consecutive_newlines = 0;
704
705        for ch in content.chars() {
706            if ch == '\n' {
707                consecutive_newlines += 1;
708                if consecutive_newlines <= 2 {
709                    result.push(ch);
710                }
711            } else {
712                consecutive_newlines = 0;
713                result.push(ch);
714            }
715        }
716
717        result.trim().to_owned()
718    }
719}
720
/// A code block that requires validation
///
/// Built by `find_validator_blocks` for each fenced code block whose info
/// string names a non-empty validator.
struct ValidatorBlock {
    /// Name of the validator (e.g., "osquery", "sqlite"); used to look up the
    /// validator's config and to key its container
    validator_name: String,
    /// Extracted markers from the code block (setup, assertions, expected
    /// output, visible content)
    markers: ExtractedMarkers,
    /// Whether to skip validation
    skip: bool,
    /// Whether to hide the block from output (but still validate);
    /// cannot be combined with `skip`
    hidden: bool,
}
732
733#[cfg(test)]
734#[allow(clippy::needless_raw_string_hashes)]
735mod tests {
736    use super::*;
737
738    // ==================== strip_markers_from_chapter hidden block tests ====================
739
740    #[test]
741    fn strip_markers_from_chapter_removes_hidden_block() {
742        let content = r#"Some text
743
744```sql validator=sqlite hidden
745SELECT 1;
746```
747
748More text"#;
749        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
750        // Hidden block should be completely removed
751        assert!(!result.contains("SELECT 1"));
752        assert!(!result.contains("```sql"));
753        assert!(result.contains("Some text"));
754        assert!(result.contains("More text"));
755    }
756
757    #[test]
758    fn strip_markers_from_chapter_keeps_non_hidden_block() {
759        let content = r#"Some text
760
761```sql validator=sqlite
762SELECT 1;
763```
764
765More text"#;
766        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
767        // Non-hidden block should be kept (with markers stripped)
768        assert!(result.contains("SELECT 1"));
769        assert!(result.contains("```sql"));
770        assert!(result.contains("Some text"));
771        assert!(result.contains("More text"));
772    }
773
774    #[test]
775    fn strip_markers_from_chapter_mixed_hidden_and_non_hidden() {
776        let content = r#"Start
777
778```sql validator=sqlite hidden
779HIDDEN QUERY;
780```
781
782Middle
783
784```sql validator=sqlite
785VISIBLE QUERY;
786```
787
788End"#;
789        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
790        // Hidden block removed, non-hidden kept
791        assert!(!result.contains("HIDDEN QUERY"));
792        assert!(result.contains("VISIBLE QUERY"));
793        assert!(result.contains("Start"));
794        assert!(result.contains("Middle"));
795        assert!(result.contains("End"));
796    }
797
798    #[test]
799    fn strip_markers_from_chapter_adjacent_hidden_blocks() {
800        let content = r#"Start
801
802```sql validator=sqlite hidden
803HIDDEN 1;
804```
805
806```sql validator=sqlite hidden
807HIDDEN 2;
808```
809
810End"#;
811        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
812        // Both hidden blocks should be removed
813        assert!(!result.contains("HIDDEN 1"));
814        assert!(!result.contains("HIDDEN 2"));
815        assert!(result.contains("Start"));
816        assert!(result.contains("End"));
817    }
818
819    #[test]
820    fn strip_markers_from_chapter_hidden_block_at_start() {
821        let content = r#"```sql validator=sqlite hidden
822HIDDEN;
823```
824
825Visible content"#;
826        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
827        // Hidden block at start should not leave leading whitespace
828        assert!(!result.contains("HIDDEN"));
829        assert!(result.contains("Visible content"));
830        // Should not start with blank lines
831        assert!(!result.starts_with('\n'));
832    }
833
834    #[test]
835    fn strip_markers_from_chapter_hidden_block_at_end() {
836        let content = r#"Visible content
837
838```sql validator=sqlite hidden
839HIDDEN;
840```"#;
841        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
842        // Hidden block at end should not leave trailing whitespace
843        assert!(!result.contains("HIDDEN"));
844        assert!(result.contains("Visible content"));
845        // Should not end with excessive blank lines
846        assert!(!result.ends_with("\n\n"));
847    }
848
849    #[test]
850    fn strip_markers_from_chapter_only_hidden_block() {
851        let content = r#"```sql validator=sqlite hidden
852HIDDEN;
853```"#;
854        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
855        // Single hidden block should result in empty output
856        assert!(!result.contains("HIDDEN"));
857        assert!(result.is_empty() || result.trim().is_empty());
858    }
859
860    #[test]
861    fn strip_markers_from_chapter_hidden_with_markers() {
862        let content = r#"Text
863
864```sql validator=sqlite hidden
865<!--SETUP
866CREATE TABLE t;
867-->
868SELECT * FROM t;
869<!--ASSERT
870rows >= 1
871-->
872```
873
874More text"#;
875        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
876        // Hidden block with markers should be completely removed
877        assert!(!result.contains("SETUP"));
878        assert!(!result.contains("ASSERT"));
879        assert!(!result.contains("CREATE TABLE"));
880        assert!(!result.contains("SELECT"));
881        assert!(result.contains("Text"));
882        assert!(result.contains("More text"));
883    }
884
885    // ==================== Regression tests for markdown preservation ====================
886    // These tests ensure that strip_markers_from_chapter preserves all markdown formatting
887    // that exists OUTSIDE of code blocks with validator= attributes.
888
889    #[test]
890    fn strip_markers_preserves_lists() {
891        let content = r#"# Chapter
892
893Some text:
894
895- Item one
896- Item two
897- Item three
898
899### Next Section
900
901More text."#;
902        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
903        // Lists must be preserved exactly
904        assert!(
905            result.contains("- Item one"),
906            "List items must be preserved"
907        );
908        assert!(
909            result.contains("- Item two"),
910            "List items must be preserved"
911        );
912        assert!(
913            result.contains("- Item three"),
914            "List items must be preserved"
915        );
916        assert!(
917            result.contains("### Next Section"),
918            "Headings must be preserved"
919        );
920    }
921
922    #[test]
923    fn strip_markers_preserves_lists_with_code_block() {
924        let content = r#"# Chapter
925
926Some text:
927
928- Item one
929- Item two
930- Item three
931
932```sql validator=sqlite
933<!--SETUP
934CREATE TABLE t;
935-->
936SELECT 1;
937```
938
939### Next Section
940
941More text."#;
942        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
943        // Lists must be preserved
944        assert!(
945            result.contains("- Item one"),
946            "List items must be preserved"
947        );
948        assert!(
949            result.contains("- Item two"),
950            "List items must be preserved"
951        );
952        assert!(
953            result.contains("- Item three"),
954            "List items must be preserved"
955        );
956        // Code block content stripped of markers but preserved
957        assert!(result.contains("SELECT 1"), "Code block content preserved");
958        assert!(!result.contains("SETUP"), "Markers stripped");
959        assert!(!result.contains("CREATE TABLE"), "Setup content stripped");
960        // Headings preserved
961        assert!(
962            result.contains("### Next Section"),
963            "Headings must be preserved"
964        );
965    }
966
967    #[test]
968    fn strip_markers_preserves_numbered_lists() {
969        let content = r#"Steps:
970
9711. First step
9722. Second step
9733. Third step
974
975Done."#;
976        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
977        assert!(
978            result.contains("1. First step"),
979            "Numbered lists must be preserved"
980        );
981        assert!(
982            result.contains("2. Second step"),
983            "Numbered lists must be preserved"
984        );
985        assert!(
986            result.contains("3. Third step"),
987            "Numbered lists must be preserved"
988        );
989    }
990
991    #[test]
992    fn strip_markers_preserves_blockquotes() {
993        let content = r#"Quote:
994
995> This is a blockquote
996> with multiple lines
997
998End."#;
999        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1000        assert!(
1001            result.contains("> This is a blockquote"),
1002            "Blockquotes must be preserved"
1003        );
1004    }
1005
1006    #[test]
1007    fn strip_markers_preserves_links() {
1008        let content = r#"See [the documentation](https://example.com) for details.
1009
1010And [another link](https://other.com)."#;
1011        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1012        assert!(
1013            result.contains("[the documentation](https://example.com)"),
1014            "Links must be preserved"
1015        );
1016        assert!(
1017            result.contains("[another link](https://other.com)"),
1018            "Links must be preserved"
1019        );
1020    }
1021
1022    #[test]
1023    fn strip_markers_preserves_inline_code() {
1024        let content = r#"Use the `SELECT` statement to query data.
1025
1026Also `INSERT` works."#;
1027        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1028        assert!(result.contains("`SELECT`"), "Inline code must be preserved");
1029        assert!(result.contains("`INSERT`"), "Inline code must be preserved");
1030    }
1031
1032    #[test]
1033    fn strip_markers_preserves_emphasis() {
1034        let content = r#"This is *italic* and **bold** text.
1035
1036Also _underscores_ and __double__."#;
1037        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1038        assert!(result.contains("*italic*"), "Italic must be preserved");
1039        assert!(result.contains("**bold**"), "Bold must be preserved");
1040    }
1041
1042    #[test]
1043    fn strip_markers_preserves_tables() {
1044        let content = r#"| Column A | Column B |
1045|----------|----------|
1046| Value 1  | Value 2  |
1047| Value 3  | Value 4  |"#;
1048        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1049        assert!(
1050            result.contains("| Column A | Column B |"),
1051            "Tables must be preserved"
1052        );
1053        assert!(
1054            result.contains("| Value 1  | Value 2  |"),
1055            "Table rows must be preserved"
1056        );
1057    }
1058
1059    #[test]
1060    fn strip_markers_preserves_code_blocks_without_validator() {
1061        let content = r#"Regular code:
1062
1063```python
1064def hello():
1065    print("world")
1066```
1067
1068End."#;
1069        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1070        assert!(result.contains("```python"), "Code fence must be preserved");
1071        assert!(
1072            result.contains("def hello():"),
1073            "Code content must be preserved"
1074        );
1075        assert!(
1076            result.contains("print(\"world\")"),
1077            "Code content must be preserved"
1078        );
1079    }
1080
1081    #[test]
1082    fn strip_markers_complex_document() {
1083        // This tests a realistic document with mixed content
1084        let content = r#"# Getting Started
1085
1086Welcome to the guide. Here's what you'll learn:
1087
1088- How to query data
1089- How to filter results
1090- How to join tables
1091
1092## Basic Queries
1093
1094First, let's set up our database:
1095
1096```sql validator=sqlite hidden
1097<!--SETUP
1098CREATE TABLE users (id INTEGER, name TEXT);
1099INSERT INTO users VALUES (1, 'Alice'), (2, 'Bob');
1100-->
1101SELECT 'setup complete';
1102```
1103
1104Now run a simple query:
1105
1106```sql validator=sqlite
1107SELECT * FROM users;
1108<!--ASSERT
1109rows >= 1
1110-->
1111```
1112
1113> **Note**: The query above returns all users.
1114
1115See [SQL documentation](https://sqlite.org) for more.
1116
1117### Summary
1118
11191. We created a table
11202. We queried the data
11213. We verified the results
1122
1123Done!"#;
1124        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1125
1126        // Lists preserved
1127        assert!(
1128            result.contains("- How to query data"),
1129            "Bullet lists preserved"
1130        );
1131        assert!(
1132            result.contains("1. We created a table"),
1133            "Numbered lists preserved"
1134        );
1135
1136        // Hidden block removed
1137        assert!(
1138            !result.contains("CREATE TABLE users"),
1139            "Hidden block content removed"
1140        );
1141        assert!(
1142            !result.contains("INSERT INTO users"),
1143            "Hidden block content removed"
1144        );
1145
1146        // Visible code block preserved (without markers)
1147        assert!(
1148            result.contains("SELECT * FROM users"),
1149            "Visible query preserved"
1150        );
1151        assert!(!result.contains("ASSERT"), "Markers stripped");
1152
1153        // Blockquote preserved
1154        assert!(result.contains("> **Note**"), "Blockquote preserved");
1155
1156        // Link preserved
1157        assert!(
1158            result.contains("[SQL documentation](https://sqlite.org)"),
1159            "Link preserved"
1160        );
1161
1162        // Headings preserved
1163        assert!(result.contains("## Basic Queries"), "H2 preserved");
1164        assert!(result.contains("### Summary"), "H3 preserved");
1165    }
1166
1167    #[test]
1168    fn strip_markers_preserves_headings_with_links() {
1169        // Regression test: headings containing links were being corrupted
1170        let content = r#"# Introduction
1171
1172Some intro text.
1173
1174### [Configuration Guide](https://example.com/config)
1175
1176This section explains configuration.
1177
1178### [API Reference](https://example.com/api)
1179
1180API docs here.
1181
1182```sql validator=sqlite
1183SELECT 1;
1184```
1185
1186### [Advanced Topics](https://example.com/advanced)
1187
1188More content."#;
1189        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1190
1191        // Headings with links must be preserved exactly
1192        assert!(
1193            result.contains("### [Configuration Guide](https://example.com/config)"),
1194            "Heading with link must be preserved"
1195        );
1196        assert!(
1197            result.contains("### [API Reference](https://example.com/api)"),
1198            "Heading with link must be preserved"
1199        );
1200        assert!(
1201            result.contains("### [Advanced Topics](https://example.com/advanced)"),
1202            "Heading with link must be preserved"
1203        );
1204        // Code block still processed
1205        assert!(result.contains("SELECT 1"), "Code block content preserved");
1206    }
1207
1208    #[test]
1209    fn strip_markers_preserves_paths_with_wildcards() {
1210        // Regression test: paths with * were being parsed as emphasis
1211        let content = r#"# File Patterns
1212
1213Match all files in a directory:
1214
1215- `/etc/osquery/*`
1216- `/var/log/*.log`
1217- `C:\Users\*\AppData`
1218
1219You can also use `/some/path/**/*.json` for recursive matching.
1220
1221```sql validator=sqlite
1222SELECT 1;
1223```
1224
1225The path `/tmp/*` is commonly used."#;
1226        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1227
1228        // Paths with wildcards must be preserved exactly
1229        assert!(
1230            result.contains("/etc/osquery/*"),
1231            "Path with wildcard must be preserved"
1232        );
1233        assert!(
1234            result.contains("/var/log/*.log"),
1235            "Path with wildcard must be preserved"
1236        );
1237        assert!(
1238            result.contains(r"C:\Users\*\AppData"),
1239            "Windows path with wildcard must be preserved"
1240        );
1241        assert!(
1242            result.contains("/some/path/**/*.json"),
1243            "Recursive glob must be preserved"
1244        );
1245        assert!(
1246            result.contains("/tmp/*"),
1247            "Inline path with wildcard must be preserved"
1248        );
1249    }
1250
1251    #[test]
1252    fn strip_markers_preserves_inline_code_with_special_chars() {
1253        // Regression test: inline code with special characters
1254        let content = r#"# Code Examples
1255
1256Use `SELECT * FROM users` to get all users.
1257
1258The command `rm -rf /tmp/*` removes temp files.
1259
1260Run `echo $HOME` to print home directory.
1261
1262Use `git log --oneline | head -10` for recent commits.
1263
1264The regex `\d+\.\d+` matches decimals.
1265
1266```sql validator=sqlite
1267SELECT 1;
1268```
1269
1270Also try `jq '.[] | .name'` for JSON parsing."#;
1271        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1272
1273        // Inline code must be preserved exactly
1274        assert!(
1275            result.contains("`SELECT * FROM users`"),
1276            "Inline code with * must be preserved"
1277        );
1278        assert!(
1279            result.contains("`rm -rf /tmp/*`"),
1280            "Inline code with path must be preserved"
1281        );
1282        assert!(
1283            result.contains("`echo $HOME`"),
1284            "Inline code with $ must be preserved"
1285        );
1286        assert!(
1287            result.contains("`git log --oneline | head -10`"),
1288            "Inline code with pipe must be preserved"
1289        );
1290        assert!(
1291            result.contains(r"`\d+\.\d+`"),
1292            "Inline code with backslashes must be preserved"
1293        );
1294        assert!(
1295            result.contains("`jq '.[] | .name'`"),
1296            "Inline code with quotes must be preserved"
1297        );
1298    }
1299
1300    #[test]
1301    fn strip_markers_preserves_asterisks_in_text() {
1302        // Regression test: asterisks in regular text (not emphasis)
1303        let content = r#"# Wildcards
1304
1305The pattern `*` matches everything.
1306
1307File paths like /etc/* are common.
1308
1309Use * for wildcards and ** for recursive.
1310
1311Math: 5 * 3 = 15
1312
1313```sql validator=sqlite
1314SELECT 1;
1315```
1316
1317Done."#;
1318        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1319
1320        // Asterisks in various contexts
1321        assert!(
1322            result.contains("The pattern `*` matches everything"),
1323            "Backtick asterisk preserved"
1324        );
1325        assert!(result.contains("/etc/*"), "Path asterisk preserved");
1326        assert!(result.contains("5 * 3 = 15"), "Math asterisk preserved");
1327    }
1328
1329    #[test]
1330    fn strip_markers_preserves_complex_inline_formatting() {
1331        // Test various inline formatting combinations
1332        let content = r#"# Formatting Test
1333
1334This has **bold** and *italic* text.
1335
1336This has `code with **asterisks**` inside.
1337
1338This has [link with `code`](https://example.com).
1339
1340This has **bold with `code` inside**.
1341
1342```sql validator=sqlite
1343SELECT 1;
1344```
1345
1346End."#;
1347        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1348
1349        assert!(result.contains("**bold**"), "Bold preserved");
1350        assert!(result.contains("*italic*"), "Italic preserved");
1351        assert!(
1352            result.contains("`code with **asterisks**`"),
1353            "Code with asterisks preserved"
1354        );
1355        assert!(
1356            result.contains("[link with `code`](https://example.com)"),
1357            "Link with code preserved"
1358        );
1359        assert!(
1360            result.contains("**bold with `code` inside**"),
1361            "Bold with code preserved"
1362        );
1363    }
1364}