mdbook_validator/
preprocessor.rs

1//! mdBook preprocessor implementation
2//!
3//! Bridges the synchronous mdBook Preprocessor trait to async container validation.
4
// Default exec commands for validators when not configured.
// The chosen command is run through `sh -c` in the validator's container and
// receives the code block's validation content on stdin.
/// Used for the validator named "sqlite" when its config sets no `exec_command`.
const DEFAULT_EXEC_SQLITE: &str = "sqlite3 -json /tmp/test.db";
/// Used for the validator named "osquery" when its config sets no `exec_command`.
const DEFAULT_EXEC_OSQUERY: &str = "osqueryi --json";
/// Used for any other validator name when its config sets no `exec_command`.
const DEFAULT_EXEC_FALLBACK: &str = "cat";
9
10use std::collections::hash_map::Entry;
11use std::collections::HashMap;
12use std::fmt::Write;
13use std::path::Path;
14
15use mdbook_preprocessor::book::{Book, BookItem, Chapter};
16use mdbook_preprocessor::errors::Error;
17use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
18use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
19
20use crate::command::RealCommandRunner;
21use crate::config::{Config, ValidatorConfig};
22use crate::container::ValidatorContainer;
23use crate::error::ValidatorError;
24use crate::host_validator;
25use crate::parser::{extract_markers, parse_info_string, ExtractedMarkers};
26use crate::transpiler::strip_markers;
27
/// The mdbook-validator preprocessor.
///
/// A stateless unit type: all configuration is read when a book is processed,
/// so values are free to copy and safe to print in diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct ValidatorPreprocessor;
30
31impl ValidatorPreprocessor {
32    /// Create a new preprocessor instance
33    #[must_use]
34    pub fn new() -> Self {
35        Self
36    }
37}
38
39impl Default for ValidatorPreprocessor {
40    fn default() -> Self {
41        Self::new()
42    }
43}
44
45impl Preprocessor for ValidatorPreprocessor {
46    fn name(&self) -> &'static str {
47        "validator"
48    }
49
50    fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
51        // Parse config from book.toml
52        let config = Config::from_context(ctx)
53            .map_err(|e| Error::msg(format!("Failed to parse config: {e}")))?;
54
55        // Create tokio runtime for async->sync bridge
56        let rt = tokio::runtime::Builder::new_current_thread()
57            .enable_all()
58            .build()
59            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
60
61        rt.block_on(async {
62            self.run_async_with_config(&mut book, &config, &ctx.root)
63                .await
64        })?;
65
66        Ok(book)
67    }
68
69    fn supports_renderer(&self, renderer: &str) -> Result<bool, anyhow::Error> {
70        // Support all renderers - we validate and strip markers,
71        // producing valid markdown for any output format
72        let _ = renderer;
73        Ok(true)
74    }
75}
76
77impl ValidatorPreprocessor {
78    /// Process a book with a custom validator script.
79    ///
80    /// This is primarily for testing different validator behaviors.
81    /// Uses the default Alpine container with the provided script.
82    pub fn process_book_with_script(
83        &self,
84        mut book: Book,
85        validator_script: &[u8],
86    ) -> Result<Book, Error> {
87        let rt = tokio::runtime::Builder::new_current_thread()
88            .enable_all()
89            .build()
90            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
91
92        rt.block_on(async {
93            self.run_async_with_script(&mut book, validator_script)
94                .await
95        })?;
96
97        Ok(book)
98    }
99
100    /// Process a book with explicit config (for testing).
101    ///
102    /// Allows testing with a custom config without needing a full `PreprocessorContext`.
103    pub fn process_book_with_config(
104        &self,
105        mut book: Book,
106        config: &Config,
107        book_root: &Path,
108    ) -> Result<Book, Error> {
109        let rt = tokio::runtime::Builder::new_current_thread()
110            .enable_all()
111            .build()
112            .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
113
114        rt.block_on(async {
115            self.run_async_with_config(&mut book, config, book_root)
116                .await
117        })?;
118
119        Ok(book)
120    }
121
122    /// Run with explicit config - starts per-validator containers.
123    async fn run_async_with_config(
124        &self,
125        book: &mut Book,
126        config: &Config,
127        book_root: &Path,
128    ) -> Result<(), Error> {
129        // Cache started containers by validator name
130        let mut containers: HashMap<String, ValidatorContainer> = HashMap::new();
131
132        for item in &mut book.items {
133            self.process_book_item_with_config(item, config, book_root, &mut containers)
134                .await?;
135        }
136
137        Ok(())
138    }
139
140    /// Run with default script (for testing without config).
141    async fn run_async_with_script(
142        &self,
143        book: &mut Book,
144        validator_script: &[u8],
145    ) -> Result<(), Error> {
146        let container = ValidatorContainer::start(validator_script)
147            .await
148            .map_err(|e| Error::msg(format!("Failed to start container: {e}")))?;
149
150        for item in &mut book.items {
151            self.process_book_item(item, &container).await?;
152        }
153
154        Ok(())
155    }
156
157    async fn process_book_item(
158        &self,
159        item: &mut BookItem,
160        container: &ValidatorContainer,
161    ) -> Result<(), Error> {
162        if let BookItem::Chapter(chapter) = item {
163            self.process_chapter(chapter, container).await?;
164
165            // Process sub-items recursively
166            for sub_item in &mut chapter.sub_items {
167                Box::pin(self.process_book_item(sub_item, container)).await?;
168            }
169        }
170        Ok(())
171    }
172
173    async fn process_book_item_with_config(
174        &self,
175        item: &mut BookItem,
176        config: &Config,
177        book_root: &Path,
178        containers: &mut HashMap<String, ValidatorContainer>,
179    ) -> Result<(), Error> {
180        if let BookItem::Chapter(chapter) = item {
181            self.process_chapter_with_config(chapter, config, book_root, containers)
182                .await?;
183
184            // Process sub-items recursively
185            for sub_item in &mut chapter.sub_items {
186                Box::pin(
187                    self.process_book_item_with_config(sub_item, config, book_root, containers),
188                )
189                .await?;
190            }
191        }
192        Ok(())
193    }
194
195    async fn process_chapter(
196        &self,
197        chapter: &mut Chapter,
198        container: &ValidatorContainer,
199    ) -> Result<(), Error> {
200        if chapter.content.is_empty() {
201            return Ok(());
202        }
203
204        // Collect all code blocks that need validation
205        let blocks = Self::find_validator_blocks(&chapter.content);
206
207        if blocks.is_empty() {
208            return Ok(());
209        }
210
211        // Validate each block
212        for block in &blocks {
213            if block.skip {
214                continue;
215            }
216
217            let validation_content = block.markers.validation_content();
218            let result = container
219                .exec_with_env(
220                    block.markers.setup.as_deref(),
221                    &validation_content,
222                    block.markers.assertions.as_deref(),
223                    block.markers.expect.as_deref(),
224                )
225                .await
226                .map_err(|e| {
227                    Error::msg(format!(
228                        "Validation exec failed in '{}': {}",
229                        chapter.name, e
230                    ))
231                })?;
232
233            if result.exit_code != 0 {
234                let mut error_msg = format!(
235                    "Validation failed in '{}' (exit code {}):\n\nCode:\n{}\n",
236                    chapter.name, result.exit_code, block.markers.visible_content
237                );
238                if !result.stderr.is_empty() {
239                    let _ = write!(error_msg, "\nValidator stderr:\n{}", result.stderr);
240                }
241                if !result.stdout.is_empty() {
242                    let _ = write!(error_msg, "\nValidator stdout:\n{}", result.stdout);
243                }
244                return Err(Error::msg(error_msg));
245            }
246        }
247
248        // All validations passed - strip markers from chapter content
249        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
250
251        Ok(())
252    }
253
254    async fn process_chapter_with_config(
255        &self,
256        chapter: &mut Chapter,
257        config: &Config,
258        book_root: &Path,
259        containers: &mut HashMap<String, ValidatorContainer>,
260    ) -> Result<(), Error> {
261        if chapter.content.is_empty() {
262            return Ok(());
263        }
264
265        // Collect all code blocks that need validation
266        let blocks = Self::find_validator_blocks(&chapter.content);
267
268        if blocks.is_empty() {
269            return Ok(());
270        }
271
272        // Validate each block using configured validator
273        for block in &blocks {
274            if block.skip {
275                continue;
276            }
277
278            // Get validator config
279            let validator_config = config.get_validator(&block.validator_name).map_err(|e| {
280                Error::msg(format!(
281                    "Unknown validator '{}': {}",
282                    block.validator_name, e
283                ))
284            })?;
285
286            // Get or start container for this validator
287            let container = self
288                .get_or_start_container(&block.validator_name, config, book_root, containers)
289                .await?;
290
291            // Use host-based validation: run query in container, validate on host
292            self.validate_block_host_based(
293                container,
294                validator_config,
295                block,
296                &chapter.name,
297                book_root,
298            )
299            .await?;
300        }
301
302        // All validations passed - strip markers from chapter content
303        chapter.content = Self::strip_markers_from_chapter(&chapter.content);
304
305        Ok(())
306    }
307
308    /// Validate a code block using host-based validation.
309    ///
310    /// This runs the query in the container and validates the output on the host.
311    async fn validate_block_host_based(
312        &self,
313        container: &ValidatorContainer,
314        validator_config: &ValidatorConfig,
315        block: &ValidatorBlock,
316        chapter_name: &str,
317        book_root: &Path,
318    ) -> Result<(), Error> {
319        // 0. Verify validator script exists first (fail fast before container work)
320        let script_path = book_root.join(&validator_config.script);
321        if !script_path.exists() {
322            return Err(Error::msg(format!(
323                "Failed to read validator script '{}': file not found",
324                script_path.display()
325            )));
326        }
327
328        // Get exec command (use defaults if not configured)
329        let exec_cmd = Self::get_exec_command(&block.validator_name, validator_config);
330
331        // 1. Run setup script in container (if any)
332        // SETUP content IS the shell command - run directly via sh -c
333        if let Some(setup) = &block.markers.setup {
334            let setup_script = setup.trim();
335            if !setup_script.is_empty() {
336                let setup_result = container
337                    .exec_raw(&["sh", "-c", setup_script])
338                    .await
339                    .map_err(|e| Error::msg(format!("Setup exec failed: {e}")))?;
340
341                if setup_result.exit_code != 0 {
342                    #[allow(clippy::cast_possible_truncation)]
343                    return Err(ValidatorError::SetupFailed {
344                        exit_code: setup_result.exit_code as i32,
345                        message: format!(
346                            "in '{}' (validator: {}):\n\nScript:\n{}\n\nError:\n{}",
347                            chapter_name, block.validator_name, setup_script, setup_result.stderr
348                        ),
349                    }
350                    .into());
351                }
352            }
353        }
354
355        // 2. Run query in container, get JSON output
356        // Content is passed via stdin to avoid shell injection
357        // Use validation_content() to strip @@ prefix (but keep line content)
358        let query_sql = block.markers.validation_content();
359        let query_sql = query_sql.trim();
360        if query_sql.is_empty() {
361            return Err(Error::msg(format!(
362                "Validation failed in '{}' (validator: {}): Query content is empty",
363                chapter_name, block.validator_name
364            )));
365        }
366
367        // Pass content via stdin (secure) instead of shell interpolation (vulnerable)
368        let query_result = container
369            .exec_with_stdin(&["sh", "-c", &exec_cmd], query_sql)
370            .await
371            .map_err(|e| Error::msg(format!("Query exec failed: {e}")))?;
372
373        if query_result.exit_code != 0 {
374            return Err(Error::msg(format!(
375                "Query failed in '{}' (validator: {}):\n\nSQL:\n{}\n\nError:\n{}",
376                chapter_name, block.validator_name, query_sql, query_result.stderr
377            )));
378        }
379
380        // 3. Validate JSON output on host using validator script
381        // (script_path already validated at the start of this function)
382        let script_path_str = script_path
383            .to_str()
384            .ok_or_else(|| Error::msg(format!("Invalid script path: {}", script_path.display())))?;
385
386        let validation_result = host_validator::run_validator(
387            &RealCommandRunner,
388            script_path_str,
389            &query_result.stdout,
390            block.markers.assertions.as_deref(),
391            block.markers.expect.as_deref(),
392            Some(&query_result.stderr), // Pass container stderr for warning detection
393        )
394        .map_err(|e| {
395            Error::msg(format!(
396                "Host validator failed in '{}' (validator: {}): {}",
397                chapter_name, block.validator_name, e
398            ))
399        })?;
400
401        if validation_result.exit_code != 0 {
402            let mut error_msg = format!(
403                "in '{}' (validator: {}):\n\nCode:\n{}\n",
404                chapter_name, block.validator_name, block.markers.visible_content
405            );
406            if !validation_result.stderr.is_empty() {
407                let _ = write!(
408                    error_msg,
409                    "\nValidator stderr:\n{}",
410                    validation_result.stderr
411                );
412            }
413            if !validation_result.stdout.is_empty() {
414                let _ = write!(
415                    error_msg,
416                    "\nValidator stdout:\n{}",
417                    validation_result.stdout
418                );
419            }
420            return Err(ValidatorError::ValidationFailed {
421                exit_code: validation_result.exit_code,
422                message: error_msg,
423            }
424            .into());
425        }
426
427        Ok(())
428    }
429
430    /// Get exec command for a validator.
431    ///
432    /// Uses configured command if available, otherwise uses defaults based on validator name.
433    fn get_exec_command(validator_name: &str, config: &ValidatorConfig) -> String {
434        config
435            .exec_command
436            .clone()
437            .unwrap_or_else(|| match validator_name {
438                "sqlite" => DEFAULT_EXEC_SQLITE.to_owned(),
439                "osquery" => DEFAULT_EXEC_OSQUERY.to_owned(),
440                _ => DEFAULT_EXEC_FALLBACK.to_owned(),
441            })
442    }
443
444    /// Get an existing container or start a new one for the given validator.
445    async fn get_or_start_container<'a>(
446        &self,
447        validator_name: &str,
448        config: &Config,
449        book_root: &Path,
450        containers: &'a mut HashMap<String, ValidatorContainer>,
451    ) -> Result<&'a ValidatorContainer, Error> {
452        match containers.entry(validator_name.to_owned()) {
453            Entry::Occupied(entry) => Ok(entry.into_mut()),
454            Entry::Vacant(entry) => {
455                // Look up validator config
456                let validator_config = config.get_validator(validator_name).map_err(|e| {
457                    Error::msg(format!("Unknown validator '{validator_name}': {e}"))
458                })?;
459
460                // Validate config values
461                validator_config.validate(validator_name)?;
462
463                // Resolve and validate fixtures_dir if configured
464                let mount = if let Some(ref fixtures_dir) = config.fixtures_dir {
465                    // Resolve relative path from book_root
466                    let fixtures_path = if fixtures_dir.is_absolute() {
467                        fixtures_dir.clone()
468                    } else {
469                        book_root.join(fixtures_dir)
470                    };
471
472                    // Validate fixtures_dir exists and is a directory
473                    if !fixtures_path.exists() {
474                        return Err(Error::msg(format!(
475                            "fixtures_dir '{}' does not exist",
476                            fixtures_path.display()
477                        )));
478                    }
479                    if !fixtures_path.is_dir() {
480                        return Err(Error::msg(format!(
481                            "fixtures_dir '{}' is not a directory",
482                            fixtures_path.display()
483                        )));
484                    }
485
486                    Some((fixtures_path, "/fixtures"))
487                } else {
488                    None
489                };
490
491                // Start the container with optional mount
492                let container = ValidatorContainer::start_raw_with_mount(
493                    &validator_config.container,
494                    mount.as_ref().map(|(p, c)| (p.as_path(), *c)),
495                )
496                .await
497                .map_err(|e| {
498                    Error::msg(format!(
499                        "Failed to start container '{}': {}",
500                        validator_config.container, e
501                    ))
502                })?;
503
504                Ok(entry.insert(container))
505            }
506        }
507    }
508
509    /// Find all code blocks with `validator=` attribute
510    fn find_validator_blocks(content: &str) -> Vec<ValidatorBlock> {
511        let mut blocks = Vec::new();
512        let parser = Parser::new(content);
513
514        let mut in_code_block = false;
515        let mut current_info = String::new();
516        let mut current_content = String::new();
517
518        for event in parser {
519            match event {
520                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
521                    in_code_block = true;
522                    current_info = info.to_string();
523                    current_content.clear();
524                }
525                Event::Text(text) if in_code_block => {
526                    current_content.push_str(&text);
527                }
528                Event::End(TagEnd::CodeBlock) if in_code_block => {
529                    in_code_block = false;
530
531                    let (_language, validator, skip) = parse_info_string(&current_info);
532
533                    // Only process blocks with validator= attribute
534                    if let Some(validator_name) = validator {
535                        // Handle empty validator= as "no validator"
536                        if !validator_name.is_empty() {
537                            let markers = extract_markers(&current_content);
538                            blocks.push(ValidatorBlock {
539                                validator_name,
540                                markers,
541                                skip,
542                            });
543                        }
544                    }
545                }
546                _ => {}
547            }
548        }
549
550        blocks
551    }
552
553    /// Strip all validation markers from chapter content, preserving code block structure
554    fn strip_markers_from_chapter(content: &str) -> String {
555        let mut result = String::new();
556        let parser = Parser::new(content);
557
558        let mut in_code_block = false;
559        let mut current_info = String::new();
560
561        for event in parser {
562            match &event {
563                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
564                    in_code_block = true;
565                    current_info = info.to_string();
566                    result.push_str("```");
567                    result.push_str(&current_info);
568                    result.push('\n');
569                }
570                Event::Text(text) if in_code_block => {
571                    let (_language, validator, _skip) = parse_info_string(&current_info);
572
573                    // Strip markers only from blocks with validator= attribute
574                    if validator.is_some() {
575                        let stripped = strip_markers(text);
576                        // Trim and add back newline
577                        let trimmed = stripped.trim();
578                        if !trimmed.is_empty() {
579                            result.push_str(trimmed);
580                            result.push('\n');
581                        }
582                    } else {
583                        result.push_str(text);
584                    }
585                }
586                Event::End(TagEnd::CodeBlock) if in_code_block => {
587                    in_code_block = false;
588                    result.push_str("```\n");
589                }
590                Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => {
591                    // Handle indented code blocks - pass through unchanged
592                    in_code_block = true;
593                    current_info.clear();
594                }
595                Event::End(TagEnd::CodeBlock) => {
596                    in_code_block = false;
597                }
598                Event::SoftBreak | Event::HardBreak => {
599                    if !in_code_block {
600                        result.push('\n');
601                    }
602                }
603                Event::Text(text) if !in_code_block => {
604                    result.push_str(text);
605                }
606                Event::End(TagEnd::Paragraph | TagEnd::Heading(_)) => {
607                    result.push_str("\n\n");
608                }
609                Event::Start(Tag::Heading { level, .. }) => {
610                    result.push_str(&"#".repeat(*level as usize));
611                    result.push(' ');
612                }
613                _ => {}
614            }
615        }
616
617        result.trim().to_owned()
618    }
619}
620
/// A code block that requires validation: a fenced block whose info string
/// carries a non-empty `validator=` attribute.
struct ValidatorBlock {
    /// Name of the validator (e.g., "osquery", "sqlite"); used to look up the
    /// validator's config and its container
    validator_name: String,
    /// Extracted markers from the code block (setup, assertions, expected
    /// output, visible content)
    markers: ExtractedMarkers,
    /// Whether to skip validation; skipped blocks are never executed
    skip: bool,
}