1use tracing::{debug, info, trace};
6
/// Command used for the `sqlite` validator when its config sets no
/// `exec_command`; the block's query is fed to it on stdin.
const DEFAULT_EXEC_SQLITE: &str = "sqlite3 -json /tmp/test.db";
/// Command used for the `osquery` validator when its config sets no
/// `exec_command`.
const DEFAULT_EXEC_OSQUERY: &str = "osqueryi --json";
/// Command used for any other validator name that has no configured
/// `exec_command`.
const DEFAULT_EXEC_FALLBACK: &str = "cat";
11
12use std::collections::hash_map::Entry;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17use mdbook_preprocessor::book::{Book, BookItem, Chapter};
18use mdbook_preprocessor::errors::Error;
19use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
20use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
21
22use crate::command::RealCommandRunner;
23use crate::config::{Config, ValidatorConfig};
24use crate::container::ValidatorContainer;
25use crate::error::ValidatorError;
26use crate::host_validator;
27use crate::parser::{extract_markers, parse_info_string, ExtractedMarkers};
28use crate::transpiler::strip_markers;
29
/// Preprocessor that validates annotated fenced code blocks of a book and
/// strips the validation-only markup from the chapters afterwards.
///
/// The type is a stateless unit struct; all inputs are passed per call.
#[derive(Debug)]
pub struct ValidatorPreprocessor;

impl ValidatorPreprocessor {
    /// Creates a new preprocessor.
    ///
    /// Declared `const` so the value can also be built in constant contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}

impl Default for ValidatorPreprocessor {
    /// Equivalent to [`ValidatorPreprocessor::new`].
    fn default() -> Self {
        Self::new()
    }
}
46
47impl Preprocessor for ValidatorPreprocessor {
48 fn name(&self) -> &'static str {
49 "validator"
50 }
51
52 fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
53 let config = Config::from_context(ctx)
55 .map_err(|e| Error::msg(format!("Failed to parse config: {e}")))?;
56
57 let rt = tokio::runtime::Builder::new_current_thread()
59 .enable_all()
60 .build()
61 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
62
63 rt.block_on(async {
64 self.run_async_with_config(&mut book, &config, &ctx.root)
65 .await
66 })?;
67
68 Ok(book)
69 }
70
71 fn supports_renderer(&self, renderer: &str) -> Result<bool, anyhow::Error> {
72 let _ = renderer;
75 Ok(true)
76 }
77}
78
79impl ValidatorPreprocessor {
80 pub fn process_book_with_script(
85 &self,
86 mut book: Book,
87 validator_script: &[u8],
88 ) -> Result<Book, Error> {
89 let rt = tokio::runtime::Builder::new_current_thread()
90 .enable_all()
91 .build()
92 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
93
94 rt.block_on(async {
95 self.run_async_with_script(&mut book, validator_script)
96 .await
97 })?;
98
99 Ok(book)
100 }
101
102 pub fn process_book_with_config(
106 &self,
107 mut book: Book,
108 config: &Config,
109 book_root: &Path,
110 ) -> Result<Book, Error> {
111 let rt = tokio::runtime::Builder::new_current_thread()
112 .enable_all()
113 .build()
114 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
115
116 rt.block_on(async {
117 self.run_async_with_config(&mut book, config, book_root)
118 .await
119 })?;
120
121 Ok(book)
122 }
123
124 async fn run_async_with_config(
126 &self,
127 book: &mut Book,
128 config: &Config,
129 book_root: &Path,
130 ) -> Result<(), Error> {
131 let mut containers: HashMap<String, ValidatorContainer> = HashMap::new();
133
134 for item in &mut book.items {
135 self.process_book_item_with_config(item, config, book_root, &mut containers)
136 .await?;
137 }
138
139 Ok(())
140 }
141
142 async fn run_async_with_script(
144 &self,
145 book: &mut Book,
146 validator_script: &[u8],
147 ) -> Result<(), Error> {
148 let container = ValidatorContainer::start(validator_script)
149 .await
150 .map_err(|e| Error::msg(format!("Failed to start container: {e}")))?;
151
152 for item in &mut book.items {
153 self.process_book_item(item, &container).await?;
154 }
155
156 Ok(())
157 }
158
159 async fn process_book_item(
160 &self,
161 item: &mut BookItem,
162 container: &ValidatorContainer,
163 ) -> Result<(), Error> {
164 if let BookItem::Chapter(chapter) = item {
165 self.process_chapter(chapter, container).await?;
166
167 for sub_item in &mut chapter.sub_items {
169 Box::pin(self.process_book_item(sub_item, container)).await?;
170 }
171 }
172 Ok(())
173 }
174
175 async fn process_book_item_with_config(
176 &self,
177 item: &mut BookItem,
178 config: &Config,
179 book_root: &Path,
180 containers: &mut HashMap<String, ValidatorContainer>,
181 ) -> Result<(), Error> {
182 if let BookItem::Chapter(chapter) = item {
183 self.process_chapter_with_config(chapter, config, book_root, containers)
184 .await?;
185
186 for sub_item in &mut chapter.sub_items {
188 Box::pin(
189 self.process_book_item_with_config(sub_item, config, book_root, containers),
190 )
191 .await?;
192 }
193 }
194 Ok(())
195 }
196
197 async fn process_chapter(
198 &self,
199 chapter: &mut Chapter,
200 container: &ValidatorContainer,
201 ) -> Result<(), Error> {
202 if chapter.content.is_empty() {
203 return Ok(());
204 }
205
206 let blocks = Self::find_validator_blocks(&chapter.content);
208
209 if blocks.is_empty() {
210 return Ok(());
211 }
212
213 for block in &blocks {
215 if block.skip {
216 continue;
217 }
218
219 let validation_content = block.markers.validation_content();
220 let result = container
221 .exec_with_env(
222 block.markers.setup.as_deref(),
223 &validation_content,
224 block.markers.assertions.as_deref(),
225 block.markers.expect.as_deref(),
226 )
227 .await
228 .map_err(|e| {
229 Error::msg(format!(
230 "Validation exec failed in '{}': {}",
231 chapter.name, e
232 ))
233 })?;
234
235 if result.exit_code != 0 {
236 let mut error_msg = format!(
237 "Validation failed in '{}' (exit code {}):\n\nCode:\n{}\n",
238 chapter.name, result.exit_code, block.markers.visible_content
239 );
240 if !result.stderr.is_empty() {
241 let _ = write!(error_msg, "\nValidator stderr:\n{}", result.stderr);
242 }
243 if !result.stdout.is_empty() {
244 let _ = write!(error_msg, "\nValidator stdout:\n{}", result.stdout);
245 }
246 return Err(Error::msg(error_msg));
247 }
248 }
249
250 chapter.content = Self::strip_markers_from_chapter(&chapter.content);
252
253 Ok(())
254 }
255
256 async fn process_chapter_with_config(
257 &self,
258 chapter: &mut Chapter,
259 config: &Config,
260 book_root: &Path,
261 containers: &mut HashMap<String, ValidatorContainer>,
262 ) -> Result<(), Error> {
263 if chapter.content.is_empty() {
264 return Ok(());
265 }
266
267 let blocks = Self::find_validator_blocks(&chapter.content);
269
270 if blocks.is_empty() {
271 return Ok(());
272 }
273
274 info!(chapter = %chapter.name, blocks = blocks.len(), "Validating");
275
276 for block in &blocks {
278 if block.skip && block.hidden {
279 return Err(Error::new(ValidatorError::MutuallyExclusiveAttributes));
280 }
281 }
282
283 for (idx, block) in blocks.iter().enumerate() {
285 if block.skip {
286 debug!(block = idx + 1, validator = %block.validator_name, "Skipping (skip=true)");
287 continue;
288 }
289
290 debug!(block = idx + 1, validator = %block.validator_name, "Validating block");
291
292 let validator_config = config.get_validator(&block.validator_name).map_err(|e| {
294 Error::msg(format!(
295 "Unknown validator '{}': {}",
296 block.validator_name, e
297 ))
298 })?;
299
300 let container = self
302 .get_or_start_container(&block.validator_name, config, book_root, containers)
303 .await?;
304
305 self.validate_block_host_based(
307 container,
308 validator_config,
309 block,
310 &chapter.name,
311 book_root,
312 )
313 .await?;
314 }
315
316 chapter.content = Self::strip_markers_from_chapter(&chapter.content);
318
319 info!(chapter = %chapter.name, "✓ Passed");
320
321 Ok(())
322 }
323
324 async fn validate_block_host_based(
328 &self,
329 container: &ValidatorContainer,
330 validator_config: &ValidatorConfig,
331 block: &ValidatorBlock,
332 chapter_name: &str,
333 book_root: &Path,
334 ) -> Result<(), Error> {
335 let script_path = book_root.join(&validator_config.script);
337 if !script_path.exists() {
338 return Err(Error::msg(format!(
339 "Failed to read validator script '{}': file not found",
340 script_path.display()
341 )));
342 }
343
344 debug!(script = %script_path.display(), "Using validator script");
345
346 let exec_cmd = Self::get_exec_command(&block.validator_name, validator_config);
348 debug!(exec_command = %exec_cmd, "Container exec command");
349
350 if let Some(setup) = &block.markers.setup {
353 let setup_script = setup.trim();
354 if !setup_script.is_empty() {
355 debug!("Running SETUP script");
356 trace!(setup = %setup_script, "SETUP content");
357 let setup_result = container
358 .exec_raw(&["sh", "-c", setup_script])
359 .await
360 .map_err(|e| Error::msg(format!("Setup exec failed: {e}")))?;
361
362 if setup_result.exit_code != 0 {
363 #[allow(clippy::cast_possible_truncation)]
364 return Err(ValidatorError::SetupFailed {
365 exit_code: setup_result.exit_code as i32,
366 message: format!(
367 "in '{}' (validator: {}):\n\nScript:\n{}\n\nError:\n{}",
368 chapter_name, block.validator_name, setup_script, setup_result.stderr
369 ),
370 }
371 .into());
372 }
373 }
374 }
375
376 let query_sql = block.markers.validation_content();
380 let query_sql = query_sql.trim();
381 if query_sql.is_empty() {
382 return Err(Error::msg(format!(
383 "Validation failed in '{}' (validator: {}): Query content is empty",
384 chapter_name, block.validator_name
385 )));
386 }
387
388 debug!("Executing query in container");
389 trace!(query = %query_sql, "Query content");
390
391 let query_result = container
393 .exec_with_stdin(&["sh", "-c", &exec_cmd], query_sql)
394 .await
395 .map_err(|e| Error::msg(format!("Query exec failed: {e}")))?;
396
397 trace!(exit_code = query_result.exit_code, stdout = %query_result.stdout, stderr = %query_result.stderr, "Query result");
398
399 if query_result.exit_code != 0 {
400 return Err(Error::msg(format!(
401 "Query failed in '{}' (validator: {}):\n\nSQL:\n{}\n\nError:\n{}",
402 chapter_name, block.validator_name, query_sql, query_result.stderr
403 )));
404 }
405
406 let script_path_str = script_path
409 .to_str()
410 .ok_or_else(|| Error::msg(format!("Invalid script path: {}", script_path.display())))?;
411
412 debug!("Running host validator");
413 let validation_result = host_validator::run_validator(
414 &RealCommandRunner,
415 script_path_str,
416 &query_result.stdout,
417 block.markers.assertions.as_deref(),
418 block.markers.expect.as_deref(),
419 Some(&query_result.stderr), )
421 .map_err(|e| {
422 Error::msg(format!(
423 "Host validator failed in '{}' (validator: {}): {}",
424 chapter_name, block.validator_name, e
425 ))
426 })?;
427
428 trace!(exit_code = validation_result.exit_code, stdout = %validation_result.stdout, stderr = %validation_result.stderr, "Validator result");
429
430 if validation_result.exit_code != 0 {
431 let mut error_msg = format!(
432 "in '{}' (validator: {}):\n\nCode:\n{}\n",
433 chapter_name, block.validator_name, block.markers.visible_content
434 );
435 if !validation_result.stderr.is_empty() {
436 let _ = write!(
437 error_msg,
438 "\nValidator stderr:\n{}",
439 validation_result.stderr
440 );
441 }
442 if !validation_result.stdout.is_empty() {
443 let _ = write!(
444 error_msg,
445 "\nValidator stdout:\n{}",
446 validation_result.stdout
447 );
448 }
449 return Err(ValidatorError::ValidationFailed {
450 exit_code: validation_result.exit_code,
451 message: error_msg,
452 }
453 .into());
454 }
455
456 Ok(())
457 }
458
459 fn get_exec_command(validator_name: &str, config: &ValidatorConfig) -> String {
463 config
464 .exec_command
465 .clone()
466 .unwrap_or_else(|| match validator_name {
467 "sqlite" => DEFAULT_EXEC_SQLITE.to_owned(),
468 "osquery" => DEFAULT_EXEC_OSQUERY.to_owned(),
469 _ => DEFAULT_EXEC_FALLBACK.to_owned(),
470 })
471 }
472
473 async fn get_or_start_container<'a>(
475 &self,
476 validator_name: &str,
477 config: &Config,
478 book_root: &Path,
479 containers: &'a mut HashMap<String, ValidatorContainer>,
480 ) -> Result<&'a ValidatorContainer, Error> {
481 match containers.entry(validator_name.to_owned()) {
482 Entry::Occupied(entry) => Ok(entry.into_mut()),
483 Entry::Vacant(entry) => {
484 let validator_config = config.get_validator(validator_name).map_err(|e| {
486 Error::msg(format!("Unknown validator '{validator_name}': {e}"))
487 })?;
488
489 validator_config.validate(validator_name)?;
491
492 let mount = if let Some(ref fixtures_dir) = config.fixtures_dir {
494 let fixtures_path = if fixtures_dir.is_absolute() {
496 fixtures_dir.clone()
497 } else {
498 book_root.join(fixtures_dir)
499 };
500
501 if !fixtures_path.exists() {
503 return Err(Error::msg(format!(
504 "fixtures_dir '{}' does not exist",
505 fixtures_path.display()
506 )));
507 }
508 if !fixtures_path.is_dir() {
509 return Err(Error::msg(format!(
510 "fixtures_dir '{}' is not a directory",
511 fixtures_path.display()
512 )));
513 }
514
515 Some((fixtures_path, "/fixtures"))
516 } else {
517 None
518 };
519
520 let container = ValidatorContainer::start_raw_with_mount(
522 &validator_config.container,
523 mount.as_ref().map(|(p, c)| (p.as_path(), *c)),
524 )
525 .await
526 .map_err(|e| {
527 Error::msg(format!(
528 "Failed to start container '{}': {}",
529 validator_config.container, e
530 ))
531 })?;
532
533 Ok(entry.insert(container))
534 }
535 }
536 }
537
538 fn find_validator_blocks(content: &str) -> Vec<ValidatorBlock> {
540 let mut blocks = Vec::new();
541 let parser = Parser::new(content);
542
543 let mut in_code_block = false;
544 let mut current_info = String::new();
545 let mut current_content = String::new();
546
547 for event in parser {
548 match event {
549 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
550 in_code_block = true;
551 current_info = info.to_string();
552 current_content.clear();
553 }
554 Event::Text(text) if in_code_block => {
555 current_content.push_str(&text);
556 }
557 Event::End(TagEnd::CodeBlock) if in_code_block => {
558 in_code_block = false;
559
560 let (_language, validator, skip, hidden) = parse_info_string(¤t_info);
561
562 if let Some(validator_name) = validator {
564 if !validator_name.is_empty() {
566 let markers = extract_markers(¤t_content);
567 blocks.push(ValidatorBlock {
568 validator_name,
569 markers,
570 skip,
571 hidden,
572 });
573 }
574 }
575 }
576 _ => {}
577 }
578 }
579
580 blocks
581 }
582
583 fn strip_markers_from_chapter(content: &str) -> String {
590 use std::ops::Range;
591
592 enum Edit {
594 Replace {
596 range: Range<usize>,
597 content: String,
598 },
599 Delete { range: Range<usize> },
601 }
602
603 let mut edits: Vec<Edit> = Vec::new();
604 let parser = Parser::new(content).into_offset_iter();
605
606 let mut current_block_start: Option<usize> = None;
607 let mut current_hidden = false;
608 let mut current_has_validator = false;
609 let mut current_content_range: Option<Range<usize>> = None;
610
611 for (event, range) in parser {
612 match &event {
613 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
614 let (_language, validator, _skip, hidden) = parse_info_string(info);
615 current_hidden = hidden;
616 current_has_validator = validator.is_some();
617 current_block_start = Some(range.start);
618 current_content_range = None;
619 }
620 Event::Text(_) if current_block_start.is_some() => {
621 current_content_range = Some(range);
623 }
624 Event::End(TagEnd::CodeBlock) if current_block_start.is_some() => {
625 let Some(block_start) = current_block_start.take() else {
626 unreachable!("current_block_start must be Some here")
627 };
628
629 if current_hidden {
630 let line_start = content[..block_start].rfind('\n').map_or(0, |i| i + 1);
633 let line_end = content[range.end..]
635 .find('\n')
636 .map_or(range.end, |i| range.end + i + 1);
637
638 edits.push(Edit::Delete {
639 range: line_start..line_end,
640 });
641 } else if current_has_validator {
642 if let Some(content_range) = current_content_range.take() {
644 let original_content = &content[content_range.clone()];
645 let stripped = strip_markers(original_content);
646 let trimmed = stripped.trim();
647 if trimmed != original_content.trim() {
648 edits.push(Edit::Replace {
650 range: content_range,
651 content: format!("{trimmed}\n"),
652 });
653 }
654 }
655 }
656
657 current_hidden = false;
658 current_has_validator = false;
659 }
660 _ => {}
661 }
662 }
663
664 edits.sort_by(|a, b| {
666 let a_start = match a {
667 Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
668 };
669 let b_start = match b {
670 Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
671 };
672 b_start.cmp(&a_start) });
674
675 let mut result = content.to_owned();
676 for edit in edits {
677 match edit {
678 Edit::Replace { range, content } => {
679 result.replace_range(range, &content);
680 }
681 Edit::Delete { range } => {
682 result.replace_range(range, "");
683 }
684 }
685 }
686
687 Self::normalize_blank_lines(&result)
689 }
690
691 fn normalize_blank_lines(content: &str) -> String {
693 let mut result = String::with_capacity(content.len());
694 let mut consecutive_newlines = 0;
695
696 for ch in content.chars() {
697 if ch == '\n' {
698 consecutive_newlines += 1;
699 if consecutive_newlines <= 2 {
700 result.push(ch);
701 }
702 } else {
703 consecutive_newlines = 0;
704 result.push(ch);
705 }
706 }
707
708 result.trim().to_owned()
709 }
710}
711
/// A fenced code block that names a validator, as collected by
/// `ValidatorPreprocessor::find_validator_blocks`.
struct ValidatorBlock {
    /// Validator named in the block's info string; never empty.
    validator_name: String,
    /// Markers extracted from the block body by `extract_markers`.
    markers: ExtractedMarkers,
    /// `skip` flag from the info string; skipped blocks are not validated.
    skip: bool,
    /// `hidden` flag from the info string; hidden blocks are removed from the
    /// chapter when markers are stripped.
    hidden: bool,
}
723
724#[cfg(test)]
725#[allow(clippy::needless_raw_string_hashes)]
726mod tests {
727 use super::*;
728
    /// A `hidden` validator block disappears (fence and body) while the prose
    /// around it is kept.
    #[test]
    fn strip_markers_from_chapter_removes_hidden_block() {
        let content = r#"Some text

```sql validator=sqlite hidden
SELECT 1;
```

More text"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        // Neither the body nor the fence of the hidden block may survive.
        assert!(!result.contains("SELECT 1"));
        assert!(!result.contains("```sql"));
        assert!(result.contains("Some text"));
        assert!(result.contains("More text"));
    }
747
    /// A validator block without `hidden` stays in the output, fence included.
    #[test]
    fn strip_markers_from_chapter_keeps_non_hidden_block() {
        let content = r#"Some text

```sql validator=sqlite
SELECT 1;
```

More text"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        // Both the body and the fence of the visible block must survive.
        assert!(result.contains("SELECT 1"));
        assert!(result.contains("```sql"));
        assert!(result.contains("Some text"));
        assert!(result.contains("More text"));
    }
764
    /// With one hidden and one visible block in the same chapter, only the
    /// hidden one is removed.
    #[test]
    fn strip_markers_from_chapter_mixed_hidden_and_non_hidden() {
        let content = r#"Start

```sql validator=sqlite hidden
HIDDEN QUERY;
```

Middle

```sql validator=sqlite
VISIBLE QUERY;
```

End"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        // Hidden body gone, visible body and all prose kept.
        assert!(!result.contains("HIDDEN QUERY"));
        assert!(result.contains("VISIBLE QUERY"));
        assert!(result.contains("Start"));
        assert!(result.contains("Middle"));
        assert!(result.contains("End"));
    }
788
    /// Two hidden blocks separated only by a blank line are both removed.
    #[test]
    fn strip_markers_from_chapter_adjacent_hidden_blocks() {
        let content = r#"Start

```sql validator=sqlite hidden
HIDDEN 1;
```

```sql validator=sqlite hidden
HIDDEN 2;
```

End"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        // Both hidden bodies must be gone; the surrounding prose stays.
        assert!(!result.contains("HIDDEN 1"));
        assert!(!result.contains("HIDDEN 2"));
        assert!(result.contains("Start"));
        assert!(result.contains("End"));
    }
809
    /// A hidden block on the very first line is removed without leaving a
    /// leading newline behind.
    #[test]
    fn strip_markers_from_chapter_hidden_block_at_start() {
        let content = r#"```sql validator=sqlite hidden
HIDDEN;
```

Visible content"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        assert!(!result.contains("HIDDEN"));
        assert!(result.contains("Visible content"));
        // The output must not begin with the blank line that followed the block.
        assert!(!result.starts_with('\n'));
    }
824
    /// A hidden block that ends the document (no trailing newline) is removed
    /// without leaving trailing blank lines behind.
    #[test]
    fn strip_markers_from_chapter_hidden_block_at_end() {
        let content = r#"Visible content

```sql validator=sqlite hidden
HIDDEN;
```"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        assert!(!result.contains("HIDDEN"));
        assert!(result.contains("Visible content"));
        // The output must not end with an empty line.
        assert!(!result.ends_with("\n\n"));
    }
839
    /// A chapter consisting solely of a hidden block yields empty (or
    /// whitespace-only) output.
    #[test]
    fn strip_markers_from_chapter_only_hidden_block() {
        let content = r#"```sql validator=sqlite hidden
HIDDEN;
```"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        assert!(!result.contains("HIDDEN"));
        // Nothing but whitespace may remain.
        assert!(result.is_empty() || result.trim().is_empty());
    }
850
    /// A hidden block that also carries SETUP/ASSERT markers is removed in its
    /// entirety: markers, setup SQL and query.
    #[test]
    fn strip_markers_from_chapter_hidden_with_markers() {
        let content = r#"Text

```sql validator=sqlite hidden
<!--SETUP
CREATE TABLE t;
-->
SELECT * FROM t;
<!--ASSERT
rows >= 1
-->
```

More text"#;
        let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
        // No part of the hidden block may leak into the output.
        assert!(!result.contains("SETUP"));
        assert!(!result.contains("ASSERT"));
        assert!(!result.contains("CREATE TABLE"));
        assert!(!result.contains("SELECT"));
        assert!(result.contains("Text"));
        assert!(result.contains("More text"));
    }
875
876 #[test]
881 fn strip_markers_preserves_lists() {
882 let content = r#"# Chapter
883
884Some text:
885
886- Item one
887- Item two
888- Item three
889
890### Next Section
891
892More text."#;
893 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
894 assert!(
896 result.contains("- Item one"),
897 "List items must be preserved"
898 );
899 assert!(
900 result.contains("- Item two"),
901 "List items must be preserved"
902 );
903 assert!(
904 result.contains("- Item three"),
905 "List items must be preserved"
906 );
907 assert!(
908 result.contains("### Next Section"),
909 "Headings must be preserved"
910 );
911 }
912
913 #[test]
914 fn strip_markers_preserves_lists_with_code_block() {
915 let content = r#"# Chapter
916
917Some text:
918
919- Item one
920- Item two
921- Item three
922
923```sql validator=sqlite
924<!--SETUP
925CREATE TABLE t;
926-->
927SELECT 1;
928```
929
930### Next Section
931
932More text."#;
933 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
934 assert!(
936 result.contains("- Item one"),
937 "List items must be preserved"
938 );
939 assert!(
940 result.contains("- Item two"),
941 "List items must be preserved"
942 );
943 assert!(
944 result.contains("- Item three"),
945 "List items must be preserved"
946 );
947 assert!(result.contains("SELECT 1"), "Code block content preserved");
949 assert!(!result.contains("SETUP"), "Markers stripped");
950 assert!(!result.contains("CREATE TABLE"), "Setup content stripped");
951 assert!(
953 result.contains("### Next Section"),
954 "Headings must be preserved"
955 );
956 }
957
958 #[test]
959 fn strip_markers_preserves_numbered_lists() {
960 let content = r#"Steps:
961
9621. First step
9632. Second step
9643. Third step
965
966Done."#;
967 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
968 assert!(
969 result.contains("1. First step"),
970 "Numbered lists must be preserved"
971 );
972 assert!(
973 result.contains("2. Second step"),
974 "Numbered lists must be preserved"
975 );
976 assert!(
977 result.contains("3. Third step"),
978 "Numbered lists must be preserved"
979 );
980 }
981
982 #[test]
983 fn strip_markers_preserves_blockquotes() {
984 let content = r#"Quote:
985
986> This is a blockquote
987> with multiple lines
988
989End."#;
990 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
991 assert!(
992 result.contains("> This is a blockquote"),
993 "Blockquotes must be preserved"
994 );
995 }
996
997 #[test]
998 fn strip_markers_preserves_links() {
999 let content = r#"See [the documentation](https://example.com) for details.
1000
1001And [another link](https://other.com)."#;
1002 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1003 assert!(
1004 result.contains("[the documentation](https://example.com)"),
1005 "Links must be preserved"
1006 );
1007 assert!(
1008 result.contains("[another link](https://other.com)"),
1009 "Links must be preserved"
1010 );
1011 }
1012
1013 #[test]
1014 fn strip_markers_preserves_inline_code() {
1015 let content = r#"Use the `SELECT` statement to query data.
1016
1017Also `INSERT` works."#;
1018 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1019 assert!(result.contains("`SELECT`"), "Inline code must be preserved");
1020 assert!(result.contains("`INSERT`"), "Inline code must be preserved");
1021 }
1022
1023 #[test]
1024 fn strip_markers_preserves_emphasis() {
1025 let content = r#"This is *italic* and **bold** text.
1026
1027Also _underscores_ and __double__."#;
1028 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1029 assert!(result.contains("*italic*"), "Italic must be preserved");
1030 assert!(result.contains("**bold**"), "Bold must be preserved");
1031 }
1032
1033 #[test]
1034 fn strip_markers_preserves_tables() {
1035 let content = r#"| Column A | Column B |
1036|----------|----------|
1037| Value 1 | Value 2 |
1038| Value 3 | Value 4 |"#;
1039 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1040 assert!(
1041 result.contains("| Column A | Column B |"),
1042 "Tables must be preserved"
1043 );
1044 assert!(
1045 result.contains("| Value 1 | Value 2 |"),
1046 "Table rows must be preserved"
1047 );
1048 }
1049
1050 #[test]
1051 fn strip_markers_preserves_code_blocks_without_validator() {
1052 let content = r#"Regular code:
1053
1054```python
1055def hello():
1056 print("world")
1057```
1058
1059End."#;
1060 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1061 assert!(result.contains("```python"), "Code fence must be preserved");
1062 assert!(
1063 result.contains("def hello():"),
1064 "Code content must be preserved"
1065 );
1066 assert!(
1067 result.contains("print(\"world\")"),
1068 "Code content must be preserved"
1069 );
1070 }
1071
1072 #[test]
1073 fn strip_markers_complex_document() {
1074 let content = r#"# Getting Started
1076
1077Welcome to the guide. Here's what you'll learn:
1078
1079- How to query data
1080- How to filter results
1081- How to join tables
1082
1083## Basic Queries
1084
1085First, let's set up our database:
1086
1087```sql validator=sqlite hidden
1088<!--SETUP
1089CREATE TABLE users (id INTEGER, name TEXT);
1090INSERT INTO users VALUES (1, 'Alice'), (2, 'Bob');
1091-->
1092SELECT 'setup complete';
1093```
1094
1095Now run a simple query:
1096
1097```sql validator=sqlite
1098SELECT * FROM users;
1099<!--ASSERT
1100rows >= 1
1101-->
1102```
1103
1104> **Note**: The query above returns all users.
1105
1106See [SQL documentation](https://sqlite.org) for more.
1107
1108### Summary
1109
11101. We created a table
11112. We queried the data
11123. We verified the results
1113
1114Done!"#;
1115 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1116
1117 assert!(
1119 result.contains("- How to query data"),
1120 "Bullet lists preserved"
1121 );
1122 assert!(
1123 result.contains("1. We created a table"),
1124 "Numbered lists preserved"
1125 );
1126
1127 assert!(
1129 !result.contains("CREATE TABLE users"),
1130 "Hidden block content removed"
1131 );
1132 assert!(
1133 !result.contains("INSERT INTO users"),
1134 "Hidden block content removed"
1135 );
1136
1137 assert!(
1139 result.contains("SELECT * FROM users"),
1140 "Visible query preserved"
1141 );
1142 assert!(!result.contains("ASSERT"), "Markers stripped");
1143
1144 assert!(result.contains("> **Note**"), "Blockquote preserved");
1146
1147 assert!(
1149 result.contains("[SQL documentation](https://sqlite.org)"),
1150 "Link preserved"
1151 );
1152
1153 assert!(result.contains("## Basic Queries"), "H2 preserved");
1155 assert!(result.contains("### Summary"), "H3 preserved");
1156 }
1157
1158 #[test]
1159 fn strip_markers_preserves_headings_with_links() {
1160 let content = r#"# Introduction
1162
1163Some intro text.
1164
1165### [Configuration Guide](https://example.com/config)
1166
1167This section explains configuration.
1168
1169### [API Reference](https://example.com/api)
1170
1171API docs here.
1172
1173```sql validator=sqlite
1174SELECT 1;
1175```
1176
1177### [Advanced Topics](https://example.com/advanced)
1178
1179More content."#;
1180 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1181
1182 assert!(
1184 result.contains("### [Configuration Guide](https://example.com/config)"),
1185 "Heading with link must be preserved"
1186 );
1187 assert!(
1188 result.contains("### [API Reference](https://example.com/api)"),
1189 "Heading with link must be preserved"
1190 );
1191 assert!(
1192 result.contains("### [Advanced Topics](https://example.com/advanced)"),
1193 "Heading with link must be preserved"
1194 );
1195 assert!(result.contains("SELECT 1"), "Code block content preserved");
1197 }
1198
1199 #[test]
1200 fn strip_markers_preserves_paths_with_wildcards() {
1201 let content = r#"# File Patterns
1203
1204Match all files in a directory:
1205
1206- `/etc/osquery/*`
1207- `/var/log/*.log`
1208- `C:\Users\*\AppData`
1209
1210You can also use `/some/path/**/*.json` for recursive matching.
1211
1212```sql validator=sqlite
1213SELECT 1;
1214```
1215
1216The path `/tmp/*` is commonly used."#;
1217 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1218
1219 assert!(
1221 result.contains("/etc/osquery/*"),
1222 "Path with wildcard must be preserved"
1223 );
1224 assert!(
1225 result.contains("/var/log/*.log"),
1226 "Path with wildcard must be preserved"
1227 );
1228 assert!(
1229 result.contains(r"C:\Users\*\AppData"),
1230 "Windows path with wildcard must be preserved"
1231 );
1232 assert!(
1233 result.contains("/some/path/**/*.json"),
1234 "Recursive glob must be preserved"
1235 );
1236 assert!(
1237 result.contains("/tmp/*"),
1238 "Inline path with wildcard must be preserved"
1239 );
1240 }
1241
1242 #[test]
1243 fn strip_markers_preserves_inline_code_with_special_chars() {
1244 let content = r#"# Code Examples
1246
1247Use `SELECT * FROM users` to get all users.
1248
1249The command `rm -rf /tmp/*` removes temp files.
1250
1251Run `echo $HOME` to print home directory.
1252
1253Use `git log --oneline | head -10` for recent commits.
1254
1255The regex `\d+\.\d+` matches decimals.
1256
1257```sql validator=sqlite
1258SELECT 1;
1259```
1260
1261Also try `jq '.[] | .name'` for JSON parsing."#;
1262 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1263
1264 assert!(
1266 result.contains("`SELECT * FROM users`"),
1267 "Inline code with * must be preserved"
1268 );
1269 assert!(
1270 result.contains("`rm -rf /tmp/*`"),
1271 "Inline code with path must be preserved"
1272 );
1273 assert!(
1274 result.contains("`echo $HOME`"),
1275 "Inline code with $ must be preserved"
1276 );
1277 assert!(
1278 result.contains("`git log --oneline | head -10`"),
1279 "Inline code with pipe must be preserved"
1280 );
1281 assert!(
1282 result.contains(r"`\d+\.\d+`"),
1283 "Inline code with backslashes must be preserved"
1284 );
1285 assert!(
1286 result.contains("`jq '.[] | .name'`"),
1287 "Inline code with quotes must be preserved"
1288 );
1289 }
1290
1291 #[test]
1292 fn strip_markers_preserves_asterisks_in_text() {
1293 let content = r#"# Wildcards
1295
1296The pattern `*` matches everything.
1297
1298File paths like /etc/* are common.
1299
1300Use * for wildcards and ** for recursive.
1301
1302Math: 5 * 3 = 15
1303
1304```sql validator=sqlite
1305SELECT 1;
1306```
1307
1308Done."#;
1309 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1310
1311 assert!(
1313 result.contains("The pattern `*` matches everything"),
1314 "Backtick asterisk preserved"
1315 );
1316 assert!(result.contains("/etc/*"), "Path asterisk preserved");
1317 assert!(result.contains("5 * 3 = 15"), "Math asterisk preserved");
1318 }
1319
1320 #[test]
1321 fn strip_markers_preserves_complex_inline_formatting() {
1322 let content = r#"# Formatting Test
1324
1325This has **bold** and *italic* text.
1326
1327This has `code with **asterisks**` inside.
1328
1329This has [link with `code`](https://example.com).
1330
1331This has **bold with `code` inside**.
1332
1333```sql validator=sqlite
1334SELECT 1;
1335```
1336
1337End."#;
1338 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1339
1340 assert!(result.contains("**bold**"), "Bold preserved");
1341 assert!(result.contains("*italic*"), "Italic preserved");
1342 assert!(
1343 result.contains("`code with **asterisks**`"),
1344 "Code with asterisks preserved"
1345 );
1346 assert!(
1347 result.contains("[link with `code`](https://example.com)"),
1348 "Link with code preserved"
1349 );
1350 assert!(
1351 result.contains("**bold with `code` inside**"),
1352 "Bold with code preserved"
1353 );
1354 }
1355}