1use tracing::{debug, info, trace};
6
/// Command `get_exec_command` falls back to for the `sqlite` validator when
/// its config does not set `exec_command`.
const DEFAULT_EXEC_SQLITE: &str = "sqlite3 -json /tmp/test.db";
/// Fallback command for the `osquery` validator.
const DEFAULT_EXEC_OSQUERY: &str = "osqueryi --json";
/// Fallback command for every other validator name.
const DEFAULT_EXEC_FALLBACK: &str = "cat";
11
12use std::collections::hash_map::Entry;
13use std::collections::HashMap;
14use std::fmt::Write;
15use std::path::Path;
16
17use mdbook_preprocessor::book::{Book, BookItem, Chapter};
18use mdbook_preprocessor::errors::Error;
19use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
20use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
21
22use crate::command::RealCommandRunner;
23use crate::config::{Config, ValidatorConfig};
24use crate::container::ValidatorContainer;
25use crate::error::ValidatorError;
26use crate::host_validator;
27use crate::parser::{extract_markers, parse_info_string, ExtractedMarkers};
28use crate::transpiler::strip_markers;
29
/// mdBook preprocessor that validates annotated fenced code blocks and then
/// strips the validation markers from the chapter content.
pub struct ValidatorPreprocessor;
32
33impl ValidatorPreprocessor {
34 #[must_use]
36 pub fn new() -> Self {
37 Self
38 }
39}
40
41impl Default for ValidatorPreprocessor {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl Preprocessor for ValidatorPreprocessor {
48 fn name(&self) -> &'static str {
49 "validator"
50 }
51
52 fn run(&self, ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
53 let config = Config::from_context(ctx)
55 .map_err(|e| Error::msg(format!("Failed to parse config: {e}")))?;
56
57 let rt = tokio::runtime::Builder::new_current_thread()
59 .enable_all()
60 .build()
61 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
62
63 rt.block_on(async {
64 self.run_async_with_config(&mut book, &config, &ctx.root)
65 .await
66 })?;
67
68 Ok(book)
69 }
70
71 fn supports_renderer(&self, renderer: &str) -> Result<bool, anyhow::Error> {
72 let _ = renderer;
75 Ok(true)
76 }
77}
78
79impl ValidatorPreprocessor {
80 pub fn process_book_with_script(
85 &self,
86 mut book: Book,
87 validator_script: &[u8],
88 ) -> Result<Book, Error> {
89 let rt = tokio::runtime::Builder::new_current_thread()
90 .enable_all()
91 .build()
92 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
93
94 rt.block_on(async {
95 self.run_async_with_script(&mut book, validator_script)
96 .await
97 })?;
98
99 Ok(book)
100 }
101
102 pub fn process_book_with_config(
106 &self,
107 mut book: Book,
108 config: &Config,
109 book_root: &Path,
110 ) -> Result<Book, Error> {
111 let rt = tokio::runtime::Builder::new_current_thread()
112 .enable_all()
113 .build()
114 .map_err(|e| Error::msg(format!("Failed to create tokio runtime: {e}")))?;
115
116 rt.block_on(async {
117 self.run_async_with_config(&mut book, config, book_root)
118 .await
119 })?;
120
121 Ok(book)
122 }
123
124 async fn run_async_with_config(
126 &self,
127 book: &mut Book,
128 config: &Config,
129 book_root: &Path,
130 ) -> Result<(), Error> {
131 let mut containers: HashMap<String, ValidatorContainer> = HashMap::new();
133
134 for item in &mut book.items {
135 self.process_book_item_with_config(item, config, book_root, &mut containers)
136 .await?;
137 }
138
139 Ok(())
140 }
141
142 async fn run_async_with_script(
144 &self,
145 book: &mut Book,
146 validator_script: &[u8],
147 ) -> Result<(), Error> {
148 let container = ValidatorContainer::start(validator_script)
149 .await
150 .map_err(|e| Error::msg(format!("Failed to start container: {e}")))?;
151
152 for item in &mut book.items {
153 self.process_book_item(item, &container).await?;
154 }
155
156 Ok(())
157 }
158
159 async fn process_book_item(
160 &self,
161 item: &mut BookItem,
162 container: &ValidatorContainer,
163 ) -> Result<(), Error> {
164 if let BookItem::Chapter(chapter) = item {
165 self.process_chapter(chapter, container).await?;
166
167 for sub_item in &mut chapter.sub_items {
169 Box::pin(self.process_book_item(sub_item, container)).await?;
170 }
171 }
172 Ok(())
173 }
174
175 async fn process_book_item_with_config(
176 &self,
177 item: &mut BookItem,
178 config: &Config,
179 book_root: &Path,
180 containers: &mut HashMap<String, ValidatorContainer>,
181 ) -> Result<(), Error> {
182 if let BookItem::Chapter(chapter) = item {
183 self.process_chapter_with_config(chapter, config, book_root, containers)
184 .await?;
185
186 for sub_item in &mut chapter.sub_items {
188 Box::pin(
189 self.process_book_item_with_config(sub_item, config, book_root, containers),
190 )
191 .await?;
192 }
193 }
194 Ok(())
195 }
196
197 async fn process_chapter(
198 &self,
199 chapter: &mut Chapter,
200 container: &ValidatorContainer,
201 ) -> Result<(), Error> {
202 if chapter.content.is_empty() {
203 return Ok(());
204 }
205
206 let blocks = Self::find_validator_blocks(&chapter.content);
208
209 if blocks.is_empty() {
210 return Ok(());
211 }
212
213 for block in &blocks {
215 if block.skip {
216 continue;
217 }
218
219 let validation_content = block.markers.validation_content();
220 let result = container
221 .exec_with_env(
222 block.markers.setup.as_deref(),
223 &validation_content,
224 block.markers.assertions.as_deref(),
225 block.markers.expect.as_deref(),
226 )
227 .await
228 .map_err(|e| {
229 Error::msg(format!(
230 "Validation exec failed in '{}': {}",
231 chapter.name, e
232 ))
233 })?;
234
235 if result.exit_code != 0 {
236 let mut error_msg = format!(
237 "Validation failed in '{}' (exit code {}):\n\nCode:\n{}\n",
238 chapter.name, result.exit_code, block.markers.visible_content
239 );
240 if !result.stderr.is_empty() {
241 let _ = write!(error_msg, "\nValidator stderr:\n{}", result.stderr);
242 }
243 if !result.stdout.is_empty() {
244 let _ = write!(error_msg, "\nValidator stdout:\n{}", result.stdout);
245 }
246 return Err(Error::msg(error_msg));
247 }
248 }
249
250 chapter.content = Self::strip_markers_from_chapter(&chapter.content);
252
253 Ok(())
254 }
255
256 async fn process_chapter_with_config(
257 &self,
258 chapter: &mut Chapter,
259 config: &Config,
260 book_root: &Path,
261 containers: &mut HashMap<String, ValidatorContainer>,
262 ) -> Result<(), Error> {
263 if chapter.content.is_empty() {
264 return Ok(());
265 }
266
267 let blocks = Self::find_validator_blocks(&chapter.content);
269
270 if blocks.is_empty() {
271 return Ok(());
272 }
273
274 info!(chapter = %chapter.name, blocks = blocks.len(), "Validating");
275
276 for block in &blocks {
278 if block.skip && block.hidden {
279 return Err(Error::new(ValidatorError::MutuallyExclusiveAttributes));
280 }
281 }
282
283 for (idx, block) in blocks.iter().enumerate() {
285 if block.skip {
286 debug!(block = idx + 1, validator = %block.validator_name, "Skipping (skip=true)");
287 continue;
288 }
289
290 debug!(block = idx + 1, validator = %block.validator_name, "Validating block");
291
292 let validator_config = config.get_validator(&block.validator_name).map_err(|e| {
294 Error::msg(format!(
295 "Unknown validator '{}': {}",
296 block.validator_name, e
297 ))
298 })?;
299
300 let container = self
302 .get_or_start_container(&block.validator_name, config, book_root, containers)
303 .await?;
304
305 self.validate_block_host_based(
307 container,
308 validator_config,
309 block,
310 &chapter.name,
311 book_root,
312 )
313 .await?;
314 }
315
316 chapter.content = Self::strip_markers_from_chapter(&chapter.content);
318
319 info!(chapter = %chapter.name, "✓ Passed");
320
321 Ok(())
322 }
323
324 async fn validate_block_host_based(
328 &self,
329 container: &ValidatorContainer,
330 validator_config: &ValidatorConfig,
331 block: &ValidatorBlock,
332 chapter_name: &str,
333 book_root: &Path,
334 ) -> Result<(), Error> {
335 let script_path = book_root.join(&validator_config.script);
337 if !script_path.exists() {
338 return Err(Error::msg(format!(
339 "Failed to read validator script '{}': file not found",
340 script_path.display()
341 )));
342 }
343
344 debug!(script = %script_path.display(), "Using validator script");
345
346 let exec_cmd = Self::get_exec_command(&block.validator_name, validator_config);
348 debug!(exec_command = %exec_cmd, "Container exec command");
349
350 if let Some(setup) = &block.markers.setup {
353 let setup_script = setup.trim();
354 if !setup_script.is_empty() {
355 debug!("Running SETUP script");
356 trace!(setup = %setup_script, "SETUP content");
357 let setup_result = container
358 .exec_raw(&["sh", "-c", setup_script])
359 .await
360 .map_err(|e| Error::msg(format!("Setup exec failed: {e}")))?;
361
362 if setup_result.exit_code != 0 {
363 #[allow(clippy::cast_possible_truncation)]
364 return Err(ValidatorError::SetupFailed {
365 exit_code: setup_result.exit_code as i32,
366 message: format!(
367 "in '{}' (validator: {}):\n\nScript:\n{}\n\nError:\n{}",
368 chapter_name, block.validator_name, setup_script, setup_result.stderr
369 ),
370 }
371 .into());
372 }
373 }
374 }
375
376 let query_sql = block.markers.validation_content();
380 let query_sql = query_sql.trim();
381 if query_sql.is_empty() {
382 return Err(Error::msg(format!(
383 "Validation failed in '{}' (validator: {}): Query content is empty",
384 chapter_name, block.validator_name
385 )));
386 }
387
388 debug!("Executing query in container");
389 trace!(query = %query_sql, "Query content");
390
391 let query_result = container
393 .exec_with_stdin(&["sh", "-c", &exec_cmd], query_sql)
394 .await
395 .map_err(|e| Error::msg(format!("Query exec failed: {e}")))?;
396
397 trace!(exit_code = query_result.exit_code, stdout = %query_result.stdout, stderr = %query_result.stderr, "Query result");
398
399 if query_result.exit_code != 0 {
400 return Err(Error::msg(format!(
401 "Query failed in '{}' (validator: {}):\n\nSQL:\n{}\n\nError:\n{}",
402 chapter_name, block.validator_name, query_sql, query_result.stderr
403 )));
404 }
405
406 let script_path_str = script_path
409 .to_str()
410 .ok_or_else(|| Error::msg(format!("Invalid script path: {}", script_path.display())))?;
411
412 debug!("Running host validator");
413 let validation_result = host_validator::run_validator(
414 &RealCommandRunner,
415 script_path_str,
416 &query_result.stdout,
417 block.markers.assertions.as_deref(),
418 block.markers.expect.as_deref(),
419 Some(&query_result.stderr), )
421 .map_err(|e| {
422 Error::msg(format!(
423 "Host validator failed in '{}' (validator: {}): {}",
424 chapter_name, block.validator_name, e
425 ))
426 })?;
427
428 trace!(exit_code = validation_result.exit_code, stdout = %validation_result.stdout, stderr = %validation_result.stderr, "Validator result");
429
430 if validation_result.exit_code != 0 {
431 let mut error_msg = format!(
432 "in '{}' (validator: {}):\n\nCode:\n{}\n",
433 chapter_name, block.validator_name, block.markers.visible_content
434 );
435 if !validation_result.stderr.is_empty() {
436 let _ = write!(
437 error_msg,
438 "\nValidator stderr:\n{}",
439 validation_result.stderr
440 );
441 }
442 if !validation_result.stdout.is_empty() {
443 let _ = write!(
444 error_msg,
445 "\nValidator stdout:\n{}",
446 validation_result.stdout
447 );
448 }
449 return Err(ValidatorError::ValidationFailed {
450 exit_code: validation_result.exit_code,
451 message: error_msg,
452 }
453 .into());
454 }
455
456 Ok(())
457 }
458
459 fn get_exec_command(validator_name: &str, config: &ValidatorConfig) -> String {
463 config
464 .exec_command
465 .clone()
466 .unwrap_or_else(|| match validator_name {
467 "sqlite" => DEFAULT_EXEC_SQLITE.to_owned(),
468 "osquery" => DEFAULT_EXEC_OSQUERY.to_owned(),
469 _ => DEFAULT_EXEC_FALLBACK.to_owned(),
470 })
471 }
472
473 async fn get_or_start_container<'a>(
475 &self,
476 validator_name: &str,
477 config: &Config,
478 book_root: &Path,
479 containers: &'a mut HashMap<String, ValidatorContainer>,
480 ) -> Result<&'a ValidatorContainer, Error> {
481 match containers.entry(validator_name.to_owned()) {
482 Entry::Occupied(entry) => Ok(entry.into_mut()),
483 Entry::Vacant(entry) => {
484 let validator_config = config.get_validator(validator_name).map_err(|e| {
486 Error::msg(format!("Unknown validator '{validator_name}': {e}"))
487 })?;
488
489 validator_config.validate(validator_name)?;
491
492 let mount = if let Some(ref fixtures_dir) = config.fixtures_dir {
494 let fixtures_path = if fixtures_dir.is_absolute() {
496 fixtures_dir.clone()
497 } else {
498 book_root.join(fixtures_dir)
499 };
500
501 if !fixtures_path.exists() {
503 return Err(Error::msg(format!(
504 "fixtures_dir '{}' does not exist",
505 fixtures_path.display()
506 )));
507 }
508 if !fixtures_path.is_dir() {
509 return Err(Error::msg(format!(
510 "fixtures_dir '{}' is not a directory",
511 fixtures_path.display()
512 )));
513 }
514
515 let fixtures_path = fixtures_path.canonicalize().map_err(|e| {
517 Error::msg(format!(
518 "fixtures_dir '{}' could not be canonicalized: {}",
519 fixtures_path.display(),
520 e
521 ))
522 })?;
523
524 Some((fixtures_path, "/fixtures"))
525 } else {
526 None
527 };
528
529 let container = ValidatorContainer::start_raw_with_mount(
531 &validator_config.container,
532 mount.as_ref().map(|(p, c)| (p.as_path(), *c)),
533 )
534 .await
535 .map_err(|e| {
536 Error::msg(format!(
537 "Failed to start container '{}': {}",
538 validator_config.container, e
539 ))
540 })?;
541
542 Ok(entry.insert(container))
543 }
544 }
545 }
546
547 fn find_validator_blocks(content: &str) -> Vec<ValidatorBlock> {
549 let mut blocks = Vec::new();
550 let parser = Parser::new(content);
551
552 let mut in_code_block = false;
553 let mut current_info = String::new();
554 let mut current_content = String::new();
555
556 for event in parser {
557 match event {
558 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
559 in_code_block = true;
560 current_info = info.to_string();
561 current_content.clear();
562 }
563 Event::Text(text) if in_code_block => {
564 current_content.push_str(&text);
565 }
566 Event::End(TagEnd::CodeBlock) if in_code_block => {
567 in_code_block = false;
568
569 let (_language, validator, skip, hidden) = parse_info_string(¤t_info);
570
571 if let Some(validator_name) = validator {
573 if !validator_name.is_empty() {
575 let markers = extract_markers(¤t_content);
576 blocks.push(ValidatorBlock {
577 validator_name,
578 markers,
579 skip,
580 hidden,
581 });
582 }
583 }
584 }
585 _ => {}
586 }
587 }
588
589 blocks
590 }
591
592 fn strip_markers_from_chapter(content: &str) -> String {
599 use std::ops::Range;
600
601 enum Edit {
603 Replace {
605 range: Range<usize>,
606 content: String,
607 },
608 Delete { range: Range<usize> },
610 }
611
612 let mut edits: Vec<Edit> = Vec::new();
613 let parser = Parser::new(content).into_offset_iter();
614
615 let mut current_block_start: Option<usize> = None;
616 let mut current_hidden = false;
617 let mut current_has_validator = false;
618 let mut current_content_range: Option<Range<usize>> = None;
619
620 for (event, range) in parser {
621 match &event {
622 Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info))) => {
623 let (_language, validator, _skip, hidden) = parse_info_string(info);
624 current_hidden = hidden;
625 current_has_validator = validator.is_some();
626 current_block_start = Some(range.start);
627 current_content_range = None;
628 }
629 Event::Text(_) if current_block_start.is_some() => {
630 current_content_range = Some(range);
632 }
633 Event::End(TagEnd::CodeBlock) if current_block_start.is_some() => {
634 let Some(block_start) = current_block_start.take() else {
635 unreachable!("current_block_start must be Some here")
636 };
637
638 if current_hidden {
639 let line_start = content[..block_start].rfind('\n').map_or(0, |i| i + 1);
642 let line_end = content[range.end..]
644 .find('\n')
645 .map_or(range.end, |i| range.end + i + 1);
646
647 edits.push(Edit::Delete {
648 range: line_start..line_end,
649 });
650 } else if current_has_validator {
651 if let Some(content_range) = current_content_range.take() {
653 let original_content = &content[content_range.clone()];
654 let stripped = strip_markers(original_content);
655 let trimmed = stripped.trim();
656 if trimmed != original_content.trim() {
657 edits.push(Edit::Replace {
659 range: content_range,
660 content: format!("{trimmed}\n"),
661 });
662 }
663 }
664 }
665
666 current_hidden = false;
667 current_has_validator = false;
668 }
669 _ => {}
670 }
671 }
672
673 edits.sort_by(|a, b| {
675 let a_start = match a {
676 Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
677 };
678 let b_start = match b {
679 Edit::Replace { range, .. } | Edit::Delete { range } => range.start,
680 };
681 b_start.cmp(&a_start) });
683
684 let mut result = content.to_owned();
685 for edit in edits {
686 match edit {
687 Edit::Replace { range, content } => {
688 result.replace_range(range, &content);
689 }
690 Edit::Delete { range } => {
691 result.replace_range(range, "");
692 }
693 }
694 }
695
696 Self::normalize_blank_lines(&result)
698 }
699
700 fn normalize_blank_lines(content: &str) -> String {
702 let mut result = String::with_capacity(content.len());
703 let mut consecutive_newlines = 0;
704
705 for ch in content.chars() {
706 if ch == '\n' {
707 consecutive_newlines += 1;
708 if consecutive_newlines <= 2 {
709 result.push(ch);
710 }
711 } else {
712 consecutive_newlines = 0;
713 result.push(ch);
714 }
715 }
716
717 result.trim().to_owned()
718 }
719}
720
/// A fenced code block that names a validator, as collected by
/// `find_validator_blocks`.
struct ValidatorBlock {
    /// Validator name parsed from the block's info string.
    validator_name: String,
    /// Markers extracted from the block body by `extract_markers`.
    markers: ExtractedMarkers,
    /// `skip` flag from the info string; skipped blocks are not validated.
    skip: bool,
    /// `hidden` flag from the info string; hidden blocks are removed from the output.
    hidden: bool,
}
732
733#[cfg(test)]
734#[allow(clippy::needless_raw_string_hashes)]
735mod tests {
736 use super::*;
737
738 #[test]
741 fn strip_markers_from_chapter_removes_hidden_block() {
742 let content = r#"Some text
743
744```sql validator=sqlite hidden
745SELECT 1;
746```
747
748More text"#;
749 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
750 assert!(!result.contains("SELECT 1"));
752 assert!(!result.contains("```sql"));
753 assert!(result.contains("Some text"));
754 assert!(result.contains("More text"));
755 }
756
757 #[test]
758 fn strip_markers_from_chapter_keeps_non_hidden_block() {
759 let content = r#"Some text
760
761```sql validator=sqlite
762SELECT 1;
763```
764
765More text"#;
766 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
767 assert!(result.contains("SELECT 1"));
769 assert!(result.contains("```sql"));
770 assert!(result.contains("Some text"));
771 assert!(result.contains("More text"));
772 }
773
774 #[test]
775 fn strip_markers_from_chapter_mixed_hidden_and_non_hidden() {
776 let content = r#"Start
777
778```sql validator=sqlite hidden
779HIDDEN QUERY;
780```
781
782Middle
783
784```sql validator=sqlite
785VISIBLE QUERY;
786```
787
788End"#;
789 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
790 assert!(!result.contains("HIDDEN QUERY"));
792 assert!(result.contains("VISIBLE QUERY"));
793 assert!(result.contains("Start"));
794 assert!(result.contains("Middle"));
795 assert!(result.contains("End"));
796 }
797
798 #[test]
799 fn strip_markers_from_chapter_adjacent_hidden_blocks() {
800 let content = r#"Start
801
802```sql validator=sqlite hidden
803HIDDEN 1;
804```
805
806```sql validator=sqlite hidden
807HIDDEN 2;
808```
809
810End"#;
811 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
812 assert!(!result.contains("HIDDEN 1"));
814 assert!(!result.contains("HIDDEN 2"));
815 assert!(result.contains("Start"));
816 assert!(result.contains("End"));
817 }
818
819 #[test]
820 fn strip_markers_from_chapter_hidden_block_at_start() {
821 let content = r#"```sql validator=sqlite hidden
822HIDDEN;
823```
824
825Visible content"#;
826 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
827 assert!(!result.contains("HIDDEN"));
829 assert!(result.contains("Visible content"));
830 assert!(!result.starts_with('\n'));
832 }
833
834 #[test]
835 fn strip_markers_from_chapter_hidden_block_at_end() {
836 let content = r#"Visible content
837
838```sql validator=sqlite hidden
839HIDDEN;
840```"#;
841 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
842 assert!(!result.contains("HIDDEN"));
844 assert!(result.contains("Visible content"));
845 assert!(!result.ends_with("\n\n"));
847 }
848
849 #[test]
850 fn strip_markers_from_chapter_only_hidden_block() {
851 let content = r#"```sql validator=sqlite hidden
852HIDDEN;
853```"#;
854 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
855 assert!(!result.contains("HIDDEN"));
857 assert!(result.is_empty() || result.trim().is_empty());
858 }
859
860 #[test]
861 fn strip_markers_from_chapter_hidden_with_markers() {
862 let content = r#"Text
863
864```sql validator=sqlite hidden
865<!--SETUP
866CREATE TABLE t;
867-->
868SELECT * FROM t;
869<!--ASSERT
870rows >= 1
871-->
872```
873
874More text"#;
875 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
876 assert!(!result.contains("SETUP"));
878 assert!(!result.contains("ASSERT"));
879 assert!(!result.contains("CREATE TABLE"));
880 assert!(!result.contains("SELECT"));
881 assert!(result.contains("Text"));
882 assert!(result.contains("More text"));
883 }
884
885 #[test]
890 fn strip_markers_preserves_lists() {
891 let content = r#"# Chapter
892
893Some text:
894
895- Item one
896- Item two
897- Item three
898
899### Next Section
900
901More text."#;
902 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
903 assert!(
905 result.contains("- Item one"),
906 "List items must be preserved"
907 );
908 assert!(
909 result.contains("- Item two"),
910 "List items must be preserved"
911 );
912 assert!(
913 result.contains("- Item three"),
914 "List items must be preserved"
915 );
916 assert!(
917 result.contains("### Next Section"),
918 "Headings must be preserved"
919 );
920 }
921
922 #[test]
923 fn strip_markers_preserves_lists_with_code_block() {
924 let content = r#"# Chapter
925
926Some text:
927
928- Item one
929- Item two
930- Item three
931
932```sql validator=sqlite
933<!--SETUP
934CREATE TABLE t;
935-->
936SELECT 1;
937```
938
939### Next Section
940
941More text."#;
942 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
943 assert!(
945 result.contains("- Item one"),
946 "List items must be preserved"
947 );
948 assert!(
949 result.contains("- Item two"),
950 "List items must be preserved"
951 );
952 assert!(
953 result.contains("- Item three"),
954 "List items must be preserved"
955 );
956 assert!(result.contains("SELECT 1"), "Code block content preserved");
958 assert!(!result.contains("SETUP"), "Markers stripped");
959 assert!(!result.contains("CREATE TABLE"), "Setup content stripped");
960 assert!(
962 result.contains("### Next Section"),
963 "Headings must be preserved"
964 );
965 }
966
967 #[test]
968 fn strip_markers_preserves_numbered_lists() {
969 let content = r#"Steps:
970
9711. First step
9722. Second step
9733. Third step
974
975Done."#;
976 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
977 assert!(
978 result.contains("1. First step"),
979 "Numbered lists must be preserved"
980 );
981 assert!(
982 result.contains("2. Second step"),
983 "Numbered lists must be preserved"
984 );
985 assert!(
986 result.contains("3. Third step"),
987 "Numbered lists must be preserved"
988 );
989 }
990
991 #[test]
992 fn strip_markers_preserves_blockquotes() {
993 let content = r#"Quote:
994
995> This is a blockquote
996> with multiple lines
997
998End."#;
999 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1000 assert!(
1001 result.contains("> This is a blockquote"),
1002 "Blockquotes must be preserved"
1003 );
1004 }
1005
1006 #[test]
1007 fn strip_markers_preserves_links() {
1008 let content = r#"See [the documentation](https://example.com) for details.
1009
1010And [another link](https://other.com)."#;
1011 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1012 assert!(
1013 result.contains("[the documentation](https://example.com)"),
1014 "Links must be preserved"
1015 );
1016 assert!(
1017 result.contains("[another link](https://other.com)"),
1018 "Links must be preserved"
1019 );
1020 }
1021
1022 #[test]
1023 fn strip_markers_preserves_inline_code() {
1024 let content = r#"Use the `SELECT` statement to query data.
1025
1026Also `INSERT` works."#;
1027 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1028 assert!(result.contains("`SELECT`"), "Inline code must be preserved");
1029 assert!(result.contains("`INSERT`"), "Inline code must be preserved");
1030 }
1031
1032 #[test]
1033 fn strip_markers_preserves_emphasis() {
1034 let content = r#"This is *italic* and **bold** text.
1035
1036Also _underscores_ and __double__."#;
1037 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1038 assert!(result.contains("*italic*"), "Italic must be preserved");
1039 assert!(result.contains("**bold**"), "Bold must be preserved");
1040 }
1041
1042 #[test]
1043 fn strip_markers_preserves_tables() {
1044 let content = r#"| Column A | Column B |
1045|----------|----------|
1046| Value 1 | Value 2 |
1047| Value 3 | Value 4 |"#;
1048 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1049 assert!(
1050 result.contains("| Column A | Column B |"),
1051 "Tables must be preserved"
1052 );
1053 assert!(
1054 result.contains("| Value 1 | Value 2 |"),
1055 "Table rows must be preserved"
1056 );
1057 }
1058
1059 #[test]
1060 fn strip_markers_preserves_code_blocks_without_validator() {
1061 let content = r#"Regular code:
1062
1063```python
1064def hello():
1065 print("world")
1066```
1067
1068End."#;
1069 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1070 assert!(result.contains("```python"), "Code fence must be preserved");
1071 assert!(
1072 result.contains("def hello():"),
1073 "Code content must be preserved"
1074 );
1075 assert!(
1076 result.contains("print(\"world\")"),
1077 "Code content must be preserved"
1078 );
1079 }
1080
1081 #[test]
1082 fn strip_markers_complex_document() {
1083 let content = r#"# Getting Started
1085
1086Welcome to the guide. Here's what you'll learn:
1087
1088- How to query data
1089- How to filter results
1090- How to join tables
1091
1092## Basic Queries
1093
1094First, let's set up our database:
1095
1096```sql validator=sqlite hidden
1097<!--SETUP
1098CREATE TABLE users (id INTEGER, name TEXT);
1099INSERT INTO users VALUES (1, 'Alice'), (2, 'Bob');
1100-->
1101SELECT 'setup complete';
1102```
1103
1104Now run a simple query:
1105
1106```sql validator=sqlite
1107SELECT * FROM users;
1108<!--ASSERT
1109rows >= 1
1110-->
1111```
1112
1113> **Note**: The query above returns all users.
1114
1115See [SQL documentation](https://sqlite.org) for more.
1116
1117### Summary
1118
11191. We created a table
11202. We queried the data
11213. We verified the results
1122
1123Done!"#;
1124 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1125
1126 assert!(
1128 result.contains("- How to query data"),
1129 "Bullet lists preserved"
1130 );
1131 assert!(
1132 result.contains("1. We created a table"),
1133 "Numbered lists preserved"
1134 );
1135
1136 assert!(
1138 !result.contains("CREATE TABLE users"),
1139 "Hidden block content removed"
1140 );
1141 assert!(
1142 !result.contains("INSERT INTO users"),
1143 "Hidden block content removed"
1144 );
1145
1146 assert!(
1148 result.contains("SELECT * FROM users"),
1149 "Visible query preserved"
1150 );
1151 assert!(!result.contains("ASSERT"), "Markers stripped");
1152
1153 assert!(result.contains("> **Note**"), "Blockquote preserved");
1155
1156 assert!(
1158 result.contains("[SQL documentation](https://sqlite.org)"),
1159 "Link preserved"
1160 );
1161
1162 assert!(result.contains("## Basic Queries"), "H2 preserved");
1164 assert!(result.contains("### Summary"), "H3 preserved");
1165 }
1166
1167 #[test]
1168 fn strip_markers_preserves_headings_with_links() {
1169 let content = r#"# Introduction
1171
1172Some intro text.
1173
1174### [Configuration Guide](https://example.com/config)
1175
1176This section explains configuration.
1177
1178### [API Reference](https://example.com/api)
1179
1180API docs here.
1181
1182```sql validator=sqlite
1183SELECT 1;
1184```
1185
1186### [Advanced Topics](https://example.com/advanced)
1187
1188More content."#;
1189 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1190
1191 assert!(
1193 result.contains("### [Configuration Guide](https://example.com/config)"),
1194 "Heading with link must be preserved"
1195 );
1196 assert!(
1197 result.contains("### [API Reference](https://example.com/api)"),
1198 "Heading with link must be preserved"
1199 );
1200 assert!(
1201 result.contains("### [Advanced Topics](https://example.com/advanced)"),
1202 "Heading with link must be preserved"
1203 );
1204 assert!(result.contains("SELECT 1"), "Code block content preserved");
1206 }
1207
1208 #[test]
1209 fn strip_markers_preserves_paths_with_wildcards() {
1210 let content = r#"# File Patterns
1212
1213Match all files in a directory:
1214
1215- `/etc/osquery/*`
1216- `/var/log/*.log`
1217- `C:\Users\*\AppData`
1218
1219You can also use `/some/path/**/*.json` for recursive matching.
1220
1221```sql validator=sqlite
1222SELECT 1;
1223```
1224
1225The path `/tmp/*` is commonly used."#;
1226 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1227
1228 assert!(
1230 result.contains("/etc/osquery/*"),
1231 "Path with wildcard must be preserved"
1232 );
1233 assert!(
1234 result.contains("/var/log/*.log"),
1235 "Path with wildcard must be preserved"
1236 );
1237 assert!(
1238 result.contains(r"C:\Users\*\AppData"),
1239 "Windows path with wildcard must be preserved"
1240 );
1241 assert!(
1242 result.contains("/some/path/**/*.json"),
1243 "Recursive glob must be preserved"
1244 );
1245 assert!(
1246 result.contains("/tmp/*"),
1247 "Inline path with wildcard must be preserved"
1248 );
1249 }
1250
1251 #[test]
1252 fn strip_markers_preserves_inline_code_with_special_chars() {
1253 let content = r#"# Code Examples
1255
1256Use `SELECT * FROM users` to get all users.
1257
1258The command `rm -rf /tmp/*` removes temp files.
1259
1260Run `echo $HOME` to print home directory.
1261
1262Use `git log --oneline | head -10` for recent commits.
1263
1264The regex `\d+\.\d+` matches decimals.
1265
1266```sql validator=sqlite
1267SELECT 1;
1268```
1269
1270Also try `jq '.[] | .name'` for JSON parsing."#;
1271 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1272
1273 assert!(
1275 result.contains("`SELECT * FROM users`"),
1276 "Inline code with * must be preserved"
1277 );
1278 assert!(
1279 result.contains("`rm -rf /tmp/*`"),
1280 "Inline code with path must be preserved"
1281 );
1282 assert!(
1283 result.contains("`echo $HOME`"),
1284 "Inline code with $ must be preserved"
1285 );
1286 assert!(
1287 result.contains("`git log --oneline | head -10`"),
1288 "Inline code with pipe must be preserved"
1289 );
1290 assert!(
1291 result.contains(r"`\d+\.\d+`"),
1292 "Inline code with backslashes must be preserved"
1293 );
1294 assert!(
1295 result.contains("`jq '.[] | .name'`"),
1296 "Inline code with quotes must be preserved"
1297 );
1298 }
1299
1300 #[test]
1301 fn strip_markers_preserves_asterisks_in_text() {
1302 let content = r#"# Wildcards
1304
1305The pattern `*` matches everything.
1306
1307File paths like /etc/* are common.
1308
1309Use * for wildcards and ** for recursive.
1310
1311Math: 5 * 3 = 15
1312
1313```sql validator=sqlite
1314SELECT 1;
1315```
1316
1317Done."#;
1318 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1319
1320 assert!(
1322 result.contains("The pattern `*` matches everything"),
1323 "Backtick asterisk preserved"
1324 );
1325 assert!(result.contains("/etc/*"), "Path asterisk preserved");
1326 assert!(result.contains("5 * 3 = 15"), "Math asterisk preserved");
1327 }
1328
1329 #[test]
1330 fn strip_markers_preserves_complex_inline_formatting() {
1331 let content = r#"# Formatting Test
1333
1334This has **bold** and *italic* text.
1335
1336This has `code with **asterisks**` inside.
1337
1338This has [link with `code`](https://example.com).
1339
1340This has **bold with `code` inside**.
1341
1342```sql validator=sqlite
1343SELECT 1;
1344```
1345
1346End."#;
1347 let result = ValidatorPreprocessor::strip_markers_from_chapter(content);
1348
1349 assert!(result.contains("**bold**"), "Bold preserved");
1350 assert!(result.contains("*italic*"), "Italic preserved");
1351 assert!(
1352 result.contains("`code with **asterisks**`"),
1353 "Code with asterisks preserved"
1354 );
1355 assert!(
1356 result.contains("[link with `code`](https://example.com)"),
1357 "Link with code preserved"
1358 );
1359 assert!(
1360 result.contains("**bold with `code` inside**"),
1361 "Bold with code preserved"
1362 );
1363 }
1364}