rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
3//! This module provides a zero-cost abstraction for iterating over markdown lines
4//! while automatically filtering out non-content regions like front matter, code blocks,
5//! and HTML blocks. This ensures rules only process actual markdown content.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
37/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
38#[derive(Debug, Clone)]
39pub struct FilteredLine<'a> {
40    /// The 1-indexed line number in the original document
41    pub line_num: usize,
42    /// Reference to the line's metadata
43    pub line_info: &'a LineInfo,
44    /// The actual line content
45    pub content: &'a str,
46}
47
/// Set of switches selecting which regions are dropped during iteration
///
/// Every switch starts out disabled; enable the ones you need by chaining
/// the builder methods:
///
/// ```rust
/// use rumdl_lib::filtered_lines::LineFilterConfig;
///
/// let config = LineFilterConfig::new()
///     .skip_front_matter()
///     .skip_code_blocks()
///     .skip_html_blocks()
///     .skip_html_comments()
///     .skip_mkdocstrings()
///     .skip_esm_blocks()
///     .skip_quarto_divs();
/// ```
#[derive(Debug, Clone, Default)]
pub struct LineFilterConfig {
    /// Drop lines that belong to front matter (YAML/TOML/JSON metadata)
    pub skip_front_matter: bool,
    /// Drop lines that belong to fenced code blocks
    pub skip_code_blocks: bool,
    /// Drop lines that belong to HTML blocks
    pub skip_html_blocks: bool,
    /// Drop lines that belong to HTML comments
    pub skip_html_comments: bool,
    /// Drop lines that belong to mkdocstrings blocks
    pub skip_mkdocstrings: bool,
    /// Drop lines that belong to ESM (ECMAScript Module) blocks
    pub skip_esm_blocks: bool,
    /// Drop lines that belong to math blocks ($$ ... $$)
    pub skip_math_blocks: bool,
    /// Drop lines that belong to Quarto div blocks (::: ... :::)
    pub skip_quarto_divs: bool,
    /// Drop lines containing or inside JSX expressions (MDX: {expression})
    pub skip_jsx_expressions: bool,
    /// Drop lines that belong to MDX comments ({/* ... */})
    pub skip_mdx_comments: bool,
    /// Drop lines that belong to MkDocs admonitions (!!! or ???)
    pub skip_admonitions: bool,
    /// Drop lines that belong to MkDocs content tabs (=== "Tab")
    pub skip_content_tabs: bool,
    /// Drop lines that belong to definition lists (:  definition)
    pub skip_definition_lists: bool,
}
93
94impl LineFilterConfig {
95    /// Create a new filter configuration with all filters disabled
96    #[must_use]
97    pub fn new() -> Self {
98        Self::default()
99    }
100
101    /// Skip lines that are part of front matter (YAML/TOML/JSON)
102    ///
103    /// Front matter is metadata at the start of a markdown file and should
104    /// not be processed by markdown linting rules.
105    #[must_use]
106    pub fn skip_front_matter(mut self) -> Self {
107        self.skip_front_matter = true;
108        self
109    }
110
111    /// Skip lines inside fenced code blocks
112    ///
113    /// Code blocks contain source code, not markdown, and most rules should
114    /// not process them.
115    #[must_use]
116    pub fn skip_code_blocks(mut self) -> Self {
117        self.skip_code_blocks = true;
118        self
119    }
120
121    /// Skip lines inside HTML blocks
122    ///
123    /// HTML blocks contain raw HTML and most markdown rules should not
124    /// process them.
125    #[must_use]
126    pub fn skip_html_blocks(mut self) -> Self {
127        self.skip_html_blocks = true;
128        self
129    }
130
131    /// Skip lines inside HTML comments
132    ///
133    /// HTML comments (<!-- ... -->) are metadata and should not be processed
134    /// by most markdown linting rules.
135    #[must_use]
136    pub fn skip_html_comments(mut self) -> Self {
137        self.skip_html_comments = true;
138        self
139    }
140
141    /// Skip lines inside mkdocstrings blocks
142    ///
143    /// Mkdocstrings blocks contain auto-generated documentation and most
144    /// markdown rules should not process them.
145    #[must_use]
146    pub fn skip_mkdocstrings(mut self) -> Self {
147        self.skip_mkdocstrings = true;
148        self
149    }
150
151    /// Skip lines inside ESM (ECMAScript Module) blocks
152    ///
153    /// ESM blocks contain JavaScript/TypeScript module code and most
154    /// markdown rules should not process them.
155    #[must_use]
156    pub fn skip_esm_blocks(mut self) -> Self {
157        self.skip_esm_blocks = true;
158        self
159    }
160
161    /// Skip lines inside math blocks ($$ ... $$)
162    ///
163    /// Math blocks contain LaTeX/mathematical notation and markdown rules
164    /// should not process them as regular markdown content.
165    #[must_use]
166    pub fn skip_math_blocks(mut self) -> Self {
167        self.skip_math_blocks = true;
168        self
169    }
170
171    /// Skip lines inside Quarto div blocks (::: ... :::)
172    ///
173    /// Quarto divs are fenced containers for callouts, panels, and other
174    /// structured content. Rules may need to skip them for accurate processing.
175    #[must_use]
176    pub fn skip_quarto_divs(mut self) -> Self {
177        self.skip_quarto_divs = true;
178        self
179    }
180
181    /// Skip lines containing or inside JSX expressions (MDX: {expression})
182    ///
183    /// JSX expressions contain JavaScript code and most markdown rules
184    /// should not process them as regular markdown content.
185    #[must_use]
186    pub fn skip_jsx_expressions(mut self) -> Self {
187        self.skip_jsx_expressions = true;
188        self
189    }
190
191    /// Skip lines inside MDX comments ({/* ... */})
192    ///
193    /// MDX comments are metadata and should not be processed by most
194    /// markdown linting rules.
195    #[must_use]
196    pub fn skip_mdx_comments(mut self) -> Self {
197        self.skip_mdx_comments = true;
198        self
199    }
200
201    /// Skip lines inside MkDocs admonitions (!!! or ???)
202    ///
203    /// Admonitions are callout blocks and may have special formatting
204    /// that rules should not process as regular content.
205    #[must_use]
206    pub fn skip_admonitions(mut self) -> Self {
207        self.skip_admonitions = true;
208        self
209    }
210
211    /// Skip lines inside MkDocs content tabs (=== "Tab")
212    ///
213    /// Content tabs contain tabbed content that may need special handling.
214    #[must_use]
215    pub fn skip_content_tabs(mut self) -> Self {
216        self.skip_content_tabs = true;
217        self
218    }
219
220    /// Skip lines inside definition lists (:  definition)
221    ///
222    /// Definition lists have special formatting that rules should
223    /// not process as regular content.
224    #[must_use]
225    pub fn skip_definition_lists(mut self) -> Self {
226        self.skip_definition_lists = true;
227        self
228    }
229
230    /// Check if a line should be filtered out based on this configuration
231    fn should_filter(&self, line_info: &LineInfo) -> bool {
232        (self.skip_front_matter && line_info.in_front_matter)
233            || (self.skip_code_blocks && line_info.in_code_block)
234            || (self.skip_html_blocks && line_info.in_html_block)
235            || (self.skip_html_comments && line_info.in_html_comment)
236            || (self.skip_mkdocstrings && line_info.in_mkdocstrings)
237            || (self.skip_esm_blocks && line_info.in_esm_block)
238            || (self.skip_math_blocks && line_info.in_math_block)
239            || (self.skip_quarto_divs && line_info.in_quarto_div)
240            || (self.skip_jsx_expressions && line_info.in_jsx_expression)
241            || (self.skip_mdx_comments && line_info.in_mdx_comment)
242            || (self.skip_admonitions && line_info.in_admonition)
243            || (self.skip_content_tabs && line_info.in_content_tab)
244            || (self.skip_definition_lists && line_info.in_definition_list)
245    }
246}
247
248/// Iterator that yields filtered lines based on configuration
249pub struct FilteredLinesIter<'a> {
250    ctx: &'a LintContext<'a>,
251    config: LineFilterConfig,
252    current_index: usize,
253    content_lines: Vec<&'a str>,
254}
255
256impl<'a> FilteredLinesIter<'a> {
257    /// Create a new filtered lines iterator
258    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
259        Self {
260            ctx,
261            config,
262            current_index: 0,
263            content_lines: ctx.content.lines().collect(),
264        }
265    }
266}
267
268impl<'a> Iterator for FilteredLinesIter<'a> {
269    type Item = FilteredLine<'a>;
270
271    fn next(&mut self) -> Option<Self::Item> {
272        let lines = &self.ctx.lines;
273
274        while self.current_index < lines.len() {
275            let idx = self.current_index;
276            self.current_index += 1;
277
278            // Check if this line should be filtered
279            if self.config.should_filter(&lines[idx]) {
280                continue;
281            }
282
283            // Get the actual line content from the document
284            let line_content = self.content_lines.get(idx).copied().unwrap_or("");
285
286            // Return the filtered line with 1-indexed line number
287            return Some(FilteredLine {
288                line_num: idx + 1, // Convert 0-indexed to 1-indexed
289                line_info: &lines[idx],
290                content: line_content,
291            });
292        }
293
294        None
295    }
296}
297
298/// Extension trait that adds filtered iteration methods to `LintContext`
299///
300/// This trait provides convenient methods for iterating over lines while
301/// automatically filtering out non-content regions.
302pub trait FilteredLinesExt {
303    /// Start building a filtered lines iterator
304    ///
305    /// Returns a `LineFilterConfig` builder that can be used to configure
306    /// which types of content should be filtered out.
307    ///
308    /// # Examples
309    ///
310    /// ```rust
311    /// use rumdl_lib::lint_context::LintContext;
312    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
313    ///
314    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
315    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
316    ///
317    /// for line in ctx.filtered_lines().skip_code_blocks() {
318    ///     println!("Line {}: {}", line.line_num, line.content);
319    /// }
320    /// ```
321    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;
322
323    /// Get an iterator over content lines only
324    ///
325    /// This is a convenience method that returns an iterator with front matter
326    /// filtered out by default. This is the most common use case for rules that
327    /// should only process markdown content.
328    ///
329    /// Equivalent to: `ctx.filtered_lines().skip_front_matter()`
330    ///
331    /// # Examples
332    ///
333    /// ```rust
334    /// use rumdl_lib::lint_context::LintContext;
335    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
336    ///
337    /// let content = "---\ntitle: Test\n---\n\n# Content";
338    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
339    ///
340    /// for line in ctx.content_lines() {
341    ///     // Front matter is automatically skipped
342    ///     println!("Line {}: {}", line.line_num, line.content);
343    /// }
344    /// ```
345    fn content_lines(&self) -> FilteredLinesIter<'_>;
346}
347
348/// Builder type that allows chaining filter configuration and converting to an iterator
349pub struct FilteredLinesBuilder<'a> {
350    ctx: &'a LintContext<'a>,
351    config: LineFilterConfig,
352}
353
354impl<'a> FilteredLinesBuilder<'a> {
355    fn new(ctx: &'a LintContext<'a>) -> Self {
356        Self {
357            ctx,
358            config: LineFilterConfig::new(),
359        }
360    }
361
362    /// Skip lines that are part of front matter (YAML/TOML/JSON)
363    #[must_use]
364    pub fn skip_front_matter(mut self) -> Self {
365        self.config = self.config.skip_front_matter();
366        self
367    }
368
369    /// Skip lines inside fenced code blocks
370    #[must_use]
371    pub fn skip_code_blocks(mut self) -> Self {
372        self.config = self.config.skip_code_blocks();
373        self
374    }
375
376    /// Skip lines inside HTML blocks
377    #[must_use]
378    pub fn skip_html_blocks(mut self) -> Self {
379        self.config = self.config.skip_html_blocks();
380        self
381    }
382
383    /// Skip lines inside HTML comments
384    #[must_use]
385    pub fn skip_html_comments(mut self) -> Self {
386        self.config = self.config.skip_html_comments();
387        self
388    }
389
390    /// Skip lines inside mkdocstrings blocks
391    #[must_use]
392    pub fn skip_mkdocstrings(mut self) -> Self {
393        self.config = self.config.skip_mkdocstrings();
394        self
395    }
396
397    /// Skip lines inside ESM (ECMAScript Module) blocks
398    #[must_use]
399    pub fn skip_esm_blocks(mut self) -> Self {
400        self.config = self.config.skip_esm_blocks();
401        self
402    }
403
404    /// Skip lines inside math blocks ($$ ... $$)
405    #[must_use]
406    pub fn skip_math_blocks(mut self) -> Self {
407        self.config = self.config.skip_math_blocks();
408        self
409    }
410
411    /// Skip lines inside Quarto div blocks (::: ... :::)
412    #[must_use]
413    pub fn skip_quarto_divs(mut self) -> Self {
414        self.config = self.config.skip_quarto_divs();
415        self
416    }
417
418    /// Skip lines containing or inside JSX expressions (MDX: {expression})
419    #[must_use]
420    pub fn skip_jsx_expressions(mut self) -> Self {
421        self.config = self.config.skip_jsx_expressions();
422        self
423    }
424
425    /// Skip lines inside MDX comments ({/* ... */})
426    #[must_use]
427    pub fn skip_mdx_comments(mut self) -> Self {
428        self.config = self.config.skip_mdx_comments();
429        self
430    }
431
432    /// Skip lines inside MkDocs admonitions (!!! or ???)
433    #[must_use]
434    pub fn skip_admonitions(mut self) -> Self {
435        self.config = self.config.skip_admonitions();
436        self
437    }
438
439    /// Skip lines inside MkDocs content tabs (=== "Tab")
440    #[must_use]
441    pub fn skip_content_tabs(mut self) -> Self {
442        self.config = self.config.skip_content_tabs();
443        self
444    }
445
446    /// Skip lines inside definition lists (:  definition)
447    #[must_use]
448    pub fn skip_definition_lists(mut self) -> Self {
449        self.config = self.config.skip_definition_lists();
450        self
451    }
452}
453
454impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
455    type Item = FilteredLine<'a>;
456    type IntoIter = FilteredLinesIter<'a>;
457
458    fn into_iter(self) -> Self::IntoIter {
459        FilteredLinesIter::new(self.ctx, self.config)
460    }
461}
462
463impl<'a> FilteredLinesExt for LintContext<'a> {
464    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
465        FilteredLinesBuilder::new(self)
466    }
467
468    fn content_lines(&self) -> FilteredLinesIter<'_> {
469        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
470    }
471}
472
473#[cfg(test)]
474mod tests {
475    use super::*;
476    use crate::config::MarkdownFlavor;
477
478    #[test]
479    fn test_filtered_line_structure() {
480        let content = "# Title\n\nContent";
481        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
482
483        let line = ctx.content_lines().next().unwrap();
484        assert_eq!(line.line_num, 1);
485        assert_eq!(line.content, "# Title");
486        assert!(!line.line_info.in_front_matter);
487    }
488
489    #[test]
490    fn test_skip_front_matter_yaml() {
491        let content = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
492        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
493
494        let lines: Vec<_> = ctx.content_lines().collect();
495        // After front matter (lines 1-4), we have: empty line, "# Content", empty line, "More content"
496        assert_eq!(lines.len(), 4);
497        assert_eq!(lines[0].line_num, 5); // First line after front matter
498        assert_eq!(lines[0].content, "");
499        assert_eq!(lines[1].line_num, 6);
500        assert_eq!(lines[1].content, "# Content");
501        assert_eq!(lines[2].line_num, 7);
502        assert_eq!(lines[2].content, "");
503        assert_eq!(lines[3].line_num, 8);
504        assert_eq!(lines[3].content, "More content");
505    }
506
507    #[test]
508    fn test_skip_front_matter_toml() {
509        let content = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
510        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
511
512        let lines: Vec<_> = ctx.content_lines().collect();
513        assert_eq!(lines.len(), 2); // Empty line + "# Content"
514        assert_eq!(lines[0].line_num, 5);
515        assert_eq!(lines[1].line_num, 6);
516        assert_eq!(lines[1].content, "# Content");
517    }
518
519    #[test]
520    fn test_skip_front_matter_json() {
521        let content = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
522        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
523
524        let lines: Vec<_> = ctx.content_lines().collect();
525        assert_eq!(lines.len(), 2); // Empty line + "# Content"
526        assert_eq!(lines[0].line_num, 5);
527        assert_eq!(lines[1].line_num, 6);
528        assert_eq!(lines[1].content, "# Content");
529    }
530
531    #[test]
532    fn test_skip_code_blocks() {
533        let content = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
534        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
535
536        let lines: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();
537
538        // Should have: "# Title", empty line, "```rust" fence, "```" fence, empty line, "Content"
539        // Wait, actually code blocks include the fences. Let me check the line_info
540        // Looking at the implementation, in_code_block is true for lines INSIDE code blocks
541        // The fences themselves are not marked as in_code_block
542        assert!(lines.iter().any(|l| l.content == "# Title"));
543        assert!(lines.iter().any(|l| l.content == "Content"));
544        // The actual code lines should be filtered out
545        assert!(!lines.iter().any(|l| l.content == "let x = 1;"));
546        assert!(!lines.iter().any(|l| l.content == "let y = 2;"));
547    }
548
549    #[test]
550    fn test_no_filters() {
551        let content = "---\ntitle: Test\n---\n\n# Content";
552        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
553
554        // With no filters, all lines should be included
555        let lines: Vec<_> = ctx.filtered_lines().into_iter().collect();
556        assert_eq!(lines.len(), ctx.lines.len());
557    }
558
559    #[test]
560    fn test_multiple_filters() {
561        let content = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
562        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
563
564        let lines: Vec<_> = ctx
565            .filtered_lines()
566            .skip_front_matter()
567            .skip_code_blocks()
568            .into_iter()
569            .collect();
570
571        // Should skip front matter (lines 1-3) and code block content (line 8)
572        assert!(lines.iter().any(|l| l.content == "# Title"));
573        assert!(lines.iter().any(|l| l.content == "Content"));
574        assert!(!lines.iter().any(|l| l.content == "title: Test"));
575        assert!(!lines.iter().any(|l| l.content == "code"));
576    }
577
578    #[test]
579    fn test_line_numbering_is_1_indexed() {
580        let content = "First\nSecond\nThird";
581        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
582
583        let lines: Vec<_> = ctx.content_lines().collect();
584        assert_eq!(lines[0].line_num, 1);
585        assert_eq!(lines[0].content, "First");
586        assert_eq!(lines[1].line_num, 2);
587        assert_eq!(lines[1].content, "Second");
588        assert_eq!(lines[2].line_num, 3);
589        assert_eq!(lines[2].content, "Third");
590    }
591
592    #[test]
593    fn test_content_lines_convenience_method() {
594        let content = "---\nfoo: bar\n---\n\nContent";
595        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
596
597        // content_lines() should automatically skip front matter
598        let lines: Vec<_> = ctx.content_lines().collect();
599        assert!(!lines.iter().any(|l| l.content.contains("foo")));
600        assert!(lines.iter().any(|l| l.content == "Content"));
601    }
602
603    #[test]
604    fn test_empty_document() {
605        let content = "";
606        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
607
608        let lines: Vec<_> = ctx.content_lines().collect();
609        assert_eq!(lines.len(), 0);
610    }
611
612    #[test]
613    fn test_only_front_matter() {
614        let content = "---\ntitle: Test\n---";
615        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
616
617        let lines: Vec<_> = ctx.content_lines().collect();
618        assert_eq!(
619            lines.len(),
620            0,
621            "Document with only front matter should have no content lines"
622        );
623    }
624
625    #[test]
626    fn test_builder_pattern_ergonomics() {
627        let content = "# Title\n\n```\ncode\n```\n\nContent";
628        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
629
630        // Test that builder pattern works smoothly
631        let _lines: Vec<_> = ctx
632            .filtered_lines()
633            .skip_front_matter()
634            .skip_code_blocks()
635            .skip_html_blocks()
636            .into_iter()
637            .collect();
638
639        // If this compiles and runs, the builder pattern is working
640    }
641
642    #[test]
643    fn test_filtered_line_access_to_line_info() {
644        let content = "# Title\n\nContent";
645        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
646
647        for line in ctx.content_lines() {
648            // Should be able to access line_info fields
649            assert!(!line.line_info.in_front_matter);
650            assert!(!line.line_info.in_code_block);
651        }
652    }
653
654    #[test]
655    fn test_skip_mkdocstrings() {
656        let content = r#"# API Documentation
657
658::: mymodule.MyClass
659    options:
660      show_root_heading: true
661      show_source: false
662
663Some regular content here.
664
665::: mymodule.function
666    options:
667      show_signature: true
668
669More content."#;
670        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
671        let lines: Vec<_> = ctx.filtered_lines().skip_mkdocstrings().into_iter().collect();
672
673        // Verify lines OUTSIDE mkdocstrings blocks are INCLUDED
674        assert!(
675            lines.iter().any(|l| l.content.contains("# API Documentation")),
676            "Should include lines outside mkdocstrings blocks"
677        );
678        assert!(
679            lines.iter().any(|l| l.content.contains("Some regular content")),
680            "Should include content between mkdocstrings blocks"
681        );
682        assert!(
683            lines.iter().any(|l| l.content.contains("More content")),
684            "Should include content after mkdocstrings blocks"
685        );
686
687        // Verify lines INSIDE mkdocstrings blocks are EXCLUDED
688        assert!(
689            !lines.iter().any(|l| l.content.contains("::: mymodule")),
690            "Should exclude mkdocstrings marker lines"
691        );
692        assert!(
693            !lines.iter().any(|l| l.content.contains("show_root_heading")),
694            "Should exclude mkdocstrings option lines"
695        );
696        assert!(
697            !lines.iter().any(|l| l.content.contains("show_signature")),
698            "Should exclude all mkdocstrings option lines"
699        );
700
701        // Verify line numbers are preserved (1-indexed)
702        assert_eq!(lines[0].line_num, 1, "First line should be line 1");
703    }
704
705    #[test]
706    fn test_skip_esm_blocks() {
707        // MDX 2.0+ allows ESM imports/exports anywhere in the document
708        let content = r#"import {Chart} from './components.js'
709import {Table} from './table.js'
710export const year = 2023
711
712# Last year's snowfall
713
714Content about snowfall data.
715
716import {Footer} from './footer.js'
717
718More content."#;
719        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
720        let lines: Vec<_> = ctx.filtered_lines().skip_esm_blocks().into_iter().collect();
721
722        // Verify lines OUTSIDE ESM blocks are INCLUDED
723        assert!(
724            lines.iter().any(|l| l.content.contains("# Last year's snowfall")),
725            "Should include markdown headings"
726        );
727        assert!(
728            lines.iter().any(|l| l.content.contains("Content about snowfall")),
729            "Should include markdown content"
730        );
731        assert!(
732            lines.iter().any(|l| l.content.contains("More content")),
733            "Should include content after ESM blocks"
734        );
735
736        // Verify ALL ESM blocks are EXCLUDED (MDX 2.0+ allows imports anywhere)
737        assert!(
738            !lines.iter().any(|l| l.content.contains("import {Chart}")),
739            "Should exclude import statements at top of file"
740        );
741        assert!(
742            !lines.iter().any(|l| l.content.contains("import {Table}")),
743            "Should exclude all import statements at top of file"
744        );
745        assert!(
746            !lines.iter().any(|l| l.content.contains("export const year")),
747            "Should exclude export statements at top of file"
748        );
749        // MDX 2.0+ allows imports anywhere - they should ALL be excluded
750        assert!(
751            !lines.iter().any(|l| l.content.contains("import {Footer}")),
752            "Should exclude import statements even after markdown content (MDX 2.0+ ESM anywhere)"
753        );
754
755        // Verify line numbers are preserved
756        let heading_line = lines
757            .iter()
758            .find(|l| l.content.contains("# Last year's snowfall"))
759            .unwrap();
760        assert_eq!(heading_line.line_num, 5, "Heading should be on line 5");
761    }
762
763    #[test]
764    fn test_all_filters_combined() {
765        let content = r#"---
766title: Test
767---
768
769# Title
770
771```
772code
773```
774
775<!-- HTML comment here -->
776
777::: mymodule.Class
778    options:
779      show_root_heading: true
780
781<div>
782HTML block
783</div>
784
785Content"#;
786        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
787
788        let lines: Vec<_> = ctx
789            .filtered_lines()
790            .skip_front_matter()
791            .skip_code_blocks()
792            .skip_html_blocks()
793            .skip_html_comments()
794            .skip_mkdocstrings()
795            .into_iter()
796            .collect();
797
798        // Verify markdown content is INCLUDED
799        assert!(
800            lines.iter().any(|l| l.content == "# Title"),
801            "Should include markdown headings"
802        );
803        assert!(
804            lines.iter().any(|l| l.content == "Content"),
805            "Should include markdown content"
806        );
807
808        // Verify all filtered content is EXCLUDED
809        assert!(
810            !lines.iter().any(|l| l.content == "title: Test"),
811            "Should exclude front matter"
812        );
813        assert!(
814            !lines.iter().any(|l| l.content == "code"),
815            "Should exclude code block content"
816        );
817        assert!(
818            !lines.iter().any(|l| l.content.contains("HTML comment")),
819            "Should exclude HTML comments"
820        );
821        assert!(
822            !lines.iter().any(|l| l.content.contains("::: mymodule")),
823            "Should exclude mkdocstrings blocks"
824        );
825        assert!(
826            !lines.iter().any(|l| l.content.contains("show_root_heading")),
827            "Should exclude mkdocstrings options"
828        );
829        assert!(
830            !lines.iter().any(|l| l.content.contains("HTML block")),
831            "Should exclude HTML blocks"
832        );
833    }
834
835    #[test]
836    fn test_skip_math_blocks() {
837        let content = r#"# Heading
838
839Some regular text.
840
841$$
842A = \left[
843\begin{array}{c}
8441 \\
845-D
846\end{array}
847\right]
848$$
849
850More content after math."#;
851        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
852        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
853
854        // Verify lines OUTSIDE math blocks are INCLUDED
855        assert!(
856            lines.iter().any(|l| l.content.contains("# Heading")),
857            "Should include markdown headings"
858        );
859        assert!(
860            lines.iter().any(|l| l.content.contains("Some regular text")),
861            "Should include regular text before math block"
862        );
863        assert!(
864            lines.iter().any(|l| l.content.contains("More content after math")),
865            "Should include content after math block"
866        );
867
868        // Verify lines INSIDE math blocks are EXCLUDED
869        assert!(
870            !lines.iter().any(|l| l.content == "$$"),
871            "Should exclude math block delimiters"
872        );
873        assert!(
874            !lines.iter().any(|l| l.content.contains("\\left[")),
875            "Should exclude LaTeX content inside math block"
876        );
877        assert!(
878            !lines.iter().any(|l| l.content.contains("-D")),
879            "Should exclude content that looks like list items inside math block"
880        );
881        assert!(
882            !lines.iter().any(|l| l.content.contains("\\begin{array}")),
883            "Should exclude LaTeX array content"
884        );
885    }
886
887    #[test]
888    fn test_math_blocks_not_confused_with_code_blocks() {
889        let content = r#"# Title
890
891```python
892# This $$ is inside a code block
893x = 1
894```
895
896$$
897y = 2
898$$
899
900Regular text."#;
901        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
902
903        // Check that the $$ inside code block doesn't start a math block
904        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
905
906        // The $$ inside the code block should NOT trigger math block detection
907        // So when we skip math blocks, the code block content is still there (until we also skip code blocks)
908        assert!(
909            lines.iter().any(|l| l.content.contains("# This $$")),
910            "Code block content with $$ should not be detected as math block"
911        );
912
913        // But the real math block content should be excluded
914        assert!(
915            !lines.iter().any(|l| l.content == "y = 2"),
916            "Actual math block content should be excluded"
917        );
918    }
919
920    #[test]
921    fn test_skip_quarto_divs() {
922        let content = r#"# Heading
923
924::: {.callout-note}
925This is a callout note.
926With multiple lines.
927:::
928
929Regular text outside.
930
931::: {.bordered}
932Content inside bordered div.
933:::
934
935More content."#;
936        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
937        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
938
939        // Verify lines OUTSIDE Quarto divs are INCLUDED
940        assert!(
941            lines.iter().any(|l| l.content.contains("# Heading")),
942            "Should include markdown headings"
943        );
944        assert!(
945            lines.iter().any(|l| l.content.contains("Regular text outside")),
946            "Should include content between divs"
947        );
948        assert!(
949            lines.iter().any(|l| l.content.contains("More content")),
950            "Should include content after divs"
951        );
952
953        // Verify lines INSIDE Quarto divs are EXCLUDED
954        assert!(
955            !lines.iter().any(|l| l.content.contains("::: {.callout-note}")),
956            "Should exclude callout div markers"
957        );
958        assert!(
959            !lines.iter().any(|l| l.content.contains("This is a callout note")),
960            "Should exclude callout content"
961        );
962        assert!(
963            !lines.iter().any(|l| l.content.contains("Content inside bordered")),
964            "Should exclude bordered div content"
965        );
966    }
967
968    #[test]
969    fn test_skip_jsx_expressions() {
970        let content = r#"# MDX Document
971
972Here is some content with {myVariable} inline.
973
974{items.map(item => (
975  <Item key={item.id} />
976))}
977
978Regular paragraph after expression.
979
980{/* This should NOT be skipped by jsx_expressions filter */}
981{/* MDX comments have their own filter */}
982
983More content."#;
984        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
985        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
986
987        // Verify lines OUTSIDE JSX expressions are INCLUDED
988        assert!(
989            lines.iter().any(|l| l.content.contains("# MDX Document")),
990            "Should include markdown headings"
991        );
992        assert!(
993            lines.iter().any(|l| l.content.contains("Regular paragraph")),
994            "Should include regular paragraphs"
995        );
996        assert!(
997            lines.iter().any(|l| l.content.contains("More content")),
998            "Should include content after expressions"
999        );
1000
1001        // Verify lines with JSX expressions are EXCLUDED
1002        assert!(
1003            !lines.iter().any(|l| l.content.contains("{myVariable}")),
1004            "Should exclude lines with inline JSX expressions"
1005        );
1006        assert!(
1007            !lines.iter().any(|l| l.content.contains("items.map")),
1008            "Should exclude multi-line JSX expression content"
1009        );
1010        assert!(
1011            !lines.iter().any(|l| l.content.contains("<Item key")),
1012            "Should exclude JSX inside expressions"
1013        );
1014    }
1015
1016    #[test]
1017    fn test_skip_quarto_divs_nested() {
1018        let content = r#"# Title
1019
1020::: {.outer}
1021Outer content.
1022
1023::: {.inner}
1024Inner content.
1025:::
1026
1027Back to outer.
1028:::
1029
1030Outside text."#;
1031        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
1032        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1033
1034        // Should include content outside all divs
1035        assert!(
1036            lines.iter().any(|l| l.content.contains("# Title")),
1037            "Should include heading"
1038        );
1039        assert!(
1040            lines.iter().any(|l| l.content.contains("Outside text")),
1041            "Should include text after divs"
1042        );
1043
1044        // Should exclude all div content
1045        assert!(
1046            !lines.iter().any(|l| l.content.contains("Outer content")),
1047            "Should exclude outer div content"
1048        );
1049        assert!(
1050            !lines.iter().any(|l| l.content.contains("Inner content")),
1051            "Should exclude inner div content"
1052        );
1053    }
1054
1055    #[test]
1056    fn test_skip_quarto_divs_not_in_standard_flavor() {
1057        let content = r#"::: {.callout-note}
1058This should NOT be skipped in standard flavor.
1059:::"#;
1060        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1061        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1062
1063        // In standard flavor, Quarto divs are not detected, so nothing is skipped
1064        assert!(
1065            lines.iter().any(|l| l.content.contains("This should NOT be skipped")),
1066            "Standard flavor should not detect Quarto divs"
1067        );
1068    }
1069
1070    #[test]
1071    fn test_skip_mdx_comments() {
1072        let content = r#"# MDX Document
1073
1074{/* This is an MDX comment */}
1075
1076Regular content here.
1077
1078{/*
1079  Multi-line
1080  MDX comment
1081*/}
1082
1083More content after comment."#;
1084        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1085        let lines: Vec<_> = ctx.filtered_lines().skip_mdx_comments().into_iter().collect();
1086
1087        // Verify lines OUTSIDE MDX comments are INCLUDED
1088        assert!(
1089            lines.iter().any(|l| l.content.contains("# MDX Document")),
1090            "Should include markdown headings"
1091        );
1092        assert!(
1093            lines.iter().any(|l| l.content.contains("Regular content")),
1094            "Should include regular content"
1095        );
1096        assert!(
1097            lines.iter().any(|l| l.content.contains("More content")),
1098            "Should include content after comments"
1099        );
1100
1101        // Verify lines with MDX comments are EXCLUDED
1102        assert!(
1103            !lines.iter().any(|l| l.content.contains("{/* This is")),
1104            "Should exclude single-line MDX comments"
1105        );
1106        assert!(
1107            !lines.iter().any(|l| l.content.contains("Multi-line")),
1108            "Should exclude multi-line MDX comment content"
1109        );
1110    }
1111
1112    #[test]
1113    fn test_jsx_expressions_with_nested_braces() {
1114        // Test that nested braces are handled correctly
1115        let content = r#"# Document
1116
1117{props.style || {color: "red", background: "blue"}}
1118
1119Regular content."#;
1120        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1121        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1122
1123        // Verify nested braces don't break detection
1124        assert!(
1125            !lines.iter().any(|l| l.content.contains("props.style")),
1126            "Should exclude JSX expression with nested braces"
1127        );
1128        assert!(
1129            lines.iter().any(|l| l.content.contains("Regular content")),
1130            "Should include content after nested expression"
1131        );
1132    }
1133
1134    #[test]
1135    fn test_jsx_and_mdx_comments_combined() {
1136        // Test both filters together
1137        let content = r#"# Title
1138
1139{variable}
1140
1141{/* comment */}
1142
1143Content."#;
1144        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1145        let lines: Vec<_> = ctx
1146            .filtered_lines()
1147            .skip_jsx_expressions()
1148            .skip_mdx_comments()
1149            .into_iter()
1150            .collect();
1151
1152        assert!(
1153            lines.iter().any(|l| l.content.contains("# Title")),
1154            "Should include heading"
1155        );
1156        assert!(
1157            lines.iter().any(|l| l.content.contains("Content")),
1158            "Should include regular content"
1159        );
1160        assert!(
1161            !lines.iter().any(|l| l.content.contains("{variable}")),
1162            "Should exclude JSX expression"
1163        );
1164        assert!(
1165            !lines.iter().any(|l| l.content.contains("{/* comment */")),
1166            "Should exclude MDX comment"
1167        );
1168    }
1169
1170    #[test]
1171    fn test_jsx_expressions_not_detected_in_standard_flavor() {
1172        // JSX expressions should only be detected in MDX flavor
1173        let content = r#"# Document
1174
1175{this is not JSX in standard markdown}
1176
1177Content."#;
1178        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1179        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1180
1181        // In standard markdown, braces are just text - nothing should be filtered
1182        assert!(
1183            lines.iter().any(|l| l.content.contains("{this is not JSX")),
1184            "Should NOT exclude brace content in standard markdown"
1185        );
1186    }
1187}