Skip to main content

rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
//! This module provides a lightweight abstraction for iterating over markdown lines
//! while automatically filtering out non-content regions like front matter, code blocks,
//! and HTML blocks. This ensures rules only process actual markdown content.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
37/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
38#[derive(Debug, Clone)]
39pub struct FilteredLine<'a> {
40    /// The 1-indexed line number in the original document
41    pub line_num: usize,
42    /// Reference to the line's metadata
43    pub line_info: &'a LineInfo,
44    /// The actual line content
45    pub content: &'a str,
46}
47
48/// Configuration for filtering lines during iteration
49///
50/// Use the builder pattern to configure which types of content should be skipped:
51///
52/// ```rust
53/// use rumdl_lib::filtered_lines::LineFilterConfig;
54///
55/// let config = LineFilterConfig::new()
56///     .skip_front_matter()
57///     .skip_code_blocks()
58///     .skip_html_blocks()
59///     .skip_html_comments()
60///     .skip_mkdocstrings()
61///     .skip_esm_blocks()
62///     .skip_quarto_divs();
63/// ```
64#[derive(Debug, Clone, Default)]
65pub struct LineFilterConfig {
66    /// Skip lines inside front matter (YAML/TOML/JSON metadata)
67    pub skip_front_matter: bool,
68    /// Skip lines inside fenced code blocks
69    pub skip_code_blocks: bool,
70    /// Skip lines inside HTML blocks
71    pub skip_html_blocks: bool,
72    /// Skip lines inside HTML comments
73    pub skip_html_comments: bool,
74    /// Skip lines inside mkdocstrings blocks
75    pub skip_mkdocstrings: bool,
76    /// Skip lines inside ESM (ECMAScript Module) blocks
77    pub skip_esm_blocks: bool,
78    /// Skip lines inside math blocks ($$ ... $$)
79    pub skip_math_blocks: bool,
80    /// Skip lines inside Quarto div blocks (::: ... :::)
81    pub skip_quarto_divs: bool,
82    /// Skip lines containing or inside JSX expressions (MDX: {expression})
83    pub skip_jsx_expressions: bool,
84    /// Skip lines inside MDX comments ({/* ... */})
85    pub skip_mdx_comments: bool,
86    /// Skip lines inside MkDocs admonitions (!!! or ???)
87    pub skip_admonitions: bool,
88    /// Skip lines inside MkDocs content tabs (=== "Tab")
89    pub skip_content_tabs: bool,
90    /// Skip lines inside definition lists (:  definition)
91    pub skip_definition_lists: bool,
92}
93
94impl LineFilterConfig {
95    /// Create a new filter configuration with all filters disabled
96    #[must_use]
97    pub fn new() -> Self {
98        Self::default()
99    }
100
101    /// Skip lines that are part of front matter (YAML/TOML/JSON)
102    ///
103    /// Front matter is metadata at the start of a markdown file and should
104    /// not be processed by markdown linting rules.
105    #[must_use]
106    pub fn skip_front_matter(mut self) -> Self {
107        self.skip_front_matter = true;
108        self
109    }
110
111    /// Skip lines inside fenced code blocks
112    ///
113    /// Code blocks contain source code, not markdown, and most rules should
114    /// not process them.
115    #[must_use]
116    pub fn skip_code_blocks(mut self) -> Self {
117        self.skip_code_blocks = true;
118        self
119    }
120
121    /// Skip lines inside HTML blocks
122    ///
123    /// HTML blocks contain raw HTML and most markdown rules should not
124    /// process them.
125    #[must_use]
126    pub fn skip_html_blocks(mut self) -> Self {
127        self.skip_html_blocks = true;
128        self
129    }
130
131    /// Skip lines inside HTML comments
132    ///
133    /// HTML comments (<!-- ... -->) are metadata and should not be processed
134    /// by most markdown linting rules.
135    #[must_use]
136    pub fn skip_html_comments(mut self) -> Self {
137        self.skip_html_comments = true;
138        self
139    }
140
141    /// Skip lines inside mkdocstrings blocks
142    ///
143    /// Mkdocstrings blocks contain auto-generated documentation and most
144    /// markdown rules should not process them.
145    #[must_use]
146    pub fn skip_mkdocstrings(mut self) -> Self {
147        self.skip_mkdocstrings = true;
148        self
149    }
150
151    /// Skip lines inside ESM (ECMAScript Module) blocks
152    ///
153    /// ESM blocks contain JavaScript/TypeScript module code and most
154    /// markdown rules should not process them.
155    #[must_use]
156    pub fn skip_esm_blocks(mut self) -> Self {
157        self.skip_esm_blocks = true;
158        self
159    }
160
161    /// Skip lines inside math blocks ($$ ... $$)
162    ///
163    /// Math blocks contain LaTeX/mathematical notation and markdown rules
164    /// should not process them as regular markdown content.
165    #[must_use]
166    pub fn skip_math_blocks(mut self) -> Self {
167        self.skip_math_blocks = true;
168        self
169    }
170
171    /// Skip lines inside Quarto div blocks (::: ... :::)
172    ///
173    /// Quarto divs are fenced containers for callouts, panels, and other
174    /// structured content. Rules may need to skip them for accurate processing.
175    #[must_use]
176    pub fn skip_quarto_divs(mut self) -> Self {
177        self.skip_quarto_divs = true;
178        self
179    }
180
181    /// Skip lines containing or inside JSX expressions (MDX: {expression})
182    ///
183    /// JSX expressions contain JavaScript code and most markdown rules
184    /// should not process them as regular markdown content.
185    #[must_use]
186    pub fn skip_jsx_expressions(mut self) -> Self {
187        self.skip_jsx_expressions = true;
188        self
189    }
190
191    /// Skip lines inside MDX comments ({/* ... */})
192    ///
193    /// MDX comments are metadata and should not be processed by most
194    /// markdown linting rules.
195    #[must_use]
196    pub fn skip_mdx_comments(mut self) -> Self {
197        self.skip_mdx_comments = true;
198        self
199    }
200
201    /// Skip lines inside MkDocs admonitions (!!! or ???)
202    ///
203    /// Admonitions are callout blocks and may have special formatting
204    /// that rules should not process as regular content.
205    #[must_use]
206    pub fn skip_admonitions(mut self) -> Self {
207        self.skip_admonitions = true;
208        self
209    }
210
211    /// Skip lines inside MkDocs content tabs (=== "Tab")
212    ///
213    /// Content tabs contain tabbed content that may need special handling.
214    #[must_use]
215    pub fn skip_content_tabs(mut self) -> Self {
216        self.skip_content_tabs = true;
217        self
218    }
219
220    /// Skip lines inside any MkDocs container (admonitions or content tabs)
221    ///
222    /// This is a convenience method that enables both `skip_admonitions` and
223    /// `skip_content_tabs`. MkDocs containers use 4-space indented content
224    /// which may need special handling to preserve structure.
225    #[must_use]
226    pub fn skip_mkdocs_containers(mut self) -> Self {
227        self.skip_admonitions = true;
228        self.skip_content_tabs = true;
229        self
230    }
231
232    /// Skip lines inside definition lists (:  definition)
233    ///
234    /// Definition lists have special formatting that rules should
235    /// not process as regular content.
236    #[must_use]
237    pub fn skip_definition_lists(mut self) -> Self {
238        self.skip_definition_lists = true;
239        self
240    }
241
242    /// Check if a line should be filtered out based on this configuration
243    fn should_filter(&self, line_info: &LineInfo) -> bool {
244        (self.skip_front_matter && line_info.in_front_matter)
245            || (self.skip_code_blocks && line_info.in_code_block)
246            || (self.skip_html_blocks && line_info.in_html_block)
247            || (self.skip_html_comments && line_info.in_html_comment)
248            || (self.skip_mkdocstrings && line_info.in_mkdocstrings)
249            || (self.skip_esm_blocks && line_info.in_esm_block)
250            || (self.skip_math_blocks && line_info.in_math_block)
251            || (self.skip_quarto_divs && line_info.in_quarto_div)
252            || (self.skip_jsx_expressions && line_info.in_jsx_expression)
253            || (self.skip_mdx_comments && line_info.in_mdx_comment)
254            || (self.skip_admonitions && line_info.in_admonition)
255            || (self.skip_content_tabs && line_info.in_content_tab)
256            || (self.skip_definition_lists && line_info.in_definition_list)
257    }
258}
259
260/// Iterator that yields filtered lines based on configuration
261pub struct FilteredLinesIter<'a> {
262    ctx: &'a LintContext<'a>,
263    config: LineFilterConfig,
264    current_index: usize,
265    content_lines: Vec<&'a str>,
266}
267
268impl<'a> FilteredLinesIter<'a> {
269    /// Create a new filtered lines iterator
270    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
271        Self {
272            ctx,
273            config,
274            current_index: 0,
275            content_lines: ctx.content.lines().collect(),
276        }
277    }
278}
279
280impl<'a> Iterator for FilteredLinesIter<'a> {
281    type Item = FilteredLine<'a>;
282
283    fn next(&mut self) -> Option<Self::Item> {
284        let lines = &self.ctx.lines;
285
286        while self.current_index < lines.len() {
287            let idx = self.current_index;
288            self.current_index += 1;
289
290            // Check if this line should be filtered
291            if self.config.should_filter(&lines[idx]) {
292                continue;
293            }
294
295            // Get the actual line content from the document
296            let line_content = self.content_lines.get(idx).copied().unwrap_or("");
297
298            // Return the filtered line with 1-indexed line number
299            return Some(FilteredLine {
300                line_num: idx + 1, // Convert 0-indexed to 1-indexed
301                line_info: &lines[idx],
302                content: line_content,
303            });
304        }
305
306        None
307    }
308}
309
310/// Extension trait that adds filtered iteration methods to `LintContext`
311///
312/// This trait provides convenient methods for iterating over lines while
313/// automatically filtering out non-content regions.
314pub trait FilteredLinesExt {
315    /// Start building a filtered lines iterator
316    ///
317    /// Returns a `LineFilterConfig` builder that can be used to configure
318    /// which types of content should be filtered out.
319    ///
320    /// # Examples
321    ///
322    /// ```rust
323    /// use rumdl_lib::lint_context::LintContext;
324    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
325    ///
326    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
327    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
328    ///
329    /// for line in ctx.filtered_lines().skip_code_blocks() {
330    ///     println!("Line {}: {}", line.line_num, line.content);
331    /// }
332    /// ```
333    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;
334
335    /// Get an iterator over content lines only
336    ///
337    /// This is a convenience method that returns an iterator with front matter
338    /// filtered out by default. This is the most common use case for rules that
339    /// should only process markdown content.
340    ///
341    /// Equivalent to: `ctx.filtered_lines().skip_front_matter()`
342    ///
343    /// # Examples
344    ///
345    /// ```rust
346    /// use rumdl_lib::lint_context::LintContext;
347    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
348    ///
349    /// let content = "---\ntitle: Test\n---\n\n# Content";
350    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
351    ///
352    /// for line in ctx.content_lines() {
353    ///     // Front matter is automatically skipped
354    ///     println!("Line {}: {}", line.line_num, line.content);
355    /// }
356    /// ```
357    fn content_lines(&self) -> FilteredLinesIter<'_>;
358}
359
360/// Builder type that allows chaining filter configuration and converting to an iterator
361pub struct FilteredLinesBuilder<'a> {
362    ctx: &'a LintContext<'a>,
363    config: LineFilterConfig,
364}
365
366impl<'a> FilteredLinesBuilder<'a> {
367    fn new(ctx: &'a LintContext<'a>) -> Self {
368        Self {
369            ctx,
370            config: LineFilterConfig::new(),
371        }
372    }
373
374    /// Skip lines that are part of front matter (YAML/TOML/JSON)
375    #[must_use]
376    pub fn skip_front_matter(mut self) -> Self {
377        self.config = self.config.skip_front_matter();
378        self
379    }
380
381    /// Skip lines inside fenced code blocks
382    #[must_use]
383    pub fn skip_code_blocks(mut self) -> Self {
384        self.config = self.config.skip_code_blocks();
385        self
386    }
387
388    /// Skip lines inside HTML blocks
389    #[must_use]
390    pub fn skip_html_blocks(mut self) -> Self {
391        self.config = self.config.skip_html_blocks();
392        self
393    }
394
395    /// Skip lines inside HTML comments
396    #[must_use]
397    pub fn skip_html_comments(mut self) -> Self {
398        self.config = self.config.skip_html_comments();
399        self
400    }
401
402    /// Skip lines inside mkdocstrings blocks
403    #[must_use]
404    pub fn skip_mkdocstrings(mut self) -> Self {
405        self.config = self.config.skip_mkdocstrings();
406        self
407    }
408
409    /// Skip lines inside ESM (ECMAScript Module) blocks
410    #[must_use]
411    pub fn skip_esm_blocks(mut self) -> Self {
412        self.config = self.config.skip_esm_blocks();
413        self
414    }
415
416    /// Skip lines inside math blocks ($$ ... $$)
417    #[must_use]
418    pub fn skip_math_blocks(mut self) -> Self {
419        self.config = self.config.skip_math_blocks();
420        self
421    }
422
423    /// Skip lines inside Quarto div blocks (::: ... :::)
424    #[must_use]
425    pub fn skip_quarto_divs(mut self) -> Self {
426        self.config = self.config.skip_quarto_divs();
427        self
428    }
429
430    /// Skip lines containing or inside JSX expressions (MDX: {expression})
431    #[must_use]
432    pub fn skip_jsx_expressions(mut self) -> Self {
433        self.config = self.config.skip_jsx_expressions();
434        self
435    }
436
437    /// Skip lines inside MDX comments ({/* ... */})
438    #[must_use]
439    pub fn skip_mdx_comments(mut self) -> Self {
440        self.config = self.config.skip_mdx_comments();
441        self
442    }
443
444    /// Skip lines inside MkDocs admonitions (!!! or ???)
445    #[must_use]
446    pub fn skip_admonitions(mut self) -> Self {
447        self.config = self.config.skip_admonitions();
448        self
449    }
450
451    /// Skip lines inside MkDocs content tabs (=== "Tab")
452    #[must_use]
453    pub fn skip_content_tabs(mut self) -> Self {
454        self.config = self.config.skip_content_tabs();
455        self
456    }
457
458    /// Skip lines inside any MkDocs container (admonitions or content tabs)
459    ///
460    /// This is a convenience method that enables both `skip_admonitions` and
461    /// `skip_content_tabs`. MkDocs containers use 4-space indented content
462    /// which may need special handling to preserve structure.
463    #[must_use]
464    pub fn skip_mkdocs_containers(mut self) -> Self {
465        self.config = self.config.skip_mkdocs_containers();
466        self
467    }
468
469    /// Skip lines inside definition lists (:  definition)
470    #[must_use]
471    pub fn skip_definition_lists(mut self) -> Self {
472        self.config = self.config.skip_definition_lists();
473        self
474    }
475}
476
477impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
478    type Item = FilteredLine<'a>;
479    type IntoIter = FilteredLinesIter<'a>;
480
481    fn into_iter(self) -> Self::IntoIter {
482        FilteredLinesIter::new(self.ctx, self.config)
483    }
484}
485
486impl<'a> FilteredLinesExt for LintContext<'a> {
487    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
488        FilteredLinesBuilder::new(self)
489    }
490
491    fn content_lines(&self) -> FilteredLinesIter<'_> {
492        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
493    }
494}
495
496#[cfg(test)]
497mod tests {
498    use super::*;
499    use crate::config::MarkdownFlavor;
500
501    #[test]
502    fn test_filtered_line_structure() {
503        let content = "# Title\n\nContent";
504        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
505
506        let line = ctx.content_lines().next().unwrap();
507        assert_eq!(line.line_num, 1);
508        assert_eq!(line.content, "# Title");
509        assert!(!line.line_info.in_front_matter);
510    }
511
512    #[test]
513    fn test_skip_front_matter_yaml() {
514        let content = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
515        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
516
517        let lines: Vec<_> = ctx.content_lines().collect();
518        // After front matter (lines 1-4), we have: empty line, "# Content", empty line, "More content"
519        assert_eq!(lines.len(), 4);
520        assert_eq!(lines[0].line_num, 5); // First line after front matter
521        assert_eq!(lines[0].content, "");
522        assert_eq!(lines[1].line_num, 6);
523        assert_eq!(lines[1].content, "# Content");
524        assert_eq!(lines[2].line_num, 7);
525        assert_eq!(lines[2].content, "");
526        assert_eq!(lines[3].line_num, 8);
527        assert_eq!(lines[3].content, "More content");
528    }
529
530    #[test]
531    fn test_skip_front_matter_toml() {
532        let content = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
533        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
534
535        let lines: Vec<_> = ctx.content_lines().collect();
536        assert_eq!(lines.len(), 2); // Empty line + "# Content"
537        assert_eq!(lines[0].line_num, 5);
538        assert_eq!(lines[1].line_num, 6);
539        assert_eq!(lines[1].content, "# Content");
540    }
541
542    #[test]
543    fn test_skip_front_matter_json() {
544        let content = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
545        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
546
547        let lines: Vec<_> = ctx.content_lines().collect();
548        assert_eq!(lines.len(), 2); // Empty line + "# Content"
549        assert_eq!(lines[0].line_num, 5);
550        assert_eq!(lines[1].line_num, 6);
551        assert_eq!(lines[1].content, "# Content");
552    }
553
554    #[test]
555    fn test_skip_code_blocks() {
556        let content = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
557        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
558
559        let lines: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();
560
561        // Should have: "# Title", empty line, "```rust" fence, "```" fence, empty line, "Content"
562        // Wait, actually code blocks include the fences. Let me check the line_info
563        // Looking at the implementation, in_code_block is true for lines INSIDE code blocks
564        // The fences themselves are not marked as in_code_block
565        assert!(lines.iter().any(|l| l.content == "# Title"));
566        assert!(lines.iter().any(|l| l.content == "Content"));
567        // The actual code lines should be filtered out
568        assert!(!lines.iter().any(|l| l.content == "let x = 1;"));
569        assert!(!lines.iter().any(|l| l.content == "let y = 2;"));
570    }
571
572    #[test]
573    fn test_no_filters() {
574        let content = "---\ntitle: Test\n---\n\n# Content";
575        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
576
577        // With no filters, all lines should be included
578        let lines: Vec<_> = ctx.filtered_lines().into_iter().collect();
579        assert_eq!(lines.len(), ctx.lines.len());
580    }
581
582    #[test]
583    fn test_multiple_filters() {
584        let content = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
585        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
586
587        let lines: Vec<_> = ctx
588            .filtered_lines()
589            .skip_front_matter()
590            .skip_code_blocks()
591            .into_iter()
592            .collect();
593
594        // Should skip front matter (lines 1-3) and code block content (line 8)
595        assert!(lines.iter().any(|l| l.content == "# Title"));
596        assert!(lines.iter().any(|l| l.content == "Content"));
597        assert!(!lines.iter().any(|l| l.content == "title: Test"));
598        assert!(!lines.iter().any(|l| l.content == "code"));
599    }
600
601    #[test]
602    fn test_line_numbering_is_1_indexed() {
603        let content = "First\nSecond\nThird";
604        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
605
606        let lines: Vec<_> = ctx.content_lines().collect();
607        assert_eq!(lines[0].line_num, 1);
608        assert_eq!(lines[0].content, "First");
609        assert_eq!(lines[1].line_num, 2);
610        assert_eq!(lines[1].content, "Second");
611        assert_eq!(lines[2].line_num, 3);
612        assert_eq!(lines[2].content, "Third");
613    }
614
615    #[test]
616    fn test_content_lines_convenience_method() {
617        let content = "---\nfoo: bar\n---\n\nContent";
618        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
619
620        // content_lines() should automatically skip front matter
621        let lines: Vec<_> = ctx.content_lines().collect();
622        assert!(!lines.iter().any(|l| l.content.contains("foo")));
623        assert!(lines.iter().any(|l| l.content == "Content"));
624    }
625
626    #[test]
627    fn test_empty_document() {
628        let content = "";
629        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
630
631        let lines: Vec<_> = ctx.content_lines().collect();
632        assert_eq!(lines.len(), 0);
633    }
634
635    #[test]
636    fn test_only_front_matter() {
637        let content = "---\ntitle: Test\n---";
638        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
639
640        let lines: Vec<_> = ctx.content_lines().collect();
641        assert_eq!(
642            lines.len(),
643            0,
644            "Document with only front matter should have no content lines"
645        );
646    }
647
648    #[test]
649    fn test_builder_pattern_ergonomics() {
650        let content = "# Title\n\n```\ncode\n```\n\nContent";
651        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
652
653        // Test that builder pattern works smoothly
654        let _lines: Vec<_> = ctx
655            .filtered_lines()
656            .skip_front_matter()
657            .skip_code_blocks()
658            .skip_html_blocks()
659            .into_iter()
660            .collect();
661
662        // If this compiles and runs, the builder pattern is working
663    }
664
665    #[test]
666    fn test_filtered_line_access_to_line_info() {
667        let content = "# Title\n\nContent";
668        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
669
670        for line in ctx.content_lines() {
671            // Should be able to access line_info fields
672            assert!(!line.line_info.in_front_matter);
673            assert!(!line.line_info.in_code_block);
674        }
675    }
676
677    #[test]
678    fn test_skip_mkdocstrings() {
679        let content = r#"# API Documentation
680
681::: mymodule.MyClass
682    options:
683      show_root_heading: true
684      show_source: false
685
686Some regular content here.
687
688::: mymodule.function
689    options:
690      show_signature: true
691
692More content."#;
693        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
694        let lines: Vec<_> = ctx.filtered_lines().skip_mkdocstrings().into_iter().collect();
695
696        // Verify lines OUTSIDE mkdocstrings blocks are INCLUDED
697        assert!(
698            lines.iter().any(|l| l.content.contains("# API Documentation")),
699            "Should include lines outside mkdocstrings blocks"
700        );
701        assert!(
702            lines.iter().any(|l| l.content.contains("Some regular content")),
703            "Should include content between mkdocstrings blocks"
704        );
705        assert!(
706            lines.iter().any(|l| l.content.contains("More content")),
707            "Should include content after mkdocstrings blocks"
708        );
709
710        // Verify lines INSIDE mkdocstrings blocks are EXCLUDED
711        assert!(
712            !lines.iter().any(|l| l.content.contains("::: mymodule")),
713            "Should exclude mkdocstrings marker lines"
714        );
715        assert!(
716            !lines.iter().any(|l| l.content.contains("show_root_heading")),
717            "Should exclude mkdocstrings option lines"
718        );
719        assert!(
720            !lines.iter().any(|l| l.content.contains("show_signature")),
721            "Should exclude all mkdocstrings option lines"
722        );
723
724        // Verify line numbers are preserved (1-indexed)
725        assert_eq!(lines[0].line_num, 1, "First line should be line 1");
726    }
727
728    #[test]
729    fn test_skip_esm_blocks() {
730        // MDX 2.0+ allows ESM imports/exports anywhere in the document
731        let content = r#"import {Chart} from './components.js'
732import {Table} from './table.js'
733export const year = 2023
734
735# Last year's snowfall
736
737Content about snowfall data.
738
739import {Footer} from './footer.js'
740
741More content."#;
742        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
743        let lines: Vec<_> = ctx.filtered_lines().skip_esm_blocks().into_iter().collect();
744
745        // Verify lines OUTSIDE ESM blocks are INCLUDED
746        assert!(
747            lines.iter().any(|l| l.content.contains("# Last year's snowfall")),
748            "Should include markdown headings"
749        );
750        assert!(
751            lines.iter().any(|l| l.content.contains("Content about snowfall")),
752            "Should include markdown content"
753        );
754        assert!(
755            lines.iter().any(|l| l.content.contains("More content")),
756            "Should include content after ESM blocks"
757        );
758
759        // Verify ALL ESM blocks are EXCLUDED (MDX 2.0+ allows imports anywhere)
760        assert!(
761            !lines.iter().any(|l| l.content.contains("import {Chart}")),
762            "Should exclude import statements at top of file"
763        );
764        assert!(
765            !lines.iter().any(|l| l.content.contains("import {Table}")),
766            "Should exclude all import statements at top of file"
767        );
768        assert!(
769            !lines.iter().any(|l| l.content.contains("export const year")),
770            "Should exclude export statements at top of file"
771        );
772        // MDX 2.0+ allows imports anywhere - they should ALL be excluded
773        assert!(
774            !lines.iter().any(|l| l.content.contains("import {Footer}")),
775            "Should exclude import statements even after markdown content (MDX 2.0+ ESM anywhere)"
776        );
777
778        // Verify line numbers are preserved
779        let heading_line = lines
780            .iter()
781            .find(|l| l.content.contains("# Last year's snowfall"))
782            .unwrap();
783        assert_eq!(heading_line.line_num, 5, "Heading should be on line 5");
784    }
785
786    #[test]
787    fn test_all_filters_combined() {
788        let content = r#"---
789title: Test
790---
791
792# Title
793
794```
795code
796```
797
798<!-- HTML comment here -->
799
800::: mymodule.Class
801    options:
802      show_root_heading: true
803
804<div>
805HTML block
806</div>
807
808Content"#;
809        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
810
811        let lines: Vec<_> = ctx
812            .filtered_lines()
813            .skip_front_matter()
814            .skip_code_blocks()
815            .skip_html_blocks()
816            .skip_html_comments()
817            .skip_mkdocstrings()
818            .into_iter()
819            .collect();
820
821        // Verify markdown content is INCLUDED
822        assert!(
823            lines.iter().any(|l| l.content == "# Title"),
824            "Should include markdown headings"
825        );
826        assert!(
827            lines.iter().any(|l| l.content == "Content"),
828            "Should include markdown content"
829        );
830
831        // Verify all filtered content is EXCLUDED
832        assert!(
833            !lines.iter().any(|l| l.content == "title: Test"),
834            "Should exclude front matter"
835        );
836        assert!(
837            !lines.iter().any(|l| l.content == "code"),
838            "Should exclude code block content"
839        );
840        assert!(
841            !lines.iter().any(|l| l.content.contains("HTML comment")),
842            "Should exclude HTML comments"
843        );
844        assert!(
845            !lines.iter().any(|l| l.content.contains("::: mymodule")),
846            "Should exclude mkdocstrings blocks"
847        );
848        assert!(
849            !lines.iter().any(|l| l.content.contains("show_root_heading")),
850            "Should exclude mkdocstrings options"
851        );
852        assert!(
853            !lines.iter().any(|l| l.content.contains("HTML block")),
854            "Should exclude HTML blocks"
855        );
856    }
857
858    #[test]
859    fn test_skip_math_blocks() {
860        let content = r#"# Heading
861
862Some regular text.
863
864$$
865A = \left[
866\begin{array}{c}
8671 \\
868-D
869\end{array}
870\right]
871$$
872
873More content after math."#;
874        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
875        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
876
877        // Verify lines OUTSIDE math blocks are INCLUDED
878        assert!(
879            lines.iter().any(|l| l.content.contains("# Heading")),
880            "Should include markdown headings"
881        );
882        assert!(
883            lines.iter().any(|l| l.content.contains("Some regular text")),
884            "Should include regular text before math block"
885        );
886        assert!(
887            lines.iter().any(|l| l.content.contains("More content after math")),
888            "Should include content after math block"
889        );
890
891        // Verify lines INSIDE math blocks are EXCLUDED
892        assert!(
893            !lines.iter().any(|l| l.content == "$$"),
894            "Should exclude math block delimiters"
895        );
896        assert!(
897            !lines.iter().any(|l| l.content.contains("\\left[")),
898            "Should exclude LaTeX content inside math block"
899        );
900        assert!(
901            !lines.iter().any(|l| l.content.contains("-D")),
902            "Should exclude content that looks like list items inside math block"
903        );
904        assert!(
905            !lines.iter().any(|l| l.content.contains("\\begin{array}")),
906            "Should exclude LaTeX array content"
907        );
908    }
909
910    #[test]
911    fn test_math_blocks_not_confused_with_code_blocks() {
912        let content = r#"# Title
913
914```python
915# This $$ is inside a code block
916x = 1
917```
918
919$$
920y = 2
921$$
922
923Regular text."#;
924        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
925
926        // Check that the $$ inside code block doesn't start a math block
927        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
928
929        // The $$ inside the code block should NOT trigger math block detection
930        // So when we skip math blocks, the code block content is still there (until we also skip code blocks)
931        assert!(
932            lines.iter().any(|l| l.content.contains("# This $$")),
933            "Code block content with $$ should not be detected as math block"
934        );
935
936        // But the real math block content should be excluded
937        assert!(
938            !lines.iter().any(|l| l.content == "y = 2"),
939            "Actual math block content should be excluded"
940        );
941    }
942
943    #[test]
944    fn test_skip_quarto_divs() {
945        let content = r#"# Heading
946
947::: {.callout-note}
948This is a callout note.
949With multiple lines.
950:::
951
952Regular text outside.
953
954::: {.bordered}
955Content inside bordered div.
956:::
957
958More content."#;
959        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
960        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
961
962        // Verify lines OUTSIDE Quarto divs are INCLUDED
963        assert!(
964            lines.iter().any(|l| l.content.contains("# Heading")),
965            "Should include markdown headings"
966        );
967        assert!(
968            lines.iter().any(|l| l.content.contains("Regular text outside")),
969            "Should include content between divs"
970        );
971        assert!(
972            lines.iter().any(|l| l.content.contains("More content")),
973            "Should include content after divs"
974        );
975
976        // Verify lines INSIDE Quarto divs are EXCLUDED
977        assert!(
978            !lines.iter().any(|l| l.content.contains("::: {.callout-note}")),
979            "Should exclude callout div markers"
980        );
981        assert!(
982            !lines.iter().any(|l| l.content.contains("This is a callout note")),
983            "Should exclude callout content"
984        );
985        assert!(
986            !lines.iter().any(|l| l.content.contains("Content inside bordered")),
987            "Should exclude bordered div content"
988        );
989    }
990
991    #[test]
992    fn test_skip_jsx_expressions() {
993        let content = r#"# MDX Document
994
995Here is some content with {myVariable} inline.
996
997{items.map(item => (
998  <Item key={item.id} />
999))}
1000
1001Regular paragraph after expression.
1002
1003{/* This should NOT be skipped by jsx_expressions filter */}
1004{/* MDX comments have their own filter */}
1005
1006More content."#;
1007        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1008        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1009
1010        // Verify lines OUTSIDE JSX expressions are INCLUDED
1011        assert!(
1012            lines.iter().any(|l| l.content.contains("# MDX Document")),
1013            "Should include markdown headings"
1014        );
1015        assert!(
1016            lines.iter().any(|l| l.content.contains("Regular paragraph")),
1017            "Should include regular paragraphs"
1018        );
1019        assert!(
1020            lines.iter().any(|l| l.content.contains("More content")),
1021            "Should include content after expressions"
1022        );
1023
1024        // Verify lines with JSX expressions are EXCLUDED
1025        assert!(
1026            !lines.iter().any(|l| l.content.contains("{myVariable}")),
1027            "Should exclude lines with inline JSX expressions"
1028        );
1029        assert!(
1030            !lines.iter().any(|l| l.content.contains("items.map")),
1031            "Should exclude multi-line JSX expression content"
1032        );
1033        assert!(
1034            !lines.iter().any(|l| l.content.contains("<Item key")),
1035            "Should exclude JSX inside expressions"
1036        );
1037    }
1038
1039    #[test]
1040    fn test_skip_quarto_divs_nested() {
1041        let content = r#"# Title
1042
1043::: {.outer}
1044Outer content.
1045
1046::: {.inner}
1047Inner content.
1048:::
1049
1050Back to outer.
1051:::
1052
1053Outside text."#;
1054        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
1055        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1056
1057        // Should include content outside all divs
1058        assert!(
1059            lines.iter().any(|l| l.content.contains("# Title")),
1060            "Should include heading"
1061        );
1062        assert!(
1063            lines.iter().any(|l| l.content.contains("Outside text")),
1064            "Should include text after divs"
1065        );
1066
1067        // Should exclude all div content
1068        assert!(
1069            !lines.iter().any(|l| l.content.contains("Outer content")),
1070            "Should exclude outer div content"
1071        );
1072        assert!(
1073            !lines.iter().any(|l| l.content.contains("Inner content")),
1074            "Should exclude inner div content"
1075        );
1076    }
1077
1078    #[test]
1079    fn test_skip_quarto_divs_not_in_standard_flavor() {
1080        let content = r#"::: {.callout-note}
1081This should NOT be skipped in standard flavor.
1082:::"#;
1083        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1084        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1085
1086        // In standard flavor, Quarto divs are not detected, so nothing is skipped
1087        assert!(
1088            lines.iter().any(|l| l.content.contains("This should NOT be skipped")),
1089            "Standard flavor should not detect Quarto divs"
1090        );
1091    }
1092
1093    #[test]
1094    fn test_skip_mdx_comments() {
1095        let content = r#"# MDX Document
1096
1097{/* This is an MDX comment */}
1098
1099Regular content here.
1100
1101{/*
1102  Multi-line
1103  MDX comment
1104*/}
1105
1106More content after comment."#;
1107        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1108        let lines: Vec<_> = ctx.filtered_lines().skip_mdx_comments().into_iter().collect();
1109
1110        // Verify lines OUTSIDE MDX comments are INCLUDED
1111        assert!(
1112            lines.iter().any(|l| l.content.contains("# MDX Document")),
1113            "Should include markdown headings"
1114        );
1115        assert!(
1116            lines.iter().any(|l| l.content.contains("Regular content")),
1117            "Should include regular content"
1118        );
1119        assert!(
1120            lines.iter().any(|l| l.content.contains("More content")),
1121            "Should include content after comments"
1122        );
1123
1124        // Verify lines with MDX comments are EXCLUDED
1125        assert!(
1126            !lines.iter().any(|l| l.content.contains("{/* This is")),
1127            "Should exclude single-line MDX comments"
1128        );
1129        assert!(
1130            !lines.iter().any(|l| l.content.contains("Multi-line")),
1131            "Should exclude multi-line MDX comment content"
1132        );
1133    }
1134
1135    #[test]
1136    fn test_jsx_expressions_with_nested_braces() {
1137        // Test that nested braces are handled correctly
1138        let content = r#"# Document
1139
1140{props.style || {color: "red", background: "blue"}}
1141
1142Regular content."#;
1143        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1144        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1145
1146        // Verify nested braces don't break detection
1147        assert!(
1148            !lines.iter().any(|l| l.content.contains("props.style")),
1149            "Should exclude JSX expression with nested braces"
1150        );
1151        assert!(
1152            lines.iter().any(|l| l.content.contains("Regular content")),
1153            "Should include content after nested expression"
1154        );
1155    }
1156
1157    #[test]
1158    fn test_jsx_and_mdx_comments_combined() {
1159        // Test both filters together
1160        let content = r#"# Title
1161
1162{variable}
1163
1164{/* comment */}
1165
1166Content."#;
1167        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1168        let lines: Vec<_> = ctx
1169            .filtered_lines()
1170            .skip_jsx_expressions()
1171            .skip_mdx_comments()
1172            .into_iter()
1173            .collect();
1174
1175        assert!(
1176            lines.iter().any(|l| l.content.contains("# Title")),
1177            "Should include heading"
1178        );
1179        assert!(
1180            lines.iter().any(|l| l.content.contains("Content")),
1181            "Should include regular content"
1182        );
1183        assert!(
1184            !lines.iter().any(|l| l.content.contains("{variable}")),
1185            "Should exclude JSX expression"
1186        );
1187        assert!(
1188            !lines.iter().any(|l| l.content.contains("{/* comment */")),
1189            "Should exclude MDX comment"
1190        );
1191    }
1192
1193    #[test]
1194    fn test_jsx_expressions_not_detected_in_standard_flavor() {
1195        // JSX expressions should only be detected in MDX flavor
1196        let content = r#"# Document
1197
1198{this is not JSX in standard markdown}
1199
1200Content."#;
1201        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1202        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1203
1204        // In standard markdown, braces are just text - nothing should be filtered
1205        assert!(
1206            lines.iter().any(|l| l.content.contains("{this is not JSX")),
1207            "Should NOT exclude brace content in standard markdown"
1208        );
1209    }
1210}