Skip to main content

rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
3//! This module provides a lightweight abstraction for iterating over markdown lines
4//! while automatically filtering out non-content regions like front matter, code blocks,
5//! and HTML blocks. This ensures rules only process actual markdown content.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
37/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
38#[derive(Debug, Clone)]
39pub struct FilteredLine<'a> {
40    /// The 1-indexed line number in the original document
41    pub line_num: usize,
42    /// Reference to the line's metadata
43    pub line_info: &'a LineInfo,
44    /// The actual line content
45    pub content: &'a str,
46}
47
/// Set of switches selecting which regions are dropped during line iteration.
///
/// Every switch starts out disabled. Enable the ones you need through the
/// builder-style methods:
///
/// ```rust
/// use rumdl_lib::filtered_lines::LineFilterConfig;
///
/// let config = LineFilterConfig::new()
///     .skip_front_matter()
///     .skip_code_blocks()
///     .skip_html_blocks()
///     .skip_html_comments()
///     .skip_mkdocstrings()
///     .skip_esm_blocks()
///     .skip_quarto_divs();
/// ```
#[derive(Clone, Debug, Default)]
pub struct LineFilterConfig {
    /// Drop lines that belong to front matter (YAML/TOML/JSON metadata)
    pub skip_front_matter: bool,
    /// Drop lines that belong to fenced code blocks
    pub skip_code_blocks: bool,
    /// Drop lines that belong to HTML blocks
    pub skip_html_blocks: bool,
    /// Drop lines that belong to HTML comments
    pub skip_html_comments: bool,
    /// Drop lines that belong to mkdocstrings blocks
    pub skip_mkdocstrings: bool,
    /// Drop lines that belong to ESM (ECMAScript Module) blocks
    pub skip_esm_blocks: bool,
    /// Drop lines that belong to math blocks ($$ ... $$)
    pub skip_math_blocks: bool,
    /// Drop lines that belong to Quarto div blocks (::: ... :::)
    pub skip_quarto_divs: bool,
    /// Drop lines containing or inside JSX expressions (MDX: {expression})
    pub skip_jsx_expressions: bool,
    /// Drop lines that belong to MDX comments ({/* ... */})
    pub skip_mdx_comments: bool,
    /// Drop lines that belong to MkDocs admonitions (!!! or ???)
    pub skip_admonitions: bool,
    /// Drop lines that belong to MkDocs content tabs (=== "Tab")
    pub skip_content_tabs: bool,
    /// Drop lines that belong to definition lists (:  definition)
    pub skip_definition_lists: bool,
    /// Drop lines that belong to Obsidian comments (%%...%%)
    pub skip_obsidian_comments: bool,
}
95
96impl LineFilterConfig {
97    /// Create a new filter configuration with all filters disabled
98    #[must_use]
99    pub fn new() -> Self {
100        Self::default()
101    }
102
103    /// Skip lines that are part of front matter (YAML/TOML/JSON)
104    ///
105    /// Front matter is metadata at the start of a markdown file and should
106    /// not be processed by markdown linting rules.
107    #[must_use]
108    pub fn skip_front_matter(mut self) -> Self {
109        self.skip_front_matter = true;
110        self
111    }
112
113    /// Skip lines inside fenced code blocks
114    ///
115    /// Code blocks contain source code, not markdown, and most rules should
116    /// not process them.
117    #[must_use]
118    pub fn skip_code_blocks(mut self) -> Self {
119        self.skip_code_blocks = true;
120        self
121    }
122
123    /// Skip lines inside HTML blocks
124    ///
125    /// HTML blocks contain raw HTML and most markdown rules should not
126    /// process them.
127    #[must_use]
128    pub fn skip_html_blocks(mut self) -> Self {
129        self.skip_html_blocks = true;
130        self
131    }
132
133    /// Skip lines inside HTML comments
134    ///
135    /// HTML comments (<!-- ... -->) are metadata and should not be processed
136    /// by most markdown linting rules.
137    #[must_use]
138    pub fn skip_html_comments(mut self) -> Self {
139        self.skip_html_comments = true;
140        self
141    }
142
143    /// Skip lines inside mkdocstrings blocks
144    ///
145    /// Mkdocstrings blocks contain auto-generated documentation and most
146    /// markdown rules should not process them.
147    #[must_use]
148    pub fn skip_mkdocstrings(mut self) -> Self {
149        self.skip_mkdocstrings = true;
150        self
151    }
152
153    /// Skip lines inside ESM (ECMAScript Module) blocks
154    ///
155    /// ESM blocks contain JavaScript/TypeScript module code and most
156    /// markdown rules should not process them.
157    #[must_use]
158    pub fn skip_esm_blocks(mut self) -> Self {
159        self.skip_esm_blocks = true;
160        self
161    }
162
163    /// Skip lines inside math blocks ($$ ... $$)
164    ///
165    /// Math blocks contain LaTeX/mathematical notation and markdown rules
166    /// should not process them as regular markdown content.
167    #[must_use]
168    pub fn skip_math_blocks(mut self) -> Self {
169        self.skip_math_blocks = true;
170        self
171    }
172
173    /// Skip lines inside Quarto div blocks (::: ... :::)
174    ///
175    /// Quarto divs are fenced containers for callouts, panels, and other
176    /// structured content. Rules may need to skip them for accurate processing.
177    #[must_use]
178    pub fn skip_quarto_divs(mut self) -> Self {
179        self.skip_quarto_divs = true;
180        self
181    }
182
183    /// Skip lines containing or inside JSX expressions (MDX: {expression})
184    ///
185    /// JSX expressions contain JavaScript code and most markdown rules
186    /// should not process them as regular markdown content.
187    #[must_use]
188    pub fn skip_jsx_expressions(mut self) -> Self {
189        self.skip_jsx_expressions = true;
190        self
191    }
192
193    /// Skip lines inside MDX comments ({/* ... */})
194    ///
195    /// MDX comments are metadata and should not be processed by most
196    /// markdown linting rules.
197    #[must_use]
198    pub fn skip_mdx_comments(mut self) -> Self {
199        self.skip_mdx_comments = true;
200        self
201    }
202
203    /// Skip lines inside MkDocs admonitions (!!! or ???)
204    ///
205    /// Admonitions are callout blocks and may have special formatting
206    /// that rules should not process as regular content.
207    #[must_use]
208    pub fn skip_admonitions(mut self) -> Self {
209        self.skip_admonitions = true;
210        self
211    }
212
213    /// Skip lines inside MkDocs content tabs (=== "Tab")
214    ///
215    /// Content tabs contain tabbed content that may need special handling.
216    #[must_use]
217    pub fn skip_content_tabs(mut self) -> Self {
218        self.skip_content_tabs = true;
219        self
220    }
221
222    /// Skip lines inside any MkDocs container (admonitions or content tabs)
223    ///
224    /// This is a convenience method that enables both `skip_admonitions` and
225    /// `skip_content_tabs`. MkDocs containers use 4-space indented content
226    /// which may need special handling to preserve structure.
227    #[must_use]
228    pub fn skip_mkdocs_containers(mut self) -> Self {
229        self.skip_admonitions = true;
230        self.skip_content_tabs = true;
231        self
232    }
233
234    /// Skip lines inside definition lists (:  definition)
235    ///
236    /// Definition lists have special formatting that rules should
237    /// not process as regular content.
238    #[must_use]
239    pub fn skip_definition_lists(mut self) -> Self {
240        self.skip_definition_lists = true;
241        self
242    }
243
244    /// Skip lines inside Obsidian comments (%%...%%)
245    ///
246    /// Obsidian comments are content hidden from rendering and most
247    /// markdown rules should not process them.
248    #[must_use]
249    pub fn skip_obsidian_comments(mut self) -> Self {
250        self.skip_obsidian_comments = true;
251        self
252    }
253
254    /// Check if a line should be filtered out based on this configuration
255    fn should_filter(&self, line_info: &LineInfo) -> bool {
256        (self.skip_front_matter && line_info.in_front_matter)
257            || (self.skip_code_blocks && line_info.in_code_block)
258            || (self.skip_html_blocks && line_info.in_html_block)
259            || (self.skip_html_comments && line_info.in_html_comment)
260            || (self.skip_mkdocstrings && line_info.in_mkdocstrings)
261            || (self.skip_esm_blocks && line_info.in_esm_block)
262            || (self.skip_math_blocks && line_info.in_math_block)
263            || (self.skip_quarto_divs && line_info.in_quarto_div)
264            || (self.skip_jsx_expressions && line_info.in_jsx_expression)
265            || (self.skip_mdx_comments && line_info.in_mdx_comment)
266            || (self.skip_admonitions && line_info.in_admonition)
267            || (self.skip_content_tabs && line_info.in_content_tab)
268            || (self.skip_definition_lists && line_info.in_definition_list)
269            || (self.skip_obsidian_comments && line_info.in_obsidian_comment)
270    }
271}
272
273/// Iterator that yields filtered lines based on configuration
274pub struct FilteredLinesIter<'a> {
275    ctx: &'a LintContext<'a>,
276    config: LineFilterConfig,
277    current_index: usize,
278    content_lines: Vec<&'a str>,
279}
280
281impl<'a> FilteredLinesIter<'a> {
282    /// Create a new filtered lines iterator
283    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
284        Self {
285            ctx,
286            config,
287            current_index: 0,
288            content_lines: ctx.content.lines().collect(),
289        }
290    }
291}
292
293impl<'a> Iterator for FilteredLinesIter<'a> {
294    type Item = FilteredLine<'a>;
295
296    fn next(&mut self) -> Option<Self::Item> {
297        let lines = &self.ctx.lines;
298
299        while self.current_index < lines.len() {
300            let idx = self.current_index;
301            self.current_index += 1;
302
303            // Check if this line should be filtered
304            if self.config.should_filter(&lines[idx]) {
305                continue;
306            }
307
308            // Get the actual line content from the document
309            let line_content = self.content_lines.get(idx).copied().unwrap_or("");
310
311            // Return the filtered line with 1-indexed line number
312            return Some(FilteredLine {
313                line_num: idx + 1, // Convert 0-indexed to 1-indexed
314                line_info: &lines[idx],
315                content: line_content,
316            });
317        }
318
319        None
320    }
321}
322
323/// Extension trait that adds filtered iteration methods to `LintContext`
324///
325/// This trait provides convenient methods for iterating over lines while
326/// automatically filtering out non-content regions.
327pub trait FilteredLinesExt {
328    /// Start building a filtered lines iterator
329    ///
330    /// Returns a `FilteredLinesBuilder` that can be used to configure
331    /// which types of content should be filtered out.
332    ///
333    /// # Examples
334    ///
335    /// ```rust
336    /// use rumdl_lib::lint_context::LintContext;
337    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
338    ///
339    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
340    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
341    ///
342    /// for line in ctx.filtered_lines().skip_code_blocks() {
343    ///     println!("Line {}: {}", line.line_num, line.content);
344    /// }
345    /// ```
346    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;
347
348    /// Get an iterator over content lines only
349    ///
350    /// This is a convenience method that returns an iterator with front matter
351    /// filtered out by default. This is the most common use case for rules that
352    /// should only process markdown content.
353    ///
354    /// Equivalent to: `ctx.filtered_lines().skip_front_matter().into_iter()`
355    ///
356    /// # Examples
357    ///
358    /// ```rust
359    /// use rumdl_lib::lint_context::LintContext;
360    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
361    ///
362    /// let content = "---\ntitle: Test\n---\n\n# Content";
363    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
364    ///
365    /// for line in ctx.content_lines() {
366    ///     // Front matter is automatically skipped
367    ///     println!("Line {}: {}", line.line_num, line.content);
368    /// }
369    /// ```
370    fn content_lines(&self) -> FilteredLinesIter<'_>;
371}
372
373/// Builder type that allows chaining filter configuration and converting to an iterator
374pub struct FilteredLinesBuilder<'a> {
375    ctx: &'a LintContext<'a>,
376    config: LineFilterConfig,
377}
378
379impl<'a> FilteredLinesBuilder<'a> {
380    fn new(ctx: &'a LintContext<'a>) -> Self {
381        Self {
382            ctx,
383            config: LineFilterConfig::new(),
384        }
385    }
386
387    /// Skip lines that are part of front matter (YAML/TOML/JSON)
388    #[must_use]
389    pub fn skip_front_matter(mut self) -> Self {
390        self.config = self.config.skip_front_matter();
391        self
392    }
393
394    /// Skip lines inside fenced code blocks
395    #[must_use]
396    pub fn skip_code_blocks(mut self) -> Self {
397        self.config = self.config.skip_code_blocks();
398        self
399    }
400
401    /// Skip lines inside HTML blocks
402    #[must_use]
403    pub fn skip_html_blocks(mut self) -> Self {
404        self.config = self.config.skip_html_blocks();
405        self
406    }
407
408    /// Skip lines inside HTML comments
409    #[must_use]
410    pub fn skip_html_comments(mut self) -> Self {
411        self.config = self.config.skip_html_comments();
412        self
413    }
414
415    /// Skip lines inside mkdocstrings blocks
416    #[must_use]
417    pub fn skip_mkdocstrings(mut self) -> Self {
418        self.config = self.config.skip_mkdocstrings();
419        self
420    }
421
422    /// Skip lines inside ESM (ECMAScript Module) blocks
423    #[must_use]
424    pub fn skip_esm_blocks(mut self) -> Self {
425        self.config = self.config.skip_esm_blocks();
426        self
427    }
428
429    /// Skip lines inside math blocks ($$ ... $$)
430    #[must_use]
431    pub fn skip_math_blocks(mut self) -> Self {
432        self.config = self.config.skip_math_blocks();
433        self
434    }
435
436    /// Skip lines inside Quarto div blocks (::: ... :::)
437    #[must_use]
438    pub fn skip_quarto_divs(mut self) -> Self {
439        self.config = self.config.skip_quarto_divs();
440        self
441    }
442
443    /// Skip lines containing or inside JSX expressions (MDX: {expression})
444    #[must_use]
445    pub fn skip_jsx_expressions(mut self) -> Self {
446        self.config = self.config.skip_jsx_expressions();
447        self
448    }
449
450    /// Skip lines inside MDX comments ({/* ... */})
451    #[must_use]
452    pub fn skip_mdx_comments(mut self) -> Self {
453        self.config = self.config.skip_mdx_comments();
454        self
455    }
456
457    /// Skip lines inside MkDocs admonitions (!!! or ???)
458    #[must_use]
459    pub fn skip_admonitions(mut self) -> Self {
460        self.config = self.config.skip_admonitions();
461        self
462    }
463
464    /// Skip lines inside MkDocs content tabs (=== "Tab")
465    #[must_use]
466    pub fn skip_content_tabs(mut self) -> Self {
467        self.config = self.config.skip_content_tabs();
468        self
469    }
470
471    /// Skip lines inside any MkDocs container (admonitions or content tabs)
472    ///
473    /// This is a convenience method that enables both `skip_admonitions` and
474    /// `skip_content_tabs`. MkDocs containers use 4-space indented content
475    /// which may need special handling to preserve structure.
476    #[must_use]
477    pub fn skip_mkdocs_containers(mut self) -> Self {
478        self.config = self.config.skip_mkdocs_containers();
479        self
480    }
481
482    /// Skip lines inside definition lists (:  definition)
483    #[must_use]
484    pub fn skip_definition_lists(mut self) -> Self {
485        self.config = self.config.skip_definition_lists();
486        self
487    }
488
489    /// Skip lines inside Obsidian comments (%%...%%)
490    #[must_use]
491    pub fn skip_obsidian_comments(mut self) -> Self {
492        self.config = self.config.skip_obsidian_comments();
493        self
494    }
495}
496
497impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
498    type Item = FilteredLine<'a>;
499    type IntoIter = FilteredLinesIter<'a>;
500
501    fn into_iter(self) -> Self::IntoIter {
502        FilteredLinesIter::new(self.ctx, self.config)
503    }
504}
505
506impl<'a> FilteredLinesExt for LintContext<'a> {
507    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
508        FilteredLinesBuilder::new(self)
509    }
510
511    fn content_lines(&self) -> FilteredLinesIter<'_> {
512        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
513    }
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519    use crate::config::MarkdownFlavor;
520
521    #[test]
522    fn test_filtered_line_structure() {
523        let content = "# Title\n\nContent";
524        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
525
526        let line = ctx.content_lines().next().unwrap();
527        assert_eq!(line.line_num, 1);
528        assert_eq!(line.content, "# Title");
529        assert!(!line.line_info.in_front_matter);
530    }
531
532    #[test]
533    fn test_skip_front_matter_yaml() {
534        let content = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
535        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
536
537        let lines: Vec<_> = ctx.content_lines().collect();
538        // After front matter (lines 1-4), we have: empty line, "# Content", empty line, "More content"
539        assert_eq!(lines.len(), 4);
540        assert_eq!(lines[0].line_num, 5); // First line after front matter
541        assert_eq!(lines[0].content, "");
542        assert_eq!(lines[1].line_num, 6);
543        assert_eq!(lines[1].content, "# Content");
544        assert_eq!(lines[2].line_num, 7);
545        assert_eq!(lines[2].content, "");
546        assert_eq!(lines[3].line_num, 8);
547        assert_eq!(lines[3].content, "More content");
548    }
549
550    #[test]
551    fn test_skip_front_matter_toml() {
552        let content = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
553        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
554
555        let lines: Vec<_> = ctx.content_lines().collect();
556        assert_eq!(lines.len(), 2); // Empty line + "# Content"
557        assert_eq!(lines[0].line_num, 5);
558        assert_eq!(lines[1].line_num, 6);
559        assert_eq!(lines[1].content, "# Content");
560    }
561
562    #[test]
563    fn test_skip_front_matter_json() {
564        let content = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
565        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
566
567        let lines: Vec<_> = ctx.content_lines().collect();
568        assert_eq!(lines.len(), 2); // Empty line + "# Content"
569        assert_eq!(lines[0].line_num, 5);
570        assert_eq!(lines[1].line_num, 6);
571        assert_eq!(lines[1].content, "# Content");
572    }
573
574    #[test]
575    fn test_skip_code_blocks() {
576        let content = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
577        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
578
579        let lines: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();
580
581        // Lines outside the fenced block ("# Title" and "Content") must be yielded,
582        // and the code lines between the fences must be filtered out.
583        // Whether the fence lines themselves carry `in_code_block` is decided by
584        // `LintContext`, so this test deliberately makes no assertion about them.
585        assert!(lines.iter().any(|l| l.content == "# Title"));
586        assert!(lines.iter().any(|l| l.content == "Content"));
587        // The actual code lines should be filtered out
588        assert!(!lines.iter().any(|l| l.content == "let x = 1;"));
589        assert!(!lines.iter().any(|l| l.content == "let y = 2;"));
590    }
591
592    #[test]
593    fn test_no_filters() {
594        let content = "---\ntitle: Test\n---\n\n# Content";
595        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
596
597        // With no filters, all lines should be included
598        let lines: Vec<_> = ctx.filtered_lines().into_iter().collect();
599        assert_eq!(lines.len(), ctx.lines.len());
600    }
601
602    #[test]
603    fn test_multiple_filters() {
604        let content = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
605        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
606
607        let lines: Vec<_> = ctx
608            .filtered_lines()
609            .skip_front_matter()
610            .skip_code_blocks()
611            .into_iter()
612            .collect();
613
614        // Should skip front matter (lines 1-3) and code block content (line 8)
615        assert!(lines.iter().any(|l| l.content == "# Title"));
616        assert!(lines.iter().any(|l| l.content == "Content"));
617        assert!(!lines.iter().any(|l| l.content == "title: Test"));
618        assert!(!lines.iter().any(|l| l.content == "code"));
619    }
620
621    #[test]
622    fn test_line_numbering_is_1_indexed() {
623        let content = "First\nSecond\nThird";
624        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
625
626        let lines: Vec<_> = ctx.content_lines().collect();
627        assert_eq!(lines[0].line_num, 1);
628        assert_eq!(lines[0].content, "First");
629        assert_eq!(lines[1].line_num, 2);
630        assert_eq!(lines[1].content, "Second");
631        assert_eq!(lines[2].line_num, 3);
632        assert_eq!(lines[2].content, "Third");
633    }
634
635    #[test]
636    fn test_content_lines_convenience_method() {
637        let content = "---\nfoo: bar\n---\n\nContent";
638        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
639
640        // content_lines() should automatically skip front matter
641        let lines: Vec<_> = ctx.content_lines().collect();
642        assert!(!lines.iter().any(|l| l.content.contains("foo")));
643        assert!(lines.iter().any(|l| l.content == "Content"));
644    }
645
646    #[test]
647    fn test_empty_document() {
648        let content = "";
649        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
650
651        let lines: Vec<_> = ctx.content_lines().collect();
652        assert_eq!(lines.len(), 0);
653    }
654
655    #[test]
656    fn test_only_front_matter() {
657        let content = "---\ntitle: Test\n---";
658        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
659
660        let lines: Vec<_> = ctx.content_lines().collect();
661        assert_eq!(
662            lines.len(),
663            0,
664            "Document with only front matter should have no content lines"
665        );
666    }
667
668    #[test]
669    fn test_builder_pattern_ergonomics() {
670        let content = "# Title\n\n```\ncode\n```\n\nContent";
671        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
672
673        // Test that builder pattern works smoothly
674        let _lines: Vec<_> = ctx
675            .filtered_lines()
676            .skip_front_matter()
677            .skip_code_blocks()
678            .skip_html_blocks()
679            .into_iter()
680            .collect();
681
682        // If this compiles and runs, the builder pattern is working
683    }
684
685    #[test]
686    fn test_filtered_line_access_to_line_info() {
687        let content = "# Title\n\nContent";
688        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
689
690        for line in ctx.content_lines() {
691            // Should be able to access line_info fields
692            assert!(!line.line_info.in_front_matter);
693            assert!(!line.line_info.in_code_block);
694        }
695    }
696
697    #[test]
698    fn test_skip_mkdocstrings() {
699        let content = r#"# API Documentation
700
701::: mymodule.MyClass
702    options:
703      show_root_heading: true
704      show_source: false
705
706Some regular content here.
707
708::: mymodule.function
709    options:
710      show_signature: true
711
712More content."#;
713        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
714        let lines: Vec<_> = ctx.filtered_lines().skip_mkdocstrings().into_iter().collect();
715
716        // Verify lines OUTSIDE mkdocstrings blocks are INCLUDED
717        assert!(
718            lines.iter().any(|l| l.content.contains("# API Documentation")),
719            "Should include lines outside mkdocstrings blocks"
720        );
721        assert!(
722            lines.iter().any(|l| l.content.contains("Some regular content")),
723            "Should include content between mkdocstrings blocks"
724        );
725        assert!(
726            lines.iter().any(|l| l.content.contains("More content")),
727            "Should include content after mkdocstrings blocks"
728        );
729
730        // Verify lines INSIDE mkdocstrings blocks are EXCLUDED
731        assert!(
732            !lines.iter().any(|l| l.content.contains("::: mymodule")),
733            "Should exclude mkdocstrings marker lines"
734        );
735        assert!(
736            !lines.iter().any(|l| l.content.contains("show_root_heading")),
737            "Should exclude mkdocstrings option lines"
738        );
739        assert!(
740            !lines.iter().any(|l| l.content.contains("show_signature")),
741            "Should exclude all mkdocstrings option lines"
742        );
743
744        // Verify line numbers are preserved (1-indexed)
745        assert_eq!(lines[0].line_num, 1, "First line should be line 1");
746    }
747
748    #[test]
749    fn test_skip_esm_blocks() {
750        // MDX 2.0+ allows ESM imports/exports anywhere in the document
751        let content = r#"import {Chart} from './components.js'
752import {Table} from './table.js'
753export const year = 2023
754
755# Last year's snowfall
756
757Content about snowfall data.
758
759import {Footer} from './footer.js'
760
761More content."#;
762        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
763        let lines: Vec<_> = ctx.filtered_lines().skip_esm_blocks().into_iter().collect();
764
765        // Verify lines OUTSIDE ESM blocks are INCLUDED
766        assert!(
767            lines.iter().any(|l| l.content.contains("# Last year's snowfall")),
768            "Should include markdown headings"
769        );
770        assert!(
771            lines.iter().any(|l| l.content.contains("Content about snowfall")),
772            "Should include markdown content"
773        );
774        assert!(
775            lines.iter().any(|l| l.content.contains("More content")),
776            "Should include content after ESM blocks"
777        );
778
779        // Verify ALL ESM blocks are EXCLUDED (MDX 2.0+ allows imports anywhere)
780        assert!(
781            !lines.iter().any(|l| l.content.contains("import {Chart}")),
782            "Should exclude import statements at top of file"
783        );
784        assert!(
785            !lines.iter().any(|l| l.content.contains("import {Table}")),
786            "Should exclude all import statements at top of file"
787        );
788        assert!(
789            !lines.iter().any(|l| l.content.contains("export const year")),
790            "Should exclude export statements at top of file"
791        );
792        // MDX 2.0+ allows imports anywhere - they should ALL be excluded
793        assert!(
794            !lines.iter().any(|l| l.content.contains("import {Footer}")),
795            "Should exclude import statements even after markdown content (MDX 2.0+ ESM anywhere)"
796        );
797
798        // Verify line numbers are preserved
799        let heading_line = lines
800            .iter()
801            .find(|l| l.content.contains("# Last year's snowfall"))
802            .unwrap();
803        assert_eq!(heading_line.line_num, 5, "Heading should be on line 5");
804    }
805
806    #[test]
807    fn test_all_filters_combined() {
808        let content = r#"---
809title: Test
810---
811
812# Title
813
814```
815code
816```
817
818<!-- HTML comment here -->
819
820::: mymodule.Class
821    options:
822      show_root_heading: true
823
824<div>
825HTML block
826</div>
827
828Content"#;
829        let ctx = LintContext::new(content, MarkdownFlavor::MkDocs, None);
830
831        let lines: Vec<_> = ctx
832            .filtered_lines()
833            .skip_front_matter()
834            .skip_code_blocks()
835            .skip_html_blocks()
836            .skip_html_comments()
837            .skip_mkdocstrings()
838            .into_iter()
839            .collect();
840
841        // Verify markdown content is INCLUDED
842        assert!(
843            lines.iter().any(|l| l.content == "# Title"),
844            "Should include markdown headings"
845        );
846        assert!(
847            lines.iter().any(|l| l.content == "Content"),
848            "Should include markdown content"
849        );
850
851        // Verify all filtered content is EXCLUDED
852        assert!(
853            !lines.iter().any(|l| l.content == "title: Test"),
854            "Should exclude front matter"
855        );
856        assert!(
857            !lines.iter().any(|l| l.content == "code"),
858            "Should exclude code block content"
859        );
860        assert!(
861            !lines.iter().any(|l| l.content.contains("HTML comment")),
862            "Should exclude HTML comments"
863        );
864        assert!(
865            !lines.iter().any(|l| l.content.contains("::: mymodule")),
866            "Should exclude mkdocstrings blocks"
867        );
868        assert!(
869            !lines.iter().any(|l| l.content.contains("show_root_heading")),
870            "Should exclude mkdocstrings options"
871        );
872        assert!(
873            !lines.iter().any(|l| l.content.contains("HTML block")),
874            "Should exclude HTML blocks"
875        );
876    }
877
878    #[test]
879    fn test_skip_math_blocks() {
880        let content = r#"# Heading
881
882Some regular text.
883
884$$
885A = \left[
886\begin{array}{c}
8871 \\
888-D
889\end{array}
890\right]
891$$
892
893More content after math."#;
894        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
895        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
896
897        // Verify lines OUTSIDE math blocks are INCLUDED
898        assert!(
899            lines.iter().any(|l| l.content.contains("# Heading")),
900            "Should include markdown headings"
901        );
902        assert!(
903            lines.iter().any(|l| l.content.contains("Some regular text")),
904            "Should include regular text before math block"
905        );
906        assert!(
907            lines.iter().any(|l| l.content.contains("More content after math")),
908            "Should include content after math block"
909        );
910
911        // Verify lines INSIDE math blocks are EXCLUDED
912        assert!(
913            !lines.iter().any(|l| l.content == "$$"),
914            "Should exclude math block delimiters"
915        );
916        assert!(
917            !lines.iter().any(|l| l.content.contains("\\left[")),
918            "Should exclude LaTeX content inside math block"
919        );
920        assert!(
921            !lines.iter().any(|l| l.content.contains("-D")),
922            "Should exclude content that looks like list items inside math block"
923        );
924        assert!(
925            !lines.iter().any(|l| l.content.contains("\\begin{array}")),
926            "Should exclude LaTeX array content"
927        );
928    }
929
930    #[test]
931    fn test_math_blocks_not_confused_with_code_blocks() {
932        let content = r#"# Title
933
934```python
935# This $$ is inside a code block
936x = 1
937```
938
939$$
940y = 2
941$$
942
943Regular text."#;
944        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
945
946        // Check that the $$ inside code block doesn't start a math block
947        let lines: Vec<_> = ctx.filtered_lines().skip_math_blocks().into_iter().collect();
948
949        // The $$ inside the code block should NOT trigger math block detection
950        // So when we skip math blocks, the code block content is still there (until we also skip code blocks)
951        assert!(
952            lines.iter().any(|l| l.content.contains("# This $$")),
953            "Code block content with $$ should not be detected as math block"
954        );
955
956        // But the real math block content should be excluded
957        assert!(
958            !lines.iter().any(|l| l.content == "y = 2"),
959            "Actual math block content should be excluded"
960        );
961    }
962
963    #[test]
964    fn test_skip_quarto_divs() {
965        let content = r#"# Heading
966
967::: {.callout-note}
968This is a callout note.
969With multiple lines.
970:::
971
972Regular text outside.
973
974::: {.bordered}
975Content inside bordered div.
976:::
977
978More content."#;
979        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
980        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
981
982        // Verify lines OUTSIDE Quarto divs are INCLUDED
983        assert!(
984            lines.iter().any(|l| l.content.contains("# Heading")),
985            "Should include markdown headings"
986        );
987        assert!(
988            lines.iter().any(|l| l.content.contains("Regular text outside")),
989            "Should include content between divs"
990        );
991        assert!(
992            lines.iter().any(|l| l.content.contains("More content")),
993            "Should include content after divs"
994        );
995
996        // Verify lines INSIDE Quarto divs are EXCLUDED
997        assert!(
998            !lines.iter().any(|l| l.content.contains("::: {.callout-note}")),
999            "Should exclude callout div markers"
1000        );
1001        assert!(
1002            !lines.iter().any(|l| l.content.contains("This is a callout note")),
1003            "Should exclude callout content"
1004        );
1005        assert!(
1006            !lines.iter().any(|l| l.content.contains("Content inside bordered")),
1007            "Should exclude bordered div content"
1008        );
1009    }
1010
1011    #[test]
1012    fn test_skip_jsx_expressions() {
1013        let content = r#"# MDX Document
1014
1015Here is some content with {myVariable} inline.
1016
1017{items.map(item => (
1018  <Item key={item.id} />
1019))}
1020
1021Regular paragraph after expression.
1022
1023{/* This should NOT be skipped by jsx_expressions filter */}
1024{/* MDX comments have their own filter */}
1025
1026More content."#;
1027        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1028        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1029
1030        // Verify lines OUTSIDE JSX expressions are INCLUDED
1031        assert!(
1032            lines.iter().any(|l| l.content.contains("# MDX Document")),
1033            "Should include markdown headings"
1034        );
1035        assert!(
1036            lines.iter().any(|l| l.content.contains("Regular paragraph")),
1037            "Should include regular paragraphs"
1038        );
1039        assert!(
1040            lines.iter().any(|l| l.content.contains("More content")),
1041            "Should include content after expressions"
1042        );
1043
1044        // Verify lines with JSX expressions are EXCLUDED
1045        assert!(
1046            !lines.iter().any(|l| l.content.contains("{myVariable}")),
1047            "Should exclude lines with inline JSX expressions"
1048        );
1049        assert!(
1050            !lines.iter().any(|l| l.content.contains("items.map")),
1051            "Should exclude multi-line JSX expression content"
1052        );
1053        assert!(
1054            !lines.iter().any(|l| l.content.contains("<Item key")),
1055            "Should exclude JSX inside expressions"
1056        );
1057    }
1058
1059    #[test]
1060    fn test_skip_quarto_divs_nested() {
1061        let content = r#"# Title
1062
1063::: {.outer}
1064Outer content.
1065
1066::: {.inner}
1067Inner content.
1068:::
1069
1070Back to outer.
1071:::
1072
1073Outside text."#;
1074        let ctx = LintContext::new(content, MarkdownFlavor::Quarto, None);
1075        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1076
1077        // Should include content outside all divs
1078        assert!(
1079            lines.iter().any(|l| l.content.contains("# Title")),
1080            "Should include heading"
1081        );
1082        assert!(
1083            lines.iter().any(|l| l.content.contains("Outside text")),
1084            "Should include text after divs"
1085        );
1086
1087        // Should exclude all div content
1088        assert!(
1089            !lines.iter().any(|l| l.content.contains("Outer content")),
1090            "Should exclude outer div content"
1091        );
1092        assert!(
1093            !lines.iter().any(|l| l.content.contains("Inner content")),
1094            "Should exclude inner div content"
1095        );
1096    }
1097
1098    #[test]
1099    fn test_skip_quarto_divs_not_in_standard_flavor() {
1100        let content = r#"::: {.callout-note}
1101This should NOT be skipped in standard flavor.
1102:::"#;
1103        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1104        let lines: Vec<_> = ctx.filtered_lines().skip_quarto_divs().into_iter().collect();
1105
1106        // In standard flavor, Quarto divs are not detected, so nothing is skipped
1107        assert!(
1108            lines.iter().any(|l| l.content.contains("This should NOT be skipped")),
1109            "Standard flavor should not detect Quarto divs"
1110        );
1111    }
1112
1113    #[test]
1114    fn test_skip_mdx_comments() {
1115        let content = r#"# MDX Document
1116
1117{/* This is an MDX comment */}
1118
1119Regular content here.
1120
1121{/*
1122  Multi-line
1123  MDX comment
1124*/}
1125
1126More content after comment."#;
1127        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1128        let lines: Vec<_> = ctx.filtered_lines().skip_mdx_comments().into_iter().collect();
1129
1130        // Verify lines OUTSIDE MDX comments are INCLUDED
1131        assert!(
1132            lines.iter().any(|l| l.content.contains("# MDX Document")),
1133            "Should include markdown headings"
1134        );
1135        assert!(
1136            lines.iter().any(|l| l.content.contains("Regular content")),
1137            "Should include regular content"
1138        );
1139        assert!(
1140            lines.iter().any(|l| l.content.contains("More content")),
1141            "Should include content after comments"
1142        );
1143
1144        // Verify lines with MDX comments are EXCLUDED
1145        assert!(
1146            !lines.iter().any(|l| l.content.contains("{/* This is")),
1147            "Should exclude single-line MDX comments"
1148        );
1149        assert!(
1150            !lines.iter().any(|l| l.content.contains("Multi-line")),
1151            "Should exclude multi-line MDX comment content"
1152        );
1153    }
1154
1155    #[test]
1156    fn test_jsx_expressions_with_nested_braces() {
1157        // Test that nested braces are handled correctly
1158        let content = r#"# Document
1159
1160{props.style || {color: "red", background: "blue"}}
1161
1162Regular content."#;
1163        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1164        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1165
1166        // Verify nested braces don't break detection
1167        assert!(
1168            !lines.iter().any(|l| l.content.contains("props.style")),
1169            "Should exclude JSX expression with nested braces"
1170        );
1171        assert!(
1172            lines.iter().any(|l| l.content.contains("Regular content")),
1173            "Should include content after nested expression"
1174        );
1175    }
1176
1177    #[test]
1178    fn test_jsx_and_mdx_comments_combined() {
1179        // Test both filters together
1180        let content = r#"# Title
1181
1182{variable}
1183
1184{/* comment */}
1185
1186Content."#;
1187        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
1188        let lines: Vec<_> = ctx
1189            .filtered_lines()
1190            .skip_jsx_expressions()
1191            .skip_mdx_comments()
1192            .into_iter()
1193            .collect();
1194
1195        assert!(
1196            lines.iter().any(|l| l.content.contains("# Title")),
1197            "Should include heading"
1198        );
1199        assert!(
1200            lines.iter().any(|l| l.content.contains("Content")),
1201            "Should include regular content"
1202        );
1203        assert!(
1204            !lines.iter().any(|l| l.content.contains("{variable}")),
1205            "Should exclude JSX expression"
1206        );
1207        assert!(
1208            !lines.iter().any(|l| l.content.contains("{/* comment */")),
1209            "Should exclude MDX comment"
1210        );
1211    }
1212
1213    #[test]
1214    fn test_jsx_expressions_not_detected_in_standard_flavor() {
1215        // JSX expressions should only be detected in MDX flavor
1216        let content = r#"# Document
1217
1218{this is not JSX in standard markdown}
1219
1220Content."#;
1221        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1222        let lines: Vec<_> = ctx.filtered_lines().skip_jsx_expressions().into_iter().collect();
1223
1224        // In standard markdown, braces are just text - nothing should be filtered
1225        assert!(
1226            lines.iter().any(|l| l.content.contains("{this is not JSX")),
1227            "Should NOT exclude brace content in standard markdown"
1228        );
1229    }
1230
1231    // ==================== Obsidian Comment Tests ====================
1232
1233    #[test]
1234    fn test_skip_obsidian_comments_simple_inline() {
1235        // Simple inline comment: text %%hidden%% text
1236        let content = r#"# Heading
1237
1238This is visible %%this is hidden%% and visible again.
1239
1240More content."#;
1241        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1242        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1243
1244        // All lines should be included - inline comments don't hide entire lines
1245        assert!(
1246            lines.iter().any(|l| l.content.contains("# Heading")),
1247            "Should include heading"
1248        );
1249        assert!(
1250            lines.iter().any(|l| l.content.contains("This is visible")),
1251            "Should include line with inline comment"
1252        );
1253        assert!(
1254            lines.iter().any(|l| l.content.contains("More content")),
1255            "Should include content after comment"
1256        );
1257    }
1258
1259    #[test]
1260    fn test_skip_obsidian_comments_multiline_block() {
1261        // Multi-line comment block
1262        let content = r#"# Heading
1263
1264%%
1265This is a multi-line
1266comment block
1267%%
1268
1269Content after."#;
1270        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1271        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1272
1273        // Should include content outside the comment block
1274        assert!(
1275            lines.iter().any(|l| l.content.contains("# Heading")),
1276            "Should include heading"
1277        );
1278        assert!(
1279            lines.iter().any(|l| l.content.contains("Content after")),
1280            "Should include content after comment block"
1281        );
1282
1283        // Lines inside the comment block should be excluded
1284        assert!(
1285            !lines.iter().any(|l| l.content.contains("This is a multi-line")),
1286            "Should exclude multi-line comment content"
1287        );
1288        assert!(
1289            !lines.iter().any(|l| l.content.contains("comment block")),
1290            "Should exclude multi-line comment content"
1291        );
1292    }
1293
1294    #[test]
1295    fn test_skip_obsidian_comments_in_code_block() {
1296        // %% inside code blocks should NOT be treated as comments
1297        let content = r#"# Heading
1298
1299```
1300%% This is NOT a comment
1301It's inside a code block
1302%%
1303```
1304
1305Content."#;
1306        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1307        let lines: Vec<_> = ctx
1308            .filtered_lines()
1309            .skip_obsidian_comments()
1310            .skip_code_blocks()
1311            .into_iter()
1312            .collect();
1313
1314        // The code block content should be excluded by skip_code_blocks, not by obsidian comments
1315        assert!(
1316            lines.iter().any(|l| l.content.contains("# Heading")),
1317            "Should include heading"
1318        );
1319        assert!(
1320            lines.iter().any(|l| l.content.contains("Content")),
1321            "Should include content after code block"
1322        );
1323    }
1324
1325    #[test]
1326    fn test_skip_obsidian_comments_in_html_comment() {
1327        // %% inside HTML comments should NOT be treated as Obsidian comments
1328        let content = r#"# Heading
1329
1330<!-- %% This is inside HTML comment %% -->
1331
1332Content."#;
1333        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1334        let lines: Vec<_> = ctx
1335            .filtered_lines()
1336            .skip_obsidian_comments()
1337            .skip_html_comments()
1338            .into_iter()
1339            .collect();
1340
1341        assert!(
1342            lines.iter().any(|l| l.content.contains("# Heading")),
1343            "Should include heading"
1344        );
1345        assert!(
1346            lines.iter().any(|l| l.content.contains("Content")),
1347            "Should include content"
1348        );
1349    }
1350
1351    #[test]
1352    fn test_skip_obsidian_comments_empty() {
1353        // Empty comment: %%%%
1354        let content = r#"# Heading
1355
1356%%%% empty comment
1357
1358Content."#;
1359        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1360        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1361
1362        // Empty comments should be handled gracefully
1363        assert!(
1364            lines.iter().any(|l| l.content.contains("# Heading")),
1365            "Should include heading"
1366        );
1367    }
1368
1369    #[test]
1370    fn test_skip_obsidian_comments_unclosed() {
1371        // Unclosed comment extends to end of document
1372        let content = r#"# Heading
1373
1374%% starts but never ends
1375This should be hidden
1376Until end of document"#;
1377        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1378        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1379
1380        // Should include content before the unclosed comment
1381        assert!(
1382            lines.iter().any(|l| l.content.contains("# Heading")),
1383            "Should include heading before unclosed comment"
1384        );
1385
1386        // Content after the %% should be excluded
1387        assert!(
1388            !lines.iter().any(|l| l.content.contains("This should be hidden")),
1389            "Should exclude content in unclosed comment"
1390        );
1391        assert!(
1392            !lines.iter().any(|l| l.content.contains("Until end of document")),
1393            "Should exclude content until end of document"
1394        );
1395    }
1396
1397    #[test]
1398    fn test_skip_obsidian_comments_multiple_on_same_line() {
1399        // Multiple comments on same line
1400        let content = r#"# Heading
1401
1402First %%hidden1%% middle %%hidden2%% last
1403
1404Content."#;
1405        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1406        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1407
1408        // Line should still be included (inline comments)
1409        assert!(
1410            lines.iter().any(|l| l.content.contains("First")),
1411            "Should include line with multiple inline comments"
1412        );
1413        assert!(
1414            lines.iter().any(|l| l.content.contains("middle")),
1415            "Should include visible text between comments"
1416        );
1417    }
1418
1419    #[test]
1420    fn test_skip_obsidian_comments_at_start_of_line() {
1421        // Comment at start of line
1422        let content = r#"# Heading
1423
1424%%comment at start%%
1425
1426Content."#;
1427        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1428        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1429
1430        assert!(
1431            lines.iter().any(|l| l.content.contains("# Heading")),
1432            "Should include heading"
1433        );
1434        assert!(
1435            lines.iter().any(|l| l.content.contains("Content")),
1436            "Should include content"
1437        );
1438    }
1439
1440    #[test]
1441    fn test_skip_obsidian_comments_at_end_of_line() {
1442        // Comment at end of line
1443        let content = r#"# Heading
1444
1445Some text %%comment at end%%
1446
1447Content."#;
1448        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1449        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1450
1451        assert!(
1452            lines.iter().any(|l| l.content.contains("Some text")),
1453            "Should include text before comment"
1454        );
1455    }
1456
1457    #[test]
1458    fn test_skip_obsidian_comments_with_markdown_inside() {
1459        // Comments containing special markdown
1460        let content = r#"# Heading
1461
1462%%
1463# hidden heading
1464[hidden link](url)
1465**hidden bold**
1466%%
1467
1468Content."#;
1469        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1470        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1471
1472        assert!(
1473            !lines.iter().any(|l| l.content.contains("# hidden heading")),
1474            "Should exclude heading inside comment"
1475        );
1476        assert!(
1477            !lines.iter().any(|l| l.content.contains("[hidden link]")),
1478            "Should exclude link inside comment"
1479        );
1480        assert!(
1481            !lines.iter().any(|l| l.content.contains("**hidden bold**")),
1482            "Should exclude bold inside comment"
1483        );
1484    }
1485
1486    #[test]
1487    fn test_skip_obsidian_comments_with_unicode() {
1488        // Unicode content inside comments
1489        let content = r#"# Heading
1490
1491%%日本語コメント%%
1492
1493%%Комментарий%%
1494
1495Content."#;
1496        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1497        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1498
1499        // Lines with only comments should be handled properly
1500        assert!(
1501            lines.iter().any(|l| l.content.contains("# Heading")),
1502            "Should include heading"
1503        );
1504        assert!(
1505            lines.iter().any(|l| l.content.contains("Content")),
1506            "Should include content"
1507        );
1508    }
1509
1510    #[test]
1511    fn test_skip_obsidian_comments_triple_percent() {
1512        // Odd number of percent signs: %%%
1513        let content = r#"# Heading
1514
1515%%% odd percent
1516
1517Content."#;
1518        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1519        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1520
1521        // Should handle gracefully - the %%% starts a comment, single % is content
1522        assert!(
1523            lines.iter().any(|l| l.content.contains("# Heading")),
1524            "Should include heading"
1525        );
1526    }
1527
1528    #[test]
1529    fn test_skip_obsidian_comments_not_in_standard_flavor() {
1530        // Obsidian comments should NOT be detected in Standard flavor
1531        let content = r#"# Heading
1532
1533%%this is not hidden in standard%%
1534
1535Content."#;
1536        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1537        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1538
1539        // In Standard flavor, %% is just text - nothing should be filtered
1540        assert!(
1541            lines.iter().any(|l| l.content.contains("%%this is not hidden")),
1542            "Should NOT hide %% content in Standard flavor"
1543        );
1544    }
1545
1546    #[test]
1547    fn test_skip_obsidian_comments_integration_with_other_filters() {
1548        // Test combining with frontmatter and code block filters
1549        let content = r#"---
1550title: Test
1551---
1552
1553# Heading
1554
1555```
1556code
1557```
1558
1559%%hidden comment%%
1560
1561Content."#;
1562        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1563        let lines: Vec<_> = ctx
1564            .filtered_lines()
1565            .skip_front_matter()
1566            .skip_code_blocks()
1567            .skip_obsidian_comments()
1568            .into_iter()
1569            .collect();
1570
1571        // Should skip frontmatter, code blocks, and Obsidian comments
1572        assert!(
1573            !lines.iter().any(|l| l.content.contains("title: Test")),
1574            "Should skip frontmatter"
1575        );
1576        assert!(
1577            !lines.iter().any(|l| l.content == "code"),
1578            "Should skip code block content"
1579        );
1580        assert!(
1581            lines.iter().any(|l| l.content.contains("# Heading")),
1582            "Should include heading"
1583        );
1584        assert!(
1585            lines.iter().any(|l| l.content.contains("Content")),
1586            "Should include content"
1587        );
1588    }
1589
1590    #[test]
1591    fn test_skip_obsidian_comments_whole_line_only() {
1592        // Multi-line comment should only mark lines entirely within the comment
1593        let content = "start %%\nfully hidden\n%% end";
1594        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1595        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1596
1597        // First line starts before comment, should be included
1598        assert!(
1599            lines.iter().any(|l| l.content.contains("start")),
1600            "First line should be included (starts outside comment)"
1601        );
1602        // Middle line is entirely within comment, should be excluded
1603        assert!(
1604            !lines.iter().any(|l| l.content == "fully hidden"),
1605            "Middle line should be excluded (entirely within comment)"
1606        );
1607        // Last line ends after comment, should be included
1608        assert!(
1609            lines.iter().any(|l| l.content.contains("end")),
1610            "Last line should be included (ends outside comment)"
1611        );
1612    }
1613
1614    #[test]
1615    fn test_skip_obsidian_comments_in_inline_code() {
1616        // %% inside inline code spans should NOT be treated as comments
1617        let content = r#"# Heading
1618
1619The syntax is `%%comment%%` in Obsidian.
1620
1621Content."#;
1622        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1623        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1624
1625        // The line with code span should be included
1626        assert!(
1627            lines.iter().any(|l| l.content.contains("The syntax is")),
1628            "Should include line with %% in code span"
1629        );
1630        assert!(
1631            lines.iter().any(|l| l.content.contains("in Obsidian")),
1632            "Should include text after code span"
1633        );
1634    }
1635
1636    #[test]
1637    fn test_skip_obsidian_comments_in_inline_code_multi_backtick() {
1638        // %% inside inline code spans with multiple backticks should NOT be treated as comments
1639        let content = r#"# Heading
1640
1641The syntax is ``%%comment%%`` in Obsidian.
1642
1643Content."#;
1644        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1645        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1646
1647        assert!(
1648            lines.iter().any(|l| l.content.contains("The syntax is")),
1649            "Should include line with %% in multi-backtick code span"
1650        );
1651        assert!(
1652            lines.iter().any(|l| l.content.contains("Content")),
1653            "Should include content after code span"
1654        );
1655    }
1656
1657    #[test]
1658    fn test_skip_obsidian_comments_consecutive_blocks() {
1659        // Multiple consecutive comment blocks
1660        let content = r#"# Heading
1661
1662%%comment 1%%
1663
1664%%comment 2%%
1665
1666Content."#;
1667        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1668        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1669
1670        assert!(
1671            lines.iter().any(|l| l.content.contains("# Heading")),
1672            "Should include heading"
1673        );
1674        assert!(
1675            lines.iter().any(|l| l.content.contains("Content")),
1676            "Should include content after comments"
1677        );
1678    }
1679
1680    #[test]
1681    fn test_skip_obsidian_comments_spanning_many_lines() {
1682        // Comment block spanning many lines
1683        let content = r#"# Title
1684
1685%%
1686Line 1 of comment
1687Line 2 of comment
1688Line 3 of comment
1689Line 4 of comment
1690Line 5 of comment
1691%%
1692
1693After comment."#;
1694        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1695        let lines: Vec<_> = ctx.filtered_lines().skip_obsidian_comments().into_iter().collect();
1696
1697        // All lines inside the comment should be excluded
1698        for i in 1..=5 {
1699            assert!(
1700                !lines
1701                    .iter()
1702                    .any(|l| l.content.contains(&format!("Line {i} of comment"))),
1703                "Should exclude line {i} of comment"
1704            );
1705        }
1706
1707        assert!(
1708            lines.iter().any(|l| l.content.contains("# Title")),
1709            "Should include title"
1710        );
1711        assert!(
1712            lines.iter().any(|l| l.content.contains("After comment")),
1713            "Should include content after comment"
1714        );
1715    }
1716
1717    #[test]
1718    fn test_obsidian_comment_line_info_field() {
1719        // Verify the in_obsidian_comment field is set correctly
1720        let content = "visible\n%%\nhidden\n%%\nvisible";
1721        let ctx = LintContext::new(content, MarkdownFlavor::Obsidian, None);
1722
1723        // Line 0: visible - should NOT be in comment
1724        assert!(
1725            !ctx.lines[0].in_obsidian_comment,
1726            "Line 0 should not be marked as in_obsidian_comment"
1727        );
1728
1729        // Line 2: hidden - should be in comment
1730        assert!(
1731            ctx.lines[2].in_obsidian_comment,
1732            "Line 2 (hidden) should be marked as in_obsidian_comment"
1733        );
1734
1735        // Line 4: visible - should NOT be in comment
1736        assert!(
1737            !ctx.lines[4].in_obsidian_comment,
1738            "Line 4 should not be marked as in_obsidian_comment"
1739        );
1740    }
1741}