rumdl_lib/
filtered_lines.rs

//! Filtered line iteration for markdown linting
//!
//! This module provides a lightweight abstraction for iterating over markdown lines
//! while automatically filtering out non-content regions like front matter, code blocks,
//! and HTML blocks. This ensures rules only process actual markdown content.
//!
//! # Architecture
//!
//! The filtered iterator approach centralizes the logic of what content should be
//! processed by rules, eliminating error-prone manual checks in each rule implementation.
//!
//! # Examples
//!
//! ```rust
//! use rumdl_lib::lint_context::LintContext;
//! use rumdl_lib::filtered_lines::FilteredLinesExt;
//!
//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
//!
//! // Simple: get all content lines (skips front matter by default)
//! for line in ctx.content_lines() {
//!     println!("Line {}: {}", line.line_num, line.content);
//! }
//!
//! // Advanced: custom filter configuration
//! for line in ctx.filtered_lines()
//!     .skip_code_blocks()
//!     .skip_front_matter()
//!     .skip_html_blocks() {
//!     println!("Line {}: {}", line.line_num, line.content);
//! }
//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
37/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
38#[derive(Debug, Clone)]
39pub struct FilteredLine<'a> {
40    /// The 1-indexed line number in the original document
41    pub line_num: usize,
42    /// Reference to the line's metadata
43    pub line_info: &'a LineInfo,
44    /// The actual line content
45    pub content: &'a str,
46}
47
/// Configuration for filtering lines during iteration
///
/// Every flag starts out `false` (nothing is skipped). Use the builder methods
/// to switch on the kinds of content that should be skipped:
///
/// ```rust
/// use rumdl_lib::filtered_lines::LineFilterConfig;
///
/// let config = LineFilterConfig::new()
///     .skip_front_matter()
///     .skip_code_blocks()
///     .skip_html_blocks()
///     .skip_html_comments()
///     .skip_mkdocstrings()
///     .skip_esm_blocks();
/// ```
///
/// The type implements `PartialEq`/`Eq`, so two configurations can be compared
/// directly (for example in assertions).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct LineFilterConfig {
    /// Skip lines inside front matter (YAML/TOML/JSON metadata)
    pub skip_front_matter: bool,
    /// Skip lines inside fenced code blocks
    pub skip_code_blocks: bool,
    /// Skip lines inside HTML blocks
    pub skip_html_blocks: bool,
    /// Skip lines inside HTML comments
    pub skip_html_comments: bool,
    /// Skip lines inside mkdocstrings blocks
    pub skip_mkdocstrings: bool,
    /// Skip lines inside ESM (ECMAScript Module) blocks
    pub skip_esm_blocks: bool,
}
78
79impl LineFilterConfig {
80    /// Create a new filter configuration with all filters disabled
81    #[must_use]
82    pub fn new() -> Self {
83        Self::default()
84    }
85
86    /// Skip lines that are part of front matter (YAML/TOML/JSON)
87    ///
88    /// Front matter is metadata at the start of a markdown file and should
89    /// not be processed by markdown linting rules.
90    #[must_use]
91    pub fn skip_front_matter(mut self) -> Self {
92        self.skip_front_matter = true;
93        self
94    }
95
96    /// Skip lines inside fenced code blocks
97    ///
98    /// Code blocks contain source code, not markdown, and most rules should
99    /// not process them.
100    #[must_use]
101    pub fn skip_code_blocks(mut self) -> Self {
102        self.skip_code_blocks = true;
103        self
104    }
105
106    /// Skip lines inside HTML blocks
107    ///
108    /// HTML blocks contain raw HTML and most markdown rules should not
109    /// process them.
110    #[must_use]
111    pub fn skip_html_blocks(mut self) -> Self {
112        self.skip_html_blocks = true;
113        self
114    }
115
116    /// Skip lines inside HTML comments
117    ///
118    /// HTML comments (<!-- ... -->) are metadata and should not be processed
119    /// by most markdown linting rules.
120    #[must_use]
121    pub fn skip_html_comments(mut self) -> Self {
122        self.skip_html_comments = true;
123        self
124    }
125
126    /// Skip lines inside mkdocstrings blocks
127    ///
128    /// Mkdocstrings blocks contain auto-generated documentation and most
129    /// markdown rules should not process them.
130    #[must_use]
131    pub fn skip_mkdocstrings(mut self) -> Self {
132        self.skip_mkdocstrings = true;
133        self
134    }
135
136    /// Skip lines inside ESM (ECMAScript Module) blocks
137    ///
138    /// ESM blocks contain JavaScript/TypeScript module code and most
139    /// markdown rules should not process them.
140    #[must_use]
141    pub fn skip_esm_blocks(mut self) -> Self {
142        self.skip_esm_blocks = true;
143        self
144    }
145
146    /// Check if a line should be filtered out based on this configuration
147    fn should_filter(&self, line_info: &LineInfo) -> bool {
148        (self.skip_front_matter && line_info.in_front_matter)
149            || (self.skip_code_blocks && line_info.in_code_block)
150            || (self.skip_html_blocks && line_info.in_html_block)
151            || (self.skip_html_comments && line_info.in_html_comment)
152            || (self.skip_mkdocstrings && line_info.in_mkdocstrings)
153            || (self.skip_esm_blocks && line_info.in_esm_block)
154    }
155}
156
157/// Iterator that yields filtered lines based on configuration
158pub struct FilteredLinesIter<'a> {
159    ctx: &'a LintContext<'a>,
160    config: LineFilterConfig,
161    current_index: usize,
162    content_lines: Vec<&'a str>,
163}
164
165impl<'a> FilteredLinesIter<'a> {
166    /// Create a new filtered lines iterator
167    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
168        Self {
169            ctx,
170            config,
171            current_index: 0,
172            content_lines: ctx.content.lines().collect(),
173        }
174    }
175}
176
177impl<'a> Iterator for FilteredLinesIter<'a> {
178    type Item = FilteredLine<'a>;
179
180    fn next(&mut self) -> Option<Self::Item> {
181        let lines = &self.ctx.lines;
182
183        while self.current_index < lines.len() {
184            let idx = self.current_index;
185            self.current_index += 1;
186
187            // Check if this line should be filtered
188            if self.config.should_filter(&lines[idx]) {
189                continue;
190            }
191
192            // Get the actual line content from the document
193            let line_content = self.content_lines.get(idx).copied().unwrap_or("");
194
195            // Return the filtered line with 1-indexed line number
196            return Some(FilteredLine {
197                line_num: idx + 1, // Convert 0-indexed to 1-indexed
198                line_info: &lines[idx],
199                content: line_content,
200            });
201        }
202
203        None
204    }
205}
206
207/// Extension trait that adds filtered iteration methods to `LintContext`
208///
209/// This trait provides convenient methods for iterating over lines while
210/// automatically filtering out non-content regions.
211pub trait FilteredLinesExt {
212    /// Start building a filtered lines iterator
213    ///
214    /// Returns a `LineFilterConfig` builder that can be used to configure
215    /// which types of content should be filtered out.
216    ///
217    /// # Examples
218    ///
219    /// ```rust
220    /// use rumdl_lib::lint_context::LintContext;
221    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
222    ///
223    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
224    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
225    ///
226    /// for line in ctx.filtered_lines().skip_code_blocks() {
227    ///     println!("Line {}: {}", line.line_num, line.content);
228    /// }
229    /// ```
230    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;
231
232    /// Get an iterator over content lines only
233    ///
234    /// This is a convenience method that returns an iterator with front matter
235    /// filtered out by default. This is the most common use case for rules that
236    /// should only process markdown content.
237    ///
238    /// Equivalent to: `ctx.filtered_lines().skip_front_matter()`
239    ///
240    /// # Examples
241    ///
242    /// ```rust
243    /// use rumdl_lib::lint_context::LintContext;
244    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
245    ///
246    /// let content = "---\ntitle: Test\n---\n\n# Content";
247    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard, None);
248    ///
249    /// for line in ctx.content_lines() {
250    ///     // Front matter is automatically skipped
251    ///     println!("Line {}: {}", line.line_num, line.content);
252    /// }
253    /// ```
254    fn content_lines(&self) -> FilteredLinesIter<'_>;
255}
256
257/// Builder type that allows chaining filter configuration and converting to an iterator
258pub struct FilteredLinesBuilder<'a> {
259    ctx: &'a LintContext<'a>,
260    config: LineFilterConfig,
261}
262
263impl<'a> FilteredLinesBuilder<'a> {
264    fn new(ctx: &'a LintContext<'a>) -> Self {
265        Self {
266            ctx,
267            config: LineFilterConfig::new(),
268        }
269    }
270
271    /// Skip lines that are part of front matter (YAML/TOML/JSON)
272    #[must_use]
273    pub fn skip_front_matter(mut self) -> Self {
274        self.config = self.config.skip_front_matter();
275        self
276    }
277
278    /// Skip lines inside fenced code blocks
279    #[must_use]
280    pub fn skip_code_blocks(mut self) -> Self {
281        self.config = self.config.skip_code_blocks();
282        self
283    }
284
285    /// Skip lines inside HTML blocks
286    #[must_use]
287    pub fn skip_html_blocks(mut self) -> Self {
288        self.config = self.config.skip_html_blocks();
289        self
290    }
291
292    /// Skip lines inside HTML comments
293    #[must_use]
294    pub fn skip_html_comments(mut self) -> Self {
295        self.config = self.config.skip_html_comments();
296        self
297    }
298
299    /// Skip lines inside mkdocstrings blocks
300    #[must_use]
301    pub fn skip_mkdocstrings(mut self) -> Self {
302        self.config = self.config.skip_mkdocstrings();
303        self
304    }
305
306    /// Skip lines inside ESM (ECMAScript Module) blocks
307    #[must_use]
308    pub fn skip_esm_blocks(mut self) -> Self {
309        self.config = self.config.skip_esm_blocks();
310        self
311    }
312}
313
314impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
315    type Item = FilteredLine<'a>;
316    type IntoIter = FilteredLinesIter<'a>;
317
318    fn into_iter(self) -> Self::IntoIter {
319        FilteredLinesIter::new(self.ctx, self.config)
320    }
321}
322
323impl<'a> FilteredLinesExt for LintContext<'a> {
324    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
325        FilteredLinesBuilder::new(self)
326    }
327
328    fn content_lines(&self) -> FilteredLinesIter<'_> {
329        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// True when some yielded line's content equals `expected` exactly.
    fn has_exact(lines: &[FilteredLine<'_>], expected: &str) -> bool {
        lines.iter().any(|line| line.content == expected)
    }

    /// True when some yielded line's content contains `fragment`.
    fn has_fragment(lines: &[FilteredLine<'_>], fragment: &str) -> bool {
        lines.iter().any(|line| line.content.contains(fragment))
    }

    #[test]
    fn test_filtered_line_structure() {
        let markdown = "# Title\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let first = ctx.content_lines().next().unwrap();
        assert_eq!(first.line_num, 1);
        assert_eq!(first.content, "# Title");
        assert!(!first.line_info.in_front_matter);
    }

    #[test]
    fn test_skip_front_matter_yaml() {
        let markdown = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        // Lines 1-4 are front matter; what remains is a blank line, the
        // heading, another blank line, and the closing paragraph.
        let observed: Vec<(usize, &str)> = ctx
            .content_lines()
            .map(|line| (line.line_num, line.content))
            .collect();
        assert_eq!(
            observed,
            vec![(5, ""), (6, "# Content"), (7, ""), (8, "More content")]
        );
    }

    #[test]
    fn test_skip_front_matter_toml() {
        let markdown = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let kept: Vec<_> = ctx.content_lines().collect();
        // Only the blank line and "# Content" follow the front matter
        assert_eq!(kept.len(), 2);
        let numbers: Vec<usize> = kept.iter().map(|line| line.line_num).collect();
        assert_eq!(numbers, [5, 6]);
        assert_eq!(kept[1].content, "# Content");
    }

    #[test]
    fn test_skip_front_matter_json() {
        let markdown = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let kept: Vec<_> = ctx.content_lines().collect();
        // Only the blank line and "# Content" follow the front matter
        assert_eq!(kept.len(), 2);
        let numbers: Vec<usize> = kept.iter().map(|line| line.line_num).collect();
        assert_eq!(numbers, [5, 6]);
        assert_eq!(kept[1].content, "# Content");
    }

    #[test]
    fn test_skip_code_blocks() {
        let markdown = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let kept: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();

        // Whether the fence lines themselves survive depends on how the
        // context flags them, so only the surrounding markdown and the code
        // body are pinned here.
        assert!(has_exact(&kept, "# Title"));
        assert!(has_exact(&kept, "Content"));
        // The code body must be gone
        assert!(!has_exact(&kept, "let x = 1;"));
        assert!(!has_exact(&kept, "let y = 2;"));
    }

    #[test]
    fn test_no_filters() {
        let markdown = "---\ntitle: Test\n---\n\n# Content";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        // An unconfigured builder must yield one item per line of the context
        let yielded = ctx.filtered_lines().into_iter().count();
        assert_eq!(yielded, ctx.lines.len());
    }

    #[test]
    fn test_multiple_filters() {
        let markdown = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let builder = ctx.filtered_lines().skip_front_matter().skip_code_blocks();
        let kept: Vec<_> = builder.into_iter().collect();

        // Front matter (lines 1-3) and the code body (line 8) are dropped,
        // while ordinary markdown stays.
        assert!(has_exact(&kept, "# Title"));
        assert!(has_exact(&kept, "Content"));
        assert!(!has_exact(&kept, "title: Test"));
        assert!(!has_exact(&kept, "code"));
    }

    #[test]
    fn test_line_numbering_is_1_indexed() {
        let markdown = "First\nSecond\nThird";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let observed: Vec<(usize, &str)> = ctx
            .content_lines()
            .map(|line| (line.line_num, line.content))
            .collect();
        assert_eq!(observed[..3], [(1, "First"), (2, "Second"), (3, "Third")]);
    }

    #[test]
    fn test_content_lines_convenience_method() {
        let markdown = "---\nfoo: bar\n---\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        // Front matter is dropped without any explicit configuration
        let kept: Vec<_> = ctx.content_lines().collect();
        assert!(!has_fragment(&kept, "foo"));
        assert!(has_exact(&kept, "Content"));
    }

    #[test]
    fn test_empty_document() {
        let markdown = "";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.content_lines().count(), 0);
    }

    #[test]
    fn test_only_front_matter() {
        let markdown = "---\ntitle: Test\n---";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        let yielded = ctx.content_lines().count();
        assert_eq!(
            yielded, 0,
            "Document with only front matter should have no content lines"
        );
    }

    #[test]
    fn test_builder_pattern_ergonomics() {
        let markdown = "# Title\n\n```\ncode\n```\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        // Compiling and running this chain without panicking is the whole test
        let builder = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .skip_html_blocks();
        let _kept: Vec<_> = builder.into_iter().collect();
    }

    #[test]
    fn test_filtered_line_access_to_line_info() {
        let markdown = "# Title\n\nContent";
        let ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

        // Metadata fields must be reachable through every yielded item
        for FilteredLine { line_info, .. } in ctx.content_lines() {
            assert!(!line_info.in_front_matter);
            assert!(!line_info.in_code_block);
        }
    }

    #[test]
    fn test_skip_mkdocstrings() {
        let markdown = r#"# API Documentation

::: mymodule.MyClass
    options:
      show_root_heading: true
      show_source: false

Some regular content here.

::: mymodule.function
    options:
      show_signature: true

More content."#;
        let ctx = LintContext::new(markdown, MarkdownFlavor::MkDocs, None);
        let kept: Vec<_> = ctx.filtered_lines().skip_mkdocstrings().into_iter().collect();

        // Text outside the mkdocstrings blocks must survive
        assert!(
            has_fragment(&kept, "# API Documentation"),
            "Should include lines outside mkdocstrings blocks"
        );
        assert!(
            has_fragment(&kept, "Some regular content"),
            "Should include content between mkdocstrings blocks"
        );
        assert!(
            has_fragment(&kept, "More content"),
            "Should include content after mkdocstrings blocks"
        );

        // Everything belonging to a mkdocstrings block must be dropped
        assert!(
            !has_fragment(&kept, "::: mymodule"),
            "Should exclude mkdocstrings marker lines"
        );
        assert!(
            !has_fragment(&kept, "show_root_heading"),
            "Should exclude mkdocstrings option lines"
        );
        assert!(
            !has_fragment(&kept, "show_signature"),
            "Should exclude all mkdocstrings option lines"
        );

        // Original 1-indexed numbering is kept
        assert_eq!(kept[0].line_num, 1, "First line should be line 1");
    }

    #[test]
    fn test_skip_esm_blocks() {
        let markdown = r#"import {Chart} from './components.js'
import {Table} from './table.js'
export const year = 2023

# Last year's snowfall

Content about snowfall data.

import {Footer} from './footer.js'

More content."#;
        let ctx = LintContext::new(markdown, MarkdownFlavor::MDX, None);
        let kept: Vec<_> = ctx.filtered_lines().skip_esm_blocks().into_iter().collect();

        // Markdown outside the ESM block must survive
        assert!(
            has_fragment(&kept, "# Last year's snowfall"),
            "Should include markdown headings"
        );
        assert!(
            has_fragment(&kept, "Content about snowfall"),
            "Should include markdown content"
        );
        assert!(
            has_fragment(&kept, "More content"),
            "Should include content after ESM blocks"
        );

        // The import/export statements at the top of the file must be dropped
        assert!(
            !has_fragment(&kept, "import {Chart}"),
            "Should exclude import statements at top of file"
        );
        assert!(
            !has_fragment(&kept, "import {Table}"),
            "Should exclude all import statements at top of file"
        );
        assert!(
            !has_fragment(&kept, "export const year"),
            "Should exclude export statements at top of file"
        );
        // An `import` that appears after markdown has started is expected to
        // be treated as ordinary content and therefore kept
        assert!(
            has_fragment(&kept, "import {Footer}"),
            "Should include import statements after markdown content (not in ESM block)"
        );

        // Original numbering is kept for the lines that remain
        let heading_number = kept
            .iter()
            .find(|line| line.content.contains("# Last year's snowfall"))
            .map(|line| line.line_num)
            .unwrap();
        assert_eq!(heading_number, 5, "Heading should be on line 5");
    }

    #[test]
    fn test_all_filters_combined() {
        let markdown = r#"---
title: Test
---

# Title

```
code
```

<!-- HTML comment here -->

::: mymodule.Class
    options:
      show_root_heading: true

<div>
HTML block
</div>

Content"#;
        let ctx = LintContext::new(markdown, MarkdownFlavor::MkDocs, None);

        let builder = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .skip_html_blocks()
            .skip_html_comments()
            .skip_mkdocstrings();
        let kept: Vec<_> = builder.into_iter().collect();

        // Ordinary markdown must survive
        assert!(has_exact(&kept, "# Title"), "Should include markdown headings");
        assert!(has_exact(&kept, "Content"), "Should include markdown content");

        // Every filtered kind of region must be dropped
        assert!(!has_exact(&kept, "title: Test"), "Should exclude front matter");
        assert!(!has_exact(&kept, "code"), "Should exclude code block content");
        assert!(
            !has_fragment(&kept, "HTML comment"),
            "Should exclude HTML comments"
        );
        assert!(
            !has_fragment(&kept, "::: mymodule"),
            "Should exclude mkdocstrings blocks"
        );
        assert!(
            !has_fragment(&kept, "show_root_heading"),
            "Should exclude mkdocstrings options"
        );
        assert!(
            !has_fragment(&kept, "HTML block"),
            "Should exclude HTML blocks"
        );
    }
}
693}