rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
//! This module provides a lightweight abstraction for iterating over markdown lines
//! while filtering out non-content regions such as front matter, code blocks,
//! HTML blocks, and HTML comments. Each filter is opt-in, so a rule only sees the
//! regions it has not asked to skip.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
///
/// All three fields describe the same line of the original document: its
/// position, its metadata entry in the lint context, and its text.
#[derive(Debug, Clone)]
pub struct FilteredLine<'a> {
    /// The 1-indexed line number in the original document.
    ///
    /// Lines that were filtered out still count, so consecutive items of a
    /// filtered iteration may have non-consecutive line numbers.
    pub line_num: usize,
    /// Reference to the line's metadata
    pub line_info: &'a LineInfo,
    /// The actual line content.
    ///
    /// The iterator substitutes `""` when the document text has no line at
    /// this position.
    pub content: &'a str,
}
47
/// Configuration for filtering lines during iteration
///
/// Every flag defaults to `false` (nothing is skipped). Use the builder
/// pattern to configure which types of content should be skipped:
///
/// ```rust
/// use rumdl_lib::filtered_lines::LineFilterConfig;
///
/// let config = LineFilterConfig::new()
///     .skip_front_matter()
///     .skip_code_blocks()
///     .skip_html_blocks()
///     .skip_html_comments();
/// ```
#[derive(Debug, Clone, Default)]
pub struct LineFilterConfig {
    /// Skip lines inside front matter (YAML/TOML/JSON metadata),
    /// i.e. lines whose `LineInfo::in_front_matter` flag is set
    pub skip_front_matter: bool,
    /// Skip lines inside code blocks,
    /// i.e. lines whose `LineInfo::in_code_block` flag is set
    pub skip_code_blocks: bool,
    /// Skip lines inside HTML blocks,
    /// i.e. lines whose `LineInfo::in_html_block` flag is set
    pub skip_html_blocks: bool,
    /// Skip lines inside HTML comments,
    /// i.e. lines whose `LineInfo::in_html_comment` flag is set
    pub skip_html_comments: bool,
}
72
73impl LineFilterConfig {
74    /// Create a new filter configuration with all filters disabled
75    #[must_use]
76    pub fn new() -> Self {
77        Self::default()
78    }
79
80    /// Skip lines that are part of front matter (YAML/TOML/JSON)
81    ///
82    /// Front matter is metadata at the start of a markdown file and should
83    /// not be processed by markdown linting rules.
84    #[must_use]
85    pub fn skip_front_matter(mut self) -> Self {
86        self.skip_front_matter = true;
87        self
88    }
89
90    /// Skip lines inside fenced code blocks
91    ///
92    /// Code blocks contain source code, not markdown, and most rules should
93    /// not process them.
94    #[must_use]
95    pub fn skip_code_blocks(mut self) -> Self {
96        self.skip_code_blocks = true;
97        self
98    }
99
100    /// Skip lines inside HTML blocks
101    ///
102    /// HTML blocks contain raw HTML and most markdown rules should not
103    /// process them.
104    #[must_use]
105    pub fn skip_html_blocks(mut self) -> Self {
106        self.skip_html_blocks = true;
107        self
108    }
109
110    /// Skip lines inside HTML comments
111    ///
112    /// HTML comments (<!-- ... -->) are metadata and should not be processed
113    /// by most markdown linting rules.
114    #[must_use]
115    pub fn skip_html_comments(mut self) -> Self {
116        self.skip_html_comments = true;
117        self
118    }
119
120    /// Check if a line should be filtered out based on this configuration
121    fn should_filter(&self, line_info: &LineInfo) -> bool {
122        (self.skip_front_matter && line_info.in_front_matter)
123            || (self.skip_code_blocks && line_info.in_code_block)
124            || (self.skip_html_blocks && line_info.in_html_block)
125            || (self.skip_html_comments && line_info.in_html_comment)
126    }
127}
128
129/// Iterator that yields filtered lines based on configuration
130pub struct FilteredLinesIter<'a> {
131    ctx: &'a LintContext<'a>,
132    config: LineFilterConfig,
133    current_index: usize,
134    content_lines: Vec<&'a str>,
135}
136
137impl<'a> FilteredLinesIter<'a> {
138    /// Create a new filtered lines iterator
139    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
140        Self {
141            ctx,
142            config,
143            current_index: 0,
144            content_lines: ctx.content.lines().collect(),
145        }
146    }
147}
148
149impl<'a> Iterator for FilteredLinesIter<'a> {
150    type Item = FilteredLine<'a>;
151
152    fn next(&mut self) -> Option<Self::Item> {
153        let lines = &self.ctx.lines;
154
155        while self.current_index < lines.len() {
156            let idx = self.current_index;
157            self.current_index += 1;
158
159            // Check if this line should be filtered
160            if self.config.should_filter(&lines[idx]) {
161                continue;
162            }
163
164            // Get the actual line content from the document
165            let line_content = self.content_lines.get(idx).copied().unwrap_or("");
166
167            // Return the filtered line with 1-indexed line number
168            return Some(FilteredLine {
169                line_num: idx + 1, // Convert 0-indexed to 1-indexed
170                line_info: &lines[idx],
171                content: line_content,
172            });
173        }
174
175        None
176    }
177}
178
/// Extension trait that adds filtered iteration methods to `LintContext`
///
/// This trait provides convenient methods for iterating over lines while
/// filtering out non-content regions.
pub trait FilteredLinesExt {
    /// Start building a filtered lines iterator
    ///
    /// Returns a [`FilteredLinesBuilder`] with no filter enabled. Chain its
    /// `skip_*` methods to choose which types of content should be filtered
    /// out, then iterate over it (it implements `IntoIterator`).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
    ///
    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
    ///
    /// for line in ctx.filtered_lines().skip_code_blocks() {
    ///     println!("Line {}: {}", line.line_num, line.content);
    /// }
    /// ```
    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;

    /// Get an iterator over content lines only
    ///
    /// This is a convenience method that returns an iterator with front matter
    /// filtered out by default. This is the most common use case for rules that
    /// should only process markdown content. No other region (code blocks,
    /// HTML blocks, HTML comments) is skipped.
    ///
    /// Equivalent to: `ctx.filtered_lines().skip_front_matter().into_iter()`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
    ///
    /// let content = "---\ntitle: Test\n---\n\n# Content";
    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
    ///
    /// for line in ctx.content_lines() {
    ///     // Front matter is automatically skipped
    ///     println!("Line {}: {}", line.line_num, line.content);
    /// }
    /// ```
    fn content_lines(&self) -> FilteredLinesIter<'_>;
}
228
/// Builder type that allows chaining filter configuration and converting to an iterator
///
/// Obtained from `FilteredLinesExt::filtered_lines`; iterate over it directly
/// (it implements `IntoIterator`) once the desired `skip_*` methods have been chained.
pub struct FilteredLinesBuilder<'a> {
    /// Lint context whose lines will be iterated
    ctx: &'a LintContext<'a>,
    /// Filter flags accumulated by the `skip_*` calls so far
    config: LineFilterConfig,
}
234
235impl<'a> FilteredLinesBuilder<'a> {
236    fn new(ctx: &'a LintContext<'a>) -> Self {
237        Self {
238            ctx,
239            config: LineFilterConfig::new(),
240        }
241    }
242
243    /// Skip lines that are part of front matter (YAML/TOML/JSON)
244    #[must_use]
245    pub fn skip_front_matter(mut self) -> Self {
246        self.config = self.config.skip_front_matter();
247        self
248    }
249
250    /// Skip lines inside fenced code blocks
251    #[must_use]
252    pub fn skip_code_blocks(mut self) -> Self {
253        self.config = self.config.skip_code_blocks();
254        self
255    }
256
257    /// Skip lines inside HTML blocks
258    #[must_use]
259    pub fn skip_html_blocks(mut self) -> Self {
260        self.config = self.config.skip_html_blocks();
261        self
262    }
263
264    /// Skip lines inside HTML comments
265    #[must_use]
266    pub fn skip_html_comments(mut self) -> Self {
267        self.config = self.config.skip_html_comments();
268        self
269    }
270}
271
272impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
273    type Item = FilteredLine<'a>;
274    type IntoIter = FilteredLinesIter<'a>;
275
276    fn into_iter(self) -> Self::IntoIter {
277        FilteredLinesIter::new(self.ctx, self.config)
278    }
279}
280
281impl<'a> FilteredLinesExt for LintContext<'a> {
282    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
283        FilteredLinesBuilder::new(self)
284    }
285
286    fn content_lines(&self) -> FilteredLinesIter<'_> {
287        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
288    }
289}
290
// Unit tests for the filtered line iteration API. Every test builds a
// `LintContext` from a small inline document using the standard flavor.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    // The first yielded item exposes line number, content and metadata.
    #[test]
    fn test_filtered_line_structure() {
        let content = "# Title\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let line = ctx.content_lines().next().unwrap();
        assert_eq!(line.line_num, 1);
        assert_eq!(line.content, "# Title");
        assert!(!line.line_info.in_front_matter);
    }

    // YAML front matter (`---` delimiters) is skipped and line numbers keep
    // counting from the start of the document.
    #[test]
    fn test_skip_front_matter_yaml() {
        let content = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        // After front matter (lines 1-4), we have: empty line, "# Content", empty line, "More content"
        assert_eq!(lines.len(), 4);
        assert_eq!(lines[0].line_num, 5); // First line after front matter
        assert_eq!(lines[0].content, "");
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
        assert_eq!(lines[2].line_num, 7);
        assert_eq!(lines[2].content, "");
        assert_eq!(lines[3].line_num, 8);
        assert_eq!(lines[3].content, "More content");
    }

    // TOML front matter (`+++` delimiters) is skipped as well.
    #[test]
    fn test_skip_front_matter_toml() {
        let content = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 2); // Empty line + "# Content"
        assert_eq!(lines[0].line_num, 5);
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
    }

    // JSON front matter (a leading `{ ... }` object) is skipped as well.
    #[test]
    fn test_skip_front_matter_json() {
        let content = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 2); // Empty line + "# Content"
        assert_eq!(lines[0].line_num, 5);
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
    }

    // With `skip_code_blocks`, the code inside a fenced block is not yielded.
    #[test]
    fn test_skip_code_blocks() {
        let content = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();

        // Only the lines outside the block and the code lines inside it are
        // asserted here. Whether the fence lines themselves carry the
        // `in_code_block` flag is decided by `LintContext` and is deliberately
        // not pinned by this test.
        assert!(lines.iter().any(|l| l.content == "# Title"));
        assert!(lines.iter().any(|l| l.content == "Content"));
        // The actual code lines should be filtered out
        assert!(!lines.iter().any(|l| l.content == "let x = 1;"));
        assert!(!lines.iter().any(|l| l.content == "let y = 2;"));
    }

    // A builder with nothing enabled yields one item per context line.
    #[test]
    fn test_no_filters() {
        let content = "---\ntitle: Test\n---\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // With no filters, all lines should be included
        let lines: Vec<_> = ctx.filtered_lines().into_iter().collect();
        assert_eq!(lines.len(), ctx.lines.len());
    }

    // Several filters can be combined; a line is dropped if any of them matches.
    #[test]
    fn test_multiple_filters() {
        let content = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .into_iter()
            .collect();

        // Should skip front matter (lines 1-3) and code block content (line 8)
        assert!(lines.iter().any(|l| l.content == "# Title"));
        assert!(lines.iter().any(|l| l.content == "Content"));
        assert!(!lines.iter().any(|l| l.content == "title: Test"));
        assert!(!lines.iter().any(|l| l.content == "code"));
    }

    // Line numbers start at 1 and pair up with the matching content.
    #[test]
    fn test_line_numbering_is_1_indexed() {
        let content = "First\nSecond\nThird";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines[0].line_num, 1);
        assert_eq!(lines[0].content, "First");
        assert_eq!(lines[1].line_num, 2);
        assert_eq!(lines[1].content, "Second");
        assert_eq!(lines[2].line_num, 3);
        assert_eq!(lines[2].content, "Third");
    }

    // `content_lines()` needs no explicit configuration to drop front matter.
    #[test]
    fn test_content_lines_convenience_method() {
        let content = "---\nfoo: bar\n---\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // content_lines() should automatically skip front matter
        let lines: Vec<_> = ctx.content_lines().collect();
        assert!(!lines.iter().any(|l| l.content.contains("foo")));
        assert!(lines.iter().any(|l| l.content == "Content"));
    }

    // An empty document yields nothing.
    #[test]
    fn test_empty_document() {
        let content = "";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 0);
    }

    // A document consisting solely of front matter yields nothing.
    #[test]
    fn test_only_front_matter() {
        let content = "---\ntitle: Test\n---";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(
            lines.len(),
            0,
            "Document with only front matter should have no content lines"
        );
    }

    // Smoke test: the full builder chain compiles and runs to completion.
    #[test]
    fn test_builder_pattern_ergonomics() {
        let content = "# Title\n\n```\ncode\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // Test that builder pattern works smoothly
        let _lines: Vec<_> = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .skip_html_blocks()
            .into_iter()
            .collect();

        // If this compiles and runs, the builder pattern is working
    }

    // Metadata flags are reachable through `line_info` on every yielded item.
    #[test]
    fn test_filtered_line_access_to_line_info() {
        let content = "# Title\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        for line in ctx.content_lines() {
            // Should be able to access line_info fields
            assert!(!line.line_info.in_front_matter);
            assert!(!line.line_info.in_code_block);
        }
    }
}