rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
3//! This module provides a zero-cost abstraction for iterating over markdown lines
4//! while automatically filtering out non-content regions like front matter, code blocks,
5//! and HTML blocks. This ensures rules only process actual markdown content.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
/// A single line produced by a filtered iteration.
///
/// The line number is always 1-indexed and refers to the position in the
/// original document, so lines that were filtered out leave gaps in the
/// numbering rather than shifting later lines.
#[derive(Debug, Clone)]
pub struct FilteredLine<'a> {
    /// The 1-indexed line number in the original document
    pub line_num: usize,
    /// Reference to the metadata the lint context recorded for this line
    pub line_info: &'a LineInfo,
    /// The text of the line, borrowed from the document being linted
    pub content: &'a str,
}
47
48/// Configuration for filtering lines during iteration
49///
50/// Use the builder pattern to configure which types of content should be skipped:
51///
52/// ```rust
53/// use rumdl_lib::filtered_lines::LineFilterConfig;
54///
55/// let config = LineFilterConfig::new()
56///     .skip_front_matter()
57///     .skip_code_blocks()
58///     .skip_html_blocks()
59///     .skip_html_comments();
60/// ```
61#[derive(Debug, Clone, Default)]
62pub struct LineFilterConfig {
63    /// Skip lines inside front matter (YAML/TOML/JSON metadata)
64    pub skip_front_matter: bool,
65    /// Skip lines inside fenced code blocks
66    pub skip_code_blocks: bool,
67    /// Skip lines inside HTML blocks
68    pub skip_html_blocks: bool,
69    /// Skip lines inside HTML comments
70    pub skip_html_comments: bool,
71}
72
73impl LineFilterConfig {
74    /// Create a new filter configuration with all filters disabled
75    #[must_use]
76    pub fn new() -> Self {
77        Self::default()
78    }
79
80    /// Skip lines that are part of front matter (YAML/TOML/JSON)
81    ///
82    /// Front matter is metadata at the start of a markdown file and should
83    /// not be processed by markdown linting rules.
84    #[must_use]
85    pub fn skip_front_matter(mut self) -> Self {
86        self.skip_front_matter = true;
87        self
88    }
89
90    /// Skip lines inside fenced code blocks
91    ///
92    /// Code blocks contain source code, not markdown, and most rules should
93    /// not process them.
94    #[must_use]
95    pub fn skip_code_blocks(mut self) -> Self {
96        self.skip_code_blocks = true;
97        self
98    }
99
100    /// Skip lines inside HTML blocks
101    ///
102    /// HTML blocks contain raw HTML and most markdown rules should not
103    /// process them.
104    #[must_use]
105    pub fn skip_html_blocks(mut self) -> Self {
106        self.skip_html_blocks = true;
107        self
108    }
109
110    /// Skip lines inside HTML comments
111    ///
112    /// HTML comments (<!-- ... -->) are metadata and should not be processed
113    /// by most markdown linting rules.
114    #[must_use]
115    pub fn skip_html_comments(mut self) -> Self {
116        self.skip_html_comments = true;
117        self
118    }
119
120    /// Check if a line should be filtered out based on this configuration
121    fn should_filter(&self, line_info: &LineInfo) -> bool {
122        (self.skip_front_matter && line_info.in_front_matter)
123            || (self.skip_code_blocks && line_info.in_code_block)
124            || (self.skip_html_blocks && line_info.in_html_block)
125            || (self.skip_html_comments && line_info.in_html_comment)
126    }
127}
128
129/// Iterator that yields filtered lines based on configuration
130pub struct FilteredLinesIter<'a> {
131    ctx: &'a LintContext<'a>,
132    config: LineFilterConfig,
133    current_index: usize,
134}
135
136impl<'a> FilteredLinesIter<'a> {
137    /// Create a new filtered lines iterator
138    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
139        Self {
140            ctx,
141            config,
142            current_index: 0,
143        }
144    }
145}
146
147impl<'a> Iterator for FilteredLinesIter<'a> {
148    type Item = FilteredLine<'a>;
149
150    fn next(&mut self) -> Option<Self::Item> {
151        let lines = &self.ctx.lines;
152        let content_lines: Vec<&str> = self.ctx.content.lines().collect();
153
154        while self.current_index < lines.len() {
155            let idx = self.current_index;
156            self.current_index += 1;
157
158            // Check if this line should be filtered
159            if self.config.should_filter(&lines[idx]) {
160                continue;
161            }
162
163            // Get the actual line content from the document
164            let line_content = content_lines.get(idx).copied().unwrap_or("");
165
166            // Return the filtered line with 1-indexed line number
167            return Some(FilteredLine {
168                line_num: idx + 1, // Convert 0-indexed to 1-indexed
169                line_info: &lines[idx],
170                content: line_content,
171            });
172        }
173
174        None
175    }
176}
177
/// Extension trait that adds filtered iteration methods to `LintContext`
///
/// This trait provides convenient methods for iterating over lines while
/// filtering out non-content regions.
pub trait FilteredLinesExt {
    /// Start building a filtered lines iterator
    ///
    /// Returns a [`FilteredLinesBuilder`] on which the desired filters can be
    /// chained. The builder implements `IntoIterator`, so it can be used
    /// directly in a `for` loop; with no filter selected every line is yielded.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
    ///
    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
    ///
    /// for line in ctx.filtered_lines().skip_code_blocks() {
    ///     println!("Line {}: {}", line.line_num, line.content);
    /// }
    /// ```
    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;

    /// Get an iterator over content lines only
    ///
    /// This is a convenience method that returns an iterator with front matter
    /// filtered out and no other filter enabled. This is the most common use
    /// case for rules that should only process markdown content.
    ///
    /// Equivalent to: `ctx.filtered_lines().skip_front_matter().into_iter()`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
    ///
    /// let content = "---\ntitle: Test\n---\n\n# Content";
    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
    ///
    /// for line in ctx.content_lines() {
    ///     // Front matter is automatically skipped
    ///     println!("Line {}: {}", line.line_num, line.content);
    /// }
    /// ```
    fn content_lines(&self) -> FilteredLinesIter<'_>;
}
227
228/// Builder type that allows chaining filter configuration and converting to an iterator
229pub struct FilteredLinesBuilder<'a> {
230    ctx: &'a LintContext<'a>,
231    config: LineFilterConfig,
232}
233
234impl<'a> FilteredLinesBuilder<'a> {
235    fn new(ctx: &'a LintContext<'a>) -> Self {
236        Self {
237            ctx,
238            config: LineFilterConfig::new(),
239        }
240    }
241
242    /// Skip lines that are part of front matter (YAML/TOML/JSON)
243    #[must_use]
244    pub fn skip_front_matter(mut self) -> Self {
245        self.config = self.config.skip_front_matter();
246        self
247    }
248
249    /// Skip lines inside fenced code blocks
250    #[must_use]
251    pub fn skip_code_blocks(mut self) -> Self {
252        self.config = self.config.skip_code_blocks();
253        self
254    }
255
256    /// Skip lines inside HTML blocks
257    #[must_use]
258    pub fn skip_html_blocks(mut self) -> Self {
259        self.config = self.config.skip_html_blocks();
260        self
261    }
262
263    /// Skip lines inside HTML comments
264    #[must_use]
265    pub fn skip_html_comments(mut self) -> Self {
266        self.config = self.config.skip_html_comments();
267        self
268    }
269}
270
271impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
272    type Item = FilteredLine<'a>;
273    type IntoIter = FilteredLinesIter<'a>;
274
275    fn into_iter(self) -> Self::IntoIter {
276        FilteredLinesIter::new(self.ctx, self.config)
277    }
278}
279
280impl<'a> FilteredLinesExt for LintContext<'a> {
281    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
282        FilteredLinesBuilder::new(self)
283    }
284
285    fn content_lines(&self) -> FilteredLinesIter<'_> {
286        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// Collects the text of every yielded line.
    fn texts<'a>(lines: impl IntoIterator<Item = FilteredLine<'a>>) -> Vec<&'a str> {
        lines.into_iter().map(|line| line.content).collect()
    }

    /// Collects `(line number, text)` for every yielded line.
    fn numbered<'a>(lines: impl IntoIterator<Item = FilteredLine<'a>>) -> Vec<(usize, &'a str)> {
        lines.into_iter().map(|line| (line.line_num, line.content)).collect()
    }

    #[test]
    fn test_filtered_line_structure() {
        let ctx = LintContext::new("# Title\n\nContent", MarkdownFlavor::Standard);

        let first = ctx.content_lines().next().expect("document has at least one line");
        assert_eq!(first.line_num, 1);
        assert_eq!(first.content, "# Title");
        assert!(!first.line_info.in_front_matter);
    }

    #[test]
    fn test_skip_front_matter_yaml() {
        let ctx = LintContext::new(
            "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content",
            MarkdownFlavor::Standard,
        );

        // Lines 1-4 are front matter; what remains is a blank line, the heading,
        // another blank line and the closing paragraph.
        let expected = vec![(5, ""), (6, "# Content"), (7, ""), (8, "More content")];
        assert_eq!(numbered(ctx.content_lines()), expected);
    }

    #[test]
    fn test_skip_front_matter_toml() {
        let ctx = LintContext::new(
            "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content",
            MarkdownFlavor::Standard,
        );

        // Blank line followed by "# Content"
        let remaining = numbered(ctx.content_lines());
        assert_eq!(remaining.len(), 2);
        assert_eq!(remaining[0].0, 5);
        assert_eq!(remaining[1], (6, "# Content"));
    }

    #[test]
    fn test_skip_front_matter_json() {
        let ctx = LintContext::new(
            "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content",
            MarkdownFlavor::Standard,
        );

        // Blank line followed by "# Content"
        let remaining = numbered(ctx.content_lines());
        assert_eq!(remaining.len(), 2);
        assert_eq!(remaining[0].0, 5);
        assert_eq!(remaining[1], (6, "# Content"));
    }

    #[test]
    fn test_skip_code_blocks() {
        let ctx = LintContext::new(
            "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent",
            MarkdownFlavor::Standard,
        );

        let kept = texts(ctx.filtered_lines().skip_code_blocks().into_iter());

        // Whether the fence lines themselves are yielded depends on how
        // `LintContext` sets `in_code_block`, so only the surrounding text and
        // the code body are checked here.
        assert!(kept.contains(&"# Title"));
        assert!(kept.contains(&"Content"));
        assert!(!kept.contains(&"let x = 1;"));
        assert!(!kept.contains(&"let y = 2;"));
    }

    #[test]
    fn test_no_filters() {
        let ctx = LintContext::new("---\ntitle: Test\n---\n\n# Content", MarkdownFlavor::Standard);

        // Without any filter every recorded line must come through
        let yielded = ctx.filtered_lines().into_iter().count();
        assert_eq!(yielded, ctx.lines.len());
    }

    #[test]
    fn test_multiple_filters() {
        let ctx = LintContext::new(
            "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent",
            MarkdownFlavor::Standard,
        );

        let builder = ctx.filtered_lines().skip_front_matter().skip_code_blocks();
        let kept = texts(builder.into_iter());

        // Front matter (lines 1-3) and the code body (line 8) must both be gone
        assert!(kept.contains(&"# Title"));
        assert!(kept.contains(&"Content"));
        assert!(!kept.contains(&"title: Test"));
        assert!(!kept.contains(&"code"));
    }

    #[test]
    fn test_line_numbering_is_1_indexed() {
        let ctx = LintContext::new("First\nSecond\nThird", MarkdownFlavor::Standard);

        let leading: Vec<_> = numbered(ctx.content_lines()).into_iter().take(3).collect();
        assert_eq!(leading, vec![(1, "First"), (2, "Second"), (3, "Third")]);
    }

    #[test]
    fn test_content_lines_convenience_method() {
        let ctx = LintContext::new("---\nfoo: bar\n---\n\nContent", MarkdownFlavor::Standard);

        // content_lines() drops front matter without being asked to
        let kept = texts(ctx.content_lines());
        assert!(kept.iter().all(|text| !text.contains("foo")));
        assert!(kept.contains(&"Content"));
    }

    #[test]
    fn test_empty_document() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);

        assert_eq!(ctx.content_lines().count(), 0);
    }

    #[test]
    fn test_only_front_matter() {
        let ctx = LintContext::new("---\ntitle: Test\n---", MarkdownFlavor::Standard);

        assert_eq!(
            ctx.content_lines().count(),
            0,
            "Document with only front matter should have no content lines"
        );
    }

    #[test]
    fn test_builder_pattern_ergonomics() {
        let ctx = LintContext::new("# Title\n\n```\ncode\n```\n\nContent", MarkdownFlavor::Standard);

        // Compiling and running the full chain is the whole point of this test;
        // the yielded lines are deliberately not inspected.
        let builder = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .skip_html_blocks();
        let _lines: Vec<_> = builder.into_iter().collect();
    }

    #[test]
    fn test_filtered_line_access_to_line_info() {
        let ctx = LintContext::new("# Title\n\nContent", MarkdownFlavor::Standard);

        // Metadata fields must be reachable through every yielded line
        ctx.content_lines().for_each(|line| {
            assert!(!line.line_info.in_front_matter);
            assert!(!line.line_info.in_code_block);
        });
    }
}