rumdl_lib/
filtered_lines.rs

1//! Filtered line iteration for markdown linting
2//!
3//! This module provides a zero-cost abstraction for iterating over markdown lines
4//! while automatically filtering out non-content regions like front matter, code blocks,
5//! and HTML blocks. This ensures rules only process actual markdown content.
6//!
7//! # Architecture
8//!
9//! The filtered iterator approach centralizes the logic of what content should be
10//! processed by rules, eliminating error-prone manual checks in each rule implementation.
11//!
12//! # Examples
13//!
14//! ```rust
15//! use rumdl_lib::lint_context::LintContext;
16//! use rumdl_lib::filtered_lines::FilteredLinesExt;
17//!
18//! let content = "---\nurl: http://example.com\n---\n\n# Title\n\nContent";
19//! let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
20//!
21//! // Simple: get all content lines (skips front matter by default)
22//! for line in ctx.content_lines() {
23//!     println!("Line {}: {}", line.line_num, line.content);
24//! }
25//!
26//! // Advanced: custom filter configuration
27//! for line in ctx.filtered_lines()
28//!     .skip_code_blocks()
29//!     .skip_front_matter()
30//!     .skip_html_blocks() {
31//!     println!("Line {}: {}", line.line_num, line.content);
32//! }
33//! ```
34
35use crate::lint_context::{LineInfo, LintContext};
36
37/// A single line from a filtered iteration, with guaranteed 1-indexed line numbers
38#[derive(Debug, Clone)]
39pub struct FilteredLine<'a> {
40    /// The 1-indexed line number in the original document
41    pub line_num: usize,
42    /// Reference to the line's metadata
43    pub line_info: &'a LineInfo,
44    /// The actual line content
45    pub content: &'a str,
46}
47
/// Configuration for filtering lines during iteration
///
/// Each flag names one kind of region to leave out of the iteration. All
/// flags default to `false`, so a default configuration yields every line.
/// The type is a plain bundle of booleans, so it is `Copy` and comparable
/// with `==`.
///
/// Use the builder pattern to configure which types of content should be skipped:
///
/// ```rust
/// use rumdl_lib::filtered_lines::LineFilterConfig;
///
/// let config = LineFilterConfig::new()
///     .skip_front_matter()
///     .skip_code_blocks()
///     .skip_html_blocks();
/// ```
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct LineFilterConfig {
    /// Skip lines inside front matter (YAML/TOML/JSON metadata)
    pub skip_front_matter: bool,
    /// Skip lines inside fenced code blocks
    pub skip_code_blocks: bool,
    /// Skip lines inside HTML blocks
    pub skip_html_blocks: bool,
}
69
70impl LineFilterConfig {
71    /// Create a new filter configuration with all filters disabled
72    #[must_use]
73    pub fn new() -> Self {
74        Self::default()
75    }
76
77    /// Skip lines that are part of front matter (YAML/TOML/JSON)
78    ///
79    /// Front matter is metadata at the start of a markdown file and should
80    /// not be processed by markdown linting rules.
81    #[must_use]
82    pub fn skip_front_matter(mut self) -> Self {
83        self.skip_front_matter = true;
84        self
85    }
86
87    /// Skip lines inside fenced code blocks
88    ///
89    /// Code blocks contain source code, not markdown, and most rules should
90    /// not process them.
91    #[must_use]
92    pub fn skip_code_blocks(mut self) -> Self {
93        self.skip_code_blocks = true;
94        self
95    }
96
97    /// Skip lines inside HTML blocks
98    ///
99    /// HTML blocks contain raw HTML and most markdown rules should not
100    /// process them.
101    #[must_use]
102    pub fn skip_html_blocks(mut self) -> Self {
103        self.skip_html_blocks = true;
104        self
105    }
106
107    /// Check if a line should be filtered out based on this configuration
108    fn should_filter(&self, line_info: &LineInfo) -> bool {
109        (self.skip_front_matter && line_info.in_front_matter)
110            || (self.skip_code_blocks && line_info.in_code_block)
111            || (self.skip_html_blocks && line_info.in_html_block)
112    }
113}
114
115/// Iterator that yields filtered lines based on configuration
116pub struct FilteredLinesIter<'a> {
117    ctx: &'a LintContext<'a>,
118    config: LineFilterConfig,
119    current_index: usize,
120}
121
122impl<'a> FilteredLinesIter<'a> {
123    /// Create a new filtered lines iterator
124    fn new(ctx: &'a LintContext<'a>, config: LineFilterConfig) -> Self {
125        Self {
126            ctx,
127            config,
128            current_index: 0,
129        }
130    }
131}
132
133impl<'a> Iterator for FilteredLinesIter<'a> {
134    type Item = FilteredLine<'a>;
135
136    fn next(&mut self) -> Option<Self::Item> {
137        let lines = &self.ctx.lines;
138        let content_lines: Vec<&str> = self.ctx.content.lines().collect();
139
140        while self.current_index < lines.len() {
141            let idx = self.current_index;
142            self.current_index += 1;
143
144            // Check if this line should be filtered
145            if self.config.should_filter(&lines[idx]) {
146                continue;
147            }
148
149            // Get the actual line content from the document
150            let line_content = content_lines.get(idx).copied().unwrap_or("");
151
152            // Return the filtered line with 1-indexed line number
153            return Some(FilteredLine {
154                line_num: idx + 1, // Convert 0-indexed to 1-indexed
155                line_info: &lines[idx],
156                content: line_content,
157            });
158        }
159
160        None
161    }
162}
163
164/// Extension trait that adds filtered iteration methods to `LintContext`
165///
166/// This trait provides convenient methods for iterating over lines while
167/// automatically filtering out non-content regions.
168pub trait FilteredLinesExt {
169    /// Start building a filtered lines iterator
170    ///
171    /// Returns a `LineFilterConfig` builder that can be used to configure
172    /// which types of content should be filtered out.
173    ///
174    /// # Examples
175    ///
176    /// ```rust
177    /// use rumdl_lib::lint_context::LintContext;
178    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
179    ///
180    /// let content = "# Title\n\n```rust\ncode\n```\n\nContent";
181    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
182    ///
183    /// for line in ctx.filtered_lines().skip_code_blocks() {
184    ///     println!("Line {}: {}", line.line_num, line.content);
185    /// }
186    /// ```
187    fn filtered_lines(&self) -> FilteredLinesBuilder<'_>;
188
189    /// Get an iterator over content lines only
190    ///
191    /// This is a convenience method that returns an iterator with front matter
192    /// filtered out by default. This is the most common use case for rules that
193    /// should only process markdown content.
194    ///
195    /// Equivalent to: `ctx.filtered_lines().skip_front_matter()`
196    ///
197    /// # Examples
198    ///
199    /// ```rust
200    /// use rumdl_lib::lint_context::LintContext;
201    /// use rumdl_lib::filtered_lines::FilteredLinesExt;
202    ///
203    /// let content = "---\ntitle: Test\n---\n\n# Content";
204    /// let ctx = LintContext::new(content, rumdl_lib::config::MarkdownFlavor::Standard);
205    ///
206    /// for line in ctx.content_lines() {
207    ///     // Front matter is automatically skipped
208    ///     println!("Line {}: {}", line.line_num, line.content);
209    /// }
210    /// ```
211    fn content_lines(&self) -> FilteredLinesIter<'_>;
212}
213
214/// Builder type that allows chaining filter configuration and converting to an iterator
215pub struct FilteredLinesBuilder<'a> {
216    ctx: &'a LintContext<'a>,
217    config: LineFilterConfig,
218}
219
220impl<'a> FilteredLinesBuilder<'a> {
221    fn new(ctx: &'a LintContext<'a>) -> Self {
222        Self {
223            ctx,
224            config: LineFilterConfig::new(),
225        }
226    }
227
228    /// Skip lines that are part of front matter (YAML/TOML/JSON)
229    #[must_use]
230    pub fn skip_front_matter(mut self) -> Self {
231        self.config = self.config.skip_front_matter();
232        self
233    }
234
235    /// Skip lines inside fenced code blocks
236    #[must_use]
237    pub fn skip_code_blocks(mut self) -> Self {
238        self.config = self.config.skip_code_blocks();
239        self
240    }
241
242    /// Skip lines inside HTML blocks
243    #[must_use]
244    pub fn skip_html_blocks(mut self) -> Self {
245        self.config = self.config.skip_html_blocks();
246        self
247    }
248}
249
250impl<'a> IntoIterator for FilteredLinesBuilder<'a> {
251    type Item = FilteredLine<'a>;
252    type IntoIter = FilteredLinesIter<'a>;
253
254    fn into_iter(self) -> Self::IntoIter {
255        FilteredLinesIter::new(self.ctx, self.config)
256    }
257}
258
259impl<'a> FilteredLinesExt for LintContext<'a> {
260    fn filtered_lines(&self) -> FilteredLinesBuilder<'_> {
261        FilteredLinesBuilder::new(self)
262    }
263
264    fn content_lines(&self) -> FilteredLinesIter<'_> {
265        FilteredLinesIter::new(self, LineFilterConfig::new().skip_front_matter())
266    }
267}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::MarkdownFlavor;

    /// The first yielded line carries its number, text, and metadata.
    #[test]
    fn test_filtered_line_structure() {
        let content = "# Title\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let line = ctx.content_lines().next().unwrap();
        assert_eq!(line.line_num, 1);
        assert_eq!(line.content, "# Title");
        assert!(!line.line_info.in_front_matter);
    }

    #[test]
    fn test_skip_front_matter_yaml() {
        let content = "---\ntitle: Test\nurl: http://example.com\n---\n\n# Content\n\nMore content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        // After front matter (lines 1-4), we have: empty line, "# Content", empty line, "More content"
        assert_eq!(lines.len(), 4);
        assert_eq!(lines[0].line_num, 5); // First line after front matter
        assert_eq!(lines[0].content, "");
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
        assert_eq!(lines[2].line_num, 7);
        assert_eq!(lines[2].content, "");
        assert_eq!(lines[3].line_num, 8);
        assert_eq!(lines[3].content, "More content");
    }

    #[test]
    fn test_skip_front_matter_toml() {
        let content = "+++\ntitle = \"Test\"\nurl = \"http://example.com\"\n+++\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 2); // Empty line + "# Content"
        assert_eq!(lines[0].line_num, 5);
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
    }

    #[test]
    fn test_skip_front_matter_json() {
        let content = "{\n\"title\": \"Test\",\n\"url\": \"http://example.com\"\n}\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 2); // Empty line + "# Content"
        assert_eq!(lines[0].line_num, 5);
        assert_eq!(lines[1].line_num, 6);
        assert_eq!(lines[1].content, "# Content");
    }

    #[test]
    fn test_skip_code_blocks() {
        let content = "# Title\n\n```rust\nlet x = 1;\nlet y = 2;\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.filtered_lines().skip_code_blocks().into_iter().collect();

        // Markdown outside the fence must survive and the code inside it must
        // be dropped. Whether the fence lines themselves count as being
        // "in a code block" is decided by `LintContext`, so this test
        // deliberately makes no assertion about them.
        assert!(lines.iter().any(|l| l.content == "# Title"));
        assert!(lines.iter().any(|l| l.content == "Content"));
        assert!(!lines.iter().any(|l| l.content == "let x = 1;"));
        assert!(!lines.iter().any(|l| l.content == "let y = 2;"));
    }

    #[test]
    fn test_no_filters() {
        let content = "---\ntitle: Test\n---\n\n# Content";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // With no filters, all lines should be included
        let lines: Vec<_> = ctx.filtered_lines().into_iter().collect();
        assert_eq!(lines.len(), ctx.lines.len());
    }

    #[test]
    fn test_multiple_filters() {
        let content = "---\ntitle: Test\n---\n\n# Title\n\n```rust\ncode\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .into_iter()
            .collect();

        // Should skip front matter (lines 1-3) and code block content (line 8)
        assert!(lines.iter().any(|l| l.content == "# Title"));
        assert!(lines.iter().any(|l| l.content == "Content"));
        assert!(!lines.iter().any(|l| l.content == "title: Test"));
        assert!(!lines.iter().any(|l| l.content == "code"));

        // Skipped lines must not shift the numbering of the lines after them:
        // "# Title" is line 5 and "Content" is line 11 of the original text.
        let title = lines.iter().find(|l| l.content == "# Title").unwrap();
        assert_eq!(title.line_num, 5);
        let tail = lines.iter().find(|l| l.content == "Content").unwrap();
        assert_eq!(tail.line_num, 11);
    }

    #[test]
    fn test_line_numbering_is_1_indexed() {
        let content = "First\nSecond\nThird";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines[0].line_num, 1);
        assert_eq!(lines[0].content, "First");
        assert_eq!(lines[1].line_num, 2);
        assert_eq!(lines[1].content, "Second");
        assert_eq!(lines[2].line_num, 3);
        assert_eq!(lines[2].content, "Third");
    }

    #[test]
    fn test_content_lines_convenience_method() {
        let content = "---\nfoo: bar\n---\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // content_lines() should automatically skip front matter
        let lines: Vec<_> = ctx.content_lines().collect();
        assert!(!lines.iter().any(|l| l.content.contains("foo")));
        assert!(lines.iter().any(|l| l.content == "Content"));
    }

    #[test]
    fn test_empty_document() {
        let content = "";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(lines.len(), 0);
    }

    #[test]
    fn test_only_front_matter() {
        let content = "---\ntitle: Test\n---";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        let lines: Vec<_> = ctx.content_lines().collect();
        assert_eq!(
            lines.len(),
            0,
            "Document with only front matter should have no content lines"
        );
    }

    #[test]
    fn test_builder_pattern_ergonomics() {
        let content = "# Title\n\n```\ncode\n```\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // All three filters chained together must still compile and iterate
        let lines: Vec<_> = ctx
            .filtered_lines()
            .skip_front_matter()
            .skip_code_blocks()
            .skip_html_blocks()
            .into_iter()
            .collect();

        // Plain markdown survives every filter; the fenced code does not
        assert!(lines.iter().any(|l| l.content == "# Title"));
        assert!(lines.iter().any(|l| l.content == "Content"));
        assert!(!lines.iter().any(|l| l.content == "code"));
    }

    #[test]
    fn test_filtered_line_access_to_line_info() {
        let content = "# Title\n\nContent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        for line in ctx.content_lines() {
            // Should be able to access line_info fields
            assert!(!line.line_info.in_front_matter);
            assert!(!line.line_info.in_code_block);
        }
    }
}