Skip to main content

rumdl_lib/lint_context/
types.rs

1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7    /// Byte offset where this line starts in the document
8    pub byte_offset: usize,
9    /// Length of the line in bytes (without newline)
10    pub byte_len: usize,
11    /// Number of bytes of leading whitespace (for substring extraction)
12    pub indent: usize,
13    /// Visual column width of leading whitespace (with proper tab expansion)
14    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16    pub visual_indent: usize,
17    /// Whether the line is blank (empty or only whitespace)
18    pub is_blank: bool,
19    /// Whether this line is inside a code block
20    pub in_code_block: bool,
21    /// Whether this line is inside front matter
22    pub in_front_matter: bool,
23    /// Whether this line is inside an HTML block
24    pub in_html_block: bool,
25    /// Whether this line is inside an HTML comment
26    pub in_html_comment: bool,
27    /// List item information if this line starts a list item
28    /// Boxed to reduce LineInfo size: most lines are not list items
29    pub list_item: Option<Box<ListItemInfo>>,
30    /// Heading information if this line is a heading
31    /// Boxed to reduce LineInfo size: most lines are not headings
32    pub heading: Option<Box<HeadingInfo>>,
33    /// Blockquote information if this line is a blockquote
34    /// Boxed to reduce LineInfo size: most lines are not blockquotes
35    pub blockquote: Option<Box<BlockquoteInfo>>,
36    /// Whether this line is inside a mkdocstrings autodoc block
37    pub in_mkdocstrings: bool,
38    /// Whether this line is part of an ESM import/export block (MDX only)
39    pub in_esm_block: bool,
40    /// Whether this line is a continuation of a multi-line code span from a previous line
41    pub in_code_span_continuation: bool,
42    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
43    /// Pre-computed for consistent detection across all rules
44    pub is_horizontal_rule: bool,
45    /// Whether this line is inside a math block ($$ ... $$)
46    pub in_math_block: bool,
47    /// Whether this line is inside a Quarto div block (::: ... :::)
48    pub in_quarto_div: bool,
49    /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
50    /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
51    pub is_div_marker: bool,
52    /// Whether this line contains or is inside a JSX expression (MDX only)
53    pub in_jsx_expression: bool,
54    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
55    pub in_mdx_comment: bool,
56    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
57    pub in_admonition: bool,
58    /// Whether this line is inside an MkDocs content tab block (===)
59    pub in_content_tab: bool,
60    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
61    pub in_mkdocs_html_markdown: bool,
62    /// Whether this line is a definition list item (: definition)
63    pub in_definition_list: bool,
64    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
65    pub in_obsidian_comment: bool,
66    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
67    pub in_pymdown_block: bool,
68    /// Whether this line is inside a kramdown extension block ({::comment}...{:/comment}, {::nomarkdown}...{:/nomarkdown})
69    pub in_kramdown_extension_block: bool,
70    /// Whether this line is a kramdown block IAL ({:.class #id}) or ALD ({:ref: .class})
71    pub is_kramdown_block_ial: bool,
72    /// Whether this line is inside a JSX component block (MDX only, e.g. `<Tabs>...</Tabs>`)
73    pub in_jsx_block: bool,
74    /// Whether this line is inside a footnote definition body (continuation lines)
75    pub in_footnote_definition: bool,
76}
77
78impl LineInfo {
79    /// Get the line content as a string slice from the source document
80    pub fn content<'a>(&self, source: &'a str) -> &'a str {
81        &source[self.byte_offset..self.byte_offset + self.byte_len]
82    }
83
84    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
85    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
86    /// but in MkDocs flavor it's actually container content that should be preserved.
87    #[inline]
88    pub fn in_mkdocs_container(&self) -> bool {
89        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
90    }
91}
92
/// Details of a list item found on a line
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
107
/// Heading style type
///
/// Equality between variants is total, so `Eq` is derived alongside `PartialEq`;
/// this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
118
119/// Parsed link information
120#[derive(Debug, Clone)]
121pub struct ParsedLink<'a> {
122    /// Line number (1-indexed)
123    pub line: usize,
124    /// Start column (0-indexed) in the line
125    pub start_col: usize,
126    /// End column (0-indexed) in the line
127    pub end_col: usize,
128    /// Byte offset in document
129    pub byte_offset: usize,
130    /// End byte offset in document
131    pub byte_end: usize,
132    /// Link text
133    pub text: Cow<'a, str>,
134    /// Link URL or reference
135    pub url: Cow<'a, str>,
136    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
137    /// after backslash-escape handling. `None` when the link has no title or is a
138    /// reference style without a matched definition.
139    pub title: Option<Cow<'a, str>>,
140    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
141    pub is_reference: bool,
142    /// Reference ID for reference links
143    pub reference_id: Option<Cow<'a, str>>,
144    /// Link type from pulldown-cmark
145    pub link_type: LinkType,
146}
147
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// Reference text for which no resolution was found
    pub reference: String,
    /// Byte range the link occupies in the source document
    pub span: std::ops::Range<usize>,
}
156
/// A footnote reference found in the document (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote ID with the `^` prefix removed
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
}
167
168/// Parsed image information
169#[derive(Debug, Clone)]
170pub struct ParsedImage<'a> {
171    /// Line number (1-indexed)
172    pub line: usize,
173    /// Start column (0-indexed) in the line
174    pub start_col: usize,
175    /// End column (0-indexed) in the line
176    pub end_col: usize,
177    /// Byte offset in document
178    pub byte_offset: usize,
179    /// End byte offset in document
180    pub byte_end: usize,
181    /// Alt text
182    pub alt_text: Cow<'a, str>,
183    /// Image URL or reference
184    pub url: Cow<'a, str>,
185    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
186    /// after backslash-escape handling. `None` when the image has no title or is a
187    /// reference style without a matched definition.
188    pub title: Option<Cow<'a, str>>,
189    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
190    pub is_reference: bool,
191    /// Reference ID for reference images
192    pub reference_id: Option<Cow<'a, str>>,
193    /// Link type from pulldown-cmark
194    pub link_type: LinkType,
195}
196
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
    /// Byte offset at which the definition begins
    pub byte_offset: usize,
    /// Byte offset at which the definition ends
    pub byte_end: usize,
    /// Byte offset at which the title begins (quote included), when a title is present
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends (quote included), when a title is present
    pub title_byte_end: Option<usize>,
}
217
/// A code span extracted from the document
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the code span begins
    pub line: usize,
    /// 1-indexed line on which the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Text between the backticks, backticks excluded
    pub content: String,
}
238
/// A math span extracted from the document (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line on which the math span begins
    pub line: usize,
    /// 1-indexed line on which the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// Text between the math delimiters
    pub content: String,
}
259
260/// Information about a heading
261#[derive(Debug, Clone)]
262pub struct HeadingInfo {
263    /// Heading level (1-6 for ATX, 1-2 for Setext)
264    pub level: u8,
265    /// Style of heading
266    pub style: HeadingStyle,
267    /// The heading marker (# characters or underline)
268    pub marker: String,
269    /// Column where the marker starts (0-based)
270    pub marker_column: usize,
271    /// Column where heading text starts
272    pub content_column: usize,
273    /// The heading text (without markers and without custom ID syntax)
274    pub text: String,
275    /// Custom header ID if present (e.g., from {#custom-id} syntax)
276    pub custom_id: Option<String>,
277    /// Original heading text including custom ID syntax
278    pub raw_text: String,
279    /// Whether it has a closing sequence (for ATX)
280    pub has_closing_sequence: bool,
281    /// The closing sequence if present
282    pub closing_sequence: String,
283    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
284    /// False for malformed headings like `#NoSpace` that MD018 should flag
285    pub is_valid: bool,
286}
287
288/// A valid heading from a filtered iteration
289///
290/// Only includes headings that are CommonMark-compliant (have space after #).
291/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
292#[derive(Debug, Clone)]
293pub struct ValidHeading<'a> {
294    /// The 1-indexed line number in the document
295    pub line_num: usize,
296    /// Reference to the heading information
297    pub heading: &'a HeadingInfo,
298    /// Reference to the full line info (for rules that need additional context)
299    pub line_info: &'a LineInfo,
300}
301
302/// Iterator over valid CommonMark headings in a document
303///
304/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
305/// but should not be processed by other heading rules.
306pub struct ValidHeadingsIter<'a> {
307    lines: &'a [LineInfo],
308    current_index: usize,
309}
310
311impl<'a> ValidHeadingsIter<'a> {
312    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
313        Self {
314            lines,
315            current_index: 0,
316        }
317    }
318}
319
320impl<'a> Iterator for ValidHeadingsIter<'a> {
321    type Item = ValidHeading<'a>;
322
323    fn next(&mut self) -> Option<Self::Item> {
324        while self.current_index < self.lines.len() {
325            let idx = self.current_index;
326            self.current_index += 1;
327
328            let line_info = &self.lines[idx];
329            if let Some(heading) = line_info.heading.as_deref()
330                && heading.is_valid
331            {
332                return Some(ValidHeading {
333                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
334                    heading,
335                    line_info,
336                });
337            }
338        }
339        None
340    }
341}
342
/// Details of a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// 0-based column at which the first `>` begins
    pub marker_column: usize,
    /// Blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Text following the blockquote marker(s)
    pub content: String,
    /// Whether more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
}
357
/// Details of a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// Whether the list is ordered or unordered
    pub is_ordered: bool,
    /// For unordered lists, the consistent marker (if there is one)
    pub marker: Option<String>,
    /// Blockquote prefix of the list; empty when the list is not inside a blockquote
    pub blockquote_prefix: String,
    /// Lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for top-level lists)
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
378
/// Per-character tallies used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
407
/// An HTML tag parsed ahead of time
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Name of the tag (e.g., "div", "img", "br")
    pub tag_name: String,
    /// `true` for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// `true` for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
}
428
/// An emphasis span parsed ahead of time
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Emphasis marker character ('*' or '_')
    pub marker: char,
    /// Text enclosed by the emphasis
    pub content: String,
}
447
/// A table row parsed ahead of time
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// `true` for a separator row (made up solely of `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// How many columns (pipe-separated cells) the row has
    pub column_count: usize,
    /// Alignment information taken from the separator row;
    /// each entry is one of "left", "center", "right", "none"
    pub column_alignments: Vec<String>,
}
460
/// A bare URL (one that is not part of a link) parsed ahead of time
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// The URL text
    pub url: String,
}
477
/// A lazy continuation line, as detected by pulldown-cmark.
///
/// A lazy continuation is text that carries on a list item paragraph while being
/// indented less than expected.
#[derive(Debug, Clone)]
pub struct LazyContLine {
    /// Line number, 1-indexed
    pub line_num: usize,
    /// Indentation that was expected
    pub expected_indent: usize,
    /// Indentation the line currently has
    pub current_indent: usize,
    /// Depth of blockquote nesting
    pub blockquote_level: usize,
}
493
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between characters
/// - No other characters allowed
///
/// A tab anywhere in the leading indentation disqualifies the line, including a tab
/// that follows 1-3 spaces (e.g. `" \t---"`): tab expansion pushes the content to
/// column 4 or beyond, which is past the indentation allowed for a thematic break.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();

    // Checking for the tab *after* the run of spaces catches both "\t---" and " \t---".
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(after_spaces.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
///
/// The content must start with `-`, `*`, or `_`, contain at least three occurrences
/// of that same character, and otherwise consist only of spaces and tabs.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes can never hold three rule characters.
    if trimmed.len() < 3 {
        return false;
    }

    let mut chars = trimmed.chars();
    let Some(rule_char @ ('-' | '*' | '_')) = chars.next() else {
        return false;
    };

    // Count occurrences of the rule character, rejecting non-whitespace interlopers
    let mut count = 1usize; // Already matched the first character
    for ch in chars {
        match ch {
            c if c == rule_char => count += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }
    count >= 3
}