Skip to main content

rumdl_lib/lint_context/
types.rs

1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7    /// Byte offset where this line starts in the document
8    pub byte_offset: usize,
9    /// Length of the line in bytes (without newline)
10    pub byte_len: usize,
11    /// Number of bytes of leading whitespace (for substring extraction)
12    pub indent: usize,
13    /// Visual column width of leading whitespace (with proper tab expansion)
14    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16    pub visual_indent: usize,
17    /// Whether the line is blank (empty or only whitespace)
18    pub is_blank: bool,
19    /// Whether this line is inside a code block
20    pub in_code_block: bool,
21    /// Whether this line is inside front matter
22    pub in_front_matter: bool,
23    /// Whether this line is inside an HTML block
24    pub in_html_block: bool,
25    /// Whether this line is inside an HTML comment
26    pub in_html_comment: bool,
27    /// List item information if this line starts a list item
28    /// Boxed to reduce LineInfo size: most lines are not list items
29    pub list_item: Option<Box<ListItemInfo>>,
30    /// Heading information if this line is a heading
31    /// Boxed to reduce LineInfo size: most lines are not headings
32    pub heading: Option<Box<HeadingInfo>>,
33    /// Blockquote information if this line is a blockquote
34    /// Boxed to reduce LineInfo size: most lines are not blockquotes
35    pub blockquote: Option<Box<BlockquoteInfo>>,
36    /// Whether this line is inside a mkdocstrings autodoc block
37    pub in_mkdocstrings: bool,
38    /// Whether this line is part of an ESM import/export block (MDX only)
39    pub in_esm_block: bool,
40    /// Whether this line is a continuation of a multi-line code span from a previous line
41    pub in_code_span_continuation: bool,
42    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
43    /// Pre-computed for consistent detection across all rules
44    pub is_horizontal_rule: bool,
45    /// Whether this line is inside a math block ($$ ... $$)
46    pub in_math_block: bool,
47    /// Whether this line is inside a Pandoc/Quarto div block (::: ... :::)
48    pub in_pandoc_div: bool,
49    /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
50    /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
51    pub is_div_marker: bool,
52    /// Whether this line contains or is inside a JSX expression (MDX only)
53    pub in_jsx_expression: bool,
54    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
55    pub in_mdx_comment: bool,
56    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
57    pub in_admonition: bool,
58    /// Whether this line is inside an MkDocs content tab block (===)
59    pub in_content_tab: bool,
60    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
61    pub in_mkdocs_html_markdown: bool,
62    /// Whether this line is a definition list item (: definition)
63    pub in_definition_list: bool,
64    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
65    pub in_obsidian_comment: bool,
66    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
67    pub in_pymdown_block: bool,
68    /// Whether this line is inside a kramdown extension block ({::comment}...{:/comment}, {::nomarkdown}...{:/nomarkdown})
69    pub in_kramdown_extension_block: bool,
70    /// Whether this line is a kramdown block IAL ({:.class #id}) or ALD ({:ref: .class})
71    pub is_kramdown_block_ial: bool,
72    /// Whether this line is inside a JSX component block (MDX only, e.g. `<Tabs>...</Tabs>`)
73    pub in_jsx_block: bool,
74    /// Whether this line is inside a footnote definition body (continuation lines)
75    pub in_footnote_definition: bool,
76}
77
78impl LineInfo {
79    /// Get the line content as a string slice from the source document
80    pub fn content<'a>(&self, source: &'a str) -> &'a str {
81        &source[self.byte_offset..self.byte_offset + self.byte_len]
82    }
83
84    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
85    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
86    /// but in MkDocs flavor it's actually container content that should be preserved.
87    #[inline]
88    pub fn in_mkdocs_container(&self) -> bool {
89        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
90    }
91
92    /// Whether this line could be part of a paragraph block (CommonMark `paragraph` token).
93    ///
94    /// Returns true for ordinary prose lines, including those inside blockquotes and list items.
95    /// Returns false for lines that belong to non-paragraph blocks: headings, code blocks,
96    /// HTML blocks, math blocks, horizontal rules, front matter, structural div markers, and
97    /// flavor-specific extension blocks. This is the per-line view; cross-line constructs like
98    /// setext underlines aren't visible here and need additional context to detect.
99    ///
100    /// Used by rules (e.g. MD009 strict mode) that need to distinguish "trailing whitespace
101    /// could produce a meaningful `<br>`" from "trailing whitespace is on a structural boundary."
102    #[inline]
103    pub fn is_paragraph_context(&self) -> bool {
104        !self.in_code_block
105            && !self.in_front_matter
106            && !self.in_html_block
107            && !self.in_html_comment
108            && !self.in_math_block
109            && !self.is_horizontal_rule
110            && !self.is_div_marker
111            && !self.in_pymdown_block
112            && !self.in_kramdown_extension_block
113            && !self.is_kramdown_block_ial
114            && self.heading.is_none()
115    }
116}
117
/// Details of a single list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
132
/// The syntactic style a heading is written in
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading, written with leading hashes (# Heading)
    ATX,
    /// Setext heading underlined with `=`
    Setext1,
    /// Setext heading underlined with `-`
    Setext2,
}
143
144/// Parsed link information
145#[derive(Debug, Clone)]
146pub struct ParsedLink<'a> {
147    /// Line number (1-indexed)
148    pub line: usize,
149    /// Start column (0-indexed) in the line
150    pub start_col: usize,
151    /// End column (0-indexed) in the line
152    pub end_col: usize,
153    /// Byte offset in document
154    pub byte_offset: usize,
155    /// End byte offset in document
156    pub byte_end: usize,
157    /// Link text
158    pub text: Cow<'a, str>,
159    /// Link URL or reference
160    pub url: Cow<'a, str>,
161    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
162    /// after backslash-escape handling. `None` when the link has no title or is a
163    /// reference style without a matched definition.
164    pub title: Option<Cow<'a, str>>,
165    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
166    pub is_reference: bool,
167    /// Reference ID for reference links
168    pub reference_id: Option<Cow<'a, str>>,
169    /// Link type from pulldown-cmark
170    pub link_type: LinkType,
171}
172
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// Reference text that could not be resolved
    pub reference: String,
    /// Byte range the link occupies in the source document
    pub span: std::ops::Range<usize>,
}
181
/// A footnote reference found while parsing (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote ID, with the leading ^ removed
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
}
192
193/// Parsed image information
194#[derive(Debug, Clone)]
195pub struct ParsedImage<'a> {
196    /// Line number (1-indexed)
197    pub line: usize,
198    /// Start column (0-indexed) in the line
199    pub start_col: usize,
200    /// End column (0-indexed) in the line
201    pub end_col: usize,
202    /// Byte offset in document
203    pub byte_offset: usize,
204    /// End byte offset in document
205    pub byte_end: usize,
206    /// Alt text
207    pub alt_text: Cow<'a, str>,
208    /// Image URL or reference
209    pub url: Cow<'a, str>,
210    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
211    /// after backslash-escape handling. `None` when the image has no title or is a
212    /// reference style without a matched definition.
213    pub title: Option<Cow<'a, str>>,
214    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
215    pub is_reference: bool,
216    /// Reference ID for reference images
217    pub reference_id: Option<Cow<'a, str>>,
218    /// Link type from pulldown-cmark
219    pub link_type: LinkType,
220}
221
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
    /// Byte offset at which the reference definition begins
    pub byte_offset: usize,
    /// Byte offset at which the reference definition ends
    pub byte_end: usize,
    /// Byte offset at which the title begins (when present; the quote is included)
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends (when present; the quote is included)
    pub title_byte_end: Option<usize>,
}
242
/// A code span found while parsing the document
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the code span begins
    pub line: usize,
    /// 1-indexed line on which the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Text inside the code span, backticks excluded
    pub content: String,
}
263
/// A math span found while parsing: inline `$...$` or display `$$...$$`
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line on which the math span begins
    pub line: usize,
    /// 1-indexed line on which the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// Text between the math delimiters
    pub content: String,
}
284
285/// Information about a heading
286#[derive(Debug, Clone)]
287pub struct HeadingInfo {
288    /// Heading level (1-6 for ATX, 1-2 for Setext)
289    pub level: u8,
290    /// Style of heading
291    pub style: HeadingStyle,
292    /// The heading marker (# characters or underline)
293    pub marker: String,
294    /// Column where the marker starts (0-based)
295    pub marker_column: usize,
296    /// Column where heading text starts
297    pub content_column: usize,
298    /// The heading text (without markers and without custom ID syntax)
299    pub text: String,
300    /// Custom header ID if present (e.g., from {#custom-id} syntax)
301    pub custom_id: Option<String>,
302    /// Original heading text including custom ID syntax
303    pub raw_text: String,
304    /// Whether it has a closing sequence (for ATX)
305    pub has_closing_sequence: bool,
306    /// The closing sequence if present
307    pub closing_sequence: String,
308    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
309    /// False for malformed headings like `#NoSpace` that MD018 should flag
310    pub is_valid: bool,
311}
312
313/// A valid heading from a filtered iteration
314///
315/// Only includes headings that are CommonMark-compliant (have space after #).
316/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
317#[derive(Debug, Clone)]
318pub struct ValidHeading<'a> {
319    /// The 1-indexed line number in the document
320    pub line_num: usize,
321    /// Reference to the heading information
322    pub heading: &'a HeadingInfo,
323    /// Reference to the full line info (for rules that need additional context)
324    pub line_info: &'a LineInfo,
325}
326
327/// Iterator over valid CommonMark headings in a document
328///
329/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
330/// but should not be processed by other heading rules.
331pub struct ValidHeadingsIter<'a> {
332    lines: &'a [LineInfo],
333    current_index: usize,
334}
335
336impl<'a> ValidHeadingsIter<'a> {
337    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
338        Self {
339            lines,
340            current_index: 0,
341        }
342    }
343}
344
345impl<'a> Iterator for ValidHeadingsIter<'a> {
346    type Item = ValidHeading<'a>;
347
348    fn next(&mut self) -> Option<Self::Item> {
349        while self.current_index < self.lines.len() {
350            let idx = self.current_index;
351            self.current_index += 1;
352
353            let line_info = &self.lines[idx];
354            if let Some(heading) = line_info.heading.as_deref()
355                && heading.is_valid
356            {
357                return Some(ValidHeading {
358                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
359                    heading,
360                    line_info,
361                });
362            }
363        }
364        None
365    }
366}
367
/// Details of a single blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// 0-based column at which the first > begins
    pub marker_column: usize,
    /// Blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Text following the blockquote marker(s)
    pub content: String,
    /// Whether more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
}
382
/// Details of a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// Whether the list is ordered or unordered
    pub is_ordered: bool,
    /// Consistent marker for unordered lists, if there is one
    pub marker: Option<String>,
    /// Blockquote prefix of this list (empty when not inside a blockquote)
    pub blockquote_prefix: String,
    /// Lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for top-level lists)
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
403
/// Per-character counts used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of # characters (headings)
    pub hash_count: usize,
    /// Number of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of + characters (lists)
    pub plus_count: usize,
    /// Number of > characters (blockquotes)
    pub gt_count: usize,
    /// Number of | characters (tables)
    pub pipe_count: usize,
    /// Number of [ characters (links, images)
    pub bracket_count: usize,
    /// Number of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of ! characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
432
/// An HTML tag parsed ahead of time
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Name of the tag (e.g., "div", "img", "br")
    pub tag_name: String,
    /// `true` for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// `true` for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
}
453
/// An emphasis span parsed ahead of time
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Emphasis marker character ('*' or '_')
    pub marker: char,
    /// Text inside the emphasis
    pub content: String,
}
472
/// A table row parsed ahead of time
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// `true` for a separator row (made up only of |, -, :, and spaces)
    pub is_separator: bool,
    /// Count of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment information taken from the separator row;
    /// each entry is one of "left", "center", "right", "none"
    pub column_alignments: Vec<String>,
}
485
/// A bare URL (one not inside a link) parsed ahead of time
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// The URL text
    pub url: String,
}
502
/// A lazy continuation line, as detected by pulldown-cmark.
///
/// A lazy continuation is text that carries on a list item paragraph while being indented
/// less than expected.
#[derive(Debug, Clone)]
pub struct LazyContLine {
    /// Line number, 1-indexed
    pub line_num: usize,
    /// Indentation that was expected
    pub expected_indent: usize,
    /// Indentation the line currently has
    pub current_indent: usize,
    /// Depth of blockquote nesting
    pub blockquote_level: usize,
}
518
519/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
520/// CommonMark rules for thematic breaks (horizontal rules):
521/// - May have 0-3 spaces of leading indentation (but NOT tabs)
522/// - Must have 3+ of the same character (-, *, or _)
523/// - May have spaces between characters
524/// - No other characters allowed
525pub fn is_horizontal_rule_line(line: &str) -> bool {
526    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
527    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
528    if leading_spaces > 3 || line.starts_with('\t') {
529        return false;
530    }
531
532    is_horizontal_rule_content(line.trim())
533}
534
/// Check whether already-trimmed content has the shape of a horizontal rule.
///
/// The content must begin with `-`, `*`, or `_`, contain at least three of that same
/// character, and otherwise hold nothing but spaces and tabs.
/// For full CommonMark compliance including the indentation check, use
/// `is_horizontal_rule_line`.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than 3 bytes can never hold three rule characters.
    if trimmed.len() < 3 {
        return false;
    }

    // The first character decides which rule character the rest must repeat.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Tally the rule character over the whole string (the first one included);
    // anything that is neither the marker nor a space/tab disqualifies the line.
    let mut marker_total = 0;
    for c in trimmed.chars() {
        if c == marker {
            marker_total += 1;
        } else if !matches!(c, ' ' | '\t') {
            return false;
        }
    }

    marker_total >= 3
}