// rumdl_lib/lint_context/types.rs

use pulldown_cmark::LinkType;
use std::borrow::Cow;
3
4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7    /// Byte offset where this line starts in the document
8    pub byte_offset: usize,
9    /// Length of the line in bytes (without newline)
10    pub byte_len: usize,
11    /// Number of bytes of leading whitespace (for substring extraction)
12    pub indent: usize,
13    /// Visual column width of leading whitespace (with proper tab expansion)
14    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16    pub visual_indent: usize,
17    /// Whether the line is blank (empty or only whitespace)
18    pub is_blank: bool,
19    /// Whether this line is inside a code block
20    pub in_code_block: bool,
21    /// Whether this line is inside front matter
22    pub in_front_matter: bool,
23    /// Whether this line is inside an HTML block
24    pub in_html_block: bool,
25    /// Whether this line is inside an HTML comment
26    pub in_html_comment: bool,
27    /// List item information if this line starts a list item
28    /// Boxed to reduce LineInfo size: most lines are not list items
29    pub list_item: Option<Box<ListItemInfo>>,
30    /// Heading information if this line is a heading
31    /// Boxed to reduce LineInfo size: most lines are not headings
32    pub heading: Option<Box<HeadingInfo>>,
33    /// Blockquote information if this line is a blockquote
34    /// Boxed to reduce LineInfo size: most lines are not blockquotes
35    pub blockquote: Option<Box<BlockquoteInfo>>,
36    /// Whether this line is inside a mkdocstrings autodoc block
37    pub in_mkdocstrings: bool,
38    /// Whether this line is part of an ESM import/export block (MDX only)
39    pub in_esm_block: bool,
40    /// Whether this line is a continuation of a multi-line code span from a previous line
41    pub in_code_span_continuation: bool,
42    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
43    /// Pre-computed for consistent detection across all rules
44    pub is_horizontal_rule: bool,
45    /// Whether this line is inside a math block ($$ ... $$)
46    pub in_math_block: bool,
47    /// Whether this line is inside a Pandoc/Quarto div block (::: ... :::)
48    pub in_pandoc_div: bool,
49    /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
50    /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
51    pub is_div_marker: bool,
52    /// Whether this line contains or is inside a JSX expression (MDX only)
53    pub in_jsx_expression: bool,
54    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
55    pub in_mdx_comment: bool,
56    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
57    pub in_admonition: bool,
58    /// Whether this line is inside an MkDocs content tab block (===)
59    pub in_content_tab: bool,
60    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
61    pub in_mkdocs_html_markdown: bool,
62    /// Whether this line is a definition list item (: definition)
63    pub in_definition_list: bool,
64    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
65    pub in_obsidian_comment: bool,
66    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
67    pub in_pymdown_block: bool,
68    /// Whether this line is inside a kramdown extension block ({::comment}...{:/comment}, {::nomarkdown}...{:/nomarkdown})
69    pub in_kramdown_extension_block: bool,
70    /// Whether this line is a kramdown block IAL ({:.class #id}) or ALD ({:ref: .class})
71    pub is_kramdown_block_ial: bool,
72    /// Whether this line is inside a JSX component block (MDX only, e.g. `<Tabs>...</Tabs>`)
73    pub in_jsx_block: bool,
74    /// Whether this line is inside a footnote definition body (continuation lines)
75    pub in_footnote_definition: bool,
76    /// Whether this line is inside a MyST directive block (colon or backtick fence with `{name}`)
77    pub in_myst_directive: bool,
78    /// Whether this line is a MyST comment (`% comment`)
79    pub is_myst_comment: bool,
80}
81
82impl LineInfo {
83    /// Get the line content as a string slice from the source document
84    pub fn content<'a>(&self, source: &'a str) -> &'a str {
85        &source[self.byte_offset..self.byte_offset + self.byte_len]
86    }
87
88    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
89    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
90    /// but in MkDocs flavor it's actually container content that should be preserved.
91    #[inline]
92    pub fn in_mkdocs_container(&self) -> bool {
93        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
94    }
95
96    /// Whether this line could be part of a paragraph block (CommonMark `paragraph` token).
97    ///
98    /// Returns true for ordinary prose lines, including those inside blockquotes and list items.
99    /// Returns false for lines that belong to non-paragraph blocks: headings, code blocks,
100    /// HTML blocks, math blocks, horizontal rules, front matter, structural div markers, and
101    /// flavor-specific extension blocks. This is the per-line view; cross-line constructs like
102    /// setext underlines aren't visible here and need additional context to detect.
103    ///
104    /// Used by rules (e.g. MD009 strict mode) that need to distinguish "trailing whitespace
105    /// could produce a meaningful `<br>`" from "trailing whitespace is on a structural boundary."
106    #[inline]
107    pub fn is_paragraph_context(&self) -> bool {
108        !self.in_code_block
109            && !self.in_front_matter
110            && !self.in_html_block
111            && !self.in_html_comment
112            && !self.in_math_block
113            && !self.is_horizontal_rule
114            && !self.is_div_marker
115            && !self.in_pymdown_block
116            && !self.in_kramdown_extension_block
117            && !self.is_kramdown_block_ial
118            && !self.is_myst_comment
119            && self.heading.is_none()
120    }
121}
122
/// Information about a list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
137
/// Heading style type
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the style can be used as a
/// set/map key; equality on a fieldless enum is total.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
148
149/// Parsed link information
150#[derive(Debug, Clone)]
151pub struct ParsedLink<'a> {
152    /// Line number (1-indexed)
153    pub line: usize,
154    /// Start column (0-indexed) in the line
155    pub start_col: usize,
156    /// End column (0-indexed) in the line
157    pub end_col: usize,
158    /// Byte offset in document
159    pub byte_offset: usize,
160    /// End byte offset in document
161    pub byte_end: usize,
162    /// Link text
163    pub text: Cow<'a, str>,
164    /// Link URL or reference
165    pub url: Cow<'a, str>,
166    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
167    /// after backslash-escape handling. `None` when the link has no title or is a
168    /// reference style without a matched definition.
169    pub title: Option<Cow<'a, str>>,
170    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
171    pub is_reference: bool,
172    /// Reference ID for reference links
173    pub reference_id: Option<Cow<'a, str>>,
174    /// Link type from pulldown-cmark
175    pub link_type: LinkType,
176}
177
/// Information about a broken link reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
186
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
}
197
198/// Parsed image information
199#[derive(Debug, Clone)]
200pub struct ParsedImage<'a> {
201    /// Line number (1-indexed)
202    pub line: usize,
203    /// Start column (0-indexed) in the line
204    pub start_col: usize,
205    /// End column (0-indexed) in the line
206    pub end_col: usize,
207    /// Byte offset in document
208    pub byte_offset: usize,
209    /// End byte offset in document
210    pub byte_end: usize,
211    /// Alt text
212    pub alt_text: Cow<'a, str>,
213    /// Image URL or reference
214    pub url: Cow<'a, str>,
215    /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
216    /// after backslash-escape handling. `None` when the image has no title or is a
217    /// reference style without a matched definition.
218    pub title: Option<Cow<'a, str>>,
219    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
220    pub is_reference: bool,
221    /// Reference ID for reference images
222    pub reference_id: Option<Cow<'a, str>>,
223    /// Link type from pulldown-cmark
224    pub link_type: LinkType,
225}
226
/// Reference definition `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
247
/// Parsed code span information
///
/// A span may cross lines, hence the separate `line` / `end_line` fields.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
268
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// A span may cross lines, hence the separate `line` / `end_line` fields.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
289
290/// Information about a heading
291#[derive(Debug, Clone)]
292pub struct HeadingInfo {
293    /// Heading level (1-6 for ATX, 1-2 for Setext)
294    pub level: u8,
295    /// Style of heading
296    pub style: HeadingStyle,
297    /// The heading marker (# characters or underline)
298    pub marker: String,
299    /// Column where the marker starts (0-based)
300    pub marker_column: usize,
301    /// Column where heading text starts
302    pub content_column: usize,
303    /// The heading text (without markers and without custom ID syntax)
304    pub text: String,
305    /// Custom header ID if present (e.g., from {#custom-id} syntax)
306    pub custom_id: Option<String>,
307    /// Original heading text including custom ID syntax
308    pub raw_text: String,
309    /// Whether it has a closing sequence (for ATX)
310    pub has_closing_sequence: bool,
311    /// The closing sequence if present
312    pub closing_sequence: String,
313    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
314    /// False for malformed headings like `#NoSpace` that MD018 should flag
315    pub is_valid: bool,
316}
317
318/// A valid heading from a filtered iteration
319///
320/// Only includes headings that are CommonMark-compliant (have space after #).
321/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
322#[derive(Debug, Clone)]
323pub struct ValidHeading<'a> {
324    /// The 1-indexed line number in the document
325    pub line_num: usize,
326    /// Reference to the heading information
327    pub heading: &'a HeadingInfo,
328    /// Reference to the full line info (for rules that need additional context)
329    pub line_info: &'a LineInfo,
330}
331
332/// Iterator over valid CommonMark headings in a document
333///
334/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
335/// but should not be processed by other heading rules.
336pub struct ValidHeadingsIter<'a> {
337    lines: &'a [LineInfo],
338    current_index: usize,
339}
340
341impl<'a> ValidHeadingsIter<'a> {
342    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
343        Self {
344            lines,
345            current_index: 0,
346        }
347    }
348}
349
350impl<'a> Iterator for ValidHeadingsIter<'a> {
351    type Item = ValidHeading<'a>;
352
353    fn next(&mut self) -> Option<Self::Item> {
354        while self.current_index < self.lines.len() {
355            let idx = self.current_index;
356            self.current_index += 1;
357
358            let line_info = &self.lines[idx];
359            if let Some(heading) = line_info.heading.as_deref()
360                && heading.is_valid
361            {
362                return Some(ValidHeading {
363                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
364                    heading,
365                    line_info,
366                });
367            }
368        }
369        None
370    }
371}
372
/// Information about a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
}
387
/// Information about a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like `start_line` — confirm against the producer.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
408
/// Character frequency data for fast content analysis
///
/// `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
437
/// Pre-parsed HTML tag information
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
}
458
/// Pre-parsed emphasis span information
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Content inside the emphasis
    pub content: String,
}
477
/// Pre-parsed table row information
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Each entry is one of "left", "center", "right", "none".
    pub column_alignments: Vec<String>,
}
490
/// Pre-parsed bare URL information (not in links)
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
}
507
/// A lazy continuation line detected by pulldown-cmark.
///
/// Lazy continuation occurs when text continues a list item paragraph but with less
/// indentation than expected.
#[derive(Debug, Clone)]
pub struct LazyContLine {
    /// 1-indexed line number
    pub line_num: usize,
    /// Expected indentation
    pub expected_indent: usize,
    /// Current indentation
    pub current_indent: usize,
    /// Blockquote nesting level
    pub blockquote_level: usize,
}
523
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces between characters
/// - No other characters allowed
///
/// A tab anywhere in the indentation disqualifies the line — both a tab at column 0
/// and a tab that follows one to three spaces (e.g. `" \t---"`), since a tab there
/// advances to the next tab stop and the line is indented by at least four columns.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
    let unindented = line.trim_start_matches(' ');
    let leading_spaces = line.len() - unindented.len();

    // Checking the text *after* the leading spaces catches a tab in any indentation
    // position, not only at column 0.
    if leading_spaces > 3 || unindented.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(unindented.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` is expected to have no surrounding whitespace: the first character must
/// be the rule character itself (`-`, `*`, or `_`). Spaces and tabs are allowed
/// between rule characters; any other character makes the content a non-match.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three rule characters.
    if trimmed.len() < 3 {
        return false;
    }

    let mut chars = trimmed.chars();
    let Some(first_char @ ('-' | '*' | '_')) = chars.next() else {
        return false;
    };

    // Count occurrences of the rule character, rejecting non-whitespace interlopers
    let mut count = 1; // Already matched the first character
    for ch in chars {
        if ch == first_char {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }
    count >= 3
}