Skip to main content

rumdl_lib/lint_context/
types.rs

1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
/// Pre-computed information about a single line of the document.
///
/// The line text itself is not stored here: `byte_offset` and `byte_len` locate it
/// inside the source string, and [`LineInfo::content`] slices it back out.
/// The remaining fields are per-line flags plus optional, boxed details for
/// list items, headings and blockquotes.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    /// Boxed to reduce LineInfo size: most lines are not list items
    pub list_item: Option<Box<ListItemInfo>>,
    /// Heading information if this line is a heading
    /// Boxed to reduce LineInfo size: most lines are not headings
    pub heading: Option<Box<HeadingInfo>>,
    /// Blockquote information if this line is a blockquote
    /// Boxed to reduce LineInfo size: most lines are not blockquotes
    pub blockquote: Option<Box<BlockquoteInfo>>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
    /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
    pub is_div_marker: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    /// One of the three flags combined by `in_mkdocs_container`
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    /// One of the three flags combined by `in_mkdocs_container`
    pub in_content_tab: bool,
    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
    /// One of the three flags combined by `in_mkdocs_container`
    pub in_mkdocs_html_markdown: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
    pub in_obsidian_comment: bool,
    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
    pub in_pymdown_block: bool,
}
73
74impl LineInfo {
75    /// Get the line content as a string slice from the source document
76    pub fn content<'a>(&self, source: &'a str) -> &'a str {
77        &source[self.byte_offset..self.byte_offset + self.byte_len]
78    }
79
80    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
81    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
82    /// but in MkDocs flavor it's actually container content that should be preserved.
83    #[inline]
84    pub fn in_mkdocs_container(&self) -> bool {
85        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
86    }
87}
88
/// Information about a list item
///
/// Stored boxed inside `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
103
/// Heading style type
///
/// Equality between variants is total, so `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
114
/// Parsed link information
///
/// Records the link's position both as line/column and as document byte offsets.
/// The string fields are `Cow<'a, str>`, so they may either borrow for `'a` or own
/// their data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): whether this column is inclusive or exclusive is not visible here — confirm
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
139
/// Information about a broken link reported by pulldown-cmark
///
/// Pairs the unresolved reference text with where it occurs in the source.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (a half-open `start..end` range)
    pub span: std::ops::Range<usize>,
}
148
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Located by line number and by document byte offsets.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
161
/// Parsed image information
///
/// Field-for-field parallel to `ParsedLink`, with `alt_text` in place of the link text.
/// The string fields are `Cow<'a, str>`, so they may either borrow for `'a` or own
/// their data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
186
/// Reference definition `[ref]: url "title"`
///
/// All string fields are owned. The title and its byte positions are optional
/// because a definition may omit the title.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
207
/// Parsed code span information
///
/// A span may cover several lines, so both the starting and ending line are recorded.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
228
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Same positional layout as `CodeSpan`; `is_display` tells the two delimiter forms apart.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
249
/// Information about a heading
///
/// Stored boxed inside `LineInfo::heading`. Malformed headings are also represented
/// here and are distinguished by `is_valid`; `ValidHeadingsIter` skips those.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
277
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; it borrows from the
/// line slice being iterated rather than copying heading data.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
291
292/// Iterator over valid CommonMark headings in a document
293///
294/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
295/// but should not be processed by other heading rules.
296pub struct ValidHeadingsIter<'a> {
297    lines: &'a [LineInfo],
298    current_index: usize,
299}
300
impl<'a> ValidHeadingsIter<'a> {
    /// Create an iterator over `lines`, positioned at the first entry.
    ///
    /// Visible only to the parent module (`pub(super)`).
    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
        Self {
            lines,
            // Scanning starts at the first line of the slice
            current_index: 0,
        }
    }
}
309
310impl<'a> Iterator for ValidHeadingsIter<'a> {
311    type Item = ValidHeading<'a>;
312
313    fn next(&mut self) -> Option<Self::Item> {
314        while self.current_index < self.lines.len() {
315            let idx = self.current_index;
316            self.current_index += 1;
317
318            let line_info = &self.lines[idx];
319            if let Some(heading) = line_info.heading.as_deref()
320                && heading.is_valid
321            {
322                return Some(ValidHeading {
323                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
324                    heading,
325                    line_info,
326                });
327            }
328        }
329        None
330    }
331}
332
/// Information about a blockquote line
///
/// Stored boxed inside `LineInfo::blockquote`. Alongside the parsed pieces of the line
/// it carries pre-computed spacing flags about the text that follows the marker.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
353
/// Information about a list block
///
/// Describes a list as a whole (a line range plus shared properties), as opposed to
/// `ListItemInfo`, which describes a single item line.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like `start_line`/`end_line` — confirm against the producer
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
374
/// Character frequency data for fast content analysis
///
/// Each field counts one Markdown-significant character. The derived `Default`
/// yields a value with every counter at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
403
/// Pre-parsed HTML tag information
///
/// Located by line/column and by document byte offsets; `is_closing` and
/// `is_self_closing` classify the tag form.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
426
/// Pre-parsed emphasis span information
///
/// The marker character and how many times it repeats together identify the
/// kind of emphasis.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
447
/// Pre-parsed table row information
///
/// Identified by line number only; no column or byte positions are recorded.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column:
    /// "left", "center", "right", or "none"
    pub column_alignments: Vec<String>,
}
460
/// Pre-parsed bare URL information (not in links)
///
/// Located by line/column and by document byte offsets; the URL kind is carried
/// as a string in `url_type`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
479
480/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
481/// CommonMark rules for thematic breaks (horizontal rules):
482/// - May have 0-3 spaces of leading indentation (but NOT tabs)
483/// - Must have 3+ of the same character (-, *, or _)
484/// - May have spaces between characters
485/// - No other characters allowed
486pub fn is_horizontal_rule_line(line: &str) -> bool {
487    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
488    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
489    if leading_spaces > 3 || line.starts_with('\t') {
490        return false;
491    }
492
493    is_horizontal_rule_content(line.trim())
494}
495
/// Decide whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must begin with `-`, `*` or `_`, contain that same character at least
/// three times in total, and otherwise consist only of spaces and tabs.
/// Indentation is not examined here; use `is_horizontal_rule_line` for full
/// CommonMark compliance including the indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three marker characters need at least three bytes
    if trimmed.len() < 3 {
        return false;
    }

    let mut rest = trimmed.chars();
    let marker = match rest.next() {
        Some(c) if matches!(c, '-' | '*' | '_') => c,
        _ => return false,
    };

    // Tally markers (the first one is already counted); the fold stops with `None`
    // at the first character that is neither the marker nor a space/tab.
    let tally = rest.try_fold(1, |seen, ch| {
        if ch == marker {
            Some(seen + 1)
        } else if ch == ' ' || ch == '\t' {
            Some(seen)
        } else {
            None
        }
    });

    tally.map_or(false, |seen| seen >= 3)
}
520
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged and returns that function's result, so it performs
/// no indentation check of its own.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}