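//! `rumdl_lib/lint_context/types.rs`
//!
//! Data types holding pre-computed and pre-parsed information about a Markdown
//! document (lines, links, images, headings, lists, code spans, and so on), plus
//! helpers for detecting horizontal rules.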

1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
/// Pre-computed information about a single line of the source document.
///
/// The line's text is not stored here; it is described by `byte_offset` and `byte_len`
/// and can be recovered from the source with [`LineInfo::content`]. The `in_*` / `is_*`
/// flags record which construct (if any) the line belongs to. Several flags only apply
/// to a specific Markdown flavor, as noted on the individual fields.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item (`None` otherwise)
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading (`None` otherwise)
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote (`None` otherwise)
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
    pub in_mkdocs_html_markdown: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
    pub in_obsidian_comment: bool,
    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
    pub in_pymdown_block: bool,
}
67
68impl LineInfo {
69    /// Get the line content as a string slice from the source document
70    pub fn content<'a>(&self, source: &'a str) -> &'a str {
71        &source[self.byte_offset..self.byte_offset + self.byte_len]
72    }
73
74    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
75    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
76    /// but in MkDocs flavor it's actually container content that should be preserved.
77    #[inline]
78    pub fn in_mkdocs_container(&self) -> bool {
79        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
80    }
81}
82
/// Information about a list item.
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    // NOTE(review): presumably `None` for unordered items — confirm against the producer.
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    // NOTE(review): presumably 0-based like `marker_column` — confirm.
    pub content_column: usize,
}
97
/// Heading style type.
///
/// Identifies which Markdown syntax produced a heading. The enum is fieldless, so
/// equality is total and `Eq` is derived alongside `PartialEq`; this lets values be
/// used wherever a full equivalence relation is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
108
/// Parsed link information.
///
/// String fields are `Cow<'a, str>`, so each may either borrow for the lifetime `'a`
/// or own its data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): inclusive vs. exclusive end is not visible here — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
133
/// Information about a broken link reported by pulldown-cmark.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (`start..end`, a half-open `Range`)
    pub span: std::ops::Range<usize>,
}
142
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`).
///
/// Records where the reference occurs (line and byte range) together with its ID.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `1` for `[^1]`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
155
/// Parsed image information.
///
/// Field layout parallels `ParsedLink`, with `alt_text` in place of the link text.
/// String fields are `Cow<'a, str>`, so each may either borrow for `'a` or own its data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): inclusive vs. exclusive end is not visible here — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
180
/// Reference definition `[ref]: url "title"`.
///
/// Holds the parsed pieces of the definition plus byte offsets locating the whole
/// definition and, when present, its title.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title (`None` when the definition has no title)
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
201
/// Parsed code span information.
///
/// A code span may cover more than one line, hence the separate `line` and `end_line`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): for multi-line spans this is presumably relative to `end_line` — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
222
/// Parsed math span information (inline `$...$` or display `$$...$$`).
///
/// A math span may cover more than one line, hence the separate `line` and `end_line`.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): for multi-line spans this is presumably relative to `end_line` — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math (`$$...$$`) vs inline (`$...$`)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
243
/// Information about a heading.
///
/// Stored in `LineInfo::heading`. Malformed ATX headings (e.g. `#NoSpace`) are also
/// represented here, distinguished by `is_valid == false`.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    // NOTE(review): presumably empty when `has_closing_sequence` is false — confirm.
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
271
/// A valid heading from a filtered iteration.
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is a borrowed view: both references live for `'a` and nothing is cloned.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
285
286/// Iterator over valid CommonMark headings in a document
287///
288/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
289/// but should not be processed by other heading rules.
290pub struct ValidHeadingsIter<'a> {
291    lines: &'a [LineInfo],
292    current_index: usize,
293}
294
295impl<'a> ValidHeadingsIter<'a> {
296    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
297        Self {
298            lines,
299            current_index: 0,
300        }
301    }
302}
303
304impl<'a> Iterator for ValidHeadingsIter<'a> {
305    type Item = ValidHeading<'a>;
306
307    fn next(&mut self) -> Option<Self::Item> {
308        while self.current_index < self.lines.len() {
309            let idx = self.current_index;
310            self.current_index += 1;
311
312            let line_info = &self.lines[idx];
313            if let Some(heading) = &line_info.heading
314                && heading.is_valid
315            {
316                return Some(ValidHeading {
317                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
318                    heading,
319                    line_info,
320                });
321            }
322        }
323        None
324    }
325}
326
/// Information about a blockquote line.
///
/// Stored in `LineInfo::blockquote`. Besides the parsed pieces of the line, it carries
/// pre-computed flags about spacing after the marker.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
347
/// Information about a list block.
///
/// Describes a whole list (its line range, kind, nesting and item lines) rather than
/// a single item; per-item details live in `ListItemInfo`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed line numbers like `start_line` — confirm.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
368
/// Character frequency data for fast content analysis.
///
/// Each field counts occurrences of one character; the parenthesised notes name the
/// Markdown constructs that character is associated with. The struct derives
/// `Default`, so `CharFrequency::default()` starts with every counter at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
397
/// Pre-parsed HTML tag information.
///
/// Describes one tag occurrence: where it is, its name, and whether it is an opening,
/// closing (`</tag>`) or self-closing (`<tag />`) tag.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): inclusive vs. exclusive end is not visible here — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
420
/// Pre-parsed emphasis span information.
///
/// `marker` and `marker_count` together identify the delimiter run (e.g. `*` x 2 for
/// bold); `content` is the text between the delimiters.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): inclusive vs. exclusive end is not visible here — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
441
/// Pre-parsed table row information.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Each entry is one of the strings "left", "center", "right", or "none".
    // NOTE(review): presumably empty for non-separator rows — confirm against the producer.
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
454
/// Pre-parsed bare URL information (not in links).
///
/// Records the location of a URL that appears as plain text, the URL itself, and a
/// string tag naming its kind.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    // NOTE(review): inclusive vs. exclusive end is not visible here — confirm.
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
473
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between and after the characters
/// - No other characters allowed
///
/// A tab anywhere in the leading indentation disqualifies the line, including a tab
/// that follows one to three spaces (e.g. `" \t---"`): with a tab stop of 4 such a tab
/// always reaches column 4 or beyond, which is indented-code territory.
///
/// Returns `true` only when the indentation is acceptable and the remaining content
/// satisfies [`is_horizontal_rule_content`].
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // Peel off leading spaces only; whatever follows must not be a tab.
    let rest = line.trim_start_matches(' ');
    let leading_spaces = line.len() - rest.len();
    if leading_spaces > 3 || rest.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(rest.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must start with the marker character (`-`, `*`, or `_`); input with leading
/// whitespace is rejected. The marker must occur at least three times, and the only other
/// characters permitted are spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // All markers and permitted separators are ASCII, so scanning bytes is equivalent to
    // scanning chars: every byte of a multi-byte character is >= 0x80 and is rejected.
    let bytes = trimmed.as_bytes();
    let marker = match bytes.first() {
        Some(&b) if matches!(b, b'-' | b'*' | b'_') => b,
        _ => return false,
    };

    let mut count = 0usize;
    for &b in bytes {
        if b == marker {
            count += 1;
        } else if b != b' ' && b != b'\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    // Also covers inputs shorter than three bytes.
    count >= 3
}
514
/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// Forwards `trimmed` unchanged and returns the callee's result. No indentation check is
/// performed here; `is_horizontal_rule_line` is the variant that inspects leading whitespace.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}