Skip to main content

rumdl_lib/lint_context/
types.rs

1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
/// Pre-computed information about a single line of the document.
///
/// Positions are stored as byte offsets into the source text rather than as an
/// owned copy of the line; `content` slices the text back out on demand. The
/// remaining fields are flags and optional boxed details describing which
/// Markdown (or flavor-specific) construct the line belongs to.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction).
    /// This is a byte count, not a visual width: a tab counts as one byte here.
    /// Use `visual_indent` for column-based comparisons.
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    /// Boxed to reduce LineInfo size: most lines are not list items
    pub list_item: Option<Box<ListItemInfo>>,
    /// Heading information if this line is a heading
    /// Boxed to reduce LineInfo size: most lines are not headings
    pub heading: Option<Box<HeadingInfo>>,
    /// Blockquote information if this line is a blockquote
    /// Boxed to reduce LineInfo size: most lines are not blockquotes
    pub blockquote: Option<Box<BlockquoteInfo>>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???).
    /// One of the three flags combined by `in_mkdocs_container`.
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===).
    /// One of the three flags combined by `in_mkdocs_container`.
    pub in_content_tab: bool,
    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.).
    /// One of the three flags combined by `in_mkdocs_container`.
    pub in_mkdocs_html_markdown: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
    pub in_obsidian_comment: bool,
    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
    pub in_pymdown_block: bool,
}
70
71impl LineInfo {
72    /// Get the line content as a string slice from the source document
73    pub fn content<'a>(&self, source: &'a str) -> &'a str {
74        &source[self.byte_offset..self.byte_offset + self.byte_len]
75    }
76
77    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
78    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
79    /// but in MkDocs flavor it's actually container content that should be preserved.
80    #[inline]
81    pub fn in_mkdocs_container(&self) -> bool {
82        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
83    }
84}
85
/// Information about a list item, attached to the line that starts the item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// (presumably `None` for unordered items; confirm against the parser)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    /// (presumably 0-based like `marker_column`; confirm)
    pub content_column: usize,
}
100
/// Heading style type.
///
/// Distinguishes the syntactic form used to write a heading. The enum has no
/// payload, so equality is total and `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
111
/// Parsed link information.
///
/// Text fields are `Cow<'a, str>`, so each may either borrow for `'a` or own
/// its data.
/// NOTE(review): whether columns count bytes or characters, and whether
/// `end_col` / `byte_end` are exclusive, is not visible here; confirm against
/// the code that builds these values.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    /// (presumably `None` for inline links; confirm)
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
136
/// Information about a broken link reported by pulldown-cmark.
///
/// Holds the unresolved reference text together with where it was found.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (half-open, as with any `Range`)
    pub span: std::ops::Range<usize>,
}
145
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`).
///
/// Records the footnote's identifier and its location in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `1` for `[^1]`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    /// (NOTE(review): presumably exclusive; confirm against the producer)
    pub byte_end: usize,
}
158
/// Parsed image information.
///
/// Text fields are `Cow<'a, str>`, so each may either borrow for `'a` or own
/// its data.
/// NOTE(review): whether columns count bytes or characters, and whether
/// `end_col` / `byte_end` are exclusive, is not visible here; confirm against
/// the code that builds these values.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    /// (presumably `None` for inline images; confirm)
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
183
/// Reference definition `[ref]: url "title"`.
///
/// All strings are owned. The two title offsets are optional and are only
/// meaningful when a title is present.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
204
/// Parsed code span information.
///
/// A code span may cover several lines, so both the starting and ending line
/// numbers are recorded.
/// NOTE(review): it is not visible here which line `end_col` is relative to
/// for multi-line spans, nor whether columns count bytes or characters; confirm.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
225
/// Parsed math span information (inline `$...$` or display `$$...$$`).
///
/// A math span may cover several lines, so both the starting and ending line
/// numbers are recorded.
/// NOTE(review): it is not visible here which line `end_col` is relative to
/// for multi-line spans, nor whether columns count bytes or characters; confirm.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math (`$$...$$`) vs inline (`$...$`)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
246
/// Information about a heading.
///
/// Malformed ATX headings are represented too and are marked with
/// `is_valid == false`; `ValidHeadingsIter` skips those entries.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    /// (presumably 0-based like `marker_column`; confirm)
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    /// (presumably an empty string when `has_closing_sequence` is false; confirm)
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
274
/// A valid heading from a filtered iteration.
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; both references
/// borrow from the slice of `LineInfo` being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
288
/// Iterator over valid CommonMark headings in a document.
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order; slice index + 1 is the line number.
    lines: &'a [LineInfo],
    /// Index of the next line to examine (0-based position into `lines`).
    current_index: usize,
}
297
298impl<'a> ValidHeadingsIter<'a> {
299    pub(super) fn new(lines: &'a [LineInfo]) -> Self {
300        Self {
301            lines,
302            current_index: 0,
303        }
304    }
305}
306
307impl<'a> Iterator for ValidHeadingsIter<'a> {
308    type Item = ValidHeading<'a>;
309
310    fn next(&mut self) -> Option<Self::Item> {
311        while self.current_index < self.lines.len() {
312            let idx = self.current_index;
313            self.current_index += 1;
314
315            let line_info = &self.lines[idx];
316            if let Some(heading) = line_info.heading.as_deref()
317                && heading.is_valid
318            {
319                return Some(ValidHeading {
320                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
321                    heading,
322                    line_info,
323                });
324            }
325        }
326        None
327    }
328}
329
/// Information about a blockquote line.
///
/// Besides the structural data (nesting, prefix, content), this carries
/// pre-computed spacing flags describing what follows the marker.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
350
/// Information about a list block: a run of lines forming one list.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    /// (NOTE(review): presumably inclusive; confirm against the producer)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed line numbers like `start_line`; confirm)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
371
/// Character frequency data for fast content analysis.
///
/// Each field counts occurrences of one character; the parenthesised notes name
/// the Markdown constructs that character can introduce. `Default` yields all
/// counts at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
400
/// Pre-parsed HTML tag information.
///
/// NOTE(review): whether columns count bytes or characters, and whether
/// `end_col` / `byte_end` are exclusive, is not visible here; confirm against
/// the code that builds these values.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
423
/// Pre-parsed emphasis span information.
///
/// NOTE(review): whether columns count bytes or characters, and whether
/// `end_col` / `byte_end` are exclusive, is not visible here; confirm against
/// the code that builds these values.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
444
/// Pre-parsed table row information.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column.
    /// (presumably only populated for separator rows; confirm against the producer)
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
457
/// Pre-parsed bare URL information (not in links).
///
/// NOTE(review): whether columns count bytes or characters, and whether
/// `end_col` / `byte_end` are exclusive, is not visible here; confirm against
/// the code that builds these values.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email"), stored as a plain string
    pub url_type: String,
}
476
477/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
478/// CommonMark rules for thematic breaks (horizontal rules):
479/// - May have 0-3 spaces of leading indentation (but NOT tabs)
480/// - Must have 3+ of the same character (-, *, or _)
481/// - May have spaces between characters
482/// - No other characters allowed
483pub fn is_horizontal_rule_line(line: &str) -> bool {
484    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
485    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
486    if leading_spaces > 3 || line.starts_with('\t') {
487        return false;
488    }
489
490    is_horizontal_rule_content(line.trim())
491}
492
/// Decide whether already-trimmed text has the shape of a horizontal rule.
///
/// The text qualifies when it begins with `-`, `*` or `_`, contains that same
/// character at least three times, and holds nothing else except spaces and
/// tabs. No indentation check is performed here; use
/// `is_horizontal_rule_line` for full CommonMark compliance including the
/// leading-indentation rule.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Every character that can appear in a rule is ASCII, so the scan can work
    // on raw bytes: any byte belonging to a multi-byte character is >= 0x80 and
    // can never equal the rule byte, a space, or a tab.
    let bytes = trimmed.as_bytes();

    let rule_byte = match bytes.first().copied() {
        Some(b) if b"-*_".contains(&b) => b,
        _ => return false,
    };

    // Single pass: tally rule bytes and stop at the first foreign byte.
    let mut rule_bytes_seen = 0usize;
    let only_rule_and_blanks = bytes.iter().all(|&b| {
        if b == rule_byte {
            rule_bytes_seen += 1;
            true
        } else {
            b == b' ' || b == b'\t'
        }
    });

    only_rule_and_blanks && rule_bytes_seen >= 3
}
517
/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// Forwards the argument unchanged and returns the result as-is. Like the
/// function it wraps, it expects already-trimmed text and performs no
/// leading-indentation check.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}