rumdl_lib/lint_context/types.rs
1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7 /// Byte offset where this line starts in the document
8 pub byte_offset: usize,
9 /// Length of the line in bytes (without newline)
10 pub byte_len: usize,
11 /// Number of bytes of leading whitespace (for substring extraction)
12 pub indent: usize,
13 /// Visual column width of leading whitespace (with proper tab expansion)
14 /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15 /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16 pub visual_indent: usize,
17 /// Whether the line is blank (empty or only whitespace)
18 pub is_blank: bool,
19 /// Whether this line is inside a code block
20 pub in_code_block: bool,
21 /// Whether this line is inside front matter
22 pub in_front_matter: bool,
23 /// Whether this line is inside an HTML block
24 pub in_html_block: bool,
25 /// Whether this line is inside an HTML comment
26 pub in_html_comment: bool,
27 /// List item information if this line starts a list item
28 /// Boxed to reduce LineInfo size: most lines are not list items
29 pub list_item: Option<Box<ListItemInfo>>,
30 /// Heading information if this line is a heading
31 /// Boxed to reduce LineInfo size: most lines are not headings
32 pub heading: Option<Box<HeadingInfo>>,
33 /// Blockquote information if this line is a blockquote
34 /// Boxed to reduce LineInfo size: most lines are not blockquotes
35 pub blockquote: Option<Box<BlockquoteInfo>>,
36 /// Whether this line is inside a mkdocstrings autodoc block
37 pub in_mkdocstrings: bool,
38 /// Whether this line is part of an ESM import/export block (MDX only)
39 pub in_esm_block: bool,
40 /// Whether this line is a continuation of a multi-line code span from a previous line
41 pub in_code_span_continuation: bool,
42 /// Whether this line is a horizontal rule (---, ***, ___, etc.)
43 /// Pre-computed for consistent detection across all rules
44 pub is_horizontal_rule: bool,
45 /// Whether this line is inside a math block ($$ ... $$)
46 pub in_math_block: bool,
47 /// Whether this line is inside a Pandoc/Quarto div block (::: ... :::)
48 pub in_pandoc_div: bool,
49 /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
50 /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
51 pub is_div_marker: bool,
52 /// Whether this line contains or is inside a JSX expression (MDX only)
53 pub in_jsx_expression: bool,
54 /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
55 pub in_mdx_comment: bool,
56 /// Whether this line is inside an MkDocs admonition block (!!! or ???)
57 pub in_admonition: bool,
58 /// Whether this line is inside an MkDocs content tab block (===)
59 pub in_content_tab: bool,
60 /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
61 pub in_mkdocs_html_markdown: bool,
62 /// Whether this line is a definition list item (: definition)
63 pub in_definition_list: bool,
64 /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
65 pub in_obsidian_comment: bool,
66 /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
67 pub in_pymdown_block: bool,
68 /// Whether this line is inside a kramdown extension block ({::comment}...{:/comment}, {::nomarkdown}...{:/nomarkdown})
69 pub in_kramdown_extension_block: bool,
70 /// Whether this line is a kramdown block IAL ({:.class #id}) or ALD ({:ref: .class})
71 pub is_kramdown_block_ial: bool,
72 /// Whether this line is inside a JSX component block (MDX only, e.g. `<Tabs>...</Tabs>`)
73 pub in_jsx_block: bool,
74 /// Whether this line is inside a footnote definition body (continuation lines)
75 pub in_footnote_definition: bool,
76 /// Whether this line is inside a MyST directive block (colon or backtick fence with `{name}`)
77 pub in_myst_directive: bool,
78 /// Whether this line is a MyST comment (`% comment`)
79 pub is_myst_comment: bool,
80}
81
82impl LineInfo {
83 /// Get the line content as a string slice from the source document
84 pub fn content<'a>(&self, source: &'a str) -> &'a str {
85 &source[self.byte_offset..self.byte_offset + self.byte_len]
86 }
87
88 /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
89 /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
90 /// but in MkDocs flavor it's actually container content that should be preserved.
91 #[inline]
92 pub fn in_mkdocs_container(&self) -> bool {
93 self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
94 }
95
96 /// Whether this line could be part of a paragraph block (CommonMark `paragraph` token).
97 ///
98 /// Returns true for ordinary prose lines, including those inside blockquotes and list items.
99 /// Returns false for lines that belong to non-paragraph blocks: headings, code blocks,
100 /// HTML blocks, math blocks, horizontal rules, front matter, structural div markers, and
101 /// flavor-specific extension blocks. This is the per-line view; cross-line constructs like
102 /// setext underlines aren't visible here and need additional context to detect.
103 ///
104 /// Used by rules (e.g. MD009 strict mode) that need to distinguish "trailing whitespace
105 /// could produce a meaningful `<br>`" from "trailing whitespace is on a structural boundary."
106 #[inline]
107 pub fn is_paragraph_context(&self) -> bool {
108 !self.in_code_block
109 && !self.in_front_matter
110 && !self.in_html_block
111 && !self.in_html_comment
112 && !self.in_math_block
113 && !self.is_horizontal_rule
114 && !self.is_div_marker
115 && !self.in_pymdown_block
116 && !self.in_kramdown_extension_block
117 && !self.is_kramdown_block_ial
118 && !self.is_myst_comment
119 && self.heading.is_none()
120 }
121}
122
/// Details of a single list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// Marker text as written (`*`, `-`, `+`, or a number followed by `.` or `)`)
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
137
/// Heading style type
///
/// A field-less enum, so equality is total: `Eq` and `Hash` are derived alongside
/// `PartialEq` so the style can be used as a set member or map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
148
149/// Parsed link information
150#[derive(Debug, Clone)]
151pub struct ParsedLink<'a> {
152 /// Line number (1-indexed)
153 pub line: usize,
154 /// Start column (0-indexed) in the line
155 pub start_col: usize,
156 /// End column (0-indexed) in the line
157 pub end_col: usize,
158 /// Byte offset in document
159 pub byte_offset: usize,
160 /// End byte offset in document
161 pub byte_end: usize,
162 /// Link text
163 pub text: Cow<'a, str>,
164 /// Link URL or reference
165 pub url: Cow<'a, str>,
166 /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
167 /// after backslash-escape handling. `None` when the link has no title or is a
168 /// reference style without a matched definition.
169 pub title: Option<Cow<'a, str>>,
170 /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
171 pub is_reference: bool,
172 /// Reference ID for reference links
173 pub reference_id: Option<Cow<'a, str>>,
174 /// Link type from pulldown-cmark
175 pub link_type: LinkType,
176}
177
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// Reference text for which no definition could be resolved
    pub reference: String,
    /// Byte range the link occupies in the source document
    pub span: std::ops::Range<usize>,
}
186
/// A footnote reference found in the document (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote ID with the `^` prefix stripped
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Byte offset in the document where the reference starts
    pub byte_offset: usize,
}
197
198/// Parsed image information
199#[derive(Debug, Clone)]
200pub struct ParsedImage<'a> {
201 /// Line number (1-indexed)
202 pub line: usize,
203 /// Start column (0-indexed) in the line
204 pub start_col: usize,
205 /// End column (0-indexed) in the line
206 pub end_col: usize,
207 /// Byte offset in document
208 pub byte_offset: usize,
209 /// End byte offset in document
210 pub byte_end: usize,
211 /// Alt text
212 pub alt_text: Cow<'a, str>,
213 /// Image URL or reference
214 pub url: Cow<'a, str>,
215 /// Inline title (without surrounding delimiters), as produced by pulldown-cmark
216 /// after backslash-escape handling. `None` when the image has no title or is a
217 /// reference style without a matched definition.
218 pub title: Option<Cow<'a, str>>,
219 /// Whether this is a reference image ![alt][ref] vs inline 
220 pub is_reference: bool,
221 /// Reference ID for reference images
222 pub reference_id: Option<Cow<'a, str>>,
223 /// Link type from pulldown-cmark
224 pub link_type: LinkType,
225}
226
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
    /// Byte offset at which the definition begins
    pub byte_offset: usize,
    /// Byte offset at which the definition ends
    pub byte_end: usize,
    /// Byte offset at which the title begins, quote included (only when a title is present)
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends, quote included (only when a title is present)
    pub title_byte_end: Option<usize>,
}
247
/// A code span extracted from the document
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the code span begins
    pub line: usize,
    /// 1-indexed line on which the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset of the span within the document
    pub byte_offset: usize,
    /// Byte offset within the document where the span ends
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Text between the backticks (backticks excluded)
    pub content: String,
}
268
/// A math span extracted from the document (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line on which the math span begins
    pub line: usize,
    /// 1-indexed line on which the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset of the span within the document
    pub byte_offset: usize,
    /// Byte offset within the document where the span ends
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// Text between the math delimiters
    pub content: String,
}
289
290/// Information about a heading
291#[derive(Debug, Clone)]
292pub struct HeadingInfo {
293 /// Heading level (1-6 for ATX, 1-2 for Setext)
294 pub level: u8,
295 /// Style of heading
296 pub style: HeadingStyle,
297 /// The heading marker (# characters or underline)
298 pub marker: String,
299 /// Column where the marker starts (0-based)
300 pub marker_column: usize,
301 /// Column where heading text starts
302 pub content_column: usize,
303 /// The heading text (without markers and without custom ID syntax)
304 pub text: String,
305 /// Custom header ID if present (e.g., from {#custom-id} syntax)
306 pub custom_id: Option<String>,
307 /// Original heading text including custom ID syntax
308 pub raw_text: String,
309 /// Whether it has a closing sequence (for ATX)
310 pub has_closing_sequence: bool,
311 /// The closing sequence if present
312 pub closing_sequence: String,
313 /// Whether this is a valid CommonMark heading (ATX headings require space after #)
314 /// False for malformed headings like `#NoSpace` that MD018 should flag
315 pub is_valid: bool,
316}
317
318/// A valid heading from a filtered iteration
319///
320/// Only includes headings that are CommonMark-compliant (have space after #).
321/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
322#[derive(Debug, Clone)]
323pub struct ValidHeading<'a> {
324 /// The 1-indexed line number in the document
325 pub line_num: usize,
326 /// Reference to the heading information
327 pub heading: &'a HeadingInfo,
328 /// Reference to the full line info (for rules that need additional context)
329 pub line_info: &'a LineInfo,
330}
331
332/// Iterator over valid CommonMark headings in a document
333///
334/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
335/// but should not be processed by other heading rules.
336pub struct ValidHeadingsIter<'a> {
337 lines: &'a [LineInfo],
338 current_index: usize,
339}
340
341impl<'a> ValidHeadingsIter<'a> {
342 pub(super) fn new(lines: &'a [LineInfo]) -> Self {
343 Self {
344 lines,
345 current_index: 0,
346 }
347 }
348}
349
350impl<'a> Iterator for ValidHeadingsIter<'a> {
351 type Item = ValidHeading<'a>;
352
353 fn next(&mut self) -> Option<Self::Item> {
354 while self.current_index < self.lines.len() {
355 let idx = self.current_index;
356 self.current_index += 1;
357
358 let line_info = &self.lines[idx];
359 if let Some(heading) = line_info.heading.as_deref()
360 && heading.is_valid
361 {
362 return Some(ValidHeading {
363 line_num: idx + 1, // Convert 0-indexed to 1-indexed
364 heading,
365 line_info,
366 });
367 }
368 }
369 None
370 }
371}
372
/// Details of a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// 0-based column at which the first `>` begins
    pub marker_column: usize,
    /// Blockquote prefix text (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Text that follows the blockquote marker(s)
    pub content: String,
    /// Whether more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
}
387
/// Details of a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one
    pub is_ordered: bool,
    /// Marker used consistently across an unordered list, if there is one
    pub marker: Option<String>,
    /// Blockquote prefix of the list (empty when the list is not in a blockquote)
    pub blockquote_prefix: String,
    /// Lines within the block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for a top-level list)
    pub nesting_level: usize,
    /// Widest marker seen in the block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
408
/// Per-character counts used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
437
/// An HTML tag parsed ahead of time
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset of the tag within the document
    pub byte_offset: usize,
    /// Byte offset within the document where the tag ends
    pub byte_end: usize,
    /// Name of the tag (e.g., "div", "img", "br")
    pub tag_name: String,
    /// `true` for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// `true` for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
}
458
/// An emphasis span parsed ahead of time
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset of the span within the document
    pub byte_offset: usize,
    /// Byte offset within the document where the span ends
    pub byte_end: usize,
    /// Emphasis marker character (`'*'` or `'_'`)
    pub marker: char,
    /// `true` for strong emphasis (`**`/`__`), `false` for ordinary emphasis (`*`/`_`)
    pub is_strong: bool,
    /// Text enclosed by the emphasis
    pub content: String,
}
479
/// A table row parsed ahead of time
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// `true` for a separator row (made up solely of `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// How many columns the row has (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info taken from the separator row: "left", "center", "right", "none"
    pub column_alignments: Vec<String>,
}
492
/// A bare URL (one not inside a link) parsed ahead of time
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset of the URL within the document
    pub byte_offset: usize,
    /// Byte offset within the document where the URL ends
    pub byte_end: usize,
    /// The URL text
    pub url: String,
}
509
/// A line that pulldown-cmark detected as a lazy continuation.
///
/// A lazy continuation is text that carries on a list item paragraph while being
/// indented less than expected.
#[derive(Debug, Clone)]
pub struct LazyContLine {
    /// Line number, 1-indexed
    pub line_num: usize,
    /// Indentation that was expected
    pub expected_indent: usize,
    /// Indentation the line actually has
    pub current_indent: usize,
    /// Nesting level of the enclosing blockquote
    pub blockquote_level: usize,
}
525
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between and after the characters
/// - No other characters allowed
///
/// A tab is rejected anywhere in the leading indentation, not only as the very first
/// character: a tab advances to the next column that is a multiple of 4, so a line such
/// as `" \t---"` is indented by four columns and cannot be a thematic break.
///
/// Returns `true` only when both the indentation and the content qualify.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();

    // Checking the remainder (rather than `line`) also catches a tab that follows 1-3 spaces.
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(after_spaces.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
///
/// `trimmed` is expected to have surrounding whitespace already removed. The content
/// matches when it starts with `-`, `*`, or `_`, contains that same character at least
/// three times, and holds nothing else except spaces and tabs.
///
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes can never hold three rule characters.
    if trimmed.len() < 3 {
        return false;
    }

    let mut chars = trimmed.chars();
    let Some(first_char @ ('-' | '*' | '_')) = chars.next() else {
        return false;
    };

    // Count occurrences of the rule character, rejecting non-whitespace interlopers
    let mut count = 1; // Already matched the first character
    for ch in chars {
        if ch == first_char {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }
    count >= 3
}