// rumdl_lib/lint_context/types.rs
use pulldown_cmark::LinkType;
use std::borrow::Cow;

4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7 /// Byte offset where this line starts in the document
8 pub byte_offset: usize,
9 /// Length of the line in bytes (without newline)
10 pub byte_len: usize,
11 /// Number of bytes of leading whitespace (for substring extraction)
12 pub indent: usize,
13 /// Visual column width of leading whitespace (with proper tab expansion)
14 /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15 /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16 pub visual_indent: usize,
17 /// Whether the line is blank (empty or only whitespace)
18 pub is_blank: bool,
19 /// Whether this line is inside a code block
20 pub in_code_block: bool,
21 /// Whether this line is inside front matter
22 pub in_front_matter: bool,
23 /// Whether this line is inside an HTML block
24 pub in_html_block: bool,
25 /// Whether this line is inside an HTML comment
26 pub in_html_comment: bool,
27 /// List item information if this line starts a list item
28 /// Boxed to reduce LineInfo size: most lines are not list items
29 pub list_item: Option<Box<ListItemInfo>>,
30 /// Heading information if this line is a heading
31 /// Boxed to reduce LineInfo size: most lines are not headings
32 pub heading: Option<Box<HeadingInfo>>,
33 /// Blockquote information if this line is a blockquote
34 /// Boxed to reduce LineInfo size: most lines are not blockquotes
35 pub blockquote: Option<Box<BlockquoteInfo>>,
36 /// Whether this line is inside a mkdocstrings autodoc block
37 pub in_mkdocstrings: bool,
38 /// Whether this line is part of an ESM import/export block (MDX only)
39 pub in_esm_block: bool,
40 /// Whether this line is a continuation of a multi-line code span from a previous line
41 pub in_code_span_continuation: bool,
42 /// Whether this line is a horizontal rule (---, ***, ___, etc.)
43 /// Pre-computed for consistent detection across all rules
44 pub is_horizontal_rule: bool,
45 /// Whether this line is inside a math block ($$ ... $$)
46 pub in_math_block: bool,
47 /// Whether this line is inside a Quarto div block (::: ... :::)
48 pub in_quarto_div: bool,
49 /// Whether this line is a Quarto/Pandoc div marker (opening ::: {.class} or closing :::)
50 /// Analogous to `is_horizontal_rule` — marks structural delimiters that are not paragraph text
51 pub is_div_marker: bool,
52 /// Whether this line contains or is inside a JSX expression (MDX only)
53 pub in_jsx_expression: bool,
54 /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
55 pub in_mdx_comment: bool,
56 /// Whether this line is inside an MkDocs admonition block (!!! or ???)
57 pub in_admonition: bool,
58 /// Whether this line is inside an MkDocs content tab block (===)
59 pub in_content_tab: bool,
60 /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
61 pub in_mkdocs_html_markdown: bool,
62 /// Whether this line is a definition list item (: definition)
63 pub in_definition_list: bool,
64 /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
65 pub in_obsidian_comment: bool,
66 /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
67 pub in_pymdown_block: bool,
68 /// Whether this line is inside a kramdown extension block ({::comment}...{:/comment}, {::nomarkdown}...{:/nomarkdown})
69 pub in_kramdown_extension_block: bool,
70 /// Whether this line is a kramdown block IAL ({:.class #id}) or ALD ({:ref: .class})
71 pub is_kramdown_block_ial: bool,
72 /// Whether this line is inside a JSX component block (MDX only, e.g. `<Tabs>...</Tabs>`)
73 pub in_jsx_block: bool,
74 /// Whether this line is inside a footnote definition body (continuation lines)
75 pub in_footnote_definition: bool,
76}
77
78impl LineInfo {
79 /// Get the line content as a string slice from the source document
80 pub fn content<'a>(&self, source: &'a str) -> &'a str {
81 &source[self.byte_offset..self.byte_offset + self.byte_len]
82 }
83
84 /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
85 /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
86 /// but in MkDocs flavor it's actually container content that should be preserved.
87 #[inline]
88 pub fn in_mkdocs_container(&self) -> bool {
89 self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
90 }
91}
92
/// Details of a single list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker text (*, -, +, or a number followed by . or ))
    pub marker: String,
    /// `true` for ordered lists, `false` for unordered lists
    pub is_ordered: bool,
    /// The item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column where the marker begins
    pub marker_column: usize,
    /// Column where the content following the marker begins
    pub content_column: usize,
}

/// The syntactic style a heading was written in
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading underlined with `=`
    Setext1,
    /// Setext style heading underlined with `-`
    Setext2,
}

119/// Parsed link information
120#[derive(Debug, Clone)]
121pub struct ParsedLink<'a> {
122 /// Line number (1-indexed)
123 pub line: usize,
124 /// Start column (0-indexed) in the line
125 pub start_col: usize,
126 /// End column (0-indexed) in the line
127 pub end_col: usize,
128 /// Byte offset in document
129 pub byte_offset: usize,
130 /// End byte offset in document
131 pub byte_end: usize,
132 /// Link text
133 pub text: Cow<'a, str>,
134 /// Link URL or reference
135 pub url: Cow<'a, str>,
136 /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
137 pub is_reference: bool,
138 /// Reference ID for reference links
139 pub reference_id: Option<Cow<'a, str>>,
140 /// Link type from pulldown-cmark
141 pub link_type: LinkType,
142}
143
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that could not be resolved
    pub reference: String,
    /// The byte span within the source document
    pub span: std::ops::Range<usize>,
}

/// A parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID, without the `^` prefix
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
}

164/// Parsed image information
165#[derive(Debug, Clone)]
166pub struct ParsedImage<'a> {
167 /// Line number (1-indexed)
168 pub line: usize,
169 /// Start column (0-indexed) in the line
170 pub start_col: usize,
171 /// End column (0-indexed) in the line
172 pub end_col: usize,
173 /// Byte offset in document
174 pub byte_offset: usize,
175 /// End byte offset in document
176 pub byte_end: usize,
177 /// Alt text
178 pub alt_text: Cow<'a, str>,
179 /// Image URL or reference
180 pub url: Cow<'a, str>,
181 /// Whether this is a reference image ![alt][ref] vs inline 
182 pub is_reference: bool,
183 /// Reference ID for reference images
184 pub reference_id: Option<Cow<'a, str>>,
185 /// Link type from pulldown-cmark
186 pub link_type: LinkType,
187}
188
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// The reference ID (normalized to lowercase)
    pub id: String,
    /// The URL
    pub url: String,
    /// The title, when one is given
    pub title: Option<String>,
    /// Byte offset where the reference definition begins
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title begins (if present; includes the quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present; includes the quote)
    pub title_byte_end: Option<usize>,
}

/// A parsed inline code span
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line number where the code span begins
    pub line: usize,
    /// 1-indexed line number where the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
    /// End byte offset within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// The text inside the code span, without the backticks
    pub content: String,
}

/// A parsed math span (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line number where the math span begins
    pub line: usize,
    /// 1-indexed line number where the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
    /// End byte offset within the document
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// The text inside the math delimiters
    pub content: String,
}

252/// Information about a heading
253#[derive(Debug, Clone)]
254pub struct HeadingInfo {
255 /// Heading level (1-6 for ATX, 1-2 for Setext)
256 pub level: u8,
257 /// Style of heading
258 pub style: HeadingStyle,
259 /// The heading marker (# characters or underline)
260 pub marker: String,
261 /// Column where the marker starts (0-based)
262 pub marker_column: usize,
263 /// Column where heading text starts
264 pub content_column: usize,
265 /// The heading text (without markers and without custom ID syntax)
266 pub text: String,
267 /// Custom header ID if present (e.g., from {#custom-id} syntax)
268 pub custom_id: Option<String>,
269 /// Original heading text including custom ID syntax
270 pub raw_text: String,
271 /// Whether it has a closing sequence (for ATX)
272 pub has_closing_sequence: bool,
273 /// The closing sequence if present
274 pub closing_sequence: String,
275 /// Whether this is a valid CommonMark heading (ATX headings require space after #)
276 /// False for malformed headings like `#NoSpace` that MD018 should flag
277 pub is_valid: bool,
278}
279
280/// A valid heading from a filtered iteration
281///
282/// Only includes headings that are CommonMark-compliant (have space after #).
283/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
284#[derive(Debug, Clone)]
285pub struct ValidHeading<'a> {
286 /// The 1-indexed line number in the document
287 pub line_num: usize,
288 /// Reference to the heading information
289 pub heading: &'a HeadingInfo,
290 /// Reference to the full line info (for rules that need additional context)
291 pub line_info: &'a LineInfo,
292}
293
294/// Iterator over valid CommonMark headings in a document
295///
296/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
297/// but should not be processed by other heading rules.
298pub struct ValidHeadingsIter<'a> {
299 lines: &'a [LineInfo],
300 current_index: usize,
301}
302
303impl<'a> ValidHeadingsIter<'a> {
304 pub(super) fn new(lines: &'a [LineInfo]) -> Self {
305 Self {
306 lines,
307 current_index: 0,
308 }
309 }
310}
311
312impl<'a> Iterator for ValidHeadingsIter<'a> {
313 type Item = ValidHeading<'a>;
314
315 fn next(&mut self) -> Option<Self::Item> {
316 while self.current_index < self.lines.len() {
317 let idx = self.current_index;
318 self.current_index += 1;
319
320 let line_info = &self.lines[idx];
321 if let Some(heading) = line_info.heading.as_deref()
322 && heading.is_valid
323 {
324 return Some(ValidHeading {
325 line_num: idx + 1, // Convert 0-indexed to 1-indexed
326 heading,
327 line_info,
328 });
329 }
330 }
331 None
332 }
333}
334
/// Details of a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// The nesting level (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// 0-based column where the first `>` begins
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// The content following the blockquote marker(s)
    pub content: String,
    /// Whether more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
}

/// Details of a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line number where the list begins
    pub start_line: usize,
    /// 1-indexed line number where the list ends
    pub end_line: usize,
    /// Whether the list is ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// The blockquote prefix for this list (empty when not in a blockquote)
    pub blockquote_prefix: String,
    /// The lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// The nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// The widest marker seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}

/// Per-character counts used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of `` ` `` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}

/// A pre-parsed HTML tag
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
    /// End byte offset within the document
    pub byte_end: usize,
    /// The tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether this is a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether this is a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
}

/// A pre-parsed emphasis span
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
    /// End byte offset within the document
    pub byte_end: usize,
    /// The emphasis marker character ('*' or '_')
    pub marker: char,
    /// The text inside the emphasis
    pub content: String,
}

/// A pre-parsed table row
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// Whether this is a separator row (contains only `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// The number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from the separator row; each entry is one of
    /// "left", "center", "right", or "none"
    pub column_alignments: Vec<String>,
}

/// A pre-parsed bare URL (one that is not part of a link)
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Start byte offset within the document
    pub byte_offset: usize,
    /// End byte offset within the document
    pub byte_end: usize,
    /// The URL text
    pub url: String,
}

/// A lazy continuation line detected by pulldown-cmark.
///
/// Lazy continuation happens when text continues a list item paragraph with less
/// indentation than expected.
#[derive(Debug, Clone)]
pub struct LazyContLine {
    /// 1-indexed line number
    pub line_num: usize,
    /// The indentation that was expected
    pub expected_indent: usize,
    /// The indentation the line actually has
    pub current_indent: usize,
    /// The blockquote nesting level
    pub blockquote_level: usize,
}

/// Check whether a line is a horizontal rule (---, ***, ___) per the CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between the characters
/// - No other characters allowed
///
/// Returns `true` only when `line` satisfies all of the above. Trailing
/// whitespace is ignored.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // Strip spaces only: any other leading whitespace is not valid indentation.
    let unindented = line.trim_start_matches(' ');
    let leading_spaces = line.len() - unindented.len();

    // A tab anywhere in the indentation (at the very start, or after 1-3 spaces)
    // expands to at least column 4, so the line cannot be a thematic break.
    if leading_spaces > 3 || unindented.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(unindented.trim_end())
}

/// Check whether already-trimmed content matches the horizontal rule pattern.
///
/// The first character must be `-`, `*`, or `_`; every later character must be
/// that same character, a space, or a tab; and the rule character must occur at
/// least three times. Leading whitespace is NOT skipped here.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including the
/// indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fast path: three ASCII rule characters need at least three bytes.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(rule_char @ ('-' | '*' | '_')) = trimmed.chars().next() else {
        return false;
    };

    // Tally the rule character, bailing out on anything that is neither the
    // rule character nor an interior space/tab.
    let mut rule_chars = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == rule_char => rule_chars += 1,
            ' ' | '\t' => {}
            _ => return false,
        }
    }
    rule_chars >= 3
}