rumdl_lib/lint_context/types.rs
1use pulldown_cmark::LinkType;
2use std::borrow::Cow;
3
4/// Pre-computed information about a line
5#[derive(Debug, Clone)]
6pub struct LineInfo {
7 /// Byte offset where this line starts in the document
8 pub byte_offset: usize,
9 /// Length of the line in bytes (without newline)
10 pub byte_len: usize,
11 /// Number of bytes of leading whitespace (for substring extraction)
12 pub indent: usize,
13 /// Visual column width of leading whitespace (with proper tab expansion)
14 /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
15 /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
16 pub visual_indent: usize,
17 /// Whether the line is blank (empty or only whitespace)
18 pub is_blank: bool,
19 /// Whether this line is inside a code block
20 pub in_code_block: bool,
21 /// Whether this line is inside front matter
22 pub in_front_matter: bool,
23 /// Whether this line is inside an HTML block
24 pub in_html_block: bool,
25 /// Whether this line is inside an HTML comment
26 pub in_html_comment: bool,
27 /// List item information if this line starts a list item
28 pub list_item: Option<ListItemInfo>,
29 /// Heading information if this line is a heading
30 pub heading: Option<HeadingInfo>,
31 /// Blockquote information if this line is a blockquote
32 pub blockquote: Option<BlockquoteInfo>,
33 /// Whether this line is inside a mkdocstrings autodoc block
34 pub in_mkdocstrings: bool,
35 /// Whether this line is part of an ESM import/export block (MDX only)
36 pub in_esm_block: bool,
37 /// Whether this line is a continuation of a multi-line code span from a previous line
38 pub in_code_span_continuation: bool,
39 /// Whether this line is a horizontal rule (---, ***, ___, etc.)
40 /// Pre-computed for consistent detection across all rules
41 pub is_horizontal_rule: bool,
42 /// Whether this line is inside a math block ($$ ... $$)
43 pub in_math_block: bool,
44 /// Whether this line is inside a Quarto div block (::: ... :::)
45 pub in_quarto_div: bool,
46 /// Whether this line contains or is inside a JSX expression (MDX only)
47 pub in_jsx_expression: bool,
48 /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
49 pub in_mdx_comment: bool,
50 /// Whether this line is inside a JSX component (MDX only)
51 pub in_jsx_component: bool,
52 /// Whether this line is inside a JSX fragment (MDX only)
53 pub in_jsx_fragment: bool,
54 /// Whether this line is inside an MkDocs admonition block (!!! or ???)
55 pub in_admonition: bool,
56 /// Whether this line is inside an MkDocs content tab block (===)
57 pub in_content_tab: bool,
58 /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
59 pub in_mkdocs_html_markdown: bool,
60 /// Whether this line is a definition list item (: definition)
61 pub in_definition_list: bool,
62 /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
63 pub in_obsidian_comment: bool,
64 /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
65 pub in_pymdown_block: bool,
66}
67
68impl LineInfo {
69 /// Get the line content as a string slice from the source document
70 pub fn content<'a>(&self, source: &'a str) -> &'a str {
71 &source[self.byte_offset..self.byte_offset + self.byte_len]
72 }
73
74 /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
75 /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
76 /// but in MkDocs flavor it's actually container content that should be preserved.
77 #[inline]
78 pub fn in_mkdocs_container(&self) -> bool {
79 self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
80 }
81}
82
/// Details of one list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
97
/// The syntactic style a heading is written in
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX heading, e.g. `# Heading`
    ATX,
    /// Setext heading underlined with `=`
    Setext1,
    /// Setext heading underlined with `-`
    Setext2,
}
108
109/// Parsed link information
110#[derive(Debug, Clone)]
111pub struct ParsedLink<'a> {
112 /// Line number (1-indexed)
113 pub line: usize,
114 /// Start column (0-indexed) in the line
115 pub start_col: usize,
116 /// End column (0-indexed) in the line
117 pub end_col: usize,
118 /// Byte offset in document
119 pub byte_offset: usize,
120 /// End byte offset in document
121 pub byte_end: usize,
122 /// Link text
123 pub text: Cow<'a, str>,
124 /// Link URL or reference
125 pub url: Cow<'a, str>,
126 /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
127 pub is_reference: bool,
128 /// Reference ID for reference links
129 pub reference_id: Option<Cow<'a, str>>,
130 /// Link type from pulldown-cmark
131 pub link_type: LinkType,
132}
133
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// Reference text that could not be resolved
    pub reference: String,
    /// Byte range the link occupies in the source document
    pub span: std::ops::Range<usize>,
}
142
/// A footnote reference found while parsing (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote ID, with the `^` prefix removed
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
}
155
156/// Parsed image information
157#[derive(Debug, Clone)]
158pub struct ParsedImage<'a> {
159 /// Line number (1-indexed)
160 pub line: usize,
161 /// Start column (0-indexed) in the line
162 pub start_col: usize,
163 /// End column (0-indexed) in the line
164 pub end_col: usize,
165 /// Byte offset in document
166 pub byte_offset: usize,
167 /// End byte offset in document
168 pub byte_end: usize,
169 /// Alt text
170 pub alt_text: Cow<'a, str>,
171 /// Image URL or reference
172 pub url: Cow<'a, str>,
173 /// Whether this is a reference image ![alt][ref] vs inline 
174 pub is_reference: bool,
175 /// Reference ID for reference images
176 pub reference_id: Option<Cow<'a, str>>,
177 /// Link type from pulldown-cmark
178 pub link_type: LinkType,
179}
180
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
    /// Byte offset at which the definition begins
    pub byte_offset: usize,
    /// Byte offset at which the definition ends
    pub byte_end: usize,
    /// Byte offset at which the title begins (quote included), when a title is present
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends (quote included), when a title is present
    pub title_byte_end: Option<usize>,
}
201
/// A code span found while parsing the document
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the code span begins
    pub line: usize,
    /// 1-indexed line on which the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Text inside the code span, backticks excluded
    pub content: String,
}
222
/// A math span found while parsing (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line on which the math span begins
    pub line: usize,
    /// 1-indexed line on which the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// Text between the math delimiters
    pub content: String,
}
243
244/// Information about a heading
245#[derive(Debug, Clone)]
246pub struct HeadingInfo {
247 /// Heading level (1-6 for ATX, 1-2 for Setext)
248 pub level: u8,
249 /// Style of heading
250 pub style: HeadingStyle,
251 /// The heading marker (# characters or underline)
252 pub marker: String,
253 /// Column where the marker starts (0-based)
254 pub marker_column: usize,
255 /// Column where heading text starts
256 pub content_column: usize,
257 /// The heading text (without markers and without custom ID syntax)
258 pub text: String,
259 /// Custom header ID if present (e.g., from {#custom-id} syntax)
260 pub custom_id: Option<String>,
261 /// Original heading text including custom ID syntax
262 pub raw_text: String,
263 /// Whether it has a closing sequence (for ATX)
264 pub has_closing_sequence: bool,
265 /// The closing sequence if present
266 pub closing_sequence: String,
267 /// Whether this is a valid CommonMark heading (ATX headings require space after #)
268 /// False for malformed headings like `#NoSpace` that MD018 should flag
269 pub is_valid: bool,
270}
271
272/// A valid heading from a filtered iteration
273///
274/// Only includes headings that are CommonMark-compliant (have space after #).
275/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
276#[derive(Debug, Clone)]
277pub struct ValidHeading<'a> {
278 /// The 1-indexed line number in the document
279 pub line_num: usize,
280 /// Reference to the heading information
281 pub heading: &'a HeadingInfo,
282 /// Reference to the full line info (for rules that need additional context)
283 pub line_info: &'a LineInfo,
284}
285
286/// Iterator over valid CommonMark headings in a document
287///
288/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
289/// but should not be processed by other heading rules.
290pub struct ValidHeadingsIter<'a> {
291 lines: &'a [LineInfo],
292 current_index: usize,
293}
294
295impl<'a> ValidHeadingsIter<'a> {
296 pub(super) fn new(lines: &'a [LineInfo]) -> Self {
297 Self {
298 lines,
299 current_index: 0,
300 }
301 }
302}
303
304impl<'a> Iterator for ValidHeadingsIter<'a> {
305 type Item = ValidHeading<'a>;
306
307 fn next(&mut self) -> Option<Self::Item> {
308 while self.current_index < self.lines.len() {
309 let idx = self.current_index;
310 self.current_index += 1;
311
312 let line_info = &self.lines[idx];
313 if let Some(heading) = &line_info.heading
314 && heading.is_valid
315 {
316 return Some(ValidHeading {
317 line_num: idx + 1, // Convert 0-indexed to 1-indexed
318 heading,
319 line_info,
320 });
321 }
322 }
323 None
324 }
325}
326
/// Details of one blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// Indentation preceding the blockquote marker
    pub indent: String,
    /// 0-based column at which the first `>` begins
    pub marker_column: usize,
    /// Blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Text following the blockquote marker(s)
    pub content: String,
    /// True when no space follows the marker
    pub has_no_space_after_marker: bool,
    /// True when more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
    /// True when this is an empty blockquote line that needs the MD028 fix
    pub needs_md028_fix: bool,
}
347
/// Details of one list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one
    pub is_ordered: bool,
    /// Consistent marker for unordered lists, if there is one
    pub marker: Option<String>,
    /// Blockquote prefix of this list (empty when not inside a blockquote)
    pub blockquote_prefix: String,
    /// Lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for top-level lists)
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
368
/// Per-character counts used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of `` ` `` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
397
/// An HTML tag parsed ahead of time
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Name of the tag (e.g., "div", "img", "br")
    pub tag_name: String,
    /// True for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// True for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
    /// The raw tag content
    pub raw_content: String,
}
420
/// An emphasis span parsed ahead of time
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Emphasis marker character (`'*'` or `'_'`)
    pub marker: char,
    /// How many markers are used (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Text inside the emphasis
    pub content: String,
}
441
/// A table row parsed ahead of time
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// True for a separator row (made up only of `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// How many columns the row has (pipe-separated cells)
    pub column_count: usize,
    /// Alignment information taken from the separator row:
    /// "left", "center", "right", "none"
    pub column_alignments: Vec<String>,
}
454
/// A bare URL (one not inside a link) parsed ahead of time
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// The URL text
    pub url: String,
    /// Kind of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
473
474/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
475/// CommonMark rules for thematic breaks (horizontal rules):
476/// - May have 0-3 spaces of leading indentation (but NOT tabs)
477/// - Must have 3+ of the same character (-, *, or _)
478/// - May have spaces between characters
479/// - No other characters allowed
480pub fn is_horizontal_rule_line(line: &str) -> bool {
481 // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
482 let leading_spaces = line.len() - line.trim_start_matches(' ').len();
483 if leading_spaces > 3 || line.starts_with('\t') {
484 return false;
485 }
486
487 is_horizontal_rule_content(line.trim())
488}
489
/// Check if trimmed content matches horizontal rule pattern.
///
/// The content matches when its first character is `-`, `*`, or `_`, every
/// other character is either that same marker, a space, or a tab, and the
/// marker occurs at least three times. Content shorter than 3 bytes never
/// matches.
///
/// This does not inspect leading indentation. Use `is_horizontal_rule_line` for
/// full CommonMark compliance including the indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which marker the rest of the content must use.
    let marker = match trimmed.chars().next() {
        Some(c) if c == '-' || c == '*' || c == '_' => c,
        _ => return false,
    };

    // Walk the characters directly; no intermediate buffer is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            // Any character other than the marker or whitespace disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
514
515/// Backwards-compatible alias for `is_horizontal_rule_content`
516pub fn is_horizontal_rule(trimmed: &str) -> bool {
517 is_horizontal_rule_content(trimmed)
518}