rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use lazy_static::lazy_static;
5use pulldown_cmark::{Event, Parser};
6use regex::Regex;
7
lazy_static! {
    // Comprehensive link pattern that captures both inline and reference links.
    // Flags: (?s) makes `.` match newlines; (?x) enables verbose mode, so the
    // whitespace and the trailing `# ...` notes inside the pattern are ignored
    // by the regex engine.
    static ref LINK_PATTERN: Regex = Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]          # Link text in group 1 (handles nested brackets)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap();

    // Image pattern (same shape and capture groups as LINK_PATTERN, but with a
    // leading `!`). Uses the same (?s) and (?x) flags.
    static ref IMAGE_PATTERN: Regex = Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.|\[[^\]]*\])*)\]         # Alt text in group 1 (handles nested brackets)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap();

    // Reference definition pattern: `[id]: url "title"` with up to three leading
    // spaces. Multi-line mode ((?m)) anchors `^`/`$` at line boundaries.
    // Group 1 is the ID, group 2 the URL, and group 3 or 4 the optional title
    // (double- or single-quoted respectively).
    static ref REF_DEF_PATTERN: Regex = Regex::new(
        r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#
    ).unwrap();

    // Backtick-run pattern: matches one or more consecutive backticks.
    // The pattern has no capture groups and does not match the span content
    // itself; it only locates backtick runs of any length.
    static ref CODE_SPAN_PATTERN: Regex = Regex::new(
        r"`+"
    ).unwrap();

    // Pattern for bare URLs (http, https, or ftp). Group 1 is the scheme.
    static ref BARE_URL_PATTERN: Regex = Regex::new(
        r#"(https?|ftp)://[^\s<>\[\]()\\'"`]+(?:\.[^\s<>\[\]()\\'"`]+)*(?::\d+)?(?:/[^\s<>\[\]()\\'"`]*)?(?:\?[^\s<>\[\]()\\'"`]*)?(?:#[^\s<>\[\]()\\'"`]*)?"#
    ).unwrap();

    // Pattern for email addresses
    static ref BARE_EMAIL_PATTERN: Regex = Regex::new(
        r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    ).unwrap();

    // Pattern for angle bracket links (to exclude from bare URL detection).
    // Group 1 is the URL or email address between `<` and `>`.
    static ref ANGLE_BRACKET_PATTERN: Regex = Regex::new(
        r"<((?:https?|ftp)://[^>]+|[^@\s]+@[^@\s]+\.[^@\s>]+)>"
    ).unwrap();

    // Pattern for blockquote prefix in parse_list_blocks.
    // Group 1 is the whole prefix: leading whitespace, one or more `>`, and
    // any whitespace that follows.
    static ref BLOCKQUOTE_PREFIX_REGEX: Regex = Regex::new(r"^(\s*>+\s*)").unwrap();
}
62
/// Pre-computed information about a line
///
/// `LintContext::lines` holds one entry per line; `LintContext::line_info`
/// exposes them by 1-indexed line number.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// The actual line content (without newline)
    pub content: String,
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Number of leading spaces/tabs
    pub indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
}
93
/// Information about a list item
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// (presumably `None` for unordered items — confirm against the parser)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
108
/// Heading style type
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the style can be used
/// as a key in hash-based collections and in contexts requiring total equality.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
119
/// Parsed link information
///
/// Covers both inline links `[text](url)` and reference links `[text][ref]`.
/// NOTE(review): whether the column fields count bytes or characters is not
/// visible from this definition — confirm against the link parser.
#[derive(Debug, Clone)]
pub struct ParsedLink {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: String,
    /// Link URL or reference
    pub url: String,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<String>,
}
142
/// Parsed image information
///
/// Covers both inline images `![alt](url)` and reference images `![alt][ref]`.
/// NOTE(review): whether the column fields count bytes or characters is not
/// visible from this definition — confirm against the image parser.
#[derive(Debug, Clone)]
pub struct ParsedImage {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: String,
    /// Image URL or reference
    pub url: String,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<String>,
}
165
/// Reference definition [ref]: url "title"
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    ///
    /// `LintContext::get_reference_url` lowercases the requested ID before
    /// comparing it with this field.
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    /// (treated as exclusive by `LintContext::is_in_reference_def`)
    pub byte_end: usize,
}
182
/// Parsed code span information
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    /// (treated as exclusive by `LintContext::is_in_code_span`)
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    /// (treated as exclusive by `LintContext::is_in_code_block_or_span`)
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
201
/// Information about a heading
///
/// Stored in `LineInfo::heading` for lines that are headings.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    /// (presumably empty when `has_closing_sequence` is false — confirm)
    pub closing_sequence: String,
}
226
/// Information about a blockquote line
///
/// Stored in `LineInfo::blockquote` for lines that are blockquotes.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
247
/// Information about a list block
///
/// `start_line..=end_line` is treated as an inclusive range by
/// `LintContext::is_in_list_block` and `LintContext::list_block_for_line`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed line numbers like `start_line` — confirm)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
268
269use std::sync::{Arc, Mutex};
270
/// Character frequency data for fast content analysis
///
/// These counters back `LintContext::has_char`, `LintContext::char_count` and
/// the `likely_has_*` heuristics. The parenthesised notes on each field name the
/// Markdown constructs the character is associated with; the counters themselves
/// are plain occurrence counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
299
/// Pre-parsed HTML tag information
///
/// Returned (behind an `Arc`) by `LintContext::html_tags`.
/// NOTE(review): whether the column fields count bytes or characters is not
/// visible from this definition — confirm against the tag parser.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
322
/// Pre-parsed emphasis span information
///
/// Returned (behind an `Arc`) by `LintContext::emphasis_spans`.
/// NOTE(review): whether the column fields count bytes or characters is not
/// visible from this definition — confirm against the emphasis parser.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
343
/// Pre-parsed table row information
///
/// Returned (behind an `Arc`) by `LintContext::table_rows`.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Each entry is one of "left", "center", "right", "none".
    pub column_alignments: Vec<String>,
}
356
/// Pre-parsed bare URL information (not in links)
///
/// Returned (behind an `Arc`) by `LintContext::bare_urls`.
/// NOTE(review): whether the column fields count bytes or characters is not
/// visible from this definition — confirm against the URL parser.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
375
/// Shared, pre-parsed view of one Markdown document.
///
/// `LintContext::new` computes the public fields eagerly. The private
/// `*_cache` fields are filled on demand by their accessor methods and hand out
/// `Arc` clones of the cached vectors.
pub struct LintContext<'a> {
    /// The document text being linted
    pub content: &'a str,
    /// Byte offset of the start of each line (first entry is 0)
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// Inline code spans. `new` already stores the parsed spans here;
    /// `code_spans()` re-parses only if the cache is found empty.
    code_spans_cache: Mutex<Option<Arc<Vec<CodeSpan>>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: Mutex<Option<Arc<Vec<HtmlTag>>>>,
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: Mutex<Option<Arc<Vec<EmphasisSpan>>>>,
    /// Lazy-loaded table rows
    table_rows_cache: Mutex<Option<Arc<Vec<TableRow>>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: Mutex<Option<Arc<Vec<BareUrl>>>>,
    /// Pre-computed HTML comment ranges (queried as half-open `start..end`)
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex,
    /// Pre-computed Jinja template ranges ({{ }}, {% %}), queried as half-open `(start, end)`
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
}
397
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The run of one or more `>` characters
    markers: &'a str,
    /// Spaces/tabs directly following the markers
    spaces_after: &'a str,
    /// Everything after `spaces_after`
    content: &'a str,
}

/// Split a line into blockquote components without using a regex.
///
/// Returns `None` unless the first character after any leading spaces/tabs is
/// `>`. Concatenating `indent`, `markers`, `spaces_after` and `content`
/// reproduces the input line.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Only plain spaces and tabs count as whitespace here.
    const BLANKS: &[char] = &[' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let after_markers = after_indent.trim_start_matches('>');

    // No `>` was consumed, so this is not a blockquote line.
    if after_markers.len() == after_indent.len() {
        return None;
    }

    let content = after_markers.trim_start_matches(BLANKS);

    let indent_len = line.len() - after_indent.len();
    let markers_len = after_indent.len() - after_markers.len();
    let spaces_len = after_markers.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_len],
        markers: &after_indent[..markers_len],
        spaces_after: &after_markers[..spaces_len],
        content,
    })
}
442
443impl<'a> LintContext<'a> {
444    pub fn new(content: &'a str, flavor: MarkdownFlavor) -> Self {
445        use std::time::Instant;
446        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
447
448        let start = Instant::now();
449        let mut line_offsets = vec![0];
450        for (i, c) in content.char_indices() {
451            if c == '\n' {
452                line_offsets.push(i + 1);
453            }
454        }
455        if profile {
456            eprintln!("[PROFILE] Line offsets: {:?}", start.elapsed());
457        }
458
459        // Detect code blocks once and cache them
460        let start = Instant::now();
461        let code_blocks = CodeBlockUtils::detect_code_blocks(content);
462        if profile {
463            eprintln!("[PROFILE] Code blocks: {:?}", start.elapsed());
464        }
465
466        // Pre-compute HTML comment ranges ONCE for all operations
467        let start = Instant::now();
468        let html_comment_ranges = crate::utils::skip_context::compute_html_comment_ranges(content);
469        if profile {
470            eprintln!("[PROFILE] HTML comment ranges: {:?}", start.elapsed());
471        }
472
473        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
474        let start = Instant::now();
475        let autodoc_ranges = if flavor == MarkdownFlavor::MkDocs {
476            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
477        } else {
478            Vec::new()
479        };
480        if profile {
481            eprintln!("[PROFILE] Autodoc block ranges: {:?}", start.elapsed());
482        }
483
484        // Pre-compute line information (without headings/blockquotes yet)
485        let start = Instant::now();
486        let mut lines = Self::compute_basic_line_info(
487            content,
488            &line_offsets,
489            &code_blocks,
490            flavor,
491            &html_comment_ranges,
492            &autodoc_ranges,
493        );
494        if profile {
495            eprintln!("[PROFILE] Basic line info: {:?}", start.elapsed());
496        }
497
498        // Detect HTML blocks BEFORE heading detection
499        let start = Instant::now();
500        Self::detect_html_blocks(&mut lines);
501        if profile {
502            eprintln!("[PROFILE] HTML blocks: {:?}", start.elapsed());
503        }
504
505        // Detect ESM import/export blocks in MDX files BEFORE heading detection
506        let start = Instant::now();
507        Self::detect_esm_blocks(&mut lines, flavor);
508        if profile {
509            eprintln!("[PROFILE] ESM blocks: {:?}", start.elapsed());
510        }
511
512        // Now detect headings and blockquotes
513        let start = Instant::now();
514        Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges);
515        if profile {
516            eprintln!("[PROFILE] Headings & blockquotes: {:?}", start.elapsed());
517        }
518
519        // Parse code spans early so we can exclude them from link/image parsing
520        let start = Instant::now();
521        let code_spans = Self::parse_code_spans(content, &lines);
522        if profile {
523            eprintln!("[PROFILE] Code spans: {:?}", start.elapsed());
524        }
525
526        // Parse links, images, references, and list blocks
527        let start = Instant::now();
528        let links = Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges);
529        if profile {
530            eprintln!("[PROFILE] Links: {:?}", start.elapsed());
531        }
532
533        let start = Instant::now();
534        let images = Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges);
535        if profile {
536            eprintln!("[PROFILE] Images: {:?}", start.elapsed());
537        }
538
539        let start = Instant::now();
540        let reference_defs = Self::parse_reference_defs(content, &lines);
541        if profile {
542            eprintln!("[PROFILE] Reference defs: {:?}", start.elapsed());
543        }
544
545        let start = Instant::now();
546        let list_blocks = Self::parse_list_blocks(&lines);
547        if profile {
548            eprintln!("[PROFILE] List blocks: {:?}", start.elapsed());
549        }
550
551        // Compute character frequency for fast content analysis
552        let start = Instant::now();
553        let char_frequency = Self::compute_char_frequency(content);
554        if profile {
555            eprintln!("[PROFILE] Char frequency: {:?}", start.elapsed());
556        }
557
558        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058)
559        let start = Instant::now();
560        let table_blocks =
561            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(content, &code_blocks, &code_spans);
562        if profile {
563            eprintln!("[PROFILE] Table blocks: {:?}", start.elapsed());
564        }
565
566        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
567        let start = Instant::now();
568        let line_index = crate::utils::range_utils::LineIndex::new(content.to_string());
569        if profile {
570            eprintln!("[PROFILE] Line index: {:?}", start.elapsed());
571        }
572
573        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
574        let start = Instant::now();
575        let jinja_ranges = crate::utils::jinja_utils::find_jinja_ranges(content);
576        if profile {
577            eprintln!("[PROFILE] Jinja ranges: {:?}", start.elapsed());
578        }
579
580        Self {
581            content,
582            line_offsets,
583            code_blocks,
584            lines,
585            links,
586            images,
587            reference_defs,
588            code_spans_cache: Mutex::new(Some(Arc::new(code_spans))),
589            list_blocks,
590            char_frequency,
591            html_tags_cache: Mutex::new(None),
592            emphasis_spans_cache: Mutex::new(None),
593            table_rows_cache: Mutex::new(None),
594            bare_urls_cache: Mutex::new(None),
595            html_comment_ranges,
596            table_blocks,
597            line_index,
598            jinja_ranges,
599            flavor,
600        }
601    }
602
603    /// Get code spans - computed lazily on first access
604    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
605        let mut cache = self.code_spans_cache.lock().unwrap();
606
607        // Check if we need to compute code spans
608        if cache.is_none() {
609            let code_spans = Self::parse_code_spans(self.content, &self.lines);
610            *cache = Some(Arc::new(code_spans));
611        }
612
613        // Return a reference to the cached code spans
614        cache.as_ref().unwrap().clone()
615    }
616
617    /// Get HTML tags - computed lazily on first access
618    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
619        let mut cache = self.html_tags_cache.lock().unwrap();
620
621        if cache.is_none() {
622            let html_tags = Self::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
623            *cache = Some(Arc::new(html_tags));
624        }
625
626        cache.as_ref().unwrap().clone()
627    }
628
629    /// Get emphasis spans - computed lazily on first access
630    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
631        let mut cache = self.emphasis_spans_cache.lock().unwrap();
632
633        if cache.is_none() {
634            let emphasis_spans = Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks);
635            *cache = Some(Arc::new(emphasis_spans));
636        }
637
638        cache.as_ref().unwrap().clone()
639    }
640
641    /// Get table rows - computed lazily on first access
642    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
643        let mut cache = self.table_rows_cache.lock().unwrap();
644
645        if cache.is_none() {
646            let table_rows = Self::parse_table_rows(&self.lines);
647            *cache = Some(Arc::new(table_rows));
648        }
649
650        cache.as_ref().unwrap().clone()
651    }
652
653    /// Get bare URLs - computed lazily on first access
654    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
655        let mut cache = self.bare_urls_cache.lock().unwrap();
656
657        if cache.is_none() {
658            let bare_urls = Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks);
659            *cache = Some(Arc::new(bare_urls));
660        }
661
662        cache.as_ref().unwrap().clone()
663    }
664
665    /// Map a byte offset to (line, column)
666    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
667        match self.line_offsets.binary_search(&offset) {
668            Ok(line) => (line + 1, 1),
669            Err(line) => {
670                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
671                (line, offset - line_start + 1)
672            }
673        }
674    }
675
676    /// Check if a position is within a code block or code span
677    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
678        // Check code blocks first
679        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
680            return true;
681        }
682
683        // Check inline code spans (lazy load if needed)
684        self.code_spans()
685            .iter()
686            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
687    }
688
689    /// Get line information by line number (1-indexed)
690    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
691        if line_num > 0 {
692            self.lines.get(line_num - 1)
693        } else {
694            None
695        }
696    }
697
698    /// Get byte offset for a line number (1-indexed)
699    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
700        self.line_info(line_num).map(|info| info.byte_offset)
701    }
702
703    /// Get URL for a reference link/image by its ID
704    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
705        let normalized_id = ref_id.to_lowercase();
706        self.reference_defs
707            .iter()
708            .find(|def| def.id == normalized_id)
709            .map(|def| def.url.as_str())
710    }
711
712    /// Get links on a specific line
713    pub fn links_on_line(&self, line_num: usize) -> Vec<&ParsedLink> {
714        self.links.iter().filter(|link| link.line == line_num).collect()
715    }
716
717    /// Get images on a specific line
718    pub fn images_on_line(&self, line_num: usize) -> Vec<&ParsedImage> {
719        self.images.iter().filter(|img| img.line == line_num).collect()
720    }
721
722    /// Check if a line is part of a list block
723    pub fn is_in_list_block(&self, line_num: usize) -> bool {
724        self.list_blocks
725            .iter()
726            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
727    }
728
729    /// Get the list block containing a specific line
730    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
731        self.list_blocks
732            .iter()
733            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
734    }
735
736    // Compatibility methods for DocumentStructure migration
737
738    /// Check if a line is within a code block
739    pub fn is_in_code_block(&self, line_num: usize) -> bool {
740        if line_num == 0 || line_num > self.lines.len() {
741            return false;
742        }
743        self.lines[line_num - 1].in_code_block
744    }
745
746    /// Check if a line is within front matter
747    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
748        if line_num == 0 || line_num > self.lines.len() {
749            return false;
750        }
751        self.lines[line_num - 1].in_front_matter
752    }
753
754    /// Check if a line is within an HTML block
755    pub fn is_in_html_block(&self, line_num: usize) -> bool {
756        if line_num == 0 || line_num > self.lines.len() {
757            return false;
758        }
759        self.lines[line_num - 1].in_html_block
760    }
761
762    /// Check if a line and column is within a code span
763    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
764        if line_num == 0 || line_num > self.lines.len() {
765            return false;
766        }
767
768        // Use the code spans cache to check
769        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
770        // Convert col to 0-indexed for comparison
771        let col_0indexed = if col > 0 { col - 1 } else { 0 };
772        let code_spans = self.code_spans();
773        code_spans
774            .iter()
775            .any(|span| span.line == line_num && col_0indexed >= span.start_col && col_0indexed < span.end_col)
776    }
777
778    /// Check if a byte position is within a reference definition
779    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
780    #[inline]
781    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
782        self.reference_defs
783            .iter()
784            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
785    }
786
787    /// Check if a byte position is within an HTML comment
788    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
789    /// where k is the number of HTML comments (typically very small)
790    #[inline]
791    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
792        self.html_comment_ranges
793            .iter()
794            .any(|range| byte_pos >= range.start && byte_pos < range.end)
795    }
796
797    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
798    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
799        self.jinja_ranges
800            .iter()
801            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
802    }
803
804    /// Check if content has any instances of a specific character (fast)
805    pub fn has_char(&self, ch: char) -> bool {
806        match ch {
807            '#' => self.char_frequency.hash_count > 0,
808            '*' => self.char_frequency.asterisk_count > 0,
809            '_' => self.char_frequency.underscore_count > 0,
810            '-' => self.char_frequency.hyphen_count > 0,
811            '+' => self.char_frequency.plus_count > 0,
812            '>' => self.char_frequency.gt_count > 0,
813            '|' => self.char_frequency.pipe_count > 0,
814            '[' => self.char_frequency.bracket_count > 0,
815            '`' => self.char_frequency.backtick_count > 0,
816            '<' => self.char_frequency.lt_count > 0,
817            '!' => self.char_frequency.exclamation_count > 0,
818            '\n' => self.char_frequency.newline_count > 0,
819            _ => self.content.contains(ch), // Fallback for other characters
820        }
821    }
822
823    /// Get count of a specific character (fast)
824    pub fn char_count(&self, ch: char) -> usize {
825        match ch {
826            '#' => self.char_frequency.hash_count,
827            '*' => self.char_frequency.asterisk_count,
828            '_' => self.char_frequency.underscore_count,
829            '-' => self.char_frequency.hyphen_count,
830            '+' => self.char_frequency.plus_count,
831            '>' => self.char_frequency.gt_count,
832            '|' => self.char_frequency.pipe_count,
833            '[' => self.char_frequency.bracket_count,
834            '`' => self.char_frequency.backtick_count,
835            '<' => self.char_frequency.lt_count,
836            '!' => self.char_frequency.exclamation_count,
837            '\n' => self.char_frequency.newline_count,
838            _ => self.content.matches(ch).count(), // Fallback for other characters
839        }
840    }
841
842    /// Check if content likely contains headings (fast)
843    pub fn likely_has_headings(&self) -> bool {
844        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
845    }
846
847    /// Check if content likely contains lists (fast)
848    pub fn likely_has_lists(&self) -> bool {
849        self.char_frequency.asterisk_count > 0
850            || self.char_frequency.hyphen_count > 0
851            || self.char_frequency.plus_count > 0
852    }
853
854    /// Check if content likely contains emphasis (fast)
855    pub fn likely_has_emphasis(&self) -> bool {
856        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
857    }
858
859    /// Check if content likely contains tables (fast)
860    pub fn likely_has_tables(&self) -> bool {
861        self.char_frequency.pipe_count > 2
862    }
863
864    /// Check if content likely contains blockquotes (fast)
865    pub fn likely_has_blockquotes(&self) -> bool {
866        self.char_frequency.gt_count > 0
867    }
868
869    /// Check if content likely contains code (fast)
870    pub fn likely_has_code(&self) -> bool {
871        self.char_frequency.backtick_count > 0
872    }
873
874    /// Check if content likely contains links or images (fast)
875    pub fn likely_has_links_or_images(&self) -> bool {
876        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
877    }
878
879    /// Check if content likely contains HTML (fast)
880    pub fn likely_has_html(&self) -> bool {
881        self.char_frequency.lt_count > 0
882    }
883
884    /// Get HTML tags on a specific line
885    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
886        self.html_tags()
887            .iter()
888            .filter(|tag| tag.line == line_num)
889            .cloned()
890            .collect()
891    }
892
893    /// Get emphasis spans on a specific line
894    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
895        self.emphasis_spans()
896            .iter()
897            .filter(|span| span.line == line_num)
898            .cloned()
899            .collect()
900    }
901
902    /// Get table rows on a specific line
903    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
904        self.table_rows()
905            .iter()
906            .filter(|row| row.line == line_num)
907            .cloned()
908            .collect()
909    }
910
911    /// Get bare URLs on a specific line
912    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
913        self.bare_urls()
914            .iter()
915            .filter(|url| url.line == line_num)
916            .cloned()
917            .collect()
918    }
919
920    /// Find the line index for a given byte offset using binary search.
921    /// Returns (line_index, line_number, column) where:
922    /// - line_index is the 0-based index in the lines array
923    /// - line_number is the 1-based line number
924    /// - column is the byte offset within that line
925    #[inline]
926    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
927        // Binary search to find the line containing this byte offset
928        let idx = match lines.binary_search_by(|line| {
929            if byte_offset < line.byte_offset {
930                std::cmp::Ordering::Greater
931            } else if byte_offset > line.byte_offset + line.content.len() {
932                std::cmp::Ordering::Less
933            } else {
934                std::cmp::Ordering::Equal
935            }
936        }) {
937            Ok(idx) => idx,
938            Err(idx) => idx.saturating_sub(1),
939        };
940
941        let line = &lines[idx];
942        let line_num = idx + 1;
943        let col = byte_offset.saturating_sub(line.byte_offset);
944
945        (idx, line_num, col)
946    }
947
948    /// Check if a byte offset is within a code span using binary search
949    #[inline]
950    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
951        // Since spans are sorted by byte_offset, use partition_point for binary search
952        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
953
954        // Check the span that starts at or before our offset
955        if idx > 0 {
956            let span = &code_spans[idx - 1];
957            if offset >= span.byte_offset && offset < span.byte_end {
958                return true;
959            }
960        }
961
962        false
963    }
964
965    /// Parse all links in the content
966    fn parse_links(
967        content: &str,
968        lines: &[LineInfo],
969        code_blocks: &[(usize, usize)],
970        code_spans: &[CodeSpan],
971        flavor: MarkdownFlavor,
972        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
973    ) -> Vec<ParsedLink> {
974        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
975
976        // Pre-size based on a heuristic: most markdown files have relatively few links
977        let mut links = Vec::with_capacity(content.len() / 500); // ~1 link per 500 chars
978
979        // Parse links across the entire content, not line by line
980        for cap in LINK_PATTERN.captures_iter(content) {
981            let full_match = cap.get(0).unwrap();
982            let match_start = full_match.start();
983            let match_end = full_match.end();
984
985            // Skip if the opening bracket is escaped (preceded by \)
986            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
987                continue;
988            }
989
990            // Skip if this is actually an image (preceded by !)
991            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
992                continue;
993            }
994
995            // Skip if in code block
996            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
997                continue;
998            }
999
1000            // Skip if in code span
1001            if Self::is_offset_in_code_span(code_spans, match_start) {
1002                continue;
1003            }
1004
1005            // Skip if in HTML comment (using pre-computed ranges for efficiency)
1006            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1007                continue;
1008            }
1009
1010            // Use binary search to find the line this link is on
1011            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1012
1013            // Skip if this link is on a MkDocs snippet line
1014            if is_mkdocs_snippet_line(&lines[line_idx].content, flavor) {
1015                continue;
1016            }
1017
1018            // Use binary search to find the end line
1019            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1020
1021            let text = cap.get(1).map_or("", |m| m.as_str()).to_string();
1022
1023            // URL can be in group 2 (angle brackets) or group 3 (bare)
1024            let inline_url = cap.get(2).or_else(|| cap.get(3));
1025
1026            if let Some(url_match) = inline_url {
1027                // Inline link
1028                links.push(ParsedLink {
1029                    line: line_num,
1030                    start_col: col_start,
1031                    end_col: col_end,
1032                    byte_offset: match_start,
1033                    byte_end: match_end,
1034                    text,
1035                    url: url_match.as_str().to_string(),
1036                    is_reference: false,
1037                    reference_id: None,
1038                });
1039            } else if let Some(ref_id) = cap.get(6) {
1040                // Reference link
1041                let ref_id_str = ref_id.as_str();
1042                let normalized_ref = if ref_id_str.is_empty() {
1043                    text.to_lowercase() // Implicit reference
1044                } else {
1045                    ref_id_str.to_lowercase()
1046                };
1047
1048                links.push(ParsedLink {
1049                    line: line_num,
1050                    start_col: col_start,
1051                    end_col: col_end,
1052                    byte_offset: match_start,
1053                    byte_end: match_end,
1054                    text,
1055                    url: String::new(), // Will be resolved with reference_defs
1056                    is_reference: true,
1057                    reference_id: Some(normalized_ref),
1058                });
1059            }
1060        }
1061
1062        links
1063    }
1064
1065    /// Parse all images in the content
1066    fn parse_images(
1067        content: &str,
1068        lines: &[LineInfo],
1069        code_blocks: &[(usize, usize)],
1070        code_spans: &[CodeSpan],
1071        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1072    ) -> Vec<ParsedImage> {
1073        use crate::utils::skip_context::is_in_html_comment_ranges;
1074
1075        // Pre-size based on a heuristic: images are less common than links
1076        let mut images = Vec::with_capacity(content.len() / 1000); // ~1 image per 1000 chars
1077
1078        // Parse images across the entire content, not line by line
1079        for cap in IMAGE_PATTERN.captures_iter(content) {
1080            let full_match = cap.get(0).unwrap();
1081            let match_start = full_match.start();
1082            let match_end = full_match.end();
1083
1084            // Skip if the ! is escaped (preceded by \)
1085            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1086                continue;
1087            }
1088
1089            // Skip if in code block
1090            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1091                continue;
1092            }
1093
1094            // Skip if in code span
1095            if Self::is_offset_in_code_span(code_spans, match_start) {
1096                continue;
1097            }
1098
1099            // Skip if in HTML comment (using pre-computed ranges for efficiency)
1100            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1101                continue;
1102            }
1103
1104            // Use binary search to find the line this image is on
1105            let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1106
1107            // Use binary search to find the end line
1108            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1109
1110            let alt_text = cap.get(1).map_or("", |m| m.as_str()).to_string();
1111
1112            // URL can be in group 2 (angle brackets) or group 3 (bare)
1113            let inline_url = cap.get(2).or_else(|| cap.get(3));
1114
1115            if let Some(url_match) = inline_url {
1116                // Inline image
1117                images.push(ParsedImage {
1118                    line: line_num,
1119                    start_col: col_start,
1120                    end_col: col_end,
1121                    byte_offset: match_start,
1122                    byte_end: match_end,
1123                    alt_text,
1124                    url: url_match.as_str().to_string(),
1125                    is_reference: false,
1126                    reference_id: None,
1127                });
1128            } else if let Some(ref_id) = cap.get(6) {
1129                // Reference image
1130                let ref_id_str = ref_id.as_str();
1131                let normalized_ref = if ref_id_str.is_empty() {
1132                    alt_text.to_lowercase() // Implicit reference
1133                } else {
1134                    ref_id_str.to_lowercase()
1135                };
1136
1137                images.push(ParsedImage {
1138                    line: line_num,
1139                    start_col: col_start,
1140                    end_col: col_end,
1141                    byte_offset: match_start,
1142                    byte_end: match_end,
1143                    alt_text,
1144                    url: String::new(), // Will be resolved with reference_defs
1145                    is_reference: true,
1146                    reference_id: Some(normalized_ref),
1147                });
1148            }
1149        }
1150
1151        images
1152    }
1153
1154    /// Parse reference definitions
1155    fn parse_reference_defs(_content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1156        // Pre-size based on lines count as reference definitions are line-based
1157        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1158
1159        for (line_idx, line_info) in lines.iter().enumerate() {
1160            // Skip lines in code blocks
1161            if line_info.in_code_block {
1162                continue;
1163            }
1164
1165            let line = &line_info.content;
1166            let line_num = line_idx + 1;
1167
1168            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1169                let id = cap.get(1).unwrap().as_str().to_lowercase();
1170                let url = cap.get(2).unwrap().as_str().to_string();
1171                let title = cap.get(3).or_else(|| cap.get(4)).map(|m| m.as_str().to_string());
1172
1173                // Calculate byte positions
1174                // The match starts at the beginning of the line (0) and extends to the end
1175                let match_obj = cap.get(0).unwrap();
1176                let byte_offset = line_info.byte_offset + match_obj.start();
1177                let byte_end = line_info.byte_offset + match_obj.end();
1178
1179                refs.push(ReferenceDef {
1180                    line: line_num,
1181                    id,
1182                    url,
1183                    title,
1184                    byte_offset,
1185                    byte_end,
1186                });
1187            }
1188        }
1189
1190        refs
1191    }
1192
1193    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1194    /// Matches: ^(\s*>\s*)(.*)
1195    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1196    #[inline]
1197    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1198        let trimmed_start = line.trim_start();
1199        if !trimmed_start.starts_with('>') {
1200            return None;
1201        }
1202
1203        let leading_ws_len = line.len() - trimmed_start.len();
1204        let after_gt = &trimmed_start[1..];
1205        let content = after_gt.trim_start();
1206        let ws_after_gt_len = after_gt.len() - content.len();
1207        let prefix_len = leading_ws_len + 1 + ws_after_gt_len;
1208
1209        Some((&line[..prefix_len], content))
1210    }
1211
1212    /// Fast unordered list parser - replaces regex for 5-10x speedup
1213    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1214    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1215    #[inline]
1216    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1217        let bytes = line.as_bytes();
1218        let mut i = 0;
1219
1220        // Skip leading whitespace
1221        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1222            i += 1;
1223        }
1224
1225        // Check for marker
1226        if i >= bytes.len() {
1227            return None;
1228        }
1229        let marker = bytes[i] as char;
1230        if marker != '-' && marker != '*' && marker != '+' {
1231            return None;
1232        }
1233        let marker_pos = i;
1234        i += 1;
1235
1236        // Collect spacing after marker (space or tab only)
1237        let spacing_start = i;
1238        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1239            i += 1;
1240        }
1241
1242        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1243    }
1244
1245    /// Fast ordered list parser - replaces regex for 5-10x speedup
1246    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1247    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1248    #[inline]
1249    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1250        let bytes = line.as_bytes();
1251        let mut i = 0;
1252
1253        // Skip leading whitespace
1254        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1255            i += 1;
1256        }
1257
1258        // Collect digits
1259        let number_start = i;
1260        while i < bytes.len() && bytes[i].is_ascii_digit() {
1261            i += 1;
1262        }
1263        if i == number_start {
1264            return None; // No digits found
1265        }
1266
1267        // Check for delimiter
1268        if i >= bytes.len() {
1269            return None;
1270        }
1271        let delimiter = bytes[i] as char;
1272        if delimiter != '.' && delimiter != ')' {
1273            return None;
1274        }
1275        let delimiter_pos = i;
1276        i += 1;
1277
1278        // Collect spacing after delimiter (space or tab only)
1279        let spacing_start = i;
1280        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1281            i += 1;
1282        }
1283
1284        Some((
1285            &line[..number_start],
1286            &line[number_start..delimiter_pos],
1287            delimiter,
1288            &line[spacing_start..i],
1289            &line[i..],
1290        ))
1291    }
1292
1293    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1294    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1295    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1296        let num_lines = line_offsets.len();
1297        let mut in_code_block = vec![false; num_lines];
1298
1299        // For each code block, mark all lines within it
1300        for &(start, end) in code_blocks {
1301            // Ensure we're at valid UTF-8 boundaries
1302            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1303                let mut boundary = start;
1304                while boundary > 0 && !content.is_char_boundary(boundary) {
1305                    boundary -= 1;
1306                }
1307                boundary
1308            } else {
1309                start
1310            };
1311
1312            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1313                let mut boundary = end;
1314                while boundary < content.len() && !content.is_char_boundary(boundary) {
1315                    boundary += 1;
1316                }
1317                boundary
1318            } else {
1319                end.min(content.len())
1320            };
1321
1322            let block_content = &content[safe_start..safe_end];
1323
1324            // Strip blockquote markers to check the actual code block content
1325            // Code blocks inside blockquotes have the format: "> ```" or ">     code"
1326            let content_to_check = block_content
1327                .lines()
1328                .map(|line| {
1329                    let mut stripped = line.to_string();
1330                    while crate::rules::blockquote_utils::BlockquoteUtils::is_blockquote(&stripped) {
1331                        stripped = crate::rules::blockquote_utils::BlockquoteUtils::extract_content(&stripped);
1332                    }
1333                    stripped
1334                })
1335                .collect::<Vec<_>>()
1336                .join("\n");
1337
1338            // Quick checks first: fenced code blocks are most common
1339            let is_fenced =
1340                content_to_check.trim_start().starts_with("```") || content_to_check.trim_start().starts_with("~~~");
1341
1342            // For non-fenced blocks, check if it's an indented code block
1343            // Only check this if needed, as it's expensive
1344            let should_mark = if is_fenced {
1345                true
1346            } else {
1347                // Check if all non-empty lines start with 4 spaces or tab
1348                // Using manual byte scanning instead of .lines() iterator for speed
1349                let bytes = content_to_check.as_bytes();
1350                let mut i = 0;
1351                let mut valid_indented = true;
1352
1353                while i < bytes.len() {
1354                    let line_start = i;
1355                    // Find end of line
1356                    while i < bytes.len() && bytes[i] != b'\n' {
1357                        i += 1;
1358                    }
1359
1360                    // Check if this line is properly indented or empty
1361                    let mut j = line_start;
1362                    // Skip leading whitespace
1363                    while j < i && (bytes[j] == b' ' || bytes[j] == b'\t') {
1364                        j += 1;
1365                    }
1366
1367                    // If line has content, check indentation
1368                    if j < i {
1369                        // Line has non-whitespace content
1370                        let indent_len = j - line_start;
1371                        let starts_with_tab = line_start < bytes.len() && bytes[line_start] == b'\t';
1372                        if indent_len < 4 && !starts_with_tab {
1373                            valid_indented = false;
1374                            break;
1375                        }
1376                    }
1377
1378                    i += 1; // Skip the newline
1379                }
1380
1381                valid_indented
1382            };
1383
1384            if should_mark {
1385                // Use binary search to find the first and last line indices
1386                // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1387                // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1388                let first_line = line_offsets.partition_point(|&offset| offset < safe_start);
1389                let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1390
1391                // Mark all lines in the range at once
1392                for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1393                    *flag = true;
1394                }
1395            }
1396        }
1397
1398        in_code_block
1399    }
1400
1401    /// Pre-compute basic line information (without headings/blockquotes)
1402    fn compute_basic_line_info(
1403        content: &str,
1404        line_offsets: &[usize],
1405        code_blocks: &[(usize, usize)],
1406        flavor: MarkdownFlavor,
1407        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1408        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1409    ) -> Vec<LineInfo> {
1410        let content_lines: Vec<&str> = content.lines().collect();
1411        let mut lines = Vec::with_capacity(content_lines.len());
1412
1413        // Pre-compute which lines are in code blocks
1414        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1415
1416        // Detect front matter boundaries FIRST, before any other parsing
1417        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1418        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1419
1420        for (i, line) in content_lines.iter().enumerate() {
1421            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1422            let indent = line.len() - line.trim_start().len();
1423
1424            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1425            let blockquote_parse = Self::parse_blockquote_prefix(line);
1426
1427            // For blank detection, consider blockquote context
1428            let is_blank = if let Some((_, content)) = blockquote_parse {
1429                // In blockquote context, check if content after prefix is blank
1430                content.trim().is_empty()
1431            } else {
1432                line.trim().is_empty()
1433            };
1434
1435            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1436            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1437
1438            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1439            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1440                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1441            // Use pre-computed ranges for efficiency (O(log n) vs O(file_size))
1442            let in_html_comment =
1443                crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, byte_offset);
1444            let list_item = if !(in_code_block
1445                || is_blank
1446                || in_mkdocstrings
1447                || in_html_comment
1448                || (front_matter_end > 0 && i < front_matter_end))
1449            {
1450                // Strip blockquote prefix if present for list detection (reuse cached result)
1451                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1452                    (content, prefix.len())
1453                } else {
1454                    (&**line, 0)
1455                };
1456
1457                if let Some((leading_spaces, marker, spacing, _content)) =
1458                    Self::parse_unordered_list(line_for_list_check)
1459                {
1460                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1461                    let content_column = marker_column + 1 + spacing.len();
1462
1463                    // According to CommonMark spec, unordered list items MUST have at least one space
1464                    // after the marker (-, *, or +). Without a space, it's not a list item.
1465                    // This also naturally handles cases like:
1466                    // - *emphasis* (not a list)
1467                    // - **bold** (not a list)
1468                    // - --- (horizontal rule, not a list)
1469                    if spacing.is_empty() {
1470                        None
1471                    } else {
1472                        Some(ListItemInfo {
1473                            marker: marker.to_string(),
1474                            is_ordered: false,
1475                            number: None,
1476                            marker_column,
1477                            content_column,
1478                        })
1479                    }
1480                } else if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1481                    Self::parse_ordered_list(line_for_list_check)
1482                {
1483                    let marker = format!("{number_str}{delimiter}");
1484                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1485                    let content_column = marker_column + marker.len() + spacing.len();
1486
1487                    // According to CommonMark spec, ordered list items MUST have at least one space
1488                    // after the marker (period or parenthesis). Without a space, it's not a list item.
1489                    if spacing.is_empty() {
1490                        None
1491                    } else {
1492                        Some(ListItemInfo {
1493                            marker,
1494                            is_ordered: true,
1495                            number: number_str.parse().ok(),
1496                            marker_column,
1497                            content_column,
1498                        })
1499                    }
1500                } else {
1501                    None
1502                }
1503            } else {
1504                None
1505            };
1506
1507            lines.push(LineInfo {
1508                content: line.to_string(),
1509                byte_offset,
1510                indent,
1511                is_blank,
1512                in_code_block,
1513                in_front_matter: front_matter_end > 0 && i < front_matter_end,
1514                in_html_block: false, // Will be populated after line creation
1515                in_html_comment,
1516                list_item,
1517                heading: None,    // Will be populated in second pass for Setext headings
1518                blockquote: None, // Will be populated after line creation
1519                in_mkdocstrings,
1520                in_esm_block: false, // Will be populated after line creation for MDX files
1521            });
1522        }
1523
1524        lines
1525    }
1526
1527    /// Detect headings and blockquotes (called after HTML block detection)
1528    fn detect_headings_and_blockquotes(
1529        content: &str,
1530        lines: &mut [LineInfo],
1531        flavor: MarkdownFlavor,
1532        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1533    ) {
1534        lazy_static! {
1535
1536            // Regex for heading detection
1537            static ref ATX_HEADING_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap();
1538            static ref SETEXT_UNDERLINE_REGEX: regex::Regex = regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap();
1539        }
1540
1541        let content_lines: Vec<&str> = content.lines().collect();
1542
1543        // Detect front matter boundaries to skip those lines
1544        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1545
1546        // Detect headings (including Setext which needs look-ahead) and blockquotes
1547        for i in 0..lines.len() {
1548            if lines[i].in_code_block {
1549                continue;
1550            }
1551
1552            // Skip lines in front matter
1553            if front_matter_end > 0 && i < front_matter_end {
1554                continue;
1555            }
1556
1557            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
1558            if lines[i].in_html_block {
1559                continue;
1560            }
1561
1562            let line = content_lines[i];
1563
1564            // Check for blockquotes (even on blank lines within blockquotes)
1565            if let Some(bq) = parse_blockquote_detailed(line) {
1566                let nesting_level = bq.markers.len(); // Each '>' is one level
1567                let marker_column = bq.indent.len();
1568
1569                // Build the prefix (indentation + markers + space)
1570                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
1571
1572                // Check for various blockquote issues
1573                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
1574                // Consider tabs as multiple spaces, or actual multiple spaces
1575                let has_multiple_spaces = bq.spaces_after.len() > 1 || bq.spaces_after.contains('\t');
1576
1577                // Check if needs MD028 fix (empty blockquote line without proper spacing)
1578                // MD028 flags empty blockquote lines that don't have a single space after the marker
1579                // Lines like "> " or ">> " are already correct and don't need fixing
1580                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
1581
1582                lines[i].blockquote = Some(BlockquoteInfo {
1583                    nesting_level,
1584                    indent: bq.indent.to_string(),
1585                    marker_column,
1586                    prefix,
1587                    content: bq.content.to_string(),
1588                    has_no_space_after_marker: has_no_space,
1589                    has_multiple_spaces_after_marker: has_multiple_spaces,
1590                    needs_md028_fix,
1591                });
1592            }
1593
1594            // Skip heading detection for blank lines
1595            if lines[i].is_blank {
1596                continue;
1597            }
1598
1599            // Check for ATX headings (but skip MkDocs snippet lines)
1600            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
1601            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
1602                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
1603                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
1604            } else {
1605                false
1606            };
1607
1608            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
1609                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
1610                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
1611                    continue;
1612                }
1613                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
1614                let hashes = caps.get(2).map_or("", |m| m.as_str());
1615                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
1616                let rest = caps.get(4).map_or("", |m| m.as_str());
1617
1618                let level = hashes.len() as u8;
1619                let marker_column = leading_spaces.len();
1620
1621                // Check for closing sequence, but handle custom IDs that might come after
1622                let (text, has_closing, closing_seq) = {
1623                    // First check if there's a custom ID at the end
1624                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
1625                        // Check if this looks like a valid custom ID (ends with })
1626                        if rest[id_start..].trim_end().ends_with('}') {
1627                            // Split off the custom ID
1628                            (&rest[..id_start], &rest[id_start..])
1629                        } else {
1630                            (rest, "")
1631                        }
1632                    } else {
1633                        (rest, "")
1634                    };
1635
1636                    // Now look for closing hashes in the part before the custom ID
1637                    let trimmed_rest = rest_without_id.trim_end();
1638                    if let Some(last_hash_pos) = trimmed_rest.rfind('#') {
1639                        // Look for the start of the hash sequence
1640                        let mut start_of_hashes = last_hash_pos;
1641                        while start_of_hashes > 0 && trimmed_rest.chars().nth(start_of_hashes - 1) == Some('#') {
1642                            start_of_hashes -= 1;
1643                        }
1644
1645                        // Check if there's at least one space before the closing hashes
1646                        let has_space_before = start_of_hashes == 0
1647                            || trimmed_rest
1648                                .chars()
1649                                .nth(start_of_hashes - 1)
1650                                .is_some_and(|c| c.is_whitespace());
1651
1652                        // Check if this is a valid closing sequence (all hashes to end of trimmed part)
1653                        let potential_closing = &trimmed_rest[start_of_hashes..];
1654                        let is_all_hashes = potential_closing.chars().all(|c| c == '#');
1655
1656                        if is_all_hashes && has_space_before {
1657                            // This is a closing sequence
1658                            let closing_hashes = potential_closing.to_string();
1659                            // The text is everything before the closing hashes
1660                            // Don't include the custom ID here - it will be extracted later
1661                            let text_part = if !custom_id_part.is_empty() {
1662                                // If we have a custom ID, append it back to get the full rest
1663                                // This allows the extract_header_id function to handle it properly
1664                                format!("{}{}", rest_without_id[..start_of_hashes].trim_end(), custom_id_part)
1665                            } else {
1666                                rest_without_id[..start_of_hashes].trim_end().to_string()
1667                            };
1668                            (text_part, true, closing_hashes)
1669                        } else {
1670                            // Not a valid closing sequence, return the full content
1671                            (rest.to_string(), false, String::new())
1672                        }
1673                    } else {
1674                        // No hashes found, return the full content
1675                        (rest.to_string(), false, String::new())
1676                    }
1677                };
1678
1679                let content_column = marker_column + hashes.len() + spaces_after.len();
1680
1681                // Extract custom header ID if present
1682                let raw_text = text.trim().to_string();
1683                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1684
1685                // If no custom ID was found on the header line, check the next line for standalone attr-list
1686                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
1687                    let next_line = content_lines[i + 1];
1688                    if !lines[i + 1].in_code_block
1689                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
1690                        && let Some(next_line_id) =
1691                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
1692                    {
1693                        custom_id = Some(next_line_id);
1694                    }
1695                }
1696
1697                lines[i].heading = Some(HeadingInfo {
1698                    level,
1699                    style: HeadingStyle::ATX,
1700                    marker: hashes.to_string(),
1701                    marker_column,
1702                    content_column,
1703                    text: clean_text,
1704                    custom_id,
1705                    raw_text,
1706                    has_closing_sequence: has_closing,
1707                    closing_sequence: closing_seq,
1708                });
1709            }
1710            // Check for Setext headings (need to look at next line)
1711            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
1712                let next_line = content_lines[i + 1];
1713                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
1714                    // Skip if next line is front matter delimiter
1715                    if front_matter_end > 0 && i < front_matter_end {
1716                        continue;
1717                    }
1718
1719                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
1720                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
1721                    {
1722                        continue;
1723                    }
1724
1725                    let underline = next_line.trim();
1726
1727                    // Skip if the underline looks like YAML delimiter (exactly 3 or more dashes)
1728                    // YAML uses exactly `---` while Setext headings typically use longer underlines
1729                    if underline == "---" {
1730                        continue;
1731                    }
1732
1733                    // Skip if the current line looks like YAML key-value syntax
1734                    let current_line_trimmed = line.trim();
1735                    if current_line_trimmed.contains(':')
1736                        && !current_line_trimmed.starts_with('#')
1737                        && !current_line_trimmed.contains('[')
1738                        && !current_line_trimmed.contains("](")
1739                    {
1740                        // This looks like "key: value" which suggests YAML, not a heading
1741                        continue;
1742                    }
1743
1744                    let level = if underline.starts_with('=') { 1 } else { 2 };
1745                    let style = if level == 1 {
1746                        HeadingStyle::Setext1
1747                    } else {
1748                        HeadingStyle::Setext2
1749                    };
1750
1751                    // Extract custom header ID if present
1752                    let raw_text = line.trim().to_string();
1753                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
1754
1755                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
1756                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
1757                        let attr_line = content_lines[i + 2];
1758                        if !lines[i + 2].in_code_block
1759                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
1760                            && let Some(attr_line_id) =
1761                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
1762                        {
1763                            custom_id = Some(attr_line_id);
1764                        }
1765                    }
1766
1767                    lines[i].heading = Some(HeadingInfo {
1768                        level,
1769                        style,
1770                        marker: underline.to_string(),
1771                        marker_column: next_line.len() - next_line.trim_start().len(),
1772                        content_column: lines[i].indent,
1773                        text: clean_text,
1774                        custom_id,
1775                        raw_text,
1776                        has_closing_sequence: false,
1777                        closing_sequence: String::new(),
1778                    });
1779                }
1780            }
1781        }
1782    }
1783
1784    /// Detect HTML blocks in the content
1785    fn detect_html_blocks(lines: &mut [LineInfo]) {
1786        // HTML block elements that trigger block context
1787        const BLOCK_ELEMENTS: &[&str] = &[
1788            "address",
1789            "article",
1790            "aside",
1791            "blockquote",
1792            "details",
1793            "dialog",
1794            "dd",
1795            "div",
1796            "dl",
1797            "dt",
1798            "fieldset",
1799            "figcaption",
1800            "figure",
1801            "footer",
1802            "form",
1803            "h1",
1804            "h2",
1805            "h3",
1806            "h4",
1807            "h5",
1808            "h6",
1809            "header",
1810            "hr",
1811            "li",
1812            "main",
1813            "nav",
1814            "ol",
1815            "p",
1816            "pre",
1817            "script",
1818            "section",
1819            "style",
1820            "table",
1821            "tbody",
1822            "td",
1823            "tfoot",
1824            "th",
1825            "thead",
1826            "tr",
1827            "ul",
1828        ];
1829
1830        let mut i = 0;
1831        while i < lines.len() {
1832            // Skip if already in code block or front matter
1833            if lines[i].in_code_block || lines[i].in_front_matter {
1834                i += 1;
1835                continue;
1836            }
1837
1838            let trimmed = lines[i].content.trim_start();
1839
1840            // Check if line starts with an HTML tag
1841            if trimmed.starts_with('<') && trimmed.len() > 1 {
1842                // Extract tag name safely
1843                let after_bracket = &trimmed[1..];
1844                let is_closing = after_bracket.starts_with('/');
1845                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
1846
1847                // Extract tag name (stop at space, >, /, or end of string)
1848                let tag_name = tag_start
1849                    .chars()
1850                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-')
1851                    .collect::<String>()
1852                    .to_lowercase();
1853
1854                // Check if it's a block element
1855                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
1856                    // Mark this line as in HTML block
1857                    lines[i].in_html_block = true;
1858
1859                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
1860                    // This avoids complex nesting logic that might cause infinite loops
1861                    if !is_closing {
1862                        let closing_tag = format!("</{tag_name}>");
1863                        // style and script tags can contain blank lines (CSS/JS formatting)
1864                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
1865                        let mut j = i + 1;
1866                        while j < lines.len() && j < i + 100 {
1867                            // Limit search to 100 lines
1868                            // Stop at blank lines (except for style/script tags)
1869                            if !allow_blank_lines && lines[j].is_blank {
1870                                break;
1871                            }
1872
1873                            lines[j].in_html_block = true;
1874
1875                            // Check if this line contains the closing tag
1876                            if lines[j].content.contains(&closing_tag) {
1877                                break;
1878                            }
1879                            j += 1;
1880                        }
1881                    }
1882                }
1883            }
1884
1885            i += 1;
1886        }
1887    }
1888
1889    /// Detect ESM import/export blocks in MDX files
1890    /// ESM blocks consist of contiguous import/export statements at the top of the file
1891    fn detect_esm_blocks(lines: &mut [LineInfo], flavor: MarkdownFlavor) {
1892        // Only process MDX files
1893        if !flavor.supports_esm_blocks() {
1894            return;
1895        }
1896
1897        for line in lines.iter_mut() {
1898            // Skip blank lines and comments at the start
1899            if line.is_blank || line.in_html_comment {
1900                continue;
1901            }
1902
1903            // Check if line starts with import or export
1904            let trimmed = line.content.trim_start();
1905            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
1906                line.in_esm_block = true;
1907            } else {
1908                // Once we hit a non-ESM line, we're done with the ESM block
1909                break;
1910            }
1911        }
1912    }
1913
1914    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
1915    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
1916        let mut code_spans = Vec::new();
1917
1918        // Quick check - if no backticks, no code spans
1919        if !content.contains('`') {
1920            return code_spans;
1921        }
1922
1923        // Use pulldown-cmark's streaming parser with byte offsets
1924        let parser = Parser::new(content).into_offset_iter();
1925
1926        for (event, range) in parser {
1927            if let Event::Code(_) = event {
1928                let start_pos = range.start;
1929                let end_pos = range.end;
1930
1931                // The range includes the backticks, extract the actual content
1932                let full_span = &content[start_pos..end_pos];
1933                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
1934
1935                // Extract content between backticks, preserving spaces
1936                let content_start = start_pos + backtick_count;
1937                let content_end = end_pos - backtick_count;
1938                let span_content = if content_start < content_end {
1939                    content[content_start..content_end].to_string()
1940                } else {
1941                    String::new()
1942                };
1943
1944                // Use binary search to find line number - O(log n) instead of O(n)
1945                // Find the rightmost line whose byte_offset <= start_pos
1946                let line_idx = lines
1947                    .partition_point(|line| line.byte_offset <= start_pos)
1948                    .saturating_sub(1);
1949                let line_num = line_idx + 1;
1950                let col_start = start_pos - lines[line_idx].byte_offset;
1951
1952                // Find end column using binary search
1953                let end_line_idx = lines
1954                    .partition_point(|line| line.byte_offset <= end_pos)
1955                    .saturating_sub(1);
1956                let col_end = end_pos - lines[end_line_idx].byte_offset;
1957
1958                code_spans.push(CodeSpan {
1959                    line: line_num,
1960                    start_col: col_start,
1961                    end_col: col_end,
1962                    byte_offset: start_pos,
1963                    byte_end: end_pos,
1964                    backtick_count,
1965                    content: span_content,
1966                });
1967            }
1968        }
1969
1970        // Sort by position to ensure consistent ordering
1971        code_spans.sort_by_key(|span| span.byte_offset);
1972
1973        code_spans
1974    }
1975
1976    /// Parse all list blocks in the content (legacy line-by-line approach)
1977    fn parse_list_blocks(lines: &[LineInfo]) -> Vec<ListBlock> {
1978        // Pre-size based on lines that could be list items
1979        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
1980        let mut current_block: Option<ListBlock> = None;
1981        let mut last_list_item_line = 0;
1982        let mut current_indent_level = 0;
1983        let mut last_marker_width = 0;
1984
1985        for (line_idx, line_info) in lines.iter().enumerate() {
1986            let line_num = line_idx + 1;
1987
1988            // Enhanced code block handling using Design #3's context analysis
1989            if line_info.in_code_block {
1990                if let Some(ref mut block) = current_block {
1991                    // Calculate minimum indentation for list continuation
1992                    let min_continuation_indent = CodeBlockUtils::calculate_min_continuation_indent(lines, line_idx);
1993
1994                    // Analyze code block context using the three-tier classification
1995                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
1996
1997                    match context {
1998                        CodeBlockContext::Indented => {
1999                            // Code block is properly indented - continues the list
2000                            block.end_line = line_num;
2001                            continue;
2002                        }
2003                        CodeBlockContext::Standalone => {
2004                            // Code block separates lists - end current block
2005                            let completed_block = current_block.take().unwrap();
2006                            list_blocks.push(completed_block);
2007                            continue;
2008                        }
2009                        CodeBlockContext::Adjacent => {
2010                            // Edge case - use conservative behavior (continue list)
2011                            block.end_line = line_num;
2012                            continue;
2013                        }
2014                    }
2015                } else {
2016                    // No current list block - skip code block lines
2017                    continue;
2018                }
2019            }
2020
2021            // Extract blockquote prefix if any
2022            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(&line_info.content) {
2023                caps.get(0).unwrap().as_str().to_string()
2024            } else {
2025                String::new()
2026            };
2027
2028            // Check if this line is a list item
2029            if let Some(list_item) = &line_info.list_item {
2030                // Calculate nesting level based on indentation
2031                let item_indent = list_item.marker_column;
2032                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2033
2034                if let Some(ref mut block) = current_block {
2035                    // Check if this continues the current block
2036                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2037                    // or a continuation at the same or lower level
2038                    let is_nested = nesting > block.nesting_level;
2039                    let same_type =
2040                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2041                    let same_context = block.blockquote_prefix == blockquote_prefix;
2042                    let reasonable_distance = line_num <= last_list_item_line + 2; // Allow one blank line
2043
2044                    // For unordered lists, also check marker consistency
2045                    let marker_compatible =
2046                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2047
2048                    // Check if there's non-list content between the last item and this one
2049                    let has_non_list_content = {
2050                        let mut found_non_list = false;
2051                        // Use the last item from the current block, not the global last_list_item_line
2052                        let block_last_item_line = block.item_lines.last().copied().unwrap_or(block.end_line);
2053
2054                        // Debug: Special check for problematic line
2055                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
2056                            let last_line = &lines[block_last_item_line - 1];
2057                            if last_line.content.contains(r"`sqlalchemy`") && last_line.content.contains(r"\`") {
2058                                log::debug!(
2059                                    "After problematic line {}: checking lines {} to {} for non-list content",
2060                                    block_last_item_line,
2061                                    block_last_item_line + 1,
2062                                    line_num
2063                                );
2064                                // If they're consecutive list items, there's no content between
2065                                if line_num == block_last_item_line + 1 {
2066                                    log::debug!("Lines are consecutive, no content between");
2067                                }
2068                            }
2069                        }
2070
2071                        for check_line in (block_last_item_line + 1)..line_num {
2072                            let check_idx = check_line - 1;
2073                            if check_idx < lines.len() {
2074                                let check_info = &lines[check_idx];
2075                                // Check for content that breaks the list
2076                                let is_list_breaking_content = if check_info.in_code_block {
2077                                    // Use enhanced code block classification for list separation
2078                                    let last_item_marker_width =
2079                                        if block_last_item_line > 0 && block_last_item_line <= lines.len() {
2080                                            lines[block_last_item_line - 1]
2081                                                .list_item
2082                                                .as_ref()
2083                                                .map(|li| {
2084                                                    if li.is_ordered {
2085                                                        li.marker.len() + 1 // Add 1 for the space after ordered list markers
2086                                                    } else {
2087                                                        li.marker.len()
2088                                                    }
2089                                                })
2090                                                .unwrap_or(3) // fallback to 3 if no list item found
2091                                        } else {
2092                                            3 // fallback
2093                                        };
2094
2095                                    let min_continuation = if block.is_ordered { last_item_marker_width } else { 2 };
2096
2097                                    // Analyze code block context using our enhanced classification
2098                                    let context = CodeBlockUtils::analyze_code_block_context(
2099                                        lines,
2100                                        check_line - 1,
2101                                        min_continuation,
2102                                    );
2103
2104                                    // Standalone code blocks break lists, indented ones continue them
2105                                    matches!(context, CodeBlockContext::Standalone)
2106                                } else if !check_info.is_blank && check_info.list_item.is_none() {
2107                                    // Check for structural separators that should break lists (from issue #42)
2108                                    let line_content = check_info.content.trim();
2109
2110                                    // Any of these structural separators break lists
2111                                    if check_info.heading.is_some()
2112                                        || line_content.starts_with("---")
2113                                        || line_content.starts_with("***")
2114                                        || line_content.starts_with("___")
2115                                        || (line_content.contains('|')
2116                                            && !line_content.contains("](")
2117                                            && !line_content.contains("http")
2118                                            && (line_content.matches('|').count() > 1
2119                                                || line_content.starts_with('|')
2120                                                || line_content.ends_with('|')))
2121                                        || line_content.starts_with(">")
2122                                    {
2123                                        true
2124                                    }
2125                                    // Other non-list content - check if properly indented
2126                                    else {
2127                                        let last_item_marker_width =
2128                                            if block_last_item_line > 0 && block_last_item_line <= lines.len() {
2129                                                lines[block_last_item_line - 1]
2130                                                    .list_item
2131                                                    .as_ref()
2132                                                    .map(|li| {
2133                                                        if li.is_ordered {
2134                                                            li.marker.len() + 1 // Add 1 for the space after ordered list markers
2135                                                        } else {
2136                                                            li.marker.len()
2137                                                        }
2138                                                    })
2139                                                    .unwrap_or(3) // fallback to 3 if no list item found
2140                                            } else {
2141                                                3 // fallback
2142                                            };
2143
2144                                        let min_continuation =
2145                                            if block.is_ordered { last_item_marker_width } else { 2 };
2146                                        check_info.indent < min_continuation
2147                                    }
2148                                } else {
2149                                    false
2150                                };
2151
2152                                if is_list_breaking_content {
2153                                    // Not indented enough, so it breaks the list
2154                                    found_non_list = true;
2155                                    break;
2156                                }
2157                            }
2158                        }
2159                        found_non_list
2160                    };
2161
2162                    // A list continues if:
2163                    // 1. It's a nested item (indented more than the parent), OR
2164                    // 2. It's the same type at the same level with reasonable distance
2165                    let mut continues_list = if is_nested {
2166                        // Nested items always continue the list if they're in the same context
2167                        same_context && reasonable_distance && !has_non_list_content
2168                    } else {
2169                        // Same-level items need to match type and markers
2170                        let result = same_type
2171                            && same_context
2172                            && reasonable_distance
2173                            && marker_compatible
2174                            && !has_non_list_content;
2175
2176                        // Debug logging for lines after problematic content
2177                        if block.item_lines.last().is_some_and(|&last_line| {
2178                            last_line > 0
2179                                && last_line <= lines.len()
2180                                && lines[last_line - 1].content.contains(r"`sqlalchemy`")
2181                                && lines[last_line - 1].content.contains(r"\`")
2182                        }) {
2183                            log::debug!(
2184                                "List continuation check after problematic line at line {line_num}: same_type={same_type}, same_context={same_context}, reasonable_distance={reasonable_distance}, marker_compatible={marker_compatible}, has_non_list_content={has_non_list_content}, continues={result}"
2185                            );
2186                            if line_num > 0 && line_num <= lines.len() {
2187                                log::debug!("Current line content: {:?}", lines[line_num - 1].content);
2188                            }
2189                        }
2190
2191                        result
2192                    };
2193
2194                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2195                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2196                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2197                        // Check if the previous line was a list item
2198                        if block.item_lines.contains(&(line_num - 1)) {
2199                            // They're consecutive list items - force them to be in the same list
2200                            continues_list = true;
2201                        }
2202                    }
2203
2204                    if continues_list {
2205                        // Extend current block
2206                        block.end_line = line_num;
2207                        block.item_lines.push(line_num);
2208
2209                        // Update max marker width
2210                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2211                            list_item.marker.len() + 1
2212                        } else {
2213                            list_item.marker.len()
2214                        });
2215
2216                        // Update marker consistency for unordered lists
2217                        if !block.is_ordered
2218                            && block.marker.is_some()
2219                            && block.marker.as_ref() != Some(&list_item.marker)
2220                        {
2221                            // Mixed markers, clear the marker field
2222                            block.marker = None;
2223                        }
2224                    } else {
2225                        // End current block and start a new one
2226
2227                        list_blocks.push(block.clone());
2228
2229                        *block = ListBlock {
2230                            start_line: line_num,
2231                            end_line: line_num,
2232                            is_ordered: list_item.is_ordered,
2233                            marker: if list_item.is_ordered {
2234                                None
2235                            } else {
2236                                Some(list_item.marker.clone())
2237                            },
2238                            blockquote_prefix: blockquote_prefix.clone(),
2239                            item_lines: vec![line_num],
2240                            nesting_level: nesting,
2241                            max_marker_width: if list_item.is_ordered {
2242                                list_item.marker.len() + 1
2243                            } else {
2244                                list_item.marker.len()
2245                            },
2246                        };
2247                    }
2248                } else {
2249                    // Start a new block
2250                    current_block = Some(ListBlock {
2251                        start_line: line_num,
2252                        end_line: line_num,
2253                        is_ordered: list_item.is_ordered,
2254                        marker: if list_item.is_ordered {
2255                            None
2256                        } else {
2257                            Some(list_item.marker.clone())
2258                        },
2259                        blockquote_prefix,
2260                        item_lines: vec![line_num],
2261                        nesting_level: nesting,
2262                        max_marker_width: list_item.marker.len(),
2263                    });
2264                }
2265
2266                last_list_item_line = line_num;
2267                current_indent_level = item_indent;
2268                last_marker_width = if list_item.is_ordered {
2269                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2270                } else {
2271                    list_item.marker.len()
2272                };
2273            } else if let Some(ref mut block) = current_block {
2274                // Not a list item - check if it continues the current block
2275
2276                // For MD032 compatibility, we use a simple approach:
2277                // - Indented lines continue the list
2278                // - Blank lines followed by indented content continue the list
2279                // - Everything else ends the list
2280
2281                // Check if the last line in the list block ended with a backslash (hard line break)
2282                // This handles cases where list items use backslash for hard line breaks
2283                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2284                    lines[block.end_line - 1].content.trim_end().ends_with('\\')
2285                } else {
2286                    false
2287                };
2288
2289                // Calculate minimum indentation for list continuation
2290                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2291                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2292                let min_continuation_indent = if block.is_ordered {
2293                    current_indent_level + last_marker_width
2294                } else {
2295                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2296                };
2297
2298                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2299                    // Indented line or backslash continuation continues the list
2300                    block.end_line = line_num;
2301                } else if line_info.is_blank {
2302                    // Blank line - check if it's internal to the list or ending it
2303                    // We only include blank lines that are followed by more list content
2304                    let mut check_idx = line_idx + 1;
2305                    let mut found_continuation = false;
2306
2307                    // Skip additional blank lines
2308                    while check_idx < lines.len() && lines[check_idx].is_blank {
2309                        check_idx += 1;
2310                    }
2311
2312                    if check_idx < lines.len() {
2313                        let next_line = &lines[check_idx];
2314                        // Check if followed by indented content (list continuation)
2315                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2316                            found_continuation = true;
2317                        }
2318                        // Check if followed by another list item at the same level
2319                        else if !next_line.in_code_block
2320                            && next_line.list_item.is_some()
2321                            && let Some(item) = &next_line.list_item
2322                        {
2323                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2324                                .find(&next_line.content)
2325                                .map_or(String::new(), |m| m.as_str().to_string());
2326                            if item.marker_column == current_indent_level
2327                                && item.is_ordered == block.is_ordered
2328                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2329                            {
2330                                // Check if there was meaningful content between the list items (unused now)
2331                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2332                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2333                                    if let Some(between_line) = lines.get(idx) {
2334                                        let trimmed = between_line.content.trim();
2335                                        // Skip empty lines
2336                                        if trimmed.is_empty() {
2337                                            return false;
2338                                        }
2339                                        // Check for meaningful content
2340                                        let line_indent =
2341                                            between_line.content.len() - between_line.content.trim_start().len();
2342
2343                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2344                                        if trimmed.starts_with("```")
2345                                            || trimmed.starts_with("~~~")
2346                                            || trimmed.starts_with("---")
2347                                            || trimmed.starts_with("***")
2348                                            || trimmed.starts_with("___")
2349                                            || trimmed.starts_with(">")
2350                                            || trimmed.contains('|') // Tables
2351                                            || between_line.heading.is_some()
2352                                        {
2353                                            return true; // These are structural separators - meaningful content that breaks lists
2354                                        }
2355
2356                                        // Only properly indented content continues the list
2357                                        line_indent >= min_continuation_indent
2358                                    } else {
2359                                        false
2360                                    }
2361                                });
2362
2363                                if block.is_ordered {
2364                                    // For ordered lists: don't continue if there are structural separators
2365                                    // Check if there are structural separators between the list items
2366                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2367                                        if let Some(between_line) = lines.get(idx) {
2368                                            let trimmed = between_line.content.trim();
2369                                            if trimmed.is_empty() {
2370                                                return false;
2371                                            }
2372                                            // Check for structural separators that break lists
2373                                            trimmed.starts_with("```")
2374                                                || trimmed.starts_with("~~~")
2375                                                || trimmed.starts_with("---")
2376                                                || trimmed.starts_with("***")
2377                                                || trimmed.starts_with("___")
2378                                                || trimmed.starts_with(">")
2379                                                || trimmed.contains('|') // Tables
2380                                                || between_line.heading.is_some()
2381                                        } else {
2382                                            false
2383                                        }
2384                                    });
2385                                    found_continuation = !has_structural_separators;
2386                                } else {
2387                                    // For unordered lists: also check for structural separators
2388                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2389                                        if let Some(between_line) = lines.get(idx) {
2390                                            let trimmed = between_line.content.trim();
2391                                            if trimmed.is_empty() {
2392                                                return false;
2393                                            }
2394                                            // Check for structural separators that break lists
2395                                            trimmed.starts_with("```")
2396                                                || trimmed.starts_with("~~~")
2397                                                || trimmed.starts_with("---")
2398                                                || trimmed.starts_with("***")
2399                                                || trimmed.starts_with("___")
2400                                                || trimmed.starts_with(">")
2401                                                || trimmed.contains('|') // Tables
2402                                                || between_line.heading.is_some()
2403                                        } else {
2404                                            false
2405                                        }
2406                                    });
2407                                    found_continuation = !has_structural_separators;
2408                                }
2409                            }
2410                        }
2411                    }
2412
2413                    if found_continuation {
2414                        // Include the blank line in the block
2415                        block.end_line = line_num;
2416                    } else {
2417                        // Blank line ends the list - don't include it
2418                        list_blocks.push(block.clone());
2419                        current_block = None;
2420                    }
2421                } else {
2422                    // Check for lazy continuation - non-indented line immediately after a list item
2423                    // But only if the line has sufficient indentation for the list type
2424                    let min_required_indent = if block.is_ordered {
2425                        current_indent_level + last_marker_width
2426                    } else {
2427                        current_indent_level + 2
2428                    };
2429
2430                    // For lazy continuation to apply, the line must either:
2431                    // 1. Have no indentation (true lazy continuation)
2432                    // 2. Have sufficient indentation for the list type
2433                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
2434                    let line_content = line_info.content.trim();
2435                    let is_structural_separator = line_info.heading.is_some()
2436                        || line_content.starts_with("```")
2437                        || line_content.starts_with("~~~")
2438                        || line_content.starts_with("---")
2439                        || line_content.starts_with("***")
2440                        || line_content.starts_with("___")
2441                        || line_content.starts_with(">")
2442                        || (line_content.contains('|')
2443                            && !line_content.contains("](")
2444                            && !line_content.contains("http")
2445                            && (line_content.matches('|').count() > 1
2446                                || line_content.starts_with('|')
2447                                || line_content.ends_with('|'))); // Tables
2448
2449                    // Allow lazy continuation if we're still within the same list block
2450                    // (not just immediately after a list item)
2451                    let is_lazy_continuation = !is_structural_separator
2452                        && !line_info.is_blank
2453                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
2454
2455                    if is_lazy_continuation {
2456                        // Additional check: if the line starts with uppercase and looks like a new sentence,
2457                        // it's probably not a continuation
2458                        let content_to_check = if !blockquote_prefix.is_empty() {
2459                            // Strip blockquote prefix to check the actual content
2460                            line_info
2461                                .content
2462                                .strip_prefix(&blockquote_prefix)
2463                                .unwrap_or(&line_info.content)
2464                                .trim()
2465                        } else {
2466                            line_info.content.trim()
2467                        };
2468
2469                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
2470
2471                        // If it starts with uppercase and the previous line ended with punctuation,
2472                        // it's likely a new paragraph, not a continuation
2473                        if starts_with_uppercase && last_list_item_line > 0 {
2474                            // This looks like a new paragraph
2475                            list_blocks.push(block.clone());
2476                            current_block = None;
2477                        } else {
2478                            // This is a lazy continuation line
2479                            block.end_line = line_num;
2480                        }
2481                    } else {
2482                        // Non-indented, non-blank line that's not a lazy continuation - end the block
2483                        list_blocks.push(block.clone());
2484                        current_block = None;
2485                    }
2486                }
2487            }
2488        }
2489
2490        // Don't forget the last block
2491        if let Some(block) = current_block {
2492            list_blocks.push(block);
2493        }
2494
2495        // Merge adjacent blocks that should be one
2496        merge_adjacent_list_blocks(&mut list_blocks, lines);
2497
2498        list_blocks
2499    }
2500
2501    /// Compute character frequency for fast content analysis
2502    fn compute_char_frequency(content: &str) -> CharFrequency {
2503        let mut frequency = CharFrequency::default();
2504
2505        for ch in content.chars() {
2506            match ch {
2507                '#' => frequency.hash_count += 1,
2508                '*' => frequency.asterisk_count += 1,
2509                '_' => frequency.underscore_count += 1,
2510                '-' => frequency.hyphen_count += 1,
2511                '+' => frequency.plus_count += 1,
2512                '>' => frequency.gt_count += 1,
2513                '|' => frequency.pipe_count += 1,
2514                '[' => frequency.bracket_count += 1,
2515                '`' => frequency.backtick_count += 1,
2516                '<' => frequency.lt_count += 1,
2517                '!' => frequency.exclamation_count += 1,
2518                '\n' => frequency.newline_count += 1,
2519                _ => {}
2520            }
2521        }
2522
2523        frequency
2524    }
2525
2526    /// Parse HTML tags in the content
2527    fn parse_html_tags(
2528        content: &str,
2529        lines: &[LineInfo],
2530        code_blocks: &[(usize, usize)],
2531        flavor: MarkdownFlavor,
2532    ) -> Vec<HtmlTag> {
2533        lazy_static! {
2534            static ref HTML_TAG_REGEX: regex::Regex =
2535                regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap();
2536        }
2537
2538        let mut html_tags = Vec::with_capacity(content.matches('<').count());
2539
2540        for cap in HTML_TAG_REGEX.captures_iter(content) {
2541            let full_match = cap.get(0).unwrap();
2542            let match_start = full_match.start();
2543            let match_end = full_match.end();
2544
2545            // Skip if in code block
2546            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2547                continue;
2548            }
2549
2550            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
2551            let tag_name_original = cap.get(2).unwrap().as_str();
2552            let tag_name = tag_name_original.to_lowercase();
2553            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
2554
2555            // Skip JSX components in MDX files (tags starting with uppercase letter)
2556            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
2557            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
2558                continue;
2559            }
2560
2561            // Find which line this tag is on
2562            let mut line_num = 1;
2563            let mut col_start = match_start;
2564            let mut col_end = match_end;
2565            for (idx, line_info) in lines.iter().enumerate() {
2566                if match_start >= line_info.byte_offset {
2567                    line_num = idx + 1;
2568                    col_start = match_start - line_info.byte_offset;
2569                    col_end = match_end - line_info.byte_offset;
2570                } else {
2571                    break;
2572                }
2573            }
2574
2575            html_tags.push(HtmlTag {
2576                line: line_num,
2577                start_col: col_start,
2578                end_col: col_end,
2579                byte_offset: match_start,
2580                byte_end: match_end,
2581                tag_name,
2582                is_closing,
2583                is_self_closing,
2584                raw_content: full_match.as_str().to_string(),
2585            });
2586        }
2587
2588        html_tags
2589    }
2590
2591    /// Parse emphasis spans in the content
2592    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
2593        lazy_static! {
2594            static ref EMPHASIS_REGEX: regex::Regex =
2595                regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap();
2596        }
2597
2598        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2599
2600        for cap in EMPHASIS_REGEX.captures_iter(content) {
2601            let full_match = cap.get(0).unwrap();
2602            let match_start = full_match.start();
2603            let match_end = full_match.end();
2604
2605            // Skip if in code block
2606            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2607                continue;
2608            }
2609
2610            let opening_markers = cap.get(1).unwrap().as_str();
2611            let content_part = cap.get(2).unwrap().as_str();
2612            let closing_markers = cap.get(3).unwrap().as_str();
2613
2614            // Validate matching markers
2615            if opening_markers.chars().next() != closing_markers.chars().next()
2616                || opening_markers.len() != closing_markers.len()
2617            {
2618                continue;
2619            }
2620
2621            let marker = opening_markers.chars().next().unwrap();
2622            let marker_count = opening_markers.len();
2623
2624            // Find which line this emphasis is on
2625            let mut line_num = 1;
2626            let mut col_start = match_start;
2627            let mut col_end = match_end;
2628            for (idx, line_info) in lines.iter().enumerate() {
2629                if match_start >= line_info.byte_offset {
2630                    line_num = idx + 1;
2631                    col_start = match_start - line_info.byte_offset;
2632                    col_end = match_end - line_info.byte_offset;
2633                } else {
2634                    break;
2635                }
2636            }
2637
2638            emphasis_spans.push(EmphasisSpan {
2639                line: line_num,
2640                start_col: col_start,
2641                end_col: col_end,
2642                byte_offset: match_start,
2643                byte_end: match_end,
2644                marker,
2645                marker_count,
2646                content: content_part.to_string(),
2647            });
2648        }
2649
2650        emphasis_spans
2651    }
2652
2653    /// Parse table rows in the content
2654    fn parse_table_rows(lines: &[LineInfo]) -> Vec<TableRow> {
2655        let mut table_rows = Vec::with_capacity(lines.len() / 20);
2656
2657        for (line_idx, line_info) in lines.iter().enumerate() {
2658            // Skip lines in code blocks or blank lines
2659            if line_info.in_code_block || line_info.is_blank {
2660                continue;
2661            }
2662
2663            let line = &line_info.content;
2664            let line_num = line_idx + 1;
2665
2666            // Check if this line contains pipes (potential table row)
2667            if !line.contains('|') {
2668                continue;
2669            }
2670
2671            // Count columns by splitting on pipes
2672            let parts: Vec<&str> = line.split('|').collect();
2673            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
2674
2675            // Check if this is a separator row
2676            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
2677            let mut column_alignments = Vec::new();
2678
2679            if is_separator {
2680                for part in &parts[1..parts.len() - 1] {
2681                    // Skip first and last empty parts
2682                    let trimmed = part.trim();
2683                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
2684                        "center".to_string()
2685                    } else if trimmed.ends_with(':') {
2686                        "right".to_string()
2687                    } else if trimmed.starts_with(':') {
2688                        "left".to_string()
2689                    } else {
2690                        "none".to_string()
2691                    };
2692                    column_alignments.push(alignment);
2693                }
2694            }
2695
2696            table_rows.push(TableRow {
2697                line: line_num,
2698                is_separator,
2699                column_count,
2700                column_alignments,
2701            });
2702        }
2703
2704        table_rows
2705    }
2706
2707    /// Parse bare URLs and emails in the content
2708    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
2709        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
2710
2711        // Check for bare URLs (not in angle brackets or markdown links)
2712        for cap in BARE_URL_PATTERN.captures_iter(content) {
2713            let full_match = cap.get(0).unwrap();
2714            let match_start = full_match.start();
2715            let match_end = full_match.end();
2716
2717            // Skip if in code block
2718            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2719                continue;
2720            }
2721
2722            // Skip if already in angle brackets or markdown links
2723            let preceding_char = if match_start > 0 {
2724                content.chars().nth(match_start - 1)
2725            } else {
2726                None
2727            };
2728            let following_char = content.chars().nth(match_end);
2729
2730            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2731                continue;
2732            }
2733            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2734                continue;
2735            }
2736
2737            let url = full_match.as_str();
2738            let url_type = if url.starts_with("https://") {
2739                "https"
2740            } else if url.starts_with("http://") {
2741                "http"
2742            } else if url.starts_with("ftp://") {
2743                "ftp"
2744            } else {
2745                "other"
2746            };
2747
2748            // Find which line this URL is on
2749            let mut line_num = 1;
2750            let mut col_start = match_start;
2751            let mut col_end = match_end;
2752            for (idx, line_info) in lines.iter().enumerate() {
2753                if match_start >= line_info.byte_offset {
2754                    line_num = idx + 1;
2755                    col_start = match_start - line_info.byte_offset;
2756                    col_end = match_end - line_info.byte_offset;
2757                } else {
2758                    break;
2759                }
2760            }
2761
2762            bare_urls.push(BareUrl {
2763                line: line_num,
2764                start_col: col_start,
2765                end_col: col_end,
2766                byte_offset: match_start,
2767                byte_end: match_end,
2768                url: url.to_string(),
2769                url_type: url_type.to_string(),
2770            });
2771        }
2772
2773        // Check for bare email addresses
2774        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
2775            let full_match = cap.get(0).unwrap();
2776            let match_start = full_match.start();
2777            let match_end = full_match.end();
2778
2779            // Skip if in code block
2780            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2781                continue;
2782            }
2783
2784            // Skip if already in angle brackets or markdown links
2785            let preceding_char = if match_start > 0 {
2786                content.chars().nth(match_start - 1)
2787            } else {
2788                None
2789            };
2790            let following_char = content.chars().nth(match_end);
2791
2792            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
2793                continue;
2794            }
2795            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
2796                continue;
2797            }
2798
2799            let email = full_match.as_str();
2800
2801            // Find which line this email is on
2802            let mut line_num = 1;
2803            let mut col_start = match_start;
2804            let mut col_end = match_end;
2805            for (idx, line_info) in lines.iter().enumerate() {
2806                if match_start >= line_info.byte_offset {
2807                    line_num = idx + 1;
2808                    col_start = match_start - line_info.byte_offset;
2809                    col_end = match_end - line_info.byte_offset;
2810                } else {
2811                    break;
2812                }
2813            }
2814
2815            bare_urls.push(BareUrl {
2816                line: line_num,
2817                start_col: col_start,
2818                end_col: col_end,
2819                byte_offset: match_start,
2820                byte_end: match_end,
2821                url: email.to_string(),
2822                url_type: "email".to_string(),
2823            });
2824        }
2825
2826        bare_urls
2827    }
2828}
2829
2830/// Merge adjacent list blocks that should be treated as one
2831fn merge_adjacent_list_blocks(list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
2832    if list_blocks.len() < 2 {
2833        return;
2834    }
2835
2836    let mut merger = ListBlockMerger::new(lines);
2837    *list_blocks = merger.merge(list_blocks);
2838}
2839
2840/// Helper struct to manage the complex logic of merging list blocks
2841struct ListBlockMerger<'a> {
2842    lines: &'a [LineInfo],
2843}
2844
2845impl<'a> ListBlockMerger<'a> {
2846    fn new(lines: &'a [LineInfo]) -> Self {
2847        Self { lines }
2848    }
2849
2850    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
2851        let mut merged = Vec::with_capacity(list_blocks.len());
2852        let mut current = list_blocks[0].clone();
2853
2854        for next in list_blocks.iter().skip(1) {
2855            if self.should_merge_blocks(&current, next) {
2856                current = self.merge_two_blocks(current, next);
2857            } else {
2858                merged.push(current);
2859                current = next.clone();
2860            }
2861        }
2862
2863        merged.push(current);
2864        merged
2865    }
2866
2867    /// Determine if two adjacent list blocks should be merged
2868    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
2869        // Basic compatibility checks
2870        if !self.blocks_are_compatible(current, next) {
2871            return false;
2872        }
2873
2874        // Check spacing and content between blocks
2875        let spacing = self.analyze_spacing_between(current, next);
2876        match spacing {
2877            BlockSpacing::Consecutive => true,
2878            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
2879            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
2880                self.can_merge_with_content_between(current, next)
2881            }
2882        }
2883    }
2884
2885    /// Check if blocks have compatible structure for merging
2886    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
2887        current.is_ordered == next.is_ordered
2888            && current.blockquote_prefix == next.blockquote_prefix
2889            && current.nesting_level == next.nesting_level
2890    }
2891
2892    /// Analyze the spacing between two list blocks
2893    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
2894        let gap = next.start_line - current.end_line;
2895
2896        match gap {
2897            1 => BlockSpacing::Consecutive,
2898            2 => BlockSpacing::SingleBlank,
2899            _ if gap > 2 => {
2900                if self.has_only_blank_lines_between(current, next) {
2901                    BlockSpacing::MultipleBlanks
2902                } else {
2903                    BlockSpacing::ContentBetween
2904                }
2905            }
2906            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
2907        }
2908    }
2909
2910    /// Check if unordered lists can be merged with a single blank line between
2911    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2912        // Check if there are structural separators between the blocks
2913        // If has_meaningful_content_between returns true, it means there are structural separators
2914        if has_meaningful_content_between(current, next, self.lines) {
2915            return false; // Structural separators prevent merging
2916        }
2917
2918        // Only merge unordered lists with same marker across single blank
2919        !current.is_ordered && current.marker == next.marker
2920    }
2921
2922    /// Check if ordered lists can be merged when there's content between them
2923    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2924        // Do not merge lists if there are structural separators between them
2925        if has_meaningful_content_between(current, next, self.lines) {
2926            return false; // Structural separators prevent merging
2927        }
2928
2929        // Only consider merging ordered lists if there's no structural content between
2930        current.is_ordered && next.is_ordered
2931    }
2932
2933    /// Check if there are only blank lines between blocks
2934    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
2935        for line_num in (current.end_line + 1)..next.start_line {
2936            if let Some(line_info) = self.lines.get(line_num - 1)
2937                && !line_info.content.trim().is_empty()
2938            {
2939                return false;
2940            }
2941        }
2942        true
2943    }
2944
2945    /// Merge two compatible list blocks into one
2946    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
2947        current.end_line = next.end_line;
2948        current.item_lines.extend_from_slice(&next.item_lines);
2949
2950        // Update max marker width
2951        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
2952
2953        // Handle marker consistency for unordered lists
2954        if !current.is_ordered && self.markers_differ(&current, next) {
2955            current.marker = None; // Mixed markers
2956        }
2957
2958        current
2959    }
2960
2961    /// Check if two blocks have different markers
2962    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
2963        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
2964    }
2965}
2966
/// Types of spacing between list blocks, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockSpacing {
    /// No gap between blocks.
    Consecutive,
    /// One blank line between blocks.
    SingleBlank,
    /// Multiple blank lines but no content.
    MultipleBlanks,
    /// Content exists between blocks.
    ContentBetween,
}
2975
2976/// Check if there's meaningful content (not just blank lines) between two list blocks
2977fn has_meaningful_content_between(current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
2978    // Check lines between current.end_line and next.start_line
2979    for line_num in (current.end_line + 1)..next.start_line {
2980        if let Some(line_info) = lines.get(line_num - 1) {
2981            // Convert to 0-indexed
2982            let trimmed = line_info.content.trim();
2983
2984            // Skip empty lines
2985            if trimmed.is_empty() {
2986                continue;
2987            }
2988
2989            // Check for structural separators that should separate lists (CommonMark compliant)
2990
2991            // Headings separate lists
2992            if line_info.heading.is_some() {
2993                return true; // Has meaningful content - headings separate lists
2994            }
2995
2996            // Horizontal rules separate lists (---, ***, ___)
2997            if is_horizontal_rule(trimmed) {
2998                return true; // Has meaningful content - horizontal rules separate lists
2999            }
3000
3001            // Tables separate lists (lines containing | but not in URLs or code)
3002            // Simple heuristic: tables typically have | at start/end or multiple |
3003            if trimmed.contains('|') && trimmed.len() > 1 {
3004                // Don't treat URLs with | as tables
3005                if !trimmed.contains("](") && !trimmed.contains("http") {
3006                    // More robust check: tables usually have multiple | or | at edges
3007                    let pipe_count = trimmed.matches('|').count();
3008                    if pipe_count > 1 || trimmed.starts_with('|') || trimmed.ends_with('|') {
3009                        return true; // Has meaningful content - tables separate lists
3010                    }
3011                }
3012            }
3013
3014            // Blockquotes separate lists
3015            if trimmed.starts_with('>') {
3016                return true; // Has meaningful content - blockquotes separate lists
3017            }
3018
3019            // Code block fences separate lists (unless properly indented as list content)
3020            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3021                let line_indent = line_info.content.len() - line_info.content.trim_start().len();
3022
3023                // Check if this code block is properly indented as list continuation
3024                let min_continuation_indent = if current.is_ordered {
3025                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3026                } else {
3027                    current.nesting_level + 2
3028                };
3029
3030                if line_indent < min_continuation_indent {
3031                    // This is a standalone code block that separates lists
3032                    return true; // Has meaningful content - standalone code blocks separate lists
3033                }
3034            }
3035
3036            // Check if this line has proper indentation for list continuation
3037            let line_indent = line_info.content.len() - line_info.content.trim_start().len();
3038
3039            // Calculate minimum indentation needed to be list continuation
3040            let min_indent = if current.is_ordered {
3041                current.nesting_level + current.max_marker_width
3042            } else {
3043                current.nesting_level + 2
3044            };
3045
3046            // If the line is not indented enough to be list continuation, it's meaningful content
3047            if line_indent < min_indent {
3048                return true; // Has meaningful content - content not indented as list continuation
3049            }
3050
3051            // If we reach here, the line is properly indented as list continuation
3052            // Continue checking other lines
3053        }
3054    }
3055
3056    // Only blank lines or properly indented list continuation content between blocks
3057    false
3058}
3059
/// Check whether an already-trimmed line is a horizontal rule (`---`, `***`, `___`).
///
/// The line must start with `-`, `*` or `_`, contain at least three of that
/// same character, and otherwise consist only of spaces and tabs.
fn is_horizontal_rule(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which marker the rest of the line must use.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let only_markers_and_blanks = trimmed.chars().all(|ch| ch == marker || ch == ' ' || ch == '\t');

    only_markers_and_blanks && trimmed.chars().filter(|&ch| ch == marker).count() >= 3
}
3083
/// Unit tests for `LintContext`: line offsets, offset-to-position conversion,
/// per-line information, list item detection and MDX ESM block detection.
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input: one line offset (0), no line entries, and offset 0 maps to (1, 1).
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    // A single line without trailing newline: positions are reported 1-based.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    // Several lines including a blank one: each line start is recorded as an
    // offset and offsets map back to the expected (line, column) pairs.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    // Per-line metadata (content, byte offset, indent, blank flag) and the
    // 1-based lookup helpers `line_to_byte_offset` / `line_info`.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content, "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content, "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content, "");
        assert!(line3.is_blank);

        // Test helper methods
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    // List item detection for unordered and ordered markers, including marker
    // text, ordered flag, columns and parsed number; plain text is not a list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    // Offsets at line boundaries, including the newline positions and the
    // offset one past the final character.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }

    // In the MDX flavor, leading `import` / `export` lines are flagged as ESM
    // blocks while the following blank, heading and text lines are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    // The same `import` / `export` lines must not be flagged outside the MDX flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}