lex-analysis 0.10.5

//! This is the semantic token collector, which editors use for syntax highlighting.
//! It's worth going over the general approach.
//!
//! Semantic Tokens and Editor Highlighting Architecture
//!
//!     1. LSP emits semantic tokens using our format's native terminology (e.g., `Verbatim`
//! Annotation, etc). The LSP declares a token legend at initialization and emits tokens as indices
//! into that legend—it has no knowledge of editor-specific theming.
//!     2. Editor plugins map our token types to the editor's theme primitives. This lets users
//! leverage their existing theme choices while our core LSP code remains editor-agnostic.
//!     
//! Editor-Specific Mapping
//!
//!     VSCode — declarative mapping in `package.json`:
//!         "semanticTokenScopes": [{
//!         "language": "ourformat",
//!         "scopes": {
//!         "Verbatim": ["markup.inline.raw"],
//!         "Heading": ["markup.heading"],
//!         "Emphasis": ["markup.italic"]
//!         }
//!         }]
//!     :: javascript
//!
//!     We map to TextMate scopes (`markup.*`) as they have broad theme support and are a natural
//! fit for markup.
//!
//!     Neovim — imperative mapping in the plugin:
//!         vim.api.nvim_set_hl(0, '@lsp.type.Verbatim', { link = '@markup.raw' })
//!         `vim.api.nvim_set_hl(0, '@lsp.type.Heading', { link = '@markup.heading' })
//!         vim.api.nvim_set_hl(0, '@lsp.type.Emphasis', { link = '@markup.italic' })
//!     :: lua
//!     We link to treesitter's `@markup.*` groups for equivalent theme coverage.
//!     Benefits:
//!         - LSP speaks our format's semantics—no impedance mismatch
//!         - Users get syntax highlighting that respects their theme
//!         - Mapping logic is isolated to editor plugins; adding a new editor doesn't touch the LSP
//!
//! The file editors/vscode/themes/lex-light.json has the recommended theming for Lex to be used in
//! tests and so forth.
use lex_core::lex::ast::inline_positions::{walk_text_content_positions, InlinePositionVisitor};
use lex_core::lex::ast::{
    Annotation, ContentItem, Definition, Document, List, ListItem, Paragraph, Range, Session,
    Table, TextContent, Verbatim,
};
use lex_core::lex::inlines::{ReferenceInline, ReferenceType};

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LexSemanticTokenKind {
    DocumentTitle,
    DocumentSubtitle,
    SessionMarker,
    SessionTitleText,
    DefinitionSubject,
    DefinitionContent,
    ListMarker,
    ListItemText,
    AnnotationLabel,
    AnnotationParameter,
    AnnotationContent,
    InlineStrong,
    InlineEmphasis,
    InlineCode,
    InlineMath,
    Reference,
    ReferenceCitation,
    ReferenceFootnote,
    ReferenceAnnotation,
    VerbatimSubject,
    DataLabel,
    DataParameter,
    VerbatimContent,
    InlineMarkerStrongStart,
    InlineMarkerStrongEnd,
    InlineMarkerEmphasisStart,
    InlineMarkerEmphasisEnd,
    InlineMarkerCodeStart,
    InlineMarkerCodeEnd,
    InlineMarkerMathStart,
    InlineMarkerMathEnd,
    InlineMarkerRefStart,
    InlineMarkerRefEnd,
}

impl LexSemanticTokenKind {
    /// Returns the semantic token type string for LSP.
    ///
    /// These token type names are mapped to standard TextMate scopes in editor configurations
    /// to ensure compatibility with existing themes (Neovim, VSCode, etc.).
    ///
    /// Mapping rationale (based on Lex↔Markdown mapping from lex-babel):
    /// - Session → Heading → maps to "markup.heading"
    /// - Definition → Term: Desc → maps to "variable.other.definition"
    /// - InlineStrong → bold → maps to "markup.bold"
    /// - InlineEmphasis → *italic* → maps to "markup.italic"
    /// - InlineCode → `code` → maps to "markup.inline.raw"
    /// - InlineMath → $math$ → maps to "constant.numeric"
    /// - Reference → \[citation\] → maps to "markup.underline.link"
    /// - Verbatim → ```block``` → maps to "markup.raw.block"
    /// - Annotation → <!-- comment --> → maps to "comment.block"
    /// - ListMarker → - or 1. → maps to "punctuation.definition.list"
    pub fn as_str(self) -> &'static str {
        match self {
            LexSemanticTokenKind::DocumentTitle => "DocumentTitle",
            LexSemanticTokenKind::DocumentSubtitle => "DocumentSubtitle",
            LexSemanticTokenKind::SessionMarker => "SessionMarker",
            LexSemanticTokenKind::SessionTitleText => "SessionTitleText",
            LexSemanticTokenKind::DefinitionSubject => "DefinitionSubject",
            LexSemanticTokenKind::DefinitionContent => "DefinitionContent",
            LexSemanticTokenKind::ListMarker => "ListMarker",
            LexSemanticTokenKind::ListItemText => "ListItemText",
            LexSemanticTokenKind::AnnotationLabel => "AnnotationLabel",
            LexSemanticTokenKind::AnnotationParameter => "AnnotationParameter",
            LexSemanticTokenKind::AnnotationContent => "AnnotationContent",
            LexSemanticTokenKind::InlineStrong => "InlineStrong",
            LexSemanticTokenKind::InlineEmphasis => "InlineEmphasis",
            LexSemanticTokenKind::InlineCode => "InlineCode",
            LexSemanticTokenKind::InlineMath => "InlineMath",
            LexSemanticTokenKind::Reference => "Reference",
            LexSemanticTokenKind::ReferenceCitation => "ReferenceCitation",
            LexSemanticTokenKind::ReferenceFootnote => "ReferenceFootnote",
            LexSemanticTokenKind::ReferenceAnnotation => "ReferenceAnnotation",
            LexSemanticTokenKind::VerbatimSubject => "VerbatimSubject",
            LexSemanticTokenKind::DataLabel => "DataLabel",
            LexSemanticTokenKind::DataParameter => "DataParameter",
            LexSemanticTokenKind::VerbatimContent => "VerbatimContent",
            LexSemanticTokenKind::InlineMarkerStrongStart => "InlineMarker_strong_start",
            LexSemanticTokenKind::InlineMarkerStrongEnd => "InlineMarker_strong_end",
            LexSemanticTokenKind::InlineMarkerEmphasisStart => "InlineMarker_emphasis_start",
            LexSemanticTokenKind::InlineMarkerEmphasisEnd => "InlineMarker_emphasis_end",
            LexSemanticTokenKind::InlineMarkerCodeStart => "InlineMarker_code_start",
            LexSemanticTokenKind::InlineMarkerCodeEnd => "InlineMarker_code_end",
            LexSemanticTokenKind::InlineMarkerMathStart => "InlineMarker_math_start",
            LexSemanticTokenKind::InlineMarkerMathEnd => "InlineMarker_math_end",
            LexSemanticTokenKind::InlineMarkerRefStart => "InlineMarker_ref_start",
            LexSemanticTokenKind::InlineMarkerRefEnd => "InlineMarker_ref_end",
        }
    }
}

pub const SEMANTIC_TOKEN_KINDS: &[LexSemanticTokenKind] = &[
    LexSemanticTokenKind::DocumentTitle,
    LexSemanticTokenKind::DocumentSubtitle,
    LexSemanticTokenKind::SessionMarker,
    LexSemanticTokenKind::SessionTitleText,
    LexSemanticTokenKind::DefinitionSubject,
    LexSemanticTokenKind::DefinitionContent,
    LexSemanticTokenKind::ListMarker,
    LexSemanticTokenKind::ListItemText,
    LexSemanticTokenKind::AnnotationLabel,
    LexSemanticTokenKind::AnnotationParameter,
    LexSemanticTokenKind::AnnotationContent,
    LexSemanticTokenKind::InlineStrong,
    LexSemanticTokenKind::InlineEmphasis,
    LexSemanticTokenKind::InlineCode,
    LexSemanticTokenKind::InlineMath,
    LexSemanticTokenKind::Reference,
    LexSemanticTokenKind::ReferenceCitation,
    LexSemanticTokenKind::ReferenceFootnote,
    LexSemanticTokenKind::VerbatimSubject,
    LexSemanticTokenKind::DataLabel,
    LexSemanticTokenKind::DataParameter,
    LexSemanticTokenKind::VerbatimContent,
    LexSemanticTokenKind::InlineMarkerStrongStart,
    LexSemanticTokenKind::InlineMarkerStrongEnd,
    LexSemanticTokenKind::InlineMarkerEmphasisStart,
    LexSemanticTokenKind::InlineMarkerEmphasisEnd,
    LexSemanticTokenKind::InlineMarkerCodeStart,
    LexSemanticTokenKind::InlineMarkerCodeEnd,
    LexSemanticTokenKind::InlineMarkerMathStart,
    LexSemanticTokenKind::InlineMarkerMathEnd,
    LexSemanticTokenKind::InlineMarkerRefStart,
    LexSemanticTokenKind::InlineMarkerRefEnd,
    LexSemanticTokenKind::ReferenceAnnotation,
];

#[derive(Debug, Clone, PartialEq)]
pub struct LexSemanticToken {
    pub kind: LexSemanticTokenKind,
    pub range: Range,
}

pub fn collect_semantic_tokens(document: &Document) -> Vec<LexSemanticToken> {
    let mut collector = TokenCollector::new();
    collector.process_document(document);
    collector.finish()
}

struct TokenCollector {
    tokens: Vec<LexSemanticToken>,
    in_annotation: bool,
    in_definition: bool,
}

impl TokenCollector {
    fn new() -> Self {
        Self {
            tokens: Vec::new(),
            in_annotation: false,
            in_definition: false,
        }
    }

    fn finish(mut self) -> Vec<LexSemanticToken> {
        self.tokens.sort_by(|a, b| {
            let a_start = (
                &a.range.start.line,
                &a.range.start.column,
                &a.range.end.line,
                &a.range.end.column,
            );
            let b_start = (
                &b.range.start.line,
                &b.range.start.column,
                &b.range.end.line,
                &b.range.end.column,
            );
            a_start.cmp(&b_start)
        });
        self.tokens
    }

    fn push_range(&mut self, range: &Range, kind: LexSemanticTokenKind) {
        if range.span.start < range.span.end {
            self.tokens.push(LexSemanticToken {
                kind,
                range: range.clone(),
            });
        }
    }

    fn process_document(&mut self, document: &Document) {
        self.process_annotations(document.annotations());
        if let Some(title) = &document.title {
            if let Some(title_loc) = &title.content.location {
                self.push_range(title_loc, LexSemanticTokenKind::DocumentTitle);
            } else {
                self.push_range(&title.location, LexSemanticTokenKind::DocumentTitle);
            }
            self.process_text_content(&title.content);
            if let Some(subtitle) = &title.subtitle {
                if let Some(sub_loc) = &subtitle.location {
                    self.push_range(sub_loc, LexSemanticTokenKind::DocumentSubtitle);
                }
                self.process_text_content(subtitle);
            }
        }
        self.process_session(&document.root, LexSemanticTokenKind::SessionTitleText);
    }

    fn process_session(&mut self, session: &Session, title_kind: LexSemanticTokenKind) {
        // Emit separate tokens for marker and title text
        if let Some(marker) = &session.marker {
            // Emit SessionMarker token for the sequence marker
            self.push_range(&marker.location, LexSemanticTokenKind::SessionMarker);
        }

        // Emit SessionTitleText token for the title text (without marker)
        // Create a range for the title text by using the full title location
        // and adjusting if there's a marker
        if let Some(header) = session.header_location() {
            if let Some(marker) = &session.marker {
                // Calculate the title text range (after the marker)
                let marker_text = marker.as_str();
                let full_title = session.full_title();

                // Find where the marker ends in the title
                if let Some(pos) = full_title.find(marker_text) {
                    let marker_end = pos + marker_text.len();
                    // Skip whitespace after marker
                    let title_start = full_title[marker_end..]
                        .chars()
                        .position(|c| !c.is_whitespace())
                        .map(|p| marker_end + p)
                        .unwrap_or(marker_end);

                    if title_start < full_title.len() {
                        // Create range for title text only
                        use lex_core::lex::ast::Position;
                        let title_text_range = Range::new(
                            header.span.start + title_start..header.span.end,
                            Position::new(header.start.line, header.start.column + title_start),
                            header.end,
                        );
                        self.push_range(&title_text_range, title_kind);
                    }
                }
            } else {
                // No marker, the entire header is title text
                self.push_range(header, title_kind);
            }
        }

        self.process_text_content(&session.title);

        self.process_annotations(session.annotations());
        for child in session.children.iter() {
            self.process_content_item(child);
        }
    }

    fn process_content_item(&mut self, item: &ContentItem) {
        match item {
            ContentItem::Paragraph(paragraph) => self.process_paragraph(paragraph),
            ContentItem::Session(session) => {
                self.process_session(session, LexSemanticTokenKind::SessionTitleText)
            }
            ContentItem::List(list) => self.process_list(list),
            ContentItem::ListItem(list_item) => self.process_list_item(list_item),
            ContentItem::Definition(definition) => self.process_definition(definition),
            ContentItem::Annotation(annotation) => self.process_annotation(annotation),
            ContentItem::VerbatimBlock(verbatim) => self.process_verbatim(verbatim),
            ContentItem::Table(table) => self.process_table(table),
            ContentItem::TextLine(text_line) => self.process_text_content(&text_line.content),
            ContentItem::VerbatimLine(_) => {}
            ContentItem::BlankLineGroup(_) => {}
        }
    }

    fn process_paragraph(&mut self, paragraph: &Paragraph) {
        for line in &paragraph.lines {
            if let ContentItem::TextLine(text_line) = line {
                // Don't emit full-line tokens for DefinitionContent or AnnotationContent
                // as they overlap with inline tokens. The context is already clear from
                // the DefinitionSubject and AnnotationLabel tokens.
                self.process_text_content(&text_line.content);
            }
        }
        self.process_annotations(paragraph.annotations());
    }

    fn process_list(&mut self, list: &List) {
        self.process_annotations(list.annotations());
        for item in list.items.iter() {
            if let ContentItem::ListItem(list_item) = item {
                self.process_list_item(list_item);
            }
        }
    }

    fn process_list_item(&mut self, list_item: &ListItem) {
        if let Some(marker_range) = &list_item.marker.location {
            self.push_range(marker_range, LexSemanticTokenKind::ListMarker);
        }
        for text in &list_item.text {
            if let Some(location) = &text.location {
                self.push_range(location, LexSemanticTokenKind::ListItemText);
            }
            self.process_text_content(text);
        }
        self.process_annotations(list_item.annotations());
        for child in list_item.children.iter() {
            self.process_content_item(child);
        }
    }

    fn process_definition(&mut self, definition: &Definition) {
        if let Some(header) = definition.header_location() {
            self.push_range(header, LexSemanticTokenKind::DefinitionSubject);
        }
        self.process_text_content(&definition.subject);
        self.process_annotations(definition.annotations());
        let was_in_definition = self.in_definition;
        self.in_definition = true;
        for child in definition.children.iter() {
            self.process_content_item(child);
        }
        self.in_definition = was_in_definition;
    }

    fn process_verbatim(&mut self, verbatim: &Verbatim) {
        for group in verbatim.group() {
            self.process_text_content(group.subject);
            if let Some(location) = &group.subject.location {
                self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
            }
            for child in group.children {
                if let ContentItem::VerbatimLine(line) = child {
                    self.push_range(&line.location, LexSemanticTokenKind::VerbatimContent);
                }
            }
        }

        self.push_range(
            &verbatim.closing_data.label.location,
            LexSemanticTokenKind::DataLabel,
        );
        for parameter in &verbatim.closing_data.parameters {
            self.push_range(&parameter.location, LexSemanticTokenKind::DataParameter);
        }

        self.process_annotations(verbatim.annotations());
    }

    fn process_table(&mut self, table: &Table) {
        self.process_text_content(&table.subject);
        if let Some(location) = &table.subject.location {
            self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
        }

        // Process cell content: inline text and block children
        for row in table.all_rows() {
            for cell in &row.cells {
                self.process_text_content(&cell.content);
                for child in cell.children.iter() {
                    self.process_content_item(child);
                }
            }
        }

        // Table config annotations are in table.annotations — processed below
        // by process_annotations()

        self.process_annotations(table.annotations());
    }

    fn process_annotation(&mut self, annotation: &Annotation) {
        self.push_range(
            annotation.header_location(),
            LexSemanticTokenKind::AnnotationLabel,
        );
        for parameter in &annotation.data.parameters {
            self.push_range(
                &parameter.location,
                LexSemanticTokenKind::AnnotationParameter,
            );
        }
        let was_in_annotation = self.in_annotation;
        self.in_annotation = true;
        for child in annotation.children.iter() {
            self.process_content_item(child);
        }
        self.in_annotation = was_in_annotation;
    }

    fn process_annotations(&mut self, annotations: &[Annotation]) {
        for annotation in annotations {
            self.process_annotation(annotation);
        }
    }

    fn process_text_content(&mut self, text: &TextContent) {
        let mut emitter = InlineTokenEmitter {
            tokens: &mut self.tokens,
            in_annotation: self.in_annotation,
            in_definition: self.in_definition,
            in_formatted: 0,
        };
        walk_text_content_positions(text, &mut emitter);
    }
}

/// Visitor that converts inline-tree positions into [`LexSemanticToken`]s.
///
/// Bridges the shared inline-position walker (which only tracks structural
/// ranges) with the editor-facing semantic-token taxonomy. Marker tokens
/// (`InlineMarker_*_start`/`_end`) are emitted on every container/literal
/// boundary; content tokens take the appropriate kind for the surrounding
/// element, including reference-type discrimination
/// (`ReferenceCitation`/`ReferenceFootnote`/`ReferenceAnnotation`/
/// `Reference`). Plain text inside a Strong/Emphasis container is suppressed
/// (`in_formatted > 0`) because the container's content span already covers
/// it.
struct InlineTokenEmitter<'a> {
    tokens: &'a mut Vec<LexSemanticToken>,
    in_annotation: bool,
    in_definition: bool,
    in_formatted: usize,
}

impl<'a> InlineTokenEmitter<'a> {
    fn push(&mut self, range: &Range, kind: LexSemanticTokenKind) {
        if range.span.start < range.span.end {
            self.tokens.push(LexSemanticToken {
                kind,
                range: range.clone(),
            });
        }
    }
}

impl<'a> InlinePositionVisitor for InlineTokenEmitter<'a> {
    fn visit_plain(&mut self, range: &Range, _text: &str) {
        if self.in_formatted > 0 {
            // Covered by the enclosing container's content span — see leave_strong/leave_emphasis.
            return;
        }
        let kind = if self.in_annotation {
            LexSemanticTokenKind::AnnotationContent
        } else if self.in_definition {
            LexSemanticTokenKind::DefinitionContent
        } else {
            return;
        };
        self.push(range, kind);
    }

    fn enter_strong(&mut self, open_marker: &Range) {
        self.push(open_marker, LexSemanticTokenKind::InlineMarkerStrongStart);
        self.in_formatted += 1;
    }

    fn leave_strong(&mut self, content: &Range, close_marker: &Range) {
        self.in_formatted -= 1;
        self.push(content, LexSemanticTokenKind::InlineStrong);
        self.push(close_marker, LexSemanticTokenKind::InlineMarkerStrongEnd);
    }

    fn enter_emphasis(&mut self, open_marker: &Range) {
        self.push(open_marker, LexSemanticTokenKind::InlineMarkerEmphasisStart);
        self.in_formatted += 1;
    }

    fn leave_emphasis(&mut self, content: &Range, close_marker: &Range) {
        self.in_formatted -= 1;
        self.push(content, LexSemanticTokenKind::InlineEmphasis);
        self.push(close_marker, LexSemanticTokenKind::InlineMarkerEmphasisEnd);
    }

    fn visit_code(
        &mut self,
        open_marker: &Range,
        content: &Range,
        close_marker: &Range,
        _text: &str,
    ) {
        self.push(open_marker, LexSemanticTokenKind::InlineMarkerCodeStart);
        self.push(content, LexSemanticTokenKind::InlineCode);
        self.push(close_marker, LexSemanticTokenKind::InlineMarkerCodeEnd);
    }

    fn visit_math(
        &mut self,
        open_marker: &Range,
        content: &Range,
        close_marker: &Range,
        _text: &str,
    ) {
        self.push(open_marker, LexSemanticTokenKind::InlineMarkerMathStart);
        self.push(content, LexSemanticTokenKind::InlineMath);
        self.push(close_marker, LexSemanticTokenKind::InlineMarkerMathEnd);
    }

    fn visit_reference(
        &mut self,
        open_marker: &Range,
        content: &Range,
        close_marker: &Range,
        data: &ReferenceInline,
    ) {
        self.push(open_marker, LexSemanticTokenKind::InlineMarkerRefStart);
        let ref_kind = match &data.reference_type {
            ReferenceType::Citation(_) => LexSemanticTokenKind::ReferenceCitation,
            ReferenceType::FootnoteNumber { .. } => LexSemanticTokenKind::ReferenceFootnote,
            ReferenceType::AnnotationReference { .. } => LexSemanticTokenKind::ReferenceAnnotation,
            _ => LexSemanticTokenKind::Reference,
        };
        self.push(content, ref_kind);
        self.push(close_marker, LexSemanticTokenKind::InlineMarkerRefEnd);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{sample_document, sample_source};
    use lex_core::lex::testing::lexplore::Lexplore;

    fn snippets(
        tokens: &[LexSemanticToken],
        kind: LexSemanticTokenKind,
        source: &str,
    ) -> Vec<String> {
        tokens
            .iter()
            .filter(|token| token.kind == kind)
            .map(|token| source[token.range.span.clone()].to_string())
            .collect()
    }

    #[test]
    fn collects_structural_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        // Session titles are now split into SessionMarker and SessionTitleText
        assert!(
            snippets(&tokens, LexSemanticTokenKind::SessionMarker, source)
                .iter()
                .any(|snippet| snippet.trim() == "1.")
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::SessionTitleText, source)
                .iter()
                .any(|snippet| snippet.trim() == "Intro")
        );
        // Cache is parsed as VerbatimSubject
        assert!(
            snippets(&tokens, LexSemanticTokenKind::VerbatimSubject, source)
                .iter()
                .any(|snippet| snippet.trim_end() == "Cache")
        );
        let markers = snippets(&tokens, LexSemanticTokenKind::ListMarker, source);
        assert_eq!(markers.len(), 4);
        assert!(markers
            .iter()
            .all(|snippet| snippet.trim_start().starts_with('-')
                || snippet.trim_start().chars().next().unwrap().is_numeric()));
        let annotation_labels = snippets(&tokens, LexSemanticTokenKind::AnnotationLabel, source);
        assert!(annotation_labels
            .iter()
            .any(|snippet| snippet.contains("doc.note")));
        let parameters = snippets(&tokens, LexSemanticTokenKind::AnnotationParameter, source);
        assert!(parameters
            .iter()
            .any(|snippet| snippet.contains("severity=info")));
        let verbatim_subjects = snippets(&tokens, LexSemanticTokenKind::VerbatimSubject, source);
        assert!(verbatim_subjects
            .iter()
            .any(|snippet| snippet.contains("CLI Example")));
        assert!(snippets(&tokens, LexSemanticTokenKind::DataLabel, source)
            .iter()
            .any(|snippet| snippet.contains("shell")));
    }

    #[test]
    fn collects_inline_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();
        assert!(
            snippets(&tokens, LexSemanticTokenKind::InlineStrong, source)
                .iter()
                .any(|snippet| snippet.contains("Lex"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source)
                .iter()
                .any(|snippet| snippet.contains("format"))
        );
        assert!(snippets(&tokens, LexSemanticTokenKind::InlineCode, source)
            .iter()
            .any(|snippet| snippet.contains("code")));
        assert!(snippets(&tokens, LexSemanticTokenKind::InlineMath, source)
            .iter()
            .any(|snippet| snippet.contains("math")));
    }

    #[test]
    fn classifies_references() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceCitation, source)
                .iter()
                .any(|snippet| snippet.contains("@spec2025"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceAnnotation, source)
                .iter()
                .any(|snippet| snippet.contains("::source"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceFootnote, source)
                .iter()
                .any(|snippet| snippet.contains("1"))
        );
        assert!(snippets(&tokens, LexSemanticTokenKind::Reference, source)
            .iter()
            .any(|snippet| snippet.contains("Cache")));
    }

    #[test]
    fn empty_document_has_no_tokens() {
        let document = Lexplore::benchmark(0)
            .parse()
            .expect("failed to parse empty benchmark fixture");
        let tokens = collect_semantic_tokens(&document);
        assert!(tokens.is_empty());
    }

    #[test]
    fn emits_annotation_content_for_inline_annotation() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        // The fixture starts with `:: doc.note severity=info :: Document preface.`
        // "Document preface." is inline annotation content — plain text inside annotation context.
        let annotation_content = snippets(&tokens, LexSemanticTokenKind::AnnotationContent, source);
        assert!(
            annotation_content
                .iter()
                .any(|snippet| snippet.contains("Document preface")),
            "AnnotationContent should be emitted for plain text inside annotations, got: {annotation_content:?}"
        );
    }

    #[test]
    fn annotation_content_excludes_formatted_text() {
        // Inline formatting within annotation context should get its own token type,
        // not AnnotationContent — only Plain nodes emit AnnotationContent.
        let source = ":: note :: Some *bold* text.\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        let annotation_content: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind == LexSemanticTokenKind::AnnotationContent)
            .map(|t| &source[t.range.span.clone()])
            .collect();

        // "Some " and " text." should be AnnotationContent, but "bold" should not
        assert!(
            annotation_content.iter().any(|s| s.contains("Some")),
            "Plain text before formatting should be AnnotationContent"
        );
        assert!(
            annotation_content.iter().any(|s| s.contains("text.")),
            "Plain text after formatting should be AnnotationContent"
        );
        assert!(
            !annotation_content.iter().any(|s| s.contains("bold")),
            "Formatted text should NOT be AnnotationContent"
        );

        // "bold" should be InlineStrong
        let strong: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind == LexSemanticTokenKind::InlineStrong)
            .map(|t| &source[t.range.span.clone()])
            .collect();
        assert!(strong.contains(&"bold"));
    }

    #[test]
    fn table_cell_inline_formatting_gets_tokens() {
        let source = "Stats:\n    | *Name* | `code` |\n    | _test_ | #42#   |\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        let strong = snippets(&tokens, LexSemanticTokenKind::InlineStrong, source);
        assert!(
            strong.iter().any(|s| s.contains("Name")),
            "Expected InlineStrong for *Name* in table cell, got: {strong:?}"
        );

        let code = snippets(&tokens, LexSemanticTokenKind::InlineCode, source);
        assert!(
            code.iter().any(|s| s.contains("code")),
            "Expected InlineCode for `code` in table cell, got: {code:?}"
        );

        let emphasis = snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source);
        assert!(
            emphasis.iter().any(|s| s.contains("test")),
            "Expected InlineEmphasis for _test_ in table cell, got: {emphasis:?}"
        );

        let math = snippets(&tokens, LexSemanticTokenKind::InlineMath, source);
        assert!(
            math.iter().any(|s| s.contains("42")),
            "Expected InlineMath for #42# in table cell, got: {math:?}"
        );
    }
}