1use lex_core::lex::ast::{
42 Annotation, ContentItem, Definition, Document, List, ListItem, Paragraph, Position, Range,
43 Session, Table, TextContent, Verbatim,
44};
45use lex_core::lex::inlines::{InlineNode, ReferenceType};
46
/// Kinds of semantic highlighting tokens emitted for a Lex document.
///
/// The `InlineMarker*` variants cover the delimiter punctuation around inline
/// formatting; the remaining variants cover document structure and content.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LexSemanticTokenKind {
    // Document header (title line and optional subtitle).
    DocumentTitle,
    DocumentSubtitle,
    // Session headers: numbering marker (e.g. "1.") and the title text after it.
    SessionMarker,
    SessionTitleText,
    // Definitions: the subject header and plain text inside the body.
    DefinitionSubject,
    DefinitionContent,
    // Lists: the bullet/number marker and the item text.
    ListMarker,
    ListItemText,
    // Annotations: label, parameters, and plain text inside the body.
    AnnotationLabel,
    AnnotationParameter,
    AnnotationContent,
    // Inline formatting content (between the markers).
    InlineStrong,
    InlineEmphasis,
    InlineCode,
    InlineMath,
    // References, with citation/footnote specializations.
    Reference,
    ReferenceCitation,
    ReferenceFootnote,
    // Verbatim blocks: subject, closing data label/parameters, and raw lines.
    VerbatimSubject,
    DataLabel,
    DataParameter,
    VerbatimContent,
    // Inline formatting delimiters (start/end of each marker pair).
    InlineMarkerStrongStart,
    InlineMarkerStrongEnd,
    InlineMarkerEmphasisStart,
    InlineMarkerEmphasisEnd,
    InlineMarkerCodeStart,
    InlineMarkerCodeEnd,
    InlineMarkerMathStart,
    InlineMarkerMathEnd,
    InlineMarkerRefStart,
    InlineMarkerRefEnd,
}
82
impl LexSemanticTokenKind {
    /// Returns the stable string name for this token kind.
    ///
    /// Most variants map to their own identifier verbatim; the inline-marker
    /// variants intentionally use snake_case suffixes
    /// (e.g. `InlineMarker_strong_start`) — presumably a wire/theme naming
    /// convention consumed elsewhere; do not "normalize" these strings.
    pub fn as_str(self) -> &'static str {
        match self {
            LexSemanticTokenKind::DocumentTitle => "DocumentTitle",
            LexSemanticTokenKind::DocumentSubtitle => "DocumentSubtitle",
            LexSemanticTokenKind::SessionMarker => "SessionMarker",
            LexSemanticTokenKind::SessionTitleText => "SessionTitleText",
            LexSemanticTokenKind::DefinitionSubject => "DefinitionSubject",
            LexSemanticTokenKind::DefinitionContent => "DefinitionContent",
            LexSemanticTokenKind::ListMarker => "ListMarker",
            LexSemanticTokenKind::ListItemText => "ListItemText",
            LexSemanticTokenKind::AnnotationLabel => "AnnotationLabel",
            LexSemanticTokenKind::AnnotationParameter => "AnnotationParameter",
            LexSemanticTokenKind::AnnotationContent => "AnnotationContent",
            LexSemanticTokenKind::InlineStrong => "InlineStrong",
            LexSemanticTokenKind::InlineEmphasis => "InlineEmphasis",
            LexSemanticTokenKind::InlineCode => "InlineCode",
            LexSemanticTokenKind::InlineMath => "InlineMath",
            LexSemanticTokenKind::Reference => "Reference",
            LexSemanticTokenKind::ReferenceCitation => "ReferenceCitation",
            LexSemanticTokenKind::ReferenceFootnote => "ReferenceFootnote",
            LexSemanticTokenKind::VerbatimSubject => "VerbatimSubject",
            LexSemanticTokenKind::DataLabel => "DataLabel",
            LexSemanticTokenKind::DataParameter => "DataParameter",
            LexSemanticTokenKind::VerbatimContent => "VerbatimContent",
            LexSemanticTokenKind::InlineMarkerStrongStart => "InlineMarker_strong_start",
            LexSemanticTokenKind::InlineMarkerStrongEnd => "InlineMarker_strong_end",
            LexSemanticTokenKind::InlineMarkerEmphasisStart => "InlineMarker_emphasis_start",
            LexSemanticTokenKind::InlineMarkerEmphasisEnd => "InlineMarker_emphasis_end",
            LexSemanticTokenKind::InlineMarkerCodeStart => "InlineMarker_code_start",
            LexSemanticTokenKind::InlineMarkerCodeEnd => "InlineMarker_code_end",
            LexSemanticTokenKind::InlineMarkerMathStart => "InlineMarker_math_start",
            LexSemanticTokenKind::InlineMarkerMathEnd => "InlineMarker_math_end",
            LexSemanticTokenKind::InlineMarkerRefStart => "InlineMarker_ref_start",
            LexSemanticTokenKind::InlineMarkerRefEnd => "InlineMarker_ref_end",
        }
    }
}
137
/// Every token kind, listed in declaration order.
///
/// NOTE: this list is maintained by hand and must stay in sync with the
/// `LexSemanticTokenKind` enum — adding a variant there requires adding it
/// here as well.
pub const SEMANTIC_TOKEN_KINDS: &[LexSemanticTokenKind] = &[
    LexSemanticTokenKind::DocumentTitle,
    LexSemanticTokenKind::DocumentSubtitle,
    LexSemanticTokenKind::SessionMarker,
    LexSemanticTokenKind::SessionTitleText,
    LexSemanticTokenKind::DefinitionSubject,
    LexSemanticTokenKind::DefinitionContent,
    LexSemanticTokenKind::ListMarker,
    LexSemanticTokenKind::ListItemText,
    LexSemanticTokenKind::AnnotationLabel,
    LexSemanticTokenKind::AnnotationParameter,
    LexSemanticTokenKind::AnnotationContent,
    LexSemanticTokenKind::InlineStrong,
    LexSemanticTokenKind::InlineEmphasis,
    LexSemanticTokenKind::InlineCode,
    LexSemanticTokenKind::InlineMath,
    LexSemanticTokenKind::Reference,
    LexSemanticTokenKind::ReferenceCitation,
    LexSemanticTokenKind::ReferenceFootnote,
    LexSemanticTokenKind::VerbatimSubject,
    LexSemanticTokenKind::DataLabel,
    LexSemanticTokenKind::DataParameter,
    LexSemanticTokenKind::VerbatimContent,
    LexSemanticTokenKind::InlineMarkerStrongStart,
    LexSemanticTokenKind::InlineMarkerStrongEnd,
    LexSemanticTokenKind::InlineMarkerEmphasisStart,
    LexSemanticTokenKind::InlineMarkerEmphasisEnd,
    LexSemanticTokenKind::InlineMarkerCodeStart,
    LexSemanticTokenKind::InlineMarkerCodeEnd,
    LexSemanticTokenKind::InlineMarkerMathStart,
    LexSemanticTokenKind::InlineMarkerMathEnd,
    LexSemanticTokenKind::InlineMarkerRefStart,
    LexSemanticTokenKind::InlineMarkerRefEnd,
];
172
/// A single semantic token: a kind plus the source range it covers.
#[derive(Debug, Clone, PartialEq)]
pub struct LexSemanticToken {
    /// What the covered text is (title, marker, inline code, ...).
    pub kind: LexSemanticTokenKind,
    /// Byte span and line/column positions in the source document.
    pub range: Range,
}
178
179pub fn collect_semantic_tokens(document: &Document) -> Vec<LexSemanticToken> {
180 let mut collector = TokenCollector::new();
181 collector.process_document(document);
182 collector.finish()
183}
184
/// Accumulates tokens while walking the document tree.
struct TokenCollector {
    // Tokens gathered so far; sorted once in `finish`.
    tokens: Vec<LexSemanticToken>,
    // True while processing children of an annotation (plain text inside
    // becomes `AnnotationContent`).
    in_annotation: bool,
    // True while processing children of a definition (plain text inside
    // becomes `DefinitionContent`).
    in_definition: bool,
}
190
191impl TokenCollector {
192 fn new() -> Self {
193 Self {
194 tokens: Vec::new(),
195 in_annotation: false,
196 in_definition: false,
197 }
198 }
199
200 fn finish(mut self) -> Vec<LexSemanticToken> {
201 self.tokens.sort_by(|a, b| {
202 let a_start = (
203 &a.range.start.line,
204 &a.range.start.column,
205 &a.range.end.line,
206 &a.range.end.column,
207 );
208 let b_start = (
209 &b.range.start.line,
210 &b.range.start.column,
211 &b.range.end.line,
212 &b.range.end.column,
213 );
214 a_start.cmp(&b_start)
215 });
216 self.tokens
217 }
218
219 fn push_range(&mut self, range: &Range, kind: LexSemanticTokenKind) {
220 if range.span.start < range.span.end {
221 self.tokens.push(LexSemanticToken {
222 kind,
223 range: range.clone(),
224 });
225 }
226 }
227
228 fn process_document(&mut self, document: &Document) {
229 self.process_annotations(document.annotations());
230 if let Some(title) = &document.title {
231 if let Some(title_loc) = &title.content.location {
232 self.push_range(title_loc, LexSemanticTokenKind::DocumentTitle);
233 } else {
234 self.push_range(&title.location, LexSemanticTokenKind::DocumentTitle);
235 }
236 self.process_text_content(&title.content);
237 if let Some(subtitle) = &title.subtitle {
238 if let Some(sub_loc) = &subtitle.location {
239 self.push_range(sub_loc, LexSemanticTokenKind::DocumentSubtitle);
240 }
241 self.process_text_content(subtitle);
242 }
243 }
244 self.process_session(&document.root, LexSemanticTokenKind::SessionTitleText);
245 }
246
247 fn process_session(&mut self, session: &Session, title_kind: LexSemanticTokenKind) {
248 if let Some(marker) = &session.marker {
250 self.push_range(&marker.location, LexSemanticTokenKind::SessionMarker);
252 }
253
254 if let Some(header) = session.header_location() {
258 if let Some(marker) = &session.marker {
259 let marker_text = marker.as_str();
261 let full_title = session.full_title();
262
263 if let Some(pos) = full_title.find(marker_text) {
265 let marker_end = pos + marker_text.len();
266 let title_start = full_title[marker_end..]
268 .chars()
269 .position(|c| !c.is_whitespace())
270 .map(|p| marker_end + p)
271 .unwrap_or(marker_end);
272
273 if title_start < full_title.len() {
274 use lex_core::lex::ast::Position;
276 let title_text_range = Range::new(
277 header.span.start + title_start..header.span.end,
278 Position::new(header.start.line, header.start.column + title_start),
279 header.end,
280 );
281 self.push_range(&title_text_range, title_kind);
282 }
283 }
284 } else {
285 self.push_range(header, title_kind);
287 }
288 }
289
290 self.process_text_content(&session.title);
291
292 self.process_annotations(session.annotations());
293 for child in session.children.iter() {
294 self.process_content_item(child);
295 }
296 }
297
298 fn process_content_item(&mut self, item: &ContentItem) {
299 match item {
300 ContentItem::Paragraph(paragraph) => self.process_paragraph(paragraph),
301 ContentItem::Session(session) => {
302 self.process_session(session, LexSemanticTokenKind::SessionTitleText)
303 }
304 ContentItem::List(list) => self.process_list(list),
305 ContentItem::ListItem(list_item) => self.process_list_item(list_item),
306 ContentItem::Definition(definition) => self.process_definition(definition),
307 ContentItem::Annotation(annotation) => self.process_annotation(annotation),
308 ContentItem::VerbatimBlock(verbatim) => self.process_verbatim(verbatim),
309 ContentItem::Table(table) => self.process_table(table),
310 ContentItem::TextLine(text_line) => self.process_text_content(&text_line.content),
311 ContentItem::VerbatimLine(_) => {}
312 ContentItem::BlankLineGroup(_) => {}
313 }
314 }
315
316 fn process_paragraph(&mut self, paragraph: &Paragraph) {
317 for line in ¶graph.lines {
318 if let ContentItem::TextLine(text_line) = line {
319 self.process_text_content(&text_line.content);
323 }
324 }
325 self.process_annotations(paragraph.annotations());
326 }
327
328 fn process_list(&mut self, list: &List) {
329 self.process_annotations(list.annotations());
330 for item in list.items.iter() {
331 if let ContentItem::ListItem(list_item) = item {
332 self.process_list_item(list_item);
333 }
334 }
335 }
336
337 fn process_list_item(&mut self, list_item: &ListItem) {
338 if let Some(marker_range) = &list_item.marker.location {
339 self.push_range(marker_range, LexSemanticTokenKind::ListMarker);
340 }
341 for text in &list_item.text {
342 if let Some(location) = &text.location {
343 self.push_range(location, LexSemanticTokenKind::ListItemText);
344 }
345 self.process_text_content(text);
346 }
347 self.process_annotations(list_item.annotations());
348 for child in list_item.children.iter() {
349 self.process_content_item(child);
350 }
351 }
352
353 fn process_definition(&mut self, definition: &Definition) {
354 if let Some(header) = definition.header_location() {
355 self.push_range(header, LexSemanticTokenKind::DefinitionSubject);
356 }
357 self.process_text_content(&definition.subject);
358 self.process_annotations(definition.annotations());
359 let was_in_definition = self.in_definition;
360 self.in_definition = true;
361 for child in definition.children.iter() {
362 self.process_content_item(child);
363 }
364 self.in_definition = was_in_definition;
365 }
366
367 fn process_verbatim(&mut self, verbatim: &Verbatim) {
368 for group in verbatim.group() {
369 self.process_text_content(group.subject);
370 if let Some(location) = &group.subject.location {
371 self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
372 }
373 for child in group.children {
374 if let ContentItem::VerbatimLine(line) = child {
375 self.push_range(&line.location, LexSemanticTokenKind::VerbatimContent);
376 }
377 }
378 }
379
380 self.push_range(
381 &verbatim.closing_data.label.location,
382 LexSemanticTokenKind::DataLabel,
383 );
384 for parameter in &verbatim.closing_data.parameters {
385 self.push_range(¶meter.location, LexSemanticTokenKind::DataParameter);
386 }
387
388 self.process_annotations(verbatim.annotations());
389 }
390
391 fn process_table(&mut self, table: &Table) {
392 self.process_text_content(&table.subject);
393 if let Some(location) = &table.subject.location {
394 self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
395 }
396
397 for row in table.all_rows() {
399 for cell in &row.cells {
400 self.process_text_content(&cell.content);
401 for child in cell.children.iter() {
402 self.process_content_item(child);
403 }
404 }
405 }
406
407 self.process_annotations(table.annotations());
411 }
412
413 fn process_annotation(&mut self, annotation: &Annotation) {
414 self.push_range(
415 annotation.header_location(),
416 LexSemanticTokenKind::AnnotationLabel,
417 );
418 for parameter in &annotation.data.parameters {
419 self.push_range(
420 ¶meter.location,
421 LexSemanticTokenKind::AnnotationParameter,
422 );
423 }
424 let was_in_annotation = self.in_annotation;
425 self.in_annotation = true;
426 for child in annotation.children.iter() {
427 self.process_content_item(child);
428 }
429 self.in_annotation = was_in_annotation;
430 }
431
432 fn process_annotations(&mut self, annotations: &[Annotation]) {
433 for annotation in annotations {
434 self.process_annotation(annotation);
435 }
436 }
437
438 fn process_text_content(&mut self, text: &TextContent) {
439 let Some(base_range) = text.location.as_ref() else {
440 return;
441 };
442 let raw = text.as_string();
443 if raw.is_empty() {
444 return;
445 }
446 let nodes = text.inline_items();
447 let mut walker = InlineWalker {
448 raw,
449 base_range,
450 cursor: 0,
451 tokens: &mut self.tokens,
452 in_annotation: self.in_annotation,
453 in_definition: self.in_definition,
454 in_formatted: false,
455 };
456 walker.walk_nodes(&nodes);
457 }
458}
459
/// Walks the inline node tree of a single text node, tracking a byte cursor
/// into the raw source text to compute each token's range.
struct InlineWalker<'a> {
    // Raw source text of the text node (including escape backslashes).
    raw: &'a str,
    // Location of the whole text node; all emitted ranges are offsets from it.
    base_range: &'a Range,
    // Current byte offset into `raw`.
    cursor: usize,
    // Destination for emitted tokens (borrowed from the collector).
    tokens: &'a mut Vec<LexSemanticToken>,
    // Inherited context: inside an annotation body.
    in_annotation: bool,
    // Inherited context: inside a definition body.
    in_definition: bool,
    // True while inside a formatting container (strong/emphasis), so nested
    // plain text is not double-tokenized as annotation/definition content.
    in_formatted: bool,
}
478
479impl<'a> InlineWalker<'a> {
480 fn walk_nodes(&mut self, nodes: &[InlineNode]) {
481 for node in nodes {
482 self.walk_node(node);
483 }
484 }
485
486 fn walk_node(&mut self, node: &InlineNode) {
487 match node {
488 InlineNode::Plain { text, .. } => self.walk_plain(text),
489 InlineNode::Strong { content, .. } => self.walk_container(
490 content,
491 '*',
492 LexSemanticTokenKind::InlineStrong,
493 LexSemanticTokenKind::InlineMarkerStrongStart,
494 LexSemanticTokenKind::InlineMarkerStrongEnd,
495 ),
496 InlineNode::Emphasis { content, .. } => self.walk_container(
497 content,
498 '_',
499 LexSemanticTokenKind::InlineEmphasis,
500 LexSemanticTokenKind::InlineMarkerEmphasisStart,
501 LexSemanticTokenKind::InlineMarkerEmphasisEnd,
502 ),
503 InlineNode::Code { text, .. } => self.walk_literal(
504 text,
505 '`',
506 LexSemanticTokenKind::InlineCode,
507 LexSemanticTokenKind::InlineMarkerCodeStart,
508 LexSemanticTokenKind::InlineMarkerCodeEnd,
509 ),
510 InlineNode::Math { text, .. } => self.walk_literal(
511 text,
512 '#',
513 LexSemanticTokenKind::InlineMath,
514 LexSemanticTokenKind::InlineMarkerMathStart,
515 LexSemanticTokenKind::InlineMarkerMathEnd,
516 ),
517 InlineNode::Reference { data, .. } => self.walk_reference(data),
518 }
519 }
520
521 fn walk_plain(&mut self, text: &str) {
524 let start = self.cursor;
525 self.advance_unescaped(text);
526 let end = self.cursor;
527
528 if start < end {
529 let kind = if self.in_formatted {
530 None } else if self.in_annotation {
532 Some(LexSemanticTokenKind::AnnotationContent)
533 } else if self.in_definition {
534 Some(LexSemanticTokenKind::DefinitionContent)
535 } else {
536 None
537 };
538 if let Some(kind) = kind {
539 self.push(self.make_range(start, end), kind);
540 }
541 }
542 }
543
544 fn walk_container(
546 &mut self,
547 content: &[InlineNode],
548 marker: char,
549 content_kind: LexSemanticTokenKind,
550 start_marker_kind: LexSemanticTokenKind,
551 end_marker_kind: LexSemanticTokenKind,
552 ) {
553 let marker_len = marker.len_utf8();
554
555 let marker_start = self.cursor;
557 self.cursor += marker_len;
558 self.push(
559 self.make_range(marker_start, self.cursor),
560 start_marker_kind,
561 );
562
563 let content_start = self.cursor;
565 let was_in_formatted = self.in_formatted;
566 self.in_formatted = true;
567 self.walk_nodes(content);
568 self.in_formatted = was_in_formatted;
569 let content_end = self.cursor;
570
571 if content_start < content_end {
573 self.push(self.make_range(content_start, content_end), content_kind);
574 }
575
576 let close_start = self.cursor;
578 self.cursor += marker_len;
579 self.push(self.make_range(close_start, self.cursor), end_marker_kind);
580 }
581
582 fn walk_literal(
584 &mut self,
585 text: &str,
586 marker: char,
587 content_kind: LexSemanticTokenKind,
588 start_marker_kind: LexSemanticTokenKind,
589 end_marker_kind: LexSemanticTokenKind,
590 ) {
591 let marker_len = marker.len_utf8();
592
593 let marker_start = self.cursor;
595 self.cursor += marker_len;
596 self.push(
597 self.make_range(marker_start, self.cursor),
598 start_marker_kind,
599 );
600
601 let content_start = self.cursor;
603 self.cursor += text.len();
604 if content_start < self.cursor {
605 self.push(self.make_range(content_start, self.cursor), content_kind);
606 }
607
608 let close_start = self.cursor;
610 self.cursor += marker_len;
611 self.push(self.make_range(close_start, self.cursor), end_marker_kind);
612 }
613
614 fn walk_reference(&mut self, data: &lex_core::lex::inlines::ReferenceInline) {
616 let ref_kind = match &data.reference_type {
617 ReferenceType::Citation(_) => LexSemanticTokenKind::ReferenceCitation,
618 ReferenceType::FootnoteNumber { .. } | ReferenceType::FootnoteLabeled { .. } => {
619 LexSemanticTokenKind::ReferenceFootnote
620 }
621 _ => LexSemanticTokenKind::Reference,
622 };
623
624 let open_start = self.cursor;
626 self.cursor += 1;
627 self.push(
628 self.make_range(open_start, self.cursor),
629 LexSemanticTokenKind::InlineMarkerRefStart,
630 );
631
632 let content_start = self.cursor;
634 self.cursor += data.raw.len();
635 if content_start < self.cursor {
636 self.push(self.make_range(content_start, self.cursor), ref_kind);
637 }
638
639 let close_start = self.cursor;
641 self.cursor += 1;
642 self.push(
643 self.make_range(close_start, self.cursor),
644 LexSemanticTokenKind::InlineMarkerRefEnd,
645 );
646 }
647
648 fn advance_unescaped(&mut self, text: &str) {
654 for expected in text.chars() {
655 if self.cursor >= self.raw.len() {
656 break;
657 }
658 let raw_ch = self.raw[self.cursor..].chars().next().unwrap();
659 if raw_ch == '\\' {
660 if self.cursor + 1 >= self.raw.len() {
661 self.cursor += 1;
664 } else {
665 let next_ch = self.raw[self.cursor + 1..].chars().next();
666 match next_ch {
667 Some(nc) if !nc.is_alphanumeric() => {
668 self.cursor += 1 + nc.len_utf8();
670 }
671 _ => {
672 self.cursor += 1;
674 }
675 }
676 }
677 } else {
678 self.cursor += raw_ch.len_utf8();
679 }
680 let _ = expected; }
682 }
683
684 fn make_range(&self, start: usize, end: usize) -> Range {
685 let start_pos = self.position_at(start);
686 let end_pos = self.position_at(end);
687 Range::new(
688 (self.base_range.span.start + start)..(self.base_range.span.start + end),
689 start_pos,
690 end_pos,
691 )
692 }
693
694 fn position_at(&self, offset: usize) -> Position {
695 let mut line = self.base_range.start.line;
696 let mut column = self.base_range.start.column;
697 for ch in self.raw[..offset].chars() {
698 if ch == '\n' {
699 line += 1;
700 column = 0;
701 } else {
702 column += ch.len_utf8();
703 }
704 }
705 Position::new(line, column)
706 }
707
708 fn push(&mut self, range: Range, kind: LexSemanticTokenKind) {
709 if range.span.start < range.span.end {
710 self.tokens.push(LexSemanticToken { kind, range });
711 }
712 }
713}
714
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{sample_document, sample_source};
    use lex_core::lex::testing::lexplore::Lexplore;

    // Returns the source text covered by every token of the given kind.
    fn snippets(
        tokens: &[LexSemanticToken],
        kind: LexSemanticTokenKind,
        source: &str,
    ) -> Vec<String> {
        tokens
            .iter()
            .filter(|token| token.kind == kind)
            .map(|token| source[token.range.span.clone()].to_string())
            .collect()
    }

    // Structural tokens (session markers/titles, list markers, annotations,
    // verbatim subjects, data labels) are emitted for the shared sample doc.
    #[test]
    fn collects_structural_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        assert!(
            snippets(&tokens, LexSemanticTokenKind::SessionMarker, source)
                .iter()
                .any(|snippet| snippet.trim() == "1.")
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::SessionTitleText, source)
                .iter()
                .any(|snippet| snippet.trim() == "Intro")
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::VerbatimSubject, source)
                .iter()
                .any(|snippet| snippet.trim_end() == "Cache")
        );
        // The sample document contains exactly four list items, each with a
        // dash or numeric marker.
        let markers = snippets(&tokens, LexSemanticTokenKind::ListMarker, source);
        assert_eq!(markers.len(), 4);
        assert!(markers
            .iter()
            .all(|snippet| snippet.trim_start().starts_with('-')
                || snippet.trim_start().chars().next().unwrap().is_numeric()));
        let annotation_labels = snippets(&tokens, LexSemanticTokenKind::AnnotationLabel, source);
        assert!(annotation_labels
            .iter()
            .any(|snippet| snippet.contains("doc.note")));
        let parameters = snippets(&tokens, LexSemanticTokenKind::AnnotationParameter, source);
        assert!(parameters
            .iter()
            .any(|snippet| snippet.contains("severity=info")));
        let verbatim_subjects = snippets(&tokens, LexSemanticTokenKind::VerbatimSubject, source);
        assert!(verbatim_subjects
            .iter()
            .any(|snippet| snippet.contains("CLI Example")));
        assert!(snippets(&tokens, LexSemanticTokenKind::DataLabel, source)
            .iter()
            .any(|snippet| snippet.contains("shell")));
    }

    // Inline formatting spans (strong/emphasis/code/math) get content tokens.
    #[test]
    fn collects_inline_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();
        assert!(
            snippets(&tokens, LexSemanticTokenKind::InlineStrong, source)
                .iter()
                .any(|snippet| snippet.contains("Lex"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source)
                .iter()
                .any(|snippet| snippet.contains("format"))
        );
        assert!(snippets(&tokens, LexSemanticTokenKind::InlineCode, source)
            .iter()
            .any(|snippet| snippet.contains("code")));
        assert!(snippets(&tokens, LexSemanticTokenKind::InlineMath, source)
            .iter()
            .any(|snippet| snippet.contains("math")));
    }

    // References are split into citation, footnote, and generic kinds.
    #[test]
    fn classifies_references() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceCitation, source)
                .iter()
                .any(|snippet| snippet.contains("@spec2025"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceFootnote, source)
                .iter()
                .any(|snippet| snippet.contains("^source"))
        );
        assert!(
            snippets(&tokens, LexSemanticTokenKind::ReferenceFootnote, source)
                .iter()
                .any(|snippet| snippet.contains("1"))
        );
        assert!(snippets(&tokens, LexSemanticTokenKind::Reference, source)
            .iter()
            .any(|snippet| snippet.contains("Cache")));
    }

    // A document with no content yields no tokens at all.
    #[test]
    fn empty_document_has_no_tokens() {
        let document = Lexplore::benchmark(0)
            .parse()
            .expect("failed to parse empty benchmark fixture");
        let tokens = collect_semantic_tokens(&document);
        assert!(tokens.is_empty());
    }

    // Plain text inside an annotation body is tagged AnnotationContent.
    #[test]
    fn emits_annotation_content_for_inline_annotation() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        let annotation_content = snippets(&tokens, LexSemanticTokenKind::AnnotationContent, source);
        assert!(
            annotation_content
                .iter()
                .any(|snippet| snippet.contains("Document preface")),
            "AnnotationContent should be emitted for plain text inside annotations, got: {annotation_content:?}"
        );
    }

    // Formatted spans inside an annotation keep their formatting tokens and
    // are excluded from AnnotationContent (the `in_formatted` flag).
    #[test]
    fn annotation_content_excludes_formatted_text() {
        let source = ":: note :: Some *bold* text.\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        let annotation_content: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind == LexSemanticTokenKind::AnnotationContent)
            .map(|t| &source[t.range.span.clone()])
            .collect();

        assert!(
            annotation_content.iter().any(|s| s.contains("Some")),
            "Plain text before formatting should be AnnotationContent"
        );
        assert!(
            annotation_content.iter().any(|s| s.contains("text.")),
            "Plain text after formatting should be AnnotationContent"
        );
        assert!(
            !annotation_content.iter().any(|s| s.contains("bold")),
            "Formatted text should NOT be AnnotationContent"
        );

        let strong: Vec<_> = tokens
            .iter()
            .filter(|t| t.kind == LexSemanticTokenKind::InlineStrong)
            .map(|t| &source[t.range.span.clone()])
            .collect();
        assert!(strong.contains(&"bold"));
    }

    // Inline formatting inside table cells is tokenized via process_table.
    #[test]
    fn table_cell_inline_formatting_gets_tokens() {
        let source = "Stats:\n | *Name* | `code` |\n | _test_ | #42# |\n:: table ::\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        let strong = snippets(&tokens, LexSemanticTokenKind::InlineStrong, source);
        assert!(
            strong.iter().any(|s| s.contains("Name")),
            "Expected InlineStrong for *Name* in table cell, got: {strong:?}"
        );

        let code = snippets(&tokens, LexSemanticTokenKind::InlineCode, source);
        assert!(
            code.iter().any(|s| s.contains("code")),
            "Expected InlineCode for `code` in table cell, got: {code:?}"
        );

        let emphasis = snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source);
        assert!(
            emphasis.iter().any(|s| s.contains("test")),
            "Expected InlineEmphasis for _test_ in table cell, got: {emphasis:?}"
        );

        let math = snippets(&tokens, LexSemanticTokenKind::InlineMath, source);
        assert!(
            math.iter().any(|s| s.contains("42")),
            "Expected InlineMath for #42# in table cell, got: {math:?}"
        );
    }
}