1use lex_core::lex::ast::{
42 Annotation, ContentItem, Definition, Document, List, ListItem, Paragraph, Position, Range,
43 Session, Table, TextContent, Verbatim,
44};
45use lex_core::lex::inlines::{InlineNode, ReferenceType};
46
/// Category assigned to a highlighted span of a Lex document.
///
/// Variant order is part of the type's observable shape (discriminants), so
/// new variants should be appended with care.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LexSemanticTokenKind {
    /// The document title.
    DocumentTitle,
    /// The document subtitle, when the title has one with a location.
    DocumentSubtitle,
    /// The marker of a session, when the session has one.
    SessionMarker,
    /// The title text of a session header (after the marker, if any).
    SessionTitleText,
    /// The header of a definition.
    DefinitionSubject,
    /// Plain inline text inside a definition body, outside any formatting.
    DefinitionContent,
    /// The marker of a list item.
    ListMarker,
    /// The text of a list item.
    ListItemText,
    /// The header of an annotation.
    AnnotationLabel,
    /// A parameter attached to an annotation.
    AnnotationParameter,
    /// Plain inline text inside an annotation body, outside any formatting.
    AnnotationContent,
    /// Content between the `*` markers of a strong span.
    InlineStrong,
    /// Content between the `_` markers of an emphasis span.
    InlineEmphasis,
    /// Content between the `` ` `` markers of a code span.
    InlineCode,
    /// Content between the `#` markers of a math span.
    InlineMath,
    /// A reference that is not a citation, footnote number, or annotation reference.
    Reference,
    /// A citation reference.
    ReferenceCitation,
    /// A footnote-number reference.
    ReferenceFootnote,
    /// An annotation reference.
    ReferenceAnnotation,
    /// The subject line of a verbatim group (also used for table subjects).
    VerbatimSubject,
    /// The label of a verbatim block's closing data.
    DataLabel,
    /// A parameter of a verbatim block's closing data.
    DataParameter,
    /// A single line of verbatim content.
    VerbatimContent,
    /// Opening `*` of a strong span.
    InlineMarkerStrongStart,
    /// Closing `*` of a strong span.
    InlineMarkerStrongEnd,
    /// Opening `_` of an emphasis span.
    InlineMarkerEmphasisStart,
    /// Closing `_` of an emphasis span.
    InlineMarkerEmphasisEnd,
    /// Opening `` ` `` of a code span.
    InlineMarkerCodeStart,
    /// Closing `` ` `` of a code span.
    InlineMarkerCodeEnd,
    /// Opening `#` of a math span.
    InlineMarkerMathStart,
    /// Closing `#` of a math span.
    InlineMarkerMathEnd,
    /// The single byte immediately before a reference's raw text.
    InlineMarkerRefStart,
    /// The single byte immediately after a reference's raw text.
    InlineMarkerRefEnd,
}
83
84impl LexSemanticTokenKind {
85 pub fn as_str(self) -> &'static str {
102 match self {
103 LexSemanticTokenKind::DocumentTitle => "DocumentTitle",
104 LexSemanticTokenKind::DocumentSubtitle => "DocumentSubtitle",
105 LexSemanticTokenKind::SessionMarker => "SessionMarker",
106 LexSemanticTokenKind::SessionTitleText => "SessionTitleText",
107 LexSemanticTokenKind::DefinitionSubject => "DefinitionSubject",
108 LexSemanticTokenKind::DefinitionContent => "DefinitionContent",
109 LexSemanticTokenKind::ListMarker => "ListMarker",
110 LexSemanticTokenKind::ListItemText => "ListItemText",
111 LexSemanticTokenKind::AnnotationLabel => "AnnotationLabel",
112 LexSemanticTokenKind::AnnotationParameter => "AnnotationParameter",
113 LexSemanticTokenKind::AnnotationContent => "AnnotationContent",
114 LexSemanticTokenKind::InlineStrong => "InlineStrong",
115 LexSemanticTokenKind::InlineEmphasis => "InlineEmphasis",
116 LexSemanticTokenKind::InlineCode => "InlineCode",
117 LexSemanticTokenKind::InlineMath => "InlineMath",
118 LexSemanticTokenKind::Reference => "Reference",
119 LexSemanticTokenKind::ReferenceCitation => "ReferenceCitation",
120 LexSemanticTokenKind::ReferenceFootnote => "ReferenceFootnote",
121 LexSemanticTokenKind::ReferenceAnnotation => "ReferenceAnnotation",
122 LexSemanticTokenKind::VerbatimSubject => "VerbatimSubject",
123 LexSemanticTokenKind::DataLabel => "DataLabel",
124 LexSemanticTokenKind::DataParameter => "DataParameter",
125 LexSemanticTokenKind::VerbatimContent => "VerbatimContent",
126 LexSemanticTokenKind::InlineMarkerStrongStart => "InlineMarker_strong_start",
127 LexSemanticTokenKind::InlineMarkerStrongEnd => "InlineMarker_strong_end",
128 LexSemanticTokenKind::InlineMarkerEmphasisStart => "InlineMarker_emphasis_start",
129 LexSemanticTokenKind::InlineMarkerEmphasisEnd => "InlineMarker_emphasis_end",
130 LexSemanticTokenKind::InlineMarkerCodeStart => "InlineMarker_code_start",
131 LexSemanticTokenKind::InlineMarkerCodeEnd => "InlineMarker_code_end",
132 LexSemanticTokenKind::InlineMarkerMathStart => "InlineMarker_math_start",
133 LexSemanticTokenKind::InlineMarkerMathEnd => "InlineMarker_math_end",
134 LexSemanticTokenKind::InlineMarkerRefStart => "InlineMarker_ref_start",
135 LexSemanticTokenKind::InlineMarkerRefEnd => "InlineMarker_ref_end",
136 }
137 }
138}
139
140pub const SEMANTIC_TOKEN_KINDS: &[LexSemanticTokenKind] = &[
141 LexSemanticTokenKind::DocumentTitle,
142 LexSemanticTokenKind::DocumentSubtitle,
143 LexSemanticTokenKind::SessionMarker,
144 LexSemanticTokenKind::SessionTitleText,
145 LexSemanticTokenKind::DefinitionSubject,
146 LexSemanticTokenKind::DefinitionContent,
147 LexSemanticTokenKind::ListMarker,
148 LexSemanticTokenKind::ListItemText,
149 LexSemanticTokenKind::AnnotationLabel,
150 LexSemanticTokenKind::AnnotationParameter,
151 LexSemanticTokenKind::AnnotationContent,
152 LexSemanticTokenKind::InlineStrong,
153 LexSemanticTokenKind::InlineEmphasis,
154 LexSemanticTokenKind::InlineCode,
155 LexSemanticTokenKind::InlineMath,
156 LexSemanticTokenKind::Reference,
157 LexSemanticTokenKind::ReferenceCitation,
158 LexSemanticTokenKind::ReferenceFootnote,
159 LexSemanticTokenKind::VerbatimSubject,
160 LexSemanticTokenKind::DataLabel,
161 LexSemanticTokenKind::DataParameter,
162 LexSemanticTokenKind::VerbatimContent,
163 LexSemanticTokenKind::InlineMarkerStrongStart,
164 LexSemanticTokenKind::InlineMarkerStrongEnd,
165 LexSemanticTokenKind::InlineMarkerEmphasisStart,
166 LexSemanticTokenKind::InlineMarkerEmphasisEnd,
167 LexSemanticTokenKind::InlineMarkerCodeStart,
168 LexSemanticTokenKind::InlineMarkerCodeEnd,
169 LexSemanticTokenKind::InlineMarkerMathStart,
170 LexSemanticTokenKind::InlineMarkerMathEnd,
171 LexSemanticTokenKind::InlineMarkerRefStart,
172 LexSemanticTokenKind::InlineMarkerRefEnd,
173 LexSemanticTokenKind::ReferenceAnnotation,
174];
175
176#[derive(Debug, Clone, PartialEq)]
177pub struct LexSemanticToken {
178 pub kind: LexSemanticTokenKind,
179 pub range: Range,
180}
181
182pub fn collect_semantic_tokens(document: &Document) -> Vec<LexSemanticToken> {
183 let mut collector = TokenCollector::new();
184 collector.process_document(document);
185 collector.finish()
186}
187
188struct TokenCollector {
189 tokens: Vec<LexSemanticToken>,
190 in_annotation: bool,
191 in_definition: bool,
192}
193
194impl TokenCollector {
195 fn new() -> Self {
196 Self {
197 tokens: Vec::new(),
198 in_annotation: false,
199 in_definition: false,
200 }
201 }
202
203 fn finish(mut self) -> Vec<LexSemanticToken> {
204 self.tokens.sort_by(|a, b| {
205 let a_start = (
206 &a.range.start.line,
207 &a.range.start.column,
208 &a.range.end.line,
209 &a.range.end.column,
210 );
211 let b_start = (
212 &b.range.start.line,
213 &b.range.start.column,
214 &b.range.end.line,
215 &b.range.end.column,
216 );
217 a_start.cmp(&b_start)
218 });
219 self.tokens
220 }
221
222 fn push_range(&mut self, range: &Range, kind: LexSemanticTokenKind) {
223 if range.span.start < range.span.end {
224 self.tokens.push(LexSemanticToken {
225 kind,
226 range: range.clone(),
227 });
228 }
229 }
230
231 fn process_document(&mut self, document: &Document) {
232 self.process_annotations(document.annotations());
233 if let Some(title) = &document.title {
234 if let Some(title_loc) = &title.content.location {
235 self.push_range(title_loc, LexSemanticTokenKind::DocumentTitle);
236 } else {
237 self.push_range(&title.location, LexSemanticTokenKind::DocumentTitle);
238 }
239 self.process_text_content(&title.content);
240 if let Some(subtitle) = &title.subtitle {
241 if let Some(sub_loc) = &subtitle.location {
242 self.push_range(sub_loc, LexSemanticTokenKind::DocumentSubtitle);
243 }
244 self.process_text_content(subtitle);
245 }
246 }
247 self.process_session(&document.root, LexSemanticTokenKind::SessionTitleText);
248 }
249
250 fn process_session(&mut self, session: &Session, title_kind: LexSemanticTokenKind) {
251 if let Some(marker) = &session.marker {
253 self.push_range(&marker.location, LexSemanticTokenKind::SessionMarker);
255 }
256
257 if let Some(header) = session.header_location() {
261 if let Some(marker) = &session.marker {
262 let marker_text = marker.as_str();
264 let full_title = session.full_title();
265
266 if let Some(pos) = full_title.find(marker_text) {
268 let marker_end = pos + marker_text.len();
269 let title_start = full_title[marker_end..]
271 .chars()
272 .position(|c| !c.is_whitespace())
273 .map(|p| marker_end + p)
274 .unwrap_or(marker_end);
275
276 if title_start < full_title.len() {
277 use lex_core::lex::ast::Position;
279 let title_text_range = Range::new(
280 header.span.start + title_start..header.span.end,
281 Position::new(header.start.line, header.start.column + title_start),
282 header.end,
283 );
284 self.push_range(&title_text_range, title_kind);
285 }
286 }
287 } else {
288 self.push_range(header, title_kind);
290 }
291 }
292
293 self.process_text_content(&session.title);
294
295 self.process_annotations(session.annotations());
296 for child in session.children.iter() {
297 self.process_content_item(child);
298 }
299 }
300
301 fn process_content_item(&mut self, item: &ContentItem) {
302 match item {
303 ContentItem::Paragraph(paragraph) => self.process_paragraph(paragraph),
304 ContentItem::Session(session) => {
305 self.process_session(session, LexSemanticTokenKind::SessionTitleText)
306 }
307 ContentItem::List(list) => self.process_list(list),
308 ContentItem::ListItem(list_item) => self.process_list_item(list_item),
309 ContentItem::Definition(definition) => self.process_definition(definition),
310 ContentItem::Annotation(annotation) => self.process_annotation(annotation),
311 ContentItem::VerbatimBlock(verbatim) => self.process_verbatim(verbatim),
312 ContentItem::Table(table) => self.process_table(table),
313 ContentItem::TextLine(text_line) => self.process_text_content(&text_line.content),
314 ContentItem::VerbatimLine(_) => {}
315 ContentItem::BlankLineGroup(_) => {}
316 }
317 }
318
319 fn process_paragraph(&mut self, paragraph: &Paragraph) {
320 for line in ¶graph.lines {
321 if let ContentItem::TextLine(text_line) = line {
322 self.process_text_content(&text_line.content);
326 }
327 }
328 self.process_annotations(paragraph.annotations());
329 }
330
331 fn process_list(&mut self, list: &List) {
332 self.process_annotations(list.annotations());
333 for item in list.items.iter() {
334 if let ContentItem::ListItem(list_item) = item {
335 self.process_list_item(list_item);
336 }
337 }
338 }
339
340 fn process_list_item(&mut self, list_item: &ListItem) {
341 if let Some(marker_range) = &list_item.marker.location {
342 self.push_range(marker_range, LexSemanticTokenKind::ListMarker);
343 }
344 for text in &list_item.text {
345 if let Some(location) = &text.location {
346 self.push_range(location, LexSemanticTokenKind::ListItemText);
347 }
348 self.process_text_content(text);
349 }
350 self.process_annotations(list_item.annotations());
351 for child in list_item.children.iter() {
352 self.process_content_item(child);
353 }
354 }
355
356 fn process_definition(&mut self, definition: &Definition) {
357 if let Some(header) = definition.header_location() {
358 self.push_range(header, LexSemanticTokenKind::DefinitionSubject);
359 }
360 self.process_text_content(&definition.subject);
361 self.process_annotations(definition.annotations());
362 let was_in_definition = self.in_definition;
363 self.in_definition = true;
364 for child in definition.children.iter() {
365 self.process_content_item(child);
366 }
367 self.in_definition = was_in_definition;
368 }
369
370 fn process_verbatim(&mut self, verbatim: &Verbatim) {
371 for group in verbatim.group() {
372 self.process_text_content(group.subject);
373 if let Some(location) = &group.subject.location {
374 self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
375 }
376 for child in group.children {
377 if let ContentItem::VerbatimLine(line) = child {
378 self.push_range(&line.location, LexSemanticTokenKind::VerbatimContent);
379 }
380 }
381 }
382
383 self.push_range(
384 &verbatim.closing_data.label.location,
385 LexSemanticTokenKind::DataLabel,
386 );
387 for parameter in &verbatim.closing_data.parameters {
388 self.push_range(¶meter.location, LexSemanticTokenKind::DataParameter);
389 }
390
391 self.process_annotations(verbatim.annotations());
392 }
393
394 fn process_table(&mut self, table: &Table) {
395 self.process_text_content(&table.subject);
396 if let Some(location) = &table.subject.location {
397 self.push_range(location, LexSemanticTokenKind::VerbatimSubject);
398 }
399
400 for row in table.all_rows() {
402 for cell in &row.cells {
403 self.process_text_content(&cell.content);
404 for child in cell.children.iter() {
405 self.process_content_item(child);
406 }
407 }
408 }
409
410 self.process_annotations(table.annotations());
414 }
415
416 fn process_annotation(&mut self, annotation: &Annotation) {
417 self.push_range(
418 annotation.header_location(),
419 LexSemanticTokenKind::AnnotationLabel,
420 );
421 for parameter in &annotation.data.parameters {
422 self.push_range(
423 ¶meter.location,
424 LexSemanticTokenKind::AnnotationParameter,
425 );
426 }
427 let was_in_annotation = self.in_annotation;
428 self.in_annotation = true;
429 for child in annotation.children.iter() {
430 self.process_content_item(child);
431 }
432 self.in_annotation = was_in_annotation;
433 }
434
435 fn process_annotations(&mut self, annotations: &[Annotation]) {
436 for annotation in annotations {
437 self.process_annotation(annotation);
438 }
439 }
440
441 fn process_text_content(&mut self, text: &TextContent) {
442 let Some(base_range) = text.location.as_ref() else {
443 return;
444 };
445 let raw = text.as_string();
446 if raw.is_empty() {
447 return;
448 }
449 let nodes = text.inline_items();
450 let mut walker = InlineWalker {
451 raw,
452 base_range,
453 cursor: 0,
454 tokens: &mut self.tokens,
455 in_annotation: self.in_annotation,
456 in_definition: self.in_definition,
457 in_formatted: false,
458 };
459 walker.walk_nodes(&nodes);
460 }
461}
462
463struct InlineWalker<'a> {
470 raw: &'a str,
471 base_range: &'a Range,
472 cursor: usize,
473 tokens: &'a mut Vec<LexSemanticToken>,
474 in_annotation: bool,
475 in_definition: bool,
476 in_formatted: bool,
480}
481
482impl<'a> InlineWalker<'a> {
483 fn walk_nodes(&mut self, nodes: &[InlineNode]) {
484 for node in nodes {
485 self.walk_node(node);
486 }
487 }
488
489 fn walk_node(&mut self, node: &InlineNode) {
490 match node {
491 InlineNode::Plain { text, .. } => self.walk_plain(text),
492 InlineNode::Strong { content, .. } => self.walk_container(
493 content,
494 '*',
495 LexSemanticTokenKind::InlineStrong,
496 LexSemanticTokenKind::InlineMarkerStrongStart,
497 LexSemanticTokenKind::InlineMarkerStrongEnd,
498 ),
499 InlineNode::Emphasis { content, .. } => self.walk_container(
500 content,
501 '_',
502 LexSemanticTokenKind::InlineEmphasis,
503 LexSemanticTokenKind::InlineMarkerEmphasisStart,
504 LexSemanticTokenKind::InlineMarkerEmphasisEnd,
505 ),
506 InlineNode::Code { text, .. } => self.walk_literal(
507 text,
508 '`',
509 LexSemanticTokenKind::InlineCode,
510 LexSemanticTokenKind::InlineMarkerCodeStart,
511 LexSemanticTokenKind::InlineMarkerCodeEnd,
512 ),
513 InlineNode::Math { text, .. } => self.walk_literal(
514 text,
515 '#',
516 LexSemanticTokenKind::InlineMath,
517 LexSemanticTokenKind::InlineMarkerMathStart,
518 LexSemanticTokenKind::InlineMarkerMathEnd,
519 ),
520 InlineNode::Reference { data, .. } => self.walk_reference(data),
521 }
522 }
523
524 fn walk_plain(&mut self, text: &str) {
527 let start = self.cursor;
528 self.advance_unescaped(text);
529 let end = self.cursor;
530
531 if start < end {
532 let kind = if self.in_formatted {
533 None } else if self.in_annotation {
535 Some(LexSemanticTokenKind::AnnotationContent)
536 } else if self.in_definition {
537 Some(LexSemanticTokenKind::DefinitionContent)
538 } else {
539 None
540 };
541 if let Some(kind) = kind {
542 self.push(self.make_range(start, end), kind);
543 }
544 }
545 }
546
547 fn walk_container(
549 &mut self,
550 content: &[InlineNode],
551 marker: char,
552 content_kind: LexSemanticTokenKind,
553 start_marker_kind: LexSemanticTokenKind,
554 end_marker_kind: LexSemanticTokenKind,
555 ) {
556 let marker_len = marker.len_utf8();
557
558 let marker_start = self.cursor;
560 self.cursor += marker_len;
561 self.push(
562 self.make_range(marker_start, self.cursor),
563 start_marker_kind,
564 );
565
566 let content_start = self.cursor;
568 let was_in_formatted = self.in_formatted;
569 self.in_formatted = true;
570 self.walk_nodes(content);
571 self.in_formatted = was_in_formatted;
572 let content_end = self.cursor;
573
574 if content_start < content_end {
576 self.push(self.make_range(content_start, content_end), content_kind);
577 }
578
579 let close_start = self.cursor;
581 self.cursor += marker_len;
582 self.push(self.make_range(close_start, self.cursor), end_marker_kind);
583 }
584
585 fn walk_literal(
587 &mut self,
588 text: &str,
589 marker: char,
590 content_kind: LexSemanticTokenKind,
591 start_marker_kind: LexSemanticTokenKind,
592 end_marker_kind: LexSemanticTokenKind,
593 ) {
594 let marker_len = marker.len_utf8();
595
596 let marker_start = self.cursor;
598 self.cursor += marker_len;
599 self.push(
600 self.make_range(marker_start, self.cursor),
601 start_marker_kind,
602 );
603
604 let content_start = self.cursor;
606 self.cursor += text.len();
607 if content_start < self.cursor {
608 self.push(self.make_range(content_start, self.cursor), content_kind);
609 }
610
611 let close_start = self.cursor;
613 self.cursor += marker_len;
614 self.push(self.make_range(close_start, self.cursor), end_marker_kind);
615 }
616
617 fn walk_reference(&mut self, data: &lex_core::lex::inlines::ReferenceInline) {
619 let ref_kind = match &data.reference_type {
620 ReferenceType::Citation(_) => LexSemanticTokenKind::ReferenceCitation,
621 ReferenceType::FootnoteNumber { .. } => LexSemanticTokenKind::ReferenceFootnote,
622 ReferenceType::AnnotationReference { .. } => LexSemanticTokenKind::ReferenceAnnotation,
623 _ => LexSemanticTokenKind::Reference,
624 };
625
626 let open_start = self.cursor;
628 self.cursor += 1;
629 self.push(
630 self.make_range(open_start, self.cursor),
631 LexSemanticTokenKind::InlineMarkerRefStart,
632 );
633
634 let content_start = self.cursor;
636 self.cursor += data.raw.len();
637 if content_start < self.cursor {
638 self.push(self.make_range(content_start, self.cursor), ref_kind);
639 }
640
641 let close_start = self.cursor;
643 self.cursor += 1;
644 self.push(
645 self.make_range(close_start, self.cursor),
646 LexSemanticTokenKind::InlineMarkerRefEnd,
647 );
648 }
649
650 fn advance_unescaped(&mut self, text: &str) {
656 for expected in text.chars() {
657 if self.cursor >= self.raw.len() {
658 break;
659 }
660 let raw_ch = self.raw[self.cursor..].chars().next().unwrap();
661 if raw_ch == '\\' {
662 if self.cursor + 1 >= self.raw.len() {
663 self.cursor += 1;
666 } else {
667 let next_ch = self.raw[self.cursor + 1..].chars().next();
668 match next_ch {
669 Some(nc) if !nc.is_alphanumeric() => {
670 self.cursor += 1 + nc.len_utf8();
672 }
673 _ => {
674 self.cursor += 1;
676 }
677 }
678 }
679 } else {
680 self.cursor += raw_ch.len_utf8();
681 }
682 let _ = expected; }
684 }
685
686 fn make_range(&self, start: usize, end: usize) -> Range {
687 let start_pos = self.position_at(start);
688 let end_pos = self.position_at(end);
689 Range::new(
690 (self.base_range.span.start + start)..(self.base_range.span.start + end),
691 start_pos,
692 end_pos,
693 )
694 }
695
696 fn position_at(&self, offset: usize) -> Position {
697 let mut line = self.base_range.start.line;
698 let mut column = self.base_range.start.column;
699 for ch in self.raw[..offset].chars() {
700 if ch == '\n' {
701 line += 1;
702 column = 0;
703 } else {
704 column += ch.len_utf8();
705 }
706 }
707 Position::new(line, column)
708 }
709
710 fn push(&mut self, range: Range, kind: LexSemanticTokenKind) {
711 if range.span.start < range.span.end {
712 self.tokens.push(LexSemanticToken { kind, range });
713 }
714 }
715}
716
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{sample_document, sample_source};
    use lex_core::lex::testing::lexplore::Lexplore;

    /// Returns the source text covered by each token of `kind`, in token order.
    fn snippets(
        tokens: &[LexSemanticToken],
        kind: LexSemanticTokenKind,
        source: &str,
    ) -> Vec<String> {
        let mut found = Vec::new();
        for token in tokens {
            if token.kind == kind {
                found.push(source[token.range.span.clone()].to_string());
            }
        }
        found
    }

    #[test]
    fn collects_structural_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        let session_markers = snippets(&tokens, LexSemanticTokenKind::SessionMarker, source);
        assert!(session_markers.iter().any(|text| text.trim() == "1."));

        let session_titles = snippets(&tokens, LexSemanticTokenKind::SessionTitleText, source);
        assert!(session_titles.iter().any(|text| text.trim() == "Intro"));

        let verbatim_subjects = snippets(&tokens, LexSemanticTokenKind::VerbatimSubject, source);
        assert!(verbatim_subjects
            .iter()
            .any(|text| text.trim_end() == "Cache"));

        let markers = snippets(&tokens, LexSemanticTokenKind::ListMarker, source);
        assert_eq!(markers.len(), 4);
        assert!(markers.iter().all(|marker| {
            let trimmed = marker.trim_start();
            trimmed.starts_with('-') || trimmed.chars().next().unwrap().is_numeric()
        }));

        let annotation_labels = snippets(&tokens, LexSemanticTokenKind::AnnotationLabel, source);
        assert!(annotation_labels
            .iter()
            .any(|text| text.contains("doc.note")));

        let parameters = snippets(&tokens, LexSemanticTokenKind::AnnotationParameter, source);
        assert!(parameters
            .iter()
            .any(|text| text.contains("severity=info")));

        assert!(verbatim_subjects
            .iter()
            .any(|text| text.contains("CLI Example")));

        let data_labels = snippets(&tokens, LexSemanticTokenKind::DataLabel, source);
        assert!(data_labels.iter().any(|text| text.contains("shell")));
    }

    #[test]
    fn collects_inline_tokens() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        let strong = snippets(&tokens, LexSemanticTokenKind::InlineStrong, source);
        assert!(strong.iter().any(|text| text.contains("Lex")));

        let emphasis = snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source);
        assert!(emphasis.iter().any(|text| text.contains("format")));

        let code = snippets(&tokens, LexSemanticTokenKind::InlineCode, source);
        assert!(code.iter().any(|text| text.contains("code")));

        let math = snippets(&tokens, LexSemanticTokenKind::InlineMath, source);
        assert!(math.iter().any(|text| text.contains("math")));
    }

    #[test]
    fn classifies_references() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        let citations = snippets(&tokens, LexSemanticTokenKind::ReferenceCitation, source);
        assert!(citations.iter().any(|text| text.contains("@spec2025")));

        let annotation_refs = snippets(&tokens, LexSemanticTokenKind::ReferenceAnnotation, source);
        assert!(annotation_refs.iter().any(|text| text.contains("::source")));

        let footnotes = snippets(&tokens, LexSemanticTokenKind::ReferenceFootnote, source);
        assert!(footnotes.iter().any(|text| text.contains("1")));

        let general = snippets(&tokens, LexSemanticTokenKind::Reference, source);
        assert!(general.iter().any(|text| text.contains("Cache")));
    }

    #[test]
    fn empty_document_has_no_tokens() {
        let document = Lexplore::benchmark(0)
            .parse()
            .expect("failed to parse empty benchmark fixture");
        assert!(collect_semantic_tokens(&document).is_empty());
    }

    #[test]
    fn emits_annotation_content_for_inline_annotation() {
        let document = sample_document();
        let tokens = collect_semantic_tokens(&document);
        let source = sample_source();

        let annotation_content = snippets(&tokens, LexSemanticTokenKind::AnnotationContent, source);
        let has_preface = annotation_content
            .iter()
            .any(|text| text.contains("Document preface"));
        assert!(
            has_preface,
            "AnnotationContent should be emitted for plain text inside annotations, got: {annotation_content:?}"
        );
    }

    #[test]
    fn annotation_content_excludes_formatted_text() {
        let source = ":: note :: Some *bold* text.\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        // Borrowed source slices for every token of the requested kind.
        let slices_of = |kind: LexSemanticTokenKind| -> Vec<&str> {
            tokens
                .iter()
                .filter(|token| token.kind == kind)
                .map(|token| &source[token.range.span.clone()])
                .collect()
        };

        let annotation_content = slices_of(LexSemanticTokenKind::AnnotationContent);
        assert!(
            annotation_content.iter().any(|s| s.contains("Some")),
            "Plain text before formatting should be AnnotationContent"
        );
        assert!(
            annotation_content.iter().any(|s| s.contains("text.")),
            "Plain text after formatting should be AnnotationContent"
        );
        assert!(
            !annotation_content.iter().any(|s| s.contains("bold")),
            "Formatted text should NOT be AnnotationContent"
        );

        let strong = slices_of(LexSemanticTokenKind::InlineStrong);
        assert!(strong.contains(&"bold"));
    }

    #[test]
    fn table_cell_inline_formatting_gets_tokens() {
        let source = "Stats:\n | *Name* | `code` |\n | _test_ | #42# |\n";
        let document = lex_core::lex::parsing::parse_document(source).expect("failed to parse");
        let tokens = collect_semantic_tokens(&document);

        let strong = snippets(&tokens, LexSemanticTokenKind::InlineStrong, source);
        assert!(
            strong.iter().any(|s| s.contains("Name")),
            "Expected InlineStrong for *Name* in table cell, got: {strong:?}"
        );

        let code = snippets(&tokens, LexSemanticTokenKind::InlineCode, source);
        assert!(
            code.iter().any(|s| s.contains("code")),
            "Expected InlineCode for `code` in table cell, got: {code:?}"
        );

        let emphasis = snippets(&tokens, LexSemanticTokenKind::InlineEmphasis, source);
        assert!(
            emphasis.iter().any(|s| s.contains("test")),
            "Expected InlineEmphasis for _test_ in table cell, got: {emphasis:?}"
        );

        let math = snippets(&tokens, LexSemanticTokenKind::InlineMath, source);
        assert!(
            math.iter().any(|s| s.contains("42")),
            "Expected InlineMath for #42# in table cell, got: {math:?}"
        );
    }
}