lex_core/lex/
annotation.rs

//! Annotation-specific helpers shared across lexer, parser, and builders.
//!
//! Currently this module focuses on analyzing annotation headers (the token
//! sequence between `::` markers). The helpers keep the "label vs parameters"
//! rules in one place so every stage enforces the same constraints.

7use crate::lex::token::Token;
8use std::ops::Range;
9
/// Result of analyzing the tokens inside an annotation header (the token
/// sequence between `::` markers).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AnnotationHeaderAnalysis {
    /// Number of tokens that belong to the label, counted from the start of
    /// the header slice. Leading whitespace is included, and whitespace
    /// following the last label token is included as well; only a trailing
    /// `key=value` parameter region (and anything after it) is excluded.
    pub label_token_count: usize,
    /// Whether a syntactic label (non-whitespace, non-parameter) was found.
    pub has_label: bool,
}
18
19/// Analyze a raw slice of tokens located between `::` markers.
20pub fn analyze_annotation_header_tokens(tokens: &[Token]) -> AnnotationHeaderAnalysis {
21    analyze_slice(tokens, |token| token)
22}
23
24/// Analyze a slice of `(Token, Range)` pairs located between `::` markers.
25pub fn analyze_annotation_header_token_pairs(
26    tokens: &[(Token, Range<usize>)],
27) -> AnnotationHeaderAnalysis {
28    analyze_slice(tokens, |pair| &pair.0)
29}
30
31/// Slice helper used by the public analyzers.
32fn analyze_slice<T>(tokens: &[T], mut get: impl FnMut(&T) -> &Token) -> AnnotationHeaderAnalysis {
33    let len = tokens.len();
34    let mut idx = 0;
35
36    // Consume leading whitespace
37    while idx < len && is_header_whitespace(get(&tokens[idx])) {
38        idx += 1;
39    }
40
41    let mut consumed = idx;
42    let mut has_label = false;
43
44    while idx < len {
45        let token = get(&tokens[idx]);
46        if is_label_component(token) {
47            // Check if this sequence is actually the start of a parameter key
48            let mut check_idx = idx + 1;
49            while check_idx < len && is_label_component(get(&tokens[check_idx])) {
50                check_idx += 1;
51            }
52            while check_idx < len && is_header_whitespace(get(&tokens[check_idx])) {
53                check_idx += 1;
54            }
55            if check_idx < len && matches!(get(&tokens[check_idx]), Token::Equals) {
56                break;
57            }
58
59            idx += 1;
60            consumed = idx;
61            has_label = true;
62        } else if is_header_whitespace(token) {
63            idx += 1;
64            consumed = idx;
65        } else {
66            break;
67        }
68    }
69
70    AnnotationHeaderAnalysis {
71        label_token_count: consumed,
72        has_label,
73    }
74}
75
76fn is_label_component(token: &Token) -> bool {
77    matches!(
78        token,
79        Token::Text(_) | Token::Dash | Token::Number(_) | Token::Period
80    )
81}
82
83fn is_header_whitespace(token: &Token) -> bool {
84    matches!(token, Token::Whitespace(_) | Token::Indentation)
85}
86
87/// Collect the tokens that compose the label segment (including leading
88/// whitespace) and return the index of the next token after the label.
89pub fn split_label_tokens_with_ranges(
90    tokens: &[(Token, Range<usize>)],
91) -> (Vec<(Token, Range<usize>)>, usize, bool) {
92    let analysis = analyze_annotation_header_token_pairs(tokens);
93    let label_tokens = tokens[..analysis.label_token_count].to_vec();
94    (label_tokens, analysis.label_token_count, analysis.has_label)
95}