regen/sdk/token/token_blocks.rs

use crate::sdk::{TokenImpl, TokenType};

/// Semantic information that is stored and updated during the parsing process
///
/// The difference between `TokenBlocks` and [`TokenStream`](crate::sdk::TokenStream)
/// is that `TokenBlocks` is used for overwriting the semantic type of tokens at any position, while `TokenStream`
/// is used for consuming one token at a time. The `get_spans` and `get_html` methods can
/// be used to get the semantic tokens.
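///
/// # Example
///
/// A minimal usage sketch (not compiled here); `Sem`, `lexer_tokens`, and
/// `ident_token` are hypothetical names standing in for a user-defined
/// [`TokenType`] and lexer output:
///
/// ```ignore
/// let mut blocks = TokenBlocks::<Sem>::new("let x = 1;");
/// // seed with lexer output (token types converted via `Into`)
/// blocks.insert_all(&lexer_tokens);
/// // refine the semantic type of a token the parser has resolved
/// blocks.set(&ident_token, Sem::Variable);
/// // contiguous spans covering the entire source
/// let spans = blocks.get_spans(|s| s);
/// ```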
#[derive(PartialEq, Debug, Clone)]
pub struct TokenBlocks<S>
where
    S: TokenType,
{
    /// Source code.
    ///
    /// This is needed because the blocks may not cover the entire source code
    src: String,
    /// Semantic token blocks
    blocks: Vec<TokenImpl<S>>,
}

impl<S> AsRef<[TokenImpl<S>]> for TokenBlocks<S>
where
    S: TokenType,
{
    /// Get the semantic blocks currently stored as a slice.
    ///
    /// The returned semantic tokens may not be contiguous (i.e. they may have gaps).
    /// Use one of the `get_spans` methods to compute contiguous semantic tokens that cover the entire source code.
    fn as_ref(&self) -> &[TokenImpl<S>] {
        &self.blocks
    }
}

impl<S> TokenBlocks<S>
where
    S: TokenType,
{
    /// Create a new instance with the given source code
    pub fn new(src: &str) -> Self {
        Self {
            src: src.to_string(),
            blocks: Vec::new(),
        }
    }

    /// Insert all tokens and mark them with the associated token semantic
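    ///
    /// # Example
    ///
    /// A sketch, assuming the lexer produces `TokenImpl<Tok>` values and
    /// `Tok: Into<Sem>` (both type names are hypothetical):
    ///
    /// ```ignore
    /// let lexer_tokens: Vec<TokenImpl<Tok>> = lex(source);
    /// blocks.insert_all(&lexer_tokens);
    /// ```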
    pub fn insert_all<T>(&mut self, tokens: &[TokenImpl<T>])
    where
        T: TokenType + Into<S>,
    {
        self.blocks
            .extend(tokens.iter().cloned().map(|t| TokenImpl {
                pos: t.pos,
                value: t.value,
                token_type: t.token_type.into(),
            }));
        self.blocks.sort_by(|a, b| a.pos.0.cmp(&b.pos.0))
    }

    /// Set the semantic type of a token
    ///
    /// If a stored token starts at the same position, its semantic type is replaced
    /// (the stored token is assumed to also end at the same position; no additional checks are made).
    /// If no stored token starts at that position, a new token is inserted without checking for overlap.
    ///
    /// It also assumes the token passed in and the stored token have the same content.
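    ///
    /// # Example
    ///
    /// A sketch; `ident_token` and `Sem::FunctionName` are hypothetical:
    ///
    /// ```ignore
    /// // re-tag an identifier once the parser knows it names a function
    /// blocks.set(&ident_token, Sem::FunctionName);
    /// ```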
    pub fn set<T>(&mut self, token: &TokenImpl<T>, semantic_type: S)
    where
        T: TokenType,
        S: From<T>,
    {
        let result = self
            .blocks
            .binary_search_by(|probe| probe.pos.0.cmp(&token.pos.0));
        match result {
            Ok(index) => {
                self.blocks[index].token_type = semantic_type;
            }
            Err(index) => {
                self.blocks.insert(
                    index,
                    TokenImpl {
                        token_type: semantic_type,
                        value: token.value.clone(),
                        pos: token.pos,
                    },
                );
            }
        }
    }

    /// Get the semantic spans.
    ///
    /// The difference between `as_ref` and `get_spans` is that `as_ref` returns the semantic tokens as they are stored,
    /// while `get_spans` returns a contiguous list of spans that covers the entire source code.
    /// If you don't care about the gaps, use `as_ref` instead to avoid extra computation.
    ///
    /// The `converter` parameter can be used to convert the stored token type to any custom type that implements the `TokenType` trait.
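    ///
    /// # Example
    ///
    /// A sketch using an identity converter (assuming the stored type `S` is
    /// also the desired output type); parts of the source not covered by any
    /// stored block come back as spans with `token_type: None`:
    ///
    /// ```ignore
    /// let spans = blocks.get_spans(|s| s);
    /// // the spans are contiguous: each one starts where the previous ends
    /// ```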
    pub fn get_spans<T, F>(&self, converter: F) -> Vec<TokenImpl<Option<T>>>
    where
        F: Fn(S) -> T,
        T: TokenType,
    {
        // TODO: we can combine adjacent blocks with the same semantic
        let mut code_blocks = Vec::new();
        let mut cur = 0;
        for semantic_token in &self.blocks {
            let (start, end) = semantic_token.pos;
            if start > cur {
                // gap before this block: emit an untyped span
                code_blocks.push(TokenImpl {
                    token_type: None,
                    value: self.src[cur..start].to_owned(),
                    pos: (cur, start),
                });
            }
            cur = cur.max(start);
            if end > cur {
                // the (remaining) part covered by this block: emit a typed span
                code_blocks.push(TokenImpl {
                    token_type: Some(converter(semantic_token.token_type.clone())),
                    value: self.src[cur..end].to_owned(),
                    pos: (cur, end),
                });
                cur = end;
            }
        }
        if cur < self.src.len() {
            // trailing gap after the last block
            code_blocks.push(TokenImpl {
                token_type: None,
                value: self.src[cur..].to_owned(),
                pos: (cur, self.src.len()),
            });
        }
        code_blocks
    }

    /// Get the semantic spans as html
    ///
    /// This is a wrapper that calls [`get_spans`](TokenBlocks::get_spans) and then converts the semantic tokens to html.
    /// The converter function is used to convert the stored token type to another custom type.
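    ///
    /// # Example
    ///
    /// A sketch with an identity converter (the resulting markup depends on how
    /// `TokenImpl::to_html` renders the converted token type):
    ///
    /// ```ignore
    /// let html = blocks.get_html(|s| s);
    /// ```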
    pub fn get_html<T, F>(&self, converter: F) -> String
    where
        F: Fn(S) -> T,
        T: TokenType,
    {
        self.get_spans(converter)
            .iter()
            .map(|s| s.to_html())
            .collect::<Vec<String>>()
            .join("")
    }
}