regen/sdk/token/token_blocks.rs
1use crate::sdk::{TokenImpl, TokenType};
2
3/// Semantic information that is stored and updated during the parsing process
4///
5/// The difference between `TokenBlocks` and [`TokenStream`](crate::sdk::TokenStream)
6/// is that `TokenBlocks` is used for overwriting semantic type for tokens in any position, while `TokenStream`
7/// is used for consuming one token at a time. The `get_spans` and `get_html` methods can
8/// be used to get the semantic tokens.
9#[derive(PartialEq, Debug, Clone)]
10pub struct TokenBlocks<S>
11where
12 S: TokenType,
13{
14 /// Source code.
15 ///
16 /// This is needed because the blocks may not cover the entire source code
17 src: String,
18 /// Semantic token blocks
19 blocks: Vec<TokenImpl<S>>,
20}
21
22impl<S> AsRef<[TokenImpl<S>]> for TokenBlocks<S>
23where
24 S: TokenType,
25{
26 /// Get the semantic blocks currently stored as a slice.
27 ///
28 /// The returned semantic tokens may not be contiguous (i.e. maybe have gaps).
29 /// Use one of the `get_spans` method to compute contiguous semantic tokens that cover the entire source code.
30 fn as_ref(&self) -> &[TokenImpl<S>] {
31 &self.blocks
32 }
33}
34
35impl<S> TokenBlocks<S>
36where
37 S: TokenType,
38{
39 /// Create a new instance with the given source code
40 pub fn new(src: &str) -> Self {
41 Self {
42 src: src.to_string(),
43 blocks: Vec::new(),
44 }
45 }
46
47 /// Insert all tokens and mark them with the associated token semantic
48 pub fn insert_all<T>(&mut self, tokens: &[TokenImpl<T>])
49 where
50 T: TokenType + Into<S>,
51 {
52 self.blocks
53 .extend(tokens.iter().cloned().map(|t| TokenImpl {
54 pos: t.pos,
55 value: t.value,
56 token_type: t.token_type.into(),
57 }));
58 self.blocks.sort_by(|a, b| a.pos.0.cmp(&b.pos.0))
59 }
60
61 /// Set the semantic of a token
62 ///
63 /// This replaces the semantic type of the current token if there exists one that starts at the same position (and assumes it ends at the same position, without additional checks).
64 /// If there is no token that starts at the same position, it inserts a new token without checking for overlapping.
65 ///
66 /// It also assumes the token in the parameter and the existing token stored have the same content
67 pub fn set<T>(&mut self, token: &TokenImpl<T>, semantic_type: S)
68 where
69 T: TokenType,
70 S: From<T>,
71 {
72 let result = self
73 .blocks
74 .binary_search_by(|probe| probe.pos.0.cmp(&token.pos.0));
75 match result {
76 Ok(index) => {
77 self.blocks[index].token_type = semantic_type;
78 }
79 Err(index) => {
80 self.blocks.insert(
81 index,
82 TokenImpl {
83 token_type: semantic_type,
84 value: token.value.clone(),
85 pos: token.pos,
86 },
87 );
88 }
89 }
90 }
91
92 /// Get the semantic spans.
93 ///
94 /// The difference between `as_ref` and `get_spans` is that `as_ref` returns the semantic tokens as they are stored,
95 /// while `get_spans` returns a continuous list of spans that cover the entire source code.
96 /// If you don't care about the gaps, use `as_ref` instead to avoid extra computation.
97 ///
98 /// The `converter` parameter can be used to convert the stored token type to any custom type that implements the `TokenType` trait.
99 pub fn get_spans<T, F>(&self, converter: F) -> Vec<TokenImpl<Option<T>>>
100 where
101 F: Fn(S) -> T,
102 T: TokenType,
103 {
104 // TODO: we can combine adjacent blocks with the same semantic
105 let mut code_blocks = Vec::new();
106 let mut cur = 0;
107 for semantic_token in &self.blocks {
108 let (start, end) = semantic_token.pos;
109 if start > cur {
110 code_blocks.push(TokenImpl {
111 token_type: None,
112 value: self.src[cur..start].to_owned(),
113 pos: (cur, start),
114 });
115 }
116 cur = cur.max(start);
117 if end > cur {
118 code_blocks.push(TokenImpl {
119 token_type: Some(converter(semantic_token.token_type.clone())),
120 value: self.src[cur..end].to_owned(),
121 pos: (cur, start),
122 });
123 cur = end;
124 }
125 }
126 if cur < self.src.len() {
127 code_blocks.push(TokenImpl {
128 token_type: None,
129 value: self.src[cur..].to_owned(),
130 pos: (cur, self.src.len()),
131 });
132 }
133 code_blocks
134 }
135
136 /// Get the semantic spans as html
137 ///
138 /// This is a wrapper that calls [`get_spans`](TokenBlocks::get_spans) and then converts the semantic tokens to html.
139 /// The converter function is be used to convert the stored token type to another custom type.
140 pub fn get_html<T, F>(&self, converter: F) -> String
141 where
142 F: Fn(S) -> T,
143 T: TokenType,
144 {
145 self.get_spans(converter)
146 .iter()
147 .map(|s| s.to_html())
148 .collect::<Vec<String>>()
149 .join("")
150 }
151}