1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
use crate::sdk::{TokenImpl, TokenType};
/// Semantic token information collected and revised while parsing.
///
/// Unlike [`TokenStream`](crate::sdk::TokenStream), which hands out one token
/// at a time for consumption, `TokenBlocks` lets the semantic type of a token
/// at any position be overwritten. Use the `get_spans` and `get_html` methods
/// to read the resulting semantic tokens back out.
pub struct TokenBlocks<S: TokenType> {
    /// The source code the blocks refer to.
    ///
    /// Kept alongside the blocks because they are not guaranteed to cover
    /// the whole source.
    src: String,
    /// The semantic token blocks.
    blocks: Vec<TokenImpl<S>>,
}
impl<S: TokenType> AsRef<[TokenImpl<S>]> for TokenBlocks<S> {
    /// Borrow the stored semantic blocks as a slice, exactly as they are kept.
    ///
    /// The blocks are not necessarily contiguous: there may be gaps between
    /// them. Call `get_spans` instead when contiguous semantic tokens covering
    /// the entire source code are needed.
    fn as_ref(&self) -> &[TokenImpl<S>] {
        self.blocks.as_slice()
    }
}
impl<S> TokenBlocks<S>
where
    S: TokenType,
{
    /// Create a new, empty instance for the given source code.
    ///
    /// The source is copied so that spans can later be produced for the whole
    /// text, including the parts that no stored block refers to.
    pub fn new(src: &str) -> Self {
        Self {
            src: src.to_owned(),
            blocks: Vec::new(),
        }
    }

    /// Insert all tokens, converting each token type into the stored semantic type.
    ///
    /// The stored blocks are re-sorted by start position afterwards, so `tokens`
    /// does not have to be ordered. Overlapping tokens and tokens that share a
    /// start position with an existing block are inserted without any check.
    pub fn insert_all<T>(&mut self, tokens: &[TokenImpl<T>])
    where
        T: TokenType + Into<S>,
    {
        self.blocks.extend(tokens.iter().cloned().map(|t| TokenImpl {
            pos: t.pos,
            value: t.value,
            token_type: t.token_type.into(),
        }));
        // `set` locates blocks with a binary search on the start position,
        // so the blocks must stay ordered by it.
        self.blocks.sort_by_key(|block| block.pos.0);
    }

    /// Set the semantic of a token.
    ///
    /// If a stored block starts at the same position as `token`, only its
    /// semantic type is replaced; the block is assumed to end at the same
    /// position and to have the same content, and neither is checked.
    /// Otherwise a new block is inserted at the sorted position, without
    /// checking for overlaps with neighbouring blocks.
    pub fn set<T>(&mut self, token: &TokenImpl<T>, semantic_type: S)
    where
        T: TokenType,
        S: From<T>,
    {
        let start = token.pos.0;
        match self.blocks.binary_search_by_key(&start, |block| block.pos.0) {
            Ok(index) => self.blocks[index].token_type = semantic_type,
            Err(index) => self.blocks.insert(
                index,
                TokenImpl {
                    token_type: semantic_type,
                    value: token.value.clone(),
                    pos: token.pos,
                },
            ),
        }
    }

    /// Get the semantic spans.
    ///
    /// While `as_ref` returns the semantic tokens as they are stored,
    /// `get_spans` returns a contiguous list of spans that covers the entire
    /// source code: gaps between stored blocks become spans whose token type
    /// is `None`. If you don't care about the gaps, use `as_ref` instead to
    /// avoid the extra computation.
    ///
    /// When a stored block starts before the end of the previous one, only the
    /// part that is not yet covered is emitted; a block that is already fully
    /// covered is skipped.
    ///
    /// The `converter` parameter converts the stored token type to any custom
    /// type that implements the `TokenType` trait.
    ///
    /// # Panics
    ///
    /// Panics if a stored position lies outside the source code or does not
    /// fall on a UTF-8 character boundary, because the source is sliced by
    /// those positions.
    pub fn get_spans<T, F>(&self, converter: F) -> Vec<TokenImpl<Option<T>>>
    where
        F: Fn(S) -> T,
        T: TokenType,
    {
        // TODO: we can combine adjacent blocks with the same semantic
        let mut code_blocks = Vec::new();
        // Byte offset up to which the source has been covered so far.
        let mut cur = 0;
        for semantic_token in &self.blocks {
            let (start, end) = semantic_token.pos;
            if start > cur {
                // Source text between the previous span and this block.
                code_blocks.push(TokenImpl {
                    token_type: None,
                    value: self.src[cur..start].to_owned(),
                    pos: (cur, start),
                });
                cur = start;
            }
            if end > cur {
                code_blocks.push(TokenImpl {
                    token_type: Some(converter(semantic_token.token_type.clone())),
                    value: self.src[cur..end].to_owned(),
                    // The span ends where the block ends, matching `value`.
                    pos: (cur, end),
                });
                cur = end;
            }
        }
        if cur < self.src.len() {
            // Trailing source text after the last block.
            code_blocks.push(TokenImpl {
                token_type: None,
                value: self.src[cur..].to_owned(),
                pos: (cur, self.src.len()),
            });
        }
        code_blocks
    }

    /// Get the semantic spans as html.
    ///
    /// This is a wrapper that calls [`get_spans`](TokenBlocks::get_spans) and
    /// concatenates the html of every span, in order.
    /// The converter function is used to convert the stored token type to
    /// another custom type.
    pub fn get_html<T, F>(&self, converter: F) -> String
    where
        F: Fn(S) -> T,
        T: TokenType,
    {
        self.get_spans(converter)
            .iter()
            .map(|span| span.to_html())
            .collect()
    }
}