Struct regen::sdk::TokenBlocks
source · pub struct TokenBlocks<S> where S: TokenType { /* private fields */ }
Expand description
Semantic information that is stored and updated during the parsing process
The difference between TokenBlocks and TokenStream
is that TokenBlocks is used for overwriting semantic type for tokens in any position, while TokenStream
is used for consuming one token at a time. The get_spans and get_html methods can
be used to get the semantic tokens.
Implementations§
source§impl<S> TokenBlocks<S>where
S: TokenType,
impl<S> TokenBlocks<S>where S: TokenType,
sourcepub fn new(src: &str) -> Self
pub fn new(src: &str) -> Self
Create a new instance with the given source code
Examples found in repository?
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
fn main() {
// This example parses regen.grammar, and prints html code with prismjs classes
// This example is run from the `docs` directory
let grammar_source = fs::read_to_string("../regen.grammar").unwrap();
// There is a wrapper regen::parse_language_from_grammar that creates
// a Language object from a grammar string. However, it does not store
// semantic info, so we can't use that here.
// Run tokenizer
let lex_output = grammar::tokenize(&grammar_source);
// Create token stream
// 200 is the stack size, meaning the AST can have depth <= 200
// which is plenty. The default stack size for CLI is 2048
let mut ts = TokenStream::new(&lex_output.tokens, 200);
// Generate AST (need the ASTParser trait)
let parser = grammar::Parser;
let asts = parser.parse_ast_all(&mut ts).unwrap(); // error if syntax error
// collect semantic info so far
let mut outer_tbs = TokenBlocks::new(&grammar_source);
lex_output.apply_semantic(&mut outer_tbs);
asts.iter()
.for_each(|ast| ast.apply_semantic(&mut outer_tbs, &None));
// Generate PT, because it fills in additional semantic info
// This requires a lang builder as a context, but we won't need it
let mut lang_builder: Box<LangBuilder> = Box::default();
for ast in &asts {
// if you don't need semantic you can use the parse_pt method
match ast.parse_pt_with_semantic(outer_tbs, lang_builder) {
ParseTreeResultSemantic::Ok { /*pt*/ ctx, tbs, .. } => {
outer_tbs = tbs;
lang_builder = ctx;
},
ParseTreeResultSemantic::Err { .. /*pt, ctx, tbs, err*/ } => {
// should not happen, but you also get the context and semantic info back here
unreachable!();
}
}
}
// now we have the semantic info in outer_tbs, we can convert it to HTML
let code = outer_tbs.get_html(to_prismjs);
println!("{}", code);
}
source
pub fn insert_all<T>(&mut self, tokens: &[TokenImpl<T>])where
T: TokenType + Into<S>,
pub fn insert_all<T>(&mut self, tokens: &[TokenImpl<T>])where T: TokenType + Into<S>,
Insert all tokens and mark them with the associated token semantic
sourcepub fn set<T>(&mut self, token: &TokenImpl<T>, semantic_type: S)where
T: TokenType,
S: From<T>,
pub fn set<T>(&mut self, token: &TokenImpl<T>, semantic_type: S)where T: TokenType, S: From<T>,
Set the semantic of a token
This replaces the semantic type of the current token if there exists one that starts at the same position (and assumes it ends at the same position, without additional checks). If there is no token that starts at the same position, it inserts a new token without checking for overlap.
It also assumes the token in the parameter and the existing token stored have the same content
sourcepub fn get_spans<T, F>(&self, converter: F) -> Vec<TokenImpl<Option<T>>>where
F: Fn(S) -> T,
T: TokenType,
pub fn get_spans<T, F>(&self, converter: F) -> Vec<TokenImpl<Option<T>>>where F: Fn(S) -> T, T: TokenType,
Get the semantic spans.
The difference between as_ref and get_spans is that as_ref returns the semantic tokens as they are stored,
while get_spans returns a continuous list of spans that cover the entire source code.
If you don’t care about the gaps, use as_ref instead to avoid extra computation.
The converter parameter can be used to convert the stored token type to any custom type that implements the TokenType trait.
sourcepub fn get_html<T, F>(&self, converter: F) -> Stringwhere
F: Fn(S) -> T,
T: TokenType,
pub fn get_html<T, F>(&self, converter: F) -> Stringwhere F: Fn(S) -> T, T: TokenType,
Get the semantic spans as html
This is a wrapper that calls get_spans and then converts the semantic tokens to html.
The converter function is used to convert the stored token type to another custom type.
Examples found in repository?
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
fn main() {
// This example parses regen.grammar, and prints html code with prismjs classes
// This example is run from the `docs` directory
let grammar_source = fs::read_to_string("../regen.grammar").unwrap();
// There is a wrapper regen::parse_language_from_grammar that creates
// a Language object from a grammar string. However, it does not store
// semantic info, so we can't use that here.
// Run tokenizer
let lex_output = grammar::tokenize(&grammar_source);
// Create token stream
// 200 is the stack size, meaning the AST can have depth <= 200
// which is plenty. The default stack size for CLI is 2048
let mut ts = TokenStream::new(&lex_output.tokens, 200);
// Generate AST (need the ASTParser trait)
let parser = grammar::Parser;
let asts = parser.parse_ast_all(&mut ts).unwrap(); // error if syntax error
// collect semantic info so far
let mut outer_tbs = TokenBlocks::new(&grammar_source);
lex_output.apply_semantic(&mut outer_tbs);
asts.iter()
.for_each(|ast| ast.apply_semantic(&mut outer_tbs, &None));
// Generate PT, because it fills in additional semantic info
// This requires a lang builder as a context, but we won't need it
let mut lang_builder: Box<LangBuilder> = Box::default();
for ast in &asts {
// if you don't need semantic you can use the parse_pt method
match ast.parse_pt_with_semantic(outer_tbs, lang_builder) {
ParseTreeResultSemantic::Ok { /*pt*/ ctx, tbs, .. } => {
outer_tbs = tbs;
lang_builder = ctx;
},
ParseTreeResultSemantic::Err { .. /*pt, ctx, tbs, err*/ } => {
// should not happen, but you also get the context and semantic info back here
unreachable!();
}
}
}
// now we have the semantic info in outer_tbs, we can convert it to HTML
let code = outer_tbs.get_html(to_prismjs);
println!("{}", code);
}