use super::{Markers, MatchKind, Model, Scope, TokenGroup};
/// Begin/end delimiter pair for a generated region, spelled as `//` line comments.
///
/// NOTE(review): presumably these bracket the generated rules inside `grammar.js`
/// (the `//` comment syntax matches JavaScript) — confirm against the splicing code.
pub const MARKERS: Markers = Markers {
begin: "// >>> rossi gen-grammars (generated, do not edit)",
end: "// <<< rossi gen-grammars",
};
/// Begin/end delimiter pair for a generated region, spelled as `;` line comments.
///
/// NOTE(review): presumably these bracket the generated captures inside a tree-sitter
/// query file such as `highlights.scm` — confirm against the splicing code.
pub const MARKERS_SCM: Markers = Markers {
begin: "; >>> rossi gen-grammars (generated, do not edit)",
end: "; <<< rossi gen-grammars",
};
/// Returns the grammar node (rule) name under which `group` is emitted.
///
/// Keyword and builtin scopes map to a single name regardless of how the group is
/// matched; constant and operator scopes get distinct names for word-matched and
/// symbol-matched groups.
pub fn node_name(group: &TokenGroup) -> &'static str {
    match group.scope {
        Scope::KeywordControl => "keyword",
        Scope::KeywordOther => "status_keyword",
        Scope::SupportFunction => "builtin",
        Scope::ConstantLanguage => match group.kind {
            MatchKind::Word => "constant_word",
            MatchKind::Symbol => "constant_sym",
        },
        Scope::KeywordOperator => match group.kind {
            MatchKind::Word => "operator_word",
            MatchKind::Symbol => "operator_sym",
        },
    }
}
/// Returns the highlight capture name (the part written after `@`) for a scope.
///
/// Both keyword scopes share the `keyword` capture.
fn capture_name(scope: Scope) -> &'static str {
    match scope {
        Scope::KeywordOperator => "operator",
        Scope::SupportFunction => "function.builtin",
        Scope::ConstantLanguage => "constant.builtin",
        Scope::KeywordControl => "keyword",
        Scope::KeywordOther => "keyword",
    }
}
/// Renders the generated grammar rules for every non-empty group in `model`.
///
/// Each group contributes one line of the form ` <node>: $ => <token expr>,`, in model
/// order; groups without members are skipped. The result ends with a single space
/// after the last newline.
///
/// NOTE(review): the trailing space presumably indents the end marker that follows
/// the region — confirm against the splicing code.
pub fn render_grammar_region(model: &Model) -> String {
    let mut region = model
        .groups
        .iter()
        .filter(|group| !group.members.is_empty())
        .fold(String::new(), |mut acc, group| {
            let name = node_name(group);
            acc.push_str(&format!(" {name}: $ => {},\n", token_expr(group)));
            acc
        });
    region.push_str(" ");
    region
}
/// Renders the highlight query region for `model`.
///
/// First comes one `(<node>) @<capture>` line per non-empty group, in model order.
/// A fixed block of captures follows, covering comments, strings, numbers, labels,
/// identifiers, brackets and the comma delimiter; it does not depend on the model.
pub fn render_highlights_region(model: &Model) -> String {
    let mut scm: String = model
        .groups
        .iter()
        .filter(|group| !group.members.is_empty())
        .map(|group| format!("({}) @{}\n", node_name(group), capture_name(group.scope)))
        .collect();

    // Captures that are independent of the model's token groups.
    scm.push_str(
        "\n(comment) @comment\n\
         (string) @string\n\
         (number) @number\n\
         (label) @label\n\
         (identifier) @variable\n\n\
         [\"(\" \")\" \"[\" \"]\" \"{\" \"}\"] @punctuation.bracket\n\
         \",\" @punctuation.delimiter\n",
    );
    scm
}
/// Serializes the model's tokens as pretty-printed JSON terminated by a newline.
///
/// The JSON object maps each node name to the members of its group. Empty groups are
/// omitted, and keys come out in sorted order because they are gathered in a
/// `BTreeMap`. If two groups share a node name, the later group's members win.
///
/// # Panics
///
/// Panics if `serde_json` fails to serialize the map.
pub fn tokens_manifest(model: &Model) -> String {
    let mut by_node = std::collections::BTreeMap::new();
    model
        .groups
        .iter()
        .filter(|group| !group.members.is_empty())
        .for_each(|group| {
            by_node.insert(node_name(group), &group.members);
        });

    let json = serde_json::to_string_pretty(&by_node).expect("serialize token manifest");
    json + "\n"
}
/// Builds the `token(...)` expression for one group.
///
/// * Word groups become a regex alternation `token(/(?:a|b|…)/)`. Members are ordered
///   with `super::longest_first`, each is escaped with `super::escape_oniguruma`, and
///   an `i` flag is appended when the group is case-insensitive.
/// * Symbol groups become quoted string literals: a bare `token("x")` when there is
///   exactly one member, otherwise `token(choice("x", "y", …))`.
fn token_expr(group: &TokenGroup) -> String {
    match group.kind {
        MatchKind::Symbol => {
            let literals: Vec<String> = group.members.iter().map(|sym| js_string(sym)).collect();
            match literals.as_slice() {
                [only] => format!("token({})", only),
                _ => format!("token(choice({}))", literals.join(", ")),
            }
        }
        MatchKind::Word => {
            let flag = if group.case_insensitive { "i" } else { "" };

            let mut ordered = group
                .members
                .iter()
                .map(String::as_str)
                .collect::<Vec<&str>>();
            ordered.sort_by(|lhs, rhs| super::longest_first(lhs, rhs));

            let pattern = ordered
                .iter()
                .map(|word| super::escape_oniguruma(word))
                .collect::<Vec<String>>()
                .join("|");
            format!("token(/(?:{})/{flag})", pattern)
        }
    }
}
/// Renders `s` as a double-quoted JavaScript string literal.
///
/// Backslashes and double quotes are backslash-escaped. Line feed, carriage return
/// and tab use their short escapes (`\n`, `\r`, `\t`). Any other control character,
/// as well as U+2028 / U+2029, is written as a four-digit `\uXXXX` escape so the
/// literal can never contain a raw line terminator. All other characters (including
/// non-ASCII symbols such as `∈`) are copied through unchanged.
fn js_string(s: &str) -> String {
    let mut out = String::with_capacity(s.len() + 2);
    out.push('"');
    for c in s.chars() {
        match c {
            '\\' | '"' => {
                out.push('\\');
                out.push(c);
            }
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Control characters are all below U+0100 and the two separators are
            // four hex digits, so a fixed-width `\uXXXX` escape always suffices.
            c if c.is_control() || c == '\u{2028}' || c == '\u{2029}' => {
                out.push_str(&format!("\\u{:04x}", c as u32));
            }
            _ => out.push(c),
        }
    }
    out.push('"');
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads `editors/tree-sitter-eventb/grammar.js`, resolved relative to this crate's
    /// manifest directory. Panics with the path and the I/O error if it cannot be read.
    fn grammar_js() -> String {
        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("../../editors/tree-sitter-eventb/grammar.js");
        std::fs::read_to_string(&path).unwrap_or_else(|e| panic!("read {}: {e}", path.display()))
    }

    /// Every generated node name must appear as `$.<name>,` somewhere in `grammar.js`.
    #[test]
    fn generated_nodes_are_listed_in_token_choice() {
        let model = Model::build();
        let grammar = grammar_js();
        for group in &model.groups {
            if group.members.is_empty() {
                continue;
            }
            let name = node_name(group);
            assert!(
                grammar.contains(&format!("$.{name},")),
                "grammar.js `_token` is missing `$.{name}` (generated node has no place in the tree)"
            );
        }
    }

    /// The rendered highlights must contain one capture line per non-empty group plus
    /// the fixed, model-independent captures.
    #[test]
    fn highlights_capture_every_node_and_the_structural_tokens() {
        let model = Model::build();
        let scm = render_highlights_region(&model);
        for group in &model.groups {
            if group.members.is_empty() {
                continue;
            }
            let name = node_name(group);
            let capture = capture_name(group.scope);
            assert!(
                scm.contains(&format!("({name}) @{capture}\n")),
                "highlights.scm is missing `({name}) @{capture}`"
            );
        }
        for fixed in [
            "(comment) @comment",
            "(string) @string",
            "(number) @number",
            "(label) @label",
            "(identifier) @variable",
            "@punctuation.bracket",
            "@punctuation.delimiter",
        ] {
            assert!(scm.contains(fixed), "highlights.scm is missing `{fixed}`");
        }
    }

    /// The manifest must parse as a JSON object that lists every member of every
    /// non-empty group under the group's node name, including a few known spellings.
    #[test]
    fn tokens_manifest_lists_every_node_and_all_members() {
        let model = Model::build();
        let json: serde_json::Value =
            serde_json::from_str(&tokens_manifest(&model)).expect("manifest is valid JSON");
        let obj = json.as_object().expect("manifest is a JSON object");
        for group in &model.groups {
            if group.members.is_empty() {
                continue;
            }
            let name = node_name(group);
            let arr = obj
                .get(name)
                .unwrap_or_else(|| panic!("manifest missing node `{name}`"))
                .as_array()
                .unwrap_or_else(|| panic!("manifest `{name}` is not an array"));
            let listed: Vec<&str> = arr.iter().map(|v| v.as_str().unwrap()).collect();
            for m in &group.members {
                assert!(
                    listed.contains(&m.as_str()),
                    "manifest `{name}` is missing spelling `{m}`"
                );
            }
        }
        // Spot-check one well-known spelling per representative node.
        assert!(
            obj["keyword"]
                .as_array()
                .unwrap()
                .iter()
                .any(|v| v == "context")
        );
        assert!(
            obj["builtin"]
                .as_array()
                .unwrap()
                .iter()
                .any(|v| v == "card")
        );
        assert!(
            obj["operator_sym"]
                .as_array()
                .unwrap()
                .iter()
                .any(|v| v == "∈")
        );
    }

    /// Extracts the `|`-separated alternatives of the regex rule named `rule` from a
    /// rendered grammar region. Panics if the rule is absent or is not of the form
    /// `token(/(?:…)/…)`.
    fn word_alternatives(region: &str, rule: &str) -> Vec<String> {
        // Built once rather than per line inside the search closure.
        let prefix = format!("{rule}:");
        let line = region
            .lines()
            .find(|l| l.trim_start().starts_with(&prefix))
            .unwrap_or_else(|| panic!("missing rule {rule}"));
        let body = line
            .split_once("(?:")
            .and_then(|(_, rest)| rest.split_once(")/"))
            .map(|(body, _)| body)
            .unwrap_or_else(|| panic!("rule {rule} is not a `token(/(?:…)/…)` regex: {line}"));
        body.split('|').map(str::to_string).collect()
    }

    /// Within each word rule, an alternative that has another alternative as a strict
    /// prefix must be listed before that prefix. Also requires that at least one such
    /// pair exists, so the ordering check is actually exercised.
    #[test]
    fn word_rules_are_longest_first() {
        let model = Model::build();
        let region = render_grammar_region(&model);
        let mut pairs_checked = 0;
        for group in &model.groups {
            if !matches!(group.kind, MatchKind::Word) || group.members.is_empty() {
                continue;
            }
            let rule = node_name(group);
            let alts = word_alternatives(&region, rule);
            for (i, a) in alts.iter().enumerate() {
                for (j, b) in alts.iter().enumerate() {
                    if i != j && b.len() > a.len() && b.starts_with(a.as_str()) {
                        pairs_checked += 1;
                        assert!(
                            j < i,
                            "in `{rule}`, longer `{b}` must precede its prefix `{a}`: {alts:?}"
                        );
                    }
                }
            }
        }
        assert!(
            pairs_checked > 0,
            "no prefix pair found to exercise ordering"
        );
    }

    /// Symbol rules must be emitted as a `choice` of string literals, not as a regex.
    #[test]
    fn symbol_rules_are_string_literals() {
        let model = Model::build();
        let region = render_grammar_region(&model);
        let op_sym = region
            .lines()
            .find(|l| l.trim_start().starts_with("operator_sym:"))
            .expect("operator_sym rule");
        assert!(op_sym.contains("token(choice("));
        assert!(!op_sym.contains("/(?:"));
        assert!(op_sym.contains("\"∈\""));
        assert!(op_sym.contains("\"<=>\""));
    }

    /// Backslashes are doubled; non-ASCII symbols pass through untouched.
    #[test]
    fn js_string_escapes_backslashes() {
        assert_eq!(js_string("\\/"), "\"\\\\/\"");
        assert_eq!(js_string("∈"), "\"∈\"");
    }
}