#![allow(dead_code)]
use rowan::{Language, NodeOrToken, SyntaxNode};
/// One node of the structural projection built from a syntax tree.
///
/// Every variant except [`Atom::Verbatim`] carries the projected children of
/// the syntax node it was built from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom {
/// An environment: its name followed by the projection of its body.
Env(String, Vec<Atom>),
/// A command: its name (leading backslash removed) followed by the
/// projection of its arguments. A bare command token has no arguments.
Cmd(String, Vec<Atom>),
/// Projection of a node classified as [`Cat::Group`].
Group(Vec<Atom>),
/// Projection of a node classified as [`Cat::Opt`].
Opt(Vec<Atom>),
/// Projection of a node classified as [`Cat::Math`].
Math(Vec<Atom>),
/// Placeholder for a verbatim token; the token text is not retained.
Verbatim,
}
/// Category a [`Projector`] assigns to a syntax node kind; `project_node`
/// dispatches on it to decide which [`Atom`], if any, the node becomes.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Cat {
/// Node becomes an [`Atom::Env`].
Env,
/// Node becomes an [`Atom::Cmd`].
Cmd,
/// Node becomes an [`Atom::Group`].
Group,
/// Node becomes an [`Atom::Opt`].
Opt,
/// Node becomes an [`Atom::Math`].
Math,
/// Node produces no atom of its own; its children are projected in its place.
Transparent,
/// Node and everything beneath it are omitted from the projection.
Drop,
}
/// Describes how the syntax kinds of one rowan language map onto the shared
/// [`Atom`] projection.
pub trait Projector {
/// The rowan language whose trees this projector understands.
type Lang: Language;
/// Classifies a node kind; see [`Cat`] for what each category produces.
fn cat(kind: <Self::Lang as Language>::Kind) -> Cat;
/// Returns `true` for node kinds that are filtered out of an environment's
/// direct children before its body is projected.
fn is_begin_or_end(kind: <Self::Lang as Language>::Kind) -> bool;
/// Returns `true` for token kinds that hold a command name.
fn is_command_token(kind: <Self::Lang as Language>::Kind) -> bool;
/// Returns `true` for token kinds accepted as an environment name; the
/// first such token among an environment's descendants is used.
fn is_word_token(kind: <Self::Lang as Language>::Kind) -> bool;
/// Returns `true` for token kinds that are projected as [`Atom::Verbatim`].
fn is_verbatim_token(kind: <Self::Lang as Language>::Kind) -> bool;
}
/// Rowan syntax node type for the language of projector `P`.
type Node<P> = SyntaxNode<<P as Projector>::Lang>;
/// Projects the syntax tree rooted at `root` into a forest of [`Atom`]s,
/// classifying nodes and tokens through the projector `P`.
pub fn project<P>(root: &Node<P>) -> Vec<Atom>
where
    P: Projector,
{
    project_node::<P>(root)
}
/// Projects a single syntax node according to the category `P` gives it.
///
/// Returns zero atoms for dropped nodes, the children's atoms for transparent
/// nodes, and exactly one atom for every other category.
fn project_node<P: Projector>(node: &Node<P>) -> Vec<Atom> {
    let atom = match P::cat(node.kind()) {
        // These two categories do not wrap anything, so leave early.
        Cat::Drop => return Vec::new(),
        Cat::Transparent => return project_children::<P>(node),
        Cat::Cmd => Atom::Cmd(command_name::<P>(node), project_command_args::<P>(node)),
        Cat::Env => {
            let mut body = Vec::new();
            for elem in node.children_with_tokens() {
                // Begin/end delimiter nodes are not part of the body; tokens
                // are always passed through to `project_elem`.
                let is_delimiter = matches!(
                    &elem,
                    NodeOrToken::Node(child) if P::is_begin_or_end(child.kind())
                );
                if !is_delimiter {
                    body.extend(project_elem::<P>(&elem));
                }
            }
            Atom::Env(env_name::<P>(node), body)
        }
        Cat::Group => Atom::Group(project_children::<P>(node)),
        Cat::Opt => Atom::Opt(project_children::<P>(node)),
        Cat::Math => Atom::Math(project_children::<P>(node)),
    };
    vec![atom]
}
/// Projects every direct child (node or token) of `node`, in order, and
/// concatenates the resulting atoms.
fn project_children<P: Projector>(node: &Node<P>) -> Vec<Atom> {
    let mut atoms = Vec::new();
    for child in node.children_with_tokens() {
        atoms.extend(project_elem::<P>(&child));
    }
    atoms
}
/// Projects the children of a command node, leaving out the first command
/// token (the command's own name). Any later command tokens are projected
/// like every other child.
fn project_command_args<P: Projector>(node: &Node<P>) -> Vec<Atom> {
    let mut args = Vec::new();
    let mut name_pending = true;
    for elem in node.children_with_tokens() {
        // Only the first command token is the name; once it has been seen the
        // check is skipped entirely.
        let is_name = name_pending
            && matches!(
                &elem,
                NodeOrToken::Token(tok) if P::is_command_token(tok.kind())
            );
        if is_name {
            name_pending = false;
            continue;
        }
        args.extend(project_elem::<P>(&elem));
    }
    args
}
/// Removes the single leading backslash that introduces a command.
///
/// Only one backslash is stripped so that the control symbol `\\` keeps the
/// name `\` instead of collapsing to an empty string, which would make it
/// indistinguishable from a command whose name token is missing. Text without
/// a leading backslash is returned unchanged.
fn bare_command_name(text: &str) -> &str {
    text.strip_prefix('\\').unwrap_or(text)
}

/// Projects one child element of a syntax node.
///
/// Nodes are projected recursively. Verbatim tokens become
/// [`Atom::Verbatim`], command tokens become an argument-less [`Atom::Cmd`],
/// and all other tokens contribute nothing.
fn project_elem<P: Projector>(
    elem: &NodeOrToken<Node<P>, rowan::SyntaxToken<P::Lang>>,
) -> Vec<Atom> {
    match elem {
        NodeOrToken::Node(n) => project_node::<P>(n),
        // The verbatim check comes first, so a kind that satisfies both
        // predicates is treated as verbatim.
        NodeOrToken::Token(t) if P::is_verbatim_token(t.kind()) => vec![Atom::Verbatim],
        NodeOrToken::Token(t) if P::is_command_token(t.kind()) => {
            vec![Atom::Cmd(bare_command_name(t.text()).to_string(), Vec::new())]
        }
        NodeOrToken::Token(_) => Vec::new(),
    }
}

/// Returns the name of a command node: the text of its first direct command
/// token with the introducing backslash removed, or an empty string when the
/// node has no such token.
fn command_name<P: Projector>(node: &Node<P>) -> String {
    node.children_with_tokens()
        .filter_map(|e| e.into_token())
        .find(|t| P::is_command_token(t.kind()))
        .map(|t| bare_command_name(t.text()).to_string())
        .unwrap_or_default()
}
fn env_name<P: Projector>(node: &Node<P>) -> String {
node.descendants_with_tokens()
.filter_map(|e| e.into_token())
.find(|t| P::is_word_token(t.kind()))
.map(|t| t.text().to_string())
.unwrap_or_default()
}
/// Renders a forest of atoms as text lines, one line per atom, with every
/// top-level atom starting at depth 0.
pub fn render_lines(forest: &[Atom]) -> Vec<String> {
    let mut lines = Vec::new();
    forest
        .iter()
        .for_each(|atom| render_atom(atom, 0, &mut lines));
    lines
}
/// Appends the line for `atom` to `out`, followed by the lines of its
/// children rendered one level deeper. The line is the atom's label prefixed
/// by the pad string repeated `depth` times.
fn render_atom(atom: &Atom, depth: usize, out: &mut Vec<String>) {
    let label = match atom {
        Atom::Env(name, _) => format!("(env {name})"),
        Atom::Cmd(name, _) => format!("(cmd {name})"),
        Atom::Group(_) => String::from("(group)"),
        Atom::Opt(_) => String::from("(opt)"),
        Atom::Math(_) => String::from("(math)"),
        Atom::Verbatim => String::from("(verbatim)"),
    };
    // Verbatim is the only leaf variant; everything else exposes its children.
    let kids: &[Atom] = match atom {
        Atom::Env(_, kids)
        | Atom::Cmd(_, kids)
        | Atom::Group(kids)
        | Atom::Opt(kids)
        | Atom::Math(kids) => kids,
        Atom::Verbatim => &[],
    };

    let mut line = " ".repeat(depth);
    line.push_str(&label);
    out.push(line);

    for kid in kids {
        render_atom(kid, depth + 1, out);
    }
}
/// Returns the length of the longest common subsequence of `a` and `b`,
/// comparing whole lines for equality.
///
/// Uses the row-by-row dynamic programme, keeping only the previous and the
/// current row of the table.
pub fn lcs_len(a: &[String], b: &[String]) -> usize {
    if a.is_empty() || b.is_empty() {
        return 0;
    }
    let width = b.len() + 1;
    let mut above = vec![0usize; width];
    let mut row = vec![0usize; width];
    for left in a {
        // `row[0]` is never written and stays 0; every other cell is
        // overwritten below, so the swapped-in buffer needs no clearing.
        for (col, right) in b.iter().enumerate() {
            row[col + 1] = if left == right {
                above[col] + 1
            } else {
                row[col].max(above[col + 1])
            };
        }
        std::mem::swap(&mut above, &mut row);
    }
    above[width - 1]
}
/// Returns a Dice-style similarity of two line sequences: twice the LCS
/// length divided by the combined length. Two empty sequences score `1.0`.
pub fn dice(a: &[String], b: &[String]) -> f64 {
    match a.len() + b.len() {
        0 => 1.0,
        total => 2.0 * lcs_len(a, b) as f64 / total as f64,
    }
}
pub enum Badness {}
impl Projector for Badness {
type Lang = badness::syntax::BadnessLang;
fn cat(kind: badness::syntax::SyntaxKind) -> Cat {
use badness::syntax::SyntaxKind::*;
match kind {
COMMAND => Cat::Cmd,
ENVIRONMENT => Cat::Env,
GROUP => Cat::Group,
OPTIONAL => Cat::Opt,
INLINE_MATH | DISPLAY_MATH => Cat::Math,
BEGIN | END | NAME_GROUP => Cat::Drop,
_ => Cat::Transparent,
}
}
fn is_begin_or_end(kind: badness::syntax::SyntaxKind) -> bool {
use badness::syntax::SyntaxKind::{BEGIN, END};
matches!(kind, BEGIN | END)
}
fn is_command_token(kind: badness::syntax::SyntaxKind) -> bool {
use badness::syntax::SyntaxKind::{CONTROL_SYMBOL, CONTROL_WORD};
matches!(kind, CONTROL_WORD | CONTROL_SYMBOL)
}
fn is_word_token(kind: badness::syntax::SyntaxKind) -> bool {
kind == badness::syntax::SyntaxKind::WORD
}
fn is_verbatim_token(kind: badness::syntax::SyntaxKind) -> bool {
use badness::syntax::SyntaxKind::{VERB, VERBATIM_BODY};
matches!(kind, VERB | VERBATIM_BODY)
}
}
/// Parses `text` with the `badness` parser and projects the resulting syntax
/// tree into atoms.
pub fn project_badness(text: &str) -> Vec<Atom> {
    // Keep the parse result bound for as long as its syntax root is in use.
    let parsed = badness::parser::parse(text);
    let root = parsed.syntax();
    project::<Badness>(&root)
}
pub enum Texlab {}
impl Projector for Texlab {
type Lang = texlab_syntax::latex::LatexLanguage;
fn cat(kind: texlab_syntax::latex::SyntaxKind) -> Cat {
use texlab_syntax::latex::SyntaxKind::*;
match kind {
ENVIRONMENT => Cat::Env,
BEGIN | END => Cat::Drop,
FORMULA | EQUATION => Cat::Math,
CURLY_GROUP
| CURLY_GROUP_WORD
| CURLY_GROUP_WORD_LIST
| CURLY_GROUP_COMMAND
| CURLY_GROUP_KEY_VALUE => Cat::Group,
BRACK_GROUP | BRACK_GROUP_WORD | BRACK_GROUP_KEY_VALUE | MIXED_GROUP => Cat::Opt,
GENERIC_COMMAND
| PART
| CHAPTER
| SECTION
| SUBSECTION
| SUBSUBSECTION
| PARAGRAPH
| SUBPARAGRAPH
| ENUM_ITEM
| CITATION
| PACKAGE_INCLUDE
| CLASS_INCLUDE
| LATEX_INCLUDE
| BIBLATEX_INCLUDE
| BIBTEX_INCLUDE
| GRAPHICS_INCLUDE
| SVG_INCLUDE
| INKSCAPE_INCLUDE
| VERBATIM_INCLUDE
| IMPORT
| LABEL_DEFINITION
| LABEL_REFERENCE
| LABEL_REFERENCE_RANGE
| OLD_COMMAND_DEFINITION
| NEW_COMMAND_DEFINITION
| MATH_OPERATOR
| GLOSSARY_ENTRY_DEFINITION
| GLOSSARY_ENTRY_REFERENCE
| ACRONYM_DEFINITION
| ACRONYM_DECLARATION
| ACRONYM_REFERENCE
| THEOREM_DEFINITION_AMSTHM
| THEOREM_DEFINITION_THMTOOLS
| COLOR_REFERENCE
| COLOR_DEFINITION
| COLOR_SET_DEFINITION
| TIKZ_LIBRARY_IMPORT
| ENVIRONMENT_DEFINITION
| GRAPHICS_PATH
| CAPTION
| LABEL_NUMBER
| BIBITEM
| TOC_CONTENTS_LINE
| TOC_NUMBER_LINE => Cat::Cmd,
_ => Cat::Transparent,
}
}
fn is_begin_or_end(kind: texlab_syntax::latex::SyntaxKind) -> bool {
use texlab_syntax::latex::SyntaxKind::{BEGIN, END};
matches!(kind, BEGIN | END)
}
fn is_command_token(kind: texlab_syntax::latex::SyntaxKind) -> bool {
kind == texlab_syntax::latex::SyntaxKind::COMMAND_NAME
}
fn is_word_token(kind: texlab_syntax::latex::SyntaxKind) -> bool {
use texlab_syntax::latex::SyntaxKind::{KEY, WORD};
matches!(kind, WORD | KEY)
}
fn is_verbatim_token(kind: texlab_syntax::latex::SyntaxKind) -> bool {
kind == texlab_syntax::latex::SyntaxKind::VERBATIM
}
}
/// Parses `text` with the texlab LaTeX parser (default syntax configuration)
/// and projects the resulting syntax tree into atoms.
pub fn project_texlab(text: &str) -> Vec<Atom> {
    let config = texlab_parser::SyntaxConfig::default();
    let green = texlab_parser::parse_latex(text, &config);
    project::<Texlab>(&texlab_syntax::latex::SyntaxNode::new_root(green))
}
pub fn texlab_has_error(text: &str) -> bool {
let green = texlab_parser::parse_latex(text, &texlab_parser::SyntaxConfig::default());
let root = texlab_syntax::latex::SyntaxNode::new_root(green);
root.descendants_with_tokens()
.any(|e| e.kind() == texlab_syntax::latex::SyntaxKind::ERROR)
}