use std::borrow::Cow;
use std::collections::HashMap;
use std::sync::LazyLock;
use serde::Deserialize;
use smol_str::SmolStr;
/// Delimiter style of a single command or environment argument.
///
/// The JSON loader in this file maps `"req"` entries to [`ArgKind::Brace`]
/// and `"opt"` entries to [`ArgKind::Bracket`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ArgKind {
    /// Brace-delimited argument.
    Brace,
    /// Bracket-delimited argument.
    Bracket,
}
/// Description of one positional argument of a command or environment.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct ArgSpec {
    /// Whether the argument is mandatory (`"req"` in the JSON schema).
    pub required: bool,
    /// Delimiter style of the argument.
    pub kind: ArgKind,
    /// Per-argument `prose` flag from the signature data (defaults to `false`).
    // NOTE(review): presumably marks arguments that hold running text — confirm with the consumer.
    pub prose: bool,
    /// Per-argument `collapse` flag from the signature data (defaults to `false`).
    // NOTE(review): exact meaning is not visible in this file — confirm with the consumer.
    pub collapse: bool,
}
/// Signature of a command: its arguments plus a handful of behavior flags.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct CommandSig {
    /// Argument specs in declaration order. Borrowed when built through the
    /// `const` constructor, owned when loaded from JSON.
    pub args: Cow<'static, [ArgSpec]>,
    /// Sectioning level when the command is a sectioning command, else `None`.
    pub sectioning: Option<u8>,
    /// `verbatim` flag from the signature data.
    // NOTE(review): presumably the argument is to be taken verbatim — confirm with the consumer.
    pub verbatim: bool,
    /// `rule` flag from the signature data (meaning not visible in this file).
    pub rule: bool,
    /// `inline` flag from the signature data (meaning not visible in this file).
    pub inline: bool,
}
/// Outline category an environment can be assigned in the signature data.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutlineKind {
    /// `"float"` in the JSON schema.
    Float,
    /// `"theorem"` in the JSON schema.
    Theorem,
}
/// Signature of an environment: its arguments plus behavior flags.
///
/// `reflow` and `block` are derived values: both constructors in this file
/// compute them via `derive_reflow` / `derive_block` rather than taking them
/// as input.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct EnvironmentSig {
    /// Argument specs in declaration order. Borrowed when built through the
    /// `const` constructor, owned when loaded from JSON.
    pub args: Cow<'static, [ArgSpec]>,
    /// `verbatimBody` flag from the signature data.
    pub verbatim_body: bool,
    /// `math` flag from the signature data.
    pub math: bool,
    /// `code` flag from the signature data.
    pub code: bool,
    /// `align` flag from the signature data.
    pub align: bool,
    /// Derived: `true` only when none of `verbatim_body`, `math`, `code` is set.
    pub reflow: bool,
    /// `noIndent` flag from the signature data.
    pub no_indent: bool,
    /// `list` flag from the signature data.
    pub list: bool,
    /// Derived: the explicit `block` flag, or any of `math`, `list`, `no_indent`.
    pub block: bool,
    /// Outline category, if one is assigned.
    pub outline: Option<OutlineKind>,
}
/// Computes the derived `reflow` flag of an environment.
///
/// Returns `true` only when none of `verbatim_body`, `math` and `code` is set.
pub(crate) const fn derive_reflow(verbatim_body: bool, math: bool, code: bool) -> bool {
    !verbatim_body && !math && !code
}
/// Computes the derived `block` flag of an environment.
///
/// An environment is a block when it is explicitly marked as one, or when it
/// is a math, list, or no-indent environment.
pub(crate) const fn derive_block(
    block_explicit: bool,
    math: bool,
    list: bool,
    no_indent: bool,
) -> bool {
    // An explicit marker wins outright; otherwise any implying flag suffices.
    if block_explicit {
        return true;
    }
    math || list || no_indent
}
/// `const` constructor for an [`ArgSpec`]; stores each parameter in the
/// field of the same name.
pub(crate) const fn arg(required: bool, kind: ArgKind, prose: bool, collapse: bool) -> ArgSpec {
ArgSpec {
required,
kind,
prose,
collapse,
}
}
/// `const` constructor for a [`CommandSig`] backed by a static argument slice.
///
/// `args` is stored as `Cow::Borrowed`, so no allocation takes place; the
/// remaining parameters are stored in the fields of the same name.
pub(crate) const fn command(
args: &'static [ArgSpec],
sectioning: Option<u8>,
verbatim: bool,
rule: bool,
inline: bool,
) -> CommandSig {
CommandSig {
args: Cow::Borrowed(args),
sectioning,
verbatim,
rule,
inline,
}
}
/// `const` constructor for an [`EnvironmentSig`] backed by a static argument
/// slice.
///
/// `reflow` and `block` are not parameters: they are computed from the other
/// flags with `derive_reflow` and `derive_block`. `block_explicit` is the
/// explicit block marker fed into `derive_block`.
// One parameter per signature field is kept on purpose so this constructor
// stays a flat `const fn`.
#[allow(clippy::too_many_arguments)]
pub(crate) const fn environment(
    args: &'static [ArgSpec],
    verbatim_body: bool,
    math: bool,
    code: bool,
    align: bool,
    no_indent: bool,
    list: bool,
    block_explicit: bool,
    outline: Option<OutlineKind>,
) -> EnvironmentSig {
    let reflow = derive_reflow(verbatim_body, math, code);
    let block = derive_block(block_explicit, math, list, no_indent);
    EnvironmentSig {
        args: Cow::Borrowed(args),
        verbatim_body,
        math,
        code,
        align,
        reflow,
        no_indent,
        list,
        block,
        outline,
    }
}
/// Name-keyed store of command and environment signatures.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct SignatureDb {
    /// Command signatures keyed by command name.
    commands: HashMap<SmolStr, CommandSig>,
    /// Environment signatures keyed by environment name.
    environments: HashMap<SmolStr, EnvironmentSig>,
}
impl SignatureDb {
pub fn command(&self, name: &str) -> Option<&CommandSig> {
self.commands.get(name)
}
pub fn environment(&self, name: &str) -> Option<&EnvironmentSig> {
self.environments.get(name)
}
pub fn command_names(&self) -> impl Iterator<Item = &str> {
self.commands.keys().map(SmolStr::as_str)
}
pub fn environment_names(&self) -> impl Iterator<Item = &str> {
self.environments.keys().map(SmolStr::as_str)
}
pub fn insert_command(&mut self, name: impl Into<SmolStr>, sig: CommandSig) {
self.commands.insert(name.into(), sig);
}
pub fn insert_environment(&mut self, name: impl Into<SmolStr>, sig: EnvironmentSig) {
self.environments.insert(name.into(), sig);
}
pub fn merge_from(&mut self, other: &SignatureDb) {
for (name, sig) in &other.commands {
self.commands.insert(name.clone(), sig.clone());
}
for (name, sig) in &other.environments {
self.environments.insert(name.clone(), sig.clone());
}
}
}
/// Lookup view that layers a user-supplied [`SignatureDb`] over the bundled
/// and CWL tiers.
#[derive(Clone, Copy, Debug)]
pub struct Signatures<'a> {
    /// User-supplied database; consulted first on every lookup.
    user: &'a SignatureDb,
}
impl<'a> Signatures<'a> {
pub fn new(user: &'a SignatureDb) -> Self {
Self { user }
}
pub fn command(&self, name: &str) -> Option<&'a CommandSig> {
self.user
.command(name)
.or_else(|| builtin().command(name))
.or_else(|| cwl().command(name))
}
pub fn environment(&self, name: &str) -> Option<&'a EnvironmentSig> {
self.user
.environment(name)
.or_else(|| builtin().environment(name))
.or_else(|| cwl().environment(name))
}
}
/// Bundled signature data, embedded into the binary at compile time.
const SIGNATURES_JSON: &str = include_str!("../../data/signatures.json");

/// Bundled database, parsed from [`SIGNATURES_JSON`] on first access.
static DB: LazyLock<SignatureDb> = LazyLock::new(|| {
    parse(SIGNATURES_JSON).expect("bundled data/signatures.json must be valid")
});

/// Returns the bundled signature database.
///
/// # Panics
///
/// Panics on first access if the embedded JSON fails to parse.
pub fn builtin() -> &'static SignatureDb {
    LazyLock::force(&DB)
}
/// Map type used for the CWL tables: a `phf::Map` keyed by `&'static str`.
type CwlSigMap<V> = phf::Map<&'static str, V>;
// Splices in the build-generated `cwl_signatures.rs` from `OUT_DIR`.
// NOTE(review): presumably this is where `CWL_COMMANDS` and `CWL_ENVIRONMENTS`
// are defined — confirm against the build script.
include!(concat!(env!("OUT_DIR"), "/cwl_signatures.rs"));
/// Unit-struct handle over the CWL tables `CWL_COMMANDS` and
/// `CWL_ENVIRONMENTS`; it holds no data of its own.
#[derive(Debug, Clone, Copy)]
pub struct CwlDb;
impl CwlDb {
/// Looks up `name` in `CWL_COMMANDS`.
pub fn command(&self, name: &str) -> Option<&'static CommandSig> {
CWL_COMMANDS.get(name)
}
/// Looks up `name` in `CWL_ENVIRONMENTS`.
pub fn environment(&self, name: &str) -> Option<&'static EnvironmentSig> {
CWL_ENVIRONMENTS.get(name)
}
/// Iterates over every key of `CWL_COMMANDS`.
pub fn command_names(&self) -> impl Iterator<Item = &str> {
CWL_COMMANDS.keys().map(|name| &**name)
}
/// Iterates over every key of `CWL_ENVIRONMENTS`.
pub fn environment_names(&self) -> impl Iterator<Item = &str> {
CWL_ENVIRONMENTS.keys().map(|name| &**name)
}
/// Iterates over every value of `CWL_COMMANDS`.
pub fn command_sigs(&self) -> impl Iterator<Item = &'static CommandSig> {
CWL_COMMANDS.values()
}
/// Iterates over every value of `CWL_ENVIRONMENTS`.
pub fn environment_sigs(&self) -> impl Iterator<Item = &'static EnvironmentSig> {
CWL_ENVIRONMENTS.values()
}
}
/// Shared [`CwlDb`] instance handed out by [`cwl`].
static CWL: CwlDb = CwlDb;
/// Returns the handle to the CWL signature tier.
pub fn cwl() -> &'static CwlDb {
&CWL
}
/// Argument kind as spelled in the JSON schema (`"req"` / `"opt"`).
#[derive(Clone, Copy, Deserialize)]
#[serde(rename_all = "lowercase")]
enum RawArgKind {
    /// `"req"`: a required argument.
    Req,
    /// `"opt"`: an optional argument.
    Opt,
}
impl RawArgKind {
    /// `true` for `Req`, `false` for `Opt`.
    fn required(self) -> bool {
        match self {
            Self::Req => true,
            Self::Opt => false,
        }
    }

    /// Delimiter implied by the kind: `Req` uses braces, `Opt` uses brackets.
    fn kind(self) -> ArgKind {
        match self {
            Self::Opt => ArgKind::Bracket,
            Self::Req => ArgKind::Brace,
        }
    }
}
/// One argument entry in the JSON schema, in either of two accepted shapes:
/// a bare kind string, or an object with a `kind` and optional flags.
// NOTE(review): unlike `RawCommand`, `RawEnvironment` and `RawDb`, this enum
// carries no `deny_unknown_fields`, so unknown keys inside the object form are
// presumably ignored — confirm whether that is intended.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawArg {
    /// Short form, e.g. `"req"`; both flags are `false`.
    Short(RawArgKind),
    /// Object form, e.g. `{ "kind": "req", "prose": true }`.
    Full {
        /// Required or optional.
        kind: RawArgKind,
        /// `prose` flag; `false` when absent.
        #[serde(default)]
        prose: bool,
        /// `collapse` flag; `false` when absent.
        #[serde(default)]
        collapse: bool,
    },
}
/// Converts a parsed JSON argument entry into the public [`ArgSpec`].
impl From<RawArg> for ArgSpec {
    fn from(raw: RawArg) -> Self {
        // Normalize both shapes to (kind, prose, collapse); the short form
        // carries no flags, so both are false.
        let (raw_kind, prose, collapse) = match raw {
            RawArg::Short(kind) => (kind, false, false),
            RawArg::Full {
                kind,
                prose,
                collapse,
            } => (kind, prose, collapse),
        };
        ArgSpec {
            required: raw_kind.required(),
            kind: raw_kind.kind(),
            prose,
            collapse,
        }
    }
}
/// Command entry as it appears in the JSON schema. Every field is optional
/// and unknown fields are rejected.
#[derive(Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawCommand {
    /// Argument list; empty when absent.
    #[serde(default)]
    args: Vec<RawArg>,
    /// Sectioning level; `None` when absent.
    #[serde(default)]
    sectioning: Option<u8>,
    /// `verbatim` flag; `false` when absent.
    #[serde(default)]
    verbatim: bool,
    /// `rule` flag; `false` when absent.
    #[serde(default)]
    rule: bool,
    /// `inline` flag; `false` when absent.
    #[serde(default)]
    inline: bool,
}
impl From<RawCommand> for CommandSig {
fn from(raw: RawCommand) -> Self {
CommandSig {
args: Cow::Owned(raw.args.into_iter().map(ArgSpec::from).collect()),
sectioning: raw.sectioning,
verbatim: raw.verbatim,
rule: raw.rule,
inline: raw.inline,
}
}
}
/// Outline category as spelled in the JSON schema (`"float"` / `"theorem"`).
#[derive(Clone, Copy, Deserialize)]
#[serde(rename_all = "lowercase")]
enum RawOutlineKind {
    /// `"float"`.
    Float,
    /// `"theorem"`.
    Theorem,
}
/// Maps each JSON outline category onto the public variant of the same name.
impl From<RawOutlineKind> for OutlineKind {
    fn from(raw: RawOutlineKind) -> Self {
        match raw {
            RawOutlineKind::Theorem => Self::Theorem,
            RawOutlineKind::Float => Self::Float,
        }
    }
}
/// Environment entry as it appears in the JSON schema. Every field is
/// optional and unknown fields are rejected.
#[derive(Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawEnvironment {
    /// Argument list; empty when absent.
    #[serde(default)]
    args: Vec<RawArg>,
    /// JSON key `verbatimBody`; `false` when absent.
    #[serde(default, rename = "verbatimBody")]
    verbatim_body: bool,
    /// `math` flag; `false` when absent.
    #[serde(default)]
    math: bool,
    /// `code` flag; `false` when absent.
    #[serde(default)]
    code: bool,
    /// `align` flag; `false` when absent.
    #[serde(default)]
    align: bool,
    /// JSON key `noIndent`; `false` when absent.
    #[serde(default, rename = "noIndent")]
    no_indent: bool,
    /// `list` flag; `false` when absent.
    #[serde(default)]
    list: bool,
    /// Explicit block marker; the final `block` value is derived from it
    /// together with `math`, `list` and `no_indent`.
    #[serde(default)]
    block: bool,
    /// Outline category; `None` when absent.
    #[serde(default)]
    outline: Option<RawOutlineKind>,
}
impl From<RawEnvironment> for EnvironmentSig {
fn from(raw: RawEnvironment) -> Self {
EnvironmentSig {
args: Cow::Owned(raw.args.into_iter().map(ArgSpec::from).collect()),
verbatim_body: raw.verbatim_body,
math: raw.math,
code: raw.code,
align: raw.align,
reflow: derive_reflow(raw.verbatim_body, raw.math, raw.code),
no_indent: raw.no_indent,
list: raw.list,
block: derive_block(raw.block, raw.math, raw.list, raw.no_indent),
outline: raw.outline.map(OutlineKind::from),
}
}
}
/// Top-level shape of a signatures JSON document. Unknown top-level keys are
/// rejected; a `_comment` key is accepted and its value discarded.
#[derive(Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawDb {
    /// Free-form comment slot; any JSON value is accepted and ignored.
    #[serde(default, rename = "_comment")]
    _comment: Option<serde::de::IgnoredAny>,
    /// Command entries keyed by name; empty when absent.
    #[serde(default)]
    commands: HashMap<String, RawCommand>,
    /// Environment entries keyed by name; empty when absent.
    #[serde(default)]
    environments: HashMap<String, RawEnvironment>,
}
fn parse(json: &str) -> serde_json::Result<SignatureDb> {
let raw: RawDb = serde_json::from_str(json)?;
Ok(SignatureDb {
commands: raw
.commands
.into_iter()
.map(|(name, sig)| (SmolStr::new(name), sig.into()))
.collect(),
environments: raw
.environments
.into_iter()
.map(|(name, sig)| (SmolStr::new(name), sig.into()))
.collect(),
})
}
// Unit tests for the signature tiers: the bundled JSON database, the JSON
// parser, the generated CWL tables, and the layered `Signatures` lookup.
#[cfg(test)]
mod tests {
use super::*;
// The bundled JSON parses and contains a known command and environment.
#[test]
fn bundled_json_loads() {
let db = builtin();
assert!(db.command("section").is_some());
assert!(db.environment("tabular").is_some());
}
// `frac` has two arguments, both required.
#[test]
fn loads_and_resolves_known_commands() {
let db = builtin();
assert_eq!(db.command("frac").map(|c| c.args.len()), Some(2));
assert!(db.command("frac").unwrap().args.iter().all(|a| a.required));
}
// `includegraphics` keeps its optional-then-mandatory argument order.
#[test]
fn optional_then_mandatory_order_preserved() {
let args = &builtin().command("includegraphics").unwrap().args;
assert_eq!(args.len(), 2);
assert_eq!(args[0].kind, ArgKind::Bracket);
assert!(!args[0].required);
assert_eq!(args[1].kind, ArgKind::Brace);
assert!(args[1].required);
}
// `newcommand` keeps its brace / bracket / brace argument order.
#[test]
fn mixed_argument_order_round_trips() {
let args = &builtin().command("newcommand").unwrap().args;
let kinds: Vec<_> = args.iter().map(|a| a.kind).collect();
assert_eq!(
kinds,
vec![ArgKind::Brace, ArgKind::Bracket, ArgKind::Brace]
);
}
// Outline categories in the bundled data: floats, theorems, and none.
#[test]
fn outline_categories_assigned() {
let db = builtin();
assert_eq!(
db.environment("figure").unwrap().outline,
Some(OutlineKind::Float)
);
assert_eq!(
db.environment("table*").unwrap().outline,
Some(OutlineKind::Float)
);
assert_eq!(
db.environment("theorem").unwrap().outline,
Some(OutlineKind::Theorem)
);
assert_eq!(db.environment("center").unwrap().outline, None);
}
// Sectioning levels in the bundled data; non-sectioning commands have none.
#[test]
fn sectioning_levels_assigned() {
let db = builtin();
assert_eq!(db.command("part").unwrap().sectioning, Some(0));
assert_eq!(db.command("section").unwrap().sectioning, Some(2));
assert_eq!(db.command("subsubsection").unwrap().sectioning, Some(4));
assert_eq!(db.command("section").unwrap().args.len(), 2);
assert!(db.command("textbf").unwrap().sectioning.is_none());
}
// `verb` and `lstinline` carry the verbatim flag; `textbf` does not.
#[test]
fn verbatim_commands_flagged() {
assert!(builtin().command("verb").unwrap().verbatim);
assert!(builtin().command("lstinline").unwrap().verbatim);
assert!(!builtin().command("textbf").unwrap().verbatim);
}
// Both argument shapes parse; `prose` defaults to false and is honored
// when given in the object form.
#[test]
fn prose_arg_parses_from_both_forms() {
let db = parse(
r#"{ "commands": {
"short": { "args": ["req"] },
"full": { "args": ["opt", { "kind": "req", "prose": true }] }
} }"#,
)
.expect("valid prose schema");
let short = &db.command("short").unwrap().args;
assert!(!short[0].prose);
let full = &db.command("full").unwrap().args;
assert_eq!(full[0].kind, ArgKind::Bracket);
assert!(!full[0].prose); assert_eq!(full[1].kind, ArgKind::Brace);
assert!(full[1].prose);
}
// In the bundled data `footnote` has a prose argument and `label` has none.
#[test]
fn bundled_prose_args_flagged() {
let footnote = &builtin().command("footnote").unwrap().args;
assert!(footnote.iter().any(|a| a.prose));
let label = &builtin().command("label").unwrap().args;
assert!(label.iter().all(|a| !a.prose));
}
// `tabular` takes bracket-then-brace arguments; `verbatim` takes none.
#[test]
fn environment_argument_shapes() {
let db = builtin();
let tabular = db.environment("tabular").unwrap();
assert_eq!(tabular.args.len(), 2);
assert_eq!(tabular.args[0].kind, ArgKind::Bracket); assert_eq!(tabular.args[1].kind, ArgKind::Brace); assert!(db.environment("verbatim").unwrap().args.is_empty());
}
// Spot-checks environment flags in the bundled data and the derived
// `reflow` value for verbatim, math, alignment, list and code-like entries.
#[test]
fn environment_flags_and_derived_reflow() {
let db = builtin();
let lstlisting = db.environment("lstlisting").unwrap();
assert!(lstlisting.verbatim_body);
assert!(!lstlisting.reflow);
let equation = db.environment("equation").unwrap();
assert!(equation.math);
assert!(!equation.reflow);
assert!(!equation.align);
let align = db.environment("align").unwrap();
assert!(align.math);
assert!(align.align);
let pmatrix = db.environment("pmatrix").unwrap();
assert!(pmatrix.math);
assert!(pmatrix.align);
let tabular = db.environment("tabular").unwrap();
assert!(!tabular.verbatim_body);
assert!(!tabular.math);
assert!(tabular.align);
assert!(!tabular.list);
for name in ["itemize", "enumerate", "description"] {
let env = db.environment(name).unwrap();
assert!(env.list, "{name} should be a list environment");
assert!(env.reflow);
assert!(!env.math);
}
for name in [
"Code",
"CodeInput",
"CodeOutput",
"Sinput",
"Soutput",
"Scode",
] {
let env = db.environment(name).unwrap();
assert!(env.verbatim_body, "{name} should be a verbatim environment");
assert!(!env.reflow);
}
}
// `block` is set for each of these bundled environments, whether marked
// explicitly or derived from another flag; `center` still reflows.
#[test]
fn block_flag_is_explicit_or_derived() {
let db = builtin();
assert!(db.environment("figure").unwrap().block);
assert!(db.environment("center").unwrap().block);
assert!(db.environment("verbatim").unwrap().block);
assert!(db.environment("equation").unwrap().block);
assert!(db.environment("itemize").unwrap().block);
assert!(db.environment("document").unwrap().block);
assert!(db.environment("center").unwrap().reflow);
}
// Signatures for the doc/ltxdoc commands and environments in the bundled data.
#[test]
fn doc_ltxdoc_signatures() {
let db = builtin();
for name in ["DocInput", "DescribeMacro", "DescribeEnv", "StopEventually"] {
let cmd = db
.command(name)
.unwrap_or_else(|| panic!("{name} signature"));
assert_eq!(cmd.args.len(), 1, "{name} arity");
assert!(cmd.args[0].required, "{name} arg is mandatory");
}
for name in ["macro", "environment"] {
let env = db.environment(name).unwrap_or_else(|| panic!("{name} env"));
assert_eq!(env.args.len(), 1, "{name} arity");
assert!(env.block, "{name} is a block env");
assert!(env.reflow, "{name} body reflows as prose");
assert!(!env.code, "{name} is not a code env");
}
for name in ["macrocode", "macrocode*"] {
let env = db.environment(name).unwrap_or_else(|| panic!("{name} env"));
assert!(env.code, "{name} is code");
assert!(!env.reflow, "{name} never reflows");
assert!(!env.verbatim_body, "{name} body is parsed, not verbatim");
assert!(env.block, "{name} is a block env");
}
}
// The `code` flag parses from JSON and turns `reflow` off without setting
// `verbatim_body`.
#[test]
fn code_flag_parses_and_drives_reflow() {
let db = parse(
r#"{ "environments": {
"plain": {},
"codeish": { "code": true }
} }"#,
)
.expect("valid code schema");
let plain = db.environment("plain").unwrap();
assert!(!plain.code);
assert!(plain.reflow);
let codeish = db.environment("codeish").unwrap();
assert!(codeish.code);
assert!(!codeish.reflow);
assert!(!codeish.verbatim_body);
}
// Names absent from the bundled data resolve to `None`.
#[test]
fn unknown_names_resolve_to_none() {
let db = builtin();
assert!(db.command("definitelynotacommand").is_none());
assert!(db.environment("definitelynotanenv").is_none());
}
// A misspelled field on a command entry is a parse error.
#[test]
fn rejects_unknown_fields() {
let err = parse(r#"{ "commands": { "x": { "sektioning": 2 } } }"#);
assert!(err.is_err());
}
// An empty JSON object parses to an empty database.
#[test]
fn empty_document_is_valid() {
let db = parse("{}").expect("empty object is valid");
assert!(db.command("anything").is_none());
}
// The CWL tier is populated and contributes more than 1000 command names.
#[test]
fn cwl_tier_loads_and_covers_long_tail() {
let db = cwl();
assert!(db.command("siunitx").is_some() || db.command("SI").is_some());
assert!(
db.command_names().count() > 1000,
"the CWL subset should contribute a broad name set"
);
}
// Every CWL entry has all behavior flags unset; only the argument list
// carries information.
#[test]
fn cwl_entries_carry_only_arity_no_behavior_flags() {
let db = cwl();
for sig in db.command_sigs() {
assert!(sig.sectioning.is_none());
assert!(!sig.verbatim && !sig.rule && !sig.inline);
assert!(sig.args.iter().all(|a| !a.prose && !a.collapse));
}
for sig in db.environment_sigs() {
assert!(!sig.verbatim_body && !sig.math && !sig.code && !sig.align);
assert!(!sig.no_indent && !sig.list && !sig.block);
assert!(sig.outline.is_none());
}
}
// When a name exists in both the bundled and CWL tiers, the bundled entry
// is the one returned by `Signatures`.
#[test]
fn curated_builtin_wins_over_cwl_tier() {
let empty = SignatureDb::default();
let sigs = Signatures::new(&empty);
assert!(
cwl().command("section").is_some(),
"test premise: in CWL tier"
);
assert_eq!(sigs.command("section").unwrap().sectioning, Some(2));
}
// A name present only in the CWL tier still resolves through `Signatures`.
#[test]
fn cwl_only_name_resolves_through_signatures() {
let empty = SignatureDb::default();
let sigs = Signatures::new(&empty);
let Some(name) = cwl()
.command_names()
.find(|n| builtin().command(n).is_none())
else {
panic!("expected at least one CWL-only command name");
};
let sig = sigs.command(name).expect("CWL-only name resolves");
assert!(sig.sectioning.is_none() && !sig.inline && !sig.verbatim);
}
}