1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
//! Specification structure of a set of LaTeX commands.
//!
//! The specification will be passed to MiTeX for converting LaTeX code
//! correctly. For example, MiTeX Parser uses it to produce an AST that respect
//! the shape of commands.
//!
//! Note: since we need to process environments statically, users cannot
//! override the `\begin` and `\end` commands.
//!
//! See <https://github.com/mitex-rs/mitex/blob/main/docs/spec.typ> for detailed description.
use std::sync::Arc;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
#[cfg(feature = "rkyv")]
use rkyv::{Archive, Deserialize as rDeser, Serialize as rSer};
pub mod preludes;
pub mod query;
mod stream;
pub use query::CommandSpecRepr as JsonCommandSpec;
/// An item of command specification. It is either a normal _command_ or an
/// _environment_.
/// See [Command Syntax] for concept of _command_.
/// See [Environment Syntax] for concept of _environment_.
///
/// [Command Syntax]: https://latexref.xyz/LaTeX-command-syntax.html
/// [Environment Syntax]: https://latexref.xyz/Environment-syntax.html
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum CommandSpecItem {
/// Specifies a TeX command
/// e.g. `\hat`, `\sum`, `\sqrt`
Cmd(CmdShape),
/// Specifies a TeX environment
/// e.g. `equation`, `matrix`
Env(EnvShape),
}
/// Command specification that contains a set of commands and environments.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct CommandSpecRepr {
/// A map from command name to command specification
pub commands: fxhash::FxHashMap<String, CommandSpecItem>,
}
/// Command specification that is cheap to clone
#[derive(Debug, Clone)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct CommandSpec(Arc<CommandSpecRepr>);
#[cfg(feature = "rkyv")]
impl CommandSpec {
/// Serializes the command specification into bytes in rkyv format.
///
/// # Panics
/// Panics if rkyv doesn't work properly.
pub fn to_bytes(&self) -> Vec<u8> {
// Or you can customize your serialization for better performance
// and compatibility with #![no_std] environments
use rkyv::ser::{serializers::AllocSerializer, Serializer};
let mut serializer = AllocSerializer::<0>::default();
serializer.serialize_value(self.0.as_ref()).unwrap();
let bytes = serializer.into_serializer().into_inner();
bytes.into_vec()
}
/// Deserializes the command specification from bytes in rkyv format.
#[cfg(feature = "rkyv-validation")]
pub fn from_bytes(bytes: &[u8]) -> Self {
let s = stream::BytesModuleStream::from_slice(bytes);
Self(Arc::new(s.checkout_owned()))
}
/// # Safety
/// The data source must be trusted and valid.
pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
let s = stream::BytesModuleStream::from_slice(bytes);
Self(Arc::new(s.checkout_owned_unchecked()))
}
}
impl CommandSpec {
/// Create a new command specification
pub fn new(commands: fxhash::FxHashMap<String, CommandSpecItem>) -> Self {
Self(Arc::new(CommandSpecRepr { commands }))
}
/// Get an item by name
pub fn get(&self, name: &str) -> Option<&CommandSpecItem> {
self.0.commands.get(name)
}
/// Iterate all items
pub fn items(&self) -> impl Iterator<Item = (&str, &CommandSpecItem)> {
self.0.commands.iter().map(|(k, v)| (k.as_str(), v))
}
/// Get an item by name in kind of _command_
pub fn get_cmd(&self, name: &str) -> Option<&CmdShape> {
self.get(name).and_then(|item| {
if let CommandSpecItem::Cmd(item) = item {
Some(item)
} else {
None
}
})
}
/// Get an item by name in kind of _environment_
pub fn get_env(&self, name: &str) -> Option<&EnvShape> {
self.get(name).and_then(|item| {
if let CommandSpecItem::Env(item) = item {
Some(item)
} else {
None
}
})
}
}
/// Shape of a TeX command.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct CmdShape {
/// Describes how we could match the arguments of a command item.
pub args: ArgShape,
/// Makes the command alias to some Typst handler.
/// For exmaple, alias `\prod` to Typst's `product`
pub alias: Option<String>,
}
/// Shape of a TeX envionment.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct EnvShape {
/// Describes how we could match the arguments of an environment item.
pub args: ArgPattern,
/// Specifies how we could process items before passing them
/// to the Typst handler
pub ctx_feature: ContextFeature,
/// Makes the command alias to some Typst handler.
/// For exmaple, alias `\pmatrix` to a Typst function `pmat` in scope.
pub alias: Option<String>,
}
/// The character encoding used for argument matching
pub mod argument_kind {
/// The character used for matching argument in a term (curly group or
/// others)
pub const ARGUMENT_KIND_TERM: char = 't';
/// The character used for matching argument in a bracket group
pub const ARGUMENT_KIND_BRACKET: char = 'b';
/// The character used for matching argument in a parenthesis group
pub const ARGUMENT_KIND_PAREN: char = 'p';
}
/// A shared string that represents a glob pattern.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub struct GlobStr(pub Arc<str>);
impl From<&str> for GlobStr {
fn from(s: &str) -> Self {
Self(s.into())
}
}
#[cfg(feature = "serde")]
mod glob_str_impl {
use super::GlobStr;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
impl Serialize for GlobStr {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.0.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for GlobStr {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
Ok(GlobStr(String::deserialize(deserializer)?.into()))
}
}
}
/// An efficient pattern used for argument matching.
///
/// There are four kinds of pattern. The most powerful one is
/// [`ArgPattern::Glob`], which matches an sequence of input as arguments. Among
/// these four kinds, [`ArgPattern::Glob`] can already match all possible inputs
/// in our use cases. But one should specify a fixed length pattern
/// ([`ArgPattern::FixedLenTerm`]), a range length pattern
/// ([`ArgPattern::RangeLenTerm`]), or a greedy pattern
/// ([`ArgPattern::Greedy`]) to achieve better performance.
///
/// Let us look at usage of a glob pattern by \sqrt, which is `{,b}t`.
///
/// - Example 1. For `\sqrt{2}{3}`, parser requires the pattern to match with an
/// encoded string `tt`. Here, `{,b}t` matches and yields the string `t`
/// (which corresponds to `{2}`).
///
/// - Example 2. For `\sqrt[1]{2}{2}`, parser requires the pattern to match with
/// an encoded string `btt`. Here, `{,b}t` matches and yields the string `bt`
/// (which corresponds to `[1]{2}`).
///
/// Kinds of item to match:
/// - Bracket/b: []
/// - Parenthesis/p: ()
/// - Term/t: any remaining terms, typically {} or a single char
///
/// Note: any prefix of the argument pattern are matched during the parse stage,
/// so you need to check whether it is complete in later stages.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ArgPattern {
/// No arguments are passed, i.e. this is processed as a variable in Typst.
///
/// E.g. `\alpha` => `$alpha$`, where `\alpha` has an argument pattern of
/// `None`
#[cfg_attr(feature = "serde", serde(rename = "none"))]
None,
/// Fixed length pattern, equivalent to repeat `{,t}` for `x` times
///
/// E.g. `\hat x y` => `$hat(x) y$`, where `\hat` has an argument pattern of
/// `FixedLenTerm(1)`
///
/// E.g. `1 \sum\limits` => `$1 limits(sum)$`, where `\limits` has an
/// argument pattern of `FixedLenTerm(1)`
#[cfg_attr(feature = "serde", serde(rename = "fixed-len"))]
FixedLenTerm {
/// The length of the arguments should be matched
len: u8,
},
/// Range length pattern (matches as much as possible), equivalent to
/// repeat `t` for `x` times, then repeat `{,t}` for `y` times.
///
/// No example
#[cfg_attr(feature = "serde", serde(rename = "range-len"))]
RangeLenTerm {
/// The minimum length of the arguments should be matched
min: u8,
/// The maximum length of the arguments should be matched
max: u8,
},
/// Receives any items as much as possible, equivalent to `*`.
///
/// E.g. \over, \displaystyle
#[cfg_attr(feature = "serde", serde(rename = "greedy"))]
Greedy,
/// The most powerful pattern, but slightly slow.
/// Note that the glob must accept the whole prefix of the input.
///
/// E.g. \sqrt has a glob argument pattern of `{,b}t`
///
/// Description of the glob pattern:
/// - {,b}: first, it matches a bracket option, e.g. `\sqrt[3]`
/// - t: it then matches a single term, e.g. `\sqrt[3]{a}` or `\sqrt{a}`
#[cfg_attr(feature = "serde", serde(rename = "glob"))]
Glob(GlobStr),
}
// struct ArgShape(ArgPattern, Direction);
/// Shape of arguments with direction to match since.
///
/// Note: We currently only support
/// - `Direction::Right` with any `ArgPattern`
/// - `Direction::Left` with `ArgPattern::FixedLenTerm(1)`
/// - `Direction::Infix` with `ArgPattern::Greedy`
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ArgShape {
/// A command that associates with the right side of items.
///
/// E.g. `\hat`
#[cfg_attr(feature = "serde", serde(rename = "right"))]
Right {
/// The pattern to match the arguments
pattern: ArgPattern,
},
/// A command that associates with the left side of items, and with
/// `ArgPattern::FixedLenTerm(1)`.
///
/// E.g. `\limits`
#[cfg_attr(feature = "serde", serde(rename = "left1"))]
Left1,
/// A command that associates with both side of items, and with
/// `ArgPattern::Greedy`, also known as infix operators.
///
/// E.g. `\over`
#[cfg_attr(feature = "serde", serde(rename = "infix-greedy"))]
InfixGreedy,
}
/// A feature that specifies how to process the content of an environment.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "kind"))]
#[cfg_attr(feature = "rkyv", derive(Archive, rDeser, rSer))]
#[cfg_attr(feature = "rkyv-validation", archive(check_bytes))]
pub enum ContextFeature {
/// No special feature
#[cfg_attr(feature = "serde", serde(rename = "none"))]
None,
/// Parse content like math environments
#[cfg_attr(feature = "serde", serde(rename = "is-math"))]
IsMath,
/// Parse content like mat arguments
#[cfg_attr(feature = "serde", serde(rename = "is-matrix"))]
IsMatrix,
/// Parse content like cases
#[cfg_attr(feature = "serde", serde(rename = "is-cases"))]
IsCases,
/// Parse content like itemize
#[cfg_attr(feature = "serde", serde(rename = "is-itemize"))]
IsItemize,
/// Parse content like enumerate
#[cfg_attr(feature = "serde", serde(rename = "is-enumerate"))]
IsEnumerate,
}