use smol_str::SmolStr;
/// A grammar description in one of the supported notations.
///
/// Which variants exist depends on the enabled crate features: `JsonSchema`
/// requires the `json` feature and `Regex` requires the `regex` feature;
/// `Lark` is always available. The enum is `#[non_exhaustive]`, so code
/// outside this crate must include a wildcard arm when matching on it.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum Grammar {
/// A grammar carried as a `serde_json::Value` (stored as given).
#[cfg(feature = "json")]
#[cfg_attr(docsrs, doc(cfg(feature = "json")))]
JsonSchema(serde_json::Value),
/// A grammar carried as Lark source text.
Lark(SmolStr),
/// A grammar carried as a pre-compiled regular expression.
#[cfg(feature = "regex")]
#[cfg_attr(docsrs, doc(cfg(feature = "regex")))]
Regex(RegexGrammar),
}
/// Payload of the `Grammar::Regex` variant: the pattern text plus two
/// compiled forms of it.
///
/// The fields are private; values are built by `Grammar::regex` and read
/// through the accessor methods.
#[cfg(feature = "regex")]
#[cfg_attr(docsrs, doc(cfg(feature = "regex")))]
#[derive(Debug, Clone)]
pub struct RegexGrammar {
// The pattern text exactly as it was passed to the constructor.
pattern: SmolStr,
// The pattern compiled as written (no anchors added).
compiled: regex::Regex,
// The same pattern wrapped in start-of-input / end-of-input assertions,
// used to answer "does the whole input match?".
anchored: regex::Regex,
}
#[cfg(feature = "regex")]
#[cfg_attr(docsrs, doc(cfg(feature = "regex")))]
const _: () = {
impl RegexGrammar {
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn pattern(&self) -> &str {
&self.pattern
}
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn compiled(&self) -> ®ex::Regex {
&self.compiled
}
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn is_full_match(&self, input: &str) -> bool {
self.anchored.is_match(input)
}
}
impl Grammar {
    /// Builds a [`Grammar::Regex`] from `pattern`.
    ///
    /// Two regexes are compiled: one from `pattern` exactly as written, and
    /// an anchored one used for whole-input checks. The anchored form is
    /// produced by parsing `pattern` to HIR, wrapping it between a
    /// start-of-input and an end-of-input assertion, and compiling the text
    /// that the HIR prints back out.
    ///
    /// # Errors
    ///
    /// Returns the `regex::Error` from compiling `pattern`, or from compiling
    /// the anchored variant. The latter can fail for a pattern that compiles
    /// on its own: the re-emitted text carries extra grouping, so a pattern
    /// sitting right at a compiler limit (for example the nesting limit) can
    /// cross it once anchored.
    ///
    /// # Panics
    ///
    /// Panics if `regex-syntax` rejects a pattern that `regex` has just
    /// accepted, which would mean the two parsers disagree.
    pub fn regex(pattern: &str) -> Result<Self, regex::Error> {
        let compiled = regex::Regex::new(pattern)?;
        let parsed_hir = regex_syntax::Parser::new()
            .parse(pattern)
            .expect("regex::Regex::new accepted this pattern, regex-syntax must too");
        // The anchors are attached to the parsed tree rather than to the
        // pattern text, so they do not depend on the pattern's surface syntax
        // (inline flags, verbose-mode comments, top-level alternation).
        let anchored_hir = regex_syntax::hir::Hir::concat(std::vec![
            regex_syntax::hir::Hir::look(regex_syntax::hir::Look::Start),
            parsed_hir,
            regex_syntax::hir::Hir::look(regex_syntax::hir::Look::End),
        ]);
        // Propagate instead of panicking: the printed HIR is a different
        // (more heavily grouped) pattern than the caller's, and it is subject
        // to the same compile-time limits as any other user-supplied regex.
        let anchored = regex::Regex::new(&std::format!("{anchored_hir}"))?;
        Ok(Self::Regex(RegexGrammar {
            pattern: SmolStr::new(pattern),
            compiled,
            anchored,
        }))
    }

    /// Returns the regex compiled from the pattern as written when this is
    /// the `Regex` variant, and `None` for every other variant.
    ///
    /// The returned regex is not anchored; see
    /// [`Self::is_regex_full_match`] for whole-input checks.
    #[cfg_attr(not(tarpaulin), inline(always))]
    pub fn as_regex(&self) -> Option<&regex::Regex> {
        match self {
            Self::Regex(rg) => Some(rg.compiled()),
            _ => None,
        }
    }

    /// Returns the original pattern text when this is the `Regex` variant,
    /// and `None` for every other variant.
    #[cfg_attr(not(tarpaulin), inline(always))]
    pub fn as_regex_pattern(&self) -> Option<&str> {
        match self {
            Self::Regex(rg) => Some(rg.pattern()),
            _ => None,
        }
    }

    /// Tests whether the whole of `input` matches the regex grammar.
    ///
    /// Returns `Some(true)` / `Some(false)` for the `Regex` variant and
    /// `None` when this grammar is not a regex at all.
    #[cfg_attr(not(tarpaulin), inline(always))]
    pub fn is_regex_full_match(&self, input: &str) -> Option<bool> {
        match self {
            Self::Regex(rg) => Some(rg.is_full_match(input)),
            _ => None,
        }
    }
}
};
#[cfg(feature = "json")]
#[cfg_attr(docsrs, doc(cfg(feature = "json")))]
const _: () = {
impl Grammar {
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn json_schema(value: serde_json::Value) -> Self {
Self::JsonSchema(value)
}
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn as_json_schema(&self) -> Option<&serde_json::Value> {
if let Self::JsonSchema(v) = self {
Some(v)
} else {
None
}
}
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn is_json_schema(&self) -> bool {
matches!(self, Self::JsonSchema(_))
}
}
};
impl Grammar {
#[cfg_attr(not(tarpaulin), inline(always))]
pub fn lark(src: impl Into<SmolStr>) -> Self {
Self::Lark(src.into())
}
#[cfg_attr(not(tarpaulin), inline(always))]
pub const fn kind(&self) -> &'static str {
match self {
#[cfg(feature = "json")]
Self::JsonSchema(_) => "json_schema",
Self::Lark(_) => "lark",
#[cfg(feature = "regex")]
Self::Regex(_) => "regex",
}
}
}
/// Error stating that an engine does not support a given kind of grammar.
///
/// The rendered message interpolates both fields: the rejected kind and a
/// description of what is supported instead.
#[derive(thiserror::Error, Debug, Clone, PartialEq, Eq)]
#[error("engine does not support `{kind}` grammar (supported: {supported})")]
pub struct UnsupportedGrammar {
/// Name of the grammar kind that was rejected.
// NOTE(review): presumably one of the tags returned by `Grammar::kind` — confirm against callers.
pub kind: &'static str,
/// Text describing the grammar kind(s) that are supported.
pub supported: &'static str,
}
impl UnsupportedGrammar {
#[cfg_attr(not(tarpaulin), inline(always))]
pub const fn new(kind: &'static str, supported: &'static str) -> Self {
Self { kind, supported }
}
}
// Unit tests for `Grammar`, `RegexGrammar` and `UnsupportedGrammar`.
// Tests that touch a feature-gated variant carry the matching `cfg` so the
// suite builds under any feature combination.
#[cfg(test)]
mod tests {
use super::*;
// Imported explicitly for the `to_string()` call in the error-message test.
use std::string::ToString;
#[cfg(feature = "json")]
use serde_json::json;
// `kind()` tags are asserted as exact strings, one test per variant.
#[cfg(feature = "json")]
#[test]
fn grammar_kind_strings_are_stable_json() {
assert_eq!(Grammar::json_schema(json!({})).kind(), "json_schema");
}
#[test]
fn grammar_kind_strings_are_stable_lark() {
assert_eq!(Grammar::lark("start: \"a\"").kind(), "lark");
}
#[cfg(feature = "regex")]
#[test]
fn grammar_kind_strings_are_stable_regex() {
assert_eq!(Grammar::regex(r"[0-9]+").unwrap().kind(), "regex");
}
// The JSON accessors answer positively only for the `JsonSchema` variant.
#[cfg(feature = "json")]
#[test]
fn as_json_schema_only_returns_json_variant() {
let js = Grammar::json_schema(json!({"type":"string"}));
assert!(js.as_json_schema().is_some());
assert!(js.is_json_schema());
let lark = Grammar::lark("start: \"x\"");
assert!(lark.as_json_schema().is_none());
assert!(!lark.is_json_schema());
}
// The display message must mention both the rejected and the supported kind.
#[test]
fn unsupported_grammar_message_includes_both_kinds() {
let err = UnsupportedGrammar::new("lark", "json_schema");
let msg = err.to_string();
assert!(msg.contains("lark"));
assert!(msg.contains("json_schema"));
}
// A clone reports the same kind and carries an equal JSON value.
#[cfg(feature = "json")]
#[test]
fn grammar_is_clone() {
let a = Grammar::json_schema(json!({"type":"object"}));
let b = a.clone();
assert_eq!(a.kind(), b.kind());
assert_eq!(a.as_json_schema(), b.as_json_schema());
}
// A well-formed pattern (here already carrying its own anchors) is accepted.
#[cfg(feature = "regex")]
#[test]
fn regex_constructor_accepts_valid_pattern() {
let g = Grammar::regex(r"^[0-9]{4}-[0-9]{2}-[0-9]{2}$").expect("valid pattern");
assert!(matches!(g, Grammar::Regex(_)));
assert_eq!(g.kind(), "regex");
}
// An unterminated character class must surface as `Err`, not a panic.
#[cfg(feature = "regex")]
#[test]
fn regex_constructor_rejects_invalid_pattern() {
let result = Grammar::regex(r"[a-z");
assert!(result.is_err(), "invalid pattern must reject");
}
// The stored pattern text and the compiled regex must agree, and matching
// must stay case-sensitive (no flags are added behind the caller's back).
#[cfg(feature = "regex")]
#[test]
fn regex_pattern_and_compiled_describe_same_language() {
let g = Grammar::regex(r"yes|no").unwrap();
assert_eq!(g.as_regex_pattern(), Some("yes|no"));
let re = g.as_regex().expect("Some for Regex variant");
assert_eq!(re.as_str(), "yes|no");
assert!(re.is_match("yes"));
assert!(re.is_match("no"));
assert!(
!re.is_match("YES"),
"case-insensitive flag MUST NOT be smuggleable"
);
assert!(!re.is_match("NO"));
}
// `as_regex` hands back a usable borrow of the compiled regex.
#[cfg(feature = "regex")]
#[test]
fn as_regex_returns_borrow_for_regex_variant() {
let g = Grammar::regex(r"[0-9]+").unwrap();
let r = g.as_regex().expect("Some for Regex variant");
assert!(r.is_match("42"));
assert!(!r.is_match("abc"));
}
// `as_regex_pattern` returns the pattern text unchanged.
#[cfg(feature = "regex")]
#[test]
fn as_regex_pattern_returns_str_for_regex_variant() {
let g = Grammar::regex(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
assert_eq!(g.as_regex_pattern(), Some(r"[0-9]{4}-[0-9]{2}-[0-9]{2}"));
}
// The pattern below has no anchors of its own; a full match must still
// reject inputs that merely contain a match surrounded by extra text.
#[cfg(feature = "regex")]
#[test]
fn is_full_match_rejects_substring_padded_match() {
let g = Grammar::regex(r"[0-9]{4}-[0-9]{2}-[0-9]{2}").unwrap();
assert_eq!(g.is_regex_full_match("2026-05-09"), Some(true));
assert_eq!(
g.is_regex_full_match("abc2026-05-09xyz"),
Some(false),
"leading + trailing junk"
);
assert_eq!(
g.is_regex_full_match("2026-05-09trailing"),
Some(false),
"trailing junk"
);
assert_eq!(
g.is_regex_full_match("leading2026-05-09"),
Some(false),
"leading junk"
);
assert_eq!(g.is_regex_full_match("not a date"), Some(false));
// Top-level alternation: the anchors must apply to both arms.
let g = Grammar::regex(r"yes|no").unwrap();
assert_eq!(g.is_regex_full_match("yes"), Some(true));
assert_eq!(g.is_regex_full_match("no"), Some(true));
assert_eq!(g.is_regex_full_match("yesno"), Some(false));
}
// Verbose mode with a trailing `#` comment: anchoring must still take
// effect even though the pattern text ends inside a comment.
#[cfg(feature = "regex")]
#[test]
fn is_full_match_handles_verbose_mode_patterns() {
let g = Grammar::regex(r"(?x)[0-9]+ # trailing comment").unwrap();
assert_eq!(g.is_regex_full_match("123"), Some(true));
assert_eq!(g.is_regex_full_match("abc123xyz"), Some(false));
assert_eq!(g.is_regex_full_match("123 trailing"), Some(false));
}
// Alternations where one arm is a prefix of another (including an empty
// arm): a full match must accept the longer arms as well as the shorter.
#[cfg(feature = "regex")]
#[test]
fn is_full_match_handles_prefix_alternatives() {
let g = Grammar::regex(r"a|ab").unwrap();
assert_eq!(g.is_regex_full_match("a"), Some(true), "short arm");
assert_eq!(g.is_regex_full_match("ab"), Some(true), "long arm");
assert_eq!(g.is_regex_full_match("abc"), Some(false));
let g = Grammar::regex(r"|a").unwrap();
assert_eq!(g.is_regex_full_match(""), Some(true), "empty arm");
assert_eq!(g.is_regex_full_match("a"), Some(true), "non-empty arm");
assert_eq!(g.is_regex_full_match("aa"), Some(false));
let g = Grammar::regex(r"foo|foobar|foob").unwrap();
assert_eq!(g.is_regex_full_match("foo"), Some(true));
assert_eq!(g.is_regex_full_match("foob"), Some(true));
assert_eq!(g.is_regex_full_match("foobar"), Some(true));
assert_eq!(g.is_regex_full_match("foobaz"), Some(false));
}
// Non-regex variants answer `None` (not `Some(false)`) to a full-match query.
#[cfg(all(feature = "regex", feature = "json"))]
#[test]
fn is_regex_full_match_returns_none_for_non_regex_variants() {
assert_eq!(
Grammar::json_schema(json!({})).is_regex_full_match("anything"),
None
);
assert_eq!(
Grammar::lark("start: \"x\"").is_regex_full_match("anything"),
None
);
}
// The regex accessors return `None` for non-regex variants.
#[cfg(all(feature = "regex", feature = "json"))]
#[test]
fn as_regex_returns_none_for_non_regex_variant() {
assert!(Grammar::json_schema(json!({})).as_regex().is_none());
assert!(Grammar::lark("start: \"x\"").as_regex().is_none());
assert!(Grammar::lark("start: \"x\"").as_regex_pattern().is_none());
}
}