xgrammar/compiler/
grammar_compiler.rs1use autocxx::prelude::*;
2
3use crate::{
4 CxxUniquePtr,
5 FFIGrammarCompiler,
6 compiler::CompiledGrammar,
7 cxx_int,
8 cxx_longlong,
9 cxx_utils,
10 grammar::{self, StructuralTagItem},
11 tokenizer_info::TokenizerInfo,
12};
13
14pub struct GrammarCompiler {
19 inner: CxxUniquePtr<FFIGrammarCompiler>,
20}
21
22impl GrammarCompiler {
23 pub fn new(
32 tokenizer_info: &TokenizerInfo,
33 max_threads: i32,
34 cache_enabled: bool,
35 cache_limit_bytes: isize,
36 ) -> Self {
37 let inner = cxx_utils::make_grammar_compiler(
38 tokenizer_info.ffi_ref(),
39 cxx_int(max_threads),
40 cache_enabled,
41 cxx_longlong(cache_limit_bytes as i64),
42 );
43 Self { inner }
44 }
45
46 pub fn compile_json_schema(
61 &mut self,
62 schema: &str,
63 any_whitespace: bool,
64 indent: Option<i32>,
65 separators: Option<(impl AsRef<str>, impl AsRef<str>)>,
66 strict_mode: bool,
67 max_whitespace_cnt: Option<i32>,
68 ) -> CompiledGrammar {
69 cxx::let_cxx_string!(schema_cxx = schema);
70 let has_indent = indent.is_some();
71 let indent_i32: i32 = indent.unwrap_or(0);
72 let has_separators = separators.is_some();
73 let (sep_comma, sep_colon) = if let Some((comma, colon)) = separators {
74 (comma.as_ref().to_string(), colon.as_ref().to_string())
75 } else {
76 (String::new(), String::new())
77 };
78 cxx::let_cxx_string!(sep_comma_cxx = sep_comma.as_str());
79 cxx::let_cxx_string!(sep_colon_cxx = sep_colon.as_str());
80
81 let unique_ptr = cxx_utils::compiler_compile_json_schema(
82 self.inner.as_mut().expect("GrammarCompiler inner is null"),
83 &schema_cxx,
84 any_whitespace,
85 has_indent,
86 cxx_int(indent_i32),
87 has_separators,
88 &sep_comma_cxx,
89 &sep_colon_cxx,
90 strict_mode,
91 max_whitespace_cnt.is_some(),
92 cxx_int(max_whitespace_cnt.unwrap_or(0)),
93 );
94 CompiledGrammar::from_unique_ptr(unique_ptr)
95 }
96
97 pub fn compile_builtin_json_grammar(&mut self) -> CompiledGrammar {
99 let unique_ptr = cxx_utils::compiler_compile_builtin_json(
100 self.inner.as_mut().expect("GrammarCompiler inner is null"),
101 );
102 CompiledGrammar::from_unique_ptr(unique_ptr)
103 }
104
105 pub fn compile_regex(
107 &mut self,
108 regex: &str,
109 ) -> CompiledGrammar {
110 cxx::let_cxx_string!(regex_cxx = regex);
111 let unique_ptr = cxx_utils::compiler_compile_regex(
112 self.inner.as_mut().expect("GrammarCompiler inner is null"),
113 ®ex_cxx,
114 );
115 CompiledGrammar::from_unique_ptr(unique_ptr)
116 }
117
118 pub fn compile_structural_tag(
124 &mut self,
125 tags: &[StructuralTagItem],
126 triggers: &[impl AsRef<str>],
127 ) -> CompiledGrammar {
128 use serde_json::json;
130 let mut tag_entries = Vec::new();
131 for tag in tags {
132 let schema_value: serde_json::Value =
133 serde_json::from_str(&tag.schema)
134 .expect("Invalid JSON schema in StructuralTagItem");
135 let content = json!({
136 "type": "json_schema",
137 "json_schema": schema_value
138 });
139 tag_entries.push(json!({
140 "type": "tag",
141 "begin": tag.begin,
142 "content": content,
143 "end": tag.end,
144 }));
145 }
146 let triggers_vec: Vec<String> =
147 triggers.iter().map(|t| t.as_ref().to_string()).collect();
148 let format_obj = json!({
149 "type": "triggered_tags",
150 "triggers": triggers_vec,
151 "tags": tag_entries,
152 });
153 let structural_tag_json = json!({
154 "type": "structural_tag",
155 "format": format_obj,
156 })
157 .to_string();
158
159 cxx::let_cxx_string!(structural_tag_str = structural_tag_json);
160 let unique_ptr = cxx_utils::compiler_compile_structural_tag(
161 self.inner.as_mut().expect("GrammarCompiler inner is null"),
162 &structural_tag_str,
163 );
164 CompiledGrammar::from_unique_ptr(unique_ptr)
165 }
166
167 pub fn compile_grammar(
169 &mut self,
170 grammar: &grammar::Grammar,
171 ) -> CompiledGrammar {
172 cxx::let_cxx_string!(error_out_cxx = "");
173 let unique_ptr = unsafe {
174 cxx_utils::compiler_compile_grammar_or_error(
175 self.inner.as_mut().expect("GrammarCompiler inner is null"),
176 grammar.ffi_ref(),
177 error_out_cxx.as_mut().get_unchecked_mut(),
178 )
179 };
180 if unique_ptr.is_null() {
181 let msg = error_out_cxx.to_string();
182 panic!("CompileGrammar threw: {}", msg);
183 }
184 CompiledGrammar::from_unique_ptr(unique_ptr)
185 }
186
187 pub fn compile_grammar_from_ebnf(
194 &mut self,
195 ebnf_string: &str,
196 root_rule_name: &str,
197 ) -> CompiledGrammar {
198 let grammar = grammar::Grammar::from_ebnf(ebnf_string, root_rule_name);
199 self.compile_grammar(&grammar)
200 }
201
202 pub fn clear_cache(&mut self) {
204 self.inner.as_mut().expect("GrammarCompiler inner is null").ClearCache();
205 }
206
207 pub fn get_cache_size_bytes(&self) -> i64 {
209 self.inner.as_ref().expect("GrammarCompiler inner is null").GetCacheSizeBytes().into()
210 }
211
212 pub fn cache_limit_bytes(&self) -> i64 {
214 self.inner.as_ref().expect("GrammarCompiler inner is null").CacheLimitBytes().into()
215 }
216}
217
218impl Drop for GrammarCompiler {
219 fn drop(&mut self) {
220 }
221}