xgrammar/compiler/
compiled_grammar.rs

1use std::pin::Pin;
2
3use autocxx::prelude::*;
4
5use crate::{
6    CxxUniquePtr, FFICompiledGrammar, Grammar, TokenizerInfo, cxx_ulong, cxx_ulonglong,
7    cxx_utils,
8};
9
10/// This is the primary object to store compiled grammar.
11///
12/// A CompiledGrammar can be used to construct GrammarMatcher to generate token masks efficiently.
13///
14/// Notes
15/// -----
16/// Do not construct this class directly, instead use GrammarCompiler to construct the object.
17pub struct CompiledGrammar {
18    inner: CxxUniquePtr<FFICompiledGrammar>,
19}
20
21impl CompiledGrammar {
22    /// The original grammar.
23    pub fn grammar(&self) -> Grammar {
24        let inner_ref = self.inner.as_ref().expect("CompiledGrammar inner is null");
25        Grammar::from_unique_ptr(inner_ref.GetGrammar().within_unique_ptr())
26    }
27
28    /// The tokenizer info associated with the compiled grammar.
29    pub fn tokenizer_info(&self) -> TokenizerInfo {
30        let inner_ref = self.inner.as_ref().expect("CompiledGrammar inner is null");
31        TokenizerInfo::from_unique_ptr(inner_ref.GetTokenizerInfo().within_unique_ptr())
32    }
33
34    /// The approximate memory usage of the compiled grammar in bytes.
35    pub fn memory_size_bytes(&self) -> usize {
36        // MemorySizeBytes() returns C size_t, which autocxx may represent as either:
37        // - primitive usize (some build environments)
38        // - cxx_ulong newtype (other build environments)
39        //
40        // We define a trait to handle both uniformly
41        trait ToUsize {
42            fn to_usize(self) -> usize;
43        }
44
45        impl ToUsize for usize {
46            fn to_usize(self) -> usize {
47                self
48            }
49        }
50
51        #[cfg(target_os = "windows")]
52        impl ToUsize for cxx_ulong {
53            fn to_usize(self) -> usize {
54                self.0 as usize
55            }
56        }
57
58        #[cfg(not(target_os = "windows"))]
59        impl ToUsize for cxx_ulong {
60            fn to_usize(self) -> usize {
61                let val: u64 = self.into();
62                val as usize
63            }
64        }
65
66        #[cfg(target_os = "windows")]
67        impl ToUsize for cxx_ulonglong {
68            fn to_usize(self) -> usize {
69                let val: u64 = self.0.into();
70                val as usize
71            }
72        }
73
74        #[cfg(not(target_os = "windows"))]
75        impl ToUsize for cxx_ulonglong {
76            fn to_usize(self) -> usize {
77                let val: u64 = self.into();
78                val as usize
79            }
80        }
81
82        let inner_ref = self.inner.as_ref().expect("CompiledGrammar inner is null");
83        let sz = inner_ref.MemorySizeBytes().to_usize();
84        sz
85    }
86    /// Serialize the compiled grammar to a JSON string.
87    /// It will serialize the compiled grammar without the tokenizer info,
88    /// since the tokenizer info is shared by multiple compiled grammars.
89    ///
90    /// Notes
91    /// -----
92    /// The metadata of the tokenizer info is serialized and will be checked when deserializing.
93    pub fn serialize_json(&self) -> String {
94        let inner_ref = self.inner.as_ref().expect("CompiledGrammar inner is null");
95        inner_ref.SerializeJSON().to_string()
96    }
97
98    /// Deserialize the compiled grammar from a JSON string and associate it with the specified
99    /// tokenizer info.
100    ///
101    /// Returns
102    /// - Ok(CompiledGrammar) on success
103    /// - Err(String) if the JSON is invalid, format mismatch, version mismatch, or tokenizer
104    ///   metadata does not match. The error string mirrors the C++ exception message.
105    pub fn deserialize_json(
106        json: &str,
107        tokenizer_info: &TokenizerInfo,
108    ) -> Result<Self, String> {
109        cxx::let_cxx_string!(json_cxx = json);
110        cxx::let_cxx_string!(error_out_cxx = "");
111        let unique_ptr = unsafe {
112            cxx_utils::compiled_grammar_deserialize_json_or_error(
113                &json_cxx,
114                tokenizer_info.ffi_ref(),
115                error_out_cxx.as_mut().get_unchecked_mut(),
116            )
117        };
118        if unique_ptr.is_null() {
119            return Err(error_out_cxx.to_string());
120        }
121        Ok(Self { inner: unique_ptr })
122    }
123
124    pub(crate) fn from_unique_ptr(inner: cxx::UniquePtr<FFICompiledGrammar>) -> Self {
125        Self { inner }
126    }
127
128    pub(crate) fn ffi_ref(&self) -> &FFICompiledGrammar {
129        self.inner.as_ref().expect("CompiledGrammar inner is null")
130    }
131}
132
133impl Drop for CompiledGrammar {
134    fn drop(&mut self) {
135    }
136}