xgrammar 0.4.2

Rust bindings for XGrammar
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
#![recursion_limit = "256"]
//! Safe, idiomatic Rust bindings for the [xgrammar](https://github.com/mlc-ai/xgrammar)
//! C++ library for constrained decoding of large language models.
//!
//! This crate wraps xgrammar's grammar compilation and token-level matching so
//! you can drive constrained generation (JSON schema, regex, BNF, structural
//! tags) from Rust while retaining the performance of the upstream C++
//! implementation.
//!
//! # Highlights
//!
//! - [`Grammar`], [`GrammarCompiler`], [`CompiledGrammar`], [`TokenizerInfo`] —
//!   compile grammars (BNF, JSON schema, regex, structural tags) against a
//!   tokenizer.
//! - [`GrammarMatcher`] — token-by-token constrained decoding, including
//!   [`GrammarMatcher::is_completed`] (root-rule match without stop token) and
//!   [`GrammarMatcher::fork`] for speculative / branching decoding.
//! - [`BatchGrammarMatcher`] — batched helpers over a slice of matchers:
//!   [`BatchGrammarMatcher::batch_fill_next_token_bitmask`] is parallel and
//!   thread-pool-backed; `batch_accept_token` / `batch_accept_string` /
//!   `batch_rollback` are sequential static helpers.
//!
//! See each item's documentation for usage details, including when a
//! `BatchGrammarMatcher` instance is required vs when associated functions can
//! be called directly.

mod error;
#[cfg(feature = "hf_hub")]
pub mod huggingface_hub;

use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::path::Path;
use std::str::FromStr;

use cpp::{cpp, cpp_class};
use dlpark::{traits::TensorView, versioned::SafeManagedTensorVersioned as DLTensor};
pub use error::XGrammarErr;
use serde_json::Value;
pub use tokenizers;

/// Crate-local shorthand for results whose error type is [`XGrammarErr`].
type Result<T> = std::result::Result<T, XGrammarErr>;

/// Map from a `String` key to a `u32` value; by its name, presumably token
/// text to token id (the same shape `TokenizerInfo::new` takes as `vocab_map`).
pub type VocabMap = std::collections::HashMap<String, u32>;

/// Token identifier as a signed 32-bit integer (used for stop token ids).
pub type TokenId = i32;

// C++ prelude emitted once for the `cpp!` closures in this file. It pulls in
// the xgrammar and picojson headers and declares plain structs used to hand
// values back to Rust. Each struct has a `#[repr(C)]` Rust counterpart with the
// same name and field order declared in this file; the two declarations must be
// kept in sync by hand.
// NOTE(review): only `MetadataFromHF` is visibly produced by a closure in this
// part of the file; the `*Result` structs are presumably filled by closures
// further down — confirm.
cpp! {{
    #include "xgrammar/xgrammar.h"
    #include <picojson.h>
    #include <cstring>

    using namespace std;
    using namespace xgrammar;
    using namespace picojson;

    struct MetadataFromHF {
        VocabType vocab_type;
        bool add_prefix_space;
    };

    struct GrammarResult {
        bool success;
        Grammar grammar;
        char* error_message;
    };

    struct CompiledGrammarResult {
        bool success;
        CompiledGrammar compiled_grammar;
        char* error_message;
    };

    struct MatcherResult {
        bool success;
        bool value;
        char* error_message;
    };
}}

// Rust wrapper types bound to the corresponding C++ classes via `cpp_class!`.
// Each Rust struct named below stands for a value of the quoted C++ type.

// Rust type for the C++ `xgrammar::TokenizerInfo` class.
cpp_class!(
    pub unsafe struct TokenizerInfo as "xgrammar::TokenizerInfo"
);
// Rust type for the C++ `xgrammar::GrammarCompiler` class.
cpp_class!(
    pub unsafe struct GrammarCompiler as "xgrammar::GrammarCompiler"
);
// Rust type for the C++ `xgrammar::CompiledGrammar` class.
cpp_class!(
    pub unsafe struct CompiledGrammar as "xgrammar::CompiledGrammar"
);
// Rust type for the C++ `xgrammar::Grammar` class.
cpp_class!(
    pub unsafe struct Grammar as "xgrammar::Grammar"
);
// Rust type for the C++ `xgrammar::GrammarMatcher` class.
cpp_class!(
    pub unsafe struct GrammarMatcher as "xgrammar::GrammarMatcher"
);
// Rust type for the C++ `xgrammar::BatchGrammarMatcher` class.
cpp_class!(
    pub unsafe struct BatchGrammarMatcher as "xgrammar::BatchGrammarMatcher"
);

/// Vocabulary encoding kind of a tokenizer.
///
/// Values of this enum are passed to and returned from C++ as
/// `xgrammar::VocabType`, so the `#[repr(i32)]` representation and the explicit
/// discriminants below are part of the FFI contract.
/// NOTE(review): the discriminants are assumed to match the C++ enum — confirm
/// against the xgrammar headers when upgrading.
#[repr(i32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum VocabType {
    /// Discriminant 0.
    Raw = 0,
    /// Discriminant 1.
    ByteFallback = 1,
    /// Discriminant 2.
    ByteLevel = 2,
}

/// Tokenizer metadata returned by `TokenizerInfo::detect_metadata_from_hf`.
///
/// `#[repr(C)]` mirror of the C++ `MetadataFromHF` struct declared in the
/// `cpp!` prelude; the field order must match that declaration.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MetadataFromHF {
    /// Detected vocabulary type.
    pub vocab_type: VocabType,
    /// Value of the `add_prefix_space` entry in the detected metadata.
    pub add_prefix_space: bool,
}

/// Copies a C++-provided error message into an owned `String` and frees the
/// original C allocation.
///
/// Invalid UTF-8 in the message is replaced lossily. A null pointer is
/// tolerated: nothing is freed and a generic placeholder message is returned.
/// This keeps the failure path defined even when the C++ side could not
/// allocate a message (for example `strdup` returning null).
///
/// # Safety
/// `error_message_ptr` must be either null or a valid, NUL-terminated C string
/// allocated with `strdup` (or another `malloc`-family allocator). After this
/// call the pointer is dangling and must not be used or freed again.
unsafe fn extract_and_free_error_message(error_message_ptr: *mut std::os::raw::c_char) -> String {
    if error_message_ptr.is_null() {
        // `CStr::from_ptr` on a null pointer would be undefined behaviour.
        return String::from("unknown error (no error message was provided)");
    }

    // SAFETY: the pointer is non-null (checked above) and the caller guarantees
    // it is a valid NUL-terminated C string.
    let msg = unsafe { CStr::from_ptr(error_message_ptr) }.to_string_lossy().into_owned();

    // SAFETY: the caller guarantees the buffer came from a malloc-family
    // allocator and transfers ownership to us; `msg` is an independent copy.
    unsafe { libc::free(error_message_ptr.cast::<libc::c_void>()) };

    msg
}

/// Rust-side mirror of the C++ `GrammarResult` struct from the `cpp!` prelude.
///
/// `#[repr(C)]` keeps the fields in declaration order so the value can be
/// returned by a `cpp!` closure. It is converted into `Result<Grammar>` via the
/// `From` impl in this file, where `success` selects which field is consumed.
#[repr(C)]
pub(crate) struct GrammarResult {
    /// `true` when `grammar` carries the result, `false` when `error_message` does.
    pub success: bool,
    /// The grammar; read out by the `From` conversion only when `success` is `true`.
    pub grammar: Grammar,
    /// C string owned by this struct and released with `libc::free`; may be
    /// null (the `Drop` impl checks before freeing).
    pub error_message: *mut std::os::raw::c_char,
}

impl Drop for GrammarResult {
    /// Releases the error message buffer, if one is present.
    fn drop(&mut self) {
        let message = self.error_message;
        if message.is_null() {
            return;
        }
        // SAFETY: the pointer is non-null; it is assumed to come from a
        // malloc-family allocator (the helper docs in this file say `strdup`).
        unsafe { libc::free(message.cast::<libc::c_void>()) };
    }
}

impl From<GrammarResult> for Result<Grammar> {
    /// Converts the FFI result into `Ok(grammar)` or
    /// `Err(XGrammarErr::InvalidGrammar(message))`.
    fn from(result: GrammarResult) -> Self {
        // Disable `GrammarResult::drop`: ownership of the individual fields is
        // taken over manually below.
        let raw = std::mem::ManuallyDrop::new(result);

        if !raw.success {
            // SAFETY: on failure `error_message` is expected to be a valid C
            // string; the helper takes ownership and frees it.
            // NOTE(review): `raw.grammar` is not dropped on this path.
            let message = unsafe { extract_and_free_error_message(raw.error_message) };
            return Err(XGrammarErr::InvalidGrammar(message));
        }

        // SAFETY: `raw` is never dropped, so reading the field out moves the
        // grammar without creating a second owner.
        // NOTE(review): `error_message` is not freed on this path; presumably
        // it is null on success.
        Ok(unsafe { std::ptr::read(&raw.grammar) })
    }
}

/// Rust-side mirror of the C++ `CompiledGrammarResult` struct from the `cpp!`
/// prelude.
///
/// `#[repr(C)]` keeps the fields in declaration order. It is converted into
/// `Result<CompiledGrammar>` via the `From` impl in this file, where `success`
/// selects which field is consumed.
#[repr(C)]
pub(crate) struct CompiledGrammarResult {
    /// `true` when `compiled_grammar` carries the result, `false` when
    /// `error_message` does.
    pub success: bool,
    /// The compiled grammar; read out by the `From` conversion only when
    /// `success` is `true`.
    pub compiled_grammar: CompiledGrammar,
    /// C string owned by this struct and released with `libc::free`; may be
    /// null (the `Drop` impl checks before freeing).
    pub error_message: *mut std::os::raw::c_char,
}

impl Drop for CompiledGrammarResult {
    /// Releases the error message buffer, if one is present.
    fn drop(&mut self) {
        let message = self.error_message;
        if message.is_null() {
            return;
        }
        // SAFETY: the pointer is non-null; it is assumed to come from a
        // malloc-family allocator (the helper docs in this file say `strdup`).
        unsafe { libc::free(message.cast::<libc::c_void>()) };
    }
}

impl From<CompiledGrammarResult> for Result<CompiledGrammar> {
    /// Converts the FFI result into `Ok(compiled_grammar)` or
    /// `Err(XGrammarErr::CompilationError(message))`.
    fn from(result: CompiledGrammarResult) -> Self {
        // Disable `CompiledGrammarResult::drop`: ownership of the individual
        // fields is taken over manually below.
        let raw = std::mem::ManuallyDrop::new(result);

        if !raw.success {
            // SAFETY: on failure `error_message` is expected to be a valid C
            // string; the helper takes ownership and frees it.
            // NOTE(review): `raw.compiled_grammar` is not dropped on this path.
            let message = unsafe { extract_and_free_error_message(raw.error_message) };
            return Err(XGrammarErr::CompilationError(message));
        }

        // SAFETY: `raw` is never dropped, so reading the field out moves the
        // compiled grammar without creating a second owner.
        // NOTE(review): `error_message` is not freed on this path; presumably
        // it is null on success.
        Ok(unsafe { std::ptr::read(&raw.compiled_grammar) })
    }
}

/// Rust-side mirror of the C++ `MatcherResult` struct from the `cpp!` prelude.
///
/// `#[repr(C)]` keeps the fields in declaration order. It converts into either
/// `Result<bool>` (using `value`) or `Result<()>` (ignoring `value`) via the
/// `From` impls in this file.
#[repr(C)]
pub(crate) struct MatcherResult {
    /// `true` when the operation succeeded, `false` when `error_message` is set.
    pub success: bool,
    /// Boolean payload returned on success by the `Result<bool>` conversion.
    pub value: bool,
    /// C string owned by this struct and released with `libc::free`; may be
    /// null (the `Drop` impl checks before freeing).
    pub error_message: *mut std::os::raw::c_char,
}

impl Drop for MatcherResult {
    /// Releases the error message buffer, if one is present.
    fn drop(&mut self) {
        let message = self.error_message;
        if message.is_null() {
            return;
        }
        // SAFETY: the pointer is non-null; it is assumed to come from a
        // malloc-family allocator (the helper docs in this file say `strdup`).
        unsafe { libc::free(message.cast::<libc::c_void>()) };
    }
}

impl From<MatcherResult> for Result<bool> {
    /// Converts the FFI result into `Ok(value)` or
    /// `Err(XGrammarErr::MatcherError(message))`.
    fn from(result: MatcherResult) -> Self {
        // Disable `MatcherResult::drop`; the error message is released by the
        // helper on the failure path instead.
        let raw = std::mem::ManuallyDrop::new(result);

        if !raw.success {
            // SAFETY: on failure `error_message` is expected to be a valid C
            // string; the helper takes ownership and frees it.
            let message = unsafe { extract_and_free_error_message(raw.error_message) };
            return Err(XGrammarErr::MatcherError(message));
        }

        // NOTE(review): `error_message` is not freed on this path; presumably
        // it is null on success.
        Ok(raw.value)
    }
}

impl From<MatcherResult> for Result<()> {
    /// Converts the FFI result into `Ok(())` or
    /// `Err(XGrammarErr::MatcherError(message))`, ignoring the `value` field.
    fn from(result: MatcherResult) -> Self {
        // Disable `MatcherResult::drop`; the error message is released by the
        // helper on the failure path instead.
        let raw = std::mem::ManuallyDrop::new(result);

        if !raw.success {
            // SAFETY: on failure `error_message` is expected to be a valid C
            // string; the helper takes ownership and frees it.
            let message = unsafe { extract_and_free_error_message(raw.error_message) };
            return Err(XGrammarErr::MatcherError(message));
        }

        // NOTE(review): `error_message` is not freed on this path; presumably
        // it is null on success.
        Ok(())
    }
}

/// File name of the model configuration; `TokenizerInfo::from_path` reads it as
/// a fallback source for the EOS token id.
pub static HF_CONFIG_FILE: &str = "config.json";
/// File name of the serialized tokenizer read by `TokenizerInfo::from_path`.
pub static TOKENIZER_FILE: &str = "tokenizer.json";
/// File name of the tokenizer configuration.
pub static TOKENIZER_CONFIG_FILE: &str = "tokenizer_config.json";
/// File name of the generation configuration; `TokenizerInfo::from_path` reads
/// it as the first source for the EOS token id.
pub static GENERATION_CONFIG_FILE: &str = "generation_config.json";
/// File patterns passed as the allow-list when `TokenizerInfo::from_pretrained`
/// downloads a repository snapshot.
// NOTE(review): `HF_CONFIG_FILE` is not in this list although `from_path` falls
// back to it for `eos_token_id`; if the allow-list restricts the download, that
// fallback can never succeed for `from_pretrained` — confirm this is intended.
pub static TOKENIZER_ALLOW_PATTERN: &[&str] =
    &[TOKENIZER_FILE, TOKENIZER_CONFIG_FILE, GENERATION_CONFIG_FILE];

/// JSON key `"model"`; by its name, presumably the model section of `tokenizer.json`.
pub static TOKENIZER_MODEL_KEY: &str = "model";
/// JSON key `"vocab"`; by its name, presumably the vocabulary inside the model section.
pub static TOKENIZER_VOCAB_KEY: &str = "vocab";
/// JSON key under which `TokenizerInfo::from_path` looks up the EOS token id(s).
pub static EOS_TOKEN_ID_KEY: &str = "eos_token_id";

impl TokenizerInfo {
    pub fn from_backend_str(
        backend_str: &str,
        vocab_size: Option<usize>,
        stop_token_ids: Vec<TokenId>,
    ) -> self::Result<Self> {
        let tokenizer = tokenizers::Tokenizer::from_str(backend_str).map_err(|e| {
            XGrammarErr::TokenizerParseFailed(format!("failed to parse tokenizer: {}", e))
        })?;
        let vocab_map = tokenizer.get_vocab(true); // with added special tokens
        let max_id = vocab_map
            .values()
            .max()
            .ok_or(XGrammarErr::InvalidTokenizerConfig("Vocab map is empty".to_string()))?;
        let tokenizer_vocab_size = std::cmp::max(vocab_map.len(), (max_id + 1) as usize);
        if let Some(vocab_size) = vocab_size {
            if vocab_size != tokenizer_vocab_size {
                tracing::warn!(
                    "Provided vocab_size {} does not match tokenizer vocab size {}. Using provided vocab_size.",
                    vocab_size,
                    tokenizer_vocab_size
                );
            }
        }
        let final_vocab_size = vocab_size.unwrap_or(tokenizer_vocab_size);
        let tokenizer_metadata = Self::detect_metadata_from_hf(backend_str);
        let vocab_type = tokenizer_metadata.vocab_type;
        let add_prefix_space = tokenizer_metadata.add_prefix_space;

        Self::new(vocab_map, vocab_type, final_vocab_size, stop_token_ids, add_prefix_space)
    }

    /// Reads the token id(s) stored under `json_key` in the JSON file at `path`.
    ///
    /// The value may be a single integer or an array of integers; a single
    /// integer is returned as a one-element vector.
    ///
    /// # Returns
    /// `None` when the file cannot be read or parsed, the key is missing, the
    /// value is neither an integer nor an array, or any id is not an integer
    /// that fits in `i32`. Out-of-range ids are rejected rather than truncated,
    /// because a truncated id would silently name a different token.
    pub fn parse_eos_token(path: &Path, json_key: &str) -> Option<Vec<i32>> {
        /// Converts one JSON value to a token id, rejecting non-integers and
        /// integers outside the `i32` range.
        fn as_token_id(value: &Value) -> Option<i32> {
            value.as_i64().and_then(|id| i32::try_from(id).ok())
        }

        let contents = std::fs::read_to_string(path).ok()?;
        let json: Value = serde_json::from_str(&contents).ok()?;

        match json.get(json_key)? {
            // Collecting into `Option<Vec<_>>` yields `None` on the first invalid entry.
            Value::Array(items) => items.iter().map(as_token_id).collect(),
            scalar => as_token_id(scalar).map(|id| vec![id]),
        }
    }

    /// Builds a [`TokenizerInfo`] from a local model directory.
    ///
    /// Reads `tokenizer.json` from `path`. The EOS token id(s) are looked up
    /// under `eos_token_id` in `generation_config.json`, falling back to
    /// `config.json`, and appended to the caller-supplied `stop_token_ids`.
    /// Duplicate ids are removed, keeping the first occurrence of each.
    ///
    /// # Arguments
    /// * `path` - Directory containing the tokenizer files.
    /// * `vocab_size` - Optional vocabulary size override, forwarded to
    ///   [`TokenizerInfo::from_backend_str`].
    /// * `stop_token_ids` - Optional extra stop token ids.
    ///
    /// # Errors
    /// * `XGrammarErr::TokenizerLoadFailed` if `tokenizer.json` cannot be read.
    /// * Any error returned by [`TokenizerInfo::from_backend_str`].
    pub fn from_path<P>(
        path: P,
        vocab_size: Option<usize>,
        stop_token_ids: Option<Vec<TokenId>>,
    ) -> Result<Self>
    where
        P: AsRef<Path>,
    {
        let path = path.as_ref();
        let tokenizer_json_path = path.join(TOKENIZER_FILE);
        let backend_str = std::fs::read_to_string(&tokenizer_json_path)
            .map_err(XGrammarErr::TokenizerLoadFailed)?;

        let eos_token = Self::parse_eos_token(&path.join(GENERATION_CONFIG_FILE), EOS_TOKEN_ID_KEY)
            .or_else(|| Self::parse_eos_token(&path.join(HF_CONFIG_FILE), EOS_TOKEN_ID_KEY));

        let mut stop_token_ids = stop_token_ids.unwrap_or_default();
        stop_token_ids.extend(eos_token.unwrap_or_default());
        // `Vec::dedup` only collapses *adjacent* duplicates, so on this unsorted
        // list an id like the EOS token could survive twice (e.g. `[2, 5, 2]`).
        // Filter through a set instead; this also keeps the original order.
        let mut seen = std::collections::HashSet::new();
        stop_token_ids.retain(|id| seen.insert(*id));

        Self::from_backend_str(&backend_str, vocab_size, stop_token_ids)
    }

    /// Builds a [`TokenizerInfo`] for a model repository on the Hugging Face Hub.
    ///
    /// Requests a snapshot of the repository with `TOKENIZER_ALLOW_PATTERN` as
    /// the allow-list, then loads the result with [`TokenizerInfo::from_path`].
    ///
    /// # Arguments
    /// * `tokenizer_id` - Repository id of the model.
    /// * `revision` - Revision to fetch; defaults to `"main"`.
    /// * `vocab_size` - Optional vocabulary size override.
    /// * `stop_token_ids` - Optional extra stop token ids.
    ///
    /// # Errors
    /// * `XGrammarErr::TokenizerParseFailed` if the allow-list patterns fail to compile.
    /// * Any error from the snapshot download or from [`TokenizerInfo::from_path`].
    #[cfg(feature = "hf_hub")]
    pub fn from_pretrained(
        tokenizer_id: &str,
        revision: Option<String>,
        vocab_size: Option<usize>,
        stop_token_ids: Option<Vec<i32>>,
    ) -> Result<TokenizerInfo> {
        use huggingface_hub::{Params, Repo, RepoType, compile_glob_pattern, snapshot_download};

        let allow_patterns = match compile_glob_pattern(TOKENIZER_ALLOW_PATTERN) {
            Ok(patterns) => patterns,
            Err(e) => {
                return Err(XGrammarErr::TokenizerParseFailed(format!(
                    "Failed to compile glob patterns: {}",
                    e
                )));
            }
        };
        let params = Params { allow_patterns: Some(allow_patterns), ..Default::default() };

        let revision = revision.unwrap_or_else(|| "main".to_string());
        let repo = Repo::with_revision(tokenizer_id.to_string(), RepoType::Model, revision);

        let snapshot_dir = snapshot_download(repo, Some(params))?;
        Self::from_path(snapshot_dir, vocab_size, stop_token_ids)
    }

    fn new(
        vocab_map: HashMap<String, u32>,
        vocab_type: VocabType,
        vocab_size: usize,
        stop_token_ids: Vec<i32>,
        add_prefix_space: bool,
    ) -> self::Result<Self> {
        // Ensure the vocab size is at least as large as the max id in the vocab map
        let mut encoded_vocab = vec![CString::new("").unwrap(); vocab_size];

        // Fill the encoded_vocab with tokens from the vocab_map
        for (token, idx) in vocab_map.iter() {
            assert!(
                (*idx as usize) < vocab_size,
                "Token ID {} exceeds vocab size {}",
                idx,
                vocab_size
            );
            encoded_vocab[*idx as usize] =
                CString::new(token.as_str()).expect("fail to convert a token to CString");
        }

        let encoded_vocab_ptr: Vec<_> = encoded_vocab.iter().map(|s| s.as_ptr()).collect();
        let encoded_vocab_ptr_ptr = encoded_vocab_ptr.as_ptr();
        let vocab_size_i32 = vocab_size as i32;
        let stop_token_ids_ptr = stop_token_ids.as_ptr();
        let stop_token_ids_len = stop_token_ids.len();

        Ok(cpp!(unsafe [
            encoded_vocab_ptr_ptr as "const char* const*",
            vocab_type as "xgrammar::VocabType",
            vocab_size_i32 as "int",
            stop_token_ids_ptr as "const int32_t*",
            stop_token_ids_len as "size_t",
            add_prefix_space as "bool"
        ] -> TokenizerInfo as "xgrammar::TokenizerInfo" {
            std::vector<std::string> encoded_vocab;
            for (int i = 0; i < vocab_size_i32; ++i) {
                encoded_vocab.push_back(std::string(encoded_vocab_ptr_ptr[i]));
            }
            std::vector<int32_t> stop_token_ids(stop_token_ids_ptr, stop_token_ids_ptr + stop_token_ids_len);

            return xgrammar::TokenizerInfo(
                encoded_vocab,
                vocab_type,
                vocab_size_i32,
                stop_token_ids,
                add_prefix_space
            );
        }))
    }

    /// Returns the vocabulary type reported by the underlying C++
    /// `xgrammar::TokenizerInfo` (`GetVocabType()`).
    // C++ signature: VocabType GetVocabType() const;
    pub fn get_vocab_type(&self) -> VocabType {
        cpp!(unsafe [self as "const xgrammar::TokenizerInfo*"] -> VocabType as "xgrammar::VocabType" {
            return self->GetVocabType();
        })
    }

    /// Returns the `add_prefix_space` flag reported by the underlying C++
    /// `xgrammar::TokenizerInfo` (`GetAddPrefixSpace()`).
    // C++ signature: bool GetAddPrefixSpace() const;
    pub fn get_add_prefix_space(&self) -> bool {
        cpp!(unsafe [self as "const xgrammar::TokenizerInfo*"] -> bool as "bool" {
            return self->GetAddPrefixSpace();
        })
    }

    /// Returns the vocabulary size reported by the underlying C++
    /// `xgrammar::TokenizerInfo` (`GetVocabSize()`), as a C `int`.
    // C++ signature: int GetVocabSize() const;
    pub fn get_vocab_size(&self) -> i32 {
        cpp!(unsafe [self as "const xgrammar::TokenizerInfo*"] -> i32 as "int" {
            return self->GetVocabSize();
        })
    }

    /// Returns a copy of the decoded vocabulary held by the C++
    /// `xgrammar::TokenizerInfo` (`GetDecodedVocab()`), one `String` per entry
    /// in the order the C++ vector yields them.
    ///
    /// Entries that are not valid UTF-8 are converted lossily, so invalid byte
    /// sequences appear as U+FFFD replacement characters.
    // C++ signature: const std::vector<std::string>& GetDecodedVocab() const;
    pub fn get_decoded_vocab(&self) -> Vec<String> {
        // Avoid relying on layout-compatibility between `Vec<T>` and `std::vector<T>`
        // (Rust Vec is (ptr, cap, len); libstdc++ std::vector is (start, finish,
        // end_of_storage) — different semantics for the second/third word). Instead
        // the C++ side writes each element into a Rust-allocated `Vec<String>` via
        // the `XGR_TokInfo_DecodedVocab_push` `rust!` callback below.
        let mut out: Vec<String> = Vec::new();
        // Passed to C++ as an opaque `void*` and handed back to the callback.
        let out_ptr = &mut out as *mut Vec<String>;
        cpp!(unsafe [
            self as "const xgrammar::TokenizerInfo*",
            out_ptr as "void*"
        ] {
            const auto& vocab = self->GetDecodedVocab();
            for (const auto& s : vocab) {
                // Marshal as `uint8_t*` so the Rust side receives `*const u8`
                // directly, avoiding a `c_char`→`u8` cast whose necessity
                // varies by platform (c_char is i8 on x86_64 but u8 on arm64,
                // which makes the cast trigger `clippy::unnecessary_cast`
                // on arm64).
                const uint8_t* data = reinterpret_cast<const uint8_t*>(s.data());
                size_t len = s.size();
                rust!(XGR_TokInfo_DecodedVocab_push [
                    out_ptr: *mut Vec<String> as "void*",
                    data: *const u8 as "const uint8_t*",
                    len: usize as "size_t"
                ] {
                    // SAFETY: `data`/`len` point into the C++ std::string; the
                    // slice is only read during this call. `out_ptr` was
                    // obtained from a live `&mut Vec<String>` on the Rust side.
                    let slice = unsafe { std::slice::from_raw_parts(data, len) };
                    // Must be `from_utf8_lossy`, not `from_utf8_unchecked`:
                    // xgrammar's ByteFallback / ByteLevel decoders (see
                    // thirdparty/xgrammar/cpp/tokenizer_info.cc `DecodeToken`)
                    // can return single raw bytes (e.g. `<0x80>` → 0x80), which
                    // are not valid UTF-8. `unchecked` would be UB here.
                    let s = String::from_utf8_lossy(slice).into_owned();
                    unsafe { (*out_ptr).push(s) };
                });
            }
        });
        out
    }

    /// Asks xgrammar to detect tokenizer metadata from a serialized tokenizer.
    ///
    /// Calls the C++ `TokenizerInfo::DetectMetadataFromHF`, parses the JSON
    /// string it returns with picojson, and extracts the `vocab_type` and
    /// `add_prefix_space` entries.
    ///
    /// # Panics
    /// Panics if `backend_str` contains an interior NUL byte, since it cannot
    /// then be converted to a `CString`.
    // NOTE(review): the C++ side throws `std::runtime_error` when the metadata
    // JSON fails to parse. How an exception escaping a `cpp!` closure is handled
    // is not visible here — confirm it cannot unwind into Rust frames.
    fn detect_metadata_from_hf(backend_str: &str) -> MetadataFromHF {
        // Keep the `CString` alive in a local so the pointer below stays valid
        // for the duration of the `cpp!` call.
        let backend_str =
            CString::new(backend_str).expect("Failed to convert backend_str to CString");
        let backend_str_ptr = backend_str.as_ptr();

        cpp!(unsafe [backend_str_ptr as "const char*"] -> MetadataFromHF as "MetadataFromHF" {
            const std::string &backend_str(backend_str_ptr);
            std::string metadata_str = TokenizerInfo::DetectMetadataFromHF(backend_str);
            picojson::value v;
            std::string err = picojson::parse(v, metadata_str);
            if (!err.empty()) {
                throw std::runtime_error("Failed to parse metadata: " + err);
            }
            const picojson::object& metadata = v.get<picojson::object>();

            MetadataFromHF metadata_from_hf;
            metadata_from_hf.vocab_type = static_cast<xgrammar::VocabType>(metadata["vocab_type"].get<double>());
            metadata_from_hf.add_prefix_space = metadata["add_prefix_space"].get<bool>();
            return metadata_from_hf;
        })
    }
}

/// Accessors that forward to the underlying C++ `xgrammar::CompiledGrammar`.
impl CompiledGrammar {
    /// Return the grammar this compiled grammar was built from, as reported by
    /// the C++ `GetGrammar()` accessor.
    pub fn get_grammar(&self) -> Grammar {
        cpp!(unsafe [self as "const xgrammar::CompiledGrammar*"] -> Grammar as "xgrammar::Grammar" {
            return self->GetGrammar();
        })
    }

    /// Return the tokenizer info associated with this compiled grammar.
    pub fn get_tokenizer_info(&self) -> TokenizerInfo {
        cpp!(unsafe [self as "const xgrammar::CompiledGrammar*"] -> TokenizerInfo as "xgrammar::TokenizerInfo" {
            return self->GetTokenizerInfo();
        })
    }

    /// Return the approximate memory usage of the grammar in bytes.
    pub fn memory_size_bytes(&self) -> usize {
        cpp!(unsafe [self as "const xgrammar::CompiledGrammar*"] -> usize as "size_t" {
            return self->MemorySizeBytes();
        })
    }
}

impl GrammarCompiler {
    /// Create a new GrammarCompiler with default parameters.
    ///
    /// The GrammarCompiler is a grammar compilation utility that compiles various types of
    /// grammars into CompiledGrammar objects. It is associated with a specific tokenizer
    /// and supports caching of grammar compilation results.
    ///
    /// This is shorthand for [`GrammarCompiler::with`] with every optional argument set to
    /// `None`.
    ///
    /// # Arguments
    /// * `tokenizer_info` - The tokenizer info to use for the grammar compiler
    ///
    /// # Returns
    /// * A new GrammarCompiler instance with default settings (max_threads: 1, cache enabled,
    ///   no cache memory limit)
    pub fn new(tokenizer_info: &TokenizerInfo) -> Self {
        Self::with(tokenizer_info, None, None, None)
    }

    /// Create a new GrammarCompiler with custom parameters.
    ///
    /// This allows fine-grained control over compilation behavior including thread usage,
    /// caching, and memory limits.
    ///
    /// # Arguments
    /// * `tokenizer_info` - The tokenizer info to use for the grammar compiler
    /// * `max_threads` - The maximum number of threads to use for parallel compilation (default: 1)
    /// * `cache_enabled` - Whether to enable caching of compiled grammars (default: true)
    /// * `max_memory_bytes` - The maximum memory in bytes to use for caching. Use None for unlimited.
    ///
    /// Values that do not fit the C++ parameter types (`int` for `max_threads`, `long long`
    /// for `max_memory_bytes`) are clamped to the largest representable value instead of
    /// wrapping around.
    ///
    /// # Returns
    /// * A new GrammarCompiler instance with the specified settings
    pub fn with(
        tokenizer_info: &TokenizerInfo,
        max_threads: Option<usize>,
        cache_enabled: Option<bool>,
        max_memory_bytes: Option<usize>,
    ) -> Self {
        // A plain `as` cast would wrap: an oversized thread count could become zero or
        // negative, and an oversized limit could become a negative number. Saturate instead.
        let max_threads: i32 =
            max_threads.map_or(1, |threads| i32::try_from(threads).unwrap_or(i32::MAX));
        let cache_enabled = cache_enabled.unwrap_or(true);
        // -1 is the value passed to the C++ constructor when no limit was requested.
        let max_memory_bytes: i64 =
            max_memory_bytes.map_or(-1, |bytes| i64::try_from(bytes).unwrap_or(i64::MAX));

        cpp!(unsafe [
            tokenizer_info as "const xgrammar::TokenizerInfo*",
            max_threads as "int",
            cache_enabled as "bool",
            max_memory_bytes as "long long"
        ] -> GrammarCompiler as "xgrammar::GrammarCompiler" {
            return xgrammar::GrammarCompiler(
                *tokenizer_info,
                max_threads,
                cache_enabled,
                max_memory_bytes
            );
        })
    }

    /// Compile an existing [`Grammar`] into a [`CompiledGrammar`].
    ///
    /// The grammar may come from any constructor (EBNF, JSON schema, regex, structural
    /// tag, ...). The compiled result is what a GrammarMatcher consumes.
    ///
    /// # Arguments
    /// * `grammar` - The grammar to compile
    ///
    /// # Returns
    /// * `Ok(CompiledGrammar)` - The compiled grammar
    /// * `Err(XGrammarErr)` - The message of the C++ exception raised during compilation
    ///
    /// # Errors
    /// * Returns an error when the C++ `CompileGrammar` call throws
    ///
    /// # Example
    /// ```
    /// # use xgrammar::{Grammar, GrammarCompiler, TokenizerInfo};
    /// # fn example(tokenizer_info: &TokenizerInfo) -> xgrammar::Result<()> {
    /// let compiler = GrammarCompiler::new(tokenizer_info);
    /// let grammar = Grammar::builtin_json_grammar();
    /// let compiled = compiler.compile_grammar(&grammar)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn compile_grammar(&self, grammar: &Grammar) -> Result<CompiledGrammar> {
        // Exceptions are caught on the C++ side and carried back inside the result struct.
        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarCompiler*",
            grammar as "const xgrammar::Grammar*"
        ] -> CompiledGrammarResult as "CompiledGrammarResult" {
            try {
                return {true, self->CompileGrammar(*grammar), nullptr};
            } catch (const std::exception& e) {
                return {false, CompiledGrammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Compile the builtin grammar for standard JSON.
    ///
    /// Convenience method: the resulting compiled grammar accepts any valid JSON, with no
    /// schema constraints applied.
    ///
    /// # Returns
    /// * `Ok(CompiledGrammar)` - A compiled grammar that matches standard JSON format
    /// * `Err(XGrammarErr)` - The message of the C++ exception raised during compilation
    ///
    /// # Errors
    /// * Returns an error when the C++ `CompileBuiltinJSONGrammar` call throws (unlikely)
    ///
    /// # Example
    /// ```
    /// # use xgrammar::{GrammarCompiler, TokenizerInfo};
    /// # fn example(tokenizer_info: &TokenizerInfo) -> xgrammar::Result<()> {
    /// let compiler = GrammarCompiler::new(tokenizer_info);
    /// let compiled = compiler.compile_builtin_json_grammar()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn compile_builtin_json_grammar(&self) -> Result<CompiledGrammar> {
        // Exceptions are caught on the C++ side and carried back inside the result struct.
        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarCompiler*"
        ] -> CompiledGrammarResult as "CompiledGrammarResult" {
            try {
                return {true, self->CompileBuiltinJSONGrammar(), nullptr};
            } catch (const std::exception& e) {
                return {false, CompiledGrammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Compile a JSON schema string into a [`CompiledGrammar`].
    ///
    /// The compiled grammar enforces the schema's constraints during text generation.
    ///
    /// # Arguments
    /// * `schema` - The JSON schema string to compile
    /// * `any_whitespace` - Whether to allow flexible whitespace in the JSON output. None uses true
    /// * `indent` - Number of spaces for indentation. None means no indentation
    /// * `separators` - Custom separators as (item_separator, key_separator), e.g., `(",", ":")`.
    ///   None uses default separators
    /// * `strict_mode` - Whether to enforce strict JSON schema validation. None uses true
    /// * `max_whitespace_cnt` - Maximum number of consecutive whitespace characters allowed.
    ///   None means no limit
    ///
    /// # Returns
    /// * `Ok(CompiledGrammar)` - A compiled grammar that can be used with GrammarMatcher
    /// * `Err(XGrammarErr)` - Error if the JSON schema is invalid or compilation fails
    ///
    /// # Errors
    /// * Returns an error when the C++ `CompileJSONSchema` call throws, e.g. for an invalid
    ///   schema
    ///
    /// # Panics
    /// Panics if `schema` or either separator contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::{GrammarCompiler, TokenizerInfo};
    /// # fn example(tokenizer_info: &TokenizerInfo) -> xgrammar::Result<()> {
    /// let compiler = GrammarCompiler::new(tokenizer_info);
    /// let schema = r#"{"type": "object", "properties": {"name": {"type": "string"}}}"#;
    /// let compiled = compiler.compile_json_schema(schema, None, None, None, None, None)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn compile_json_schema(
        &self,
        schema: &str,
        any_whitespace: Option<bool>,
        indent: Option<i32>,
        separators: Option<(String, String)>,
        strict_mode: Option<bool>,
        max_whitespace_cnt: Option<i32>,
    ) -> Result<CompiledGrammar> {
        let schema_cstring = CString::new(schema).expect("Failed to convert schema to CString");
        let schema_ptr = schema_cstring.as_ptr();

        // Optional values cross the FFI boundary as a (present, value) pair.
        let any_whitespace = any_whitespace.unwrap_or(true);
        let strict_mode = strict_mode.unwrap_or(true);
        let (has_indent, indent_value) = indent.map_or((false, 0), |spaces| (true, spaces));
        let (has_max_whitespace_cnt, max_whitespace_cnt_value) =
            max_whitespace_cnt.map_or((false, 0), |cnt| (true, cnt));

        // The owned C strings live in this binding until the function returns, which keeps
        // the raw pointers derived from them valid for the duration of the FFI call.
        let separator_cstrings = separators.map(|(obj_sep, array_sep)| {
            (
                CString::new(obj_sep).expect("Failed to convert object separator to CString"),
                CString::new(array_sep).expect("Failed to convert array separator to CString"),
            )
        });
        let has_separators = separator_cstrings.is_some();
        let (obj_sep_ptr, array_sep_ptr) = match separator_cstrings.as_ref() {
            Some((obj_sep, array_sep)) => (obj_sep.as_ptr(), array_sep.as_ptr()),
            None => (std::ptr::null(), std::ptr::null()),
        };

        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarCompiler*",
            schema_ptr as "const char*",
            any_whitespace as "bool",
            has_indent as "bool",
            indent_value as "int",
            has_separators as "bool",
            obj_sep_ptr as "const char*",
            array_sep_ptr as "const char*",
            strict_mode as "bool",
            has_max_whitespace_cnt as "bool",
            max_whitespace_cnt_value as "int"
        ] -> CompiledGrammarResult as "CompiledGrammarResult" {
            try {
                std::string schema_str(schema_ptr);

                std::optional<int> opt_indent;
                if (has_indent) {
                    opt_indent = indent_value;
                }

                std::optional<std::pair<std::string, std::string>> opt_separators;
                if (has_separators) {
                    opt_separators = std::make_pair(std::string(obj_sep_ptr), std::string(array_sep_ptr));
                }

                std::optional<int> opt_max_whitespace_cnt;
                if (has_max_whitespace_cnt) {
                    opt_max_whitespace_cnt = max_whitespace_cnt_value;
                }

                auto compiled = self->CompileJSONSchema(
                    schema_str,
                    any_whitespace,
                    opt_indent,
                    opt_separators,
                    strict_mode,
                    opt_max_whitespace_cnt
                );
                return {true, compiled, nullptr};
            } catch (const std::exception& e) {
                return {false, CompiledGrammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Compile a regular expression pattern into a [`CompiledGrammar`].
    ///
    /// The compiled grammar matches text conforming to the given pattern.
    ///
    /// # Arguments
    /// * `regex` - The regex pattern string to compile
    ///
    /// # Returns
    /// * `Ok(CompiledGrammar)` - A compiled grammar that can be used with GrammarMatcher
    /// * `Err(XGrammarErr)` - Error if the regex pattern is invalid or compilation fails
    ///
    /// # Errors
    /// * Returns an error when the C++ `CompileRegex` call throws, e.g. for an invalid pattern
    ///
    /// # Panics
    /// Panics if `regex` contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::{GrammarCompiler, TokenizerInfo};
    /// # fn example(tokenizer_info: &TokenizerInfo) -> xgrammar::Result<()> {
    /// let compiler = GrammarCompiler::new(tokenizer_info);
    /// let compiled = compiler.compile_regex(r"[a-z]+@[a-z]+\.[a-z]+")?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn compile_regex(&self, regex: &str) -> Result<CompiledGrammar> {
        // `pattern` owns the C string; it must outlive the raw pointer used below.
        let pattern = CString::new(regex).expect("Failed to convert regex to CString");
        let pattern_ptr = pattern.as_ptr();

        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarCompiler*",
            pattern_ptr as "const char*"
        ] -> CompiledGrammarResult as "CompiledGrammarResult" {
            try {
                std::string pattern_str(pattern_ptr);
                return {true, self->CompileRegex(pattern_str), nullptr};
            } catch (const std::exception& e) {
                return {false, CompiledGrammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Clear the internal cache of compiled grammars.
    /// This frees up memory used by cached compiled grammars.
    ///
    /// Forwards to the C++ `GrammarCompiler::ClearCache()`.
    pub fn clear_cache(&self) {
        cpp!(unsafe [self as "xgrammar::GrammarCompiler*"] {
            self->ClearCache();
        })
    }

    /// Return the approximate memory usage of the compiler cache in bytes.
    ///
    /// Forwards to the C++ `GrammarCompiler::GetCacheSizeBytes()`.
    ///
    /// # Returns
    /// * The current cache size in bytes
    pub fn get_cache_size_bytes(&self) -> i64 {
        cpp!(unsafe [self as "const xgrammar::GrammarCompiler*"] -> i64 as "long long" {
            return self->GetCacheSizeBytes();
        })
    }

    /// Return the cache limit in bytes. -1 means unlimited.
    ///
    /// Forwards to the C++ `GrammarCompiler::CacheLimitBytes()`.
    ///
    /// # Returns
    /// * The cache limit in bytes, or -1 for unlimited
    pub fn cache_limit_bytes(&self) -> i64 {
        cpp!(unsafe [self as "const xgrammar::GrammarCompiler*"] -> i64 as "long long" {
            return self->CacheLimitBytes();
        })
    }

    /// Compile a structural tag, given as a JSON string, into a [`CompiledGrammar`].
    ///
    /// A structural tag describes structured text generation with specific formatting tags
    /// and schemas. The compiled result can be used with a GrammarMatcher.
    ///
    /// # Arguments
    /// * `structural_tag_json` - A JSON string specifying the structural tag configuration.
    ///   The JSON should contain the structural tag items and triggers.
    ///
    /// # Returns
    /// * `Ok(CompiledGrammar)` - A compiled grammar that can be used with GrammarMatcher
    /// * `Err(XGrammarErr)` - Error if the structural tag is invalid or compilation fails
    ///
    /// # Errors
    /// * Returns an error when the C++ `CompileStructuralTag` call throws, e.g. for invalid
    ///   structural tag JSON
    ///
    /// # Panics
    /// Panics if `structural_tag_json` contains an interior NUL byte.
    ///
    /// # Example
    /// ```no_run
    /// # use xgrammar::{GrammarCompiler, TokenizerInfo};
    /// # fn example(tokenizer_info: &TokenizerInfo) -> xgrammar::Result<()> {
    /// let compiler = GrammarCompiler::new(tokenizer_info);
    /// let structural_tag_json = r#"{"tags": [{"begin": "<start>", "schema": "{}", "end": "</start>"}], "triggers": ["trigger1"]}"#;
    /// let compiled_grammar = compiler.compile_structural_tag(structural_tag_json)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn compile_structural_tag(&self, structural_tag_json: &str) -> Result<CompiledGrammar> {
        // `tag_json` owns the C string; it must outlive the raw pointer used below.
        let tag_json = CString::new(structural_tag_json)
            .expect("Failed to convert structural_tag_json to CString");
        let tag_json_ptr = tag_json.as_ptr();

        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarCompiler*",
            tag_json_ptr as "const char*"
        ] -> CompiledGrammarResult as "CompiledGrammarResult" {
            try {
                std::string tag_json_str(tag_json_ptr);
                return {true, self->CompileStructuralTag(tag_json_str), nullptr};
            } catch (const std::exception& e) {
                return {false, CompiledGrammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }
}

/// Represents a context-free grammar for grammar-guided text generation.
///
/// The Grammar struct supports Extended Backus-Naur Form (EBNF) grammar specifications
/// following the GBNF specification from llama.cpp. It provides flexible grammar generation
/// and manipulation for constrained text generation tasks.
///
/// # Construction Methods
///
/// Grammar can be constructed from various sources:
/// - [`Grammar::from_ebnf`]: From EBNF grammar strings
/// - [`Grammar::from_json_schema`]: From JSON schema specifications
/// - [`Grammar::from_regex`]: From regular expression patterns
/// - [`Grammar::from_structural_tag`]: From structural tags with embedded schemas
/// - [`Grammar::builtin_json_grammar`]: Standard JSON grammar
///
/// # Grammar Operations
///
/// Multiple grammars can be combined using:
/// - [`Grammar::union`]: Creates a grammar matching any of the input grammars (equivalent to `|` operator)
/// - [`Grammar::concat`]: Creates a grammar matching concatenated sequences (equivalent to `+` operator)
impl Grammar {
    /// Build a grammar from an EBNF-formatted string.
    ///
    /// The input is an Extended Backus-Naur Form (EBNF) specification following the GBNF
    /// specification from llama.cpp.
    ///
    /// # Arguments
    /// * `ebnf_string` - The EBNF grammar specification string
    /// * `root_rule_name` - The name of the root rule to use as the entry point. If None, uses "root"
    ///
    /// # Returns
    /// * `Ok(Grammar)` - A Grammar object constructed from the EBNF specification
    /// * `Err(XGrammarErr)` - Error if the EBNF string is invalid or malformed
    ///
    /// # Errors
    /// * Returns error if the EBNF string contains syntax errors
    /// * Returns error if the root rule is not defined
    /// * Returns error if there are undefined rule references
    ///
    /// # Panics
    /// Panics if `ebnf_string` or `root_rule_name` contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let ebnf = r#"
    /// root ::= "Hello, " name "!"
    /// name ::= [A-Z][a-z]+
    /// "#;
    /// let grammar = Grammar::from_ebnf(ebnf, Some("root")).unwrap();
    /// assert!(!grammar.is_null());
    ///
    /// // Invalid EBNF will return an error
    /// let invalid_ebnf = r#"root ::= "unterminated string"#;
    /// assert!(Grammar::from_ebnf(invalid_ebnf, Some("root")).is_err());
    /// ```
    pub fn from_ebnf(ebnf_string: &str, root_rule_name: Option<&str>) -> Result<Self> {
        // Both C strings are bound to locals so the raw pointers stay valid during the call.
        let ebnf_c =
            CString::new(ebnf_string).expect("Failed to convert ebnf_string to CString");
        let root_c = CString::new(root_rule_name.unwrap_or("root"))
            .expect("Failed to convert root_rule_name to CString");
        let (ebnf_ptr, root_ptr) = (ebnf_c.as_ptr(), root_c.as_ptr());

        let outcome = cpp!(unsafe [
            ebnf_ptr as "const char*",
            root_ptr as "const char*"
        ] -> GrammarResult as "GrammarResult" {
            try {
                return {
                    true,
                    Grammar::FromEBNF(std::string(ebnf_ptr), std::string(root_ptr)),
                    nullptr
                };
            } catch (const std::exception& e) {
                return {false, Grammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Build a grammar from a JSON schema string.
    ///
    /// The resulting grammar enforces the schema's constraints during text generation. The
    /// schema can be in JSON string format or represent a Pydantic-style model structure.
    ///
    /// # Arguments
    /// * `schema` - The JSON schema string defining the structure to enforce
    /// * `any_whitespace` - Whether to allow flexible whitespace in the JSON output. When true,
    ///   any amount of whitespace is allowed between tokens. None uses true
    /// * `indent` - Number of spaces for indentation in the JSON output. When specified,
    ///   produces formatted JSON with the given indentation level
    /// * `separators` - Custom separators for JSON formatting as (item_separator, key_separator).
    ///   For example, `(",", ":")` produces compact JSON. When None, uses standard JSON separators
    /// * `strict_mode` - Whether to enforce strict JSON schema validation. When true, ensures
    ///   all schema constraints are strictly enforced. None uses true
    /// * `max_whitespace_cnt` - Maximum number of consecutive whitespace characters allowed.
    ///   Useful for preventing excessive whitespace in generated output
    /// * `print_converted_ebnf` - Whether to print the converted EBNF grammar for debugging
    ///   purposes. None uses false
    ///
    /// # Returns
    /// * `Ok(Grammar)` - A Grammar object that enforces the JSON schema constraints
    /// * `Err(XGrammarErr)` - Error if the JSON schema is invalid or malformed
    ///
    /// # Errors
    /// * Returns error if the JSON schema is invalid
    /// * Returns error if the schema cannot be converted to EBNF
    ///
    /// # Panics
    /// Panics if `schema` or either separator contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let schema = r#"{
    ///   "type": "object",
    ///   "properties": {
    ///     "name": {"type": "string"},
    ///     "age": {"type": "integer"}
    ///   },
    ///   "required": ["name", "age"]
    /// }"#;
    /// let grammar = Grammar::from_json_schema(
    ///     schema,
    ///     Some(true),    // allow flexible whitespace
    ///     Some(2),       // 2-space indentation
    ///     None,          // default separators
    ///     Some(true),    // strict mode
    ///     None,          // no whitespace limit
    ///     Some(false)    // don't print EBNF
    /// ).unwrap();
    /// assert!(!grammar.is_null());
    ///
    /// // Invalid JSON schema will return an error
    /// let invalid_schema = r#"{ invalid json }"#;
    /// assert!(Grammar::from_json_schema(invalid_schema, None, None, None, None, None, None).is_err());
    /// ```
    pub fn from_json_schema(
        schema: &str,
        any_whitespace: Option<bool>,
        indent: Option<i32>,
        separators: Option<(String, String)>,
        strict_mode: Option<bool>,
        max_whitespace_cnt: Option<i32>,
        print_converted_ebnf: Option<bool>,
    ) -> Result<Self> {
        let schema_cstring = CString::new(schema).expect("Failed to convert schema to CString");
        let schema_ptr = schema_cstring.as_ptr();

        // Optional values cross the FFI boundary as a (present, value) pair.
        let any_whitespace = any_whitespace.unwrap_or(true);
        let strict_mode = strict_mode.unwrap_or(true);
        let print_converted_ebnf = print_converted_ebnf.unwrap_or(false);
        let (has_indent, indent_value) = indent.map_or((false, 0), |spaces| (true, spaces));
        let (has_max_whitespace_cnt, max_whitespace_cnt_value) =
            max_whitespace_cnt.map_or((false, 0), |cnt| (true, cnt));

        // The owned C strings live in this binding until the function returns, which keeps
        // the raw pointers derived from them valid for the duration of the FFI call.
        let separator_cstrings = separators.map(|(obj_sep, array_sep)| {
            (
                CString::new(obj_sep).expect("Failed to convert object separator to CString"),
                CString::new(array_sep).expect("Failed to convert array separator to CString"),
            )
        });
        let has_separators = separator_cstrings.is_some();
        let (obj_sep_ptr, array_sep_ptr) = match separator_cstrings.as_ref() {
            Some((obj_sep, array_sep)) => (obj_sep.as_ptr(), array_sep.as_ptr()),
            None => (std::ptr::null(), std::ptr::null()),
        };

        let outcome = cpp!(unsafe [
            schema_ptr as "const char*",
            any_whitespace as "bool",
            has_indent as "bool",
            indent_value as "int",
            has_separators as "bool",
            obj_sep_ptr as "const char*",
            array_sep_ptr as "const char*",
            strict_mode as "bool",
            has_max_whitespace_cnt as "bool",
            max_whitespace_cnt_value as "int",
            print_converted_ebnf as "bool"
        ] -> GrammarResult as "GrammarResult" {
            try {
                std::string schema_str(schema_ptr);

                std::optional<int> opt_indent;
                if (has_indent) {
                    opt_indent = indent_value;
                }

                std::optional<std::pair<std::string, std::string>> opt_separators;
                if (has_separators) {
                    opt_separators = std::make_pair(std::string(obj_sep_ptr), std::string(array_sep_ptr));
                }

                std::optional<int> opt_max_whitespace_cnt;
                if (has_max_whitespace_cnt) {
                    opt_max_whitespace_cnt = max_whitespace_cnt_value;
                }

                auto grammar = Grammar::FromJSONSchema(
                    schema_str,
                    any_whitespace,
                    opt_indent,
                    opt_separators,
                    strict_mode,
                    opt_max_whitespace_cnt,
                    print_converted_ebnf
                );
                return {true, grammar, nullptr};
            } catch (const std::exception& e) {
                return {false, Grammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Build a grammar from a regular expression string.
    ///
    /// The pattern is converted into an EBNF grammar specification; the resulting grammar
    /// matches text conforming to the pattern.
    ///
    /// # Arguments
    /// * `regex` - The regular expression pattern string to convert
    /// * `print_converted_ebnf` - Whether to print the converted EBNF grammar for debugging
    ///   purposes. None uses false
    ///
    /// # Returns
    /// * `Ok(Grammar)` - A Grammar object that matches the regex pattern
    /// * `Err(XGrammarErr)` - Error if the regex pattern is invalid or malformed
    ///
    /// # Errors
    /// * Returns error if the regex pattern is invalid
    /// * Returns error if the regex cannot be converted to EBNF
    ///
    /// # Panics
    /// Panics if `regex` contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// // Match email-like patterns
    /// let grammar = Grammar::from_regex(r"[a-z]+@[a-z]+\.[a-z]+", Some(false)).unwrap();
    /// assert!(!grammar.is_null());
    ///
    /// // Invalid regex will return an error
    /// let invalid_regex = r"[";
    /// assert!(Grammar::from_regex(invalid_regex, Some(false)).is_err());
    /// ```
    pub fn from_regex(regex: &str, print_converted_ebnf: Option<bool>) -> Result<Self> {
        // `pattern` owns the C string; it must outlive the raw pointer used below.
        let pattern = CString::new(regex).expect("Failed to convert regex to CString");
        let pattern_ptr = pattern.as_ptr();
        let print_converted_ebnf = print_converted_ebnf.unwrap_or(false);

        let outcome = cpp!(unsafe [
            pattern_ptr as "const char*",
            print_converted_ebnf as "bool"
        ] -> GrammarResult as "GrammarResult" {
            try {
                return {
                    true,
                    Grammar::FromRegex(std::string(pattern_ptr), print_converted_ebnf),
                    nullptr
                };
            } catch (const std::exception& e) {
                return {false, Grammar(NullObj()), strdup(e.what())};
            }
        });

        outcome.into()
    }

    /// Construct a grammar from a structural tag JSON string.
    ///
    /// This method creates a grammar from structural tags that enable grammar-guided generation
    /// with specific formatting markers. Structural tags are useful for dispatching between
    /// different grammars based on trigger tokens and wrapping content with specific begin/end tags.
    ///
    /// The structural tag format supports:
    /// - Single tag specification with begin marker, JSON schema, and end marker
    /// - Multiple tags for grammar dispatching based on triggers
    /// - Legacy tag/trigger pattern support
    ///
    /// # Arguments
    /// * `structural_tag_json` - A JSON string specifying the structural tag configuration.
    ///   The JSON should contain structural tag items with `begin`, `schema`, and `end` fields,
    ///   and optionally `triggers` for grammar dispatching.
    /// * `tokenizer_info` - Optional `TokenizerInfo` for resolving string token references.
    ///   Required when the structural tag JSON uses token-level formats introduced in
    ///   xgrammar >= 0.1.33 (e.g. `type: "token"`, `type: "exclude_token"`,
    ///   `type: "any_tokens"`, `type: "token_triggered_tags"`). Pass `None` for pure
    ///   character-level tag formats.
    ///
    /// # Returns
    /// * `Ok(Grammar)` if the JSON is valid and the grammar was successfully created
    /// * `Err(XGrammarErr)` if the JSON is invalid or the structural tag is malformed
    ///
    /// # Errors
    /// * Returns the structural tag error reported by the C++ `FromStructuralTag` call
    /// * Returns the message of any C++ exception thrown while building the grammar
    ///
    /// # Panics
    /// Panics if `structural_tag_json` contains an interior NUL byte.
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// use serde_json::json;
    ///
    /// // Triggered tags example for tool calling with multiple functions
    /// let structural_tag = json!({
    ///     "format": {
    ///         "type": "triggered_tags",
    ///         "triggers": ["<function="],
    ///         "tags": [
    ///             {
    ///                 "begin": "<function=get_weather>",
    ///                 "content": {
    ///                     "type": "json_schema",
    ///                     "json_schema": {
    ///                         "type": "object",
    ///                         "properties": {
    ///                             "city": {"type": "string"},
    ///                             "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
    ///                         },
    ///                         "required": ["city"]
    ///                     }
    ///                 },
    ///                 "end": "</function>"
    ///             }
    ///         ]
    ///     }
    /// });
    ///
    /// let grammar = Grammar::from_structural_tag(&structural_tag.to_string(), None).unwrap();
    /// assert!(!grammar.is_null());
    /// ```
    pub fn from_structural_tag(
        structural_tag_json: &str,
        tokenizer_info: Option<&TokenizerInfo>,
    ) -> Result<Self> {
        let structural_tag_json_cstring = CString::new(structural_tag_json)
            .expect("Failed to convert structural_tag_json to CString");
        let structural_tag_json_ptr = structural_tag_json_cstring.as_ptr();
        // A null pointer tells the C++ side that no tokenizer info was supplied.
        let tokenizer_info_ptr: *const TokenizerInfo =
            tokenizer_info.map_or(std::ptr::null(), |info| info as *const TokenizerInfo);

        // The whole C++ body sits in a try/catch, like the other fallible wrappers in this
        // file, so that an exception is reported as an error result instead of escaping the
        // FFI closure.
        let result = cpp!(unsafe [
            structural_tag_json_ptr as "const char*",
            tokenizer_info_ptr as "const xgrammar::TokenizerInfo*"
        ] -> GrammarResult as "GrammarResult" {
            try {
                std::string structural_tag_json_str(structural_tag_json_ptr);
                std::optional<xgrammar::TokenizerInfo> opt_tokenizer_info;
                if (tokenizer_info_ptr != nullptr) {
                    opt_tokenizer_info = *tokenizer_info_ptr;
                }
                auto result = xgrammar::Grammar::FromStructuralTag(
                    structural_tag_json_str, opt_tokenizer_info
                );

                // Check if result holds a Grammar or an error
                if (std::holds_alternative<xgrammar::Grammar>(result)) {
                    return {true, std::get<xgrammar::Grammar>(result), nullptr};
                }

                auto error = std::get<xgrammar::StructuralTagError>(result);

                // Extract error message from the variant
                std::string error_msg;
                std::visit([&error_msg](auto&& err) {
                    error_msg = err.what();
                }, error);

                // Allocate and copy error message
                return {false, Grammar(NullObj()), strdup(error_msg.c_str())};
            } catch (const std::exception& e) {
                return {false, Grammar(NullObj()), strdup(e.what())};
            }
        });

        result.into()
    }

    /// Get a grammar for standard JSON format.
    ///
    /// This method returns a pre-built grammar that matches any valid JSON according
    /// to the JSON specification, without schema constraints. It's useful as a starting
    /// point for JSON generation or when you need to accept any valid JSON structure.
    ///
    /// Forwards to the C++ `xgrammar::Grammar::BuiltinJSONGrammar()`.
    ///
    /// # Returns
    /// * A Grammar object that matches standard JSON format
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let json_grammar = Grammar::builtin_json_grammar();
    /// assert!(!json_grammar.is_null());
    /// ```
    pub fn builtin_json_grammar() -> Self {
        cpp!(unsafe [] -> Grammar as "xgrammar::Grammar" {
            return xgrammar::Grammar::BuiltinJSONGrammar();
        })
    }

    /// Combine grammars into one that matches any of them.
    ///
    /// The union accepts input that matches at least one of the given grammars, which is
    /// equivalent to the `|` (OR) operator in regular expressions.
    ///
    /// # Arguments
    /// * `grammars` - A slice of Grammar objects to combine
    ///
    /// # Returns
    /// * A new Grammar that matches if any of the input grammars match
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let grammar1 = Grammar::from_regex(r"[0-9]+", Some(false)).unwrap();
    /// let grammar2 = Grammar::from_regex(r"[a-z]+", Some(false)).unwrap();
    /// let union_grammar = Grammar::union(&[grammar1, grammar2]);
    /// assert!(!union_grammar.is_null());
    /// ```
    pub fn union(grammars: &[Grammar]) -> Self {
        // The slice crosses the FFI boundary as a (pointer, length) pair and is copied into
        // a std::vector on the C++ side.
        let (first, count) = (grammars.as_ptr(), grammars.len());
        cpp!(unsafe [
            first as "const xgrammar::Grammar*",
            count as "size_t"
        ] -> Grammar as "xgrammar::Grammar" {
            std::vector<xgrammar::Grammar> members(first, first + count);
            return xgrammar::Grammar::Union(members);
        })
    }

    /// Combine grammars into one that matches them in sequence.
    ///
    /// The concatenation requires input to match every given grammar, one after another in
    /// slice order. This is equivalent to the `+` (concatenation) operator in formal
    /// language theory.
    ///
    /// # Arguments
    /// * `grammars` - A slice of Grammar objects to concatenate in order
    ///
    /// # Returns
    /// * A new Grammar that matches the sequential combination of all input grammars
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let greeting = Grammar::from_regex(r"Hello", Some(false)).unwrap();
    /// let space = Grammar::from_regex(r" ", Some(false)).unwrap();
    /// let name = Grammar::from_regex(r"[A-Z][a-z]+", Some(false)).unwrap();
    /// let concat_grammar = Grammar::concat(&[greeting, space, name]);
    /// assert!(!concat_grammar.is_null());
    /// ```
    pub fn concat(grammars: &[Grammar]) -> Self {
        // The slice crosses the FFI boundary as a (pointer, length) pair and is copied into
        // a std::vector on the C++ side.
        let (first, count) = (grammars.as_ptr(), grammars.len());
        cpp!(unsafe [
            first as "const xgrammar::Grammar*",
            count as "size_t"
        ] -> Grammar as "xgrammar::Grammar" {
            std::vector<xgrammar::Grammar> parts(first, first + count);
            return xgrammar::Grammar::Concat(parts);
        })
    }

    /// Check if the grammar object is null.
    ///
    /// A null grammar typically indicates an uninitialized or invalid grammar state.
    /// This can occur when grammar construction fails or when working with default values.
    ///
    /// Forwards to the C++ `xgrammar::Grammar::IsNull()`.
    ///
    /// # Returns
    /// * `true` if the grammar is null (invalid/uninitialized)
    /// * `false` if the grammar is valid
    ///
    /// # Example
    /// ```
    /// # use xgrammar::Grammar;
    /// let grammar = Grammar::builtin_json_grammar();
    /// assert!(!grammar.is_null());
    /// ```
    pub fn is_null(&self) -> bool {
        cpp!(unsafe [self as "const xgrammar::Grammar*"] -> bool as "bool" {
            return self->IsNull();
        })
    }
}

impl GrammarMatcher {
    /// Create a `GrammarMatcher` from a compiled grammar with default options.
    ///
    /// This is shorthand for `GrammarMatcher::with(compiled_grammar, None, None, None)`:
    /// no stop-token override, and every other option left at the default that
    /// [`Self::with`] applies. Use [`Self::with`] directly to customize the stop
    /// tokens or to let the matcher terminate without a stop token.
    ///
    /// # Arguments
    /// * `compiled_grammar` - The compiled grammar to use
    pub fn new(compiled_grammar: &CompiledGrammar) -> Self {
        // Pass `None` everywhere so this constructor never diverges from the
        // defaults documented on `with`.
        Self::with(compiled_grammar, None, None, None)
    }

    /// Create a GrammarMatcher from a compiled grammar with explicit options.
    ///
    /// # Arguments
    /// * `compiled_grammar` - The compiled grammar to use
    /// * `override_stop_tokens` - Optional list of token ids to override the default stop tokens.
    ///   `None` and an empty slice are both treated as "no override".
    /// * `terminate_without_stop_token` - Whether to terminate the matcher without accepting a
    ///   stop token. Defaults to `false` when `None`.
    /// * `max_rollback_tokens` - Deprecated. You don't need to set it and it's always unlimited (-1).
    ///   The new Earley parser significantly reduces the number of states, so we can allow
    ///   unlimited rollback. The maximum number of rollback tokens allowed. The rollback operation
    ///   is useful for jump-forward decoding and speculative decoding. Defaults to `-1` when `None`.
    ///
    /// # Returns
    /// * A new matcher bound to `compiled_grammar`
    pub fn with(
        compiled_grammar: &CompiledGrammar,
        override_stop_tokens: Option<&[i32]>,
        terminate_without_stop_token: Option<bool>,
        max_rollback_tokens: Option<i32>,
    ) -> Self {
        // Keep it sync with the C++ implementation:
        // https://github.com/mlc-ai/xgrammar/blob/95bdfce011506ea95306b37d080115a2da3e369a/cpp/grammar_matcher.cc#L257
        let terminate_without_stop_token = terminate_without_stop_token.unwrap_or(false);
        // -1 means "unlimited", matching the documented default above.
        let max_rollback_tokens = max_rollback_tokens.unwrap_or(-1);
        let (override_stop_tokens_ptr, override_stop_tokens_len) = override_stop_tokens
            .map_or((std::ptr::null::<i32>(), 0usize), |ids| (ids.as_ptr(), ids.len()));

        // The stop tokens are copied with the iterator-range constructor
        // `vector(first, last)`. The pointers must NOT be dereferenced here:
        // passing two integers would select the `(count, value)` constructor
        // and build an unrelated vector.
        cpp!(unsafe [
            compiled_grammar as "const xgrammar::CompiledGrammar*",
            override_stop_tokens_ptr as "const int32_t*",
            override_stop_tokens_len as "size_t",
            terminate_without_stop_token as "bool",
            max_rollback_tokens as "int"
        ] -> GrammarMatcher as "xgrammar::GrammarMatcher" {
            std::optional<std::vector<int32_t>> opt_override_stop_tokens;
            if (override_stop_tokens_len > 0) {
                opt_override_stop_tokens = std::vector<int32_t>(
                    override_stop_tokens_ptr,
                    override_stop_tokens_ptr + override_stop_tokens_len
                );
            }

            return xgrammar::GrammarMatcher(
                *compiled_grammar,
                opt_override_stop_tokens,
                terminate_without_stop_token,
                max_rollback_tokens
            );
        })
    }

    /// Feed a single token to the matcher, advancing its state.
    ///
    /// # Arguments
    /// * `token_id` - The id of the token to accept.
    /// * `debug_print` - If true, print debug information. `None` means `false`.
    ///
    /// # Returns
    /// * Whether the token is accepted.
    ///
    /// # Note
    /// Termination state.
    ///
    /// When the end of the root rule is reached, the matcher can only accept the stop token.
    /// The matcher is terminated after accepting the stop token, i.e. no AcceptToken or
    /// FindNextTokenMask operations can be performed. The termination state can be canceled
    /// using rollback().
    pub fn accept_token(&mut self, token_id: i32, debug_print: Option<bool>) -> bool {
        let verbose = debug_print.unwrap_or(false);
        cpp!(unsafe [
            self as "xgrammar::GrammarMatcher*",
            token_id as "int32_t",
            verbose as "bool"
        ] -> bool as "bool" {
            return self->AcceptToken(token_id, verbose);
        })
    }

    /// Accept a string and update the state of the matcher. The whole string is considered
    /// as one step in rollback. It is used to complement the functionality of `accept_token()`,
    /// and `accept_token()` should always be used to accept tokens.
    ///
    /// The string is handed to C++ as a pointer/length pair, so input containing
    /// interior NUL bytes is passed through unchanged instead of causing a panic.
    ///
    /// # Arguments
    /// * `input_str` - The string to be accepted.
    /// * `debug_print` - Whether to print information about the internal state of the matcher.
    ///   `None` means `false`.
    ///
    /// # Returns
    /// * Whether the string is accepted.
    pub fn accept_string(&mut self, input_str: &str, debug_print: Option<bool>) -> bool {
        let debug_print = debug_print.unwrap_or(false);
        // `input_str` is borrowed for the whole call, so the pointer stays valid
        // while C++ copies the bytes into its own `std::string`.
        let input_ptr = input_str.as_ptr();
        let input_len = input_str.len();

        cpp!(unsafe [
            self as "xgrammar::GrammarMatcher*",
            input_ptr as "const char*",
            input_len as "size_t",
            debug_print as "bool"
        ] -> bool as "bool" {
            return self->AcceptString(std::string(input_ptr, input_len), debug_print);
        })
    }

    /// Compute which tokens are acceptable for the next step and write them into a bitmask.
    ///
    /// # Arguments
    /// * `next_token_bitmask` - The bitmask to store the result. It must be a pre-allocated
    ///   DLTensor with shape (tokenizer.GetVocabSize() + 31) / 32, and dtype int32.
    /// * `index` - The index of the bitmask to fill. If None, the first bitmask is filled.
    /// * `debug_print` - If true, print debug information.
    ///
    /// # Returns
    /// * `Ok(bool)` - Whether the bitmask needs to be applied (not all-true).
    /// * `Err(XGrammarErr)` - Error if the operation fails (e.g., matcher terminated, invalid bitmask).
    ///
    /// # Errors
    /// * Returns error if the matcher has terminated after accepting the stop token
    /// * Returns error if the bitmask has invalid dtype, shape, or device type
    pub fn fill_next_token_bitmask(
        &mut self,
        next_token_bitmask: &mut DLTensor,
        index: Option<usize>,
        debug_print: Option<bool>,
    ) -> Result<bool> {
        let tensor = next_token_bitmask.dl_tensor();
        let row = index.map_or(0, |i| i as i32);
        let verbose = debug_print.unwrap_or(false);

        // Any C++ exception is caught on the C++ side and carried back as a
        // message inside `MatcherResult`, which converts into `Err`.
        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarMatcher*",
            tensor as "DLTensor*",
            row as "int32_t",
            verbose as "bool"
        ] -> MatcherResult as "MatcherResult" {
            try {
                bool needs_mask = self->FillNextTokenBitmask(tensor, row, verbose);
                return {true, needs_mask, nullptr};
            } catch (const std::exception& err) {
                return {false, false, strdup(err.what())};
            }
        });

        outcome.into()
    }

    /// Undo the most recent steps, restoring an earlier matcher state.
    ///
    /// # Arguments
    /// * `num_tokens` - The number of tokens to rollback (`None` means 1). It cannot exceed
    ///   the current number of steps, nor can it exceed the specified maximum number of
    ///   rollback tokens.
    ///
    /// # Returns
    /// * `Ok(())` - If the rollback succeeds
    /// * `Err(XGrammarErr)` - Error if the rollback fails (e.g., num_tokens exceeds history)
    ///
    /// # Errors
    /// * Returns error if num_tokens exceeds the number of saved history steps
    pub fn rollback(&mut self, num_tokens: Option<i32>) -> Result<()> {
        let steps = num_tokens.unwrap_or(1);

        // A C++ exception is caught inside the closure and reported through
        // `MatcherResult`, which converts into `Err`.
        let outcome = cpp!(unsafe [
            self as "xgrammar::GrammarMatcher*",
            steps as "int"
        ] -> MatcherResult as "MatcherResult" {
            try {
                self->Rollback(steps);
                return {true, false, nullptr};
            } catch (const std::exception& err) {
                return {false, false, strdup(err.what())};
            }
        });

        outcome.into()
    }

    /// Check if the matcher has accepted the stop token and terminated.
    pub fn is_terminated(&self) -> bool {
        cpp!(unsafe [self as "const xgrammar::GrammarMatcher*"] -> bool as "bool" {
            return self->IsTerminated();
        })
    }

    /// Check if the grammar's root rule has been fully matched by the input
    /// accepted so far. Unlike [`Self::is_terminated`], this does not require the
    /// stop token to have been accepted.
    pub fn is_completed(&self) -> bool {
        cpp!(unsafe [self as "const xgrammar::GrammarMatcher*"] -> bool as "bool" {
            return self->IsCompleted();
        })
    }

    /// Get the maximum number of rollback tokens allowed.
    pub fn get_max_rollback_tokens(&self) -> i32 {
        cpp!(unsafe [self as "const xgrammar::GrammarMatcher*"] -> i32 as "int" {
            return self->GetMaxRollbackTokens();
        })
    }

    /// Return the stop token ids reported by the C++ `GrammarMatcher::GetStopTokenIds()`,
    /// copied into a freshly allocated `Vec<i32>` in the order C++ yields them.
    pub fn get_stop_token_ids(&self) -> Vec<i32> {
        // `Vec<T>` and `std::vector<T>` are not layout-compatible (they differ
        // on libstdc++), so the C++ vector is never reinterpreted as a Rust
        // one. Instead each id is appended to a Rust-owned `Vec<i32>` through
        // the `rust!` callback bridge.
        let mut collected: Vec<i32> = Vec::new();
        let sink: *mut Vec<i32> = &mut collected;
        cpp!(unsafe [
            self as "const xgrammar::GrammarMatcher*",
            sink as "void*"
        ] {
            const auto& stop_ids = self->GetStopTokenIds();
            for (int token : stop_ids) {
                rust!(XGR_Matcher_StopTokenIds_push [
                    sink: *mut Vec<i32> as "void*",
                    token: i32 as "int"
                ] {
                    // SAFETY: `sink` was derived from a live `&mut Vec<i32>`
                    // on the Rust side and is only used within this call.
                    unsafe { (*sink).push(token) };
                });
            }
        });
        collected
    }

    /// Reset the matcher to the initial state.
    pub fn reset(&mut self) {
        cpp!(unsafe [self as "xgrammar::GrammarMatcher*"] {
            self->Reset();
        })
    }

    /// Deep-copy the matcher state. The returned matcher shares the
    /// `CompiledGrammar` and `TokenizerInfo` with `self` (cheap shared_ptr
    /// aliases) but has independent matcher state that evolves separately.
    /// Useful for speculative decoding and branching search — accepting tokens
    /// on the forked matcher does not affect `self`.
    ///
    /// This is unrelated to POSIX `fork(2)` despite the name; the name mirrors
    /// upstream xgrammar's `GrammarMatcher::Fork()`.
    pub fn fork(&self) -> GrammarMatcher {
        cpp!(unsafe [self as "const xgrammar::GrammarMatcher*"]
            -> GrammarMatcher as "xgrammar::GrammarMatcher"
        {
            return self->Fork();
        })
    }
}

/// Batched helpers that operate on a slice of [`GrammarMatcher`].
///
/// ## When an instance is (and isn't) needed
///
/// The method layout intentionally mirrors upstream xgrammar, where only one batch
/// operation actually runs in parallel:
///
/// - [`Self::batch_fill_next_token_bitmask`] — **instance method** (`&mut self`).
///   Uses the thread pool owned by this `BatchGrammarMatcher` to fan the per-matcher
///   work out across threads. Construct once via [`Self::new`] or
///   [`Self::with_max_threads`] and reuse.
///
/// - [`Self::batch_accept_token`], [`Self::batch_accept_string`],
///   [`Self::batch_rollback`] — **associated (static) functions**.
///   Upstream implements these as a plain sequential `for` loop; no thread pool
///   or instance state is involved, so there is nothing for `self` to carry.
///   Call them as `BatchGrammarMatcher::batch_accept_token(...)` without
///   constructing an instance.
///
/// This asymmetry reflects the upstream implementation
/// (`BatchGrammarMatcher::Impl::BatchAcceptToken` etc. in
/// `thirdparty/xgrammar/cpp/grammar_matcher.cc`). If upstream ever parallelizes
/// those ops, they will gain a `&self` receiver here.
impl BatchGrammarMatcher {
    /// Build a `BatchGrammarMatcher` using the default `"auto"` thread policy
    /// (roughly half of the available hardware threads).
    ///
    /// The instance owns a thread pool that **only**
    /// [`Self::batch_fill_next_token_bitmask`] uses. The remaining batch helpers
    /// are associated functions, so no instance is needed unless you call that
    /// method.
    ///
    /// This matches `BatchGrammarMatcher::default()`: the `Default` impl is
    /// auto-generated by `cpp_class!` and maps to the C++ default constructor,
    /// which also uses `"auto"`.
    pub fn new() -> Self {
        cpp!(unsafe [] -> BatchGrammarMatcher as "xgrammar::BatchGrammarMatcher" {
            std::string policy("auto");
            return xgrammar::BatchGrammarMatcher(policy);
        })
    }

    /// Build a `BatchGrammarMatcher` whose thread pool (used by
    /// [`Self::batch_fill_next_token_bitmask`]) is capped at `max_threads`.
    ///
    /// Passing `1` disables parallelism: the work runs on the calling thread.
    /// With values `> 1`, a thread pool is spun up on every
    /// `batch_fill_next_token_bitmask` call (upstream rebuilds the pool each
    /// time because `ThreadPool` is not reusable after `Join`).
    pub fn with_max_threads(max_threads: i32) -> Self {
        let thread_cap = max_threads;
        cpp!(unsafe [thread_cap as "int32_t"]
            -> BatchGrammarMatcher as "xgrammar::BatchGrammarMatcher"
        {
            return xgrammar::BatchGrammarMatcher(thread_cap);
        })
    }

    /// Batched counterpart of [`GrammarMatcher::fill_next_token_bitmask`].
    ///
    /// Of all batch methods, this is the **only** one that uses the thread pool
    /// owned by this `BatchGrammarMatcher`, which is why it takes `&mut self`.
    /// With `max_threads > 1` the per-matcher bitmask fills run in parallel.
    ///
    /// # Arguments
    /// * `matchers` - The matchers to operate on in parallel. Mutated in place.
    /// * `next_token_bitmask` - Pre-allocated `DLTensor` with shape `(N, bitmask_len)` and
    ///   dtype `int32`, where `N >= matchers.len()` and `bitmask_len` is the per-matcher
    ///   bitmask length.
    /// * `indices` - Optional mapping from matcher index to bitmask row. If `None`, the
    ///   bitmask row `i` is written for `matchers[i]`.
    /// * `debug_print` - When `true`, print debug information.
    ///
    /// # Errors
    /// * Returns an error carrying the exception message if the C++ call throws
    ///   a `std::exception`.
    pub fn batch_fill_next_token_bitmask(
        &mut self,
        matchers: &mut [GrammarMatcher],
        next_token_bitmask: &mut DLTensor,
        indices: Option<&[i32]>,
        debug_print: Option<bool>,
    ) -> Result<()> {
        let dl_tensor = next_token_bitmask.dl_tensor();
        let verbose = debug_print.unwrap_or(false);
        let matchers_ptr = matchers.as_mut_ptr();
        let num_matchers = matchers.len();
        // `has_indices` is passed separately so that `Some(&[])` stays
        // distinguishable from `None` on the C++ side.
        let (indices_ptr, num_indices, has_indices) = match indices {
            Some(rows) => (rows.as_ptr(), rows.len(), true),
            None => (std::ptr::null(), 0, false),
        };

        // The upstream batch API takes `std::vector<GrammarMatcher>*`, so a
        // vector has to be built over the Rust-owned slice. That is cheap and
        // safe: `GrammarMatcher` is a shared_ptr<Impl> PIMPL (see upstream
        // xgrammar/object.h `XGRAMMAR_DEFINE_PIMPL_METHODS`), so copying a
        // matcher clones the shared_ptr and `batch[i]` aliases the *same* Impl
        // as `matchers_ptr[i]`. Batch ops mutate through `pimpl_`, so every
        // state change lands in the shared Impl and the caller sees it without
        // a write-back step.
        let outcome = cpp!(unsafe [
            self as "xgrammar::BatchGrammarMatcher*",
            matchers_ptr as "xgrammar::GrammarMatcher*",
            num_matchers as "size_t",
            dl_tensor as "DLTensor*",
            indices_ptr as "const int32_t*",
            num_indices as "size_t",
            has_indices as "bool",
            verbose as "bool"
        ] -> MatcherResult as "MatcherResult" {
            try {
                std::vector<xgrammar::GrammarMatcher> batch(
                    matchers_ptr, matchers_ptr + num_matchers
                );
                std::optional<std::vector<int32_t>> rows;
                if (has_indices) {
                    rows = std::vector<int32_t>(indices_ptr, indices_ptr + num_indices);
                }
                self->BatchFillNextTokenBitmask(&batch, dl_tensor, rows, verbose);
                return {true, false, nullptr};
            } catch (const std::exception& err) {
                return {false, false, strdup(err.what())};
            }
        });

        outcome.into()
    }

    /// Batched version of [`GrammarMatcher::accept_token`]. Returns a vector of
    /// booleans indicating whether each token was accepted by the corresponding
    /// matcher.
    ///
    /// This is an **associated function**, not a method — upstream xgrammar
    /// implements it as a sequential `for` loop with no thread pool, so no
    /// `BatchGrammarMatcher` instance is required. Call as
    /// `BatchGrammarMatcher::batch_accept_token(&mut matchers, &token_ids, None)`.
    pub fn batch_accept_token(
        matchers: &mut [GrammarMatcher],
        token_ids: &[i32],
        debug_print: Option<bool>,
    ) -> Vec<bool> {
        let debug_print = debug_print.unwrap_or(false);
        let matchers_ptr = matchers.as_mut_ptr();
        let num_matchers = matchers.len();
        let token_ids_ptr = token_ids.as_ptr();
        let num_tokens = token_ids.len();
        let mut out_buf = vec![0u8; num_matchers];
        let out_ptr = out_buf.as_mut_ptr();

        // See `batch_fill_next_token_bitmask` for why no write-back is needed:
        // `GrammarMatcher` is a shared_ptr<Impl> PIMPL, so the vector entries
        // alias the same Impl as the caller's slice.
        cpp!(unsafe [
            matchers_ptr as "xgrammar::GrammarMatcher*",
            num_matchers as "size_t",
            token_ids_ptr as "const int32_t*",
            num_tokens as "size_t",
            out_ptr as "uint8_t*",
            debug_print as "bool"
        ] {
            std::vector<xgrammar::GrammarMatcher> matchers_vec(
                matchers_ptr, matchers_ptr + num_matchers
            );
            std::vector<int32_t> token_ids_vec(token_ids_ptr, token_ids_ptr + num_tokens);
            auto out = xgrammar::BatchGrammarMatcher::BatchAcceptToken(
                &matchers_vec, token_ids_vec, debug_print
            );
            size_t n = out.size() < num_matchers ? out.size() : num_matchers;
            for (size_t i = 0; i < n; ++i) {
                out_ptr[i] = out[i];
            }
        });

        out_buf.into_iter().map(|b| b != 0).collect()
    }

    /// Batched version of [`GrammarMatcher::accept_string`]. Returns a vector of
    /// booleans indicating whether each string was accepted by the corresponding
    /// matcher.
    ///
    /// This is an **associated function**, not a method — upstream xgrammar
    /// implements it as a sequential `for` loop with no thread pool, so no
    /// `BatchGrammarMatcher` instance is required. Call as
    /// `BatchGrammarMatcher::batch_accept_string(&mut matchers, &input_strs, None)`.
    pub fn batch_accept_string(
        matchers: &mut [GrammarMatcher],
        input_strs: &[&str],
        debug_print: Option<bool>,
    ) -> Vec<bool> {
        let debug_print = debug_print.unwrap_or(false);
        let matchers_ptr = matchers.as_mut_ptr();
        let num_matchers = matchers.len();

        let cstrings: Vec<CString> = input_strs
            .iter()
            .map(|s| CString::new(*s).expect("Failed to convert input_str to CString"))
            .collect();
        let c_ptrs: Vec<*const std::os::raw::c_char> =
            cstrings.iter().map(|c| c.as_ptr()).collect();
        let c_ptrs_ptr = c_ptrs.as_ptr();
        let num_strs = c_ptrs.len();
        let mut out_buf = vec![0u8; num_matchers];
        let out_ptr = out_buf.as_mut_ptr();

        // See `batch_fill_next_token_bitmask` for why no write-back is needed:
        // `GrammarMatcher` is a shared_ptr<Impl> PIMPL, so the vector entries
        // alias the same Impl as the caller's slice.
        cpp!(unsafe [
            matchers_ptr as "xgrammar::GrammarMatcher*",
            num_matchers as "size_t",
            c_ptrs_ptr as "const char* const*",
            num_strs as "size_t",
            out_ptr as "uint8_t*",
            debug_print as "bool"
        ] {
            std::vector<xgrammar::GrammarMatcher> matchers_vec(
                matchers_ptr, matchers_ptr + num_matchers
            );
            std::vector<std::string> input_strs_vec;
            input_strs_vec.reserve(num_strs);
            for (size_t i = 0; i < num_strs; ++i) {
                input_strs_vec.emplace_back(c_ptrs_ptr[i]);
            }
            auto out = xgrammar::BatchGrammarMatcher::BatchAcceptString(
                &matchers_vec, input_strs_vec, debug_print
            );
            size_t n = out.size() < num_matchers ? out.size() : num_matchers;
            for (size_t i = 0; i < n; ++i) {
                out_ptr[i] = out[i];
            }
        });

        // Anchor the backing storage here — after the `cpp!` block — so NLL
        // cannot drop `cstrings` or `c_ptrs` while C++ still holds pointers
        // derived from them. The `let _` binding is a use-site that extends
        // both values' liveness to this line.
        let _keep_alive = (&cstrings, &c_ptrs);
        out_buf.into_iter().map(|b| b != 0).collect()
    }

    /// Batched version of [`GrammarMatcher::rollback`]. Each matcher rolls back
    /// by the corresponding count in `num_tokens`.
    ///
    /// This is an **associated function**, not a method — upstream xgrammar
    /// implements it as a sequential `for` loop with no thread pool, so no
    /// `BatchGrammarMatcher` instance is required. Call as
    /// `BatchGrammarMatcher::batch_rollback(&mut matchers, &counts)`.
    pub fn batch_rollback(matchers: &mut [GrammarMatcher], num_tokens: &[i32]) {
        let matchers_ptr = matchers.as_mut_ptr();
        let num_matchers = matchers.len();
        let num_tokens_ptr = num_tokens.as_ptr();
        let num_tokens_len = num_tokens.len();

        // See `batch_fill_next_token_bitmask` for why no write-back is needed:
        // `GrammarMatcher` is a shared_ptr<Impl> PIMPL, so the vector entries
        // alias the same Impl as the caller's slice.
        cpp!(unsafe [
            matchers_ptr as "xgrammar::GrammarMatcher*",
            num_matchers as "size_t",
            num_tokens_ptr as "const int*",
            num_tokens_len as "size_t"
        ] {
            std::vector<xgrammar::GrammarMatcher> matchers_vec(
                matchers_ptr, matchers_ptr + num_matchers
            );
            std::vector<int> num_tokens_vec(num_tokens_ptr, num_tokens_ptr + num_tokens_len);
            xgrammar::BatchGrammarMatcher::BatchRollback(&matchers_vec, num_tokens_vec);
        });
    }
}