llguidance/json/
compiler.rs

1use crate::api::LLGuidanceOptions;
2use crate::grammar_builder::GrammarResult;
3use crate::HashMap;
4use anyhow::{anyhow, Context, Result};
5use derivre::{JsonQuoteOptions, RegexAst};
6use indexmap::IndexMap;
7use serde::{Deserialize, Serialize};
8use serde_json::{json, Value};
9
10use super::numeric::{check_number_bounds, rx_float_range, rx_int_range, Decimal};
11use super::schema::{build_schema, Schema};
12use super::RetrieveWrapper;
13
14use crate::{GrammarBuilder, NodeRef};
15
16// TODO: grammar size limit
17// TODO: array maxItems etc limits
18// TODO: schemastore/src/schemas/json/BizTalkServerApplicationSchema.json - this breaks 1M fuel on lexer, why?!
19
20#[derive(Debug, Clone, Deserialize, Serialize)]
21#[serde(default, deny_unknown_fields)]
22pub struct JsonCompileOptions {
23    pub item_separator: String,
24    pub key_separator: String,
25    pub whitespace_flexible: bool,
26    pub whitespace_pattern: Option<String>,
27    pub coerce_one_of: bool,
28    #[serde(skip)]
29    pub retriever: Option<RetrieveWrapper>,
30}
31
32fn json_dumps(target: &serde_json::Value) -> String {
33    serde_json::to_string(target).unwrap()
34}
35
36#[derive(Debug)]
37struct UnsatisfiableSchemaError {
38    message: String,
39}
40
41impl std::fmt::Display for UnsatisfiableSchemaError {
42    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
43        write!(f, "Unsatisfiable schema: {}", self.message)
44    }
45}
46
47const CHAR_REGEX: &str = r#"(\\([\"\\\/bfnrt]|u[a-fA-F0-9]{4})|[^\"\\\x00-\x1F\x7F])"#;
48
49struct Compiler {
50    builder: GrammarBuilder,
51    options: JsonCompileOptions,
52    definitions: HashMap<String, NodeRef>,
53    pending_definitions: Vec<(String, NodeRef)>,
54
55    any_cache: Option<NodeRef>,
56    string_cache: Option<NodeRef>,
57}
58
59macro_rules! cache {
60    ($field:expr, $gen:expr) => {{
61        if $field.is_none() {
62            $field = Some($gen);
63        };
64        return ($field).unwrap();
65    }};
66}
67
68impl Default for JsonCompileOptions {
69    fn default() -> Self {
70        Self {
71            item_separator: ",".to_string(),
72            key_separator: ":".to_string(),
73            whitespace_pattern: None,
74            whitespace_flexible: true,
75            coerce_one_of: false,
76            retriever: None,
77        }
78    }
79}
80
81impl JsonCompileOptions {
82    pub fn json_to_llg(&self, builder: GrammarBuilder, schema: Value) -> Result<GrammarResult> {
83        let compiler = Compiler::new(self.clone(), builder);
84        #[cfg(feature = "jsonschema_validation")]
85        {
86            use crate::json_validation::validate_schema;
87            validate_schema(&schema)?;
88        }
89
90        compiler.execute(schema)
91    }
92
93    pub fn json_to_llg_no_validate(
94        &self,
95        builder: GrammarBuilder,
96        schema: Value,
97    ) -> Result<GrammarResult> {
98        let compiler = Compiler::new(self.clone(), builder);
99        compiler.execute(schema)
100    }
101
102    pub fn apply_to(&self, schema: &mut Value) {
103        schema.as_object_mut().unwrap().insert(
104            "x-guidance".to_string(),
105            serde_json::to_value(self).unwrap(),
106        );
107    }
108}
109
110impl Compiler {
111    pub fn new(options: JsonCompileOptions, builder: GrammarBuilder) -> Self {
112        Self {
113            builder,
114            options,
115            definitions: HashMap::default(),
116            pending_definitions: vec![],
117            any_cache: None,
118            string_cache: None,
119        }
120    }
121
122    pub fn execute(mut self, schema: Value) -> Result<GrammarResult> {
123        let skip = if let Some(pattern) = &self.options.whitespace_pattern {
124            RegexAst::Regex(pattern.clone())
125        } else if self.options.whitespace_flexible {
126            RegexAst::Regex(r"[\x20\x0A\x0D\x09]+".to_string())
127        } else {
128            RegexAst::NoMatch
129        };
130        let id = self
131            .builder
132            .add_grammar(LLGuidanceOptions::default(), skip)?;
133
134        let (compiled_schema, definitions) = build_schema(schema, self.options.retriever.clone())?;
135
136        let root = self.gen_json(&compiled_schema)?;
137        self.builder.set_start_node(root);
138
139        while let Some((path, pl)) = self.pending_definitions.pop() {
140            let schema = definitions
141                .get(&path)
142                .ok_or_else(|| anyhow!("Definition not found: {}", path))?;
143            let compiled = self.gen_json(schema)?;
144            self.builder.set_placeholder(pl, compiled);
145        }
146
147        Ok(self.builder.finalize(id))
148    }
149
150    fn gen_json(&mut self, json_schema: &Schema) -> Result<NodeRef> {
151        if let Some(ast) = self.regex_compile(json_schema)? {
152            return self.ast_lexeme(ast);
153        }
154        match json_schema {
155            Schema::Any => Ok(self.gen_json_any()),
156            Schema::Unsatisfiable { reason } => Err(anyhow!(UnsatisfiableSchemaError {
157                message: reason.to_string(),
158            })),
159
160            Schema::Array {
161                min_items,
162                max_items,
163                prefix_items,
164                items,
165            } => self.gen_json_array(
166                prefix_items,
167                items.as_deref().unwrap_or(&Schema::Any),
168                *min_items,
169                *max_items,
170            ),
171            Schema::Object {
172                properties,
173                additional_properties,
174                required,
175            } => self.gen_json_object(
176                properties,
177                additional_properties.as_deref().unwrap_or(&Schema::Any),
178                required.iter().cloned().collect(),
179            ),
180
181            Schema::AnyOf { options } => self.process_any_of(options),
182            Schema::OneOf { options } => self.process_one_of(options),
183            Schema::Ref { uri, .. } => self.get_definition(uri),
184
185            Schema::Null
186            | Schema::Boolean
187            | Schema::LiteralBool { .. }
188            | Schema::String { .. }
189            | Schema::Number { .. } => {
190                unreachable!("should be handled in regex_compile()")
191            }
192        }
193    }
194
195    fn process_one_of(&mut self, options: &[Schema]) -> Result<NodeRef> {
196        if self.options.coerce_one_of {
197            self.process_any_of(options)
198        } else {
199            Err(anyhow!("oneOf constraints are not supported. Enable 'coerce_one_of' option to approximate oneOf with anyOf"))
200        }
201    }
202
203    fn process_option(
204        &mut self,
205        option: &Schema,
206        regex_nodes: &mut Vec<RegexAst>,
207        cfg_nodes: &mut Vec<NodeRef>,
208    ) -> Result<()> {
209        match self.regex_compile(option)? {
210            Some(c) => regex_nodes.push(c),
211            None => cfg_nodes.push(self.gen_json(option)?),
212        }
213        Ok(())
214    }
215
216    fn process_any_of(&mut self, options: &[Schema]) -> Result<NodeRef> {
217        let mut regex_nodes = vec![];
218        let mut cfg_nodes = vec![];
219        let mut errors = vec![];
220
221        for option in options.iter() {
222            if let Err(err) = self.process_option(option, &mut regex_nodes, &mut cfg_nodes) {
223                match err.downcast_ref::<UnsatisfiableSchemaError>() {
224                    Some(_) => errors.push(err),
225                    None => return Err(err),
226                }
227            }
228        }
229
230        self.builder.check_limits()?;
231
232        if !regex_nodes.is_empty() {
233            let node = RegexAst::Or(regex_nodes);
234            let lex = self.ast_lexeme(node)?;
235            cfg_nodes.push(lex);
236        }
237
238        if !cfg_nodes.is_empty() {
239            Ok(self.builder.select(&cfg_nodes))
240        } else if let Some(e) = errors.pop() {
241            Err(anyhow!(UnsatisfiableSchemaError {
242                message: "All options in anyOf are unsatisfiable".to_string(),
243            })
244            .context(e))
245        } else {
246            Err(anyhow!(UnsatisfiableSchemaError {
247                message: "No options in anyOf".to_string(),
248            }))
249        }
250    }
251
252    fn json_int(
253        &mut self,
254        minimum: Option<f64>,
255        maximum: Option<f64>,
256        exclusive_minimum: bool,
257        exclusive_maximum: bool,
258        multiple_of: Option<Decimal>,
259    ) -> Result<RegexAst> {
260        check_number_bounds(
261            minimum,
262            maximum,
263            exclusive_minimum,
264            exclusive_maximum,
265            false,
266            multiple_of.clone(),
267        )
268        .map_err(|e| {
269            anyhow!(UnsatisfiableSchemaError {
270                message: e.to_string(),
271            })
272        })?;
273        let minimum = match (minimum, exclusive_minimum) {
274            (Some(min_val), true) => {
275                if min_val.fract() != 0.0 {
276                    Some(min_val.ceil())
277                } else {
278                    Some(min_val + 1.0)
279                }
280            }
281            (Some(min_val), false) => Some(min_val.ceil()),
282            _ => None,
283        }
284        .map(|val| val as i64);
285        let maximum = match (maximum, exclusive_maximum) {
286            (Some(max_val), true) => {
287                if max_val.fract() != 0.0 {
288                    Some(max_val.floor())
289                } else {
290                    Some(max_val - 1.0)
291                }
292            }
293            (Some(max_val), false) => Some(max_val.floor()),
294            _ => None,
295        }
296        .map(|val| val as i64);
297        let rx = rx_int_range(minimum, maximum).with_context(|| {
298            format!(
299                "Failed to generate regex for integer range: min={:?}, max={:?}",
300                minimum, maximum
301            )
302        })?;
303        let mut ast = RegexAst::Regex(rx);
304        if let Some(d) = multiple_of {
305            ast = RegexAst::And(vec![ast, RegexAst::MultipleOf(d.coef, d.exp)]);
306        }
307        Ok(ast)
308    }
309
310    fn json_number(
311        &mut self,
312        minimum: Option<f64>,
313        maximum: Option<f64>,
314        exclusive_minimum: bool,
315        exclusive_maximum: bool,
316        multiple_of: Option<Decimal>,
317    ) -> Result<RegexAst> {
318        check_number_bounds(
319            minimum,
320            maximum,
321            exclusive_minimum,
322            exclusive_maximum,
323            false,
324            multiple_of.clone(),
325        )
326        .map_err(|e| {
327            anyhow!(UnsatisfiableSchemaError {
328                message: e.to_string(),
329            })
330        })?;
331        let rx = rx_float_range(minimum, maximum, !exclusive_minimum, !exclusive_maximum)
332            .with_context(|| {
333                format!(
334                    "Failed to generate regex for float range: min={:?}, max={:?}",
335                    minimum, maximum
336                )
337            })?;
338        let mut ast = RegexAst::Regex(rx);
339        if let Some(d) = multiple_of {
340            ast = RegexAst::And(vec![ast, RegexAst::MultipleOf(d.coef, d.exp)]);
341        }
342        Ok(ast)
343    }
344
345    fn ast_lexeme(&mut self, ast: RegexAst) -> Result<NodeRef> {
346        let id = self.builder.regex.add_ast(ast)?;
347        Ok(self.builder.lexeme(id))
348    }
349
350    fn json_simple_string(&mut self) -> NodeRef {
351        cache!(self.string_cache, {
352            let ast = self.json_quote(RegexAst::Regex("(?s:.*)".to_string()));
353            self.ast_lexeme(ast).unwrap()
354        })
355    }
356
357    fn get_definition(&mut self, reference: &str) -> Result<NodeRef> {
358        if let Some(definition) = self.definitions.get(reference) {
359            return Ok(*definition);
360        }
361        let r = self.builder.new_node(reference);
362        self.definitions.insert(reference.to_string(), r);
363        self.pending_definitions.push((reference.to_string(), r));
364        Ok(r)
365    }
366
367    fn gen_json_any(&mut self) -> NodeRef {
368        cache!(self.any_cache, {
369            let json_any = self.builder.new_node("json_any");
370            self.any_cache = Some(json_any); // avoid infinite recursion
371            let num = self.json_number(None, None, false, false, None).unwrap();
372            let tf = self.builder.regex.regex("true|false").unwrap();
373            let options = vec![
374                self.builder.string("null"),
375                self.builder.lexeme(tf),
376                self.ast_lexeme(num).unwrap(),
377                self.json_simple_string(),
378                self.gen_json_array(&[], &Schema::Any, 0, None).unwrap(),
379                self.gen_json_object(&IndexMap::new(), &Schema::Any, vec![])
380                    .unwrap(),
381            ];
382            let inner = self.builder.select(&options);
383            self.builder.set_placeholder(json_any, inner);
384            json_any
385        })
386    }
387
388    fn gen_json_object(
389        &mut self,
390        properties: &IndexMap<String, Schema>,
391        additional_properties: &Schema,
392        required: Vec<String>,
393    ) -> Result<NodeRef> {
394        let mut taken_names: Vec<String> = vec![];
395        let mut items: Vec<(NodeRef, bool)> = vec![];
396        for name in properties.keys().chain(
397            required
398                .iter()
399                .filter(|n| !properties.contains_key(n.as_str())),
400        ) {
401            let property_schema = properties.get(name).unwrap_or(additional_properties);
402            let is_required = required.contains(name);
403            // Quote (and escape) the name
404            let quoted_name = json_dumps(&json!(name));
405            let property = match self.gen_json(property_schema) {
406                Ok(node) => node,
407                Err(e) => match e.downcast_ref::<UnsatisfiableSchemaError>() {
408                    // If it's not an UnsatisfiableSchemaError, just propagate it normally
409                    None => return Err(e),
410                    // Property is optional; don't raise UnsatisfiableSchemaError but mark name as taken
411                    Some(_) if !is_required => {
412                        taken_names.push(quoted_name);
413                        continue;
414                    }
415                    // Property is required; add context and propagate UnsatisfiableSchemaError
416                    Some(_) => {
417                        return Err(e.context(UnsatisfiableSchemaError {
418                            message: format!("required property '{}' is unsatisfiable", name),
419                        }));
420                    }
421                },
422            };
423            let name = self.builder.string(&quoted_name);
424            taken_names.push(quoted_name);
425            let colon = self.builder.string(&self.options.key_separator);
426            let item = self.builder.join(&[name, colon, property]);
427            items.push((item, is_required));
428        }
429
430        match self.gen_json(additional_properties) {
431            Err(e) => {
432                if e.downcast_ref::<UnsatisfiableSchemaError>().is_none() {
433                    // Propagate errors that aren't UnsatisfiableSchemaError
434                    return Err(e);
435                }
436                // Ignore UnsatisfiableSchemaError for additionalProperties
437            }
438            Ok(property) => {
439                let name = if taken_names.is_empty() {
440                    self.json_simple_string()
441                } else {
442                    let taken_name_ids = taken_names
443                        .iter()
444                        .map(|n| self.builder.regex.literal(n.to_string()))
445                        .collect::<Vec<_>>();
446                    let taken = self.builder.regex.select(taken_name_ids);
447                    let not_taken = self.builder.regex.not(taken);
448                    let valid = self
449                        .builder
450                        .regex
451                        .regex(&format!("\"({})*\"", CHAR_REGEX))?;
452                    let valid_and_not_taken = self.builder.regex.and(vec![valid, not_taken]);
453                    self.builder.lexeme(valid_and_not_taken)
454                };
455                let colon = self.builder.string(&self.options.key_separator);
456                let item = self.builder.join(&[name, colon, property]);
457                let seq = self.sequence(item);
458                items.push((seq, false));
459            }
460        }
461        let opener = self.builder.string("{");
462        let inner = self.ordered_sequence(&items, false, &mut HashMap::default());
463        let closer = self.builder.string("}");
464        Ok(self.builder.join(&[opener, inner, closer]))
465    }
466
467    #[allow(clippy::type_complexity)]
468    fn ordered_sequence<'a>(
469        &mut self,
470        items: &'a [(NodeRef, bool)],
471        prefixed: bool,
472        cache: &mut HashMap<(&'a [(NodeRef, bool)], bool), NodeRef>,
473    ) -> NodeRef {
474        // Cache to reduce number of nodes from O(n^2) to O(n)
475        if let Some(node) = cache.get(&(items, prefixed)) {
476            return *node;
477        }
478        if items.is_empty() {
479            return self.builder.string("");
480        }
481        let comma = self.builder.string(&self.options.item_separator);
482        let (item, required) = items[0];
483        let rest = &items[1..];
484
485        let node = match (prefixed, required) {
486            (true, true) => {
487                // If we know we have preceeding elements, we can safely just add a (',' + e)
488                let rest_seq = self.ordered_sequence(rest, true, cache);
489                self.builder.join(&[comma, item, rest_seq])
490            }
491            (true, false) => {
492                // If we know we have preceeding elements, we can safely just add an optional(',' + e)
493                // TODO optimization: if the rest is all optional, we can nest the rest in the optional
494                let comma_item = self.builder.join(&[comma, item]);
495                let optional_comma_item = self.builder.optional(comma_item);
496                let rest_seq = self.ordered_sequence(rest, true, cache);
497                self.builder.join(&[optional_comma_item, rest_seq])
498            }
499            (false, true) => {
500                // No preceding elements, so we just add the element (no comma)
501                let rest_seq = self.ordered_sequence(rest, true, cache);
502                self.builder.join(&[item, rest_seq])
503            }
504            (false, false) => {
505                // No preceding elements, but our element is optional. If we add the element, the remaining
506                // will be prefixed, else they are not.
507                // TODO: same nested optimization as above
508                let prefixed_rest = self.ordered_sequence(rest, true, cache);
509                let unprefixed_rest = self.ordered_sequence(rest, false, cache);
510                let opts = [self.builder.join(&[item, prefixed_rest]), unprefixed_rest];
511                self.builder.select(&opts)
512            }
513        };
514        cache.insert((items, prefixed), node);
515        node
516    }
517
518    fn sequence(&mut self, item: NodeRef) -> NodeRef {
519        let comma = self.builder.string(&self.options.item_separator);
520        let item_comma = self.builder.join(&[item, comma]);
521        let item_comma_star = self.builder.zero_or_more(item_comma);
522        self.builder.join(&[item_comma_star, item])
523    }
524
525    fn json_quote(&self, ast: RegexAst) -> RegexAst {
526        RegexAst::JsonQuote(
527            Box::new(ast),
528            JsonQuoteOptions {
529                allowed_escapes: "nrbtf\\\"u".to_string(),
530                raw_mode: false,
531            },
532        )
533    }
534
535    fn regex_compile(&mut self, schema: &Schema) -> Result<Option<RegexAst>> {
536        fn literal_regex(rx: &str) -> Option<RegexAst> {
537            Some(RegexAst::Literal(rx.to_string()))
538        }
539
540        self.builder.check_limits()?;
541
542        let r = match schema {
543            Schema::Null => literal_regex("null"),
544            Schema::Boolean => Some(RegexAst::Regex("true|false".to_string())),
545            Schema::LiteralBool { value } => literal_regex(if *value { "true" } else { "false" }),
546
547            Schema::Number {
548                minimum,
549                maximum,
550                exclusive_minimum,
551                exclusive_maximum,
552                integer,
553                multiple_of,
554            } => {
555                let (minimum, exclusive_minimum) = match (minimum, exclusive_minimum) {
556                    (Some(min), Some(xmin)) => {
557                        if xmin >= min {
558                            (Some(*xmin), true)
559                        } else {
560                            (Some(*min), false)
561                        }
562                    }
563                    (Some(min), None) => (Some(*min), false),
564                    (None, Some(xmin)) => (Some(*xmin), true),
565                    (None, None) => (None, false),
566                };
567                let (maximum, exclusive_maximum) = match (maximum, exclusive_maximum) {
568                    (Some(max), Some(xmax)) => {
569                        if xmax <= max {
570                            (Some(*xmax), true)
571                        } else {
572                            (Some(*max), false)
573                        }
574                    }
575                    (Some(max), None) => (Some(*max), false),
576                    (None, Some(xmax)) => (Some(*xmax), true),
577                    (None, None) => (None, false),
578                };
579                Some(if *integer {
580                    self.json_int(
581                        minimum,
582                        maximum,
583                        exclusive_minimum,
584                        exclusive_maximum,
585                        multiple_of.clone(),
586                    )?
587                } else {
588                    self.json_number(
589                        minimum,
590                        maximum,
591                        exclusive_minimum,
592                        exclusive_maximum,
593                        multiple_of.clone(),
594                    )?
595                })
596            }
597
598            Schema::String {
599                min_length,
600                max_length,
601                regex,
602            } => {
603                return self
604                    .gen_json_string(*min_length, *max_length, regex.clone())
605                    .map(Some)
606            }
607
608            Schema::Any
609            | Schema::Unsatisfiable { .. }
610            | Schema::Array { .. }
611            | Schema::Object { .. }
612            | Schema::AnyOf { .. }
613            | Schema::OneOf { .. }
614            | Schema::Ref { .. } => None,
615        };
616        Ok(r)
617    }
618
619    fn gen_json_string(
620        &self,
621        min_length: u64,
622        max_length: Option<u64>,
623        regex: Option<RegexAst>,
624    ) -> Result<RegexAst> {
625        if let Some(max_length) = max_length {
626            if min_length > max_length {
627                return Err(anyhow!(UnsatisfiableSchemaError {
628                    message: format!(
629                        "minLength ({}) is greater than maxLength ({})",
630                        min_length, max_length
631                    ),
632                }));
633            }
634        }
635        if min_length == 0 && max_length.is_none() && regex.is_none() {
636            return Ok(self.json_quote(RegexAst::Regex("(?s:.*)".to_string())));
637        }
638        if let Some(mut ast) = regex {
639            let mut positive = false;
640
641            fn mk_rx_repr(ast: &RegexAst) -> String {
642                let mut rx_repr = String::new();
643                ast.write_to_str(&mut rx_repr, 1_000, None);
644                rx_repr
645            }
646
647            // special-case literals - the length is easy to check
648            if let RegexAst::Literal(s) = &ast {
649                let l = s.chars().count() as u64;
650
651                if l < min_length || l > max_length.unwrap_or(u64::MAX) {
652                    return Err(anyhow!(UnsatisfiableSchemaError {
653                        message: format!("Constant {:?} doesn't match length constraints", s)
654                    }));
655                }
656
657                positive = true;
658            } else if min_length != 0 || max_length.is_some() {
659                ast = RegexAst::And(vec![
660                    ast,
661                    RegexAst::Regex(format!(
662                        "(?s:.{{{},{}}})",
663                        min_length,
664                        max_length.map_or("".to_string(), |v| v.to_string())
665                    )),
666                ]);
667            } else {
668                positive = always_non_empty(&ast);
669                // eprintln!("positive:{} {}", positive, mk_rx_repr(&ast));
670            }
671
672            if !positive {
673                // Check if the regex is empty
674                let mut builder = derivre::RegexBuilder::new();
675                let expr = builder.mk(&ast)?;
676                // if regex is not positive, do the more expensive non-emptiness check
677                if !builder.exprset().is_positive(expr) {
678                    // in JSB, 13 cases above 2000;
679                    // 1 case above 5000:
680                    // "format": "email",
681                    // "pattern": "^\\w+([\\.-]?\\w+)*@\\w+([\\.-]?\\w+)*(\\.\\w{2,})+$",
682                    // "minLength": 6,
683                    //
684                    // (excluding two handwritten examples with minLength:10000)
685                    let mut regex = builder.to_regex_limited(expr, 10_000).map_err(|_| {
686                        anyhow!(
687                            "Unable to determine if regex is empty: {}",
688                            mk_rx_repr(&ast)
689                        )
690                    })?;
691                    if regex.always_empty() {
692                        return Err(anyhow!(UnsatisfiableSchemaError {
693                            message: format!("Regex is empty: {}", mk_rx_repr(&ast))
694                        }));
695                    }
696                }
697            }
698
699            ast = self.json_quote(ast);
700
701            Ok(ast)
702        } else {
703            Ok(self.json_quote(RegexAst::Regex(format!(
704                "(?s:.{{{},{}}})",
705                min_length,
706                max_length.map_or("".to_string(), |v| v.to_string())
707            ))))
708        }
709    }
710
711    fn gen_json_array(
712        &mut self,
713        prefix_items: &[Schema],
714        item_schema: &Schema,
715        min_items: u64,
716        max_items: Option<u64>,
717    ) -> Result<NodeRef> {
718        let mut max_items = max_items;
719
720        if let Some(max_items) = max_items {
721            if min_items > max_items {
722                return Err(anyhow!(UnsatisfiableSchemaError {
723                    message: format!(
724                        "minItems ({}) is greater than maxItems ({})",
725                        min_items, max_items
726                    ),
727                }));
728            }
729        }
730
731        let additional_item_grm = match self.gen_json(item_schema) {
732            Ok(node) => Some(node),
733            Err(e) => match e.downcast_ref::<UnsatisfiableSchemaError>() {
734                // If it's not an UnsatisfiableSchemaError, just propagate it normally
735                None => return Err(e),
736                // Item is optional; don't raise UnsatisfiableSchemaError
737                Some(_) if prefix_items.len() >= min_items as usize => None,
738                // Item is required; add context and propagate UnsatisfiableSchemaError
739                Some(_) => {
740                    return Err(e.context(UnsatisfiableSchemaError {
741                        message: "required item is unsatisfiable".to_string(),
742                    }));
743                }
744            },
745        };
746
747        let mut required_items = vec![];
748        let mut optional_items = vec![];
749
750        // If max_items is None, we can add an infinite tail of items later
751        let n_to_add = max_items.map_or(prefix_items.len().max(min_items as usize), |max| {
752            max as usize
753        });
754
755        for i in 0..n_to_add {
756            let item = if i < prefix_items.len() {
757                match self.gen_json(&prefix_items[i]) {
758                    Ok(node) => node,
759                    Err(e) => match e.downcast_ref::<UnsatisfiableSchemaError>() {
760                        // If it's not an UnsatisfiableSchemaError, just propagate it normally
761                        None => return Err(e),
762                        // Item is optional; don't raise UnsatisfiableSchemaError.
763                        // Set max_items to the current index, as we can't satisfy any more items.
764                        Some(_) if i >= min_items as usize => {
765                            max_items = Some(i as u64);
766                            break;
767                        }
768                        // Item is required; add context and propagate UnsatisfiableSchemaError
769                        Some(_) => {
770                            return Err(e.context(UnsatisfiableSchemaError {
771                                message: format!(
772                                    "prefixItems[{}] is unsatisfiable but minItems is {}",
773                                    i, min_items
774                                ),
775                            }));
776                        }
777                    },
778                }
779            } else if let Some(compiled) = &additional_item_grm {
780                *compiled
781            } else {
782                break;
783            };
784
785            if i < min_items as usize {
786                required_items.push(item);
787            } else {
788                optional_items.push(item);
789            }
790        }
791
792        if max_items.is_none() {
793            if let Some(additional_item) = additional_item_grm {
794                // Add an infinite tail of items
795                optional_items.push(self.sequence(additional_item));
796            }
797        }
798
799        let mut grammars: Vec<NodeRef> = vec![self.builder.string("[")];
800        let comma = self.builder.string(&self.options.item_separator);
801
802        if !required_items.is_empty() {
803            grammars.push(required_items[0]);
804            for item in &required_items[1..] {
805                grammars.push(comma);
806                grammars.push(*item);
807            }
808        }
809
810        if !optional_items.is_empty() {
811            let first = optional_items[0];
812            let tail =
813                optional_items
814                    .into_iter()
815                    .skip(1)
816                    .rev()
817                    .fold(self.builder.empty(), |acc, item| {
818                        let j = self.builder.join(&[comma, item, acc]);
819                        self.builder.optional(j)
820                    });
821            let tail = self.builder.join(&[first, tail]);
822
823            if !required_items.is_empty() {
824                let j = self.builder.join(&[comma, tail]);
825                grammars.push(self.builder.optional(j));
826            } else {
827                grammars.push(self.builder.optional(tail));
828            }
829        }
830
831        grammars.push(self.builder.string("]"));
832        Ok(self.builder.join(&grammars))
833    }
834}
835
836fn always_non_empty(ast: &RegexAst) -> bool {
837    match ast {
838        RegexAst::Or(asts) => asts.iter().any(always_non_empty),
839        RegexAst::Concat(asts) => asts.iter().all(always_non_empty),
840        RegexAst::Repeat(ast, _, _) | RegexAst::JsonQuote(ast, _) | RegexAst::LookAhead(ast) => {
841            always_non_empty(ast)
842        }
843
844        RegexAst::EmptyString
845        | RegexAst::Literal(_)
846        | RegexAst::ByteLiteral(_)
847        | RegexAst::Byte(_)
848        | RegexAst::ByteSet(_)
849        | RegexAst::MultipleOf(_, _) => true,
850
851        RegexAst::And(_)
852        | RegexAst::Not(_)
853        | RegexAst::NoMatch
854        | RegexAst::Regex(_)
855        | RegexAst::ExprRef(_) => false,
856    }
857}