Skip to main content

oxibonsai_runtime/
json_schema.rs

1//! JSON Schema-driven structured output constraint.
2//!
3//! Parses a subset of JSON Schema and generates a state machine that
4//! enforces valid JSON matching the schema at the token level.
5//!
6//! Supported schema features:
7//! - type: "object", "array", "string", "number", "integer", "boolean", "null"
8//! - properties (required/optional)
9//! - required fields list
10//! - enum values (string enums)
11//! - minLength / maxLength for strings
12//! - minimum / maximum for numbers
13//! - items (array element type)
14//! - maxItems / minItems for arrays
15
16use std::collections::HashMap;
17
18// ─────────────────────────────────────────────────────────────────────────────
19// Error type
20// ─────────────────────────────────────────────────────────────────────────────
21
22/// Errors arising from schema parsing, validation, or enforcement.
23#[derive(Debug, thiserror::Error)]
24pub enum SchemaError {
25    /// The schema JSON could not be parsed.
26    #[error("invalid schema JSON: {0}")]
27    InvalidJson(String),
28
29    /// A schema feature that is not supported was encountered.
30    #[error("unsupported schema feature: {0}")]
31    UnsupportedFeature(String),
32
33    /// The schema object is missing a required `"type"` field.
34    #[error("missing 'type' field in schema")]
35    MissingType,
36
37    /// The `"type"` field contained an unrecognized value.
38    #[error("unknown type: '{0}'")]
39    UnknownType(String),
40
41    /// Validation of a value against the schema failed.
42    #[error("validation error: {0}")]
43    ValidationError(String),
44
45    /// A constraint violation detected at a specific character position.
46    #[error("schema violation at position {pos}: {msg}")]
47    SchemaViolation {
48        /// Character offset in the generated text.
49        pos: usize,
50        /// Description of the violation.
51        msg: String,
52    },
53}
54
55// ─────────────────────────────────────────────────────────────────────────────
56// SchemaType — parsed schema representation
57// ─────────────────────────────────────────────────────────────────────────────
58
/// In-memory form of the supported JSON Schema subset.
#[derive(Debug, Clone)]
pub enum SchemaType {
    /// `"type": "object"` — named properties plus the keys that are mandatory.
    Object {
        /// Sub-schema for each declared property, keyed by property name.
        properties: HashMap<String, SchemaType>,
        /// Names of the properties that must appear.
        required: Vec<String>,
    },
    /// `"type": "array"` — optional element schema and length bounds.
    Array {
        /// Schema every element must satisfy, if one was given.
        items: Option<Box<SchemaType>>,
        /// Lower bound on the element count.
        min_items: Option<usize>,
        /// Upper bound on the element count.
        max_items: Option<usize>,
    },
    /// `"type": "string"` — optional enumeration and length bounds.
    String {
        /// Closed set of permitted values, if the schema declared `enum`.
        enum_values: Option<Vec<String>>,
        /// Lower bound on the length, counted in characters.
        min_length: Option<usize>,
        /// Upper bound on the length, counted in characters.
        max_length: Option<usize>,
    },
    /// `"type": "number"` — floating-point value with optional bounds.
    Number {
        /// Smallest permitted value (inclusive).
        minimum: Option<f64>,
        /// Largest permitted value (inclusive).
        maximum: Option<f64>,
    },
    /// `"type": "integer"` — whole number with optional bounds.
    Integer {
        /// Smallest permitted value (inclusive).
        minimum: Option<i64>,
        /// Largest permitted value (inclusive).
        maximum: Option<i64>,
    },
    /// `"type": "boolean"` — the literals `true` and `false`.
    Boolean,
    /// `"type": "null"` — the literal `null`.
    Null,
    /// `anyOf` — the value must match at least one of the listed schemas.
    AnyOf(Vec<SchemaType>),
}

impl SchemaType {
    /// Short, static label for this variant (the JSON Schema type name, or
    /// `"anyOf"` for unions).
    pub fn type_name(&self) -> &'static str {
        match self {
            Self::Null => "null",
            Self::Boolean => "boolean",
            Self::Integer { .. } => "integer",
            Self::Number { .. } => "number",
            Self::String { .. } => "string",
            Self::Array { .. } => "array",
            Self::Object { .. } => "object",
            Self::AnyOf(_) => "anyOf",
        }
    }

    /// Reports whether `key` is listed as required.
    ///
    /// Always `false` for anything that is not an object schema.
    pub fn is_required_property(&self, key: &str) -> bool {
        if let Self::Object { required, .. } = self {
            required.iter().map(String::as_str).any(|name| name == key)
        } else {
            false
        }
    }
}
132
133// ─────────────────────────────────────────────────────────────────────────────
134// Internal JSON value — minimal hand-rolled representation
135// ─────────────────────────────────────────────────────────────────────────────
136
/// Hand-rolled JSON tree used only while reading a schema, so the module needs
/// no serde dependency.
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum JsonValue {
    Null,
    Bool(bool),
    Number(f64),
    Str(String),
    Array(Vec<JsonValue>),
    /// Members in source order; duplicate keys are kept as written.
    Object(Vec<(String, JsonValue)>),
}

impl JsonValue {
    /// Borrows the text if this is a string value.
    fn as_str(&self) -> Option<&str> {
        if let Self::Str(text) = self {
            Some(text)
        } else {
            None
        }
    }

    /// Borrows the elements if this is an array value.
    fn as_array(&self) -> Option<&[JsonValue]> {
        if let Self::Array(elements) = self {
            Some(elements)
        } else {
            None
        }
    }

    /// Borrows the ordered key/value members if this is an object value.
    fn as_object(&self) -> Option<&[(String, JsonValue)]> {
        if let Self::Object(members) = self {
            Some(members)
        } else {
            None
        }
    }

    /// Copies out the number if this is a numeric value.
    fn as_f64(&self) -> Option<f64> {
        if let Self::Number(number) = self {
            Some(*number)
        } else {
            None
        }
    }

    /// Finds the first member named `key`; `None` for non-objects or when the
    /// key is absent.
    fn get(&self, key: &str) -> Option<&JsonValue> {
        let members = self.as_object()?;
        members
            .iter()
            .find(|(name, _)| name == key)
            .map(|(_, member)| member)
    }
}
188
189// ─────────────────────────────────────────────────────────────────────────────
190// Minimal JSON parser
191// ─────────────────────────────────────────────────────────────────────────────
192
/// Skip leading JSON whitespace: space, horizontal tab, line feed and
/// carriage return.
///
/// Only these four characters are insignificant whitespace in JSON. Other
/// Unicode whitespace (no-break space, line separator, ...) is deliberately
/// left in place so the parser reports it as an unexpected character instead
/// of silently accepting it.
fn skip_ws(input: &str) -> &str {
    input.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'))
}
197
198/// Parse a JSON value from the beginning of `input`.
199/// Returns `(value, remaining_input)`.
200fn parse_json_value(input: &str) -> Result<(JsonValue, &str), SchemaError> {
201    let s = skip_ws(input);
202    if s.is_empty() {
203        return Err(SchemaError::InvalidJson("unexpected end of input".into()));
204    }
205
206    let first = s.as_bytes()[0];
207    match first {
208        b'"' => parse_json_string(s),
209        b'{' => parse_json_object(s),
210        b'[' => parse_json_array(s),
211        b't' | b'f' => parse_json_bool(s),
212        b'n' => parse_json_null(s),
213        b'-' | b'0'..=b'9' => parse_json_number(s),
214        _ => Err(SchemaError::InvalidJson(format!(
215            "unexpected character '{}'",
216            s.chars().next().unwrap_or('?')
217        ))),
218    }
219}
220
221/// Parse a JSON string (including the surrounding quotes).
222fn parse_json_string(input: &str) -> Result<(JsonValue, &str), SchemaError> {
223    debug_assert!(input.starts_with('"'));
224    let mut chars = input[1..].char_indices();
225    let mut result = String::new();
226    loop {
227        match chars.next() {
228            None => return Err(SchemaError::InvalidJson("unterminated string".into())),
229            Some((_, '\\')) => match chars.next() {
230                Some((_, '"')) => result.push('"'),
231                Some((_, '\\')) => result.push('\\'),
232                Some((_, '/')) => result.push('/'),
233                Some((_, 'n')) => result.push('\n'),
234                Some((_, 'r')) => result.push('\r'),
235                Some((_, 't')) => result.push('\t'),
236                Some((_, 'b')) => result.push('\u{0008}'),
237                Some((_, 'f')) => result.push('\u{000C}'),
238                Some((_, 'u')) => {
239                    let hex = collect_n_chars(&mut chars, 4)?;
240                    let cp = u32::from_str_radix(&hex, 16).map_err(|_| {
241                        SchemaError::InvalidJson(format!("invalid unicode escape: \\u{hex}"))
242                    })?;
243                    let c = char::from_u32(cp).ok_or_else(|| {
244                        SchemaError::InvalidJson(format!("invalid codepoint: U+{cp:04X}"))
245                    })?;
246                    result.push(c);
247                }
248                Some((_, c)) => {
249                    return Err(SchemaError::InvalidJson(format!("unknown escape: \\{c}")))
250                }
251                None => return Err(SchemaError::InvalidJson("unterminated escape".into())),
252            },
253            Some((i, '"')) => {
254                // i is the index *within* input[1..], the closing quote
255                let rest = &input[1 + i + 1..];
256                return Ok((JsonValue::Str(result), rest));
257            }
258            Some((_, c)) => result.push(c),
259        }
260    }
261}
262
263/// Helper: collect `n` chars from an iterator into a String.
264fn collect_n_chars(iter: &mut std::str::CharIndices<'_>, n: usize) -> Result<String, SchemaError> {
265    let mut s = String::with_capacity(n);
266    for _ in 0..n {
267        match iter.next() {
268            Some((_, c)) => s.push(c),
269            None => return Err(SchemaError::InvalidJson("unexpected end in escape".into())),
270        }
271    }
272    Ok(s)
273}
274
275/// Parse a JSON number (integer or floating-point).
276fn parse_json_number(input: &str) -> Result<(JsonValue, &str), SchemaError> {
277    let mut end = 0;
278    let bytes = input.as_bytes();
279    // optional leading minus
280    if end < bytes.len() && bytes[end] == b'-' {
281        end += 1;
282    }
283    // integer part
284    while end < bytes.len() && bytes[end].is_ascii_digit() {
285        end += 1;
286    }
287    // fractional part
288    if end < bytes.len() && bytes[end] == b'.' {
289        end += 1;
290        while end < bytes.len() && bytes[end].is_ascii_digit() {
291            end += 1;
292        }
293    }
294    // exponent
295    if end < bytes.len() && (bytes[end] == b'e' || bytes[end] == b'E') {
296        end += 1;
297        if end < bytes.len() && (bytes[end] == b'+' || bytes[end] == b'-') {
298            end += 1;
299        }
300        while end < bytes.len() && bytes[end].is_ascii_digit() {
301            end += 1;
302        }
303    }
304    if end == 0 || (end == 1 && bytes[0] == b'-') {
305        return Err(SchemaError::InvalidJson("expected number".into()));
306    }
307    let num_str = &input[..end];
308    let val: f64 = num_str
309        .parse()
310        .map_err(|_| SchemaError::InvalidJson(format!("invalid number: {num_str}")))?;
311    Ok((JsonValue::Number(val), &input[end..]))
312}
313
314/// Parse a JSON boolean.
315fn parse_json_bool(input: &str) -> Result<(JsonValue, &str), SchemaError> {
316    if let Some(rest) = input.strip_prefix("true") {
317        Ok((JsonValue::Bool(true), rest))
318    } else if let Some(rest) = input.strip_prefix("false") {
319        Ok((JsonValue::Bool(false), rest))
320    } else {
321        Err(SchemaError::InvalidJson("expected boolean".into()))
322    }
323}
324
325/// Parse the JSON `null` literal.
326fn parse_json_null(input: &str) -> Result<(JsonValue, &str), SchemaError> {
327    if let Some(rest) = input.strip_prefix("null") {
328        Ok((JsonValue::Null, rest))
329    } else {
330        Err(SchemaError::InvalidJson("expected null".into()))
331    }
332}
333
334/// Parse a JSON object.
335fn parse_json_object(input: &str) -> Result<(JsonValue, &str), SchemaError> {
336    debug_assert!(input.starts_with('{'));
337    let mut rest = skip_ws(&input[1..]);
338    let mut pairs = Vec::new();
339    if let Some(after_brace) = rest.strip_prefix('}') {
340        return Ok((JsonValue::Object(pairs), after_brace));
341    }
342    loop {
343        // key
344        if !rest.starts_with('"') {
345            return Err(SchemaError::InvalidJson("expected string key".into()));
346        }
347        let (key_val, after_key) = parse_json_string(rest)?;
348        let key = match key_val {
349            JsonValue::Str(s) => s,
350            _ => return Err(SchemaError::InvalidJson("key must be string".into())),
351        };
352        let after_colon = skip_ws(after_key);
353        if !after_colon.starts_with(':') {
354            return Err(SchemaError::InvalidJson("expected ':' after key".into()));
355        }
356        let after_colon = skip_ws(&after_colon[1..]);
357        let (val, after_val) = parse_json_value(after_colon)?;
358        pairs.push((key, val));
359        rest = skip_ws(after_val);
360        if let Some(after_brace) = rest.strip_prefix('}') {
361            return Ok((JsonValue::Object(pairs), after_brace));
362        }
363        if rest.starts_with(',') {
364            rest = skip_ws(&rest[1..]);
365        } else {
366            return Err(SchemaError::InvalidJson(
367                "expected ',' or '}' in object".into(),
368            ));
369        }
370    }
371}
372
373/// Parse a JSON array.
374fn parse_json_array(input: &str) -> Result<(JsonValue, &str), SchemaError> {
375    debug_assert!(input.starts_with('['));
376    let mut rest = skip_ws(&input[1..]);
377    let mut items = Vec::new();
378    if let Some(after_bracket) = rest.strip_prefix(']') {
379        return Ok((JsonValue::Array(items), after_bracket));
380    }
381    loop {
382        let (val, after_val) = parse_json_value(rest)?;
383        items.push(val);
384        rest = skip_ws(after_val);
385        if let Some(after_bracket) = rest.strip_prefix(']') {
386            return Ok((JsonValue::Array(items), after_bracket));
387        }
388        if rest.starts_with(',') {
389            rest = skip_ws(&rest[1..]);
390        } else {
391            return Err(SchemaError::InvalidJson(
392                "expected ',' or ']' in array".into(),
393            ));
394        }
395    }
396}
397
398// ─────────────────────────────────────────────────────────────────────────────
399// Schema conversion
400// ─────────────────────────────────────────────────────────────────────────────
401
402/// Parse a JSON schema from its JSON string representation.
403///
404/// Supports the subset described in [`SchemaType`].
405pub fn parse_schema(schema_json: &str) -> Result<SchemaType, SchemaError> {
406    let (value, rest) = parse_json_value(schema_json)?;
407    let rest_trimmed = skip_ws(rest);
408    if !rest_trimmed.is_empty() {
409        return Err(SchemaError::InvalidJson(format!(
410            "trailing characters after schema: {rest_trimmed}"
411        )));
412    }
413    json_value_to_schema(&value)
414}
415
416/// Convert a parsed [`JsonValue`] into a [`SchemaType`].
417fn json_value_to_schema(value: &JsonValue) -> Result<SchemaType, SchemaError> {
418    let _obj = value
419        .as_object()
420        .ok_or_else(|| SchemaError::InvalidJson("schema must be an object".into()))?;
421
422    // Check for anyOf
423    if let Some(any_of_val) = value.get("anyOf") {
424        let arr = any_of_val
425            .as_array()
426            .ok_or_else(|| SchemaError::InvalidJson("anyOf must be an array".into()))?;
427        let schemas: Result<Vec<SchemaType>, _> = arr.iter().map(json_value_to_schema).collect();
428        return Ok(SchemaType::AnyOf(schemas?));
429    }
430
431    let type_val = value.get("type").ok_or(SchemaError::MissingType)?;
432    let type_str = type_val
433        .as_str()
434        .ok_or_else(|| SchemaError::InvalidJson("'type' must be a string".into()))?;
435
436    match type_str {
437        "object" => {
438            let mut properties = HashMap::new();
439            if let Some(props_val) = value.get("properties") {
440                if let Some(props_obj) = props_val.as_object() {
441                    for (k, v) in props_obj {
442                        let sub = json_value_to_schema(v)?;
443                        properties.insert(k.clone(), sub);
444                    }
445                }
446            }
447            let mut required = Vec::new();
448            if let Some(req_val) = value.get("required") {
449                if let Some(arr) = req_val.as_array() {
450                    for item in arr {
451                        if let Some(s) = item.as_str() {
452                            required.push(s.to_string());
453                        }
454                    }
455                }
456            }
457            Ok(SchemaType::Object {
458                properties,
459                required,
460            })
461        }
462        "array" => {
463            let items = match value.get("items") {
464                Some(v) => Some(Box::new(json_value_to_schema(v)?)),
465                None => None,
466            };
467            let min_items = value
468                .get("minItems")
469                .and_then(|v| v.as_f64())
470                .map(|n| n as usize);
471            let max_items = value
472                .get("maxItems")
473                .and_then(|v| v.as_f64())
474                .map(|n| n as usize);
475            Ok(SchemaType::Array {
476                items,
477                min_items,
478                max_items,
479            })
480        }
481        "string" => {
482            let enum_values = value.get("enum").and_then(|v| {
483                v.as_array().map(|arr| {
484                    arr.iter()
485                        .filter_map(|item| item.as_str().map(|s| s.to_string()))
486                        .collect()
487                })
488            });
489            let min_length = value
490                .get("minLength")
491                .and_then(|v| v.as_f64())
492                .map(|n| n as usize);
493            let max_length = value
494                .get("maxLength")
495                .and_then(|v| v.as_f64())
496                .map(|n| n as usize);
497            Ok(SchemaType::String {
498                enum_values,
499                min_length,
500                max_length,
501            })
502        }
503        "number" => {
504            let minimum = value.get("minimum").and_then(|v| v.as_f64());
505            let maximum = value.get("maximum").and_then(|v| v.as_f64());
506            Ok(SchemaType::Number { minimum, maximum })
507        }
508        "integer" => {
509            let minimum = value
510                .get("minimum")
511                .and_then(|v| v.as_f64())
512                .map(|n| n as i64);
513            let maximum = value
514                .get("maximum")
515                .and_then(|v| v.as_f64())
516                .map(|n| n as i64);
517            Ok(SchemaType::Integer { minimum, maximum })
518        }
519        "boolean" => Ok(SchemaType::Boolean),
520        "null" => Ok(SchemaType::Null),
521        other => Err(SchemaError::UnknownType(other.to_string())),
522    }
523}
524
525// ─────────────────────────────────────────────────────────────────────────────
526// SchemaState — state machine for tracking generation progress
527// ─────────────────────────────────────────────────────────────────────────────
528
529/// A context frame on the schema-state stack.
530#[derive(Debug, Clone)]
531#[allow(clippy::enum_variant_names)]
532enum ContextFrame {
533    /// Inside an object: tracking which keys have been emitted.
534    ObjectStart {
535        schema: SchemaType,
536        emitted_keys: Vec<String>,
537        pending_value: bool,
538    },
539    /// Inside an array: tracking element count.
540    ArrayStart { schema: SchemaType, count: usize },
541    /// Inside a string literal.
542    StringStart {
543        constraints: Option<(Option<usize>, Option<usize>)>,
544    },
545    /// Inside a number literal.
546    NumberStart,
547    /// Expecting a value matching a specific schema.
548    ValueStart { schema: SchemaType },
549}
550
551/// State machine that tracks JSON generation progress against a schema.
552///
553/// Characters are fed one at a time; the machine maintains a stack of context
554/// frames mirroring the nesting structure of the JSON being generated.
555#[derive(Debug, Clone)]
556pub struct SchemaState {
557    /// Stack of context frames.
558    stack: Vec<ContextFrame>,
559    /// Characters generated so far.
560    buffer: String,
561    /// Whether we've completed the root value.
562    pub is_complete: bool,
563}
564
565impl SchemaState {
566    /// Create a new state machine rooted at the given schema.
567    pub fn new(schema: &SchemaType) -> Self {
568        Self {
569            stack: vec![ContextFrame::ValueStart {
570                schema: schema.clone(),
571            }],
572            buffer: String::new(),
573            is_complete: false,
574        }
575    }
576
577    /// Feed a character and check if it's valid according to the schema.
578    ///
579    /// Returns `Ok(true)` if the character was accepted, `Ok(false)` if it
580    /// was rejected but didn't violate hard constraints, or `Err` on a
581    /// definite schema violation.
582    pub fn feed_char(&mut self, ch: char) -> Result<bool, SchemaError> {
583        self.buffer.push(ch);
584
585        if self.is_complete {
586            return Err(SchemaError::SchemaViolation {
587                pos: self.buffer.len(),
588                msg: "input continues after root value is complete".into(),
589            });
590        }
591
592        if self.stack.is_empty() {
593            self.is_complete = true;
594            return Ok(true);
595        }
596
597        // Peek at the top frame to decide what to do.
598        let accepted = self.process_char(ch)?;
599        Ok(accepted)
600    }
601
602    /// Internal: process one character against the current top frame.
603    fn process_char(&mut self, ch: char) -> Result<bool, SchemaError> {
604        // We need to pop the top frame, inspect it, and potentially push
605        // replacement frames.
606        let frame = match self.stack.last() {
607            Some(_) => self.stack.pop(),
608            None => {
609                self.is_complete = true;
610                return Ok(ch.is_ascii_whitespace());
611            }
612        };
613
614        match frame {
615            Some(ContextFrame::ValueStart { schema }) => {
616                // We're expecting a new value. The first character tells us
617                // what kind of JSON value this will be.
618                match ch {
619                    '{' => {
620                        if let SchemaType::Object { .. } | SchemaType::AnyOf(_) = &schema {
621                            self.stack.push(ContextFrame::ObjectStart {
622                                schema,
623                                emitted_keys: Vec::new(),
624                                pending_value: false,
625                            });
626                            Ok(true)
627                        } else {
628                            Err(SchemaError::SchemaViolation {
629                                pos: self.buffer.len(),
630                                msg: format!("expected {}, got object", schema.type_name()),
631                            })
632                        }
633                    }
634                    '[' => {
635                        if let SchemaType::Array { .. } | SchemaType::AnyOf(_) = &schema {
636                            self.stack
637                                .push(ContextFrame::ArrayStart { schema, count: 0 });
638                            Ok(true)
639                        } else {
640                            Err(SchemaError::SchemaViolation {
641                                pos: self.buffer.len(),
642                                msg: format!("expected {}, got array", schema.type_name()),
643                            })
644                        }
645                    }
646                    '"' => match &schema {
647                        SchemaType::String {
648                            min_length,
649                            max_length,
650                            ..
651                        } => {
652                            self.stack.push(ContextFrame::StringStart {
653                                constraints: Some((*min_length, *max_length)),
654                            });
655                            Ok(true)
656                        }
657                        SchemaType::AnyOf(_) => {
658                            self.stack
659                                .push(ContextFrame::StringStart { constraints: None });
660                            Ok(true)
661                        }
662                        _ => Err(SchemaError::SchemaViolation {
663                            pos: self.buffer.len(),
664                            msg: format!("expected {}, got string", schema.type_name()),
665                        }),
666                    },
667                    't' | 'f' => {
668                        if matches!(&schema, SchemaType::Boolean | SchemaType::AnyOf(_)) {
669                            // We'll just accept subsequent chars of true/false
670                            self.stack.push(ContextFrame::ValueStart { schema });
671                            Ok(true)
672                        } else {
673                            Err(SchemaError::SchemaViolation {
674                                pos: self.buffer.len(),
675                                msg: format!("expected {}, got boolean", schema.type_name()),
676                            })
677                        }
678                    }
679                    'r' | 'u' | 'e' | 'a' | 'l' | 's' => {
680                        // Continuation of true/false/null keywords
681                        Ok(true)
682                    }
683                    'n' => {
684                        if matches!(
685                            &schema,
686                            SchemaType::Null | SchemaType::AnyOf(_) | SchemaType::Boolean
687                        ) {
688                            self.stack.push(ContextFrame::ValueStart { schema });
689                            Ok(true)
690                        } else {
691                            Err(SchemaError::SchemaViolation {
692                                pos: self.buffer.len(),
693                                msg: format!("expected {}, got null", schema.type_name()),
694                            })
695                        }
696                    }
697                    '-' | '0'..='9' => {
698                        if matches!(
699                            &schema,
700                            SchemaType::Number { .. }
701                                | SchemaType::Integer { .. }
702                                | SchemaType::AnyOf(_)
703                        ) {
704                            self.stack.push(ContextFrame::NumberStart);
705                            Ok(true)
706                        } else {
707                            Err(SchemaError::SchemaViolation {
708                                pos: self.buffer.len(),
709                                msg: format!("expected {}, got number", schema.type_name()),
710                            })
711                        }
712                    }
713                    c if c.is_ascii_whitespace() => {
714                        // Skip whitespace before value
715                        self.stack.push(ContextFrame::ValueStart { schema });
716                        Ok(true)
717                    }
718                    _ => Err(SchemaError::SchemaViolation {
719                        pos: self.buffer.len(),
720                        msg: format!("unexpected character '{ch}'"),
721                    }),
722                }
723            }
724            Some(ContextFrame::ObjectStart {
725                schema,
726                emitted_keys,
727                pending_value,
728            }) => {
729                if pending_value {
730                    // We just finished reading a key, expecting ':'
731                    if ch == ':' || ch.is_ascii_whitespace() {
732                        self.stack.push(ContextFrame::ObjectStart {
733                            schema,
734                            emitted_keys,
735                            pending_value: ch != ':',
736                        });
737                        Ok(true)
738                    } else {
739                        Err(SchemaError::SchemaViolation {
740                            pos: self.buffer.len(),
741                            msg: format!("expected ':' in object, got '{ch}'"),
742                        })
743                    }
744                } else {
745                    match ch {
746                        '}' => Ok(true),
747                        '"' => {
748                            // Start of a key
749                            self.stack.push(ContextFrame::ObjectStart {
750                                schema,
751                                emitted_keys,
752                                pending_value: true,
753                            });
754                            Ok(true)
755                        }
756                        ',' | ' ' | '\n' | '\r' | '\t' => {
757                            self.stack.push(ContextFrame::ObjectStart {
758                                schema,
759                                emitted_keys,
760                                pending_value: false,
761                            });
762                            Ok(true)
763                        }
764                        _ => Ok(true), // Accept other chars during object parsing
765                    }
766                }
767            }
768            Some(ContextFrame::ArrayStart { schema, count }) => match ch {
769                ']' => Ok(true),
770                ',' => {
771                    self.stack.push(ContextFrame::ArrayStart {
772                        schema,
773                        count: count + 1,
774                    });
775                    Ok(true)
776                }
777                _ => {
778                    self.stack.push(ContextFrame::ArrayStart { schema, count });
779                    Ok(true)
780                }
781            },
782            Some(ContextFrame::StringStart { constraints }) => match ch {
783                '"' => Ok(true), // End of string
784                '\\' => {
785                    self.stack.push(ContextFrame::StringStart { constraints });
786                    Ok(true)
787                }
788                _ => {
789                    self.stack.push(ContextFrame::StringStart { constraints });
790                    Ok(true)
791                }
792            },
793            Some(ContextFrame::NumberStart) => {
794                if ch.is_ascii_digit()
795                    || ch == '.'
796                    || ch == '-'
797                    || ch == 'e'
798                    || ch == 'E'
799                    || ch == '+'
800                {
801                    self.stack.push(ContextFrame::NumberStart);
802                    Ok(true)
803                } else {
804                    // Number ended, this char belongs to the parent
805                    Ok(true)
806                }
807            }
808            None => {
809                self.is_complete = true;
810                Ok(ch.is_ascii_whitespace())
811            }
812        }
813    }
814
815    /// Get the set of valid next characters at the current position.
816    ///
817    /// This is a simplified heuristic — for complex schemas, the set may be
818    /// conservative (allowing more than strictly valid).
819    pub fn valid_next_chars(&self) -> Vec<char> {
820        match self.stack.last() {
821            None => vec![],
822            Some(ContextFrame::ValueStart { schema }) => match schema {
823                SchemaType::Object { .. } => vec!['{', ' ', '\n'],
824                SchemaType::Array { .. } => vec!['[', ' ', '\n'],
825                SchemaType::String { .. } => vec!['"'],
826                SchemaType::Number { .. } | SchemaType::Integer { .. } => {
827                    vec!['-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
828                }
829                SchemaType::Boolean => vec!['t', 'f'],
830                SchemaType::Null => vec!['n'],
831                SchemaType::AnyOf(_) => {
832                    vec![
833                        '{', '[', '"', 't', 'f', 'n', '-', '0', '1', '2', '3', '4', '5', '6', '7',
834                        '8', '9',
835                    ]
836                }
837            },
838            Some(ContextFrame::ObjectStart { pending_value, .. }) => {
839                if *pending_value {
840                    vec![':', ' ']
841                } else {
842                    vec!['"', '}', ',', ' ', '\n']
843                }
844            }
845            Some(ContextFrame::ArrayStart { .. }) => {
846                vec![
847                    ']', ',', '"', '{', '[', 't', 'f', 'n', '-', '0', '1', '2', '3', '4', '5', '6',
848                    '7', '8', '9', ' ', '\n',
849                ]
850            }
851            Some(ContextFrame::StringStart { .. }) => {
852                // Almost any character is valid inside a string
853                let mut chars: Vec<char> = (0x20u8..=0x7Eu8).map(|b| b as char).collect();
854                chars.push('\n');
855                chars
856            }
857            Some(ContextFrame::NumberStart) => {
858                vec![
859                    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '-', 'e', 'E', '+',
860                ]
861            }
862        }
863    }
864
865    /// Get a regex pattern for valid continuations (for masking logits).
866    pub fn continuation_pattern(&self) -> String {
867        match self.stack.last() {
868            None => String::new(),
869            Some(ContextFrame::ValueStart { schema }) => match schema {
870                SchemaType::Object { .. } => "\\{".to_string(),
871                SchemaType::Array { .. } => "\\[".to_string(),
872                SchemaType::String { .. } => "\"".to_string(),
873                SchemaType::Number { .. } | SchemaType::Integer { .. } => "-?[0-9]".to_string(),
874                SchemaType::Boolean => "[tf]".to_string(),
875                SchemaType::Null => "n".to_string(),
876                SchemaType::AnyOf(_) => "[\\{\\[\"\\-0-9tfn]".to_string(),
877            },
878            Some(ContextFrame::ObjectStart { .. }) => "[\"\\},: \\n]".to_string(),
879            Some(ContextFrame::ArrayStart { .. }) => "[\\]\\[,0-9tfn\"\\{\\ \\n]".to_string(),
880            Some(ContextFrame::StringStart { .. }) => "[^\"\\\\]|\\\\.".to_string(),
881            Some(ContextFrame::NumberStart) => "[0-9.\\-eE+]".to_string(),
882        }
883    }
884
885    /// Returns a reference to the characters generated so far.
886    pub fn buffer(&self) -> &str {
887        &self.buffer
888    }
889
890    /// Returns the current nesting depth (number of frames on the stack).
891    pub fn depth(&self) -> usize {
892        self.stack.len()
893    }
894}
895
896// ─────────────────────────────────────────────────────────────────────────────
897// Validation
898// ─────────────────────────────────────────────────────────────────────────────
899
900/// Validate that `text` is valid JSON matching the given `schema`.
901///
902/// This parses the text as JSON and checks structural conformance.
903pub fn validate_against_schema(text: &str, schema: &SchemaType) -> Result<bool, SchemaError> {
904    let (value, rest) = parse_json_value(text)
905        .map_err(|e| SchemaError::ValidationError(format!("JSON parse error: {e}")))?;
906    let rest_trimmed = skip_ws(rest);
907    if !rest_trimmed.is_empty() {
908        return Err(SchemaError::ValidationError(
909            "trailing characters after JSON value".into(),
910        ));
911    }
912    validate_value_against_schema(&value, schema)
913}
914
915/// Recursively validate a parsed value against a schema.
916fn validate_value_against_schema(
917    value: &JsonValue,
918    schema: &SchemaType,
919) -> Result<bool, SchemaError> {
920    match schema {
921        SchemaType::Object {
922            properties,
923            required,
924        } => {
925            let pairs = match value.as_object() {
926                Some(p) => p,
927                None => return Ok(false),
928            };
929            // Check required fields
930            for key in required {
931                if !pairs.iter().any(|(k, _)| k == key) {
932                    return Ok(false);
933                }
934            }
935            // Validate each known property
936            for (k, v) in pairs {
937                if let Some(prop_schema) = properties.get(k) {
938                    if !validate_value_against_schema(v, prop_schema)? {
939                        return Ok(false);
940                    }
941                }
942            }
943            Ok(true)
944        }
945        SchemaType::Array {
946            items,
947            min_items,
948            max_items,
949        } => {
950            let arr = match value.as_array() {
951                Some(a) => a,
952                None => return Ok(false),
953            };
954            if let Some(min) = min_items {
955                if arr.len() < *min {
956                    return Ok(false);
957                }
958            }
959            if let Some(max) = max_items {
960                if arr.len() > *max {
961                    return Ok(false);
962                }
963            }
964            if let Some(item_schema) = items {
965                for elem in arr {
966                    if !validate_value_against_schema(elem, item_schema)? {
967                        return Ok(false);
968                    }
969                }
970            }
971            Ok(true)
972        }
973        SchemaType::String {
974            enum_values,
975            min_length,
976            max_length,
977        } => {
978            let s = match value.as_str() {
979                Some(s) => s,
980                None => return Ok(false),
981            };
982            if let Some(enums) = enum_values {
983                if !enums.iter().any(|e| e == s) {
984                    return Ok(false);
985                }
986            }
987            if let Some(min) = min_length {
988                if s.chars().count() < *min {
989                    return Ok(false);
990                }
991            }
992            if let Some(max) = max_length {
993                if s.chars().count() > *max {
994                    return Ok(false);
995                }
996            }
997            Ok(true)
998        }
999        SchemaType::Number { minimum, maximum } => {
1000            let n = match value.as_f64() {
1001                Some(n) => n,
1002                None => return Ok(false),
1003            };
1004            if let Some(min) = minimum {
1005                if n < *min {
1006                    return Ok(false);
1007                }
1008            }
1009            if let Some(max) = maximum {
1010                if n > *max {
1011                    return Ok(false);
1012                }
1013            }
1014            Ok(true)
1015        }
1016        SchemaType::Integer { minimum, maximum } => {
1017            let n = match value.as_f64() {
1018                Some(n) => n,
1019                None => return Ok(false),
1020            };
1021            // Check it's actually an integer
1022            if n.fract() != 0.0 {
1023                return Ok(false);
1024            }
1025            let i = n as i64;
1026            if let Some(min) = minimum {
1027                if i < *min {
1028                    return Ok(false);
1029                }
1030            }
1031            if let Some(max) = maximum {
1032                if i > *max {
1033                    return Ok(false);
1034                }
1035            }
1036            Ok(true)
1037        }
1038        SchemaType::Boolean => match value {
1039            JsonValue::Bool(_) => Ok(true),
1040            _ => Ok(false),
1041        },
1042        SchemaType::Null => match value {
1043            JsonValue::Null => Ok(true),
1044            _ => Ok(false),
1045        },
1046        SchemaType::AnyOf(schemas) => {
1047            for sub_schema in schemas {
1048                if validate_value_against_schema(value, sub_schema)? {
1049                    return Ok(true);
1050                }
1051            }
1052            Ok(false)
1053        }
1054    }
1055}
1056
1057// ─────────────────────────────────────────────────────────────────────────────
1058// Template / example generation
1059// ─────────────────────────────────────────────────────────────────────────────
1060
1061/// Generate a template/skeleton from a schema (for prompting).
1062///
1063/// The template uses placeholder values to illustrate the expected shape.
1064pub fn schema_template(schema: &SchemaType) -> String {
1065    match schema {
1066        SchemaType::Object {
1067            properties,
1068            required,
1069        } => {
1070            if properties.is_empty() {
1071                return "{}".to_string();
1072            }
1073            let mut parts = Vec::new();
1074            // Emit required properties first, then optional
1075            let mut sorted_keys: Vec<&String> = properties.keys().collect();
1076            sorted_keys.sort();
1077            for key in &sorted_keys {
1078                let sub = properties.get(*key).expect("key exists in map");
1079                let marker = if required.iter().any(|r| r == *key) {
1080                    " /* required */"
1081                } else {
1082                    " /* optional */"
1083                };
1084                parts.push(format!("  \"{key}\": {}{marker}", schema_template(sub)));
1085            }
1086            format!("{{\n{}\n}}", parts.join(",\n"))
1087        }
1088        SchemaType::Array { items, .. } => match items {
1089            Some(item_schema) => format!("[{}]", schema_template(item_schema)),
1090            None => "[]".to_string(),
1091        },
1092        SchemaType::String { enum_values, .. } => {
1093            if let Some(enums) = enum_values {
1094                if let Some(first) = enums.first() {
1095                    return format!("\"{first}\"");
1096                }
1097            }
1098            "\"<string>\"".to_string()
1099        }
1100        SchemaType::Number { .. } => "0.0".to_string(),
1101        SchemaType::Integer { .. } => "0".to_string(),
1102        SchemaType::Boolean => "true".to_string(),
1103        SchemaType::Null => "null".to_string(),
1104        SchemaType::AnyOf(schemas) => {
1105            if let Some(first) = schemas.first() {
1106                schema_template(first)
1107            } else {
1108                "null".to_string()
1109            }
1110        }
1111    }
1112}
1113
1114/// Generate an example JSON string matching the schema.
1115///
1116/// Produces valid JSON using sensible default values.
1117pub fn schema_example(schema: &SchemaType) -> String {
1118    match schema {
1119        SchemaType::Object {
1120            properties,
1121            required: _,
1122        } => {
1123            if properties.is_empty() {
1124                return "{}".to_string();
1125            }
1126            let mut parts = Vec::new();
1127            let mut sorted_keys: Vec<&String> = properties.keys().collect();
1128            sorted_keys.sort();
1129            for key in &sorted_keys {
1130                // In the example, emit all properties (required + optional)
1131                let sub = properties.get(*key).expect("key exists in map");
1132                parts.push(format!("\"{}\":{}", key, schema_example(sub)));
1133            }
1134            format!("{{{}}}", parts.join(","))
1135        }
1136        SchemaType::Array {
1137            items, min_items, ..
1138        } => {
1139            let count = min_items.unwrap_or(1).max(1);
1140            match items {
1141                Some(item_schema) => {
1142                    let elems: Vec<String> =
1143                        (0..count).map(|_| schema_example(item_schema)).collect();
1144                    format!("[{}]", elems.join(","))
1145                }
1146                None => "[]".to_string(),
1147            }
1148        }
1149        SchemaType::String {
1150            enum_values,
1151            min_length,
1152            ..
1153        } => {
1154            if let Some(enums) = enum_values {
1155                if let Some(first) = enums.first() {
1156                    return format!("\"{first}\"");
1157                }
1158            }
1159            let min_len = min_length.unwrap_or(0);
1160            let example = if min_len > 0 {
1161                "x".repeat(min_len)
1162            } else {
1163                "example".to_string()
1164            };
1165            format!("\"{example}\"")
1166        }
1167        SchemaType::Number { minimum, .. } => {
1168            let val = minimum.unwrap_or(0.0);
1169            if val == val.floor() {
1170                format!("{val:.1}")
1171            } else {
1172                format!("{val}")
1173            }
1174        }
1175        SchemaType::Integer { minimum, .. } => {
1176            let val = minimum.unwrap_or(0);
1177            format!("{val}")
1178        }
1179        SchemaType::Boolean => "true".to_string(),
1180        SchemaType::Null => "null".to_string(),
1181        SchemaType::AnyOf(schemas) => {
1182            if let Some(first) = schemas.first() {
1183                schema_example(first)
1184            } else {
1185                "null".to_string()
1186            }
1187        }
1188    }
1189}