Skip to main content

a3s_code_core/llm/
structured.rs

1//! Structured object generation from LLM output.
2//!
3//! Provides reliable JSON object generation with schema validation, automatic
4//! repair, and streaming partial object support. Works across all providers by
5//! selecting the best available mode (strict JSON schema, json_mode, tool-call,
6//! or prompt-only).
7
8use super::{LlmClient, Message, StreamEvent, TokenUsage, ToolDefinition};
9use anyhow::{bail, Context, Result};
10use serde::{Deserialize, Serialize};
11use serde_json::Value;
12use tokio_util::sync::CancellationToken;
13
14// ---------------------------------------------------------------------------
15// Public types
16// ---------------------------------------------------------------------------
17
/// Mode selection for structured output generation.
///
/// Variants serialize in `snake_case` (`auto`, `strict`, `json`, `tool`,
/// `prompt`) because of the `rename_all` attribute below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StructuredMode {
    /// Auto-select the best mode based on provider capabilities.
    Auto,
    /// OpenAI native strict JSON schema (`response_format.type = json_schema`).
    Strict,
    /// OpenAI `json_object` mode (guarantees valid JSON, not schema conformance).
    Json,
    /// Use tool-calling: inject a synthetic tool whose parameters ARE the schema.
    /// Works on all providers that support tool use (Anthropic, OpenAI, etc).
    Tool,
    /// Prompt-only: append schema instructions to the prompt. Least reliable.
    Prompt,
}
34
/// Request specification for structured object generation.
#[derive(Debug, Clone)]
pub struct StructuredRequest {
    /// User prompt for the generation (consumed by the message builders).
    pub prompt: String,
    /// Optional system prompt. NOTE(review): how it is merged with the schema
    /// instructions is decided by `build_system_prompt` — confirm there.
    pub system: Option<String>,
    /// JSON Schema the extracted object is validated against.
    pub schema: Value,
    /// Name for the schema. NOTE(review): presumably used for the synthetic
    /// tool / `json_schema` name — confirm in `build_tools` / `build_directive`.
    pub schema_name: String,
    /// Optional human-readable description of the schema.
    pub schema_description: Option<String>,
    /// Requested mode; resolved against the client's native support before use.
    pub mode: StructuredMode,
    /// Maximum number of repair round-trips the blocking generator performs
    /// after an invalid or unparseable response before giving up.
    pub max_repair_attempts: u8,
}
46
/// Result of a successful structured generation.
#[derive(Debug, Clone, Serialize)]
pub struct StructuredResult {
    /// The extracted object that passed schema validation.
    pub object: Value,
    /// Raw text of the candidate the object was parsed from.
    pub raw_text: Option<String>,
    /// Token usage: accumulated over all rounds in the blocking path, or the
    /// final response's usage in the streaming path.
    pub usage: TokenUsage,
    /// Number of repair rounds performed (always 0 for streaming).
    pub repair_rounds: u8,
    /// The mode actually used after resolving the requested mode.
    pub mode_used: StructuredMode,
}
56
/// Provider-native structured-output capability.
///
/// Each [`LlmClient`] reports this so the structured engine can request the
/// strongest enforcement the provider actually supports. Defaults to
/// [`NativeStructuredSupport::None`] for clients that don't override it.
///
/// The generators read it via `client.native_structured_support()` and feed
/// it into mode resolution.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NativeStructuredSupport {
    /// No native enforcement — rely on prompt instructions + lenient extraction.
    None,
    /// Can force a specific tool call (Anthropic `tool_choice`, OpenAI function
    /// `tool_choice`). Guarantees the model emits the structured tool call
    /// instead of free-form prose.
    ForcedTool,
    /// Supports OpenAI-style `response_format` (`json_object` and
    /// `json_schema` + `strict`) in addition to forced tool calls.
    JsonSchema,
}
74
/// A native `response_format` request for OpenAI-compatible providers.
///
/// Carried inside [`StructuredDirective::response_format`].
#[derive(Debug, Clone, PartialEq)]
pub enum ResponseFormat {
    /// `{"type":"json_object"}` — guarantees syntactically valid JSON, but not
    /// schema conformance.
    JsonObject,
    /// `{"type":"json_schema","json_schema":{name,schema,strict:true}}` —
    /// parser-enforced schema conformance. `name` and `schema` are the values
    /// placed into that payload.
    JsonSchema { name: String, schema: Value },
}
85
/// Instruction telling a provider how to enforce structured output for a call.
///
/// Carries the union of intents; each provider honors what it supports and
/// ignores the rest (e.g. Anthropic has no `response_format`, so it only acts
/// on `force_tool`). The default (`force_tool: None, response_format: None`)
/// reproduces an ordinary completion, which is why the trait's default
/// `complete_structured` impl is behavior-preserving.
///
/// Both fields are optional and independent; `Default` yields `None` for each.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct StructuredDirective {
    /// Force the model to call exactly this tool (provider `tool_choice`).
    pub force_tool: Option<String>,
    /// Request a provider-native `response_format` (OpenAI-compatible only).
    pub response_format: Option<ResponseFormat>,
}
100
/// Callback for streaming partial object snapshots.
///
/// Invoked by the streaming generator with each new partial snapshot and once
/// more with the final validated object. Must be `Send`.
pub type PartialObjectCallback = Box<dyn Fn(&Value) + Send>;
103
104// ---------------------------------------------------------------------------
105// Core generation: blocking (non-streaming)
106// ---------------------------------------------------------------------------
107
108/// Generate a structured JSON object using the given LLM client.
109///
110/// Selects the best mode based on `req.mode`, calls the LLM, validates against
111/// the schema, and retries with repair prompts if validation fails.
112pub async fn generate_blocking(
113    client: &dyn LlmClient,
114    req: &StructuredRequest,
115) -> Result<StructuredResult> {
116    let mode = resolve_mode(req.mode, client.native_structured_support());
117    let mut messages = build_initial_messages(req, mode);
118    let system = build_system_prompt(req, mode);
119    let tools = build_tools(req, mode);
120    let directive = build_directive(req, mode);
121
122    let mut total_usage = TokenUsage::default();
123    let mut repair_rounds: u8 = 0;
124
125    loop {
126        let resp = client
127            .complete_structured(&messages, Some(&system), &tools, &directive)
128            .await
129            .context("LLM call failed during structured generation")?;
130
131        accumulate_usage(&mut total_usage, &resp.usage);
132
133        // Mine the object from every place a model might have parked it (tool call,
134        // text content, AND the reasoning channel), trying each balanced JSON
135        // candidate against the schema. Reasoning models routinely leave `content`
136        // empty and emit the object inside `reasoning`, so without the reasoning
137        // fallback generate_object failed with "no structured output" across models.
138        let candidates = extract_raw_candidates(&resp.message, mode);
139        let resolution = resolve_structured(&candidates, &req.schema);
140
141        if let Some((value, raw)) = resolution.valid {
142            return Ok(StructuredResult {
143                object: value,
144                raw_text: Some(raw),
145                usage: total_usage,
146                repair_rounds,
147                mode_used: mode,
148            });
149        }
150
151        if repair_rounds >= req.max_repair_attempts {
152            return Err(match resolution.invalid {
153                Some((_, errors)) => anyhow::anyhow!(
154                    "Structured output failed schema validation after {} repair attempts. Errors: {}",
155                    repair_rounds,
156                    errors.join("; ")
157                ),
158                None => anyhow::anyhow!(
159                    "Structured output parsing failed after {} repair attempts: no JSON object found in tool call, text content, or reasoning channel",
160                    repair_rounds
161                ),
162            });
163        }
164
165        repair_rounds += 1;
166        let (repair_msg, raw_for_ctx) = match resolution.invalid {
167            Some((raw, errors)) => (build_repair_message(&raw, &errors), raw),
168            None => {
169                let raw = resolution.raw_seen.unwrap_or_default();
170                (build_parse_failure_repair(&raw), raw)
171            }
172        };
173        append_repair_context(
174            &mut messages,
175            &resp.message,
176            &repair_msg,
177            mode,
178            &raw_for_ctx,
179        );
180    }
181}
182
183// ---------------------------------------------------------------------------
184// Core generation: streaming
185// ---------------------------------------------------------------------------
186
187/// Generate a structured JSON object with streaming partial updates.
188///
189/// Calls `on_partial` with progressively more complete partial objects as tokens
190/// arrive. Returns the final validated object.
191///
192/// In streaming mode, `max_repair_attempts` defaults to 0 because a repair
193/// would reset the partial object stream (confusing for consumers).
194pub async fn generate_streaming(
195    client: &dyn LlmClient,
196    req: &StructuredRequest,
197    on_partial: PartialObjectCallback,
198) -> Result<StructuredResult> {
199    let mode = resolve_mode(req.mode, client.native_structured_support());
200    let messages = build_initial_messages(req, mode);
201    let system = build_system_prompt(req, mode);
202    let tools = build_tools(req, mode);
203    let directive = build_directive(req, mode);
204
205    let cancel_token = CancellationToken::new();
206    let mut rx = client
207        .complete_streaming_structured(&messages, Some(&system), &tools, &directive, cancel_token)
208        .await
209        .context("LLM streaming call failed during structured generation")?;
210
211    let mut json_buffer = String::new();
212    let mut last_valid_partial: Option<Value> = None;
213    let mut final_response: Option<super::LlmResponse> = None;
214    let mut last_parse_len: usize = 0;
215    // Minimum bytes of new data before attempting a partial parse (reduces CPU)
216    const PARSE_THRESHOLD: usize = 8;
217
218    while let Some(event) = rx.recv().await {
219        match event {
220            StreamEvent::ToolUseInputDelta(delta) if mode == StructuredMode::Tool => {
221                if final_response.is_some() {
222                    continue;
223                }
224                json_buffer.push_str(&delta);
225                if json_buffer.len() - last_parse_len >= PARSE_THRESHOLD {
226                    if let Some(partial) = try_parse_partial_json(&json_buffer) {
227                        if last_valid_partial.as_ref() != Some(&partial) {
228                            on_partial(&partial);
229                            last_valid_partial = Some(partial);
230                        }
231                    }
232                    last_parse_len = json_buffer.len();
233                }
234            }
235            StreamEvent::TextDelta(delta) if mode != StructuredMode::Tool => {
236                if final_response.is_some() {
237                    continue;
238                }
239                json_buffer.push_str(&delta);
240                if json_buffer.len() - last_parse_len >= PARSE_THRESHOLD {
241                    if let Some(json_start) = find_json_start(&json_buffer) {
242                        let candidate = &json_buffer[json_start..];
243                        if let Some(partial) = try_parse_partial_json(candidate) {
244                            if last_valid_partial.as_ref() != Some(&partial) {
245                                on_partial(&partial);
246                                last_valid_partial = Some(partial);
247                            }
248                        }
249                    }
250                    last_parse_len = json_buffer.len();
251                }
252            }
253            StreamEvent::Done(resp) => {
254                final_response = Some(resp);
255            }
256            _ => {}
257        }
258    }
259
260    let resp = final_response.context("Stream ended without Done event")?;
261    // Same multi-source resolution as the blocking path: the final message may carry
262    // the object in the tool call, the text content, or the reasoning channel.
263    let candidates = extract_raw_candidates(&resp.message, mode);
264    let resolution = resolve_structured(&candidates, &req.schema);
265    let (value, raw_text) = match resolution.valid {
266        Some(vr) => vr,
267        None => {
268            return Err(match resolution.invalid {
269                Some((_, errors)) => anyhow::anyhow!(
270                    "Streamed structured output failed schema validation: {}",
271                    errors.join("; ")
272                ),
273                None => anyhow::anyhow!(
274                    "Streamed output produced no parseable JSON object (checked tool call, text content, and reasoning channel)"
275                ),
276            });
277        }
278    };
279
280    // Emit final complete object
281    on_partial(&value);
282
283    Ok(StructuredResult {
284        object: value,
285        raw_text: Some(raw_text),
286        usage: resp.usage,
287        repair_rounds: 0,
288        mode_used: mode,
289    })
290}
291
292// ---------------------------------------------------------------------------
293// JSON extraction and parsing
294// ---------------------------------------------------------------------------
295
296/// Extract a JSON value from potentially dirty LLM output.
297///
298/// Handles: raw JSON, markdown code fences, leading/trailing prose.
299pub fn extract_json_value(text: &str) -> Result<Value> {
300    let trimmed = text.trim();
301
302    // 1. Direct parse
303    if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
304        if v.is_object() || v.is_array() {
305            return Ok(v);
306        }
307    }
308
309    // 2. Strip markdown code fence
310    if let Some(inner) = strip_code_fence(trimmed) {
311        if let Ok(v) = serde_json::from_str::<Value>(inner.trim()) {
312            if v.is_object() || v.is_array() {
313                return Ok(v);
314            }
315        }
316    }
317
318    // 3. Find balanced JSON substring (first { to matching })
319    if let Some(candidate) = find_balanced_json_object(trimmed) {
320        if let Ok(v) = serde_json::from_str::<Value>(candidate) {
321            return Ok(v);
322        }
323    }
324
325    // 4. Try array
326    if let Some(candidate) = find_balanced_json_array(trimmed) {
327        if let Ok(v) = serde_json::from_str::<Value>(candidate) {
328            return Ok(v);
329        }
330    }
331
332    bail!("No valid JSON object found in LLM output")
333}
334
/// Return the text inside a leading markdown fence (```` ```json ```` or
/// plain ```` ``` ````), up to the last closing fence.
///
/// `text` must start with the fence. When the opening fence is followed by a
/// newline the inner text is returned untrimmed; for the inline form
/// (```` ```json{...}``` ````) it is returned trimmed. Returns `None` if there
/// is no leading fence or no closing fence.
fn strip_code_fence(text: &str) -> Option<&str> {
    // Opening fence followed by a line break: keep the inside verbatim.
    let multiline = ["```json\n", "```json\r\n", "```\n", "```\r\n"]
        .iter()
        .find_map(|opener| {
            let rest = text.strip_prefix(*opener)?;
            rest.rfind("```").map(|close_at| &rest[..close_at])
        });
    if multiline.is_some() {
        return multiline;
    }

    // Inline form, e.g. ```json{...}``` — trim whatever surrounds the payload.
    ["```json", "```"].iter().find_map(|opener| {
        let rest = text.strip_prefix(*opener)?;
        rest.rfind("```").map(|close_at| rest[..close_at].trim())
    })
}
359
/// Find the first balanced `{...}` substring using bracket counting.
///
/// Thin wrapper over `find_balanced` with `{` / `}` as delimiters; returns
/// `None` when no balanced object is found.
fn find_balanced_json_object(text: &str) -> Option<&str> {
    find_balanced(text, '{', '}')
}
364
/// Find the first balanced `[...]` substring.
///
/// Thin wrapper over `find_balanced` with `[` / `]` as delimiters; returns
/// `None` when no balanced array is found.
fn find_balanced_json_array(text: &str) -> Option<&str> {
    find_balanced(text, '[', ']')
}
369
370fn find_balanced(text: &str, open: char, close: char) -> Option<&str> {
371    find_balanced_range(text, open, close).map(|(start, end)| &text[start..end])
372}
373
/// Byte range `[start, end)` of the first balanced `open..close` substring (quote-aware).
///
/// Delimiters inside double-quoted strings are ignored, and backslash escapes
/// inside strings are honored. `open` and `close` must be ASCII; for any other
/// delimiter the function returns `None` (a byte-wise scan cannot match a
/// multi-byte character safely).
///
/// The span starting at the first unquoted `open` is returned as soon as it
/// closes. If that opener is never closed (e.g. a stray `{` in prose before
/// the real object), the earliest-starting nested span that *did* close is
/// returned instead of giving up.
fn find_balanced_range(text: &str, open: char, close: char) -> Option<(usize, usize)> {
    if !open.is_ascii() || !close.is_ascii() {
        return None;
    }
    let open_byte = open as u8;
    let close_byte = close as u8;

    let mut in_string = false;
    let mut escape_next = false;
    // Positions of openers that have not been closed yet (innermost last).
    let mut open_stack: Vec<usize> = Vec::new();
    // Earliest-starting nested span that closed while an outer opener was
    // still pending; used only if the outer opener never closes.
    let mut fallback: Option<(usize, usize)> = None;

    for (i, &b) in text.as_bytes().iter().enumerate() {
        if escape_next {
            escape_next = false;
            continue;
        }
        match b {
            b'\\' if in_string => escape_next = true,
            b'"' => in_string = !in_string,
            _ if in_string => {}
            _ if b == open_byte => open_stack.push(i),
            _ if b == close_byte => {
                // A closer with nothing open is stray prose; ignore it.
                if let Some(start) = open_stack.pop() {
                    if open_stack.is_empty() {
                        // The outermost opener closed: this is the answer.
                        return Some((start, i + 1));
                    }
                    if fallback.map_or(true, |(best, _)| start < best) {
                        fallback = Some((start, i + 1));
                    }
                }
            }
            _ => {}
        }
    }
    fallback
}
428
429/// Every top-level balanced `open..close` substring, in document order.
430///
431/// Reasoning traces often contain several objects (worked examples, partial drafts)
432/// before the final answer, so callers validate each against the schema and keep the
433/// one that fits rather than blindly trusting the first `{...}`.
434fn find_all_balanced(text: &str, open: char, close: char) -> Vec<String> {
435    let mut out = Vec::new();
436    let mut base = 0usize;
437    while base < text.len() {
438        match find_balanced_range(&text[base..], open, close) {
439            Some((start, end)) => {
440                out.push(text[base + start..base + end].to_string());
441                base += end;
442            }
443            None => break,
444        }
445    }
446    out
447}
448
/// Byte offset of the first `{` or `[` in `text` that is not inside a
/// double-quoted string, or `None` if there is none.
///
/// Leading prose, whitespace and markdown fence markers are skipped simply by
/// scanning past them: a fence prefix (```` ``` ```` / ```` ```json ````)
/// contains no quote, backslash, brace or bracket, so it cannot affect the
/// result and needs no special handling.
fn find_json_start(text: &str) -> Option<usize> {
    let mut inside_quotes = false;
    let mut skip_escaped = false;
    text.bytes().position(|byte| {
        if skip_escaped {
            skip_escaped = false;
            return false;
        }
        match byte {
            b'\\' if inside_quotes => skip_escaped = true,
            b'"' => inside_quotes = !inside_quotes,
            b'{' | b'[' => return !inside_quotes,
            _ => {}
        }
        false
    })
}
483
484// ---------------------------------------------------------------------------
485// Partial JSON parsing (for streaming)
486// ---------------------------------------------------------------------------
487
488/// Attempt to parse a potentially incomplete JSON string into the most complete
489/// valid partial object possible.
490///
491/// Strategy: try parsing as-is first. If that fails, progressively close open
492/// braces/brackets and try again. This handles the common case where the LLM
493/// has output `{"name": "foo", "items": [1, 2` — we close it to get a partial.
494fn try_parse_partial_json(text: &str) -> Option<Value> {
495    let trimmed = text.trim();
496    if trimmed.is_empty() {
497        return None;
498    }
499
500    // Fast path: already valid
501    if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
502        if v.is_object() || v.is_array() {
503            return Some(v);
504        }
505    }
506
507    // Count unclosed brackets/braces (respecting strings)
508    let mut closers = Vec::new();
509    let mut in_string = false;
510    let mut escape_next = false;
511    // Track if we're mid-value (after a colon or comma, before the value is complete)
512    let mut last_significant: Option<u8> = None;
513
514    for &b in trimmed.as_bytes() {
515        if escape_next {
516            escape_next = false;
517            continue;
518        }
519        match b {
520            b'\\' if in_string => {
521                escape_next = true;
522            }
523            b'"' => {
524                in_string = !in_string;
525                if !in_string {
526                    last_significant = Some(b'"');
527                }
528            }
529            _ if in_string => {}
530            b'{' => {
531                closers.push(b'}');
532                last_significant = Some(b'{');
533            }
534            b'[' => {
535                closers.push(b']');
536                last_significant = Some(b'[');
537            }
538            b'}' | b']' => {
539                closers.pop();
540                last_significant = Some(b);
541            }
542            b':' | b',' => {
543                last_significant = Some(b);
544            }
545            b if !b.is_ascii_whitespace() => {
546                last_significant = Some(b);
547            }
548            _ => {}
549        }
550    }
551
552    if closers.is_empty() {
553        return None; // Already balanced but didn't parse — genuinely invalid
554    }
555
556    // Pre-allocate repair buffer: original + at most 6 extra chars (null + closers)
557    let mut repaired = String::with_capacity(trimmed.len() + closers.len() + 6);
558    repaired.push_str(trimmed);
559
560    if in_string {
561        repaired.push('"');
562        last_significant = Some(b'"');
563    }
564
565    // If last significant char suggests an incomplete key or value, handle it
566    if let Some(last) = last_significant {
567        if last == b':' {
568            // Key with no value yet — add null
569            repaired.push_str("null");
570        } else if last == b',' {
571            // Trailing comma — some parsers choke on this, trim it
572            if let Some(pos) = repaired.rfind(',') {
573                repaired.truncate(pos);
574            }
575        }
576    }
577
578    // Close all open brackets/braces
579    for &closer in closers.iter().rev() {
580        repaired.push(closer as char);
581    }
582
583    serde_json::from_str::<Value>(&repaired)
584        .ok()
585        .filter(|v| v.is_object() || v.is_array())
586}
587
588// ---------------------------------------------------------------------------
589// Schema validation
590// ---------------------------------------------------------------------------
591
592/// Validate a JSON value against a JSON Schema.
593/// Returns Ok(()) on success, or a list of human-readable error strings.
594fn validate_against_schema(value: &Value, schema: &Value) -> Result<(), Vec<String>> {
595    // We do a basic recursive validation here. For production, consider using
596    // the `jsonschema` crate, but to avoid adding a heavy dependency we implement
597    // the subset of JSON Schema that matters for structured output.
598    let errors = basic_schema_validate(value, schema, "");
599    if errors.is_empty() {
600        Ok(())
601    } else {
602        Err(errors)
603    }
604}
605
606/// Basic JSON Schema validator covering the most common constraints.
607fn basic_schema_validate(value: &Value, schema: &Value, path: &str) -> Vec<String> {
608    let mut errors = Vec::new();
609
610    // Handle $ref — not supported in basic validator, skip
611    if schema.get("$ref").is_some() {
612        return errors;
613    }
614
615    // Handle anyOf / oneOf: value must match at least one sub-schema
616    if let Some(any_of) = schema
617        .get("anyOf")
618        .or_else(|| schema.get("oneOf"))
619        .and_then(|v| v.as_array())
620    {
621        let matched = any_of
622            .iter()
623            .any(|sub| basic_schema_validate(value, sub, path).is_empty());
624        if !matched {
625            errors.push(format!(
626                "{}: value does not match any variant in anyOf/oneOf",
627                path_or_root(path),
628            ));
629        }
630        return errors;
631    }
632
633    // Handle enum
634    if let Some(enum_values) = schema.get("enum").and_then(|v| v.as_array()) {
635        if !enum_values.contains(value) {
636            errors.push(format!(
637                "{}: value {:?} not in enum {:?}",
638                path_or_root(path),
639                value,
640                enum_values
641            ));
642        }
643        return errors;
644    }
645
646    // Handle const
647    if let Some(const_val) = schema.get("const") {
648        if value != const_val {
649            errors.push(format!(
650                "{}: expected const {:?}, got {:?}",
651                path_or_root(path),
652                const_val,
653                value
654            ));
655        }
656        return errors;
657    }
658
659    // Type checking (supports nullable via type array: ["string", "null"])
660    if let Some(type_val) = schema.get("type") {
661        let type_ok = if let Some(type_str) = type_val.as_str() {
662            check_type(value, type_str)
663        } else if let Some(type_arr) = type_val.as_array() {
664            type_arr
665                .iter()
666                .filter_map(|t| t.as_str())
667                .any(|t| check_type(value, t))
668        } else {
669            true
670        };
671        if !type_ok {
672            errors.push(format!(
673                "{}: expected type {:?}, got {:?}",
674                path_or_root(path),
675                type_val,
676                value_type_name(value)
677            ));
678            return errors;
679        }
680    }
681
682    // Object validation
683    if let Some(obj) = value.as_object() {
684        if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
685            for (key, prop_schema) in properties {
686                if let Some(child_value) = obj.get(key) {
687                    let child_path = if path.is_empty() {
688                        format!(".{}", key)
689                    } else {
690                        format!("{}.{}", path, key)
691                    };
692                    errors.extend(basic_schema_validate(child_value, prop_schema, &child_path));
693                }
694            }
695        }
696
697        if let Some(required) = schema.get("required").and_then(|v| v.as_array()) {
698            for req_field in required {
699                if let Some(field_name) = req_field.as_str() {
700                    if !obj.contains_key(field_name) {
701                        errors.push(format!(
702                            "{}: missing required field '{}'",
703                            path_or_root(path),
704                            field_name
705                        ));
706                    }
707                }
708            }
709        }
710
711        // additionalProperties: false
712        if schema.get("additionalProperties") == Some(&Value::Bool(false)) {
713            if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
714                for key in obj.keys() {
715                    if !properties.contains_key(key) {
716                        errors.push(format!(
717                            "{}: unexpected additional property '{}'",
718                            path_or_root(path),
719                            key
720                        ));
721                    }
722                }
723            }
724        }
725    }
726
727    // Array validation
728    if let Some(arr) = value.as_array() {
729        if let Some(items_schema) = schema.get("items") {
730            for (i, item) in arr.iter().enumerate() {
731                let child_path = format!("{}[{}]", path, i);
732                errors.extend(basic_schema_validate(item, items_schema, &child_path));
733            }
734        }
735        if let Some(min) = schema.get("minItems").and_then(|v| v.as_u64()) {
736            if (arr.len() as u64) < min {
737                errors.push(format!(
738                    "{}: array has {} items, minimum is {}",
739                    path_or_root(path),
740                    arr.len(),
741                    min
742                ));
743            }
744        }
745        if let Some(max) = schema.get("maxItems").and_then(|v| v.as_u64()) {
746            if (arr.len() as u64) > max {
747                errors.push(format!(
748                    "{}: array has {} items, maximum is {}",
749                    path_or_root(path),
750                    arr.len(),
751                    max
752                ));
753            }
754        }
755    }
756
757    // String validation
758    if let Some(s) = value.as_str() {
759        if let Some(min_len) = schema.get("minLength").and_then(|v| v.as_u64()) {
760            if (s.chars().count() as u64) < min_len {
761                errors.push(format!(
762                    "{}: string length {} < minLength {}",
763                    path_or_root(path),
764                    s.chars().count(),
765                    min_len
766                ));
767            }
768        }
769        if let Some(max_len) = schema.get("maxLength").and_then(|v| v.as_u64()) {
770            if (s.chars().count() as u64) > max_len {
771                errors.push(format!(
772                    "{}: string length {} > maxLength {}",
773                    path_or_root(path),
774                    s.chars().count(),
775                    max_len
776                ));
777            }
778        }
779        if let Some(pattern) = schema.get("pattern").and_then(|v| v.as_str()) {
780            if let Ok(re) = regex::Regex::new(pattern) {
781                if !re.is_match(s) {
782                    errors.push(format!(
783                        "{}: string does not match pattern '{}'",
784                        path_or_root(path),
785                        pattern
786                    ));
787                }
788            }
789        }
790    }
791
792    // Number validation
793    if let Some(n) = value.as_f64() {
794        if let Some(min) = schema.get("minimum").and_then(|v| v.as_f64()) {
795            if n < min {
796                errors.push(format!(
797                    "{}: value {} < minimum {}",
798                    path_or_root(path),
799                    n,
800                    min
801                ));
802            }
803        }
804        if let Some(max) = schema.get("maximum").and_then(|v| v.as_f64()) {
805            if n > max {
806                errors.push(format!(
807                    "{}: value {} > maximum {}",
808                    path_or_root(path),
809                    n,
810                    max
811                ));
812            }
813        }
814        if let Some(exc_min) = schema.get("exclusiveMinimum").and_then(|v| v.as_f64()) {
815            if n <= exc_min {
816                errors.push(format!(
817                    "{}: value {} <= exclusiveMinimum {}",
818                    path_or_root(path),
819                    n,
820                    exc_min
821                ));
822            }
823        }
824        if let Some(exc_max) = schema.get("exclusiveMaximum").and_then(|v| v.as_f64()) {
825            if n >= exc_max {
826                errors.push(format!(
827                    "{}: value {} >= exclusiveMaximum {}",
828                    path_or_root(path),
829                    n,
830                    exc_max
831                ));
832            }
833        }
834    }
835
836    errors
837}
838
839fn check_type(value: &Value, type_str: &str) -> bool {
840    match type_str {
841        "object" => value.is_object(),
842        "array" => value.is_array(),
843        "string" => value.is_string(),
844        "number" => value.is_number(),
845        "integer" => {
846            value.is_i64()
847                || value.is_u64()
848                || value
849                    .as_f64()
850                    .map(|f| f.fract() == 0.0 && f.is_finite())
851                    .unwrap_or(false)
852        }
853        "boolean" => value.is_boolean(),
854        "null" => value.is_null(),
855        _ => true,
856    }
857}
858
/// Render a JSON path for error messages, using `$` to stand for the empty
/// (root) path and returning every other path unchanged.
fn path_or_root(path: &str) -> &str {
    match path {
        "" => "$",
        nested => nested,
    }
}
866
867fn value_type_name(value: &Value) -> &'static str {
868    match value {
869        Value::Null => "null",
870        Value::Bool(_) => "boolean",
871        Value::Number(_) => "number",
872        Value::String(_) => "string",
873        Value::Array(_) => "array",
874        Value::Object(_) => "object",
875    }
876}
877
878// ---------------------------------------------------------------------------
879// Message/prompt construction helpers
880// ---------------------------------------------------------------------------
881
882/// Resolve the requested mode against the provider's native capability.
883///
884/// `Auto`/`Tool` always resolve to forced `Tool` mode — the most reliable
885/// cross-provider strategy (the synthetic `emit_*` tool is made mandatory via
886/// the provider's `tool_choice`). `Strict`/`Json` use native `response_format`
887/// only when the provider reports [`NativeStructuredSupport::JsonSchema`];
888/// otherwise they fall back to forced `Tool` mode rather than silently
889/// degrading to unconstrained text.
890fn resolve_mode(requested: StructuredMode, support: NativeStructuredSupport) -> StructuredMode {
891    match requested {
892        StructuredMode::Prompt => StructuredMode::Prompt,
893        StructuredMode::Strict if support == NativeStructuredSupport::JsonSchema => {
894            StructuredMode::Strict
895        }
896        StructuredMode::Json if support == NativeStructuredSupport::JsonSchema => {
897            StructuredMode::Json
898        }
899        // Auto, Tool, or Strict/Json on a provider without json_schema support.
900        _ => StructuredMode::Tool,
901    }
902}
903
904/// Build the provider directive for an already-resolved mode.
905fn build_directive(req: &StructuredRequest, mode: StructuredMode) -> StructuredDirective {
906    match mode {
907        StructuredMode::Tool => StructuredDirective {
908            force_tool: Some(format!("emit_{}", req.schema_name)),
909            response_format: None,
910        },
911        StructuredMode::Strict => StructuredDirective {
912            force_tool: None,
913            response_format: Some(ResponseFormat::JsonSchema {
914                name: req.schema_name.clone(),
915                schema: req.schema.clone(),
916            }),
917        },
918        StructuredMode::Json => StructuredDirective {
919            force_tool: None,
920            response_format: Some(ResponseFormat::JsonObject),
921        },
922        StructuredMode::Auto | StructuredMode::Prompt => StructuredDirective::default(),
923    }
924}
925
926fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec<Message> {
927    match mode {
928        StructuredMode::Tool => {
929            // For tool mode, the prompt is the user message; the LLM will respond
930            // with a tool call whose input is the structured object.
931            vec![Message::user(&req.prompt)]
932        }
933        StructuredMode::Prompt | StructuredMode::Json => {
934            // Prompt mode and json_object mode both need the schema in the prompt:
935            // json_object only guarantees *syntactic* validity, so the model still
936            // has to be told the shape it should produce.
937            let augmented = format!(
938                "{}\n\nYou MUST respond with ONLY a valid JSON object (no markdown, no explanation) that conforms to this JSON Schema:\n\n```json\n{}\n```",
939                req.prompt,
940                serde_json::to_string_pretty(&req.schema).unwrap_or_default()
941            );
942            vec![Message::user(&augmented)]
943        }
944        _ => {
945            // Strict mode: the schema constraint is enforced by the provider via
946            // response_format.json_schema, so the user message is just the prompt.
947            vec![Message::user(&req.prompt)]
948        }
949    }
950}
951
952fn build_system_prompt(req: &StructuredRequest, mode: StructuredMode) -> String {
953    let base = req.system.as_deref().unwrap_or("");
954
955    match mode {
956        StructuredMode::Tool => {
957            format!(
958                "{}{}You MUST respond by calling the `emit_{}` tool exactly once with a valid argument matching the schema. Do not output any text outside the tool call.",
959                base,
960                if base.is_empty() { "" } else { "\n\n" },
961                req.schema_name
962            )
963        }
964        StructuredMode::Prompt | StructuredMode::Json => {
965            format!(
966                "{}{}You are a structured data extraction assistant. Always respond with valid JSON only, no markdown fences, no explanation text.",
967                base,
968                if base.is_empty() { "" } else { "\n\n" },
969            )
970        }
971        _ => base.to_string(),
972    }
973}
974
975fn build_tools(req: &StructuredRequest, mode: StructuredMode) -> Vec<ToolDefinition> {
976    match mode {
977        StructuredMode::Tool => {
978            vec![ToolDefinition {
979                name: format!("emit_{}", req.schema_name),
980                description: req
981                    .schema_description
982                    .clone()
983                    .unwrap_or_else(|| format!("Emit a structured {} object", req.schema_name)),
984                parameters: req.schema.clone(),
985            }]
986        }
987        _ => vec![],
988    }
989}
990
991/// Outcome of mining a response for the structured object across all candidate sources.
992struct StructuredResolution {
993    /// A schema-valid object plus the raw source string it came from.
994    valid: Option<(Value, String)>,
995    /// First parseable-but-schema-invalid object source + its validation errors,
996    /// used to build a targeted repair prompt.
997    invalid: Option<(String, Vec<String>)>,
998    /// First non-empty raw candidate, shown verbatim in a parse-failure repair prompt.
999    raw_seen: Option<String>,
1000}
1001
/// Add the trimmed form of `s` to `out`, skipping it when it is empty after
/// trimming or when an identical entry is already present.
fn push_candidate(out: &mut Vec<String>, s: String) {
    let candidate = s.trim();
    if candidate.is_empty() {
        return;
    }
    let is_new = out.iter().all(|existing| existing.as_str() != candidate);
    if is_new {
        out.push(candidate.to_string());
    }
}
1009
1010/// Ordered raw strings to mine for the structured object, most authoritative first:
1011/// tool-call arguments, then text content, then the reasoning channel.
1012///
1013/// The reasoning fallback is the crux of the cross-model fix: reasoning models
1014/// (GLM/zhipu, DeepSeek-R1, kimi…) frequently emit the final object inside
1015/// `reasoning` with `content` empty and no tool call. Earlier extraction only looked
1016/// at the tool call / text, so those models yielded an empty string and the whole
1017/// generate_object failed even though a perfectly good object was produced.
1018fn extract_raw_candidates(message: &super::Message, mode: StructuredMode) -> Vec<String> {
1019    let mut out: Vec<String> = Vec::new();
1020    if mode == StructuredMode::Tool {
1021        if let Some(call) = message.tool_calls().first() {
1022            push_candidate(
1023                &mut out,
1024                serde_json::to_string(&call.args).unwrap_or_default(),
1025            );
1026        }
1027    }
1028    push_candidate(&mut out, message.text());
1029    if let Some(reasoning) = message.reasoning_content.as_deref() {
1030        push_candidate(&mut out, reasoning.to_string());
1031    }
1032    out
1033}
1034
1035/// Every JSON object/array value mineable from possibly-dirty text, in document order
1036/// (direct parse, code fences, then all balanced `{...}` / `[...]`). Deduped.
1037fn extract_all_json_values(text: &str) -> Vec<Value> {
1038    let trimmed = text.trim();
1039    let mut values: Vec<Value> = Vec::new();
1040    let consider = |candidate: &str, values: &mut Vec<Value>| {
1041        if let Ok(v) = serde_json::from_str::<Value>(candidate.trim()) {
1042            if (v.is_object() || v.is_array()) && !values.contains(&v) {
1043                values.push(v);
1044            }
1045        }
1046    };
1047    consider(trimmed, &mut values);
1048    if let Some(inner) = strip_code_fence(trimmed) {
1049        consider(inner, &mut values);
1050    }
1051    for candidate in find_all_balanced(trimmed, '{', '}') {
1052        consider(&candidate, &mut values);
1053    }
1054    for candidate in find_all_balanced(trimmed, '[', ']') {
1055        consider(&candidate, &mut values);
1056    }
1057    values
1058}
1059
1060/// Try every raw candidate × every JSON value it yields against the schema; return the
1061/// first schema-valid object, else the best parseable-but-invalid object (for repair).
1062fn resolve_structured(candidates: &[String], schema: &Value) -> StructuredResolution {
1063    let mut invalid: Option<(String, Vec<String>)> = None;
1064    let mut raw_seen: Option<String> = None;
1065    for raw in candidates {
1066        if raw_seen.is_none() && !raw.trim().is_empty() {
1067            raw_seen = Some(raw.clone());
1068        }
1069        for value in extract_all_json_values(raw) {
1070            match validate_against_schema(&value, schema) {
1071                Ok(()) => {
1072                    return StructuredResolution {
1073                        valid: Some((value, raw.clone())),
1074                        invalid,
1075                        raw_seen,
1076                    };
1077                }
1078                Err(errors) => {
1079                    if invalid.is_none() {
1080                        invalid = Some((raw.clone(), errors));
1081                    }
1082                }
1083            }
1084        }
1085    }
1086    StructuredResolution {
1087        valid: None,
1088        invalid,
1089        raw_seen,
1090    }
1091}
1092
/// Return the longest prefix of `s` that is at most `max` bytes long and ends
/// on a character boundary.
///
/// Repair prompts echo arbitrary model output (including CJK), so the cut
/// must never land inside a multibyte character.
fn truncate_utf8(s: &str, max: usize) -> &str {
    if max >= s.len() {
        return s;
    }
    // Walk back from `max` to the nearest boundary; index 0 always is one.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
1105
1106/// Repair prompt for when nothing parseable was produced at all.
1107fn build_parse_failure_repair(raw_text: &str) -> String {
1108    if raw_text.trim().is_empty() {
1109        return "Your previous response contained no JSON. Respond with ONLY a single valid JSON object that matches the schema — no prose, no markdown, no analysis, and put the object in your reply content (not in a thinking/reasoning aside).".to_string();
1110    }
1111    format!(
1112        "Your previous output could not be parsed as a JSON object:\n\n{}\n\nReturn ONLY a single valid JSON object matching the schema — no prose, no markdown.",
1113        truncate_utf8(raw_text, 2000)
1114    )
1115}
1116
1117fn build_repair_message(raw_text: &str, errors: &[String]) -> String {
1118    // Truncate raw output in repair message to avoid blowing context
1119    let truncated_raw = if raw_text.len() > 2000 {
1120        format!(
1121            "{}...[truncated, {} bytes total]",
1122            truncate_utf8(raw_text, 2000),
1123            raw_text.len()
1124        )
1125    } else {
1126        raw_text.to_string()
1127    };
1128    format!(
1129        "Your previous output failed schema validation:\n\n{}\n\nValidation errors:\n{}\n\nPlease return ONLY a corrected JSON object that fixes these errors. No explanation, no markdown.",
1130        truncated_raw,
1131        errors.iter().map(|e| format!("- {}", e)).collect::<Vec<_>>().join("\n")
1132    )
1133}
1134
1135fn accumulate_usage(total: &mut TokenUsage, delta: &TokenUsage) {
1136    total.prompt_tokens += delta.prompt_tokens;
1137    total.completion_tokens += delta.completion_tokens;
1138    total.total_tokens += delta.total_tokens;
1139}
1140
1141/// Append repair context to the message history, respecting conversation structure.
1142///
1143/// In tool mode, the LLM returned a tool_use block. The correct follow-up is:
1144///   assistant (tool_use) → user (tool_result with error) → assistant (retry)
1145/// In text modes, it's simply:
1146///   assistant (text) → user (repair request) → assistant (retry)
1147fn append_repair_context(
1148    messages: &mut Vec<Message>,
1149    assistant_msg: &Message,
1150    repair_text: &str,
1151    mode: StructuredMode,
1152    _raw_text: &str,
1153) {
1154    if mode == StructuredMode::Tool {
1155        // Push the original assistant message (with tool_use block intact)
1156        messages.push(assistant_msg.clone());
1157        // Find the tool_use ID to construct a proper tool_result
1158        let tool_use_id = assistant_msg
1159            .tool_calls()
1160            .first()
1161            .map(|tc| tc.id.clone())
1162            .unwrap_or_else(|| "unknown".to_string());
1163        // Return the error as a tool_result so the conversation stays valid
1164        messages.push(Message::tool_result(&tool_use_id, repair_text, true));
1165    } else {
1166        // Text modes: push assistant text then user repair request
1167        messages.push(assistant_msg.clone());
1168        messages.push(Message::user(repair_text));
1169    }
1170}
1171
1172// ---------------------------------------------------------------------------
1173// Tests
1174// ---------------------------------------------------------------------------
1175
1176#[cfg(test)]
1177#[path = "structured_tests.rs"]
1178mod structured_tests;