Skip to main content

a3s_code_core/llm/
structured.rs

1//! Structured object generation from LLM output.
2//!
3//! Provides reliable JSON object generation with schema validation, automatic
4//! repair, and streaming partial object support. Works across all providers by
5//! selecting the best available mode (strict JSON schema, json_mode, tool-call,
6//! or prompt-only).
7
8use super::{LlmClient, Message, StreamEvent, TokenUsage, ToolDefinition};
9use anyhow::{bail, Context, Result};
10use serde::{Deserialize, Serialize};
11use serde_json::Value;
12use tokio_util::sync::CancellationToken;
13
14// ---------------------------------------------------------------------------
15// Public types
16// ---------------------------------------------------------------------------
17
/// Mode selection for structured output generation.
///
/// Serialized and deserialized in `snake_case` (`"auto"`, `"strict"`, ...).
/// The requested mode is only a preference: `resolve_mode` maps it onto what
/// the provider actually supports before any request is built.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum StructuredMode {
    /// Auto-select best mode based on provider capabilities.
    Auto,
    /// OpenAI native strict JSON schema (response_format.type = json_schema).
    Strict,
    /// OpenAI json_object mode (guarantees valid JSON, not schema-conformant).
    Json,
    /// Use tool-calling: inject a synthetic tool whose parameters are the schema.
    /// Works on all providers that support tool use (Anthropic, OpenAI, etc).
    Tool,
    /// Prompt-only: append schema instructions to the prompt. Least reliable.
    Prompt,
}
34
/// Request specification for structured object generation.
#[derive(Debug, Clone)]
pub struct StructuredRequest {
    /// The user prompt describing what object to produce.
    pub prompt: String,
    /// Optional caller-supplied system prompt.
    // NOTE(review): presumably merged into the system prompt by
    // `build_system_prompt`; confirm against that helper.
    pub system: Option<String>,
    /// JSON Schema the generated object is validated against. In `Strict`
    /// mode it is also sent to the provider as the `json_schema` payload.
    pub schema: Value,
    /// Name of the schema. Used as the `json_schema` name in `Strict` mode and
    /// to derive the forced tool name (`emit_<schema_name>`) in `Tool` mode.
    pub schema_name: String,
    /// Optional human-readable description of the schema.
    // NOTE(review): consumers of this field are outside the visible chunk.
    pub schema_description: Option<String>,
    /// Requested generation mode; resolved against provider capabilities
    /// before use.
    pub mode: StructuredMode,
    /// Maximum number of repair rounds `generate_blocking` may attempt after a
    /// parse or validation failure. `generate_streaming` does not consult it.
    pub max_repair_attempts: u8,
}
46
/// Result of a successful structured generation.
#[derive(Debug, Clone, Serialize)]
pub struct StructuredResult {
    /// The parsed object, already validated against the request schema.
    pub object: Value,
    /// Raw model output the object was extracted from. Both generators in
    /// this module always populate it.
    pub raw_text: Option<String>,
    /// Token usage. `generate_blocking` accumulates it across all attempts;
    /// `generate_streaming` reports the usage of its single response.
    pub usage: TokenUsage,
    /// Number of repair rounds that were needed (always 0 when streaming).
    pub repair_rounds: u8,
    /// The mode that was actually used after capability resolution.
    pub mode_used: StructuredMode,
}
56
/// Provider-native structured-output capability.
///
/// Each [`LlmClient`] reports this so the structured engine can request the
/// strongest enforcement the provider actually supports. Defaults to
/// [`NativeStructuredSupport::None`] for clients that don't override it.
///
/// Mode resolution only distinguishes [`NativeStructuredSupport::JsonSchema`]
/// from everything else: without it, `Strict` and `Json` requests are served
/// through forced tool calls instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NativeStructuredSupport {
    /// No native enforcement — rely on prompt instructions + lenient extraction.
    None,
    /// Can force a specific tool call (Anthropic `tool_choice`, OpenAI function
    /// `tool_choice`). Guarantees the model emits the structured tool call
    /// instead of free-form prose.
    ForcedTool,
    /// Supports OpenAI-style `response_format` (`json_object` and
    /// `json_schema` + `strict`) in addition to forced tool calls.
    JsonSchema,
}
74
/// A native `response_format` request for OpenAI-compatible providers.
///
/// Produced by `build_directive`: `Json` mode yields
/// [`ResponseFormat::JsonObject`], `Strict` mode yields
/// [`ResponseFormat::JsonSchema`] carrying the request's schema name and schema.
#[derive(Debug, Clone, PartialEq)]
pub enum ResponseFormat {
    /// `{"type":"json_object"}` — guarantees syntactically valid JSON, but not
    /// schema conformance.
    JsonObject,
    /// `{"type":"json_schema","json_schema":{name,schema,strict:true}}` —
    /// parser-enforced schema conformance.
    JsonSchema { name: String, schema: Value },
}
85
/// Instruction telling a provider how to enforce structured output for a call.
///
/// Carries the union of intents; each provider honors what it supports and
/// ignores the rest (e.g. Anthropic has no `response_format`, so it only acts
/// on `force_tool`). The default (`force_tool: None, response_format: None`)
/// reproduces an ordinary completion, which is why the trait's default
/// `complete_structured` impl is behavior-preserving.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct StructuredDirective {
    /// Force the model to call exactly this tool (provider `tool_choice`).
    /// In `Tool` mode this is set to `emit_<schema_name>`.
    pub force_tool: Option<String>,
    /// Request a provider-native `response_format` (OpenAI-compatible only).
    /// Set only in `Strict` and `Json` modes.
    pub response_format: Option<ResponseFormat>,
}
100
/// Callback for streaming partial object snapshots.
///
/// `generate_streaming` invokes it with each new partial snapshot (consecutive
/// identical snapshots are suppressed) and once more with the final validated
/// object. The callback must be `Send`; it is not required to be `Sync`.
pub type PartialObjectCallback = Box<dyn Fn(&Value) + Send>;
103
104// ---------------------------------------------------------------------------
105// Core generation: blocking (non-streaming)
106// ---------------------------------------------------------------------------
107
108/// Generate a structured JSON object using the given LLM client.
109///
110/// Selects the best mode based on `req.mode`, calls the LLM, validates against
111/// the schema, and retries with repair prompts if validation fails.
112pub async fn generate_blocking(
113    client: &dyn LlmClient,
114    req: &StructuredRequest,
115) -> Result<StructuredResult> {
116    let mode = resolve_mode(req.mode, client.native_structured_support());
117    let mut messages = build_initial_messages(req, mode);
118    let system = build_system_prompt(req, mode);
119    let tools = build_tools(req, mode);
120    let directive = build_directive(req, mode);
121
122    let mut total_usage = TokenUsage::default();
123    let mut repair_rounds: u8 = 0;
124
125    loop {
126        let resp = client
127            .complete_structured(&messages, Some(&system), &tools, &directive)
128            .await
129            .context("LLM call failed during structured generation")?;
130
131        accumulate_usage(&mut total_usage, &resp.usage);
132
133        let raw_text = extract_raw_output(&resp.message, mode);
134        let parsed = extract_json_value(&raw_text);
135
136        match parsed {
137            Ok(value) => match validate_against_schema(&value, &req.schema) {
138                Ok(()) => {
139                    return Ok(StructuredResult {
140                        object: value,
141                        raw_text: Some(raw_text),
142                        usage: total_usage,
143                        repair_rounds,
144                        mode_used: mode,
145                    });
146                }
147                Err(errors) if repair_rounds < req.max_repair_attempts => {
148                    repair_rounds += 1;
149                    let repair_msg = build_repair_message(&raw_text, &errors);
150                    append_repair_context(
151                        &mut messages,
152                        &resp.message,
153                        &repair_msg,
154                        mode,
155                        &raw_text,
156                    );
157                }
158                Err(errors) => {
159                    bail!(
160                            "Structured output failed schema validation after {} repair attempts. Errors: {}",
161                            repair_rounds,
162                            errors.join("; ")
163                        );
164                }
165            },
166            Err(parse_err) if repair_rounds < req.max_repair_attempts => {
167                repair_rounds += 1;
168                let repair_msg = format!(
169                    "Your previous output could not be parsed as JSON:\n\n{}\n\nError: {}\n\nPlease return ONLY a valid JSON object matching the schema.",
170                    raw_text, parse_err
171                );
172                append_repair_context(&mut messages, &resp.message, &repair_msg, mode, &raw_text);
173            }
174            Err(parse_err) => {
175                bail!(
176                    "Structured output failed JSON parsing after {} repair attempts: {}",
177                    repair_rounds,
178                    parse_err
179                );
180            }
181        }
182    }
183}
184
185// ---------------------------------------------------------------------------
186// Core generation: streaming
187// ---------------------------------------------------------------------------
188
189/// Generate a structured JSON object with streaming partial updates.
190///
191/// Calls `on_partial` with progressively more complete partial objects as tokens
192/// arrive. Returns the final validated object.
193///
194/// In streaming mode, `max_repair_attempts` defaults to 0 because a repair
195/// would reset the partial object stream (confusing for consumers).
196pub async fn generate_streaming(
197    client: &dyn LlmClient,
198    req: &StructuredRequest,
199    on_partial: PartialObjectCallback,
200) -> Result<StructuredResult> {
201    let mode = resolve_mode(req.mode, client.native_structured_support());
202    let messages = build_initial_messages(req, mode);
203    let system = build_system_prompt(req, mode);
204    let tools = build_tools(req, mode);
205    let directive = build_directive(req, mode);
206
207    let cancel_token = CancellationToken::new();
208    let mut rx = client
209        .complete_streaming_structured(&messages, Some(&system), &tools, &directive, cancel_token)
210        .await
211        .context("LLM streaming call failed during structured generation")?;
212
213    let mut json_buffer = String::new();
214    let mut last_valid_partial: Option<Value> = None;
215    let mut final_response: Option<super::LlmResponse> = None;
216    let mut last_parse_len: usize = 0;
217    // Minimum bytes of new data before attempting a partial parse (reduces CPU)
218    const PARSE_THRESHOLD: usize = 8;
219
220    while let Some(event) = rx.recv().await {
221        match event {
222            StreamEvent::ToolUseInputDelta(delta) if mode == StructuredMode::Tool => {
223                if final_response.is_some() {
224                    continue;
225                }
226                json_buffer.push_str(&delta);
227                if json_buffer.len() - last_parse_len >= PARSE_THRESHOLD {
228                    if let Some(partial) = try_parse_partial_json(&json_buffer) {
229                        if last_valid_partial.as_ref() != Some(&partial) {
230                            on_partial(&partial);
231                            last_valid_partial = Some(partial);
232                        }
233                    }
234                    last_parse_len = json_buffer.len();
235                }
236            }
237            StreamEvent::TextDelta(delta) if mode != StructuredMode::Tool => {
238                if final_response.is_some() {
239                    continue;
240                }
241                json_buffer.push_str(&delta);
242                if json_buffer.len() - last_parse_len >= PARSE_THRESHOLD {
243                    if let Some(json_start) = find_json_start(&json_buffer) {
244                        let candidate = &json_buffer[json_start..];
245                        if let Some(partial) = try_parse_partial_json(candidate) {
246                            if last_valid_partial.as_ref() != Some(&partial) {
247                                on_partial(&partial);
248                                last_valid_partial = Some(partial);
249                            }
250                        }
251                    }
252                    last_parse_len = json_buffer.len();
253                }
254            }
255            StreamEvent::Done(resp) => {
256                final_response = Some(resp);
257            }
258            _ => {}
259        }
260    }
261
262    let resp = final_response.context("Stream ended without Done event")?;
263    let raw_text = extract_raw_output(&resp.message, mode);
264    let value =
265        extract_json_value(&raw_text).context("Failed to parse final streamed output as JSON")?;
266
267    validate_against_schema(&value, &req.schema).map_err(|errors| {
268        anyhow::anyhow!(
269            "Streamed structured output failed schema validation: {}",
270            errors.join("; ")
271        )
272    })?;
273
274    // Emit final complete object
275    on_partial(&value);
276
277    Ok(StructuredResult {
278        object: value,
279        raw_text: Some(raw_text),
280        usage: resp.usage,
281        repair_rounds: 0,
282        mode_used: mode,
283    })
284}
285
286// ---------------------------------------------------------------------------
287// JSON extraction and parsing
288// ---------------------------------------------------------------------------
289
290/// Extract a JSON value from potentially dirty LLM output.
291///
292/// Handles: raw JSON, markdown code fences, leading/trailing prose.
293pub fn extract_json_value(text: &str) -> Result<Value> {
294    let trimmed = text.trim();
295
296    // 1. Direct parse
297    if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
298        if v.is_object() || v.is_array() {
299            return Ok(v);
300        }
301    }
302
303    // 2. Strip markdown code fence
304    if let Some(inner) = strip_code_fence(trimmed) {
305        if let Ok(v) = serde_json::from_str::<Value>(inner.trim()) {
306            if v.is_object() || v.is_array() {
307                return Ok(v);
308            }
309        }
310    }
311
312    // 3. Find balanced JSON substring (first { to matching })
313    if let Some(candidate) = find_balanced_json_object(trimmed) {
314        if let Ok(v) = serde_json::from_str::<Value>(candidate) {
315            return Ok(v);
316        }
317    }
318
319    // 4. Try array
320    if let Some(candidate) = find_balanced_json_array(trimmed) {
321        if let Ok(v) = serde_json::from_str::<Value>(candidate) {
322            return Ok(v);
323        }
324    }
325
326    bail!("No valid JSON object found in LLM output")
327}
328
/// Return the contents of a markdown code fence that opens at the very start
/// of `text`, or `None` if `text` is not fenced or the fence is never closed.
///
/// Block-style openers (```` ```json ```` or ```` ``` ```` followed by a line
/// break) are matched first and their contents are returned untrimmed. Inline
/// forms such as ```` ```json{...}``` ```` are matched next and their contents
/// are returned trimmed. The closing fence is the last ```` ``` ```` in the text.
fn strip_code_fence(text: &str) -> Option<&str> {
    /// Everything before the last closing fence, if there is one.
    fn before_closing_fence(body: &str) -> Option<&str> {
        body.rfind("```").map(|end| &body[..end])
    }

    let block_style = ["```json\n", "```json\r\n", "```\n", "```\r\n"]
        .iter()
        .find_map(|opener| text.strip_prefix(*opener).and_then(before_closing_fence));

    block_style.or_else(|| {
        // Also handle inline: ```json{...}```
        ["```json", "```"]
            .iter()
            .find_map(|opener| text.strip_prefix(*opener).and_then(before_closing_fence))
            .map(str::trim)
    })
}
353
354/// Find the first balanced `{...}` substring using bracket counting.
355fn find_balanced_json_object(text: &str) -> Option<&str> {
356    find_balanced(text, '{', '}')
357}
358
359/// Find the first balanced `[...]` substring.
360fn find_balanced_json_array(text: &str) -> Option<&str> {
361    find_balanced(text, '[', ']')
362}
363
/// Return the first span of `text` that starts at an unquoted `open`
/// delimiter and ends at its matching `close` delimiter.
///
/// Delimiters inside double-quoted strings are ignored, and a backslash
/// inside a string escapes the following byte. Only `open`/`close` are
/// counted for nesting; other bracket kinds are not tracked. Returns `None`
/// when no unquoted `open` exists or it is never closed.
///
/// Both delimiters are compared as single bytes, so they are expected to be
/// ASCII.
fn find_balanced(text: &str, open: char, close: char) -> Option<&str> {
    let (opener, closer) = (open as u8, close as u8);

    let mut span_start: Option<usize> = None;
    let mut depth = 0i32;
    let mut quoted = false;
    let mut escaped = false;

    for (idx, byte) in text.bytes().enumerate() {
        if escaped {
            escaped = false;
            continue;
        }
        if byte == b'"' {
            quoted = !quoted;
            continue;
        }
        if quoted {
            // Inside a string only the escape marker matters.
            escaped = byte == b'\\';
            continue;
        }

        if byte == opener {
            // The first unquoted opener anchors the span; later ones nest.
            span_start = span_start.or(Some(idx));
            depth += 1;
        } else if byte == closer {
            // Closers seen before any opener are just prose.
            if let Some(from) = span_start {
                depth -= 1;
                if depth == 0 {
                    return Some(&text[from..=idx]);
                }
            }
        }
    }
    None
}
417
/// Byte offset (into `text`) of the first `{` or `[` that is not inside a
/// double-quoted string, or `None` if there is none yet.
///
/// A leading ```` ```json ```` or ```` ``` ```` fence marker is skipped before
/// scanning; the returned offset is still relative to the start of `text`.
fn find_json_start(text: &str) -> Option<usize> {
    // Length of the fence marker to skip, if the text opens with one.
    let skipped = ["```json", "```"]
        .iter()
        .find(|fence| text.starts_with(**fence))
        .map_or(0, |fence| fence.len());

    let mut quoted = false;
    let mut escaped = false;
    text[skipped..]
        .bytes()
        .position(|byte| {
            if escaped {
                escaped = false;
                return false;
            }
            match byte {
                b'"' => quoted = !quoted,
                b'\\' if quoted => escaped = true,
                b'{' | b'[' => return !quoted,
                _ => {}
            }
            false
        })
        .map(|relative| skipped + relative)
}
452
453// ---------------------------------------------------------------------------
454// Partial JSON parsing (for streaming)
455// ---------------------------------------------------------------------------
456
457/// Attempt to parse a potentially incomplete JSON string into the most complete
458/// valid partial object possible.
459///
460/// Strategy: try parsing as-is first. If that fails, progressively close open
461/// braces/brackets and try again. This handles the common case where the LLM
462/// has output `{"name": "foo", "items": [1, 2` — we close it to get a partial.
463fn try_parse_partial_json(text: &str) -> Option<Value> {
464    let trimmed = text.trim();
465    if trimmed.is_empty() {
466        return None;
467    }
468
469    // Fast path: already valid
470    if let Ok(v) = serde_json::from_str::<Value>(trimmed) {
471        if v.is_object() || v.is_array() {
472            return Some(v);
473        }
474    }
475
476    // Count unclosed brackets/braces (respecting strings)
477    let mut closers = Vec::new();
478    let mut in_string = false;
479    let mut escape_next = false;
480    // Track if we're mid-value (after a colon or comma, before the value is complete)
481    let mut last_significant: Option<u8> = None;
482
483    for &b in trimmed.as_bytes() {
484        if escape_next {
485            escape_next = false;
486            continue;
487        }
488        match b {
489            b'\\' if in_string => {
490                escape_next = true;
491            }
492            b'"' => {
493                in_string = !in_string;
494                if !in_string {
495                    last_significant = Some(b'"');
496                }
497            }
498            _ if in_string => {}
499            b'{' => {
500                closers.push(b'}');
501                last_significant = Some(b'{');
502            }
503            b'[' => {
504                closers.push(b']');
505                last_significant = Some(b'[');
506            }
507            b'}' | b']' => {
508                closers.pop();
509                last_significant = Some(b);
510            }
511            b':' | b',' => {
512                last_significant = Some(b);
513            }
514            b if !b.is_ascii_whitespace() => {
515                last_significant = Some(b);
516            }
517            _ => {}
518        }
519    }
520
521    if closers.is_empty() {
522        return None; // Already balanced but didn't parse — genuinely invalid
523    }
524
525    // Pre-allocate repair buffer: original + at most 6 extra chars (null + closers)
526    let mut repaired = String::with_capacity(trimmed.len() + closers.len() + 6);
527    repaired.push_str(trimmed);
528
529    if in_string {
530        repaired.push('"');
531        last_significant = Some(b'"');
532    }
533
534    // If last significant char suggests an incomplete key or value, handle it
535    if let Some(last) = last_significant {
536        if last == b':' {
537            // Key with no value yet — add null
538            repaired.push_str("null");
539        } else if last == b',' {
540            // Trailing comma — some parsers choke on this, trim it
541            if let Some(pos) = repaired.rfind(',') {
542                repaired.truncate(pos);
543            }
544        }
545    }
546
547    // Close all open brackets/braces
548    for &closer in closers.iter().rev() {
549        repaired.push(closer as char);
550    }
551
552    serde_json::from_str::<Value>(&repaired)
553        .ok()
554        .filter(|v| v.is_object() || v.is_array())
555}
556
557// ---------------------------------------------------------------------------
558// Schema validation
559// ---------------------------------------------------------------------------
560
561/// Validate a JSON value against a JSON Schema.
562/// Returns Ok(()) on success, or a list of human-readable error strings.
563fn validate_against_schema(value: &Value, schema: &Value) -> Result<(), Vec<String>> {
564    // We do a basic recursive validation here. For production, consider using
565    // the `jsonschema` crate, but to avoid adding a heavy dependency we implement
566    // the subset of JSON Schema that matters for structured output.
567    let errors = basic_schema_validate(value, schema, "");
568    if errors.is_empty() {
569        Ok(())
570    } else {
571        Err(errors)
572    }
573}
574
575/// Basic JSON Schema validator covering the most common constraints.
576fn basic_schema_validate(value: &Value, schema: &Value, path: &str) -> Vec<String> {
577    let mut errors = Vec::new();
578
579    // Handle $ref — not supported in basic validator, skip
580    if schema.get("$ref").is_some() {
581        return errors;
582    }
583
584    // Handle anyOf / oneOf: value must match at least one sub-schema
585    if let Some(any_of) = schema
586        .get("anyOf")
587        .or_else(|| schema.get("oneOf"))
588        .and_then(|v| v.as_array())
589    {
590        let matched = any_of
591            .iter()
592            .any(|sub| basic_schema_validate(value, sub, path).is_empty());
593        if !matched {
594            errors.push(format!(
595                "{}: value does not match any variant in anyOf/oneOf",
596                path_or_root(path),
597            ));
598        }
599        return errors;
600    }
601
602    // Handle enum
603    if let Some(enum_values) = schema.get("enum").and_then(|v| v.as_array()) {
604        if !enum_values.contains(value) {
605            errors.push(format!(
606                "{}: value {:?} not in enum {:?}",
607                path_or_root(path),
608                value,
609                enum_values
610            ));
611        }
612        return errors;
613    }
614
615    // Handle const
616    if let Some(const_val) = schema.get("const") {
617        if value != const_val {
618            errors.push(format!(
619                "{}: expected const {:?}, got {:?}",
620                path_or_root(path),
621                const_val,
622                value
623            ));
624        }
625        return errors;
626    }
627
628    // Type checking (supports nullable via type array: ["string", "null"])
629    if let Some(type_val) = schema.get("type") {
630        let type_ok = if let Some(type_str) = type_val.as_str() {
631            check_type(value, type_str)
632        } else if let Some(type_arr) = type_val.as_array() {
633            type_arr
634                .iter()
635                .filter_map(|t| t.as_str())
636                .any(|t| check_type(value, t))
637        } else {
638            true
639        };
640        if !type_ok {
641            errors.push(format!(
642                "{}: expected type {:?}, got {:?}",
643                path_or_root(path),
644                type_val,
645                value_type_name(value)
646            ));
647            return errors;
648        }
649    }
650
651    // Object validation
652    if let Some(obj) = value.as_object() {
653        if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
654            for (key, prop_schema) in properties {
655                if let Some(child_value) = obj.get(key) {
656                    let child_path = if path.is_empty() {
657                        format!(".{}", key)
658                    } else {
659                        format!("{}.{}", path, key)
660                    };
661                    errors.extend(basic_schema_validate(child_value, prop_schema, &child_path));
662                }
663            }
664        }
665
666        if let Some(required) = schema.get("required").and_then(|v| v.as_array()) {
667            for req_field in required {
668                if let Some(field_name) = req_field.as_str() {
669                    if !obj.contains_key(field_name) {
670                        errors.push(format!(
671                            "{}: missing required field '{}'",
672                            path_or_root(path),
673                            field_name
674                        ));
675                    }
676                }
677            }
678        }
679
680        // additionalProperties: false
681        if schema.get("additionalProperties") == Some(&Value::Bool(false)) {
682            if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
683                for key in obj.keys() {
684                    if !properties.contains_key(key) {
685                        errors.push(format!(
686                            "{}: unexpected additional property '{}'",
687                            path_or_root(path),
688                            key
689                        ));
690                    }
691                }
692            }
693        }
694    }
695
696    // Array validation
697    if let Some(arr) = value.as_array() {
698        if let Some(items_schema) = schema.get("items") {
699            for (i, item) in arr.iter().enumerate() {
700                let child_path = format!("{}[{}]", path, i);
701                errors.extend(basic_schema_validate(item, items_schema, &child_path));
702            }
703        }
704        if let Some(min) = schema.get("minItems").and_then(|v| v.as_u64()) {
705            if (arr.len() as u64) < min {
706                errors.push(format!(
707                    "{}: array has {} items, minimum is {}",
708                    path_or_root(path),
709                    arr.len(),
710                    min
711                ));
712            }
713        }
714        if let Some(max) = schema.get("maxItems").and_then(|v| v.as_u64()) {
715            if (arr.len() as u64) > max {
716                errors.push(format!(
717                    "{}: array has {} items, maximum is {}",
718                    path_or_root(path),
719                    arr.len(),
720                    max
721                ));
722            }
723        }
724    }
725
726    // String validation
727    if let Some(s) = value.as_str() {
728        if let Some(min_len) = schema.get("minLength").and_then(|v| v.as_u64()) {
729            if (s.chars().count() as u64) < min_len {
730                errors.push(format!(
731                    "{}: string length {} < minLength {}",
732                    path_or_root(path),
733                    s.chars().count(),
734                    min_len
735                ));
736            }
737        }
738        if let Some(max_len) = schema.get("maxLength").and_then(|v| v.as_u64()) {
739            if (s.chars().count() as u64) > max_len {
740                errors.push(format!(
741                    "{}: string length {} > maxLength {}",
742                    path_or_root(path),
743                    s.chars().count(),
744                    max_len
745                ));
746            }
747        }
748        if let Some(pattern) = schema.get("pattern").and_then(|v| v.as_str()) {
749            if let Ok(re) = regex::Regex::new(pattern) {
750                if !re.is_match(s) {
751                    errors.push(format!(
752                        "{}: string does not match pattern '{}'",
753                        path_or_root(path),
754                        pattern
755                    ));
756                }
757            }
758        }
759    }
760
761    // Number validation
762    if let Some(n) = value.as_f64() {
763        if let Some(min) = schema.get("minimum").and_then(|v| v.as_f64()) {
764            if n < min {
765                errors.push(format!(
766                    "{}: value {} < minimum {}",
767                    path_or_root(path),
768                    n,
769                    min
770                ));
771            }
772        }
773        if let Some(max) = schema.get("maximum").and_then(|v| v.as_f64()) {
774            if n > max {
775                errors.push(format!(
776                    "{}: value {} > maximum {}",
777                    path_or_root(path),
778                    n,
779                    max
780                ));
781            }
782        }
783        if let Some(exc_min) = schema.get("exclusiveMinimum").and_then(|v| v.as_f64()) {
784            if n <= exc_min {
785                errors.push(format!(
786                    "{}: value {} <= exclusiveMinimum {}",
787                    path_or_root(path),
788                    n,
789                    exc_min
790                ));
791            }
792        }
793        if let Some(exc_max) = schema.get("exclusiveMaximum").and_then(|v| v.as_f64()) {
794            if n >= exc_max {
795                errors.push(format!(
796                    "{}: value {} >= exclusiveMaximum {}",
797                    path_or_root(path),
798                    n,
799                    exc_max
800                ));
801            }
802        }
803    }
804
805    errors
806}
807
808fn check_type(value: &Value, type_str: &str) -> bool {
809    match type_str {
810        "object" => value.is_object(),
811        "array" => value.is_array(),
812        "string" => value.is_string(),
813        "number" => value.is_number(),
814        "integer" => {
815            value.is_i64()
816                || value.is_u64()
817                || value
818                    .as_f64()
819                    .map(|f| f.fract() == 0.0 && f.is_finite())
820                    .unwrap_or(false)
821        }
822        "boolean" => value.is_boolean(),
823        "null" => value.is_null(),
824        _ => true,
825    }
826}
827
/// Display form of a validation path: the root (empty path) is shown as `$`,
/// any other path is returned unchanged.
fn path_or_root(path: &str) -> &str {
    match path {
        "" => "$",
        nested => nested,
    }
}
835
836fn value_type_name(value: &Value) -> &'static str {
837    match value {
838        Value::Null => "null",
839        Value::Bool(_) => "boolean",
840        Value::Number(_) => "number",
841        Value::String(_) => "string",
842        Value::Array(_) => "array",
843        Value::Object(_) => "object",
844    }
845}
846
847// ---------------------------------------------------------------------------
848// Message/prompt construction helpers
849// ---------------------------------------------------------------------------
850
851/// Resolve the requested mode against the provider's native capability.
852///
853/// `Auto`/`Tool` always resolve to forced `Tool` mode — the most reliable
854/// cross-provider strategy (the synthetic `emit_*` tool is made mandatory via
855/// the provider's `tool_choice`). `Strict`/`Json` use native `response_format`
856/// only when the provider reports [`NativeStructuredSupport::JsonSchema`];
857/// otherwise they fall back to forced `Tool` mode rather than silently
858/// degrading to unconstrained text.
859fn resolve_mode(requested: StructuredMode, support: NativeStructuredSupport) -> StructuredMode {
860    match requested {
861        StructuredMode::Prompt => StructuredMode::Prompt,
862        StructuredMode::Strict if support == NativeStructuredSupport::JsonSchema => {
863            StructuredMode::Strict
864        }
865        StructuredMode::Json if support == NativeStructuredSupport::JsonSchema => {
866            StructuredMode::Json
867        }
868        // Auto, Tool, or Strict/Json on a provider without json_schema support.
869        _ => StructuredMode::Tool,
870    }
871}
872
873/// Build the provider directive for an already-resolved mode.
874fn build_directive(req: &StructuredRequest, mode: StructuredMode) -> StructuredDirective {
875    match mode {
876        StructuredMode::Tool => StructuredDirective {
877            force_tool: Some(format!("emit_{}", req.schema_name)),
878            response_format: None,
879        },
880        StructuredMode::Strict => StructuredDirective {
881            force_tool: None,
882            response_format: Some(ResponseFormat::JsonSchema {
883                name: req.schema_name.clone(),
884                schema: req.schema.clone(),
885            }),
886        },
887        StructuredMode::Json => StructuredDirective {
888            force_tool: None,
889            response_format: Some(ResponseFormat::JsonObject),
890        },
891        StructuredMode::Auto | StructuredMode::Prompt => StructuredDirective::default(),
892    }
893}
894
895fn build_initial_messages(req: &StructuredRequest, mode: StructuredMode) -> Vec<Message> {
896    match mode {
897        StructuredMode::Tool => {
898            // For tool mode, the prompt is the user message; the LLM will respond
899            // with a tool call whose input is the structured object.
900            vec![Message::user(&req.prompt)]
901        }
902        StructuredMode::Prompt | StructuredMode::Json => {
903            // Prompt mode and json_object mode both need the schema in the prompt:
904            // json_object only guarantees *syntactic* validity, so the model still
905            // has to be told the shape it should produce.
906            let augmented = format!(
907                "{}\n\nYou MUST respond with ONLY a valid JSON object (no markdown, no explanation) that conforms to this JSON Schema:\n\n```json\n{}\n```",
908                req.prompt,
909                serde_json::to_string_pretty(&req.schema).unwrap_or_default()
910            );
911            vec![Message::user(&augmented)]
912        }
913        _ => {
914            // Strict mode: the schema constraint is enforced by the provider via
915            // response_format.json_schema, so the user message is just the prompt.
916            vec![Message::user(&req.prompt)]
917        }
918    }
919}
920
921fn build_system_prompt(req: &StructuredRequest, mode: StructuredMode) -> String {
922    let base = req.system.as_deref().unwrap_or("");
923
924    match mode {
925        StructuredMode::Tool => {
926            format!(
927                "{}{}You MUST respond by calling the `emit_{}` tool exactly once with a valid argument matching the schema. Do not output any text outside the tool call.",
928                base,
929                if base.is_empty() { "" } else { "\n\n" },
930                req.schema_name
931            )
932        }
933        StructuredMode::Prompt | StructuredMode::Json => {
934            format!(
935                "{}{}You are a structured data extraction assistant. Always respond with valid JSON only, no markdown fences, no explanation text.",
936                base,
937                if base.is_empty() { "" } else { "\n\n" },
938            )
939        }
940        _ => base.to_string(),
941    }
942}
943
944fn build_tools(req: &StructuredRequest, mode: StructuredMode) -> Vec<ToolDefinition> {
945    match mode {
946        StructuredMode::Tool => {
947            vec![ToolDefinition {
948                name: format!("emit_{}", req.schema_name),
949                description: req
950                    .schema_description
951                    .clone()
952                    .unwrap_or_else(|| format!("Emit a structured {} object", req.schema_name)),
953                parameters: req.schema.clone(),
954            }]
955        }
956        _ => vec![],
957    }
958}
959
960/// Extract the raw JSON string from the LLM response based on mode.
961fn extract_raw_output(message: &super::Message, mode: StructuredMode) -> String {
962    match mode {
963        StructuredMode::Tool => {
964            // Look for tool call input
965            let calls = message.tool_calls();
966            if let Some(call) = calls.first() {
967                serde_json::to_string(&call.args).unwrap_or_default()
968            } else {
969                // Fallback: maybe the model responded with text anyway
970                message.text()
971            }
972        }
973        _ => message.text(),
974    }
975}
976
/// Build the follow-up prompt asking the model to correct output that failed
/// schema validation.
///
/// The message embeds the previous raw output followed by one `- ` bullet per
/// entry in `errors`. Raw output longer than 2000 bytes is cut and suffixed
/// with a marker stating the original byte length, so an oversized reply
/// cannot blow up the context.
///
/// The cut always lands on a UTF-8 character boundary: if byte 2000 falls in
/// the middle of a multi-byte character, the cut moves back to the start of
/// that character. Slicing at a fixed byte offset would panic on such input.
fn build_repair_message(raw_text: &str, errors: &[String]) -> String {
    const MAX_RAW_BYTES: usize = 2000;

    let truncated_raw = if raw_text.len() > MAX_RAW_BYTES {
        // Walk back to the nearest char boundary; index 0 is always a
        // boundary, so this loop terminates.
        let mut cut = MAX_RAW_BYTES;
        while !raw_text.is_char_boundary(cut) {
            cut -= 1;
        }
        format!(
            "{}...[truncated, {} bytes total]",
            &raw_text[..cut],
            raw_text.len()
        )
    } else {
        raw_text.to_string()
    };

    let error_list = errors
        .iter()
        .map(|e| format!("- {}", e))
        .collect::<Vec<_>>()
        .join("\n");

    format!(
        "Your previous output failed schema validation:\n\n{}\n\nValidation errors:\n{}\n\nPlease return ONLY a corrected JSON object that fixes these errors. No explanation, no markdown.",
        truncated_raw, error_list
    )
}
994
995fn accumulate_usage(total: &mut TokenUsage, delta: &TokenUsage) {
996    total.prompt_tokens += delta.prompt_tokens;
997    total.completion_tokens += delta.completion_tokens;
998    total.total_tokens += delta.total_tokens;
999}
1000
1001/// Append repair context to the message history, respecting conversation structure.
1002///
1003/// In tool mode, the LLM returned a tool_use block. The correct follow-up is:
1004///   assistant (tool_use) → user (tool_result with error) → assistant (retry)
1005/// In text modes, it's simply:
1006///   assistant (text) → user (repair request) → assistant (retry)
1007fn append_repair_context(
1008    messages: &mut Vec<Message>,
1009    assistant_msg: &Message,
1010    repair_text: &str,
1011    mode: StructuredMode,
1012    _raw_text: &str,
1013) {
1014    if mode == StructuredMode::Tool {
1015        // Push the original assistant message (with tool_use block intact)
1016        messages.push(assistant_msg.clone());
1017        // Find the tool_use ID to construct a proper tool_result
1018        let tool_use_id = assistant_msg
1019            .tool_calls()
1020            .first()
1021            .map(|tc| tc.id.clone())
1022            .unwrap_or_else(|| "unknown".to_string());
1023        // Return the error as a tool_result so the conversation stays valid
1024        messages.push(Message::tool_result(&tool_use_id, repair_text, true));
1025    } else {
1026        // Text modes: push assistant text then user repair request
1027        messages.push(assistant_msg.clone());
1028        messages.push(Message::user(repair_text));
1029    }
1030}
1031
1032// ---------------------------------------------------------------------------
1033// Tests
1034// ---------------------------------------------------------------------------
1035
// The tests for this module are kept in the separate file named by the
// `#[path]` attribute and are compiled only when `cfg(test)` is set.
#[cfg(test)]
#[path = "structured_tests.rs"]
mod structured_tests;