Skip to main content

nika_core/ast/
structured.rs

1//! Structured Output Configuration
2//!
3//! Defines task-level configuration for JSON Schema validation:
4//! - `StructuredOutputSpec`: Schema + layer enables + retry config
5//!
6//! Works with the `StructuredOutputEngine` for ~99.99% compliance.
7
8use serde::de::{self, Deserializer, MapAccess, Visitor};
9use serde::{Deserialize, Serialize};
10use std::fmt;
11
12use super::output::SchemaRef;
13
14/// Structured output configuration for a task
15///
16/// Controls how the StructuredOutputEngine validates and repairs output.
17/// Supports shorthand (just schema path) or full configuration.
18///
19/// # Examples
20///
21/// Shorthand (schema file path):
22/// ```yaml
23/// structured: ./schemas/user.json
24/// ```
25///
26/// Full configuration:
27/// ```yaml
28/// structured:
29///   schema: ./schemas/user.json
30///   max_retries: 3
31///   enable_repair: true
32///   repair_model: claude-sonnet-4-6
33/// ```
34///
35/// Inline schema:
36/// ```yaml
37/// structured:
38///   schema:
39///     type: object
40///     properties:
41///       name:
42///         type: string
43///     required: [name]
44/// ```
45#[derive(Debug, Clone, Serialize)]
46pub struct StructuredOutputSpec {
47    /// JSON Schema reference (inline or file path).
48    ///
49    /// `None` when `from_example` is set — schema is derived at runtime.
50    /// Use `StructuredOutputEngine::load_schema()` which handles derivation correctly.
51    #[serde(default, skip_serializing_if = "Option::is_none")]
52    pub schema: Option<SchemaRef>,
53
54    /// JSON example — Nika auto-derives the JSON Schema from this at runtime.
55    ///
56    /// Mutually exclusive with `schema`. When set, `schema` is a placeholder `{}`.
57    /// The engine calls `json_to_schema()` on the example before any validation.
58    #[serde(default, skip_serializing_if = "Option::is_none")]
59    pub from_example: Option<SchemaRef>,
60
61    /// Enable Layer 1: rig Extractor (Rust type extraction)
62    /// Default: true
63    #[serde(default)]
64    pub enable_extractor: Option<bool>,
65
66    /// Enable Layer 0: Tool injection (DynamicSubmitTool)
67    /// When true, injects a synthetic submit_result tool for provider-side
68    /// schema enforcement before falling through to post-processing layers.
69    /// Default: true
70    #[serde(default)]
71    pub enable_tool_injection: Option<bool>,
72
73    /// Enable Layer 3: Retry with feedback
74    /// Default: true
75    #[serde(default)]
76    pub enable_retry: Option<bool>,
77
78    /// Enable Layer 4: LLM repair
79    /// Default: true
80    #[serde(default)]
81    pub enable_repair: Option<bool>,
82
83    /// Maximum retry attempts (Layer 3)
84    /// Default: 2
85    #[serde(default)]
86    pub max_retries: Option<u8>,
87
88    /// Model to use for repair (Layer 4)
89    /// Default: same as task model
90    #[serde(default)]
91    pub repair_model: Option<String>,
92
93    /// When true, derived `from_example` schemas add `additionalProperties: false`
94    /// to all object schemas recursively. Prevents the LLM from injecting extra keys.
95    /// Default: false. Only meaningful when `from_example` is set.
96    #[serde(default, skip_serializing_if = "Option::is_none")]
97    pub strict: Option<bool>,
98}
99
100impl StructuredOutputSpec {
101    /// Create with a schema reference
102    pub fn with_schema(schema: SchemaRef) -> Self {
103        Self {
104            schema: Some(schema),
105            from_example: None,
106            enable_extractor: None,
107            enable_tool_injection: None,
108            enable_retry: None,
109            enable_repair: None,
110            max_retries: None,
111            repair_model: None,
112            strict: None,
113        }
114    }
115
116    /// Create with an example file (schema derived at runtime).
117    ///
118    /// The file is read at execution time and its JSON structure is used to derive
119    /// the JSON Schema for output validation.
120    ///
121    /// NOTE: The LLM prompt will NOT include the example structure — only a generic
122    /// "output valid JSON" instruction. Use `with_example_inline()` when you want
123    /// the example shown in the prompt.
124    pub fn with_example_file(path: impl Into<String>) -> Self {
125        Self {
126            schema: None,
127            from_example: Some(SchemaRef::File(path.into())),
128            enable_extractor: None,
129            enable_tool_injection: None,
130            enable_retry: None,
131            enable_repair: None,
132            max_retries: None,
133            repair_model: None,
134            strict: None,
135        }
136    }
137
138    /// Create with an inline JSON example (schema derived at runtime).
139    ///
140    /// The example structure is injected into the LLM prompt AND used for validation.
141    /// This gives the LLM full context about the expected output shape.
142    pub fn with_example_inline(example: serde_json::Value) -> Self {
143        Self {
144            schema: None,
145            from_example: Some(SchemaRef::Inline(example)),
146            enable_extractor: None,
147            enable_tool_injection: None,
148            enable_retry: None,
149            enable_repair: None,
150            max_retries: None,
151            repair_model: None,
152            strict: None,
153        }
154    }
155
156    /// Create with an inline JSON schema
157    pub fn with_inline_schema(schema: serde_json::Value) -> Self {
158        Self::with_schema(SchemaRef::Inline(schema))
159    }
160
161    /// Create with a file path
162    pub fn with_file_schema(path: impl Into<String>) -> Self {
163        Self::with_schema(SchemaRef::File(path.into()))
164    }
165
166    /// Get max_retries with default
167    pub fn max_retries_or_default(&self) -> u8 {
168        self.max_retries.unwrap_or(2)
169    }
170
171    /// Check if Layer 2 (tool_use) is enabled
172    pub fn enable_tool_injection_or_default(&self) -> bool {
173        self.enable_tool_injection.unwrap_or(true)
174    }
175
176    /// Check if Layer 3 (retry) is enabled
177    pub fn enable_retry_or_default(&self) -> bool {
178        self.enable_retry.unwrap_or(true)
179    }
180
181    /// Check if Layer 4 (repair) is enabled
182    pub fn enable_repair_or_default(&self) -> bool {
183        self.enable_repair.unwrap_or(true)
184    }
185
186    /// Convert to OutputPolicy for executor Layer 0 dispatch.
187    ///
188    /// The executor's `run_infer()` uses `OutputPolicy` to trigger Layer 0 tool injection
189    /// and prompt schema instructions. This bridges `structured:` config to that path.
190    /// The original spec is preserved in `source_structured_spec` so that
191    /// `to_structured_spec()` can roundtrip without losing layer toggle config.
192    pub fn to_output_policy(&self) -> super::output::OutputPolicy {
193        super::output::OutputPolicy {
194            format: super::output::OutputFormat::Json,
195            schema: self.schema.clone(),
196            from_example: self.from_example.clone(),
197            max_retries: self.max_retries,
198            source_structured_spec: Some(self.clone()),
199        }
200    }
201}
202
203impl<'de> Deserialize<'de> for StructuredOutputSpec {
204    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
205    where
206        D: Deserializer<'de>,
207    {
208        struct StructuredOutputSpecVisitor;
209
210        impl<'de> Visitor<'de> for StructuredOutputSpecVisitor {
211            type Value = StructuredOutputSpec;
212
213            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
214                formatter
215                    .write_str("a schema path string or structured output configuration object")
216            }
217
218            // Shorthand: `structured: ./schema.json`
219            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
220            where
221                E: de::Error,
222            {
223                Ok(StructuredOutputSpec::with_file_schema(v))
224            }
225
226            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
227            where
228                E: de::Error,
229            {
230                Ok(StructuredOutputSpec::with_file_schema(v))
231            }
232
233            // Full form: `structured: { schema: ..., max_retries: ... }`
234            fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
235            where
236                A: MapAccess<'de>,
237            {
238                let mut schema: Option<SchemaRef> = None;
239                let mut from_example: Option<SchemaRef> = None;
240                let mut enable_extractor: Option<bool> = None;
241                let mut enable_tool_injection: Option<bool> = None;
242                let mut enable_retry: Option<bool> = None;
243                let mut enable_repair: Option<bool> = None;
244                let mut max_retries: Option<u8> = None;
245                let mut repair_model: Option<String> = None;
246                let mut strict: Option<bool> = None;
247
248                while let Some(key) = map.next_key::<String>()? {
249                    match key.as_str() {
250                        "schema" => {
251                            schema = Some(map.next_value()?);
252                        }
253                        "from_example" => {
254                            from_example = Some(map.next_value()?);
255                        }
256                        "enable_extractor" => {
257                            enable_extractor = Some(map.next_value()?);
258                        }
259                        "enable_tool_injection" => {
260                            enable_tool_injection = Some(map.next_value()?);
261                        }
262                        "enable_retry" => {
263                            enable_retry = Some(map.next_value()?);
264                        }
265                        "enable_repair" => {
266                            enable_repair = Some(map.next_value()?);
267                        }
268                        "max_retries" => {
269                            max_retries = Some(map.next_value()?);
270                        }
271                        "repair_model" => {
272                            repair_model = Some(map.next_value()?);
273                        }
274                        "strict" => {
275                            strict = Some(map.next_value()?);
276                        }
277                        _ => {
278                            // Ignore unknown fields
279                            let _: serde_json::Value = map.next_value()?;
280                        }
281                    }
282                }
283
284                // from_example and schema are mutually exclusive.
285                // When from_example is set, schema is None (derived at runtime by the engine).
286                if schema.is_none() && from_example.is_none() {
287                    return Err(de::Error::missing_field("schema or from_example"));
288                }
289
290                Ok(StructuredOutputSpec {
291                    schema,
292                    from_example,
293                    enable_extractor,
294                    enable_tool_injection,
295                    enable_retry,
296                    enable_repair,
297                    max_retries,
298                    repair_model,
299                    strict,
300                })
301            }
302        }
303
304        deserializer.deserialize_any(StructuredOutputSpecVisitor)
305    }
306}
307
308// Re-export from the dedicated schema module.
309// json_to_schema lives in nika_core::schema but is re-exported here
310// so existing callers (nika-engine via `crate::ast::structured::json_to_schema`) keep working.
311pub use crate::schema::{json_to_schema, json_to_schema_strict};
312
#[cfg(test)]
mod tests {
    use super::*;
    use crate::serde_yaml;

    /// A bare YAML scalar is the shorthand form and becomes a file schema.
    #[test]
    fn parse_shorthand_file_path() {
        let spec: StructuredOutputSpec = serde_yaml::from_str("./schemas/user.json").unwrap();
        assert!(matches!(spec.schema, Some(SchemaRef::File(ref p)) if p == "./schemas/user.json"));
        assert!(spec.from_example.is_none());
    }

    #[test]
    fn parse_full_form_with_file() {
        let yaml = r#"
schema: ./schemas/user.json
max_retries: 3
enable_repair: false
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(spec.schema, Some(SchemaRef::File(ref p)) if p == "./schemas/user.json"));
        assert_eq!(spec.max_retries, Some(3));
        assert_eq!(spec.enable_repair, Some(false));
    }

    #[test]
    fn parse_full_form_with_inline_schema() {
        let yaml = r#"
schema:
  type: object
  properties:
    name:
      type: string
  required:
    - name
max_retries: 2
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(spec.schema, Some(SchemaRef::Inline(_))));
        assert_eq!(spec.max_retries, Some(2));
    }

    /// The mapping form must name at least one schema source.
    #[test]
    fn parse_requires_schema_or_from_example() {
        let result = serde_yaml::from_str::<StructuredOutputSpec>("max_retries: 3\n");
        assert!(result.is_err(), "a mapping without schema/from_example must be rejected");
    }

    #[test]
    fn defaults_are_applied() {
        let spec = StructuredOutputSpec::with_file_schema("./test.json");
        assert_eq!(spec.max_retries_or_default(), 2);
        assert!(spec.enable_tool_injection_or_default());
        assert!(spec.enable_retry_or_default());
        assert!(spec.enable_repair_or_default());
    }

    /// Explicitly configured values win over the built-in defaults.
    #[test]
    fn explicit_values_override_defaults() {
        let spec = StructuredOutputSpec {
            max_retries: Some(5),
            enable_tool_injection: Some(false),
            enable_retry: Some(false),
            enable_repair: Some(false),
            ..StructuredOutputSpec::with_file_schema("./test.json")
        };
        assert_eq!(spec.max_retries_or_default(), 5);
        assert!(!spec.enable_tool_injection_or_default());
        assert!(!spec.enable_retry_or_default());
        assert!(!spec.enable_repair_or_default());
    }

    #[test]
    fn constructors_work() {
        let file_spec = StructuredOutputSpec::with_file_schema("./test.json");
        assert!(matches!(file_spec.schema, Some(SchemaRef::File(_))));

        let inline_spec = StructuredOutputSpec::with_inline_schema(serde_json::json!({
            "type": "object"
        }));
        assert!(matches!(inline_spec.schema, Some(SchemaRef::Inline(_))));
    }

    /// Example-based constructors populate `from_example` and leave `schema` unset.
    #[test]
    fn example_constructors_leave_schema_unset() {
        let from_file = StructuredOutputSpec::with_example_file("./structure.json");
        assert!(from_file.schema.is_none());
        assert!(
            matches!(from_file.from_example, Some(SchemaRef::File(ref p)) if p == "./structure.json")
        );

        let from_inline =
            StructuredOutputSpec::with_example_inline(serde_json::json!({ "title": "hello" }));
        assert!(from_inline.schema.is_none());
        assert!(matches!(from_inline.from_example, Some(SchemaRef::Inline(_))));
    }

    #[test]
    fn parse_with_repair_model() {
        let yaml = r#"
schema: ./test.json
repair_model: claude-sonnet-4-6
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(spec.repair_model, Some("claude-sonnet-4-6".to_string()));
    }

    #[test]
    fn parse_all_layer_toggles() {
        let yaml = r#"
schema: ./test.json
enable_extractor: false
enable_tool_injection: false
enable_retry: true
enable_repair: false
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert_eq!(spec.enable_extractor, Some(false));
        assert_eq!(spec.enable_tool_injection, Some(false));
        assert_eq!(spec.enable_retry, Some(true));
        assert_eq!(spec.enable_repair, Some(false));
    }

    #[test]
    fn legacy_enable_tool_use_is_ignored() {
        let yaml = r#"
schema: ./test.json
enable_tool_use: false
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        // enable_tool_use is no longer recognized; only enable_tool_injection works
        assert_eq!(spec.enable_tool_injection, None);
    }

    #[test]
    fn serialize_to_json() {
        let spec = StructuredOutputSpec::with_file_schema("./test.json");
        let json = serde_json::to_string(&spec).unwrap();
        assert!(json.contains("./test.json"));
    }

    #[test]
    fn parse_from_example_file() {
        let yaml = r#"
from_example: ./structure.json
enable_repair: true
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert!(
            spec.schema.is_none(),
            "schema should be None when from_example is set"
        );
        assert!(
            matches!(spec.from_example, Some(SchemaRef::File(ref p)) if p == "./structure.json")
        );
        assert_eq!(spec.enable_repair, Some(true));
    }

    #[test]
    fn parse_from_example_inline() {
        let yaml = r#"
from_example:
  title: "hello"
  count: 42
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(spec.from_example, Some(SchemaRef::Inline(_))));
    }

    #[test]
    fn parse_both_schema_and_from_example_are_preserved() {
        // When both are set, both fields survive deserialization.
        // load_schema() in nika-engine always checks from_example first,
        // so schema acts as a fallback — but we don't test that here (engine tests cover it).
        let yaml = r#"
schema:
  type: object
from_example: ./structure.json
"#;
        let spec: StructuredOutputSpec = serde_yaml::from_str(yaml).unwrap();
        assert!(matches!(spec.schema, Some(SchemaRef::Inline(_))));
        assert!(spec.from_example.is_some());
    }

    /// The policy bridge copies the schema source and retry budget, and keeps
    /// the originating spec attached.
    #[test]
    fn to_output_policy_carries_spec() {
        let spec = StructuredOutputSpec {
            max_retries: Some(3),
            ..StructuredOutputSpec::with_file_schema("./test.json")
        };
        let policy = spec.to_output_policy();
        assert!(matches!(policy.schema, Some(SchemaRef::File(ref p)) if p == "./test.json"));
        assert!(policy.from_example.is_none());
        assert_eq!(policy.max_retries, Some(3));
        assert!(policy.source_structured_spec.is_some());
    }

    // json_to_schema tests are in nika_core::schema::tests (dedicated module).
    // This test verifies the re-export works.
    #[test]
    fn json_to_schema_reexport_works() {
        let schema = json_to_schema(&serde_json::json!({"x": 1}));
        assert_eq!(schema["type"], "object");
    }
}