Skip to main content

ai_lib_rust/structured/
json_mode.rs

//! JSON mode support for structured output.
//!
//! Provides configuration and utilities for JSON mode responses,
//! compatible with OpenAI and Anthropic APIs.
5
use std::sync::OnceLock;

use regex::Regex;

use crate::structured::error::{ValidationError, ValidationResult};
use crate::structured::validator::OutputValidator;
9
/// Level of JSON enforcement requested for a model's structured output.
///
/// The wire-format string for each variant is produced by `JsonMode::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum JsonMode {
    /// Plain JSON mode: the output is expected to be valid JSON.
    Json,

    /// JSON constrained by a schema: the output is expected to comply with it.
    JsonSchema,

    /// JSON mode is switched off; no enforcement is requested.
    Off,
}
24
25impl JsonMode {
26    /// Get the string representation for API requests.
27    pub fn as_str(&self) -> &'static str {
28        match self {
29            JsonMode::Json => "json_object",
30            JsonMode::JsonSchema => "json_schema",
31            JsonMode::Off => "",
32        }
33    }
34}
35
36impl std::fmt::Display for JsonMode {
37    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38        f.write_str(self.as_str())
39    }
40}
41
42impl std::str::FromStr for JsonMode {
43    type Err = String;
44
45    fn from_str(s: &str) -> Result<Self, Self::Err> {
46        match s {
47            "json_object" => Ok(JsonMode::Json),
48            "json_schema" => Ok(JsonMode::JsonSchema),
49            "off" | "" => Ok(JsonMode::Off),
50            _ => Err(format!("Unknown JSON mode: {}", s)),
51        }
52    }
53}
54
55/// Configuration for JSON mode.
56///
57/// Defines how structured output should be formatted and validated.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct JsonModeConfig {
60    /// JSON mode to use
61    pub mode: JsonMode,
62
63    /// JSON schema for validation (only used for JsonSchema mode)
64    pub schema: Option<serde_json::Value>,
65
66    /// Name for the schema (used in OpenAI format)
67    pub schema_name: String,
68
69    /// Whether to enforce strict schema compliance
70    pub strict: bool,
71}
72
73impl JsonModeConfig {
74    /// Create a config for simple JSON object mode.
75    ///
76    /// Example:
77    ///
78    /// ```
79    /// use ai_lib_rust::structured::JsonModeConfig;
80    ///
81    /// let config = JsonModeConfig::json_object();
82    /// let openai_format = config.to_openai_format();
83    /// assert_eq!(openai_format["response_format"]["type"], "json_object");
84    /// ```
85    pub fn json_object() -> Self {
86        Self {
87            mode: JsonMode::Json,
88            schema: None,
89            schema_name: "response".to_string(),
90            strict: true,
91        }
92    }
93
94    /// Create a config from a JSON schema.
95    ///
96    /// # Arguments
97    ///
98    /// * `schema` - JSON schema dictionary
99    /// * `name` - Schema name (default: "response")
100    /// * `strict` - Whether to enforce strict compliance (default: true)
101    ///
102    /// Example:
103    ///
104    /// ```
105    /// use ai_lib_rust::structured::{JsonMode, JsonModeConfig};
106    /// use serde_json::json;
107    ///
108    /// let schema = json!({
109    ///     "type": "object",
110    ///     "properties": {
111    ///         "name": {"type": "string"}
112    ///     },
113    ///     "required": ["name"]
114    /// });
115    ///
116    /// let config = JsonModeConfig::from_schema(schema, "test", true);
117    /// assert_eq!(config.mode, JsonMode::JsonSchema);
118    /// ```
119    pub fn from_schema(schema: serde_json::Value, name: impl Into<String>, strict: bool) -> Self {
120        Self {
121            mode: JsonMode::JsonSchema,
122            schema: Some(schema),
123            schema_name: name.into(),
124            strict,
125        }
126    }
127
128    /// Convert to OpenAI API format.
129    ///
130    /// Returns a value suitable for the `response_format` parameter
131    /// in OpenAI's Chat Completions API.
132    ///
133    /// Example output for JSON mode:
134    /// ```json
135    /// {
136    ///   "response_format": {
137    ///     "type": "json_object"
138    ///   }
139    /// }
140    /// ```
141    ///
142    /// Example output for JSON Schema mode:
143    /// ```json
144    /// {
145    ///   "response_format": {
146    ///     "type": "json_schema",
147    ///     "json_schema": {
148    ///       "name": "response",
149    ///       "strict": true,
150    ///       "schema": { ... }
151    ///     }
152    ///   }
153    /// }
154    /// ```
155    pub fn to_openai_format(&self) -> serde_json::Value {
156        match self.mode {
157            JsonMode::Off => serde_json::json!({}),
158
159            JsonMode::Json => serde_json::json!({
160                "response_format": {
161                    "type": self.mode.as_str()
162                }
163            }),
164
165            JsonMode::JsonSchema => {
166                let schema = self
167                    .schema
168                    .as_ref()
169                    .expect("Schema required for JsonSchema mode");
170                serde_json::json!({
171                    "response_format": {
172                        "type": self.mode.as_str(),
173                        "json_schema": {
174                            "name": self.schema_name,
175                            "strict": self.strict,
176                            "schema": schema
177                        }
178                    }
179                })
180            }
181        }
182    }
183
184    /// Convert to Anthropic API format.
185    ///
186    /// Note: Anthropic doesn't have native JSON mode support.
187    /// This returns an empty placeholder, and JSON enforcement
188    /// must be done through system prompt instructions.
189    ///
190    /// Example:
191    ///
192    /// ```
193    /// use ai_lib_rust::structured::JsonModeConfig;
194    /// use serde_json::json;
195    ///
196    /// let config = JsonModeConfig::json_object();
197    /// let anthropic_format = config.to_anthropic_format();
198    /// assert_eq!(anthropic_format, json!({}));
199    /// ```
200    pub fn to_anthropic_format(&self) -> serde_json::Value {
201        // Anthropic relies on system prompt instructions
202        serde_json::json!({})
203    }
204}
205
206/// Structured output result with validation.
207///
208/// Wraps the raw response from the AI model with parsed,
209/// validated, and formatted data.
210#[derive(Debug, Clone)]
211pub struct StructuredOutput {
212    /// Raw response content as string
213    pub raw: String,
214
215    /// Parsed JSON data (None if parsing failed)
216    pub parsed: Option<serde_json::Value>,
217
218    /// Validation result (always populated)
219    pub validation_result: ValidationResult,
220}
221
222impl StructuredOutput {
223    /// Create a structured output from raw content without validation.
224    ///
225    /// # Arguments
226    ///
227    /// * `content` - Raw response content
228    ///
229    /// Returns:
230    /// A StructuredOutput instance with parsed data but without validation.
231    ///
232    /// Use `.validate()` method to add validation after creation.
233    ///
234    /// Example:
235    ///
236    /// ```
237    /// use ai_lib_rust::structured::{StructuredOutput, OutputValidator};
238    /// use serde_json::json;
239    ///
240    /// let schema = json!({"type": "object", "properties": {"name": {"type": "string"}}});
241    /// let validator = OutputValidator::lenient(schema);
242    ///
243    /// let mut output = StructuredOutput::from_response_unvalidated(
244    ///     r#"{"name": "Alice"}"#
245    /// );
246    ///
247    /// output.validate(&validator);
248    /// assert!(output.is_valid());
249    /// ```
250    pub fn from_response_unvalidated(content: impl Into<String>) -> Self {
251        let content = content.into();
252        let content_str = content.trim();
253
254        // Try to parse JSON
255        let parsed = Self::parse_json(content_str);
256
257        // Create a default validation result without validation
258        let validation_result = ValidationResult::success(
259            parsed
260                .clone()
261                .unwrap_or_else(|| serde_json::Value::String(content_str.to_string())),
262        );
263
264        Self {
265            raw: content,
266            parsed,
267            validation_result,
268        }
269    }
270
271    /// Validate this output against a schema.
272    ///
273    /// # Arguments
274    ///
275    /// * `validator` - The validator to use
276    ///
277    /// Updates the validation_result with the schema check result.
278    ///
279    /// Example:
280    ///
281    /// ```
282    /// use ai_lib_rust::structured::{StructuredOutput, OutputValidator};
283    /// use serde_json::json;
284    ///
285    /// let schema = json!({"type": "object", "properties": {"name": {"type": "string"}}});
286    /// let validator = OutputValidator::lenient(schema);
287    ///
288    /// let mut output = StructuredOutput::from_response_unvalidated(r#"{"name": "Alice"}"#);
289    /// output.validate(&validator);
290    /// ```
291    pub fn validate(&mut self, validator: &OutputValidator) {
292        if let Some(parsed) = &self.parsed {
293            self.validation_result = validator.validate(parsed);
294        }
295    }
296
297    /// Create a structured output from raw content with validation.
298    ///
299    /// # Arguments
300    ///
301    /// * `content` - Raw response content
302    /// * `validator` - Validator to check the content
303    ///
304    /// Returns:
305    /// A StructuredOutput instance with parsed and validated data.
306    ///
307    /// Example:
308    ///
309    /// ```
310    /// use ai_lib_rust::structured::{StructuredOutput, OutputValidator};
311    /// use serde_json::json;
312    ///
313    /// let schema = json!({"type": "object", "properties": {"name": {"type": "string"}}});
314    /// let validator = OutputValidator::lenient(schema);
315    ///
316    /// let output = StructuredOutput::from_response(
317    ///     r#"{"name": "Alice"}"#,
318    ///     &validator
319    /// );
320    ///
321    /// assert!(output.is_valid());
322    /// ```
323    pub fn from_response(content: impl Into<String>, validator: &OutputValidator) -> Self {
324        let mut output = Self::from_response_unvalidated(content);
325        output.validate(validator);
326        output
327    }
328
329    /// Parse JSON from text, with support for markdown code blocks.
330    ///
331    /// Extracts JSON from common formats:
332    /// - Raw JSON object
333    /// - ```json ... ``` code blocks
334    /// - ``` ... ``` code blocks
335    /// - Text containing JSON objects/arrays
336    fn parse_json(text: &str) -> Option<serde_json::Value> {
337        // Try direct parsing first
338        if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(text) {
339            return Some(parsed);
340        }
341
342        // Try to extract from markdown code blocks
343        let patterns = [
344            r"```json\s*([\s\S]*?)\s*```",
345            r"```\s*([\s\S]*?)\s*```",
346            r"\{[\s\S]*\}",
347            r"\[[\s\S]*\]",
348        ];
349
350        for pattern in patterns {
351            if let Ok(re) = Regex::new(pattern) {
352                if let Some(captures) = re.captures(text) {
353                    let candidate = match captures.get(1) {
354                        Some(inner) => inner.as_str(),
355                        None => captures.get(0).map(|c| c.as_str()).unwrap_or(text),
356                    };
357
358                    if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(candidate.trim())
359                    {
360                        return Some(parsed);
361                    }
362                }
363            }
364        }
365
366        None
367    }
368
369    /// Check if the output is valid.
370    ///
371    /// Returns true if:
372    /// 1. JSON parsing succeeded
373    /// 2. Validation passed (if validation was performed)
374    pub fn is_valid(&self) -> bool {
375        self.validation_result.is_valid()
376    }
377
378    /// Get the best available data representation.
379    ///
380    /// Priority:
381    /// 1. Validated data (if validation passed)
382    /// 2. Parsed data (if available)
383    /// 3. Raw content as string
384    pub fn data(&self) -> serde_json::Value {
385        if let Some(data) = self.validation_result.data() {
386            return data.clone();
387        }
388        if let Some(parsed) = &self.parsed {
389            return parsed.clone();
390        }
391        serde_json::Value::String(self.raw.clone())
392    }
393
394    /// Get the raw response content.
395    pub fn raw(&self) -> &str {
396        &self.raw
397    }
398
399    /// Get the parsed JSON data (if parsing succeeded).
400    pub fn parsed(&self) -> Option<&serde_json::Value> {
401        self.parsed.as_ref()
402    }
403
404    /// Get the validation result.
405    pub fn validation_result(&self) -> &ValidationResult {
406        &self.validation_result
407    }
408
409    /// Get validation errors if validation failed.
410    pub fn errors(&self) -> Vec<ValidationError> {
411        if self.validation_result.is_valid() {
412            Vec::new()
413        } else {
414            self.validation_result.errors.clone()
415        }
416    }
417
418    /// Get error messages as strings.
419    pub fn error_messages(&self) -> Vec<String> {
420        self.validation_result.error_messages()
421    }
422}
423
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Lenient validator built from the given schema.
    fn lenient(schema: serde_json::Value) -> OutputValidator {
        OutputValidator::lenient(schema)
    }

    /// Lenient validator whose schema only accepts integers.
    fn integer_validator() -> OutputValidator {
        lenient(json!({"type": "integer"}))
    }

    // ---- JsonModeConfig -------------------------------------------------

    #[test]
    fn test_json_mode_config_json_object() {
        let JsonModeConfig {
            mode,
            schema,
            schema_name,
            strict,
        } = JsonModeConfig::json_object();

        assert_eq!(mode, JsonMode::Json);
        assert!(schema.is_none());
        assert_eq!(schema_name, "response");
        assert!(strict);
    }

    #[test]
    fn test_json_mode_config_from_schema() {
        let user_schema = json!({
            "type": "object",
            "properties": {
                "name": {"type": "string"}
            }
        });

        let cfg = JsonModeConfig::from_schema(user_schema, "User", true);

        assert_eq!(cfg.mode, JsonMode::JsonSchema);
        assert!(cfg.schema.is_some());
        assert_eq!(cfg.schema_name, "User");
        assert!(cfg.strict);
    }

    #[test]
    fn test_json_mode_config_to_openai_format_json() {
        let rendered = JsonModeConfig::json_object().to_openai_format();

        assert_eq!(rendered["response_format"]["type"], "json_object");
    }

    #[test]
    fn test_json_mode_config_to_openai_format_json_schema() {
        let string_schema = json!({
            "type": "string"
        });

        let rendered =
            JsonModeConfig::from_schema(string_schema.clone(), "test", false).to_openai_format();
        let response_format = &rendered["response_format"];

        assert_eq!(response_format["type"], "json_schema");
        assert_eq!(response_format["json_schema"]["name"], "test");
        assert_eq!(response_format["json_schema"]["strict"], false);
        assert_eq!(response_format["json_schema"]["schema"], string_schema);
    }

    #[test]
    fn test_json_mode_config_to_openai_format_off() {
        let disabled = JsonModeConfig {
            mode: JsonMode::Off,
            schema: None,
            schema_name: "test".to_string(),
            strict: false,
        };

        // With JSON mode off nothing is added to the request.
        assert_eq!(disabled.to_openai_format(), json!({}));
    }

    #[test]
    fn test_json_mode_config_to_anthropic_format() {
        let rendered = JsonModeConfig::json_object().to_anthropic_format();

        // The Anthropic format is an empty placeholder.
        assert_eq!(rendered, json!({}));
    }

    // ---- StructuredOutput -----------------------------------------------

    #[test]
    fn test_structured_output_valid_json() {
        let validator = lenient(json!({
            "type": "object",
            "properties": {
                "result": {"type": "string"}
            }
        }));

        let out = StructuredOutput::from_response(r#"{"result": "success"}"#, &validator);

        assert!(out.is_valid());
        assert!(out.parsed().is_some());
    }

    #[test]
    fn test_structured_output_invalid_json() {
        let out = StructuredOutput::from_response_unvalidated("not json");

        assert!(out.is_valid());
        assert!(out.parsed().is_none());
    }

    #[test]
    fn test_structured_output_parsed_json() {
        let out = StructuredOutput::from_response_unvalidated(r#"{"valid": true}"#);
        let value = out.parsed().unwrap();

        assert_eq!(value["valid"], true);
    }

    #[test]
    fn test_structured_output_json_from_markdown() {
        let response = r#"Here is the JSON:
            ```json
            {"result": "success"}
            ```"#;

        let out = StructuredOutput::from_response_unvalidated(response);

        assert_eq!(out.parsed().unwrap()["result"], "success");
    }

    #[test]
    fn test_structured_output_data_priority() {
        let validator = lenient(json!({
            "type": "object",
            "properties": {
                "value": {"type": "string"}
            }
        }));

        let mut out = StructuredOutput::from_response_unvalidated(r#"{"value": "test"}"#);
        out.validate(&validator);

        // Validation passes, so `data()` must hand back the validated data.
        let best = out.data();
        assert_eq!(*out.validation_result.data().unwrap(), best);
    }

    #[test]
    fn test_structured_output_validate_method() {
        let validator = integer_validator();

        let mut out = StructuredOutput::from_response_unvalidated(r#"{"value": "test"}"#);
        out.validate(&validator);

        assert!(!out.is_valid());
        assert!(!out.errors().is_empty());
    }

    #[test]
    fn test_structured_output_errors() {
        let validator = integer_validator();

        let mut out = StructuredOutput::from_response_unvalidated(r#"{"value": "not integer"}"#);
        out.validate(&validator);

        assert!(!out.is_valid());
        assert!(!out.errors().is_empty());
    }
}