Skip to main content

recoco_core/base/
json_schema.rs

1// ReCoco is a Rust-only fork of CocoIndex, by [CocoIndex](https://CocoIndex)
2// Original code from CocoIndex is copyrighted by CocoIndex
3// SPDX-FileCopyrightText: 2025-2026 CocoIndex (upstream)
4// SPDX-FileContributor: CocoIndex Contributors
5//
6// All modifications from the upstream for ReCoco are copyrighted by Knitli Inc.
7// SPDX-FileCopyrightText: 2026 Knitli Inc. (ReCoco)
8// SPDX-FileContributor: Adam Poulemanos <adam@knit.li>
9//
10// Both the upstream CocoIndex code and the ReCoco modifications are licensed under the Apache-2.0 License.
11// SPDX-License-Identifier: Apache-2.0
12
13use crate::prelude::*;
14
15use schemars::Schema;
16use serde_json::{Map, Value as JsonValue};
17use std::fmt::Write;
18use utils::immutable::RefList;
19
/// Options controlling how a value type is rendered as a JSON schema.
///
/// Each flag toggles one aspect of the generated schema so that the output can be
/// adapted to what a particular consumer accepts.
#[derive(Debug, Clone)]
pub struct ToJsonSchemaOptions {
    /// If true, mark all fields as required.
    /// Use union type (with `null`) for optional fields instead.
    /// Models like OpenAI will reject the schema if a field is not required.
    pub fields_always_required: bool,

    /// If true, the JSON schema supports the `format` keyword.
    pub supports_format: bool,

    /// If true, extract descriptions to a separate extra instruction.
    pub extract_descriptions: bool,

    /// If true, the top level must be a JSON object.
    pub top_level_must_be_object: bool,

    /// If true, include `additionalProperties: false` in object schemas.
    /// Some LLM APIs (e.g., Gemini) do not support this constraint and will error.
    pub supports_additional_properties: bool,
}
39
40struct JsonSchemaBuilder {
41    options: ToJsonSchemaOptions,
42    extra_instructions_per_field: IndexMap<String, String>,
43}
44
45impl JsonSchemaBuilder {
46    fn new(options: ToJsonSchemaOptions) -> Self {
47        Self {
48            options,
49            extra_instructions_per_field: IndexMap::new(),
50        }
51    }
52
53    fn add_description(
54        &mut self,
55        schema: &mut Schema,
56        description: &str,
57        field_path: RefList<'_, &'_ spec::FieldName>,
58    ) {
59        if self.options.extract_descriptions {
60            let mut fields: Vec<_> = field_path.iter().map(|f| f.as_str()).collect();
61            fields.reverse();
62            let field_path_str = fields.join(".");
63
64            let mut_description = self
65                .extra_instructions_per_field
66                .entry(field_path_str)
67                .or_default();
68            if !mut_description.is_empty() {
69                mut_description.push_str("\n\n");
70            }
71            mut_description.push_str(description);
72        } else {
73            let obj = schema.ensure_object();
74            let existing = obj
75                .get("description")
76                .and_then(|v| v.as_str())
77                .map(|s| s.to_owned());
78            let new_description = match existing {
79                Some(existing) if !existing.is_empty() => format!("{existing}\n\n{description}"),
80                _ => description.to_owned(),
81            };
82            obj.insert("description".to_owned(), JsonValue::String(new_description));
83        }
84    }
85
86    fn for_basic_value_type(
87        &mut self,
88        mut schema: Schema,
89        basic_type: &schema::BasicValueType,
90        field_path: RefList<'_, &'_ spec::FieldName>,
91    ) -> Schema {
92        match basic_type {
93            schema::BasicValueType::Str => {
94                schema
95                    .ensure_object()
96                    .insert("type".to_owned(), JsonValue::String("string".to_owned()));
97            }
98            schema::BasicValueType::Bytes => {
99                schema
100                    .ensure_object()
101                    .insert("type".to_owned(), JsonValue::String("string".to_owned()));
102            }
103            schema::BasicValueType::Bool => {
104                schema
105                    .ensure_object()
106                    .insert("type".to_owned(), JsonValue::String("boolean".to_owned()));
107            }
108            schema::BasicValueType::Int64 => {
109                schema
110                    .ensure_object()
111                    .insert("type".to_owned(), JsonValue::String("integer".to_owned()));
112            }
113            schema::BasicValueType::Float32 | schema::BasicValueType::Float64 => {
114                schema
115                    .ensure_object()
116                    .insert("type".to_owned(), JsonValue::String("number".to_owned()));
117            }
118            schema::BasicValueType::Range => {
119                let obj = schema.ensure_object();
120                obj.insert("type".to_owned(), JsonValue::String("array".to_owned()));
121                obj.insert("items".to_owned(), serde_json::json!({"type": "integer"}));
122                obj.insert("minItems".to_owned(), JsonValue::Number(2.into()));
123                obj.insert("maxItems".to_owned(), JsonValue::Number(2.into()));
124                self.add_description(
125                    &mut schema,
126                    "A range represented by a list of two positions, start pos (inclusive), end pos (exclusive).",
127                    field_path,
128                );
129            }
130            schema::BasicValueType::Uuid => {
131                let obj = schema.ensure_object();
132                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
133                if self.options.supports_format {
134                    obj.insert("format".to_owned(), JsonValue::String("uuid".to_owned()));
135                }
136                self.add_description(
137                    &mut schema,
138                    "A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000",
139                    field_path,
140                );
141            }
142            schema::BasicValueType::Date => {
143                let obj = schema.ensure_object();
144                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
145                if self.options.supports_format {
146                    obj.insert("format".to_owned(), JsonValue::String("date".to_owned()));
147                }
148                self.add_description(
149                    &mut schema,
150                    "A date in YYYY-MM-DD format, e.g. 2025-03-27",
151                    field_path,
152                );
153            }
154            schema::BasicValueType::Time => {
155                let obj = schema.ensure_object();
156                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
157                if self.options.supports_format {
158                    obj.insert("format".to_owned(), JsonValue::String("time".to_owned()));
159                }
160                self.add_description(
161                    &mut schema,
162                    "A time in HH:MM:SS format, e.g. 13:32:12",
163                    field_path,
164                );
165            }
166            schema::BasicValueType::LocalDateTime => {
167                let obj = schema.ensure_object();
168                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
169                if self.options.supports_format {
170                    obj.insert(
171                        "format".to_owned(),
172                        JsonValue::String("date-time".to_owned()),
173                    );
174                }
175                self.add_description(
176                    &mut schema,
177                    "Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12",
178                    field_path,
179                );
180            }
181            schema::BasicValueType::OffsetDateTime => {
182                let obj = schema.ensure_object();
183                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
184                if self.options.supports_format {
185                    obj.insert(
186                        "format".to_owned(),
187                        JsonValue::String("date-time".to_owned()),
188                    );
189                }
190                self.add_description(
191                    &mut schema,
192                    "Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00",
193                    field_path,
194                );
195            }
196            &schema::BasicValueType::TimeDelta => {
197                let obj = schema.ensure_object();
198                obj.insert("type".to_owned(), JsonValue::String("string".to_owned()));
199                if self.options.supports_format {
200                    obj.insert(
201                        "format".to_owned(),
202                        JsonValue::String("duration".to_owned()),
203                    );
204                }
205                self.add_description(
206                    &mut schema,
207                    "A duration, e.g. 'PT1H2M3S' (ISO 8601) or '1 day 2 hours 3 seconds'",
208                    field_path,
209                );
210            }
211            schema::BasicValueType::Json => {
212                // Can be any value. No type constraint.
213            }
214            schema::BasicValueType::Vector(s) => {
215                let items_schema =
216                    self.for_basic_value_type(Schema::default(), &s.element_type, field_path);
217                let obj = schema.ensure_object();
218                obj.insert("type".to_owned(), JsonValue::String("array".to_owned()));
219                obj.insert(
220                    "items".to_owned(),
221                    serde_json::to_value(&items_schema).unwrap_or(JsonValue::Object(Map::new())),
222                );
223                if let Some(d) = s.dimension
224                    && let Ok(d) = u32::try_from(d)
225                {
226                    obj.insert("minItems".to_owned(), JsonValue::Number(d.into()));
227                    obj.insert("maxItems".to_owned(), JsonValue::Number(d.into()));
228                }
229            }
230            schema::BasicValueType::Union(s) => {
231                let one_of: Vec<JsonValue> = s
232                    .types
233                    .iter()
234                    .map(|t| {
235                        let inner_schema =
236                            self.for_basic_value_type(Schema::default(), t, field_path);
237                        serde_json::to_value(&inner_schema).unwrap_or(JsonValue::Object(Map::new()))
238                    })
239                    .collect();
240                schema
241                    .ensure_object()
242                    .insert("oneOf".to_owned(), JsonValue::Array(one_of));
243            }
244        }
245        schema
246    }
247
248    fn for_struct_schema(
249        &mut self,
250        mut schema: Schema,
251        struct_schema: &schema::StructSchema,
252        field_path: RefList<'_, &'_ spec::FieldName>,
253    ) -> Schema {
254        if let Some(description) = &struct_schema.description {
255            self.add_description(&mut schema, description, field_path);
256        }
257
258        let mut properties = Map::new();
259        let mut required: Vec<String> = Vec::new();
260
261        for f in struct_schema.fields.iter() {
262            let mut field_schema = Schema::default();
263            // Set field description if available
264            if let Some(description) = &f.description {
265                self.add_description(&mut field_schema, description, field_path.prepend(&f.name));
266            }
267            let mut field_schema = self.for_enriched_value_type(
268                field_schema,
269                &f.value_type,
270                field_path.prepend(&f.name),
271            );
272
273            if self.options.fields_always_required && f.value_type.nullable {
274                // Add "null" to the type array
275                let obj = field_schema.ensure_object();
276                if let Some(type_val) = obj.get("type").cloned() {
277                    let types = match type_val {
278                        JsonValue::String(s) => JsonValue::Array(vec![
279                            JsonValue::String(s),
280                            JsonValue::String("null".to_owned()),
281                        ]),
282                        JsonValue::Array(mut arr) => {
283                            arr.push(JsonValue::String("null".to_owned()));
284                            JsonValue::Array(arr)
285                        }
286                        _ => type_val,
287                    };
288                    obj.insert("type".to_owned(), types);
289                }
290            }
291
292            let field_json =
293                serde_json::to_value(&field_schema).unwrap_or(JsonValue::Object(Map::new()));
294            properties.insert(f.name.to_string(), field_json);
295
296            if self.options.fields_always_required || !f.value_type.nullable {
297                required.push(f.name.to_string());
298            }
299        }
300
301        let obj = schema.ensure_object();
302        obj.insert("type".to_owned(), JsonValue::String("object".to_owned()));
303        obj.insert("properties".to_owned(), JsonValue::Object(properties));
304        obj.insert(
305            "required".to_owned(),
306            JsonValue::Array(required.into_iter().map(JsonValue::String).collect()),
307        );
308        if self.options.supports_additional_properties {
309            obj.insert("additionalProperties".to_owned(), JsonValue::Bool(false));
310        }
311
312        schema
313    }
314
315    fn for_value_type(
316        &mut self,
317        mut schema: Schema,
318        value_type: &schema::ValueType,
319        field_path: RefList<'_, &'_ spec::FieldName>,
320    ) -> Schema {
321        match value_type {
322            schema::ValueType::Basic(b) => self.for_basic_value_type(schema, b, field_path),
323            schema::ValueType::Struct(s) => self.for_struct_schema(schema, s, field_path),
324            schema::ValueType::Table(c) => {
325                let items_schema = self.for_struct_schema(Schema::default(), &c.row, field_path);
326                let obj = schema.ensure_object();
327                obj.insert("type".to_owned(), JsonValue::String("array".to_owned()));
328                obj.insert(
329                    "items".to_owned(),
330                    serde_json::to_value(&items_schema).unwrap_or(JsonValue::Object(Map::new())),
331                );
332                schema
333            }
334        }
335    }
336
337    fn for_enriched_value_type(
338        &mut self,
339        schema: Schema,
340        enriched_value_type: &schema::EnrichedValueType,
341        field_path: RefList<'_, &'_ spec::FieldName>,
342    ) -> Schema {
343        self.for_value_type(schema, &enriched_value_type.typ, field_path)
344    }
345
346    fn build_extra_instructions(&self) -> Result<Option<String>> {
347        if self.extra_instructions_per_field.is_empty() {
348            return Ok(None);
349        }
350
351        let mut instructions = String::new();
352        write!(&mut instructions, "Instructions for specific fields:\n\n")?;
353        for (field_path, instruction) in self.extra_instructions_per_field.iter() {
354            write!(
355                &mut instructions,
356                "- {}: {}\n\n",
357                if field_path.is_empty() {
358                    "(root object)"
359                } else {
360                    field_path.as_str()
361                },
362                instruction
363            )?;
364        }
365        Ok(Some(instructions))
366    }
367}
368
369pub struct ValueExtractor {
370    value_type: schema::ValueType,
371    object_wrapper_field_name: Option<String>,
372}
373
374impl ValueExtractor {
375    pub fn extract_value(&self, json_value: serde_json::Value) -> Result<value::Value> {
376        let unwrapped_json_value =
377            if let Some(object_wrapper_field_name) = &self.object_wrapper_field_name {
378                match json_value {
379                    serde_json::Value::Object(mut o) => o
380                        .remove(object_wrapper_field_name)
381                        .unwrap_or(serde_json::Value::Null),
382                    _ => {
383                        client_bail!("Field `{}` not found", object_wrapper_field_name)
384                    }
385                }
386            } else {
387                json_value
388            };
389        let result = value::Value::from_json(unwrapped_json_value, &self.value_type)?;
390        Ok(result)
391    }
392}
393
394pub struct BuildJsonSchemaOutput {
395    pub schema: Schema,
396    pub extra_instructions: Option<String>,
397    pub value_extractor: ValueExtractor,
398}
399
400pub fn build_json_schema(
401    value_type: schema::EnrichedValueType,
402    options: ToJsonSchemaOptions,
403) -> Result<BuildJsonSchemaOutput> {
404    let mut builder = JsonSchemaBuilder::new(options);
405    let (schema, object_wrapper_field_name) = if builder.options.top_level_must_be_object
406        && !matches!(value_type.typ, schema::ValueType::Struct(_))
407    {
408        let object_wrapper_field_name = "value".to_string();
409        let wrapper_struct = schema::StructSchema {
410            fields: Arc::new(vec![schema::FieldSchema {
411                name: object_wrapper_field_name.clone(),
412                value_type: value_type.clone(),
413                description: None,
414            }]),
415            description: None,
416        };
417        (
418            builder.for_struct_schema(Schema::default(), &wrapper_struct, RefList::Nil),
419            Some(object_wrapper_field_name),
420        )
421    } else {
422        (
423            builder.for_enriched_value_type(Schema::default(), &value_type, RefList::Nil),
424            None,
425        )
426    };
427    Ok(BuildJsonSchemaOutput {
428        schema,
429        extra_instructions: builder.build_extra_instructions()?,
430        value_extractor: ValueExtractor {
431            value_type: value_type.typ,
432            object_wrapper_field_name,
433        },
434    })
435}
436
437#[cfg(test)]
438mod tests {
439    use super::*;
440    use crate::base::schema::*;
441    use expect_test::expect;
442    use serde_json::json;
443    use std::sync::Arc;
444
445    fn create_test_options() -> ToJsonSchemaOptions {
446        ToJsonSchemaOptions {
447            fields_always_required: false,
448            supports_format: true,
449            extract_descriptions: false,
450            top_level_must_be_object: false,
451            supports_additional_properties: true,
452        }
453    }
454
455    fn create_test_options_with_extracted_descriptions() -> ToJsonSchemaOptions {
456        ToJsonSchemaOptions {
457            fields_always_required: false,
458            supports_format: true,
459            extract_descriptions: true,
460            top_level_must_be_object: false,
461            supports_additional_properties: true,
462        }
463    }
464
465    fn create_test_options_always_required() -> ToJsonSchemaOptions {
466        ToJsonSchemaOptions {
467            fields_always_required: true,
468            supports_format: true,
469            extract_descriptions: false,
470            top_level_must_be_object: false,
471            supports_additional_properties: true,
472        }
473    }
474
475    fn create_test_options_top_level_object() -> ToJsonSchemaOptions {
476        ToJsonSchemaOptions {
477            fields_always_required: false,
478            supports_format: true,
479            extract_descriptions: false,
480            top_level_must_be_object: true,
481            supports_additional_properties: true,
482        }
483    }
484
485    fn schema_to_json(schema: &Schema) -> serde_json::Value {
486        serde_json::to_value(schema).unwrap()
487    }
488
489    #[test]
490    fn test_basic_types_str() {
491        let value_type = EnrichedValueType {
492            typ: ValueType::Basic(BasicValueType::Str),
493            nullable: false,
494            attrs: Arc::new(BTreeMap::new()),
495        };
496        let options = create_test_options();
497        let result = build_json_schema(value_type, options).unwrap();
498        let json_schema = schema_to_json(&result.schema);
499
500        expect![[r#"
501            {
502              "type": "string"
503            }"#]]
504        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
505    }
506
507    #[test]
508    fn test_basic_types_bool() {
509        let value_type = EnrichedValueType {
510            typ: ValueType::Basic(BasicValueType::Bool),
511            nullable: false,
512            attrs: Arc::new(BTreeMap::new()),
513        };
514        let options = create_test_options();
515        let result = build_json_schema(value_type, options).unwrap();
516        let json_schema = schema_to_json(&result.schema);
517
518        expect![[r#"
519            {
520              "type": "boolean"
521            }"#]]
522        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
523    }
524
525    #[test]
526    fn test_basic_types_int64() {
527        let value_type = EnrichedValueType {
528            typ: ValueType::Basic(BasicValueType::Int64),
529            nullable: false,
530            attrs: Arc::new(BTreeMap::new()),
531        };
532        let options = create_test_options();
533        let result = build_json_schema(value_type, options).unwrap();
534        let json_schema = schema_to_json(&result.schema);
535
536        expect![[r#"
537            {
538              "type": "integer"
539            }"#]]
540        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
541    }
542
543    #[test]
544    fn test_basic_types_float32() {
545        let value_type = EnrichedValueType {
546            typ: ValueType::Basic(BasicValueType::Float32),
547            nullable: false,
548            attrs: Arc::new(BTreeMap::new()),
549        };
550        let options = create_test_options();
551        let result = build_json_schema(value_type, options).unwrap();
552        let json_schema = schema_to_json(&result.schema);
553
554        expect![[r#"
555            {
556              "type": "number"
557            }"#]]
558        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
559    }
560
561    #[test]
562    fn test_basic_types_float64() {
563        let value_type = EnrichedValueType {
564            typ: ValueType::Basic(BasicValueType::Float64),
565            nullable: false,
566            attrs: Arc::new(BTreeMap::new()),
567        };
568        let options = create_test_options();
569        let result = build_json_schema(value_type, options).unwrap();
570        let json_schema = schema_to_json(&result.schema);
571
572        expect![[r#"
573            {
574              "type": "number"
575            }"#]]
576        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
577    }
578
579    #[test]
580    fn test_basic_types_bytes() {
581        let value_type = EnrichedValueType {
582            typ: ValueType::Basic(BasicValueType::Bytes),
583            nullable: false,
584            attrs: Arc::new(BTreeMap::new()),
585        };
586        let options = create_test_options();
587        let result = build_json_schema(value_type, options).unwrap();
588        let json_schema = schema_to_json(&result.schema);
589
590        expect![[r#"
591            {
592              "type": "string"
593            }"#]]
594        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
595    }
596
597    #[test]
598    fn test_basic_types_range() {
599        let value_type = EnrichedValueType {
600            typ: ValueType::Basic(BasicValueType::Range),
601            nullable: false,
602            attrs: Arc::new(BTreeMap::new()),
603        };
604        let options = create_test_options();
605        let result = build_json_schema(value_type, options).unwrap();
606        let json_schema = schema_to_json(&result.schema);
607
608        expect![[r#"
609            {
610              "description": "A range represented by a list of two positions, start pos (inclusive), end pos (exclusive).",
611              "items": {
612                "type": "integer"
613              },
614              "maxItems": 2,
615              "minItems": 2,
616              "type": "array"
617            }"#]].assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
618    }
619
620    #[test]
621    fn test_basic_types_uuid() {
622        let value_type = EnrichedValueType {
623            typ: ValueType::Basic(BasicValueType::Uuid),
624            nullable: false,
625            attrs: Arc::new(BTreeMap::new()),
626        };
627        let options = create_test_options();
628        let result = build_json_schema(value_type, options).unwrap();
629        let json_schema = schema_to_json(&result.schema);
630
631        expect![[r#"
632            {
633              "description": "A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000",
634              "format": "uuid",
635              "type": "string"
636            }"#]]
637        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
638    }
639
640    #[test]
641    fn test_basic_types_date() {
642        let value_type = EnrichedValueType {
643            typ: ValueType::Basic(BasicValueType::Date),
644            nullable: false,
645            attrs: Arc::new(BTreeMap::new()),
646        };
647        let options = create_test_options();
648        let result = build_json_schema(value_type, options).unwrap();
649        let json_schema = schema_to_json(&result.schema);
650
651        expect![[r#"
652            {
653              "description": "A date in YYYY-MM-DD format, e.g. 2025-03-27",
654              "format": "date",
655              "type": "string"
656            }"#]]
657        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
658    }
659
660    #[test]
661    fn test_basic_types_time() {
662        let value_type = EnrichedValueType {
663            typ: ValueType::Basic(BasicValueType::Time),
664            nullable: false,
665            attrs: Arc::new(BTreeMap::new()),
666        };
667        let options = create_test_options();
668        let result = build_json_schema(value_type, options).unwrap();
669        let json_schema = schema_to_json(&result.schema);
670
671        expect![[r#"
672            {
673              "description": "A time in HH:MM:SS format, e.g. 13:32:12",
674              "format": "time",
675              "type": "string"
676            }"#]]
677        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
678    }
679
680    #[test]
681    fn test_basic_types_local_date_time() {
682        let value_type = EnrichedValueType {
683            typ: ValueType::Basic(BasicValueType::LocalDateTime),
684            nullable: false,
685            attrs: Arc::new(BTreeMap::new()),
686        };
687        let options = create_test_options();
688        let result = build_json_schema(value_type, options).unwrap();
689        let json_schema = schema_to_json(&result.schema);
690
691        expect![[r#"
692            {
693              "description": "Date time without timezone offset in YYYY-MM-DDTHH:MM:SS format, e.g. 2025-03-27T13:32:12",
694              "format": "date-time",
695              "type": "string"
696            }"#]].assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
697    }
698
699    #[test]
700    fn test_basic_types_offset_date_time() {
701        let value_type = EnrichedValueType {
702            typ: ValueType::Basic(BasicValueType::OffsetDateTime),
703            nullable: false,
704            attrs: Arc::new(BTreeMap::new()),
705        };
706        let options = create_test_options();
707        let result = build_json_schema(value_type, options).unwrap();
708        let json_schema = schema_to_json(&result.schema);
709
710        expect![[r#"
711            {
712              "description": "Date time with timezone offset in RFC3339, e.g. 2025-03-27T13:32:12Z, 2025-03-27T07:32:12.313-06:00",
713              "format": "date-time",
714              "type": "string"
715            }"#]].assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
716    }
717
718    #[test]
719    fn test_basic_types_time_delta() {
720        let value_type = EnrichedValueType {
721            typ: ValueType::Basic(BasicValueType::TimeDelta),
722            nullable: false,
723            attrs: Arc::new(BTreeMap::new()),
724        };
725        let options = create_test_options();
726        let result = build_json_schema(value_type, options).unwrap();
727        let json_schema = schema_to_json(&result.schema);
728
729        expect![[r#"
730            {
731              "description": "A duration, e.g. 'PT1H2M3S' (ISO 8601) or '1 day 2 hours 3 seconds'",
732              "format": "duration",
733              "type": "string"
734            }"#]]
735        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
736    }
737
738    #[test]
739    fn test_basic_types_json() {
740        let value_type = EnrichedValueType {
741            typ: ValueType::Basic(BasicValueType::Json),
742            nullable: false,
743            attrs: Arc::new(BTreeMap::new()),
744        };
745        let options = create_test_options();
746        let result = build_json_schema(value_type, options).unwrap();
747        let json_schema = schema_to_json(&result.schema);
748
749        expect!["{}"].assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
750    }
751
752    #[test]
753    fn test_basic_types_vector() {
754        let value_type = EnrichedValueType {
755            typ: ValueType::Basic(BasicValueType::Vector(VectorTypeSchema {
756                element_type: Box::new(BasicValueType::Str),
757                dimension: Some(3),
758            })),
759            nullable: false,
760            attrs: Arc::new(BTreeMap::new()),
761        };
762        let options = create_test_options();
763        let result = build_json_schema(value_type, options).unwrap();
764        let json_schema = schema_to_json(&result.schema);
765
766        expect![[r#"
767            {
768              "items": {
769                "type": "string"
770              },
771              "maxItems": 3,
772              "minItems": 3,
773              "type": "array"
774            }"#]]
775        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
776    }
777
778    #[test]
779    fn test_basic_types_union() {
780        let value_type = EnrichedValueType {
781            typ: ValueType::Basic(BasicValueType::Union(UnionTypeSchema {
782                types: vec![BasicValueType::Str, BasicValueType::Int64],
783            })),
784            nullable: false,
785            attrs: Arc::new(BTreeMap::new()),
786        };
787        let options = create_test_options();
788        let result = build_json_schema(value_type, options).unwrap();
789        let json_schema = schema_to_json(&result.schema);
790
791        expect![[r#"
792            {
793              "oneOf": [
794                {
795                  "type": "string"
796                },
797                {
798                  "type": "integer"
799                }
800              ]
801            }"#]]
802        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
803    }
804
805    #[test]
806    fn test_nullable_basic_type() {
807        let value_type = EnrichedValueType {
808            typ: ValueType::Basic(BasicValueType::Str),
809            nullable: true,
810            attrs: Arc::new(BTreeMap::new()),
811        };
812        let options = create_test_options();
813        let result = build_json_schema(value_type, options).unwrap();
814        let json_schema = schema_to_json(&result.schema);
815
816        expect![[r#"
817            {
818              "type": "string"
819            }"#]]
820        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
821    }
822
823    #[test]
824    fn test_struct_type_simple() {
825        let value_type = EnrichedValueType {
826            typ: ValueType::Struct(StructSchema {
827                fields: Arc::new(vec![
828                    FieldSchema::new(
829                        "name",
830                        EnrichedValueType {
831                            typ: ValueType::Basic(BasicValueType::Str),
832                            nullable: false,
833                            attrs: Arc::new(BTreeMap::new()),
834                        },
835                    ),
836                    FieldSchema::new(
837                        "age",
838                        EnrichedValueType {
839                            typ: ValueType::Basic(BasicValueType::Int64),
840                            nullable: false,
841                            attrs: Arc::new(BTreeMap::new()),
842                        },
843                    ),
844                ]),
845                description: None,
846            }),
847            nullable: false,
848            attrs: Arc::new(BTreeMap::new()),
849        };
850        let options = create_test_options();
851        let result = build_json_schema(value_type, options).unwrap();
852        let json_schema = schema_to_json(&result.schema);
853
854        expect![[r#"
855            {
856              "additionalProperties": false,
857              "properties": {
858                "age": {
859                  "type": "integer"
860                },
861                "name": {
862                  "type": "string"
863                }
864              },
865              "required": [
866                "name",
867                "age"
868              ],
869              "type": "object"
870            }"#]]
871        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
872    }
873
874    #[test]
875    fn test_struct_type_with_optional_field() {
876        let value_type = EnrichedValueType {
877            typ: ValueType::Struct(StructSchema {
878                fields: Arc::new(vec![
879                    FieldSchema::new(
880                        "name",
881                        EnrichedValueType {
882                            typ: ValueType::Basic(BasicValueType::Str),
883                            nullable: false,
884                            attrs: Arc::new(BTreeMap::new()),
885                        },
886                    ),
887                    FieldSchema::new(
888                        "age",
889                        EnrichedValueType {
890                            typ: ValueType::Basic(BasicValueType::Int64),
891                            nullable: true,
892                            attrs: Arc::new(BTreeMap::new()),
893                        },
894                    ),
895                ]),
896                description: None,
897            }),
898            nullable: false,
899            attrs: Arc::new(BTreeMap::new()),
900        };
901        let options = create_test_options();
902        let result = build_json_schema(value_type, options).unwrap();
903        let json_schema = schema_to_json(&result.schema);
904
905        expect![[r#"
906            {
907              "additionalProperties": false,
908              "properties": {
909                "age": {
910                  "type": "integer"
911                },
912                "name": {
913                  "type": "string"
914                }
915              },
916              "required": [
917                "name"
918              ],
919              "type": "object"
920            }"#]]
921        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
922    }
923
924    #[test]
925    fn test_struct_type_with_description() {
926        let value_type = EnrichedValueType {
927            typ: ValueType::Struct(StructSchema {
928                fields: Arc::new(vec![FieldSchema::new(
929                    "name",
930                    EnrichedValueType {
931                        typ: ValueType::Basic(BasicValueType::Str),
932                        nullable: false,
933                        attrs: Arc::new(BTreeMap::new()),
934                    },
935                )]),
936                description: Some("A person".into()),
937            }),
938            nullable: false,
939            attrs: Arc::new(BTreeMap::new()),
940        };
941        let options = create_test_options();
942        let result = build_json_schema(value_type, options).unwrap();
943        let json_schema = schema_to_json(&result.schema);
944
945        expect![[r#"
946            {
947              "additionalProperties": false,
948              "description": "A person",
949              "properties": {
950                "name": {
951                  "type": "string"
952                }
953              },
954              "required": [
955                "name"
956              ],
957              "type": "object"
958            }"#]]
959        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
960    }
961
962    #[test]
963    fn test_struct_type_with_extracted_descriptions() {
964        let value_type = EnrichedValueType {
965            typ: ValueType::Struct(StructSchema {
966                fields: Arc::new(vec![FieldSchema::new(
967                    "name",
968                    EnrichedValueType {
969                        typ: ValueType::Basic(BasicValueType::Str),
970                        nullable: false,
971                        attrs: Arc::new(BTreeMap::new()),
972                    },
973                )]),
974                description: Some("A person".into()),
975            }),
976            nullable: false,
977            attrs: Arc::new(BTreeMap::new()),
978        };
979        let options = create_test_options_with_extracted_descriptions();
980        let result = build_json_schema(value_type, options).unwrap();
981        let json_schema = schema_to_json(&result.schema);
982
983        expect![[r#"
984            {
985              "additionalProperties": false,
986              "properties": {
987                "name": {
988                  "type": "string"
989                }
990              },
991              "required": [
992                "name"
993              ],
994              "type": "object"
995            }"#]]
996        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
997
998        // Check that description was extracted to extra instructions
999        assert!(result.extra_instructions.is_some());
1000        let instructions = result.extra_instructions.unwrap();
1001        assert!(instructions.contains("A person"));
1002    }
1003
1004    #[test]
1005    fn test_struct_type_always_required() {
1006        let value_type = EnrichedValueType {
1007            typ: ValueType::Struct(StructSchema {
1008                fields: Arc::new(vec![
1009                    FieldSchema::new(
1010                        "name",
1011                        EnrichedValueType {
1012                            typ: ValueType::Basic(BasicValueType::Str),
1013                            nullable: false,
1014                            attrs: Arc::new(BTreeMap::new()),
1015                        },
1016                    ),
1017                    FieldSchema::new(
1018                        "age",
1019                        EnrichedValueType {
1020                            typ: ValueType::Basic(BasicValueType::Int64),
1021                            nullable: true,
1022                            attrs: Arc::new(BTreeMap::new()),
1023                        },
1024                    ),
1025                ]),
1026                description: None,
1027            }),
1028            nullable: false,
1029            attrs: Arc::new(BTreeMap::new()),
1030        };
1031        let options = create_test_options_always_required();
1032        let result = build_json_schema(value_type, options).unwrap();
1033        let json_schema = schema_to_json(&result.schema);
1034
1035        expect![[r#"
1036            {
1037              "additionalProperties": false,
1038              "properties": {
1039                "age": {
1040                  "type": [
1041                    "integer",
1042                    "null"
1043                  ]
1044                },
1045                "name": {
1046                  "type": "string"
1047                }
1048              },
1049              "required": [
1050                "name",
1051                "age"
1052              ],
1053              "type": "object"
1054            }"#]]
1055        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1056    }
1057
1058    #[test]
1059    fn test_table_type_utable() {
1060        let value_type = EnrichedValueType {
1061            typ: ValueType::Table(TableSchema {
1062                kind: TableKind::UTable,
1063                row: StructSchema {
1064                    fields: Arc::new(vec![
1065                        FieldSchema::new(
1066                            "id",
1067                            EnrichedValueType {
1068                                typ: ValueType::Basic(BasicValueType::Int64),
1069                                nullable: false,
1070                                attrs: Arc::new(BTreeMap::new()),
1071                            },
1072                        ),
1073                        FieldSchema::new(
1074                            "name",
1075                            EnrichedValueType {
1076                                typ: ValueType::Basic(BasicValueType::Str),
1077                                nullable: false,
1078                                attrs: Arc::new(BTreeMap::new()),
1079                            },
1080                        ),
1081                    ]),
1082                    description: None,
1083                },
1084            }),
1085            nullable: false,
1086            attrs: Arc::new(BTreeMap::new()),
1087        };
1088        let options = create_test_options();
1089        let result = build_json_schema(value_type, options).unwrap();
1090        let json_schema = schema_to_json(&result.schema);
1091
1092        expect![[r#"
1093            {
1094              "items": {
1095                "additionalProperties": false,
1096                "properties": {
1097                  "id": {
1098                    "type": "integer"
1099                  },
1100                  "name": {
1101                    "type": "string"
1102                  }
1103                },
1104                "required": [
1105                  "id",
1106                  "name"
1107                ],
1108                "type": "object"
1109              },
1110              "type": "array"
1111            }"#]]
1112        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1113    }
1114
1115    #[test]
1116    fn test_table_type_ktable() {
1117        let value_type = EnrichedValueType {
1118            typ: ValueType::Table(TableSchema {
1119                kind: TableKind::KTable(KTableInfo { num_key_parts: 1 }),
1120                row: StructSchema {
1121                    fields: Arc::new(vec![
1122                        FieldSchema::new(
1123                            "id",
1124                            EnrichedValueType {
1125                                typ: ValueType::Basic(BasicValueType::Int64),
1126                                nullable: false,
1127                                attrs: Arc::new(BTreeMap::new()),
1128                            },
1129                        ),
1130                        FieldSchema::new(
1131                            "name",
1132                            EnrichedValueType {
1133                                typ: ValueType::Basic(BasicValueType::Str),
1134                                nullable: false,
1135                                attrs: Arc::new(BTreeMap::new()),
1136                            },
1137                        ),
1138                    ]),
1139                    description: None,
1140                },
1141            }),
1142            nullable: false,
1143            attrs: Arc::new(BTreeMap::new()),
1144        };
1145        let options = create_test_options();
1146        let result = build_json_schema(value_type, options).unwrap();
1147        let json_schema = schema_to_json(&result.schema);
1148
1149        expect![[r#"
1150            {
1151              "items": {
1152                "additionalProperties": false,
1153                "properties": {
1154                  "id": {
1155                    "type": "integer"
1156                  },
1157                  "name": {
1158                    "type": "string"
1159                  }
1160                },
1161                "required": [
1162                  "id",
1163                  "name"
1164                ],
1165                "type": "object"
1166              },
1167              "type": "array"
1168            }"#]]
1169        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1170    }
1171
1172    #[test]
1173    fn test_table_type_ltable() {
1174        let value_type = EnrichedValueType {
1175            typ: ValueType::Table(TableSchema {
1176                kind: TableKind::LTable,
1177                row: StructSchema {
1178                    fields: Arc::new(vec![FieldSchema::new(
1179                        "value",
1180                        EnrichedValueType {
1181                            typ: ValueType::Basic(BasicValueType::Str),
1182                            nullable: false,
1183                            attrs: Arc::new(BTreeMap::new()),
1184                        },
1185                    )]),
1186                    description: None,
1187                },
1188            }),
1189            nullable: false,
1190            attrs: Arc::new(BTreeMap::new()),
1191        };
1192        let options = create_test_options();
1193        let result = build_json_schema(value_type, options).unwrap();
1194        let json_schema = schema_to_json(&result.schema);
1195
1196        expect![[r#"
1197            {
1198              "items": {
1199                "additionalProperties": false,
1200                "properties": {
1201                  "value": {
1202                    "type": "string"
1203                  }
1204                },
1205                "required": [
1206                  "value"
1207                ],
1208                "type": "object"
1209              },
1210              "type": "array"
1211            }"#]]
1212        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1213    }
1214
1215    #[test]
1216    fn test_top_level_must_be_object_with_basic_type() {
1217        let value_type = EnrichedValueType {
1218            typ: ValueType::Basic(BasicValueType::Str),
1219            nullable: false,
1220            attrs: Arc::new(BTreeMap::new()),
1221        };
1222        let options = create_test_options_top_level_object();
1223        let result = build_json_schema(value_type, options).unwrap();
1224        let json_schema = schema_to_json(&result.schema);
1225
1226        expect![[r#"
1227            {
1228              "additionalProperties": false,
1229              "properties": {
1230                "value": {
1231                  "type": "string"
1232                }
1233              },
1234              "required": [
1235                "value"
1236              ],
1237              "type": "object"
1238            }"#]]
1239        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1240
1241        // Check that value extractor has the wrapper field name
1242        assert_eq!(
1243            result.value_extractor.object_wrapper_field_name,
1244            Some("value".to_string())
1245        );
1246    }
1247
1248    #[test]
1249    fn test_top_level_must_be_object_with_struct_type() {
1250        let value_type = EnrichedValueType {
1251            typ: ValueType::Struct(StructSchema {
1252                fields: Arc::new(vec![FieldSchema::new(
1253                    "name",
1254                    EnrichedValueType {
1255                        typ: ValueType::Basic(BasicValueType::Str),
1256                        nullable: false,
1257                        attrs: Arc::new(BTreeMap::new()),
1258                    },
1259                )]),
1260                description: None,
1261            }),
1262            nullable: false,
1263            attrs: Arc::new(BTreeMap::new()),
1264        };
1265        let options = create_test_options_top_level_object();
1266        let result = build_json_schema(value_type, options).unwrap();
1267        let json_schema = schema_to_json(&result.schema);
1268
1269        expect![[r#"
1270            {
1271              "additionalProperties": false,
1272              "properties": {
1273                "name": {
1274                  "type": "string"
1275                }
1276              },
1277              "required": [
1278                "name"
1279              ],
1280              "type": "object"
1281            }"#]]
1282        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1283
1284        // Check that value extractor has no wrapper field name since it's already a struct
1285        assert_eq!(result.value_extractor.object_wrapper_field_name, None);
1286    }
1287
1288    #[test]
1289    fn test_nested_struct() {
1290        let value_type = EnrichedValueType {
1291            typ: ValueType::Struct(StructSchema {
1292                fields: Arc::new(vec![FieldSchema::new(
1293                    "person",
1294                    EnrichedValueType {
1295                        typ: ValueType::Struct(StructSchema {
1296                            fields: Arc::new(vec![
1297                                FieldSchema::new(
1298                                    "name",
1299                                    EnrichedValueType {
1300                                        typ: ValueType::Basic(BasicValueType::Str),
1301                                        nullable: false,
1302                                        attrs: Arc::new(BTreeMap::new()),
1303                                    },
1304                                ),
1305                                FieldSchema::new(
1306                                    "age",
1307                                    EnrichedValueType {
1308                                        typ: ValueType::Basic(BasicValueType::Int64),
1309                                        nullable: false,
1310                                        attrs: Arc::new(BTreeMap::new()),
1311                                    },
1312                                ),
1313                            ]),
1314                            description: None,
1315                        }),
1316                        nullable: false,
1317                        attrs: Arc::new(BTreeMap::new()),
1318                    },
1319                )]),
1320                description: None,
1321            }),
1322            nullable: false,
1323            attrs: Arc::new(BTreeMap::new()),
1324        };
1325        let options = create_test_options();
1326        let result = build_json_schema(value_type, options).unwrap();
1327        let json_schema = schema_to_json(&result.schema);
1328
1329        expect![[r#"
1330            {
1331              "additionalProperties": false,
1332              "properties": {
1333                "person": {
1334                  "additionalProperties": false,
1335                  "properties": {
1336                    "age": {
1337                      "type": "integer"
1338                    },
1339                    "name": {
1340                      "type": "string"
1341                    }
1342                  },
1343                  "required": [
1344                    "name",
1345                    "age"
1346                  ],
1347                  "type": "object"
1348                }
1349              },
1350              "required": [
1351                "person"
1352              ],
1353              "type": "object"
1354            }"#]]
1355        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1356    }
1357
1358    #[test]
1359    fn test_value_extractor_basic_type() {
1360        let value_type = EnrichedValueType {
1361            typ: ValueType::Basic(BasicValueType::Str),
1362            nullable: false,
1363            attrs: Arc::new(BTreeMap::new()),
1364        };
1365        let options = create_test_options();
1366        let result = build_json_schema(value_type, options).unwrap();
1367
1368        // Test extracting a string value
1369        let json_value = json!("hello world");
1370        let extracted = result.value_extractor.extract_value(json_value).unwrap();
1371        assert!(
1372            matches!(extracted, crate::base::value::Value::Basic(crate::base::value::BasicValue::Str(s)) if s.as_ref() == "hello world")
1373        );
1374    }
1375
1376    #[test]
1377    fn test_value_extractor_with_wrapper() {
1378        let value_type = EnrichedValueType {
1379            typ: ValueType::Basic(BasicValueType::Str),
1380            nullable: false,
1381            attrs: Arc::new(BTreeMap::new()),
1382        };
1383        let options = create_test_options_top_level_object();
1384        let result = build_json_schema(value_type, options).unwrap();
1385
1386        // Test extracting a wrapped value
1387        let json_value = json!({"value": "hello world"});
1388        let extracted = result.value_extractor.extract_value(json_value).unwrap();
1389        assert!(
1390            matches!(extracted, crate::base::value::Value::Basic(crate::base::value::BasicValue::Str(s)) if s.as_ref() == "hello world")
1391        );
1392    }
1393
1394    #[test]
1395    fn test_no_format_support() {
1396        let value_type = EnrichedValueType {
1397            typ: ValueType::Basic(BasicValueType::Uuid),
1398            nullable: false,
1399            attrs: Arc::new(BTreeMap::new()),
1400        };
1401        let options = ToJsonSchemaOptions {
1402            fields_always_required: false,
1403            supports_format: false,
1404            extract_descriptions: false,
1405            top_level_must_be_object: false,
1406            supports_additional_properties: true,
1407        };
1408        let result = build_json_schema(value_type, options).unwrap();
1409        let json_schema = schema_to_json(&result.schema);
1410
1411        expect![[r#"
1412            {
1413              "description": "A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000",
1414              "type": "string"
1415            }"#]]
1416        .assert_eq(&serde_json::to_string_pretty(&json_schema).unwrap());
1417    }
1418
1419    #[test]
1420    fn test_description_concatenation() {
1421        // Create a struct with a field that has both field-level and type-level descriptions
1422        let struct_schema = StructSchema {
1423            description: Some(Arc::from("Test struct description")),
1424            fields: Arc::new(vec![FieldSchema {
1425                name: "uuid_field".to_string(),
1426                value_type: EnrichedValueType {
1427                    typ: ValueType::Basic(BasicValueType::Uuid),
1428                    nullable: false,
1429                    attrs: Default::default(),
1430                },
1431                description: Some(Arc::from("This is a field-level description for UUID")),
1432            }]),
1433        };
1434
1435        let enriched_value_type = EnrichedValueType {
1436            typ: ValueType::Struct(struct_schema),
1437            nullable: false,
1438            attrs: Default::default(),
1439        };
1440
1441        let options = ToJsonSchemaOptions {
1442            fields_always_required: false,
1443            supports_format: true,
1444            extract_descriptions: false, // We want to see the description in the schema
1445            top_level_must_be_object: false,
1446            supports_additional_properties: true,
1447        };
1448
1449        let result = build_json_schema(enriched_value_type, options).unwrap();
1450
1451        // Check if the description contains both field and type descriptions
1452        let schema_json = serde_json::to_value(&result.schema).unwrap();
1453        let description = schema_json
1454            .get("properties")
1455            .and_then(|p| p.get("uuid_field"))
1456            .and_then(|f| f.get("description"))
1457            .and_then(|d| d.as_str());
1458
1459        assert_eq!(
1460            description,
1461            Some(
1462                "This is a field-level description for UUID\n\nA UUID, e.g. 123e4567-e89b-12d3-a456-426614174000"
1463            )
1464        );
1465    }
1466}