1use hedl_core::lex::Tensor;
96use hedl_core::{Document, Item, MatrixList, Value};
97use serde_json::{json, Map, Value as JsonValue};
98use std::collections::BTreeMap;
99use thiserror::Error;
100
101#[derive(Error, Debug)]
103pub enum SchemaError {
104 #[error("Schema validation failed: {0}")]
106 ValidationError(String),
107
108 #[error("Invalid configuration: {0}")]
110 ConfigError(String),
111
112 #[error("Serialization error: {0}")]
114 SerializationError(#[from] serde_json::Error),
115
116 #[error("Internal error: {0}")]
118 InternalError(String),
119}
120
/// Options controlling how a JSON Schema is generated.
#[derive(Debug, Clone)]
pub struct SchemaConfig {
    /// Value emitted as the root `title` keyword (only when `include_metadata` is set).
    pub title: Option<String>,
    /// Value emitted as the root `description` keyword (only when `include_metadata` is set).
    pub description: Option<String>,
    /// Value emitted as the root `$id` keyword (only when `include_metadata` is set).
    pub schema_id: Option<String>,
    /// When `true`, object schemas are emitted with `additionalProperties: false`.
    pub strict: bool,
    /// When `true`, scalar schemas carry an `examples` array holding the sampled value.
    pub include_examples: bool,
    /// When `true`, `title`, `description` and `$id` are written to the root schema.
    pub include_metadata: bool,
}

impl Default for SchemaConfig {
    /// Non-strict, no examples, metadata enabled, and no title/description/id.
    fn default() -> Self {
        Self {
            // Behaviour switches.
            strict: false,
            include_examples: false,
            include_metadata: true,
            // Optional metadata strings start out unset.
            title: None,
            description: None,
            schema_id: None,
        }
    }
}
150
151impl SchemaConfig {
152 #[must_use]
154 pub fn builder() -> SchemaConfigBuilder {
155 SchemaConfigBuilder::default()
156 }
157}
158
/// Fluent builder that accumulates the settings for a `SchemaConfig`.
#[derive(Debug)]
pub struct SchemaConfigBuilder {
    /// Pending value for the `title` setting.
    title: Option<String>,
    /// Pending value for the `description` setting.
    description: Option<String>,
    /// Pending value for the `schema_id` setting.
    schema_id: Option<String>,
    /// Pending value for the `strict` setting.
    strict: bool,
    /// Pending value for the `include_examples` setting.
    include_examples: bool,
    /// Pending value for the `include_metadata` setting.
    include_metadata: bool,
}

impl Default for SchemaConfigBuilder {
    /// Starts non-strict, without examples, with metadata enabled and no
    /// title, description or schema id.
    fn default() -> Self {
        Self {
            // Behaviour switches.
            strict: false,
            include_examples: false,
            include_metadata: true,
            // Optional metadata strings start out unset.
            title: None,
            description: None,
            schema_id: None,
        }
    }
}
182
183impl SchemaConfigBuilder {
184 pub fn title(mut self, title: impl Into<String>) -> Self {
186 self.title = Some(title.into());
187 self
188 }
189
190 pub fn description(mut self, description: impl Into<String>) -> Self {
192 self.description = Some(description.into());
193 self
194 }
195
196 pub fn schema_id(mut self, schema_id: impl Into<String>) -> Self {
198 self.schema_id = Some(schema_id.into());
199 self
200 }
201
202 #[must_use]
204 pub fn strict(mut self, strict: bool) -> Self {
205 self.strict = strict;
206 self
207 }
208
209 #[must_use]
211 pub fn include_examples(mut self, include: bool) -> Self {
212 self.include_examples = include;
213 self
214 }
215
216 #[must_use]
218 pub fn include_metadata(mut self, include: bool) -> Self {
219 self.include_metadata = include;
220 self
221 }
222
223 #[must_use]
225 pub fn build(self) -> SchemaConfig {
226 SchemaConfig {
227 title: self.title,
228 description: self.description,
229 schema_id: self.schema_id,
230 strict: self.strict,
231 include_examples: self.include_examples,
232 include_metadata: self.include_metadata,
233 }
234 }
235}
236
237pub fn generate_schema(doc: &Document, config: &SchemaConfig) -> Result<String, SchemaError> {
252 let schema = generate_schema_value(doc, config)?;
253 Ok(serde_json::to_string_pretty(&schema)?)
254}
255
256pub fn generate_schema_value(
271 doc: &Document,
272 config: &SchemaConfig,
273) -> Result<JsonValue, SchemaError> {
274 let mut schema = Map::with_capacity(8);
275
276 schema.insert(
278 "$schema".to_string(),
279 json!("http://json-schema.org/draft-07/schema#"),
280 );
281
282 if config.include_metadata {
284 if let Some(ref title) = config.title {
285 schema.insert("title".to_string(), json!(title));
286 }
287 if let Some(ref description) = config.description {
288 schema.insert("description".to_string(), json!(description));
289 }
290 if let Some(ref schema_id) = config.schema_id {
291 schema.insert("$id".to_string(), json!(schema_id));
292 }
293 }
294
295 schema.insert("type".to_string(), json!("object"));
297
298 if !doc.structs.is_empty() {
300 let definitions = generate_definitions(doc, config);
301 schema.insert("definitions".to_string(), JsonValue::Object(definitions));
302 }
303
304 let properties = generate_properties(&doc.root, doc, config);
306 schema.insert("properties".to_string(), JsonValue::Object(properties));
307
308 if config.strict {
310 schema.insert("additionalProperties".to_string(), json!(false));
311 }
312
313 Ok(JsonValue::Object(schema))
314}
315
316fn generate_definitions(doc: &Document, config: &SchemaConfig) -> Map<String, JsonValue> {
318 let mut definitions = Map::with_capacity(doc.structs.len());
319
320 for (type_name, schema_fields) in &doc.structs {
321 let mut def = Map::with_capacity(4);
322 def.insert("type".to_string(), json!("object"));
323
324 let mut properties = Map::with_capacity(schema_fields.len());
326
327 for field_name in schema_fields {
328 let field_schema = infer_field_type(type_name, field_name, doc, config);
330 properties.insert(field_name.clone(), field_schema);
331 }
332
333 if let Some(child_types) = doc.nests.get(type_name) {
335 for child_type in child_types {
336 let child_array_name = pluralize(child_type);
337 let child_ref = json!({
338 "type": "array",
339 "items": {
340 "$ref": format!("#/definitions/{}", child_type)
341 }
342 });
343 properties.insert(child_array_name, child_ref);
344 }
345 }
346
347 def.insert("properties".to_string(), JsonValue::Object(properties));
348
349 if !schema_fields.is_empty() {
351 def.insert("required".to_string(), json!([schema_fields[0]]));
352 }
353
354 if config.strict {
356 def.insert("additionalProperties".to_string(), json!(false));
357 }
358
359 definitions.insert(type_name.clone(), JsonValue::Object(def));
360 }
361
362 definitions
363}
364
365fn generate_properties(
367 items: &BTreeMap<String, Item>,
368 doc: &Document,
369 config: &SchemaConfig,
370) -> Map<String, JsonValue> {
371 let mut properties = Map::with_capacity(items.len());
372
373 for (key, item) in items {
374 let prop_schema = item_to_schema(item, doc, config);
375 properties.insert(key.clone(), prop_schema);
376 }
377
378 properties
379}
380
381fn item_to_schema(item: &Item, doc: &Document, config: &SchemaConfig) -> JsonValue {
383 match item {
384 Item::Scalar(value) => value_to_schema(value, None, config),
385 Item::Object(obj) => object_to_schema(obj, doc, config),
386 Item::List(list) => matrix_list_to_schema(list, config),
387 }
388}
389
390fn value_to_schema(value: &Value, field_name: Option<&str>, config: &SchemaConfig) -> JsonValue {
392 let mut schema = Map::with_capacity(4);
393
394 match value {
395 Value::Null => {
396 schema.insert("type".to_string(), json!("null"));
397 }
398 Value::Bool(b) => {
399 schema.insert("type".to_string(), json!("boolean"));
400 if config.include_examples {
401 schema.insert("examples".to_string(), json!([b]));
402 }
403 }
404 Value::Int(n) => {
405 schema.insert("type".to_string(), json!("integer"));
406 if config.include_examples {
407 schema.insert("examples".to_string(), json!([n]));
408 }
409 }
410 Value::Float(f) => {
411 schema.insert("type".to_string(), json!("number"));
412 if config.include_examples {
413 schema.insert("examples".to_string(), json!([f]));
414 }
415 }
416 Value::String(s) => {
417 schema.insert("type".to_string(), json!("string"));
418
419 if let Some(format) = infer_string_format(s, field_name) {
421 schema.insert("format".to_string(), json!(format));
422 }
423
424 if config.include_examples {
425 schema.insert("examples".to_string(), json!([s]));
426 }
427 }
428 Value::Tensor(tensor) => {
429 return tensor_to_schema(tensor, config);
431 }
432 Value::Reference(reference) => {
433 schema.insert("type".to_string(), json!("string"));
434 schema.insert(
435 "pattern".to_string(),
436 json!("^@([A-Z][a-zA-Z0-9]*:)?[a-zA-Z0-9_-]+$"),
437 );
438 schema.insert(
439 "description".to_string(),
440 json!(format!(
441 "Reference to {}",
442 reference.type_name.as_deref().unwrap_or("entity")
443 )),
444 );
445 }
446 Value::Expression(_) => {
447 schema.insert("type".to_string(), json!("string"));
448 schema.insert("pattern".to_string(), json!(r"^\$\(.+\)$"));
449 schema.insert("description".to_string(), json!("HEDL expression $(...)"));
450 }
451 Value::List(values) => {
452 schema.insert("type".to_string(), json!("array"));
453
454 if let Some(first) = values.first() {
456 let item_schema = value_to_schema(first, None, config);
457 schema.insert("items".to_string(), item_schema);
458 }
459
460 }
463 }
464
465 JsonValue::Object(schema)
466}
467
468fn object_to_schema(
470 obj: &BTreeMap<String, Item>,
471 doc: &Document,
472 config: &SchemaConfig,
473) -> JsonValue {
474 let mut schema = Map::with_capacity(3);
475 schema.insert("type".to_string(), json!("object"));
476
477 let properties = generate_properties(obj, doc, config);
478 schema.insert("properties".to_string(), JsonValue::Object(properties));
479
480 if config.strict {
481 schema.insert("additionalProperties".to_string(), json!(false));
482 }
483
484 JsonValue::Object(schema)
485}
486
487fn matrix_list_to_schema(list: &MatrixList, _config: &SchemaConfig) -> JsonValue {
489 let mut schema = Map::with_capacity(2);
490 schema.insert("type".to_string(), json!("array"));
491
492 let items = json!({
494 "$ref": format!("#/definitions/{}", list.type_name)
495 });
496 schema.insert("items".to_string(), items);
497
498 JsonValue::Object(schema)
499}
500
501fn tensor_to_schema(tensor: &Tensor, config: &SchemaConfig) -> JsonValue {
503 match tensor {
504 Tensor::Scalar(val) => {
505 let mut schema = Map::with_capacity(2);
506 schema.insert("type".to_string(), json!("number"));
507 if config.include_examples {
508 schema.insert("examples".to_string(), json!([val]));
509 }
510 JsonValue::Object(schema)
511 }
512 Tensor::Array(_) => {
513 json!({
515 "type": "array",
516 "items": {
517 "oneOf": [
518 {"type": "number"},
519 {"type": "array"}
520 ]
521 }
522 })
523 }
524 }
525}
526
527fn infer_field_type(
529 type_name: &str,
530 field_name: &str,
531 doc: &Document,
532 config: &SchemaConfig,
533) -> JsonValue {
534 for item in doc.root.values() {
536 if let Item::List(list) = item {
537 if list.type_name == type_name && !list.rows.is_empty() {
538 if let Some(field_idx) = list.schema.iter().position(|f| f == field_name) {
540 if let Some(node) = list.rows.first() {
542 if let Some(value) = node.fields.get(field_idx) {
543 return value_to_schema(value, Some(field_name), config);
544 }
545 }
546 }
547 }
548 }
549 }
550
551 let mut schema = Map::with_capacity(2);
553 schema.insert("type".to_string(), json!("string"));
554
555 if let Some(format) = infer_format_from_name(field_name) {
557 schema.insert("format".to_string(), json!(format));
558 }
559
560 JsonValue::Object(schema)
561}
562
/// Guesses a JSON Schema `format` for a string value.
///
/// Content-based checks run first, in this order: URI, email, date-time,
/// UUID. The URI check comes before the email check because URLs may
/// contain `@` and `.` (user-info, path segments such as `/@user`) and would
/// otherwise be reported as emails. If the content matches nothing and a
/// `field_name` is given, the name is consulted via
/// [`infer_format_from_name`]. Returns `None` when nothing matches.
fn infer_string_format(s: &str, field_name: Option<&str>) -> Option<&'static str> {
    if s.starts_with("http://") || s.starts_with("https://") || s.starts_with("ftp://") {
        return Some("uri");
    }

    if looks_like_email(s) {
        return Some("email");
    }

    if looks_like_date_time(s) {
        return Some("date-time");
    }

    if looks_like_uuid(s) {
        return Some("uuid");
    }

    infer_format_from_name(field_name?)
}

/// Returns `true` for `local@domain` strings: exactly one `@`, a non-empty
/// local part, a dot in the domain, and no whitespace anywhere.
///
/// The non-empty local part also rejects strings that start with `@`.
fn looks_like_email(s: &str) -> bool {
    s.split_once('@').map_or(false, |(local, domain)| {
        !local.is_empty()
            && !domain.contains('@')
            && domain.contains('.')
            && !s.chars().any(char::is_whitespace)
    })
}

/// Returns `true` when `s` starts with the `YYYY-MM-DDTHH:MM:SS` shape.
///
/// Anything after those 19 bytes (fractional seconds, `Z`, a numeric offset)
/// is accepted without further validation.
fn looks_like_date_time(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() >= 19
        && bytes[..19].iter().enumerate().all(|(i, &b)| match i {
            4 | 7 => b == b'-',
            10 => b == b'T',
            13 | 16 => b == b':',
            _ => b.is_ascii_digit(),
        })
}

/// Returns `true` for the canonical 8-4-4-4-12 hexadecimal UUID layout.
fn looks_like_uuid(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.len() == 36
        && bytes.iter().enumerate().all(|(i, &b)| match i {
            8 | 13 | 18 | 23 => b == b'-',
            _ => b.is_ascii_hexdigit(),
        })
}

/// Guesses a JSON Schema `format` from a field name alone.
///
/// Matching is case-insensitive and substring-based, checked in this order:
/// `email` → `"email"`; `url`/`uri` → `"uri"`; `date`, or a `_at`/`_on`
/// suffix → `"date-time"`; `uuid`/`guid` → `"uuid"`.
///
/// Because these are substring tests, unrelated names that happen to contain
/// one of the fragments (for example `security` contains `uri`) match too.
fn infer_format_from_name(field_name: &str) -> Option<&'static str> {
    let lower = field_name.to_lowercase();

    if lower.contains("email") {
        Some("email")
    } else if lower.contains("url") || lower.contains("uri") {
        Some("uri")
    } else if lower.contains("date") || lower.ends_with("_at") || lower.ends_with("_on") {
        Some("date-time")
    } else if lower.contains("uuid") || lower.contains("guid") {
        Some("uuid")
    } else {
        None
    }
}
606
/// Returns a simple English plural of `word`.
///
/// Rules, applied in order:
/// 1. words ending in `s`, `x`, `z`, `ch` or `sh` take `es`;
/// 2. words ending in `y` take `ies` in place of the `y`, unless the `y`
///    follows a vowel (`Day`, `Key`, `Toy`, `Guy`), in which case rule 3 applies;
/// 3. everything else takes `s`.
///
/// NOTE(review): the result is used as a property name in generated schemas;
/// if other code derives the same names independently, keep the rules in sync.
fn pluralize(word: &str) -> String {
    const ES_ENDINGS: [&str; 5] = ["s", "x", "z", "ch", "sh"];

    if ES_ENDINGS.iter().any(|ending| word.ends_with(ending)) {
        return format!("{word}es");
    }

    if let Some(stem) = word.strip_suffix('y') {
        // A vowel before the final `y` keeps the `y` and just adds `s`.
        let y_follows_vowel = stem.chars().next_back().map_or(false, |c| {
            matches!(c.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u')
        });
        if !y_follows_vowel {
            return format!("{stem}ies");
        }
    }

    format!("{word}s")
}
622
623pub fn validate_schema(schema: &JsonValue) -> Result<(), SchemaError> {
639 validate_schema_internal(schema, true)
640}
641
642fn validate_schema_internal(
644 schema: &JsonValue,
645 require_schema_field: bool,
646) -> Result<(), SchemaError> {
647 let obj = schema
648 .as_object()
649 .ok_or_else(|| SchemaError::ValidationError("Schema must be an object".to_string()))?;
650
651 if require_schema_field && !obj.contains_key("$schema") {
653 return Err(SchemaError::ValidationError(
654 "Schema must have $schema field".to_string(),
655 ));
656 }
657
658 if !obj.contains_key("type") {
660 return Err(SchemaError::ValidationError(
661 "Schema must have type field".to_string(),
662 ));
663 }
664
665 let schema_type = obj
667 .get("type")
668 .and_then(|v| v.as_str())
669 .ok_or_else(|| SchemaError::ValidationError("type must be a string".to_string()))?;
670
671 let valid_types = [
672 "null", "boolean", "object", "array", "number", "string", "integer",
673 ];
674 if !valid_types.contains(&schema_type) {
675 return Err(SchemaError::ValidationError(format!(
676 "Invalid type: {schema_type}. Must be one of: {valid_types:?}"
677 )));
678 }
679
680 if let Some(definitions) = obj.get("definitions") {
682 if let Some(defs) = definitions.as_object() {
683 for (name, def_schema) in defs {
684 validate_schema_internal(def_schema, false).map_err(|e| {
685 SchemaError::ValidationError(format!("Invalid definition '{name}': {e}"))
686 })?;
687 }
688 }
689 }
690
691 Ok(())
692}
693
#[cfg(test)]
mod tests {
    use super::*;

    /// Regular, `y → ies`, and `es` plurals.
    #[test]
    fn test_pluralize() {
        let cases = [
            ("User", "Users"),
            ("Post", "Posts"),
            ("Category", "Categories"),
            ("Box", "Boxes"),
            ("Class", "Classes"),
        ];
        for (singular, plural) in cases {
            assert_eq!(pluralize(singular), plural);
        }
    }

    /// An address-shaped value is detected without a field-name hint.
    #[test]
    fn test_infer_string_format_email() {
        let detected = infer_string_format("alice@example.com", None);
        assert_eq!(detected, Some("email"));
    }

    /// An `https://` value is detected without a field-name hint.
    #[test]
    fn test_infer_string_format_uri() {
        let detected = infer_string_format("https://example.com", None);
        assert_eq!(detected, Some("uri"));
    }

    /// A UTC timestamp is detected without a field-name hint.
    #[test]
    fn test_infer_string_format_datetime() {
        let detected = infer_string_format("2024-01-01T00:00:00Z", None);
        assert_eq!(detected, Some("date-time"));
    }

    /// Field names alone can select a format.
    #[test]
    fn test_infer_format_from_name() {
        let cases = [
            ("email", "email"),
            ("url", "uri"),
            ("created_at", "date-time"),
            ("uuid", "uuid"),
        ];
        for (field, format) in cases {
            assert_eq!(infer_format_from_name(field), Some(format));
        }
    }

    /// Values set on the builder end up in the built configuration.
    #[test]
    fn test_config_builder() {
        let config = SchemaConfig::builder()
            .title("Test")
            .description("Desc")
            .strict(true)
            .build();

        assert_eq!(config.title, Some("Test".to_string()));
        assert_eq!(config.description, Some("Desc".to_string()));
        assert!(config.strict);
    }

    /// Defaults: no title, non-strict, no examples, metadata enabled.
    #[test]
    fn test_default_config() {
        let config = SchemaConfig::default();
        assert!(config.title.is_none());
        assert!(!config.strict);
        assert!(!config.include_examples);
        assert!(config.include_metadata);
    }
}