1use std::collections::HashMap;
29use std::sync::Arc;
30
31use parking_lot::RwLock;
32use serde_json::{Map, Value, json};
33use tracing::debug;
34
/// GraphQL introspection query sent as the `query` parameter by
/// `SchemaDiscoveryService::infer_from_graphql`.
///
/// It asks for the root operation type names, every type (expanded through
/// the `FullType` fragment, deprecated fields and enum values included) and
/// every directive. The `TypeRef` fragment unrolls `ofType` six levels deep,
/// so wrapper chains (`NON_NULL` / `LIST`) deeper than that are cut off.
const INTROSPECTION_QUERY: &str = r"
query IntrospectionQuery {
 __schema {
 queryType { name }
 mutationType { name }
 subscriptionType { name }
 types { ...FullType }
 directives {
 name description locations
 args { ...InputValue }
 }
 }
}
fragment FullType on __Type {
 kind name description
 fields(includeDeprecated: true) {
 name description
 args { ...InputValue }
 type { ...TypeRef }
 isDeprecated deprecationReason
 }
 inputFields { ...InputValue }
 interfaces { ...TypeRef }
 enumValues(includeDeprecated: true) {
 name description isDeprecated deprecationReason
 }
 possibleTypes { ...TypeRef }
}
fragment InputValue on __InputValue {
 name description
 type { ...TypeRef }
 defaultValue
}
fragment TypeRef on __Type {
 kind name
 ofType {
 kind name
 ofType {
 kind name
 ofType {
 kind name
 ofType {
 kind name
 ofType {
 kind name
 ofType { kind name }
 }
 }
 }
 }
 }
}
";
90
91use crate::domain::error::{ProviderError, Result, ServiceError, StygianError};
92use crate::ports::{AIProvider, GraphQlAuth, ScrapingService, ServiceInput};
93
/// A JSON Schema produced by the discovery service together with its
/// provenance. This is also the value type stored in the service cache.
#[derive(Debug, Clone)]
pub struct DiscoveredSchema {
    /// The schema document itself, held as a JSON value.
    pub schema: Value,
    /// How this schema was obtained; rewritten to [`SchemaSource::Cached`]
    /// on the copy returned from a cache hit.
    pub source: SchemaSource,
    /// Confidence score. `infer_from_html` takes it from the AI response
    /// (falling back to 0.7); `infer_from_graphql` always sets 1.0.
    pub confidence: f32,
    /// Key the schema is associated with: the URL pattern passed to
    /// `infer_from_html` / `cache_schema`, or the endpoint passed to
    /// `infer_from_graphql`. `None` when no pattern was supplied.
    pub url_pattern: Option<String>,
}
106
/// Provenance tag attached to a [`DiscoveredSchema`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SchemaSource {
    /// Presumably for schemas inferred from an example value; no code
    /// visible in this file assigns it (NOTE(review): confirm with callers).
    ExampleInference,
    /// Assigned by `infer_from_html` to AI-produced schemas. Note that
    /// `infer_from_graphql` also tags its introspection results with this
    /// variant.
    AiInference,
    /// Assigned to the copy handed back when a lookup is served from the cache.
    Cached,
    /// Presumably for schemas supplied or fixed by a user; only passed in by
    /// callers of `cache_schema` in the visible code (NOTE(review): confirm).
    UserCorrection,
}
119
/// Tunables for `SchemaDiscoveryService`.
#[derive(Debug, Clone)]
pub struct SchemaDiscoveryConfig {
    /// Upper bound on how much of the HTML document `infer_from_html`
    /// embeds in the AI prompt.
    pub max_content_chars: usize,
    /// When `true`, `infer_from_html` and `infer_from_graphql` consult the
    /// cache before doing any work and store their results afterwards.
    pub cache_schemas: bool,
    /// Minimum acceptable AI confidence.
    /// NOTE(review): not read by any code visible in this file — confirm
    /// where (or whether) the threshold is enforced.
    pub min_ai_confidence: f32,
}

impl Default for SchemaDiscoveryConfig {
    /// Defaults: 16 000 content characters, caching enabled, 0.6 minimum
    /// AI confidence.
    fn default() -> Self {
        let max_content_chars = 16_000;
        let min_ai_confidence = 0.6;
        Self {
            cache_schemas: true,
            max_content_chars,
            min_ai_confidence,
        }
    }
}
140
/// Result of comparing the top-level `properties` of two JSON Schemas, as
/// produced by `SchemaDiscoveryService::detect_evolution`.
#[derive(Debug, Clone)]
pub struct SchemaDiff {
    /// Property names present in the new schema but not in the old one.
    pub added_fields: Vec<String>,
    /// Property names present in the old schema but not in the new one.
    pub removed_fields: Vec<String>,
    /// `(property name, old type, new type)` for every property found in
    /// both schemas whose `"type"` string differs; a missing or non-string
    /// `"type"` is reported as `"any"`.
    pub type_changes: Vec<(String, String, String)>,
    /// `true` when nothing was removed and no type changed; added fields
    /// alone do not break compatibility.
    pub is_compatible: bool,
}
153
/// Discovers JSON Schemas from example values, HTML pages (through an AI
/// provider) and GraphQL endpoints (through introspection), with an
/// in-memory cache and user-supplied per-field corrections.
pub struct SchemaDiscoveryService {
    /// Behavioural settings (content limit, caching switch, ...).
    config: SchemaDiscoveryConfig,
    /// Provider used by `infer_from_html`; that method fails when this is `None`.
    ai_provider: Option<Arc<dyn AIProvider>>,
    /// Discovered schemas keyed by URL pattern or GraphQL endpoint.
    cache: Arc<RwLock<HashMap<String, DiscoveredSchema>>>,
    /// Replacement schema fragments keyed by property name, applied to
    /// AI-inferred schemas by `apply_corrections`.
    corrections: Arc<RwLock<HashMap<String, Value>>>,
    /// Service used to run the introspection query in `infer_from_graphql`;
    /// that method fails when this is `None`.
    graphql_service: Option<Arc<dyn ScrapingService>>,
}
170
171impl SchemaDiscoveryService {
172 #[must_use]
182 pub fn new(config: SchemaDiscoveryConfig, ai_provider: Option<Arc<dyn AIProvider>>) -> Self {
183 Self {
184 config,
185 ai_provider,
186 cache: Arc::new(RwLock::new(HashMap::new())),
187 corrections: Arc::new(RwLock::new(HashMap::new())),
188 graphql_service: None,
189 }
190 }
191
192 #[must_use]
206 pub fn with_graphql_service(mut self, service: Arc<dyn ScrapingService>) -> Self {
207 self.graphql_service = Some(service);
208 self
209 }
210
    /// Infers a JSON Schema from a single example value.
    ///
    /// Purely structural: delegates to `infer_schema_for_value` and touches
    /// neither the cache, the corrections nor the AI provider.
    #[must_use]
    pub fn infer_from_example(&self, example: &Value) -> Value {
        Self::infer_schema_for_value(example)
    }
233
234 fn infer_schema_for_value(value: &Value) -> Value {
236 match value {
237 Value::Object(map) => {
238 let mut properties = Map::new();
239 let mut required = Vec::new();
240 for (key, val) in map {
241 properties.insert(key.clone(), Self::infer_schema_for_value(val));
242 required.push(json!(key));
243 }
244 json!({
245 "type": "object",
246 "properties": Value::Object(properties),
247 "required": required
248 })
249 }
250 Value::Array(items) => {
251 let item_schema = items
253 .first()
254 .map_or_else(|| json!({}), Self::infer_schema_for_value);
255 json!({
256 "type": "array",
257 "items": item_schema
258 })
259 }
260 Value::String(_) => json!({"type": "string"}),
261 Value::Number(n) => {
262 if n.is_i64() || n.is_u64() {
263 json!({"type": "integer"})
264 } else {
265 json!({"type": "number"})
266 }
267 }
268 Value::Bool(_) => json!({"type": "boolean"}),
269 Value::Null => json!({"type": "null"}),
270 }
271 }
272
273 pub async fn infer_from_html(
301 &self,
302 html: &str,
303 url_pattern: Option<&str>,
304 ) -> Result<DiscoveredSchema> {
305 if self.config.cache_schemas
307 && let Some(pattern) = url_pattern
308 {
309 let cache = self.cache.read();
310 if let Some(cached) = cache.get(pattern) {
311 debug!(pattern, "Schema cache hit");
312 return Ok(DiscoveredSchema {
313 source: SchemaSource::Cached,
314 ..cached.clone()
315 });
316 }
317 }
318
319 let provider = self.ai_provider.as_ref().ok_or_else(|| {
320 StygianError::Provider(ProviderError::ApiError(
321 "No AI provider configured for HTML schema discovery".to_string(),
322 ))
323 })?;
324
325 let truncated = if html.len() > self.config.max_content_chars {
326 &html[..self.config.max_content_chars]
327 } else {
328 html
329 };
330
331 let discovery_schema = json!({
332 "type": "object",
333 "properties": {
334 "schema": {
335 "type": "object",
336 "description": "A valid JSON Schema object describing the structured data in the HTML"
337 },
338 "confidence": {
339 "type": "number",
340 "minimum": 0.0,
341 "maximum": 1.0,
342 "description": "Confidence in the inferred schema [0.0, 1.0]"
343 },
344 "description": {
345 "type": "string",
346 "description": "Human-readable explanation of what data was detected"
347 }
348 },
349 "required": ["schema", "confidence"]
350 });
351
352 let prompt = format!(
353 "Analyze the following HTML page and infer a JSON Schema for the main structured \
354 data it contains (e.g. product listings, articles, search results). \
355 Return the schema definition.\n\nHTML:\n{truncated}"
356 );
357
358 let result = provider.extract(prompt, discovery_schema).await?;
359
360 let schema = result
361 .get("schema")
362 .cloned()
363 .unwrap_or_else(|| json!({"type": "object"}));
364
365 #[allow(clippy::cast_possible_truncation)]
366 let confidence = result
367 .get("confidence")
368 .and_then(Value::as_f64)
369 .unwrap_or(0.7) as f32;
370
371 let schema = self.apply_corrections(schema);
373
374 let discovered = DiscoveredSchema {
375 schema,
376 source: SchemaSource::AiInference,
377 confidence,
378 url_pattern: url_pattern.map(str::to_string),
379 };
380
381 if self.config.cache_schemas
383 && let Some(pattern) = url_pattern
384 {
385 self.cache
386 .write()
387 .insert(pattern.to_string(), discovered.clone());
388 }
389
390 Ok(discovered)
391 }
392
393 fn apply_corrections(&self, mut schema: Value) -> Value {
395 let corrections = self.corrections.read();
396 if corrections.is_empty() {
397 return schema;
398 }
399
400 if let Some(props) = schema.get_mut("properties").and_then(Value::as_object_mut) {
401 for (field, correction) in corrections.iter() {
402 if props.contains_key(field) {
403 props.insert(field.clone(), correction.clone());
404 }
405 }
406 }
407 schema
408 }
409
    /// Registers `schema_fragment` as the replacement schema for the
    /// property called `field_name`, overwriting any earlier correction for
    /// the same name. Corrections are consumed by `apply_corrections`.
    pub fn add_correction(&self, field_name: String, schema_fragment: Value) {
        self.corrections.write().insert(field_name, schema_fragment);
    }
425
426 pub fn cache_schema(
443 &self,
444 url_pattern: String,
445 schema: Value,
446 source: SchemaSource,
447 confidence: f32,
448 ) {
449 let discovered = DiscoveredSchema {
450 schema,
451 source,
452 confidence,
453 url_pattern: Some(url_pattern.clone()),
454 };
455 self.cache.write().insert(url_pattern, discovered);
456 }
457
458 pub fn detect_evolution(&self, old_schema: &Value, new_schema: &Value) -> SchemaDiff {
484 let old_props = old_schema
485 .get("properties")
486 .and_then(Value::as_object)
487 .cloned()
488 .unwrap_or_default();
489 let new_props = new_schema
490 .get("properties")
491 .and_then(Value::as_object)
492 .cloned()
493 .unwrap_or_default();
494
495 let added_fields: Vec<String> = new_props
496 .keys()
497 .filter(|k| !old_props.contains_key(*k))
498 .cloned()
499 .collect();
500
501 let removed_fields: Vec<String> = old_props
502 .keys()
503 .filter(|k| !new_props.contains_key(*k))
504 .cloned()
505 .collect();
506
507 let type_changes: Vec<(String, String, String)> = old_props
508 .iter()
509 .filter_map(|(k, old_v)| {
510 let new_v = new_props.get(k)?;
511 let old_t = old_v.get("type").and_then(Value::as_str).unwrap_or("any");
512 let new_t = new_v.get("type").and_then(Value::as_str).unwrap_or("any");
513 (old_t != new_t).then(|| (k.clone(), old_t.to_string(), new_t.to_string()))
514 })
515 .collect();
516
517 let is_compatible = removed_fields.is_empty() && type_changes.is_empty();
518
519 SchemaDiff {
520 added_fields,
521 removed_fields,
522 type_changes,
523 is_compatible,
524 }
525 }
526
527 #[allow(clippy::indexing_slicing)]
554 pub async fn infer_from_graphql(
555 &self,
556 endpoint: &str,
557 auth: Option<GraphQlAuth>,
558 ) -> Result<DiscoveredSchema> {
559 if self.config.cache_schemas {
561 let cache = self.cache.read();
562 if let Some(cached) = cache.get(endpoint) {
563 debug!(endpoint, "GraphQL introspection cache hit");
564 return Ok(DiscoveredSchema {
565 source: SchemaSource::Cached,
566 ..cached.clone()
567 });
568 }
569 }
570
571 let service = self.graphql_service.as_ref().ok_or_else(|| {
573 StygianError::Service(ServiceError::Unavailable(
574 "No GraphQL service configured for introspection".to_string(),
575 ))
576 })?;
577
578 let mut params = json!({
580 "query": INTROSPECTION_QUERY,
581 });
582 if let Some(a) = auth {
583 params["auth"] = json!({
584 "kind": match a.kind {
585 crate::ports::GraphQlAuthKind::Bearer => "bearer",
586 crate::ports::GraphQlAuthKind::ApiKey => "api_key",
587 crate::ports::GraphQlAuthKind::Header => "header",
588 crate::ports::GraphQlAuthKind::None => "none",
589 },
590 "token": a.token,
591 });
592 if let Some(header) = a.header_name {
593 params["auth"]["header_name"] = json!(header);
594 }
595 }
596
597 let input = ServiceInput {
598 url: endpoint.to_string(),
599 params,
600 };
601
602 let output = service.execute(input).await?;
604
605 let response_json: Value = serde_json::from_str(&output.data)
607 .map_err(|e| StygianError::Service(ServiceError::InvalidResponse(e.to_string())))?;
608 let introspection = response_json.get("__schema").ok_or_else(|| {
609 StygianError::Service(ServiceError::InvalidResponse(
610 "introspection response missing __schema key".to_string(),
611 ))
612 })?;
613
614 let query_type_name = introspection
616 .get("queryType")
617 .and_then(|qt| qt.get("name"))
618 .and_then(Value::as_str)
619 .unwrap_or("Query")
620 .to_string();
621
622 let types = introspection
623 .get("types")
624 .and_then(Value::as_array)
625 .cloned()
626 .unwrap_or_default();
627
628 let mut definitions = Map::new();
629 for gql_type in &types {
630 let name = gql_type.get("name").and_then(Value::as_str).unwrap_or("");
631 if name.is_empty() || name.starts_with('_') {
633 continue;
634 }
635 let def = Self::convert_type_to_definition(gql_type);
636 definitions.insert(name.to_string(), def);
637 }
638
639 let schema = json!({
640 "type": "object",
641 "description": format!("GraphQL schema for {endpoint}"),
642 "definitions": Value::Object(definitions),
643 "queryType": query_type_name,
644 });
645
646 let discovered = DiscoveredSchema {
647 schema,
648 source: SchemaSource::AiInference,
649 confidence: 1.0,
650 url_pattern: Some(endpoint.to_string()),
651 };
652
653 if self.config.cache_schemas {
655 self.cache
656 .write()
657 .insert(endpoint.to_string(), discovered.clone());
658 }
659
660 Ok(discovered)
661 }
662
663 #[allow(clippy::indexing_slicing)]
665 fn convert_type_to_definition(gql_type: &Value) -> Value {
666 let kind = gql_type.get("kind").and_then(Value::as_str).unwrap_or("");
667 match kind {
668 "OBJECT" | "INTERFACE" | "INPUT_OBJECT" => {
669 let fields: Vec<Value> = gql_type
670 .get("fields")
671 .or_else(|| gql_type.get("inputFields"))
672 .and_then(Value::as_array)
673 .cloned()
674 .unwrap_or_default();
675
676 let mut properties = Map::new();
677 let mut required: Vec<Value> = Vec::new();
678
679 for field in &fields {
680 let field_name = field.get("name").and_then(Value::as_str).unwrap_or("");
681 if field_name.is_empty() {
682 continue;
683 }
684 let type_ref = &field["type"];
685 if type_ref.get("kind").and_then(Value::as_str) == Some("NON_NULL") {
687 required.push(json!(field_name));
688 }
689 properties.insert(field_name.to_string(), Self::convert_type_ref(type_ref));
690 }
691
692 let mut obj = json!({
693 "type": "object",
694 "properties": Value::Object(properties),
695 });
696 if !required.is_empty() {
697 obj["required"] = json!(required);
698 }
699 obj
700 }
701 "ENUM" => {
702 let values: Vec<Value> = gql_type
703 .get("enumValues")
704 .and_then(Value::as_array)
705 .map(|vals| {
706 vals.iter()
707 .filter_map(|v| v.get("name").and_then(Value::as_str))
708 .map(|s| json!(s))
709 .collect()
710 })
711 .unwrap_or_default();
712 json!({ "type": "string", "enum": values })
713 }
714 "SCALAR" => {
715 let name = gql_type.get("name").and_then(Value::as_str).unwrap_or("");
716 Self::scalar_to_json_schema(name)
717 }
718 _ => json!({}),
719 }
720 }
721
722 fn convert_type_ref(type_ref: &Value) -> Value {
724 let kind = type_ref.get("kind").and_then(Value::as_str).unwrap_or("");
725 match kind {
726 "NON_NULL" => Self::convert_type_ref(&type_ref["ofType"]),
727 "LIST" => json!({
728 "type": "array",
729 "items": Self::convert_type_ref(&type_ref["ofType"]),
730 }),
731 "OBJECT" | "INTERFACE" | "INPUT_OBJECT" | "ENUM" => {
732 let name = type_ref.get("name").and_then(Value::as_str).unwrap_or("");
733 json!({ "$ref": format!("#/definitions/{name}") })
734 }
735 "SCALAR" => {
736 let name = type_ref.get("name").and_then(Value::as_str).unwrap_or("");
737 Self::scalar_to_json_schema(name)
738 }
739 _ => json!({}),
740 }
741 }
742
743 fn scalar_to_json_schema(name: &str) -> Value {
745 match name {
746 "String" | "ID" => json!({ "type": "string" }),
747 "Int" => json!({ "type": "integer" }),
748 "Float" => json!({ "type": "number" }),
749 "Boolean" => json!({ "type": "boolean" }),
750 s if s.to_ascii_lowercase().contains("datetime")
751 || s.to_ascii_lowercase().contains("date") =>
752 {
753 json!({ "type": "string", "format": "date-time" })
754 }
755 _ => json!({}),
756 }
757 }
758
759 #[must_use]
773 pub fn is_valid_schema(schema: &Value) -> bool {
774 if !schema.is_object() {
775 return false;
776 }
777 schema.get("type").is_some() || schema.get("properties").is_some()
778 }
779}
780
781#[cfg(test)]
782#[allow(
783 clippy::unwrap_used,
784 clippy::indexing_slicing,
785 clippy::approx_constant,
786 clippy::significant_drop_tightening,
787 clippy::float_cmp,
788 clippy::unnecessary_map_or
789)]
790mod tests {
791 use super::*;
792 use serde_json::json;
793
    /// Service with default configuration and no AI provider or GraphQL
    /// service attached.
    fn svc() -> SchemaDiscoveryService {
        SchemaDiscoveryService::new(SchemaDiscoveryConfig::default(), None)
    }
797
798 #[test]
801 fn test_infer_object_schema() {
802 let s = svc();
803 let schema = s.infer_from_example(&json!({"name": "Alice", "age": 30, "active": true}));
804 assert_eq!(schema["type"].as_str().unwrap(), "object");
805 assert_eq!(
806 schema["properties"]["name"]["type"].as_str().unwrap(),
807 "string"
808 );
809 assert_eq!(
810 schema["properties"]["age"]["type"].as_str().unwrap(),
811 "integer"
812 );
813 assert_eq!(
814 schema["properties"]["active"]["type"].as_str().unwrap(),
815 "boolean"
816 );
817 }
818
819 #[test]
820 fn test_infer_array_schema() {
821 let s = svc();
822 let schema = s.infer_from_example(&json!([{"id": 1}, {"id": 2}]));
823 assert_eq!(schema["type"].as_str().unwrap(), "array");
824 assert_eq!(schema["items"]["type"].as_str().unwrap(), "object");
825 }
826
827 #[test]
828 fn test_infer_string_schema() {
829 let s = svc();
830 let schema = s.infer_from_example(&json!("hello"));
831 assert_eq!(schema["type"].as_str().unwrap(), "string");
832 }
833
834 #[test]
835 fn test_infer_number_float() {
836 let s = svc();
837 let schema = s.infer_from_example(&json!(3.14));
838 assert_eq!(schema["type"].as_str().unwrap(), "number");
839 }
840
841 #[test]
842 fn test_infer_null() {
843 let s = svc();
844 let schema = s.infer_from_example(&Value::Null);
845 assert_eq!(schema["type"].as_str().unwrap(), "null");
846 }
847
848 #[test]
851 fn test_valid_schema_with_type() {
852 assert!(SchemaDiscoveryService::is_valid_schema(
853 &json!({"type": "object"})
854 ));
855 }
856
857 #[test]
858 fn test_valid_schema_with_properties() {
859 assert!(SchemaDiscoveryService::is_valid_schema(
860 &json!({"properties": {"x": {"type": "string"}}})
861 ));
862 }
863
864 #[test]
865 fn test_invalid_schema_string() {
866 assert!(!SchemaDiscoveryService::is_valid_schema(&json!(
867 "not schema"
868 )));
869 }
870
871 #[test]
872 fn test_invalid_schema_empty_object() {
873 assert!(!SchemaDiscoveryService::is_valid_schema(&json!({})));
875 }
876
877 #[test]
880 fn test_evolution_added_field() {
881 let s = svc();
882 let old = json!({"type": "object", "properties": {"name": {"type": "string"}}});
883 let new = json!({"type": "object", "properties": {"name": {"type": "string"}, "email": {"type": "string"}}});
884 let diff = s.detect_evolution(&old, &new);
885 assert!(diff.added_fields.contains(&"email".to_string()));
886 assert!(diff.removed_fields.is_empty());
887 assert!(diff.is_compatible);
888 }
889
890 #[test]
891 fn test_evolution_removed_field() {
892 let s = svc();
893 let old = json!({"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}});
894 let new = json!({"type": "object", "properties": {"name": {"type": "string"}}});
895 let diff = s.detect_evolution(&old, &new);
896 assert!(diff.removed_fields.contains(&"age".to_string()));
897 assert!(!diff.is_compatible);
898 }
899
900 #[test]
901 fn test_evolution_type_change() {
902 let s = svc();
903 let old = json!({"type": "object", "properties": {"id": {"type": "string"}}});
904 let new = json!({"type": "object", "properties": {"id": {"type": "integer"}}});
905 let diff = s.detect_evolution(&old, &new);
906 assert_eq!(diff.type_changes.len(), 1);
907 assert_eq!(
908 diff.type_changes[0],
909 (
910 "id".to_string(),
911 "string".to_string(),
912 "integer".to_string()
913 )
914 );
915 assert!(!diff.is_compatible);
916 }
917
918 #[test]
919 fn test_evolution_no_change() {
920 let s = svc();
921 let schema = json!({"type": "object", "properties": {"x": {"type": "string"}}});
922 let diff = s.detect_evolution(&schema, &schema);
923 assert!(diff.added_fields.is_empty());
924 assert!(diff.removed_fields.is_empty());
925 assert!(diff.is_compatible);
926 }
927
928 #[test]
931 fn test_correction_overrides_inferred_type() {
932 let s = svc();
933 s.add_correction("id".to_string(), json!({"type": "integer"}));
934
935 let schema = json!({
936 "type": "object",
937 "properties": {
938 "id": {"type": "string"},
939 "name": {"type": "string"}
940 }
941 });
942 let corrected = s.apply_corrections(schema);
943 assert_eq!(
944 corrected["properties"]["id"]["type"].as_str().unwrap(),
945 "integer"
946 );
947 assert_eq!(
948 corrected["properties"]["name"]["type"].as_str().unwrap(),
949 "string"
950 );
951 }
952
953 #[test]
956 fn test_cache_and_retrieve() {
957 let s = svc();
958 let schema = json!({"type": "object", "properties": {"title": {"type": "string"}}});
959 s.cache_schema(
960 "example.com".to_string(),
961 schema.clone(),
962 SchemaSource::UserCorrection,
963 1.0,
964 );
965
966 let cache = s.cache.read();
967 let entry = cache.get("example.com").unwrap();
968 assert_eq!(entry.schema, schema);
969 assert_eq!(entry.source, SchemaSource::UserCorrection);
970 }
971
972 #[tokio::test]
975 async fn test_infer_from_html_no_provider() {
976 let s = svc();
977 let result = s
978 .infer_from_html("<html><body>test</body></html>", None)
979 .await;
980 assert!(result.is_err());
981 assert!(result.unwrap_err().to_string().contains("No AI provider"));
982 }
983
984 #[tokio::test]
987 async fn test_infer_from_html_cache_hit() {
988 let s = svc();
989 let schema = json!({"type": "object"});
990 s.cache_schema(
991 "example.com".to_string(),
992 schema.clone(),
993 SchemaSource::Cached,
994 0.9,
995 );
996
997 let result = s
998 .infer_from_html("<html/>", Some("example.com"))
999 .await
1000 .unwrap();
1001 assert_eq!(result.source, SchemaSource::Cached);
1002 assert_eq!(result.schema, schema);
1003 }
1004
    /// Test double for a GraphQL `ScrapingService`: wraps the JSON value
    /// that every `execute` call returns, serialized, as its output data.
    struct MockGraphQlService(Value);
1009
    /// Canned-response implementation: the input is ignored entirely.
    #[async_trait::async_trait]
    impl ScrapingService for MockGraphQlService {
        fn name(&self) -> &'static str {
            "mock_graphql"
        }
        /// Returns the wrapped JSON value serialized into `data` (an empty
        /// string if serialization fails) with default metadata.
        async fn execute(
            &self,
            _input: crate::ports::ServiceInput,
        ) -> Result<crate::ports::ServiceOutput> {
            Ok(crate::ports::ServiceOutput {
                data: serde_json::to_string(&self.0).unwrap_or_default(),
                metadata: Value::default(),
            })
        }
    }
1025
1026 impl SchemaDiscoveryService {
1027 fn with_mock_graphql(config: SchemaDiscoveryConfig, response: Value) -> Self {
1032 let svc = Arc::new(MockGraphQlService(response));
1033 Self::new(config, None).with_graphql_service(svc)
1034 }
1035 }
1036
    /// Minimal introspection payload: a `Query` type with a `user` field, a
    /// `User` object (`id: ID!`, `name: String`) and the `String` / `ID`
    /// scalars. `__schema` sits at the top level, with no `data` envelope.
    fn make_introspection_response_simple() -> Value {
        json!({
            "__schema": {
                "queryType": { "name": "Query" },
                "mutationType": null,
                "subscriptionType": null,
                "types": [
                    {
                        "kind": "OBJECT",
                        "name": "Query",
                        "fields": [
                            {
                                "name": "user",
                                "type": { "kind": "OBJECT", "name": "User", "ofType": null }
                            }
                        ],
                        "inputFields": null,
                        "interfaces": [],
                        "enumValues": null,
                        "possibleTypes": null
                    },
                    {
                        "kind": "OBJECT",
                        "name": "User",
                        "fields": [
                            {
                                "name": "id",
                                "type": { "kind": "NON_NULL", "name": null, "ofType": { "kind": "SCALAR", "name": "ID", "ofType": null } }
                            },
                            {
                                "name": "name",
                                "type": { "kind": "SCALAR", "name": "String", "ofType": null }
                            }
                        ],
                        "inputFields": null,
                        "interfaces": [],
                        "enumValues": null,
                        "possibleTypes": null
                    },
                    {
                        "kind": "SCALAR",
                        "name": "String",
                        "fields": null,
                        "inputFields": null,
                        "interfaces": null,
                        "enumValues": null,
                        "possibleTypes": null
                    },
                    {
                        "kind": "SCALAR",
                        "name": "ID",
                        "fields": null,
                        "inputFields": null,
                        "interfaces": null,
                        "enumValues": null,
                        "possibleTypes": null
                    }
                ],
                "directives": []
            }
        })
    }
1099
1100 #[tokio::test]
1101 async fn introspection_no_graphql_service() {
1102 let s = svc(); let result = s
1104 .infer_from_graphql("https://api.example.com/graphql", None)
1105 .await;
1106 assert!(result.is_err());
1107 assert!(
1108 result
1109 .unwrap_err()
1110 .to_string()
1111 .contains("No GraphQL service")
1112 );
1113 }
1114
1115 #[test]
1116 fn graphql_type_to_json_schema_scalar() {
1117 assert_eq!(
1118 SchemaDiscoveryService::scalar_to_json_schema("String"),
1119 json!({"type": "string"})
1120 );
1121 assert_eq!(
1122 SchemaDiscoveryService::scalar_to_json_schema("Int"),
1123 json!({"type": "integer"})
1124 );
1125 assert_eq!(
1126 SchemaDiscoveryService::scalar_to_json_schema("Float"),
1127 json!({"type": "number"})
1128 );
1129 assert_eq!(
1130 SchemaDiscoveryService::scalar_to_json_schema("Boolean"),
1131 json!({"type": "boolean"})
1132 );
1133 assert_eq!(
1134 SchemaDiscoveryService::scalar_to_json_schema("ID"),
1135 json!({"type": "string"})
1136 );
1137 assert_eq!(
1138 SchemaDiscoveryService::scalar_to_json_schema("DateTime"),
1139 json!({"type": "string", "format": "date-time"})
1140 );
1141 assert_eq!(
1143 SchemaDiscoveryService::scalar_to_json_schema("JSON"),
1144 json!({})
1145 );
1146 }
1147
1148 #[test]
1149 fn graphql_type_to_json_schema_object() {
1150 let gql_type = json!({
1151 "kind": "OBJECT",
1152 "name": "User",
1153 "fields": [
1154 {
1155 "name": "id",
1156 "type": { "kind": "SCALAR", "name": "ID", "ofType": null }
1157 },
1158 {
1159 "name": "email",
1160 "type": { "kind": "SCALAR", "name": "String", "ofType": null }
1161 }
1162 ]
1163 });
1164 let schema = SchemaDiscoveryService::convert_type_to_definition(&gql_type);
1165 assert_eq!(schema["type"].as_str().unwrap(), "object");
1166 assert_eq!(
1167 schema["properties"]["id"]["type"].as_str().unwrap(),
1168 "string"
1169 );
1170 assert_eq!(
1171 schema["properties"]["email"]["type"].as_str().unwrap(),
1172 "string"
1173 );
1174 assert!(
1176 schema.get("required").is_none()
1177 || schema["required"].as_array().is_none_or(Vec::is_empty)
1178 );
1179 }
1180
1181 #[test]
1182 fn graphql_non_null_adds_to_required() {
1183 let gql_type = json!({
1184 "kind": "OBJECT",
1185 "name": "Product",
1186 "fields": [
1187 {
1188 "name": "sku",
1189 "type": {
1190 "kind": "NON_NULL",
1191 "name": null,
1192 "ofType": { "kind": "SCALAR", "name": "String", "ofType": null }
1193 }
1194 },
1195 {
1196 "name": "description",
1197 "type": { "kind": "SCALAR", "name": "String", "ofType": null }
1198 }
1199 ]
1200 });
1201 let schema = SchemaDiscoveryService::convert_type_to_definition(&gql_type);
1202 let required = schema["required"].as_array().unwrap();
1203 assert!(required.contains(&json!("sku")));
1204 assert!(!required.contains(&json!("description")));
1205 }
1206
1207 #[tokio::test]
1208 async fn introspection_result_cached() {
1209 let response = make_introspection_response_simple();
1210 let s =
1211 SchemaDiscoveryService::with_mock_graphql(SchemaDiscoveryConfig::default(), response);
1212
1213 let first = s
1215 .infer_from_graphql("https://api.example.com/graphql", None)
1216 .await
1217 .unwrap();
1218 assert_eq!(first.source, SchemaSource::AiInference);
1219 assert_eq!(first.confidence, 1.0);
1220
1221 let second = s
1223 .infer_from_graphql("https://api.example.com/graphql", None)
1224 .await
1225 .unwrap();
1226 assert_eq!(second.source, SchemaSource::Cached);
1227 assert_eq!(second.schema["queryType"].as_str().unwrap(), "Query");
1228 }
1229}