1use super::types::{ContractDiffConfig, Mismatch, MismatchSeverity, MismatchType};
8use crate::intelligent_behavior::config::BehaviorModelConfig;
9use crate::intelligent_behavior::llm_client::LlmClient;
10use crate::intelligent_behavior::types::LlmGenerationRequest;
11use mockforge_foundation::Result;
12use mockforge_openapi::OpenApiSpec;
13pub use mockforge_foundation::contract_diff_types::{SemanticChangeType, SemanticDriftResult};
15use openapiv3;
16use serde_json::Value;
17use std::collections::HashMap;
18
19pub struct SemanticAnalyzer {
21 llm_client: Option<LlmClient>,
23 config: ContractDiffConfig,
25}
26
27impl SemanticAnalyzer {
28 pub fn new(config: ContractDiffConfig) -> Result<Self> {
30 let llm_client = if config.semantic_analysis_enabled {
31 let llm_config = BehaviorModelConfig {
32 llm_provider: config.llm_provider.clone(),
33 model: config.llm_model.clone(),
34 api_key: config.api_key.clone(),
35 api_endpoint: None,
36 temperature: 0.3, max_tokens: 3000,
38 rules: crate::intelligent_behavior::BehaviorRules::default(),
39 };
40
41 Some(LlmClient::new(llm_config))
42 } else {
43 None
44 };
45
46 Ok(Self { llm_client, config })
47 }
48
49 pub async fn analyze_semantic_drift(
55 &self,
56 before_spec: &OpenApiSpec,
57 after_spec: &OpenApiSpec,
58 endpoint_path: &str,
59 method: &str,
60 ) -> Result<Option<SemanticDriftResult>> {
61 if !self.config.semantic_analysis_enabled {
62 return Ok(None);
63 }
64
65 let before_schema = self.extract_endpoint_schema(before_spec, endpoint_path, method);
67 let after_schema = self.extract_endpoint_schema(after_spec, endpoint_path, method);
68
69 if before_schema.is_none() || after_schema.is_none() {
70 return Ok(None);
71 }
72
73 let before = before_schema.unwrap();
74 let after = after_schema.unwrap();
75
76 let rule_based_changes = self.detect_rule_based_changes(&before, &after);
78
79 if let Some(ref llm_client) = self.llm_client {
81 let llm_result = self
82 .analyze_with_llm(llm_client, &before, &after, endpoint_path, method)
83 .await?;
84
85 Ok(Some(self.combine_results(rule_based_changes, llm_result, before, after)))
87 } else {
88 if rule_based_changes.is_empty() {
90 return Ok(None);
91 }
92
93 let change_type = self.determine_change_type(&rule_based_changes);
95 let semantic_confidence = 0.6; let soft_breaking_score = self.calculate_soft_breaking_score(&rule_based_changes);
97
98 Ok(Some(SemanticDriftResult {
99 semantic_confidence,
100 soft_breaking_score,
101 change_type,
102 llm_analysis: serde_json::json!({}),
103 before_semantic_state: before,
104 after_semantic_state: after,
105 semantic_mismatches: rule_based_changes,
106 }))
107 }
108 }
109
110 fn extract_endpoint_schema(
112 &self,
113 spec: &OpenApiSpec,
114 endpoint_path: &str,
115 method: &str,
116 ) -> Option<Value> {
117 spec.spec.paths.paths.get(endpoint_path).and_then(|path_item| {
120 path_item.as_item().and_then(|item| {
121 let operation = match method.to_uppercase().as_str() {
123 "GET" => item.get.as_ref(),
124 "POST" => item.post.as_ref(),
125 "PUT" => item.put.as_ref(),
126 "DELETE" => item.delete.as_ref(),
127 "PATCH" => item.patch.as_ref(),
128 "HEAD" => item.head.as_ref(),
129 "OPTIONS" => item.options.as_ref(),
130 "TRACE" => item.trace.as_ref(),
131 _ => None,
132 }?;
133
134 operation.responses.responses.get(&openapiv3::StatusCode::Code(200)).and_then(
135 |resp| {
136 resp.as_item().and_then(|r| {
137 r.content.get("application/json").and_then(|media| {
138 media
139 .schema
140 .as_ref()
141 .map(|s| serde_json::to_value(s).unwrap_or_default())
142 })
143 })
144 },
145 )
146 })
147 })
148 }
149
150 fn detect_rule_based_changes(&self, before: &Value, after: &Value) -> Vec<Mismatch> {
152 let mut mismatches = Vec::new();
153
154 mismatches.extend(self.detect_description_changes(before, after));
156
157 mismatches.extend(self.detect_enum_narrowing(before, after));
159
160 mismatches.extend(self.detect_nullable_changes(before, after));
162
163 mismatches.extend(self.detect_error_code_changes(before, after));
165
166 mismatches
167 }
168
169 fn detect_description_changes(&self, before: &Value, after: &Value) -> Vec<Mismatch> {
171 let mut mismatches = Vec::new();
172
173 if let (Some(before_desc), Some(after_desc)) = (
175 before.get("description").and_then(|v| v.as_str()),
176 after.get("description").and_then(|v| v.as_str()),
177 ) {
178 if before_desc != after_desc {
179 let is_significant = self.is_description_meaning_change(before_desc, after_desc);
181
182 if is_significant {
183 mismatches.push(Mismatch {
184 mismatch_type: MismatchType::SemanticDescriptionChange,
185 path: "description".to_string(),
186 method: None,
187 expected: Some(before_desc.to_string()),
188 actual: Some(after_desc.to_string()),
189 description: format!(
190 "Description meaning changed: '{}' → '{}'",
191 before_desc, after_desc
192 ),
193 severity: MismatchSeverity::Medium,
194 confidence: 0.7,
195 context: HashMap::new(),
196 });
197 }
198 }
199 }
200
201 mismatches
202 }
203
204 fn is_description_meaning_change(&self, before: &str, after: &str) -> bool {
206 let before_words: Vec<&str> = before.split_whitespace().collect();
208 let after_words: Vec<&str> = after.split_whitespace().collect();
209
210 if before_words.is_empty() || after_words.is_empty() {
211 return true; }
213
214 let common_words: usize = before_words.iter().filter(|w| after_words.contains(w)).count();
215
216 let change_ratio =
217 1.0 - (common_words as f64 / before_words.len().max(after_words.len()) as f64);
218 change_ratio > 0.3
219 }
220
221 fn detect_enum_narrowing(&self, before: &Value, after: &Value) -> Vec<Mismatch> {
223 let mut mismatches = Vec::new();
224
225 if let (Some(before_enum), Some(after_enum)) = (
226 before.get("enum").and_then(|v| v.as_array()),
227 after.get("enum").and_then(|v| v.as_array()),
228 ) {
229 let before_set: std::collections::HashSet<&Value> = before_enum.iter().collect();
230 let after_set: std::collections::HashSet<&Value> = after_enum.iter().collect();
231
232 let removed: Vec<_> = before_set.difference(&after_set).collect();
233
234 if !removed.is_empty() {
235 mismatches.push(Mismatch {
236 mismatch_type: MismatchType::SemanticEnumNarrowing,
237 path: "enum".to_string(),
238 method: None,
239 expected: Some(format!("{:?}", before_enum)),
240 actual: Some(format!("{:?}", after_enum)),
241 description: format!(
242 "Enum values narrowed: {} value(s) removed",
243 removed.len()
244 ),
245 severity: MismatchSeverity::High,
246 confidence: 1.0, context: HashMap::new(),
248 });
249 }
250 }
251
252 mismatches
253 }
254
255 fn detect_nullable_changes(&self, before: &Value, after: &Value) -> Vec<Mismatch> {
257 let mut mismatches = Vec::new();
258
259 let before_nullable = before.get("nullable").and_then(|v| v.as_bool()).unwrap_or(false);
261 let after_nullable = after.get("nullable").and_then(|v| v.as_bool()).unwrap_or(false);
262
263 if before_nullable && !after_nullable {
264 let is_hidden = after.get("oneOf").is_some() || after.get("anyOf").is_some();
266
267 if is_hidden {
268 mismatches.push(Mismatch {
269 mismatch_type: MismatchType::SemanticNullabilityChange,
270 path: "nullable".to_string(),
271 method: None,
272 expected: Some("nullable: true".to_string()),
273 actual: Some("nullable: false (hidden behind oneOf/anyOf)".to_string()),
274 description:
275 "Field became non-nullable but change is hidden behind oneOf/anyOf"
276 .to_string(),
277 severity: MismatchSeverity::High,
278 confidence: 0.8,
279 context: HashMap::new(),
280 });
281 }
282 }
283
284 mismatches
285 }
286
287 fn detect_error_code_changes(&self, before: &Value, after: &Value) -> Vec<Mismatch> {
292 let mut mismatches = Vec::new();
293
294 let before_codes = Self::extract_error_status_codes(before);
295 let after_codes = Self::extract_error_status_codes(after);
296
297 let removed: Vec<&String> =
298 before_codes.iter().filter(|c| !after_codes.contains(*c)).collect();
299
300 if !removed.is_empty() {
301 mismatches.push(Mismatch {
302 mismatch_type: MismatchType::SemanticErrorCodeRemoved,
303 path: "responses".to_string(),
304 method: None,
305 expected: Some(format!("{:?}", before_codes)),
306 actual: Some(format!("{:?}", after_codes)),
307 description: format!(
308 "Error status code(s) removed: {}",
309 removed.iter().map(|c| c.as_str()).collect::<Vec<_>>().join(", ")
310 ),
311 severity: MismatchSeverity::High,
312 confidence: 1.0,
313 context: HashMap::new(),
314 });
315 }
316
317 mismatches
318 }
319
320 fn extract_error_status_codes(schema: &Value) -> Vec<String> {
322 let mut codes = Vec::new();
323 if let Some(responses) = schema.get("responses").and_then(|v| v.as_object()) {
324 for key in responses.keys() {
325 if let Some(first_char) = key.chars().next() {
327 if (first_char == '4' || first_char == '5')
328 && key.len() == 3
329 && key.chars().all(|c| c.is_ascii_digit())
330 {
331 codes.push(key.clone());
332 }
333 }
334 }
335 }
336 codes.sort();
337 codes
338 }
339
340 async fn analyze_with_llm(
342 &self,
343 llm_client: &LlmClient,
344 before: &Value,
345 after: &Value,
346 endpoint_path: &str,
347 method: &str,
348 ) -> Result<Value> {
349 let prompt = self.build_semantic_analysis_prompt(before, after, endpoint_path, method);
350
351 let request = LlmGenerationRequest::new(self.get_system_prompt(), prompt)
352 .with_temperature(0.3)
353 .with_max_tokens(3000);
354
355 let response = llm_client.generate(&request).await?;
356
357 let analysis = response
359 .get("analysis")
360 .and_then(|v| v.as_str())
361 .map(|s| s.to_string())
362 .unwrap_or_else(|| serde_json::to_string(&response).unwrap_or_default());
363
364 let confidence = response.get("confidence").and_then(|v| v.as_f64()).unwrap_or(0.5);
365
366 let soft_breaking_score =
367 response.get("soft_breaking_score").and_then(|v| v.as_f64()).unwrap_or(0.5);
368
369 Ok(serde_json::json!({
370 "analysis": analysis,
371 "confidence": confidence,
372 "soft_breaking_score": soft_breaking_score
373 }))
374 }
375
376 fn build_semantic_analysis_prompt(
378 &self,
379 before: &Value,
380 after: &Value,
381 endpoint_path: &str,
382 method: &str,
383 ) -> String {
384 format!(
385 r#"Analyze the semantic differences between these two API contract schemas for endpoint {} {}.
386
387Before schema:
388{}
389
390After schema:
391{}
392
393Please identify:
3941. Any changes in meaning or semantics (not just structural changes)
3952. Description changes that alter the intended behavior
3963. Enum narrowing or constraint tightening
3974. Nullable changes that might break clients
3985. Error code removals
3996. Any "soft-breaking" changes that won't cause immediate failures but will cause issues
400
401Provide your analysis in JSON format with:
402- semantic_confidence: 0.0-1.0
403- soft_breaking_score: 0.0-1.0
404- change_type: one of the semantic change types
405- reasoning: detailed explanation
406- detected_changes: array of specific changes found"#,
407 method,
408 endpoint_path,
409 serde_json::to_string_pretty(before).unwrap_or_default(),
410 serde_json::to_string_pretty(after).unwrap_or_default()
411 )
412 }
413
414 fn get_system_prompt(&self) -> String {
416 "You are an expert API contract analyst specializing in detecting semantic drift and soft-breaking changes in API contracts. Your analysis helps teams understand when API changes might break clients even if they're not structurally breaking.".to_string()
417 }
418
419 fn combine_results(
421 &self,
422 rule_based: Vec<Mismatch>,
423 llm_result: Value,
424 before: Value,
425 after: Value,
426 ) -> SemanticDriftResult {
427 let semantic_confidence =
428 llm_result.get("semantic_confidence").and_then(|v| v.as_f64()).unwrap_or(0.7);
429
430 let soft_breaking_score =
431 llm_result.get("soft_breaking_score").and_then(|v| v.as_f64()).unwrap_or(0.5);
432
433 let change_type_str = llm_result
434 .get("change_type")
435 .and_then(|v| v.as_str())
436 .unwrap_or("meaning_shift");
437
438 let change_type = match change_type_str {
439 "description_change" => SemanticChangeType::DescriptionChange,
440 "enum_narrowing" => SemanticChangeType::EnumNarrowing,
441 "nullable_change" => SemanticChangeType::NullableChange,
442 "error_code_removed" => SemanticChangeType::ErrorCodeRemoved,
443 "semantic_constraint_change" => SemanticChangeType::SemanticConstraintChange,
444 "soft_breaking_change" => SemanticChangeType::SoftBreakingChange,
445 _ => SemanticChangeType::MeaningShift,
446 };
447
448 let semantic_mismatches = rule_based;
450
451 SemanticDriftResult {
452 semantic_confidence,
453 soft_breaking_score,
454 change_type,
455 llm_analysis: llm_result,
456 before_semantic_state: before,
457 after_semantic_state: after,
458 semantic_mismatches,
459 }
460 }
461
462 fn determine_change_type(&self, mismatches: &[Mismatch]) -> SemanticChangeType {
464 for mismatch in mismatches {
465 match mismatch.mismatch_type {
466 MismatchType::SemanticDescriptionChange => {
467 return SemanticChangeType::DescriptionChange
468 }
469 MismatchType::SemanticEnumNarrowing => return SemanticChangeType::EnumNarrowing,
470 MismatchType::SemanticNullabilityChange => {
471 return SemanticChangeType::NullableChange
472 }
473 MismatchType::SemanticErrorCodeRemoved => {
474 return SemanticChangeType::ErrorCodeRemoved
475 }
476 _ => {}
477 }
478 }
479
480 SemanticChangeType::MeaningShift
481 }
482
483 fn calculate_soft_breaking_score(&self, mismatches: &[Mismatch]) -> f64 {
485 if mismatches.is_empty() {
486 return 0.0;
487 }
488
489 let total_score: f64 = mismatches
491 .iter()
492 .map(|m| {
493 let severity_score = match m.severity {
494 MismatchSeverity::Critical => 1.0,
495 MismatchSeverity::High => 0.8,
496 MismatchSeverity::Medium => 0.6,
497 MismatchSeverity::Low => 0.4,
498 MismatchSeverity::Info => 0.2,
499 };
500 severity_score * m.confidence
501 })
502 .sum();
503
504 (total_score / mismatches.len() as f64).min(1.0)
505 }
506}