Skip to main content

mockforge_intelligence/intelligent_behavior/
openapi_generator.rs

1//! OpenAPI specification generator from recorded traffic
2//!
3//! This module analyzes recorded API traffic and generates OpenAPI 3.0 specifications
4//! using pattern detection and LLM inference.
5
6use super::config::BehaviorModelConfig;
7use super::llm_client::LlmClient;
8use super::types::LlmGenerationRequest;
9use chrono::{DateTime, Utc};
10use mockforge_foundation::Result;
11use mockforge_openapi::spec::OpenApiSpec;
12use openapiv3::*;
13use serde::{Deserialize, Serialize};
14use serde_json::{json, Value};
15use std::collections::HashMap;
16
17// `HttpExchange` is re-exported from `mockforge_foundation::intelligent_behavior`.
18pub use mockforge_foundation::intelligent_behavior::HttpExchange;
19
/// Configuration for OpenAPI spec generation.
///
/// The struct is (de)serializable via serde; `min_confidence` may be omitted
/// from serialized input, in which case `default_min_confidence()` supplies it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenApiGenerationConfig {
    /// Minimum confidence score for including inferred paths (0.0 to 1.0).
    ///
    /// Defaults to the value returned by `default_min_confidence()` when the
    /// field is missing from deserialized input.
    #[serde(default = "default_min_confidence")]
    pub min_confidence: f64,

    /// Behavior model config for LLM inference.
    ///
    /// When `Some`, `OpenApiSpecGenerator::new` builds an `LlmClient` from it;
    /// when `None`, no LLM client is created.
    pub behavior_model: Option<BehaviorModelConfig>,
}
30
/// Default value for `OpenApiGenerationConfig::min_confidence`.
///
/// Referenced by the field's `#[serde(default = ...)]` attribute and by the
/// struct's `Default` implementation.
fn default_min_confidence() -> f64 {
    const DEFAULT_MIN_CONFIDENCE: f64 = 0.7;
    DEFAULT_MIN_CONFIDENCE
}
34
35impl Default for OpenApiGenerationConfig {
36    fn default() -> Self {
37        Self {
38            min_confidence: default_min_confidence(),
39            behavior_model: None,
40        }
41    }
42}
43
/// Confidence score for an inferred OpenAPI element.
///
/// Produced per path by `OpenApiSpecGenerator::calculate_confidence_scores`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConfidenceScore {
    /// Confidence value (0.0 to 1.0); higher means more confident.
    pub value: f64,
    /// Human-readable explanation of how the score was derived.
    pub reason: String,
}
52
/// Metadata about the generated OpenAPI spec.
///
/// Populated at the end of `OpenApiSpecGenerator::generate_from_exchanges`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenApiGenerationMetadata {
    /// Number of requests analyzed (the length of the input exchange list).
    pub requests_analyzed: usize,
    /// Number of paths inferred (count of distinct normalized paths).
    pub paths_inferred: usize,
    /// Confidence scores per path, keyed by normalized path string.
    pub path_confidence: HashMap<String, ConfidenceScore>,
    /// Timestamp of generation (captured when generation starts).
    pub generated_at: DateTime<Utc>,
    /// Generation duration in milliseconds.
    pub duration_ms: u64,
}
67
/// Result of OpenAPI spec generation: the spec plus bookkeeping about the run.
///
/// Returned by `OpenApiSpecGenerator::generate_from_exchanges`.
#[derive(Debug, Clone)]
pub struct OpenApiGenerationResult {
    /// Generated OpenAPI specification.
    pub spec: OpenApiSpec,
    /// Generation metadata (counts, per-path confidence, timing).
    pub metadata: OpenApiGenerationMetadata,
}
76
/// OpenAPI specification generator from recorded traffic.
///
/// Generation goes through the LLM when `llm_client` is present and falls back
/// to pattern-based inference otherwise.
pub struct OpenApiSpecGenerator {
    /// LLM client for AI-assisted generation; `None` when the configuration
    /// carries no behavior model.
    llm_client: Option<LlmClient>,
    /// Configuration the generator was built from.
    // The lint is silenced because the generator's methods in this file do not
    // read this field after construction.
    #[allow(dead_code)]
    config: OpenApiGenerationConfig,
}
85
86impl OpenApiSpecGenerator {
87    /// Create a new OpenAPI spec generator
88    pub fn new(config: OpenApiGenerationConfig) -> Self {
89        let llm_client = config.behavior_model.as_ref().map(|bm| LlmClient::new(bm.clone()));
90
91        Self { llm_client, config }
92    }
93
94    /// Generate OpenAPI spec from HTTP exchanges
95    ///
96    /// This method:
97    /// 1. Groups requests by path patterns (normalize paths with parameters)
98    /// 2. Analyzes request/response schemas using JSON schema inference
99    /// 3. Uses LLM to infer OpenAPI spec structure from patterns
100    /// 4. Generates paths, operations, schemas, and examples
101    pub async fn generate_from_exchanges(
102        &self,
103        exchanges: Vec<HttpExchange>,
104    ) -> Result<OpenApiGenerationResult> {
105        let start_time = Utc::now();
106
107        if exchanges.is_empty() {
108            return Err(mockforge_foundation::Error::internal(
109                "No HTTP exchanges provided for OpenAPI generation",
110            ));
111        }
112
113        tracing::info!("Analyzing {} HTTP exchanges for OpenAPI generation", exchanges.len());
114
115        // 1. Group requests by path patterns
116        let path_groups = self.group_by_path_pattern(&exchanges);
117
118        // 2. Infer path parameters
119        let normalized_paths = self.infer_path_parameters(&path_groups);
120
121        // 3. Extract schemas from request/response bodies
122        let schemas = self.infer_schemas(&exchanges).await?;
123
124        // 4. Generate OpenAPI spec structure
125        let spec = if let Some(ref llm_client) = self.llm_client {
126            // Use LLM for AI-assisted generation
127            self.generate_with_llm(&normalized_paths, &schemas, &exchanges, llm_client)
128                .await?
129        } else {
130            // Fallback to pattern-based generation
131            self.generate_pattern_based(&normalized_paths, &schemas, &exchanges).await?
132        };
133
134        let duration_ms = (Utc::now() - start_time).num_milliseconds() as u64;
135
136        // 5. Calculate confidence scores
137        let path_confidence = self.calculate_confidence_scores(&normalized_paths, &exchanges);
138
139        let metadata = OpenApiGenerationMetadata {
140            requests_analyzed: exchanges.len(),
141            paths_inferred: normalized_paths.len(),
142            path_confidence,
143            generated_at: start_time,
144            duration_ms,
145        };
146
147        Ok(OpenApiGenerationResult { spec, metadata })
148    }
149
150    /// Group exchanges by path pattern
151    pub fn group_by_path_pattern<'a>(
152        &self,
153        exchanges: &'a [HttpExchange],
154    ) -> HashMap<String, Vec<&'a HttpExchange>> {
155        let mut groups: HashMap<String, Vec<&HttpExchange>> = HashMap::new();
156
157        for exchange in exchanges {
158            let path = &exchange.path;
159            groups.entry(path.clone()).or_default().push(exchange);
160        }
161
162        groups
163    }
164
165    /// Infer path parameters from path patterns
166    ///
167    /// Detects patterns like `/api/users/123` and `/api/users/456` and normalizes
168    /// them to `/api/users/{id}`.
169    pub fn infer_path_parameters<'a>(
170        &self,
171        path_groups: &HashMap<String, Vec<&'a HttpExchange>>,
172    ) -> HashMap<String, Vec<&'a HttpExchange>> {
173        let mut normalized: HashMap<String, Vec<&HttpExchange>> = HashMap::new();
174
175        // Group paths by their base pattern
176        let _path_segments: Vec<Vec<String>> = path_groups
177            .keys()
178            .map(|path| path.split('/').filter(|s| !s.is_empty()).map(|s| s.to_string()).collect())
179            .collect();
180
181        // Find common patterns
182        for (original_path, exchanges) in path_groups {
183            let segments: Vec<&str> = original_path.split('/').filter(|s| !s.is_empty()).collect();
184
185            // Try to find similar paths
186            let mut normalized_path = original_path.clone();
187            for other_path in path_groups.keys() {
188                if other_path == original_path {
189                    continue;
190                }
191
192                let other_segments: Vec<&str> =
193                    other_path.split('/').filter(|s| !s.is_empty()).collect();
194
195                if segments.len() == other_segments.len() {
196                    // Check if paths differ only in the last segment (likely an ID)
197                    let mut normalized_segments: Vec<String> = Vec::new();
198                    let mut is_parameter = false;
199
200                    for (i, (seg, other_seg)) in
201                        segments.iter().zip(other_segments.iter()).enumerate()
202                    {
203                        if seg == other_seg {
204                            normalized_segments.push(seg.to_string());
205                        } else if i == segments.len() - 1 {
206                            // Last segment differs - likely a parameter
207                            normalized_segments
208                                .push(format!("{{{}}}", self.infer_parameter_name(seg)));
209                            is_parameter = true;
210                        } else {
211                            // Different in middle - not a match
212                            break;
213                        }
214                    }
215
216                    if is_parameter {
217                        normalized_path = format!("/{}", normalized_segments.join("/"));
218                        break;
219                    }
220                }
221            }
222
223            normalized.entry(normalized_path).or_default().extend(exchanges);
224        }
225
226        normalized
227    }
228
229    /// Infer parameter name from path segment
230    fn infer_parameter_name(&self, segment: &str) -> String {
231        // Try to detect common patterns
232        if segment.chars().all(|c| c.is_ascii_digit()) {
233            "id".to_string()
234        } else if segment.starts_with("user") || segment.contains("user") {
235            "userId".to_string()
236        } else if segment.starts_with("order") || segment.contains("order") {
237            "orderId".to_string()
238        } else {
239            // Default: use singular form or generic name
240            "id".to_string()
241        }
242    }
243
244    /// Infer JSON schemas from request/response bodies
245    pub async fn infer_schemas(
246        &self,
247        exchanges: &[HttpExchange],
248    ) -> Result<HashMap<String, Value>> {
249        let mut schemas: HashMap<String, Value> = HashMap::new();
250
251        for exchange in exchanges {
252            // Parse request body if present
253            if let Some(ref body) = exchange.body {
254                if exchange.body_encoding == "utf8" {
255                    if let Ok(json_value) = serde_json::from_str::<Value>(body) {
256                        let schema = self.json_to_schema(&json_value);
257                        schemas.insert("RequestBody".to_string(), schema);
258                    }
259                }
260            }
261
262            // Parse response body if present
263            if let Some(ref body) = exchange.response_body {
264                if exchange.response_body_encoding.as_deref() == Some("utf8") {
265                    if let Ok(json_value) = serde_json::from_str::<Value>(body) {
266                        let schema = self.json_to_schema(&json_value);
267                        schemas.insert("ResponseBody".to_string(), schema);
268                    }
269                }
270            }
271        }
272
273        Ok(schemas)
274    }
275
276    /// Convert JSON value to JSON Schema
277    #[allow(clippy::only_used_in_recursion)]
278    pub fn json_to_schema(&self, value: &Value) -> Value {
279        match value {
280            Value::Null => json!({"type": "null"}),
281            Value::Bool(_) => json!({"type": "boolean"}),
282            Value::Number(n) => {
283                if n.is_i64() {
284                    json!({"type": "integer"})
285                } else {
286                    json!({"type": "number"})
287                }
288            }
289            Value::String(_) => json!({"type": "string"}),
290            Value::Array(arr) => {
291                if let Some(first) = arr.first() {
292                    json!({
293                        "type": "array",
294                        "items": self.json_to_schema(first)
295                    })
296                } else {
297                    json!({"type": "array"})
298                }
299            }
300            Value::Object(obj) => {
301                let mut properties = serde_json::Map::new();
302                let mut required = Vec::new();
303
304                for (key, val) in obj {
305                    properties.insert(key.clone(), self.json_to_schema(val));
306                    // Only mark non-null fields as required.
307                    // Null values indicate the field is optional.
308                    if !val.is_null() {
309                        required.push(key.clone());
310                    }
311                }
312
313                if required.is_empty() {
314                    json!({
315                        "type": "object",
316                        "properties": properties
317                    })
318                } else {
319                    json!({
320                        "type": "object",
321                        "properties": properties,
322                        "required": required
323                    })
324                }
325            }
326        }
327    }
328
329    /// Generate OpenAPI spec using LLM inference
330    async fn generate_with_llm(
331        &self,
332        normalized_paths: &HashMap<String, Vec<&HttpExchange>>,
333        schemas: &HashMap<String, Value>,
334        exchanges: &[HttpExchange],
335        llm_client: &LlmClient,
336    ) -> Result<OpenApiSpec> {
337        // Build prompt for LLM
338        let prompt = self.build_llm_prompt(normalized_paths, schemas, exchanges);
339
340        let request = LlmGenerationRequest {
341            system_prompt: "You are an expert at generating OpenAPI 3.0 specifications from API traffic patterns. Generate valid, well-structured OpenAPI specs.".to_string(),
342            user_prompt: prompt,
343            temperature: 0.3, // Lower temperature for more consistent output
344            max_tokens: 4000,
345            schema: None, // No schema constraint for OpenAPI generation
346        };
347
348        // Generate spec using LLM
349        let response = llm_client.generate(&request).await?;
350
351        // Parse response as OpenAPI spec
352        // The LLM should return a JSON object that can be converted to OpenAPI
353        let spec = OpenApiSpec::from_json(response)?;
354
355        Ok(spec)
356    }
357
358    /// Build LLM prompt from traffic patterns
359    fn build_llm_prompt(
360        &self,
361        normalized_paths: &HashMap<String, Vec<&HttpExchange>>,
362        schemas: &HashMap<String, Value>,
363        exchanges: &[HttpExchange],
364    ) -> String {
365        let mut prompt = String::from(
366            "Generate an OpenAPI 3.0 specification from the following API traffic patterns:\n\n",
367        );
368
369        // Add path patterns
370        prompt.push_str("## Paths and Methods:\n");
371        for (path, path_exchanges) in normalized_paths {
372            let methods: Vec<String> = path_exchanges
373                .iter()
374                .map(|e| e.method.clone())
375                .collect::<std::collections::HashSet<_>>()
376                .into_iter()
377                .collect();
378            prompt.push_str(&format!("- {}: {}\n", path, methods.join(", ")));
379        }
380
381        // Add sample request/response examples
382        prompt.push_str("\n## Sample Exchanges:\n");
383        for (i, exchange) in exchanges.iter().take(10).enumerate() {
384            prompt.push_str(&format!("\n### Exchange {}\n", i + 1));
385            prompt.push_str(&format!("Method: {}\n", exchange.method));
386            prompt.push_str(&format!("Path: {}\n", exchange.path));
387            if let Some(ref body) = exchange.body {
388                if exchange.body_encoding == "utf8" {
389                    prompt.push_str(&format!("Request Body: {}\n", body));
390                }
391            }
392            if let Some(status) = exchange.status_code {
393                prompt.push_str(&format!("Status: {}\n", status));
394                if let Some(ref body) = exchange.response_body {
395                    if exchange.response_body_encoding.as_deref() == Some("utf8") {
396                        prompt.push_str(&format!("Response Body: {}\n", body));
397                    }
398                }
399            }
400        }
401
402        // Add inferred schemas
403        if !schemas.is_empty() {
404            prompt.push_str("\n## Inferred Schemas:\n");
405            prompt.push_str(&serde_json::to_string_pretty(schemas).unwrap_or_default());
406        }
407
408        prompt.push_str("\n\nGenerate a complete OpenAPI 3.0 specification in JSON format with:");
409        prompt.push_str("\n- info section with title and version");
410        prompt.push_str("\n- paths section with all detected endpoints");
411        prompt.push_str("\n- components/schemas section with request/response schemas");
412        prompt.push_str("\n- proper HTTP methods, status codes, and content types");
413
414        prompt
415    }
416
417    /// Generate OpenAPI spec using pattern-based inference (fallback)
418    async fn generate_pattern_based(
419        &self,
420        normalized_paths: &HashMap<String, Vec<&HttpExchange>>,
421        schemas: &HashMap<String, Value>,
422        _exchanges: &[HttpExchange],
423    ) -> Result<OpenApiSpec> {
424        // Create a basic OpenAPI 3.0 spec structure
425        let mut spec = OpenAPI {
426            openapi: "3.0.3".to_string(),
427            info: Info {
428                title: "Generated API".to_string(),
429                version: "1.0.0".to_string(),
430                description: Some(
431                    "OpenAPI specification generated from recorded traffic".to_string(),
432                ),
433                ..Default::default()
434            },
435            paths: Paths {
436                paths: indexmap::IndexMap::new(),
437                ..Default::default()
438            },
439            components: Some(Components {
440                schemas: indexmap::IndexMap::new(),
441                ..Default::default()
442            }),
443            ..Default::default()
444        };
445
446        // Add paths
447        for (path, path_exchanges) in normalized_paths {
448            let mut path_item = PathItem::default();
449
450            // Group by method
451            let mut method_groups: HashMap<String, Vec<&HttpExchange>> = HashMap::new();
452            for exchange in path_exchanges {
453                method_groups.entry(exchange.method.clone()).or_default().push(exchange);
454            }
455
456            // Add operations for each method
457            for (method, method_exchanges) in method_groups {
458                let operation = self.create_operation_from_exchanges(&method_exchanges)?;
459
460                match method.as_str() {
461                    "GET" => path_item.get = Some(operation),
462                    "POST" => path_item.post = Some(operation),
463                    "PUT" => path_item.put = Some(operation),
464                    "DELETE" => path_item.delete = Some(operation),
465                    "PATCH" => path_item.patch = Some(operation),
466                    "HEAD" => path_item.head = Some(operation),
467                    "OPTIONS" => path_item.options = Some(operation),
468                    "TRACE" => path_item.trace = Some(operation),
469                    other => {
470                        tracing::debug!(method = other, path = %path, "Skipping unsupported HTTP method");
471                    }
472                }
473            }
474
475            spec.paths.paths.insert(path.clone(), ReferenceOr::Item(path_item));
476        }
477
478        // Add schemas to components
479        if let Some(ref mut components) = spec.components {
480            for (name, schema_value) in schemas {
481                // Convert JSON Schema to OpenAPI Schema
482                // This is a simplified conversion
483                if let Ok(schema) = serde_json::from_value::<Schema>(schema_value.clone()) {
484                    components.schemas.insert(name.clone(), ReferenceOr::Item(schema));
485                }
486            }
487        }
488
489        // Create raw document for serialization
490        let raw_document = serde_json::to_value(&spec)?;
491
492        Ok(OpenApiSpec {
493            spec,
494            file_path: None,
495            raw_document: Some(raw_document),
496        })
497    }
498
499    /// Create OpenAPI operation from exchanges
500    fn create_operation_from_exchanges(&self, exchanges: &[&HttpExchange]) -> Result<Operation> {
501        // Use the first exchange as a template
502        let first = exchanges
503            .first()
504            .ok_or_else(|| mockforge_foundation::Error::internal("No exchanges provided"))?;
505
506        let mut operation = Operation {
507            summary: Some(format!("{} {}", first.method, first.path)),
508            ..Default::default()
509        };
510
511        // Add responses
512        let mut responses = Responses::default();
513        for exchange in exchanges {
514            if let Some(status_code) = exchange.status_code {
515                let status = StatusCode::Code(status_code as u16);
516                let mut response_obj = Response::default();
517
518                // Add content if response has body
519                if let Some(ref body) = exchange.response_body {
520                    if exchange.response_body_encoding.as_deref() == Some("utf8") {
521                        if let Ok(json_value) = serde_json::from_str::<Value>(body) {
522                            let mut content = indexmap::IndexMap::new();
523                            let mut media_type = MediaType::default();
524
525                            // Convert JSON Schema to OpenAPI Schema
526                            // For now, create a basic object schema
527                            // A full conversion would require parsing the JSON Schema structure
528                            let schema = match json_value {
529                                Value::Object(_) => Schema {
530                                    schema_data: SchemaData::default(),
531                                    schema_kind: SchemaKind::Type(Type::Object(ObjectType {
532                                        properties: indexmap::IndexMap::new(),
533                                        required: vec![],
534                                        additional_properties: None,
535                                        ..Default::default()
536                                    })),
537                                },
538                                Value::Array(_) => Schema {
539                                    schema_data: SchemaData::default(),
540                                    schema_kind: SchemaKind::Type(Type::Array(ArrayType {
541                                        items: None,
542                                        min_items: None,
543                                        max_items: None,
544                                        unique_items: false,
545                                    })),
546                                },
547                                Value::String(_) => Schema {
548                                    schema_data: SchemaData::default(),
549                                    schema_kind: SchemaKind::Type(Type::String(StringType {
550                                        enumeration: vec![],
551                                        min_length: None,
552                                        max_length: None,
553                                        pattern: None,
554                                        format: VariantOrUnknownOrEmpty::Empty,
555                                    })),
556                                },
557                                Value::Number(n) => {
558                                    if n.is_f64() {
559                                        Schema {
560                                            schema_data: SchemaData::default(),
561                                            schema_kind: SchemaKind::Type(Type::Number(
562                                                NumberType {
563                                                    minimum: None,
564                                                    maximum: None,
565                                                    exclusive_minimum: false,
566                                                    exclusive_maximum: false,
567                                                    multiple_of: None,
568                                                    enumeration: vec![],
569                                                    format: VariantOrUnknownOrEmpty::Empty,
570                                                },
571                                            )),
572                                        }
573                                    } else {
574                                        Schema {
575                                            schema_data: SchemaData::default(),
576                                            schema_kind: SchemaKind::Type(Type::Integer(
577                                                IntegerType {
578                                                    minimum: None,
579                                                    maximum: None,
580                                                    exclusive_minimum: false,
581                                                    exclusive_maximum: false,
582                                                    multiple_of: None,
583                                                    enumeration: vec![],
584                                                    format: VariantOrUnknownOrEmpty::Item(
585                                                        IntegerFormat::Int64,
586                                                    ),
587                                                },
588                                            )),
589                                        }
590                                    }
591                                }
592                                Value::Bool(_) => Schema {
593                                    schema_data: SchemaData::default(),
594                                    schema_kind: SchemaKind::Type(Type::Boolean(BooleanType {
595                                        enumeration: vec![],
596                                    })),
597                                },
598                                Value::Null => Schema {
599                                    schema_data: SchemaData::default(),
600                                    schema_kind: SchemaKind::Type(Type::Object(ObjectType {
601                                        properties: indexmap::IndexMap::new(),
602                                        required: vec![],
603                                        additional_properties: None,
604                                        ..Default::default()
605                                    })),
606                                },
607                            };
608
609                            media_type.schema = Some(ReferenceOr::Item(schema));
610                            content.insert("application/json".to_string(), media_type);
611                            response_obj.content = content;
612                        }
613                    }
614                }
615
616                responses.responses.insert(status, ReferenceOr::Item(response_obj));
617            }
618        }
619
620        operation.responses = responses;
621
622        Ok(operation)
623    }
624
625    /// Calculate confidence scores for inferred paths
626    pub fn calculate_confidence_scores(
627        &self,
628        normalized_paths: &HashMap<String, Vec<&HttpExchange>>,
629        exchanges: &[HttpExchange],
630    ) -> HashMap<String, ConfidenceScore> {
631        let mut scores = HashMap::new();
632
633        for (path, path_exchanges) in normalized_paths {
634            // Confidence based on:
635            // 1. Number of examples (more = higher confidence)
636            // 2. Consistency of status codes
637            // 3. Presence of request/response bodies
638
639            let example_count = path_exchanges.len();
640            let example_ratio = (example_count as f64) / (exchanges.len() as f64);
641
642            // Check status code consistency
643            let status_codes: Vec<i32> =
644                path_exchanges.iter().filter_map(|e| e.status_code).collect();
645            let unique_statuses =
646                status_codes.iter().collect::<std::collections::HashSet<_>>().len();
647            let consistency = if unique_statuses <= 2 { 1.0 } else { 0.7 };
648
649            // Check for request/response bodies
650            let has_bodies =
651                path_exchanges.iter().any(|e| e.body.is_some() || e.response_body.is_some());
652            let body_score = if has_bodies { 1.0 } else { 0.5 };
653
654            // Calculate overall confidence
655            let confidence = (example_ratio * 0.4 + consistency * 0.3 + body_score * 0.3).min(1.0);
656
657            let reason = format!(
658                "Based on {} examples ({}% of total), {} unique status codes, {}",
659                example_count,
660                (example_ratio * 100.0) as u32,
661                unique_statuses,
662                if has_bodies {
663                    "with request/response bodies"
664                } else {
665                    "without bodies"
666                }
667            );
668
669            scores.insert(
670                path.clone(),
671                ConfidenceScore {
672                    value: confidence,
673                    reason,
674                },
675            );
676        }
677
678        scores
679    }
680}
681
#[cfg(test)]
mod tests {
    use super::*;

    /// Generator built from the default configuration (no behavior model).
    fn default_generator() -> OpenApiSpecGenerator {
        OpenApiSpecGenerator::new(OpenApiGenerationConfig::default())
    }

    /// Numeric segments map to `id`; segments mentioning `user` map to `userId`.
    #[test]
    fn test_infer_parameter_name() {
        let spec_generator = default_generator();
        let cases = [("123", "id"), ("user123", "userId")];

        for (segment, expected) in cases {
            assert_eq!(spec_generator.infer_parameter_name(segment), expected);
        }
    }

    /// A JSON object sample yields a schema whose `type` is `object`.
    #[test]
    fn test_json_to_schema() {
        let sample = json!({"name": "test", "age": 25});
        let inferred = default_generator().json_to_schema(&sample);

        assert!(inferred.get("type").is_some());
        assert_eq!(inferred["type"], "object");
    }
}
701}