mockforge_intelligence/behavioral_cloning/
sequence_learner.rs1use crate::behavioral_cloning::types::BehavioralSequence;
7use async_trait::async_trait;
8use mockforge_core::scenarios::ScenarioDefinition;
9use mockforge_core::Result;
10use std::collections::HashMap;
11
12#[async_trait]
14pub trait TraceQueryProvider: Send + Sync {
15 async fn get_requests_by_trace(
20 &self,
21 min_requests_per_trace: Option<usize>,
22 ) -> Result<Vec<(String, Vec<TraceRequest>)>>;
23}
24
25#[derive(Debug, Clone)]
27pub struct TraceRequest {
28 pub id: String,
30 pub method: String,
32 pub path: String,
34 pub timestamp: chrono::DateTime<chrono::Utc>,
36 pub duration_ms: Option<u64>,
38}
39
40pub struct SequenceLearner;
42
43impl SequenceLearner {
44 pub async fn discover_sequences_from_traces(
56 provider: &dyn TraceQueryProvider,
57 min_frequency: f64,
58 min_requests_per_trace: Option<usize>,
59 ) -> Result<Vec<BehavioralSequence>> {
60 let trace_groups = provider.get_requests_by_trace(min_requests_per_trace).await?;
62
63 if trace_groups.is_empty() {
64 return Ok(Vec::new());
65 }
66
67 let mut sequences: Vec<Vec<(String, String, Option<u64>)>> = Vec::new();
69 let mut trace_ids: Vec<String> = Vec::new();
70
71 for (trace_id, requests) in trace_groups {
72 trace_ids.push(trace_id.clone());
73 let mut sequence: Vec<(String, String, Option<u64>)> = Vec::new();
74
75 let mut sorted_requests = requests;
77 sorted_requests.sort_by_key(|r| r.timestamp);
78
79 for (idx, request) in sorted_requests.iter().enumerate() {
81 let delay = if idx > 0 {
82 let prev_timestamp = sorted_requests[idx - 1].timestamp;
83 let duration =
84 request.timestamp.signed_duration_since(prev_timestamp).num_milliseconds();
85 if duration > 0 {
86 Some(duration as u64)
87 } else {
88 None
89 }
90 } else {
91 None
92 };
93
94 sequence.push((request.path.clone(), request.method.clone(), delay));
95 }
96
97 if !sequence.is_empty() {
98 sequences.push(sequence);
99 }
100 }
101
102 let learned_sequences = Self::learn_sequence_pattern(&sequences, min_frequency)?;
106
107 let mut result = Vec::new();
109 for mut learned_seq in learned_sequences {
110 let mut contributing_traces = Vec::new();
112 for (trace_idx, sequence) in sequences.iter().enumerate() {
113 if trace_idx < trace_ids.len() {
114 if sequence.len() == learned_seq.steps.len() {
116 let mut matches = true;
117 for (step_idx, step) in learned_seq.steps.iter().enumerate() {
118 if step_idx < sequence.len() {
119 let (path, method, _) = &sequence[step_idx];
120 if step.endpoint != *path || step.method != *method {
121 matches = false;
122 break;
123 }
124 } else {
125 matches = false;
126 break;
127 }
128 }
129 if matches {
130 contributing_traces.push(trace_ids[trace_idx].clone());
131 }
132 }
133 }
134 }
135 learned_seq.learned_from = contributing_traces;
136 result.push(learned_seq);
137 }
138
139 Ok(result)
140 }
141
142 pub fn learn_sequence_pattern(
149 sequences: &[Vec<(String, String, Option<u64>)>], min_frequency: f64,
151 ) -> Result<Vec<BehavioralSequence>> {
152 if sequences.is_empty() {
153 return Ok(Vec::new());
154 }
155
156 let normalized: Vec<Vec<(String, String)>> = sequences
161 .iter()
162 .map(|seq| {
163 seq.iter()
164 .map(|(endpoint, method, _)| (method.clone(), endpoint.clone()))
165 .collect()
166 })
167 .collect();
168
169 #[allow(clippy::type_complexity)]
171 let mut sequence_counts: HashMap<Vec<(String, String)>, (usize, Vec<usize>)> =
172 HashMap::new();
173 for (idx, seq) in normalized.iter().enumerate() {
174 let entry = sequence_counts.entry(seq.clone()).or_insert_with(|| (0, Vec::new()));
175 entry.0 += 1;
176 entry.1.push(idx);
177 }
178
179 let total_sequences = sequences.len() as f64;
181 let mut learned_sequences = Vec::new();
182
183 for (pattern, (count, indices)) in sequence_counts {
184 let frequency = count as f64 / total_sequences;
185 if frequency < min_frequency {
186 continue;
187 }
188
189 let mut delays_by_position: Vec<Vec<u64>> = vec![Vec::new(); pattern.len()];
192 for &idx in &indices {
193 let original_seq = &sequences[idx];
194 for (pos, (_, _, delay)) in original_seq.iter().enumerate() {
195 if let Some(d) = delay {
196 if pos < delays_by_position.len() {
197 delays_by_position[pos].push(*d);
198 }
199 }
200 }
201 }
202
203 let mut steps = Vec::new();
205 let mut total_variance = 0.0;
206 for (pos, (method, endpoint)) in pattern.iter().enumerate() {
207 let avg_delay =
208 if pos < delays_by_position.len() && !delays_by_position[pos].is_empty() {
209 let delays = &delays_by_position[pos];
210 let avg = delays.iter().sum::<u64>() as f64 / delays.len() as f64;
211 let variance = delays
212 .iter()
213 .map(|&d| {
214 let diff = d as f64 - avg;
215 diff * diff
216 })
217 .sum::<f64>()
218 / delays.len() as f64;
219 total_variance += variance;
220 Some(avg as u64)
221 } else {
222 None
223 };
224
225 let mut step = crate::behavioral_cloning::types::SequenceStep::new(
226 endpoint.clone(),
227 method.clone(),
228 );
229 if let Some(delay) = avg_delay {
230 step.expected_delay_ms = Some(delay);
231 }
232 let transition_prob = if pos == 0 {
234 1.0 } else {
236 let prev_pattern = &pattern[..pos];
238 let matching_sequences = normalized
239 .iter()
240 .filter(|seq| {
241 seq.len() > pos
242 && seq[..pos] == *prev_pattern
243 && seq[pos] == (method.clone(), endpoint.clone())
244 })
245 .count();
246 matching_sequences as f64 / count as f64
247 };
248 step.probability = transition_prob;
249 steps.push(step);
250 }
251
252 let avg_variance = total_variance / pattern.len() as f64;
254 let consistency_score = 1.0 / (1.0 + avg_variance / 1000.0); let confidence = frequency * 0.7 + consistency_score * 0.3;
256
257 let sequence_id = format!(
259 "seq_{}_{}",
260 pattern[0].0.to_lowercase(),
261 pattern[0].1.replace('/', "_").replace(['{', '}'], "")
262 );
263 let sequence_name =
264 format!("{} {} → {} steps", pattern[0].0, pattern[0].1, pattern.len());
265
266 let learned_from: Vec<String> =
267 indices.iter().map(|&idx| format!("trace_{}", idx)).collect();
268
269 let sequence = BehavioralSequence::new(sequence_id, sequence_name)
270 .with_frequency(frequency)
271 .with_confidence(confidence)
272 .with_learned_from(learned_from);
273
274 let mut seq_with_steps = sequence;
276 for step in steps {
277 seq_with_steps = seq_with_steps.add_step(step);
278 }
279
280 learned_sequences.push(seq_with_steps);
281 }
282
283 learned_sequences.sort_by(|a, b| {
285 b.frequency
286 .partial_cmp(&a.frequency)
287 .unwrap_or(std::cmp::Ordering::Equal)
288 .then_with(|| {
289 b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)
290 })
291 });
292
293 Ok(learned_sequences)
294 }
295
296 pub fn generate_sequence_scenario(sequence: &BehavioralSequence) -> ScenarioDefinition {
301 use mockforge_core::scenarios::ScenarioStep;
302
303 let mut scenario =
304 ScenarioDefinition::new(&sequence.id, &sequence.name).with_tags(sequence.tags.clone());
305
306 if let Some(description) = &sequence.description {
307 scenario.description = Some(description.clone());
308 }
309
310 for (idx, step) in sequence.steps.iter().enumerate() {
312 let mut scenario_step = ScenarioStep::new(
313 format!("step_{}", idx),
314 step.name.as_deref().unwrap_or(&step.endpoint),
315 &step.method,
316 &step.endpoint,
317 )
318 .expect_status(200); if let Some(delay) = step.expected_delay_ms {
321 scenario_step.delay_ms = Some(delay);
322 }
323
324 for (key, value) in &step.conditions {
326 scenario_step.query_params.insert(key.clone(), value.clone());
327 }
328
329 scenario = scenario.add_step(scenario_step);
330 }
331
332 scenario
333 }
334
335 pub fn match_sequence<'a>(
339 sequences: &'a [BehavioralSequence],
340 endpoint: &str,
341 method: &str,
342 conditions: Option<&HashMap<String, String>>,
343 ) -> Option<(&'a BehavioralSequence, usize)> {
344 for sequence in sequences {
346 if let Some(first_step) = sequence.steps.first() {
347 if first_step.endpoint == endpoint && first_step.method == method {
349 if let Some(conditions_map) = conditions {
351 let mut matches = true;
352 for (key, value) in &first_step.conditions {
353 if let Some(actual_value) = conditions_map.get(key) {
354 if actual_value != value {
355 matches = false;
356 break;
357 }
358 } else {
359 matches = false;
360 break;
361 }
362 }
363 if !matches {
364 continue;
365 }
366 } else if !first_step.conditions.is_empty() {
367 continue;
369 }
370
371 return Some((sequence, 0));
373 }
374 }
375 }
376
377 None
378 }
379
380 pub fn find_next_step(
384 sequence: &BehavioralSequence,
385 current_step_idx: usize,
386 endpoint: &str,
387 method: &str,
388 ) -> Option<usize> {
389 if current_step_idx + 1 >= sequence.steps.len() {
390 return None; }
392
393 let next_step = &sequence.steps[current_step_idx + 1];
394 if next_step.endpoint == endpoint && next_step.method == method {
395 Some(current_step_idx + 1)
396 } else {
397 None
398 }
399 }
400}