1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
//! Sequence learning from recorded traffic
//!
//! This module provides functionality to discover and model multi-step
//! flows from real traffic using trace correlation.
use crate::behavioral_cloning::types::BehavioralSequence;
use crate::scenarios::ScenarioDefinition;
use crate::Result;
use async_trait::async_trait;
use std::collections::HashMap;
/// Trait for querying trace data for sequence learning
#[async_trait]
pub trait TraceQueryProvider: Send + Sync {
/// Get requests grouped by trace_id, ordered by timestamp
///
/// Returns a vector of (trace_id, requests) tuples where requests
/// are ordered by timestamp within each trace.
async fn get_requests_by_trace(
&self,
min_requests_per_trace: Option<usize>,
) -> Result<Vec<(String, Vec<TraceRequest>)>>;
}
/// A request in a trace for sequence learning
#[derive(Debug, Clone)]
pub struct TraceRequest {
/// Request ID
pub id: String,
/// HTTP method
pub method: String,
/// Request path
pub path: String,
/// Timestamp
pub timestamp: chrono::DateTime<chrono::Utc>,
/// Duration in milliseconds (time until next request in sequence)
pub duration_ms: Option<u64>,
}
/// Sequence learner for discovering behavioral patterns
pub struct SequenceLearner;
impl SequenceLearner {
/// Discover sequences from trace correlation
///
/// Uses trace_id/span_id to find correlated request sequences
/// and identifies common patterns.
///
/// This implementation:
/// 1. Queries database for requests grouped by trace_id
/// 2. Orders by timestamp within each trace
/// 3. Identifies common subsequences (e.g., login → list → detail)
/// 4. Calculates transition probabilities between steps
/// 5. Returns reusable behavioral sequences
pub async fn discover_sequences_from_traces(
provider: &dyn TraceQueryProvider,
min_frequency: f64,
min_requests_per_trace: Option<usize>,
) -> Result<Vec<BehavioralSequence>> {
// Query database for requests grouped by trace_id
let trace_groups = provider.get_requests_by_trace(min_requests_per_trace).await?;
if trace_groups.is_empty() {
return Ok(Vec::new());
}
// Convert trace groups to sequences of (endpoint, method, delay) tuples
let mut sequences: Vec<Vec<(String, String, Option<u64>)>> = Vec::new();
let mut trace_ids: Vec<String> = Vec::new();
for (trace_id, requests) in trace_groups {
trace_ids.push(trace_id.clone());
let mut sequence: Vec<(String, String, Option<u64>)> = Vec::new();
// Sort requests by timestamp (should already be sorted, but ensure)
let mut sorted_requests = requests;
sorted_requests.sort_by_key(|r| r.timestamp);
// Build sequence with delays between requests
for (idx, request) in sorted_requests.iter().enumerate() {
let delay = if idx > 0 {
let prev_timestamp = sorted_requests[idx - 1].timestamp;
let duration =
request.timestamp.signed_duration_since(prev_timestamp).num_milliseconds();
if duration > 0 {
Some(duration as u64)
} else {
None
}
} else {
None
};
sequence.push((request.path.clone(), request.method.clone(), delay));
}
if !sequence.is_empty() {
sequences.push(sequence);
}
}
// Use existing learn_sequence_pattern to identify common patterns
// This will match sequences and group them, but we need to track which trace_ids
// contributed to each learned sequence
let learned_sequences = Self::learn_sequence_pattern(&sequences, min_frequency)?;
// Map learned sequences back to trace IDs by matching patterns
let mut result = Vec::new();
for mut learned_seq in learned_sequences {
// Find all trace IDs that match this learned sequence pattern
let mut contributing_traces = Vec::new();
for (trace_idx, sequence) in sequences.iter().enumerate() {
if trace_idx < trace_ids.len() {
// Check if this sequence matches the learned pattern
if sequence.len() == learned_seq.steps.len() {
let mut matches = true;
for (step_idx, step) in learned_seq.steps.iter().enumerate() {
if step_idx < sequence.len() {
let (path, method, _) = &sequence[step_idx];
if step.endpoint != *path || step.method != *method {
matches = false;
break;
}
} else {
matches = false;
break;
}
}
if matches {
contributing_traces.push(trace_ids[trace_idx].clone());
}
}
}
}
learned_seq.learned_from = contributing_traces;
result.push(learned_seq);
}
Ok(result)
}
/// Learn sequence pattern from a set of request sequences
///
/// Analyzes multiple sequences to extract common patterns
/// and calculate confidence scores.
///
/// Each sequence is represented as (endpoint, method, delay_ms) tuples.
pub fn learn_sequence_pattern(
sequences: &[Vec<(String, String, Option<u64>)>], // (endpoint, method, delay)
min_frequency: f64,
) -> Result<Vec<BehavioralSequence>> {
if sequences.is_empty() {
return Ok(Vec::new());
}
// Find common subsequences using longest common subsequence (LCS) approach
// For simplicity, we'll find exact matches first, then look for patterns
// Step 1: Normalize sequences to (method, endpoint) pairs for comparison
let normalized: Vec<Vec<(String, String)>> = sequences
.iter()
.map(|seq| {
seq.iter()
.map(|(endpoint, method, _)| (method.clone(), endpoint.clone()))
.collect()
})
.collect();
// Step 2: Find exact sequence matches
#[allow(clippy::type_complexity)]
let mut sequence_counts: HashMap<Vec<(String, String)>, (usize, Vec<usize>)> =
HashMap::new();
for (idx, seq) in normalized.iter().enumerate() {
let entry = sequence_counts.entry(seq.clone()).or_insert_with(|| (0, Vec::new()));
entry.0 += 1;
entry.1.push(idx);
}
// Step 3: Build BehavioralSequences from frequent patterns
let total_sequences = sequences.len() as f64;
let mut learned_sequences = Vec::new();
for (pattern, (count, indices)) in sequence_counts {
let frequency = count as f64 / total_sequences;
if frequency < min_frequency {
continue;
}
// Calculate confidence based on consistency
// Higher confidence if the pattern appears consistently with similar delays
let mut delays_by_position: Vec<Vec<u64>> = vec![Vec::new(); pattern.len()];
for &idx in &indices {
let original_seq = &sequences[idx];
for (pos, (_, _, delay)) in original_seq.iter().enumerate() {
if let Some(d) = delay {
if pos < delays_by_position.len() {
delays_by_position[pos].push(*d);
}
}
}
}
// Calculate average delays and variance for confidence
let mut steps = Vec::new();
let mut total_variance = 0.0;
for (pos, (method, endpoint)) in pattern.iter().enumerate() {
let avg_delay =
if pos < delays_by_position.len() && !delays_by_position[pos].is_empty() {
let delays = &delays_by_position[pos];
let avg = delays.iter().sum::<u64>() as f64 / delays.len() as f64;
let variance = delays
.iter()
.map(|&d| {
let diff = d as f64 - avg;
diff * diff
})
.sum::<f64>()
/ delays.len() as f64;
total_variance += variance;
Some(avg as u64)
} else {
None
};
let mut step = crate::behavioral_cloning::types::SequenceStep::new(
endpoint.clone(),
method.clone(),
);
if let Some(delay) = avg_delay {
step.expected_delay_ms = Some(delay);
}
// Calculate transition probability (how often this step follows the previous)
let transition_prob = if pos == 0 {
1.0 // First step always happens
} else {
// Count how many sequences have this step after the previous
let prev_pattern = &pattern[..pos];
let matching_sequences = normalized
.iter()
.filter(|seq| {
seq.len() > pos
&& seq[..pos] == *prev_pattern
&& seq[pos] == (method.clone(), endpoint.clone())
})
.count();
matching_sequences as f64 / count as f64
};
step.probability = transition_prob;
steps.push(step);
}
// Confidence is based on frequency and consistency (inverse of variance)
let avg_variance = total_variance / pattern.len() as f64;
let consistency_score = 1.0 / (1.0 + avg_variance / 1000.0); // Normalize variance
let confidence = frequency * 0.7 + consistency_score * 0.3;
// Generate sequence ID and name
let sequence_id = format!(
"seq_{}_{}",
pattern[0].0.to_lowercase(),
pattern[0].1.replace('/', "_").replace(['{', '}'], "")
);
let sequence_name =
format!("{} {} → {} steps", pattern[0].0, pattern[0].1, pattern.len());
let learned_from: Vec<String> =
indices.iter().map(|&idx| format!("trace_{}", idx)).collect();
let sequence = BehavioralSequence::new(sequence_id, sequence_name)
.with_frequency(frequency)
.with_confidence(confidence)
.with_learned_from(learned_from);
// Add steps
let mut seq_with_steps = sequence;
for step in steps {
seq_with_steps = seq_with_steps.add_step(step);
}
learned_sequences.push(seq_with_steps);
}
// Sort by frequency and confidence
learned_sequences.sort_by(|a, b| {
b.frequency
.partial_cmp(&a.frequency)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| {
b.confidence.partial_cmp(&a.confidence).unwrap_or(std::cmp::Ordering::Equal)
})
});
Ok(learned_sequences)
}
/// Generate a scenario definition from a learned sequence
///
/// Converts a BehavioralSequence into a ScenarioDefinition
/// that can be executed by the ScenarioExecutor.
pub fn generate_sequence_scenario(sequence: &BehavioralSequence) -> ScenarioDefinition {
use crate::scenarios::ScenarioStep;
let mut scenario =
ScenarioDefinition::new(&sequence.id, &sequence.name).with_tags(sequence.tags.clone());
if let Some(description) = &sequence.description {
scenario.description = Some(description.clone());
}
// Convert sequence steps to scenario steps
for (idx, step) in sequence.steps.iter().enumerate() {
let mut scenario_step = ScenarioStep::new(
format!("step_{}", idx),
step.name.as_deref().unwrap_or(&step.endpoint),
&step.method,
&step.endpoint,
)
.expect_status(200); // Default, could be learned
if let Some(delay) = step.expected_delay_ms {
scenario_step.delay_ms = Some(delay);
}
// Add conditions as query params if needed
for (key, value) in &step.conditions {
scenario_step.query_params.insert(key.clone(), value.clone());
}
scenario = scenario.add_step(scenario_step);
}
scenario
}
/// Check if an incoming request matches a learned sequence
///
/// Returns the matching sequence and current step index if found.
pub fn match_sequence<'a>(
sequences: &'a [BehavioralSequence],
endpoint: &str,
method: &str,
conditions: Option<&HashMap<String, String>>,
) -> Option<(&'a BehavioralSequence, usize)> {
// Find sequences that start with this endpoint/method
for sequence in sequences {
if let Some(first_step) = sequence.steps.first() {
// Check if endpoint and method match
if first_step.endpoint == endpoint && first_step.method == method {
// Check conditions if provided
if let Some(conditions_map) = conditions {
let mut matches = true;
for (key, value) in &first_step.conditions {
if let Some(actual_value) = conditions_map.get(key) {
if actual_value != value {
matches = false;
break;
}
} else {
matches = false;
break;
}
}
if !matches {
continue;
}
} else if !first_step.conditions.is_empty() {
// Sequence requires conditions but none provided
continue;
}
// Found a match - return sequence with step index 0
return Some((sequence, 0));
}
}
}
None
}
/// Find the next step in a sequence given the current step
///
/// Returns the next step index if found, or None if sequence is complete.
pub fn find_next_step(
sequence: &BehavioralSequence,
current_step_idx: usize,
endpoint: &str,
method: &str,
) -> Option<usize> {
if current_step_idx + 1 >= sequence.steps.len() {
return None; // Sequence complete
}
let next_step = &sequence.steps[current_step_idx + 1];
if next_step.endpoint == endpoint && next_step.method == method {
Some(current_step_idx + 1)
} else {
None
}
}
}