denet 0.5.0

a simple process monitor
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
//! eBPF-specific metrics structures

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// eBPF profiling metrics
///
/// Container for the results of one eBPF collection pass. Every field is
/// optional, and a field that is `None` is left out of the serialized output
/// entirely rather than being written as `null`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EbpfMetrics {
    /// Syscall frequency counts
    #[serde(skip_serializing_if = "Option::is_none")]
    pub syscalls: Option<SyscallMetrics>,

    /// Off-CPU profiling data
    #[serde(skip_serializing_if = "Option::is_none")]
    pub offcpu: Option<OffCpuMetrics>,

    /// Error message if eBPF collection failed
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}

impl EbpfMetrics {
    /// Build a metrics value that carries only an error description.
    ///
    /// `message` is copied into the `error` field; no syscall or off-CPU data
    /// is attached.
    pub fn error(message: &str) -> Self {
        Self {
            error: Some(message.to_owned()),
            ..Self::default()
        }
    }

    /// Build a metrics value holding syscall data only.
    pub fn with_syscalls(syscalls: SyscallMetrics) -> Self {
        Self {
            syscalls: Some(syscalls),
            ..Self::default()
        }
    }

    /// Build a metrics value holding off-CPU profiling data only.
    pub fn with_offcpu(offcpu: OffCpuMetrics) -> Self {
        Self {
            offcpu: Some(offcpu),
            ..Self::default()
        }
    }

    /// Build a metrics value holding both syscall and off-CPU data, with no
    /// error set.
    pub fn with_all(syscalls: SyscallMetrics, offcpu: OffCpuMetrics) -> Self {
        Self {
            syscalls: Some(syscalls),
            offcpu: Some(offcpu),
            ..Self::default()
        }
    }

    /// Returns `true` when an error message is present.
    pub fn has_error(&self) -> bool {
        self.error.is_some()
    }
}

/// System call frequency metrics with enhanced analysis
///
/// `by_category` is keyed by the category names returned by
/// `categorize_syscall` (for example `"file_io"` or `"network"`);
/// `generate_syscall_analysis` reads the `file_io`, `memory`, `network` and
/// `process` keys from it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct SyscallMetrics {
    /// Total number of syscalls
    pub total: u64,

    /// Syscalls by category
    pub by_category: HashMap<String, u64>,

    /// Top 10 most frequent individual syscalls
    ///
    /// The limit is a convention of the code that fills this field; the type
    /// itself does not enforce it.
    pub top_syscalls: Vec<SyscallCount>,

    /// Enhanced syscall analysis for bottleneck diagnosis
    ///
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub analysis: Option<SyscallAnalysis>,
}

/// Enhanced syscall analysis for process behavior diagnosis
///
/// Produced by `generate_syscall_analysis`. When that function has too little
/// data (fewer than one syscall, or an observation window under 0.1 s) every
/// numeric field is `0.0` and the classification is `ProcessBehavior::Unknown`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyscallAnalysis {
    /// Process behavior classification
    pub behavior_classification: ProcessBehavior,

    /// Syscalls per second rate
    ///
    /// Total syscall count divided by the elapsed seconds.
    pub syscall_rate_per_sec: f64,

    /// I/O intensity (0.0 to 1.0)
    ///
    /// Share of all syscalls that fall in the `file_io` category.
    pub io_intensity: f64,

    /// Memory management intensity (0.0 to 1.0)
    ///
    /// Share of all syscalls that fall in the `memory` category.
    pub memory_intensity: f64,

    /// CPU-related syscall intensity (0.0 to 1.0)
    ///
    /// As filled by `generate_syscall_analysis`, this is the share of syscalls
    /// in the `process` category, used as a proxy; it is not derived from the
    /// measured CPU usage.
    pub cpu_intensity: f64,

    /// Network activity intensity (0.0 to 1.0)
    ///
    /// Share of all syscalls that fall in the `network` category.
    pub network_intensity: f64,

    /// Detected bottleneck indicators
    ///
    /// Short machine-readable labels such as `"high_file_io"` or
    /// `"cpu_intensive"`.
    pub bottleneck_indicators: Vec<String>,

    /// Performance characteristics
    pub performance_profile: PerformanceProfile,
}

/// Process behavior classification based on syscall patterns
///
/// Serialized as snake_case strings (for example `IoBound` becomes
/// `"io_bound"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum ProcessBehavior {
    /// High I/O syscall activity
    IoBound,
    /// Low syscall activity, high CPU usage
    CpuBound,
    /// High memory management syscalls
    MemoryBound,
    /// High network syscall activity
    NetworkBound,
    /// Mixed workload
    Mixed,
    /// Insufficient data for classification, or no rule matched
    Unknown,
}

/// Performance profile characteristics
///
/// Built by `generate_performance_profile` from a `ProcessBehavior`, or with
/// placeholder values by `generate_syscall_analysis` when there is too little
/// data.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceProfile {
    /// Estimated workload type
    ///
    /// One of `"file_io_intensive"`, `"cpu_intensive"`, `"memory_intensive"`,
    /// `"network_intensive"`, `"mixed_workload"`, `"unknown"` or
    /// `"insufficient_data"` as produced by the functions in this file.
    pub workload_type: String,

    /// Performance bottleneck likelihood (0.0 to 1.0)
    ///
    /// A fixed value per workload type, not a measured probability.
    pub bottleneck_likelihood: f64,

    /// Optimization suggestions
    ///
    /// Human-readable hints; empty for the unknown and insufficient-data cases.
    pub optimization_hints: Vec<String>,
}

/// Individual syscall count
///
/// Element type of `SyscallMetrics::top_syscalls`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyscallCount {
    /// Syscall name
    pub name: String,
    /// Number of times called
    pub count: u64,
}

/// Syscall number-to-name lookup table
///
/// Despite the constant's name, each entry maps a syscall number to its
/// *name*, not to a category; `syscall_name` searches this table, while the
/// category of a number is decided separately by `categorize_syscall`. The
/// `//` headings below only group the entries for readability.
///
/// NOTE(review): the numbers look like the x86_64 Linux syscall numbering;
/// other architectures use different numbers — confirm the target.
pub const SYSCALL_CATEGORIES: &[(u64, &str)] = &[
    // File I/O
    (0, "read"),     // SYS_read
    (1, "write"),    // SYS_write
    (2, "open"),     // SYS_open
    (3, "close"),    // SYS_close
    (8, "lseek"),    // SYS_lseek
    (257, "openat"), // SYS_openat
    // Memory management
    (9, "mmap"),          // SYS_mmap
    (11, "munmap"),       // SYS_munmap
    (12, "brk"),          // SYS_brk
    // Signal handling (`categorize_syscall` reports 13 as "signal", not "memory")
    (13, "rt_sigaction"), // SYS_rt_sigaction
    // Process/thread management
    (56, "clone"),  // SYS_clone
    (57, "fork"),   // SYS_fork
    (58, "vfork"),  // SYS_vfork
    (59, "execve"), // SYS_execve
    (60, "exit"),   // SYS_exit
    (61, "wait4"),  // SYS_wait4
    // Network
    (41, "socket"),   // SYS_socket
    (42, "connect"),  // SYS_connect
    (43, "accept"),   // SYS_accept
    (44, "sendto"),   // SYS_sendto
    (45, "recvfrom"), // SYS_recvfrom
    // Time/scheduling
    (35, "nanosleep"),      // SYS_nanosleep
    (96, "gettimeofday"),   // SYS_gettimeofday
    (201, "time"),          // SYS_time
    (228, "clock_gettime"), // SYS_clock_gettime
];

/// Resolve a syscall number to a printable name.
///
/// Returns the name recorded in `SYSCALL_CATEGORIES` when the number is
/// listed there; otherwise falls back to the synthetic name
/// `syscall_<number>`.
pub fn syscall_name(syscall_nr: u64) -> String {
    for &(number, label) in SYSCALL_CATEGORIES {
        if number == syscall_nr {
            return label.to_owned();
        }
    }

    // Not in the table: still give the caller something identifiable.
    format!("syscall_{}", syscall_nr)
}

/// Map a syscall number to the name of its functional group.
///
/// The returned string is one of:
/// - `file_io`: File and I/O operations
/// - `memory`: Memory allocation and management
/// - `process`: Process and thread management
/// - `network`: Network-related operations
/// - `time`: Time and scheduling operations
/// - `ipc`: Inter-process communication
/// - `security`: Permission and security operations
/// - `signal`: Signal handling
/// - `system`: System configuration and information
/// - `other`: Any number not listed in one of the groups above
pub fn categorize_syscall(syscall_nr: u64) -> String {
    // Contiguous runs of numbers are written as inclusive ranges; the set of
    // numbers in each arm is what matters, not how it is spelled.
    let category: &'static str = match syscall_nr {
        // File I/O operations
        0..=6 | 8 | 16..=22 | 40 | 82..=90 | 132..=133 | 187..=190 | 257..=260 | 263..=265
        | 285..=286 | 293..=296 | 304..=307 => "file_io",

        // Memory management
        9..=12 | 15 | 25..=28 | 158..=160 | 213..=218 | 226 | 273..=276 | 317..=319 => "memory",

        // Process/thread management
        56..=62 | 224 | 231..=236 | 246 | 266..=272 => "process",

        // Network operations
        41..=55 | 198..=200 | 202..=203 | 288..=292 | 326..=332 => "network",

        // Time and scheduling operations
        23..=24 | 35 | 96..=98 | 113..=120 | 201 | 228..=230 | 249 | 252 | 277..=280 => "time",

        // Inter-process communication
        63..=81 => "ipc",

        // Security, permissions, capabilities
        91..=92 | 95 | 123..=126 | 137..=142 | 157 | 163..=166 | 281..=284 => "security",

        // Signal handling
        13..=14 => "signal",

        // System configuration and information
        99..=103 | 153..=156 | 168..=175 => "system",

        // Unknown
        _ => "other",
    };

    category.to_owned()
}

use super::offcpu_profiler::{ProcessedOffCpuEvent, StackFrame};

/// Aggregated stack trace information for display
///
/// Empty stacks are left out of the serialized output. Each field is also
/// marked `#[serde(default)]` so that an omitted field is read back as an
/// empty vector; without it, deserializing output that this type itself
/// produced would fail with a missing-field error.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct AggregatedStacks {
    /// Aggregated user-space stack traces
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub user_stack: Vec<StackFrame>,

    /// Aggregated kernel-space stack traces
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub kernel_stack: Vec<StackFrame>,
}

/// Off-CPU profiling metrics
///
/// Collections that are empty (and `stacks` when `None`) are left out of the
/// serialized output. The skipped collection fields are also marked
/// `#[serde(default)]` so that they are read back as empty when absent;
/// without it, deserializing output that this type itself produced would fail
/// with a missing-field error. `top_blocking_threads` is always serialized,
/// even when empty.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct OffCpuMetrics {
    /// Total time spent off-CPU (nanoseconds)
    pub total_time_ns: u64,

    /// Number of off-CPU events
    pub total_events: u64,

    /// Average time spent off-CPU (nanoseconds)
    pub avg_time_ns: u64,

    /// Maximum time spent off-CPU (nanoseconds)
    pub max_time_ns: u64,

    /// Minimum time spent off-CPU (nanoseconds)
    pub min_time_ns: u64,

    /// Thread-specific off-CPU statistics
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub thread_stats: HashMap<String, ThreadOffCpuStats>,

    /// Top threads ranked by total off-CPU time, descending (max 10 entries).
    ///
    /// Each entry is a summary across all off-CPU events for that thread since
    /// profiling started. `percentage` is the thread's share of the total
    /// off-CPU time accumulated across all monitored threads — not a share of
    /// wall-clock time.
    ///
    /// This is derived directly from the raw per-thread event counters in
    /// `thread_stats`, not from a separate analysis step.
    pub top_blocking_threads: Vec<ThreadOffCpuInfo>,

    /// Analysis of off-CPU bottlenecks
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub bottlenecks: Vec<String>,

    /// Symbolicated stack traces (very verbose, for debugging/export)
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stack_traces: Vec<ProcessedOffCpuEvent>,

    /// Aggregated stack information (for display)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stacks: Option<AggregatedStacks>,
}

/// Thread-specific off-CPU statistics
///
/// Value type of the `OffCpuMetrics::thread_stats` map. All durations are in
/// nanoseconds.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ThreadOffCpuStats {
    /// Thread ID
    pub tid: u32,

    /// Total time spent off-CPU (nanoseconds)
    pub total_time_ns: u64,

    /// Number of off-CPU events
    pub count: u64,

    /// Average time spent off-CPU (nanoseconds)
    pub avg_time_ns: u64,

    /// Maximum time spent off-CPU (nanoseconds)
    pub max_time_ns: u64,

    /// Minimum time spent off-CPU (nanoseconds)
    pub min_time_ns: u64,
}

/// One entry in `top_blocking_threads`: a per-thread off-CPU summary.
///
/// Appears in JSON as (fields are emitted in declaration order):
/// ```json
/// { "tid": 1235, "pid": 1234, "time_ms": 450.2, "percentage": 33.33 }
/// ```
///
/// `time_ms` is the cumulative off-CPU time for this thread since monitoring
/// started. `percentage` is its share of the combined off-CPU time across all
/// monitored threads (not of wall-clock time).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThreadOffCpuInfo {
    /// Thread ID
    pub tid: u32,

    /// Process ID (TGID)
    pub pid: u32,

    /// Total time spent off-CPU in milliseconds
    ///
    /// Named `time_ms` in the serialized form, for both serialization and
    /// deserialization.
    #[serde(rename = "time_ms")]
    pub total_time_ms: f64,

    /// This thread's share of total off-CPU time across all monitored threads
    ///
    /// Rounded to two decimal places only when serialized; the value stored
    /// in memory keeps its full precision.
    #[serde(serialize_with = "serialize_percentage_2dp")]
    pub percentage: f64,
}

/// Serde `serialize_with` hook that writes a percentage rounded to two
/// decimal places.
///
/// Only the emitted value is rounded; the `f64` behind `value` is not
/// modified. The `&f64` parameter is the shape `serialize_with` expects.
fn serialize_percentage_2dp<S>(value: &f64, serializer: S) -> Result<S::Ok, S::Error>
where
    S: serde::Serializer,
{
    // Scale up, round to the nearest whole number, then scale back down.
    let hundredths = (*value * 100.0).round();
    serializer.serialize_f64(hundredths / 100.0)
}

/// Analysis of off-CPU patterns
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type or stores it in `OffCpuMetrics`; the scale of the percentage fields
/// (0–100 versus 0.0–1.0) is therefore not established here — confirm with
/// the code that produces it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OffCpuAnalysis {
    /// Classification of what's causing the most off-CPU time
    pub bottleneck_type: OffCpuBottleneckType,

    /// Percentage of time spent in I/O-related waits
    pub io_wait_percentage: f64,

    /// Percentage of time spent in lock contention
    pub lock_contention_percentage: f64,

    /// Percentage of time spent in sleep/idle
    pub sleep_percentage: f64,

    /// Optimization suggestions
    pub optimization_hints: Vec<String>,
}

/// Classification of off-CPU bottlenecks
///
/// Serialized as snake_case strings (for example `IoBlocked` becomes
/// `"io_blocked"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum OffCpuBottleneckType {
    /// Blocking I/O operations
    IoBlocked,

    /// Lock contention
    LockContention,

    /// Voluntary sleep/yield
    Sleep,

    /// Various mixed causes
    Mixed,

    /// Unknown cause
    Unknown,
}

/// Derive a [`SyscallAnalysis`] from raw syscall counters.
///
/// * `metrics` – total syscall count plus per-category counts; the
///   `file_io`, `memory`, `network` and `process` categories are read.
/// * `cpu_usage` – CPU usage of the process, forwarded to the classification
///   and bottleneck helpers (NOTE(review): they compare it against 50.0 and
///   80.0, so it is presumably a percentage — confirm with callers).
/// * `elapsed_seconds` – length of the observation window in seconds.
///
/// When fewer than one syscall was recorded or the window is shorter than
/// 0.1 s, an all-zero analysis classified as `Unknown` with workload type
/// `"insufficient_data"` is returned.
pub fn generate_syscall_analysis(
    metrics: &SyscallMetrics,
    cpu_usage: f32,
    elapsed_seconds: f64,
) -> SyscallAnalysis {
    let total = metrics.total as f64;

    // Not enough data to say anything meaningful.
    if total < 1.0 || elapsed_seconds < 0.1 {
        let performance_profile = PerformanceProfile {
            workload_type: "insufficient_data".to_string(),
            bottleneck_likelihood: 0.0,
            optimization_hints: Vec::new(),
        };
        return SyscallAnalysis {
            behavior_classification: ProcessBehavior::Unknown,
            syscall_rate_per_sec: 0.0,
            io_intensity: 0.0,
            memory_intensity: 0.0,
            cpu_intensity: 0.0,
            network_intensity: 0.0,
            bottleneck_indicators: Vec::new(),
            performance_profile,
        };
    }

    // Fraction of all syscalls that belong to one category (0 when absent).
    let share_of = |category: &str| -> f64 {
        metrics.by_category.get(category).copied().unwrap_or(0) as f64 / total
    };
    let io_intensity = share_of("file_io");
    let memory_intensity = share_of("memory");
    let network_intensity = share_of("network");
    let process_intensity = share_of("process");

    let syscall_rate_per_sec = total / elapsed_seconds;
    let cpu_usage = f64::from(cpu_usage);

    let behavior_classification = classify_process_behavior(
        io_intensity,
        memory_intensity,
        network_intensity,
        cpu_usage,
        syscall_rate_per_sec,
    );

    let bottleneck_indicators =
        detect_bottleneck_indicators(&metrics.by_category, syscall_rate_per_sec, cpu_usage);

    let performance_profile = generate_performance_profile(
        &behavior_classification,
        io_intensity,
        memory_intensity,
        network_intensity,
        syscall_rate_per_sec,
    );

    SyscallAnalysis {
        behavior_classification,
        syscall_rate_per_sec,
        io_intensity,
        memory_intensity,
        // Process-management syscalls stand in for CPU activity here.
        cpu_intensity: process_intensity,
        network_intensity,
        bottleneck_indicators,
        performance_profile,
    }
}

/// Pick the [`ProcessBehavior`] that best matches the given intensities.
///
/// Rules are evaluated in priority order and the first match wins:
/// I/O bound, network bound, memory bound, CPU bound, mixed; if none match
/// the result is `Unknown`.
fn classify_process_behavior(
    io_intensity: f64,
    memory_intensity: f64,
    network_intensity: f64,
    cpu_usage: f64,
    syscall_rate: f64,
) -> ProcessBehavior {
    // Mostly file I/O, and enough syscalls for that to matter.
    let io_heavy = io_intensity > 0.6 && syscall_rate > 100.0;
    let network_heavy = network_intensity > 0.4;
    let memory_heavy = memory_intensity > 0.3;
    // Few syscalls while the CPU is busy: the work happens in user space.
    let compute_heavy = syscall_rate < 50.0 && cpu_usage > 50.0;
    let blended = io_intensity > 0.2 && memory_intensity > 0.1;

    if io_heavy {
        ProcessBehavior::IoBound
    } else if network_heavy {
        ProcessBehavior::NetworkBound
    } else if memory_heavy {
        ProcessBehavior::MemoryBound
    } else if compute_heavy {
        ProcessBehavior::CpuBound
    } else if blended {
        ProcessBehavior::Mixed
    } else {
        ProcessBehavior::Unknown
    }
}

/// Collect labels for every bottleneck heuristic that fires.
///
/// `by_category` holds absolute syscall counts per category (missing
/// categories count as zero); `syscall_rate` is syscalls per second and
/// `cpu_usage` is compared against 80.0. The returned labels keep a fixed
/// order: file I/O, syscall rate, memory, network, CPU, then the combined
/// I/O + memory check.
fn detect_bottleneck_indicators(
    by_category: &HashMap<String, u64>,
    syscall_rate: f64,
    cpu_usage: f64,
) -> Vec<String> {
    let count_of = |category: &str| by_category.get(category).copied().unwrap_or(0) as f64;
    let file_io = count_of("file_io");
    let memory = count_of("memory");
    let network = count_of("network");

    // Each row pairs a heuristic with the label reported when it holds.
    let checks = [
        (file_io > 500.0, "high_file_io"),
        (syscall_rate > 1000.0, "very_high_syscall_rate"),
        (memory > 100.0, "frequent_memory_management"),
        (network > 200.0, "high_network_activity"),
        (cpu_usage > 80.0 && syscall_rate < 100.0, "cpu_intensive"),
        (file_io > 300.0 && memory > 50.0, "io_memory_contention"),
    ];

    checks
        .iter()
        .filter(|(triggered, _)| *triggered)
        .map(|(_, label)| label.to_string())
        .collect()
}

/// Build the [`PerformanceProfile`] that corresponds to a classified
/// behavior.
///
/// Only `behavior` influences the result: each variant maps to a fixed
/// workload label, a fixed bottleneck likelihood and a fixed list of hints.
/// The intensity and rate parameters are accepted but currently unused.
fn generate_performance_profile(
    behavior: &ProcessBehavior,
    _io_intensity: f64,
    _memory_intensity: f64,
    _network_intensity: f64,
    _syscall_rate: f64,
) -> PerformanceProfile {
    // Turn a list of literals into owned hint strings.
    let owned = |hints: &[&str]| -> Vec<String> { hints.iter().map(|h| h.to_string()).collect() };

    let (label, bottleneck_likelihood, optimization_hints) = match behavior {
        ProcessBehavior::IoBound => (
            "file_io_intensive",
            0.8,
            owned(&[
                "Consider I/O optimization strategies",
                "Use async I/O or batching",
                "Check for excessive file operations",
            ]),
        ),
        ProcessBehavior::CpuBound => (
            "cpu_intensive",
            0.7,
            owned(&[
                "CPU optimization opportunities",
                "Consider parallel processing",
                "Profile for algorithmic improvements",
            ]),
        ),
        ProcessBehavior::MemoryBound => (
            "memory_intensive",
            0.75,
            owned(&[
                "Memory allocation optimization needed",
                "Consider memory pooling",
                "Check for memory leaks",
            ]),
        ),
        ProcessBehavior::NetworkBound => (
            "network_intensive",
            0.8,
            owned(&[
                "Network optimization opportunities",
                "Consider connection pooling",
                "Optimize network protocols",
            ]),
        ),
        ProcessBehavior::Mixed => (
            "mixed_workload",
            0.5,
            owned(&[
                "Mixed workload - profile individual components",
                "Consider workload separation",
            ]),
        ),
        ProcessBehavior::Unknown => ("unknown", 0.0, Vec::new()),
    };

    PerformanceProfile {
        workload_type: label.to_string(),
        bottleneck_likelihood,
        optimization_hints,
    }
}