use std::collections::HashMap;
use std::time::Instant;
/// Compute backend that an analysis is attributed to.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum HPUBackend {
    /// Rendered as `"GPU"` by `Display`.
    GPU,
    /// Rendered as `"CPU"` by `Display`.
    CPU,
}

impl std::fmt::Display for HPUBackend {
    /// Writes the bare variant name; width/fill flags on the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            HPUBackend::GPU => "GPU",
            HPUBackend::CPU => "CPU",
        };
        f.write_str(label)
    }
}
/// Pairwise similarity scores between syscalls.
///
/// As built by `HPUProfiler::compute_correlation`, `matrix` is square with one
/// row and one column per entry of `syscalls`, in the same order.
#[derive(Debug, Clone)]
pub struct CorrelationResult {
/// Syscall names labelling the rows and columns of `matrix`.
pub syscalls: Vec<String>,
/// `matrix[i][j]` scores `syscalls[i]` against `syscalls[j]`. The profiler
/// writes 1.0 on the diagonal and, elsewhere, the smaller of the two call
/// counts divided by the larger.
pub matrix: Vec<Vec<f32>>,
}
/// One group of syscalls produced by the clustering pass.
#[derive(Debug, Clone)]
pub struct SyscallCluster {
/// Position of this cluster in the result; the profiler numbers clusters from 0.
pub id: usize,
/// Names of the syscalls assigned to this cluster.
pub members: Vec<String>,
/// Cluster centre. The profiler stores a single element here: the mean
/// count of the members.
pub centroid: Vec<f32>,
}
/// Output of the clustering pass.
#[derive(Debug, Clone)]
pub struct ClusteringResult {
/// Number of clusters. The profiler sets this to `clusters.len()`
/// (0 when there were no syscalls to cluster).
pub k: usize,
/// The clusters themselves.
pub clusters: Vec<SyscallCluster>,
}
/// Everything produced by one call to `HPUProfiler::analyze`.
#[derive(Debug)]
pub struct HPUAnalysisReport {
/// Backend of the profiler that produced this report.
pub backend: HPUBackend,
/// Pairwise count-similarity matrix.
pub correlation: CorrelationResult,
/// Grouping of syscalls into clusters.
pub clustering: ClusteringResult,
/// Elapsed time, in microseconds, measured around the correlation and
/// clustering passes.
pub compute_time_us: u64,
}
/// Analyzer that turns per-syscall counts into a correlation matrix and clusters.
pub struct HPUProfiler {
// Value of the `force_cpu` argument given to `new`. It is stored but not read
// by any method visible in this file (hence the leading underscore).
_force_cpu: bool,
// Backend returned by `backend()` and copied into every report.
backend: HPUBackend,
}
impl HPUProfiler {
    /// Creates a profiler.
    ///
    /// `force_cpu` is stored on the profiler, but either value currently selects
    /// [`HPUBackend::CPU`]; this constructor never picks [`HPUBackend::GPU`].
    pub fn new(force_cpu: bool) -> Self {
        Self { _force_cpu: force_cpu, backend: HPUBackend::CPU }
    }

    /// Returns the backend this profiler reports its results under.
    pub fn backend(&self) -> HPUBackend {
        self.backend
    }

    /// Runs the correlation and clustering passes over `syscall_data` and times them.
    ///
    /// The map is keyed by syscall name. Only the first element of each
    /// `(u64, u64)` value is read; it is treated as the syscall's count. The
    /// second element is ignored here.
    ///
    /// Names are processed in sorted order, so the matrix layout and the
    /// cluster membership are the same on every run for the same input.
    pub fn analyze(&self, syscall_data: &HashMap<String, (u64, u64)>) -> HPUAnalysisReport {
        let start = Instant::now();

        // `HashMap` iteration order is unspecified and varies between runs; sorting
        // pins the row/column order of the matrix and the order within clusters.
        let mut syscalls: Vec<String> = syscall_data.keys().cloned().collect();
        syscalls.sort_unstable();

        let correlation = self.compute_correlation(&syscalls, syscall_data);
        let clustering = self.compute_kmeans(&syscalls, syscall_data);

        // `as_micros` yields a u128; saturate rather than wrap when narrowing.
        let compute_time_us = start.elapsed().as_micros().min(u128::from(u64::MAX)) as u64;

        HPUAnalysisReport { backend: self.backend, correlation, clustering, compute_time_us }
    }

    /// Similarity of two non-negative counts: the smaller divided by the larger.
    ///
    /// Two zero counts are identical, so they score 1.0 instead of the NaN that
    /// `0.0 / 0.0` would produce.
    fn count_ratio(a: f32, b: f32) -> f32 {
        let (lo, hi) = if a > b { (b, a) } else { (a, b) };
        if hi > 0.0 {
            lo / hi
        } else {
            1.0
        }
    }

    /// Builds the `n x n` similarity matrix for `syscalls`.
    ///
    /// The diagonal is 1.0; every other cell is [`Self::count_ratio`] of the two
    /// syscalls' counts. A name missing from `data` is given a count of 1.
    fn compute_correlation(
        &self,
        syscalls: &[String],
        data: &HashMap<String, (u64, u64)>,
    ) -> CorrelationResult {
        // Look each count up once instead of twice per matrix cell.
        // The u64 -> f32 conversion is lossy for very large counts; the ratio only
        // needs approximate magnitudes.
        let counts: Vec<f32> = syscalls
            .iter()
            .map(|name| data.get(name).map_or(1, |(c, _)| *c) as f32)
            .collect();

        let matrix: Vec<Vec<f32>> = counts
            .iter()
            .enumerate()
            .map(|(i, &ci)| {
                counts
                    .iter()
                    .enumerate()
                    .map(|(j, &cj)| if i == j { 1.0 } else { Self::count_ratio(ci, cj) })
                    .collect()
            })
            .collect();

        CorrelationResult { syscalls: syscalls.to_vec(), matrix }
    }

    /// Groups syscalls into clusters by count.
    ///
    /// Despite the name, no iterative k-means is run: syscalls are ranked by
    /// descending count and the ranking is cut into consecutive, near-equal
    /// chunks. The target number of chunks depends on how many syscalls there
    /// are (1-2 -> 1, 3-5 -> 2, 6-10 -> 3, more -> 4). Each cluster's centroid
    /// is a one-element vector holding the mean count of its members. A name
    /// missing from `data` is given a count of 0.
    fn compute_kmeans(
        &self,
        syscalls: &[String],
        data: &HashMap<String, (u64, u64)>,
    ) -> ClusteringResult {
        if syscalls.is_empty() {
            return ClusteringResult { k: 0, clusters: Vec::new() };
        }

        let k = match syscalls.len() {
            1..=2 => 1,
            3..=5 => 2,
            6..=10 => 3,
            _ => 4,
        };

        // Rank borrowed names; each name is cloned only once, when it lands in a cluster.
        let mut ranked: Vec<(&str, u64)> = syscalls
            .iter()
            .map(|name| (name.as_str(), data.get(name).map_or(0, |(c, _)| *c)))
            .collect();
        // Highest count first; equal counts fall back to name order so the result
        // does not depend on the order the caller supplied.
        ranked.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));

        // `k >= 1` here because the empty case returned above.
        let chunk_size = ranked.len().div_ceil(k);
        let clusters: Vec<SyscallCluster> = ranked
            .chunks(chunk_size)
            .enumerate()
            .map(|(id, chunk)| {
                let total: f32 = chunk.iter().map(|&(_, count)| count as f32).sum();
                SyscallCluster {
                    id,
                    members: chunk.iter().map(|&(name, _)| name.to_owned()).collect(),
                    centroid: vec![total / chunk.len() as f32],
                }
            })
            .collect();

        // Report the number of clusters actually produced.
        ClusteringResult { k: clusters.len(), clusters }
    }
}
impl HPUAnalysisReport {
    /// Renders the report as human-readable, multi-line text.
    ///
    /// The output has a header (backend and compute time), the correlation
    /// matrix as a fixed-width table, and the list of clusters with their
    /// members. Syscall names in the table are shortened to at most 10
    /// characters. A matrix cell that is absent (the matrix is smaller than the
    /// list of syscall names) is shown as `-` rather than causing a panic.
    pub fn format(&self) -> String {
        // Longest prefix of `name` holding at most 10 characters. Cutting at a
        // character boundary keeps the slice valid for non-ASCII names; a raw
        // 10-byte cut can land inside a multi-byte character and panic.
        fn label(name: &str) -> &str {
            name.char_indices().nth(10).map_or(name, |(end, _)| &name[..end])
        }

        let names = &self.correlation.syscalls;
        let mut output = String::new();

        output.push_str("\n=== HPU Analysis Report ===\n");
        output.push_str(&format!("HPU Backend: {}\n", self.backend));
        output.push_str(&format!("Compute time: {}us\n\n", self.compute_time_us));

        output.push_str("--- Correlation Matrix ---\n");
        if !names.is_empty() {
            // Blank cell as wide as the row labels below, so each column header
            // sits directly above its column of values.
            output.push_str(&format!("{:10}", ""));
            for name in names {
                output.push_str(&format!("{:>10}", label(name)));
            }
            output.push('\n');

            for (i, name) in names.iter().enumerate() {
                output.push_str(&format!("{:10}", label(name)));
                let row = self.correlation.matrix.get(i);
                for j in 0..names.len() {
                    match row.and_then(|cells| cells.get(j)) {
                        Some(value) => output.push_str(&format!("{:10.3}", value)),
                        None => output.push_str(&format!("{:>10}", "-")),
                    }
                }
                output.push('\n');
            }
        }
        output.push('\n');

        output.push_str("--- K-means Clustering ---\n");
        output.push_str(&format!("Number of clusters: {}\n", self.clustering.k));
        for cluster in &self.clustering.clusters {
            output.push_str(&format!(
                "Cluster {}: {} syscalls\n",
                cluster.id,
                cluster.members.len()
            ));
            for member in &cluster.members {
                output.push_str(&format!(" - {member}\n"));
            }
        }

        output
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the profiler input from `(name, first, second)` rows.
    fn table(rows: &[(&str, u64, u64)]) -> HashMap<String, (u64, u64)> {
        rows.iter().map(|&(name, first, second)| (name.to_string(), (first, second))).collect()
    }

    // Forcing the CPU yields the CPU backend.
    #[test]
    fn test_hpu_profiler_cpu_backend() {
        assert_eq!(HPUProfiler::new(true).backend(), HPUBackend::CPU);
    }

    // Without forcing, the backend is still CPU.
    #[test]
    fn test_hpu_profiler_default_backend() {
        assert_eq!(HPUProfiler::new(false).backend(), HPUBackend::CPU);
    }

    // An empty input map produces an empty list of correlated syscalls.
    #[test]
    fn test_correlation_matrix_empty() {
        let report = HPUProfiler::new(true).analyze(&table(&[]));
        assert!(report.correlation.syscalls.is_empty());
    }

    // Three syscalls give a 3-row matrix whose diagonal is exactly 1.0.
    #[test]
    fn test_correlation_matrix_basic() {
        let input = table(&[("open", 30, 1000), ("write", 30, 2000), ("close", 30, 500)]);
        let report = HPUProfiler::new(true).analyze(&input);
        let correlation = &report.correlation;
        assert_eq!(correlation.syscalls.len(), 3);
        assert_eq!(correlation.matrix.len(), 3);
        for (i, row) in correlation.matrix.iter().enumerate() {
            assert_eq!(row[i], 1.0);
        }
    }

    // Five syscalls are split into two non-empty clusters.
    #[test]
    fn test_kmeans_clustering() {
        let input = table(&[
            ("open", 100, 1000),
            ("write", 100, 2000),
            ("close", 100, 500),
            ("read", 50, 1000),
            ("mmap", 10, 5000),
        ]);
        let report = HPUProfiler::new(true).analyze(&input);
        assert_eq!(report.clustering.k, 2);
        assert!(!report.clustering.clusters.is_empty());
    }

    // The rendered report carries every section heading and the backend name.
    #[test]
    fn test_report_format() {
        let input = table(&[("open", 30, 1000), ("write", 30, 2000)]);
        let formatted = HPUProfiler::new(true).analyze(&input).format();
        let expected_fragments = [
            "HPU Analysis Report",
            "HPU Backend: CPU",
            "Correlation Matrix",
            "K-means Clustering",
            "Cluster",
        ];
        for fragment in expected_fragments.iter() {
            assert!(formatted.contains(fragment));
        }
    }

    // `Display` prints the bare variant name.
    #[test]
    fn test_backend_display() {
        assert_eq!(format!("{}", HPUBackend::GPU), "GPU");
        assert_eq!(format!("{}", HPUBackend::CPU), "CPU");
    }
}