use std::collections::HashMap;
use std::time::{Duration, Instant};
use serde::{Deserialize, Serialize};
use tracing::debug;
use crate::parser::StarFormat;
/// Tunable knobs controlling what the profiler records and how much it retains.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfilingConfig {
    /// When true, each sample carries an (estimated) memory figure.
    pub track_memory: bool,
    /// Timing toggle. NOTE(review): not consulted by the visible code —
    /// durations are always measured; confirm intended semantics.
    pub track_timing: bool,
    /// Fraction of `start_operation` calls that are actually recorded.
    /// `>= 1.0` records everything, `<= 0.0` records nothing.
    pub sample_rate: f64,
    /// Upper bound on retained samples; oldest samples are dropped first.
    pub max_samples: usize,
    /// Statistics toggle. NOTE(review): not consulted by the visible code —
    /// per-operation stats are always maintained; confirm intended semantics.
    pub enable_statistics: bool,
}
impl Default for ProfilingConfig {
fn default() -> Self {
Self {
track_memory: true,
track_timing: true,
sample_rate: 1.0,
max_samples: 10000,
enable_statistics: true,
}
}
}
/// Collects timing (and optional memory) samples for named operations and
/// derives per-operation statistics, trends, and bottleneck reports.
pub struct StarProfiler {
    /// Active configuration (sampling rate, retention cap, feature toggles).
    config: ProfilingConfig,
    /// Retained samples, oldest first; trimmed to `config.max_samples`.
    samples: Vec<ProfileSample>,
    /// Cumulative per-operation statistics, keyed by operation label.
    operation_stats: HashMap<String, OperationStatistics>,
    /// Start instant of the operation currently being timed, if any.
    start_time: Option<Instant>,
    /// Label of the operation currently being timed, if any.
    current_operation: Option<String>,
}
/// One recorded measurement of a single profiled operation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfileSample {
    /// Operation label, e.g. `parse_<format>` / `query_<type>` as built by
    /// the `profile_*` helpers, or any caller-chosen string.
    pub operation: String,
    /// Wall-clock time the operation took.
    pub duration: Duration,
    /// Estimated memory used; present only when `track_memory` is enabled.
    pub memory_used: Option<u64>,
    /// Parsed from the `"input_size"` metadata entry when it is a valid integer.
    pub input_size: Option<usize>,
    /// Parsed from the `"output_size"` metadata entry when it is a valid integer.
    pub output_size: Option<usize>,
    /// Wall-clock moment the sample was recorded (at operation end).
    pub timestamp: std::time::SystemTime,
    /// Free-form key/value annotations supplied by the caller.
    pub metadata: HashMap<String, String>,
}
/// Cumulative statistics for one operation label. Updated incrementally as
/// samples arrive; not recomputed when old samples are trimmed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OperationStatistics {
    /// Number of samples recorded for this operation.
    pub count: usize,
    /// Sum of all sample durations.
    pub total_duration: Duration,
    /// `total_duration / count`.
    pub average_duration: Duration,
    /// Shortest observed duration.
    pub min_duration: Duration,
    /// Longest observed duration.
    pub max_duration: Duration,
    /// Standard deviation of sample durations, in seconds.
    pub std_deviation: f64,
    /// `1 / average_duration` (0.0 until the average is nonzero).
    pub ops_per_second: f64,
    /// Throughput of the most recent sample that reported an input size.
    pub bytes_per_second: Option<f64>,
}
/// Snapshot report produced by [`StarProfiler::generate_report`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProfilingReport {
    /// Configuration the profiler ran with.
    pub config: ProfilingConfig,
    /// Wall-clock span from the earliest to the latest retained sample.
    pub total_duration: Duration,
    /// Number of retained samples at report time.
    pub total_samples: usize,
    /// Per-operation cumulative statistics.
    pub operation_stats: HashMap<String, OperationStatistics>,
    /// Linear-regression trends, one per operation with >= 3 samples.
    pub trends: Vec<PerformanceTrend>,
    /// Memory analysis; `None` when memory tracking is disabled.
    pub memory_patterns: Option<MemoryUsagePattern>,
    /// Operations consuming a disproportionate share of total time.
    pub bottlenecks: Vec<PerformanceBottleneck>,
}
/// Direction of an operation's duration over its recorded samples,
/// estimated by a least-squares fit of duration against sample index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTrend {
    /// Operation label this trend describes.
    pub operation: String,
    /// Timestamp of the first sample in the analysis window.
    pub window_start: std::time::SystemTime,
    /// Timestamp of the last sample in the analysis window.
    pub window_end: std::time::SystemTime,
    /// Mean duration over the window.
    pub average_duration: Duration,
    /// Sign of the fitted slope (durations shrinking = Improving).
    pub trend_direction: TrendDirection,
    /// Heuristic confidence: 0.8 with >= 10 samples, otherwise 0.4.
    pub confidence: f64,
}
/// Aggregate view of the memory figures attached to samples.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryUsagePattern {
    /// Largest per-sample memory figure observed.
    pub peak_memory: u64,
    /// Mean per-sample memory figure.
    pub average_memory: u64,
    /// `average_memory / peak_memory` (0.0 when no samples carry memory data).
    pub efficiency_ratio: f64,
    /// Human-readable warnings, e.g. a rising memory-usage trend.
    pub potential_leaks: Vec<String>,
}
/// An operation flagged for consuming a large share of total execution time
/// (> 20% triggers a bottleneck entry).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceBottleneck {
    /// Operation label.
    pub operation: String,
    /// `time_percentage / 100`, capped at 1.0.
    pub severity: f64,
    /// Human-readable summary of the bottleneck.
    pub description: String,
    /// Heuristic remediation hints.
    pub suggestions: Vec<String>,
    /// Share of total profiled time spent in this operation, 0..=100.
    pub time_percentage: f64,
}
/// Qualitative direction of a duration trend.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum TrendDirection {
    /// Durations are decreasing over time (negative slope).
    Improving,
    /// Slope magnitude below the 0.001 threshold.
    Stable,
    /// Durations are increasing over time (positive slope).
    Degrading,
    /// Reserved; not currently produced by `analyze_trends`.
    Unknown,
}
impl StarProfiler {
pub fn new() -> Self {
Self::with_config(ProfilingConfig::default())
}
pub fn with_config(config: ProfilingConfig) -> Self {
Self {
config,
samples: Vec::new(),
operation_stats: HashMap::new(),
start_time: None,
current_operation: None,
}
}
pub fn start_operation(&mut self, operation: &str) {
if self.should_sample() {
self.current_operation = Some(operation.to_string());
self.start_time = Some(Instant::now());
debug!("Started profiling operation: {}", operation);
}
}
pub fn end_operation(&mut self) {
self.end_operation_with_metadata(HashMap::new());
}
pub fn end_operation_with_metadata(&mut self, metadata: HashMap<String, String>) {
if let (Some(operation), Some(start_time)) =
(self.current_operation.clone(), self.start_time)
{
let duration = start_time.elapsed();
let sample = ProfileSample {
operation: operation.clone(),
duration,
memory_used: if self.config.track_memory {
Some(self.estimate_memory_usage())
} else {
None
},
input_size: metadata.get("input_size").and_then(|s| s.parse().ok()),
output_size: metadata.get("output_size").and_then(|s| s.parse().ok()),
timestamp: std::time::SystemTime::now(),
metadata,
};
self.add_sample(sample);
self.current_operation = None;
self.start_time = None;
debug!(
"Finished profiling operation: {} ({}ms)",
operation,
duration.as_millis()
);
}
}
pub fn profile_parsing<F, R>(&mut self, format: StarFormat, input_size: usize, f: F) -> R
where
F: FnOnce() -> R,
{
let operation = format!("parse_{format:?}");
let mut metadata = HashMap::new();
metadata.insert("input_size".to_string(), input_size.to_string());
metadata.insert("format".to_string(), format!("{format:?}"));
self.start_operation(&operation);
let result = f();
self.end_operation_with_metadata(metadata);
result
}
pub fn profile_serialization<F, R>(
&mut self,
format: StarFormat,
input_triples: usize,
f: F,
) -> R
where
F: FnOnce() -> R,
{
let operation = format!("serialize_{format:?}");
let mut metadata = HashMap::new();
metadata.insert("input_triples".to_string(), input_triples.to_string());
metadata.insert("format".to_string(), format!("{format:?}"));
self.start_operation(&operation);
let result = f();
self.end_operation_with_metadata(metadata);
result
}
pub fn profile_query<F, R>(&mut self, query_type: &str, f: F) -> R
where
F: FnOnce() -> R,
{
let operation = format!("query_{query_type}");
let mut metadata = HashMap::new();
metadata.insert("query_type".to_string(), query_type.to_string());
self.start_operation(&operation);
let result = f();
self.end_operation_with_metadata(metadata);
result
}
pub fn add_sample(&mut self, sample: ProfileSample) {
if self.samples.len() >= self.config.max_samples {
let remove_count = self.samples.len() - self.config.max_samples + 1;
self.samples.drain(0..remove_count);
}
self.update_operation_stats(&sample);
self.samples.push(sample);
}
pub fn generate_report(&self) -> ProfilingReport {
let total_duration = self.calculate_total_duration();
let trends = self.analyze_trends();
let memory_patterns = if self.config.track_memory {
Some(self.analyze_memory_patterns())
} else {
None
};
let bottlenecks = self.identify_bottlenecks();
ProfilingReport {
config: self.config.clone(),
total_duration,
total_samples: self.samples.len(),
operation_stats: self.operation_stats.clone(),
trends,
memory_patterns,
bottlenecks,
}
}
pub fn get_operation_samples(&self, operation: &str) -> Vec<&ProfileSample> {
self.samples
.iter()
.filter(|sample| sample.operation == operation)
.collect()
}
pub fn get_recent_samples(&self, count: usize) -> Vec<&ProfileSample> {
let start_index = self.samples.len().saturating_sub(count);
self.samples[start_index..].iter().collect()
}
pub fn clear_samples(&mut self) {
self.samples.clear();
self.operation_stats.clear();
}
pub fn export_json(&self) -> serde_json::Result<String> {
serde_json::to_string_pretty(&self.samples)
}
pub fn import_json(&mut self, json: &str) -> serde_json::Result<()> {
let samples: Vec<ProfileSample> = serde_json::from_str(json)?;
for sample in samples {
self.add_sample(sample);
}
Ok(())
}
fn should_sample(&self) -> bool {
if self.config.sample_rate >= 1.0 {
true
} else if self.config.sample_rate <= 0.0 {
false
} else {
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
let mut hasher = DefaultHasher::new();
std::time::SystemTime::now().hash(&mut hasher);
let hash = hasher.finish();
(hash as f64 / u64::MAX as f64) < self.config.sample_rate
}
}
fn estimate_memory_usage(&self) -> u64 {
1024 * 1024 }
fn update_operation_stats(&mut self, sample: &ProfileSample) {
let stats = self
.operation_stats
.entry(sample.operation.clone())
.or_insert_with(|| OperationStatistics {
count: 0,
total_duration: Duration::ZERO,
average_duration: Duration::ZERO,
min_duration: sample.duration,
max_duration: sample.duration,
std_deviation: 0.0,
ops_per_second: 0.0,
bytes_per_second: None,
});
stats.count += 1;
stats.total_duration += sample.duration;
stats.average_duration = stats.total_duration / stats.count as u32;
stats.min_duration = stats.min_duration.min(sample.duration);
stats.max_duration = stats.max_duration.max(sample.duration);
if stats.average_duration.as_secs_f64() > 0.0 {
stats.ops_per_second = 1.0 / stats.average_duration.as_secs_f64();
}
if let Some(input_size) = sample.input_size {
let bytes_per_sec = input_size as f64 / sample.duration.as_secs_f64();
stats.bytes_per_second = Some(bytes_per_sec);
}
stats.std_deviation = 0.0;
}
fn calculate_total_duration(&self) -> Duration {
if self.samples.is_empty() {
return Duration::ZERO;
}
let earliest = self
.samples
.iter()
.map(|s| s.timestamp)
.min()
.expect("samples validated to be non-empty");
let latest = self
.samples
.iter()
.map(|s| s.timestamp)
.max()
.expect("samples validated to be non-empty");
latest.duration_since(earliest).unwrap_or(Duration::ZERO)
}
fn analyze_trends(&self) -> Vec<PerformanceTrend> {
let mut trends = Vec::new();
for operation in self.operation_stats.keys() {
let samples = self.get_operation_samples(operation);
if samples.len() >= 3 {
let trend = self.calculate_trend_for_operation(operation, &samples);
trends.push(trend);
}
}
trends
}
fn calculate_trend_for_operation(
&self,
operation: &str,
samples: &[&ProfileSample],
) -> PerformanceTrend {
let n = samples.len() as f64;
let sum_x: f64 = (0..samples.len()).map(|i| i as f64).sum();
let sum_y: f64 = samples.iter().map(|s| s.duration.as_secs_f64()).sum();
let sum_xy: f64 = samples
.iter()
.enumerate()
.map(|(i, s)| i as f64 * s.duration.as_secs_f64())
.sum();
let sum_x2: f64 = (0..samples.len()).map(|i| (i as f64).powi(2)).sum();
let slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x.powi(2));
let direction = if slope.abs() < 0.001 {
TrendDirection::Stable
} else if slope < 0.0 {
TrendDirection::Improving } else {
TrendDirection::Degrading
};
let confidence = if n >= 10.0 { 0.8 } else { 0.4 };
PerformanceTrend {
operation: operation.to_string(),
window_start: samples
.first()
.expect("collection validated to be non-empty")
.timestamp,
window_end: samples
.last()
.expect("collection validated to be non-empty")
.timestamp,
average_duration: Duration::from_secs_f64(sum_y / n),
trend_direction: direction,
confidence,
}
}
fn analyze_memory_patterns(&self) -> MemoryUsagePattern {
let memory_samples: Vec<u64> = self.samples.iter().filter_map(|s| s.memory_used).collect();
if memory_samples.is_empty() {
return MemoryUsagePattern {
peak_memory: 0,
average_memory: 0,
efficiency_ratio: 0.0,
potential_leaks: Vec::new(),
};
}
let peak_memory = *memory_samples
.iter()
.max()
.expect("memory_samples validated to be non-empty");
let average_memory = memory_samples.iter().sum::<u64>() / memory_samples.len() as u64;
let efficiency_ratio = if peak_memory > 0 {
average_memory as f64 / peak_memory as f64
} else {
0.0
};
let mut potential_leaks = Vec::new();
if memory_samples.len() > 10 {
let first_half_avg = memory_samples[..memory_samples.len() / 2]
.iter()
.sum::<u64>() as f64
/ (memory_samples.len() / 2) as f64;
let second_half_avg = memory_samples[memory_samples.len() / 2..]
.iter()
.sum::<u64>() as f64
/ (memory_samples.len() / 2) as f64;
if second_half_avg > first_half_avg * 1.5 {
potential_leaks.push("Increasing memory usage trend detected".to_string());
}
}
MemoryUsagePattern {
peak_memory,
average_memory,
efficiency_ratio,
potential_leaks,
}
}
fn identify_bottlenecks(&self) -> Vec<PerformanceBottleneck> {
let mut bottlenecks = Vec::new();
let total_time: Duration = self
.operation_stats
.values()
.map(|stats| stats.total_duration)
.sum();
if total_time.as_secs_f64() == 0.0 {
return bottlenecks;
}
for (operation, stats) in &self.operation_stats {
let time_percentage =
stats.total_duration.as_secs_f64() / total_time.as_secs_f64() * 100.0;
if time_percentage > 20.0 {
let severity = (time_percentage / 100.0).min(1.0);
let mut suggestions = Vec::new();
if stats.average_duration.as_millis() > 100 {
suggestions.push("Consider optimizing algorithm or implementation".to_string());
}
if stats.std_deviation > stats.average_duration.as_secs_f64() * 0.5 {
suggestions.push(
"High variance detected - investigate inconsistent performance".to_string(),
);
}
bottlenecks.push(PerformanceBottleneck {
operation: operation.clone(),
severity,
description: format!(
"Operation consumes {time_percentage:.1}% of total execution time"
),
suggestions,
time_percentage,
});
}
}
bottlenecks.sort_by(|a, b| {
b.severity
.partial_cmp(&a.severity)
.unwrap_or(std::cmp::Ordering::Equal)
});
bottlenecks
}
}
impl Default for StarProfiler {
fn default() -> Self {
Self::new()
}
}
/// Profiles `$code` as `$operation` on `$profiler`, returning the block's
/// value. Records no extra metadata; call
/// `end_operation_with_metadata` directly when sizes are known.
#[macro_export]
macro_rules! profile_operation {
    ($profiler:expr_2021, $operation:expr_2021, $code:block) => {{
        $profiler.start_operation($operation);
        let result = $code;
        $profiler.end_operation();
        result
    }};
}
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh profiler starts with no samples or statistics.
    #[test]
    fn test_profiler_creation() {
        let profiler = StarProfiler::new();
        assert_eq!(profiler.samples.len(), 0);
        assert_eq!(profiler.operation_stats.len(), 0);
    }

    // A start/end pair records exactly one sample with a plausible duration.
    #[test]
    fn test_operation_profiling() {
        let mut profiler = StarProfiler::new();
        profiler.start_operation("test_operation");
        std::thread::sleep(Duration::from_millis(10));
        profiler.end_operation();
        assert_eq!(profiler.samples.len(), 1);
        assert!(profiler.samples[0].duration >= Duration::from_millis(10));
        assert_eq!(profiler.samples[0].operation, "test_operation");
    }

    // Repeated operations accumulate into per-operation statistics.
    #[test]
    fn test_operation_statistics() {
        let mut profiler = StarProfiler::new();
        for _ in 0..5 {
            profiler.start_operation("test_op");
            std::thread::sleep(Duration::from_millis(1));
            profiler.end_operation();
        }
        let stats = profiler.operation_stats.get("test_op").unwrap();
        assert_eq!(stats.count, 5);
        assert!(stats.average_duration > Duration::ZERO);
        assert!(stats.ops_per_second > 0.0);
    }

    // Samples survive a JSON export/import round trip.
    #[test]
    fn test_sample_export_import() {
        let mut profiler = StarProfiler::new();
        profiler.start_operation("export_test");
        profiler.end_operation();
        let json = profiler.export_json().unwrap();
        assert!(!json.is_empty());
        let mut new_profiler = StarProfiler::new();
        new_profiler.import_json(&json).unwrap();
        assert_eq!(new_profiler.samples.len(), 1);
        assert_eq!(new_profiler.samples[0].operation, "export_test");
    }

    // Strictly increasing durations must be classified as Degrading.
    #[test]
    fn test_trend_analysis() {
        let mut profiler = StarProfiler::new();
        for i in 1..=10 {
            let sample = ProfileSample {
                operation: "degrading_op".to_string(),
                duration: Duration::from_millis(i * 10),
                memory_used: None,
                input_size: None,
                output_size: None,
                timestamp: std::time::SystemTime::now(),
                metadata: HashMap::new(),
            };
            profiler.add_sample(sample);
        }
        let report = profiler.generate_report();
        let trends = &report.trends;
        assert!(!trends.is_empty());
        assert_eq!(trends[0].operation, "degrading_op");
        assert!(matches!(
            trends[0].trend_direction,
            TrendDirection::Degrading
        ));
    }
}