use crate::parser::{SarifError, SarifResult as ParseResult};
use crate::types::{Result as SarifResult, SarifLog};
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant};
/// Collects timing samples, memory figures and named counters for instrumented operations.
#[derive(Debug, Clone)]
pub struct PerformanceMonitor {
/// Recorded durations, keyed by operation name.
pub timings: HashMap<String, Vec<Duration>>,
/// Memory figures maintained by `update_memory_stats`.
pub memory_stats: MemoryStats,
/// Named event counters; `increment_counter` bumps an entry by one.
pub counters: HashMap<String, usize>,
/// Settings that gate whether timings and memory figures are recorded.
pub config: PerformanceConfig,
}
/// Memory figures reported to a `PerformanceMonitor`.
#[derive(Debug, Clone, Default)]
pub struct MemoryStats {
/// Largest memory figure reported so far, in megabytes.
pub peak_memory_mb: f64,
/// Most recently reported memory figure, in megabytes.
pub current_memory_mb: f64,
/// Number of memory updates recorded (one per `update_memory_stats` call while tracking is on).
pub allocation_count: usize,
/// Last reported memory figure for each operation name.
pub memory_by_operation: HashMap<String, f64>,
}
/// Settings for `PerformanceMonitor` and related performance helpers.
#[derive(Debug, Clone)]
pub struct PerformanceConfig {
/// When `false`, `update_memory_stats` ignores its input.
pub track_memory: bool,
/// When `false`, `record_timing` discards samples.
pub enable_timing: bool,
/// NOTE(review): not read anywhere in this file; presumably a memory limit in megabytes — confirm.
pub memory_threshold_mb: f64,
/// NOTE(review): not read anywhere in this file; presumably a processing batch size — confirm.
pub batch_size: usize,
/// NOTE(review): not read anywhere in this file; presumably toggles parallel processing — confirm.
pub enable_parallel: bool,
/// Cache sizing and expiry settings.
pub cache_config: CacheConfig,
}
/// Cache limits. None of these fields is consulted in this file, so the
/// per-field notes below are assumptions to confirm against the consumers.
#[derive(Debug, Clone)]
pub struct CacheConfig {
/// Presumably the maximum number of cached logs — TODO confirm.
pub max_logs: usize,
/// Presumably the maximum number of cached indexed results — TODO confirm.
pub max_indexed_results: usize,
/// Presumably the maximum number of cached query results — TODO confirm.
pub max_query_results: usize,
/// Presumably the entry lifetime in seconds — TODO confirm.
pub ttl_seconds: u64,
}
/// A pool of reusable `T` values backed by a factory for creating new ones.
pub struct MemoryPool<T> {
/// Idle objects waiting to be handed out again.
pool: Vec<T>,
/// Creates a fresh object when the pool has none to hand out.
factory: Box<dyn Fn() -> T + Send + Sync>,
/// Maximum number of idle objects retained; objects returned beyond this are dropped.
max_size: usize,
/// Usage counters for this pool.
stats: PoolStats,
}
/// Usage counters for a `MemoryPool`.
#[derive(Debug, Default)]
pub struct PoolStats {
/// Number of `borrow` calls.
pub borrowed: AtomicUsize,
/// Number of `return_object` calls, including those whose object was dropped.
pub returned: AtomicUsize,
/// Number of objects created by the factory because the pool was empty.
pub created: AtomicUsize,
/// Largest number of idle objects the pool has held.
pub peak_size: AtomicUsize,
}
/// Bundles a performance monitor, a result pool, a string interner and a
/// computation cache.
///
/// No constructor or methods for this type are visible in this file; the
/// fields are currently unused here, hence the `dead_code` allowance.
#[allow(dead_code)]
pub struct OptimizedSarifProcessor {
/// Timing, memory and counter collection.
monitor: PerformanceMonitor,
/// Shared pool of reusable SARIF result objects.
result_pool: Arc<MemoryPool<SarifResult>>,
/// Deduplicating string store.
string_interner: StringInterner,
/// Memoized fingerprints, paths and rule lookups.
computation_cache: ComputationCache,
}
/// Deduplicates strings by handing out shared `Arc<String>` handles.
#[derive(Debug)]
pub struct StringInterner {
/// Maps string contents to the shared handle and the number of `intern`
/// calls not yet balanced by `release`.
strings: HashMap<String, (Arc<String>, usize)>,
/// Usage counters for this interner.
stats: InternerStats,
}
/// Usage counters for a `StringInterner`.
#[derive(Debug, Clone, Default)]
pub struct InternerStats {
/// Number of times a string was newly inserted into the interner.
pub total_interned: usize,
/// Number of `intern` calls answered from an existing entry.
pub cache_hits: usize,
/// Running byte total maintained by `StringInterner::intern`.
pub memory_saved_bytes: usize,
}
/// Memoizes computed strings (fingerprints, resolved paths) and rule lookups.
#[derive(Debug)]
pub struct ComputationCache {
/// Fingerprints keyed by the caller-supplied key.
fingerprint_cache: HashMap<String, String>,
/// Resolved paths keyed by the caller-supplied key.
path_cache: HashMap<String, String>,
/// Rule lookups; in this file the map is only created and cleared.
rule_cache: HashMap<String, Option<Arc<crate::types::ReportingDescriptor>>>,
/// Hit/miss counters per cache type.
stats: CacheStats,
}
/// Per-cache-type counters for a `ComputationCache`.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
/// Lookups served from the cache, keyed by cache type (e.g. "fingerprint", "path").
pub hits_by_type: HashMap<String, usize>,
/// Lookups that had to compute a value, keyed by cache type.
pub misses_by_type: HashMap<String, usize>,
/// Eviction counts by cache type; never written in this file.
pub evictions_by_type: HashMap<String, usize>,
}
/// Runs configured operations against registered datasets and keeps the results.
pub struct SarifBenchmark {
/// Datasets registered through `add_dataset`.
pub test_datasets: Vec<BenchmarkDataset>,
/// Results of the most recent successful `run_benchmarks` call.
pub results: Vec<BenchmarkResult>,
/// Iteration counts and the list of operations to run.
pub config: BenchmarkConfig,
}
/// A named SARIF log together with descriptive figures about it.
#[derive(Debug, Clone)]
pub struct BenchmarkDataset {
/// Label copied into each `BenchmarkResult` produced from this dataset.
pub name: String,
/// The SARIF log the operations run against.
pub log: SarifLog,
/// Size and shape figures used to derive throughput.
pub characteristics: DatasetCharacteristics,
}
/// Descriptive figures for a benchmark dataset.
#[derive(Debug, Clone)]
pub struct DatasetCharacteristics {
/// Number of runs in the log.
pub run_count: usize,
/// Total number of results; numerator of `results_per_second`.
pub result_count: usize,
/// Number of files; numerator of `files_per_second`.
pub file_count: usize,
/// Number of rules.
pub rule_count: usize,
/// Average number of results per file.
pub avg_results_per_file: f64,
/// Dataset size in megabytes; numerator of `mb_per_second`.
pub size_mb: f64,
}
/// Outcome of benchmarking one operation against one dataset.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
/// Name of the benchmarked operation.
pub operation: String,
/// Name of the dataset the operation ran against.
pub dataset: String,
/// Mean duration over the measured iterations.
pub duration: Duration,
/// Mean per-iteration change in reported memory usage, in megabytes.
pub memory_usage_mb: f64,
/// Throughput derived from the dataset characteristics and `duration`.
pub throughput: ThroughputMetrics,
/// Extra metrics; left empty by `run_single_benchmark`.
pub custom_metrics: HashMap<String, f64>,
}
/// Throughput figures: dataset quantities divided by the mean duration in seconds.
#[derive(Debug, Clone)]
pub struct ThroughputMetrics {
/// Dataset result count per second.
pub results_per_second: f64,
/// Dataset file count per second.
pub files_per_second: f64,
/// Dataset size in megabytes per second.
pub mb_per_second: f64,
}
/// Settings for `SarifBenchmark`.
#[derive(Debug, Clone)]
pub struct BenchmarkConfig {
/// Number of measured executions per operation/dataset pair.
pub iterations: usize,
/// Number of unmeasured executions performed before measuring.
pub warmup_iterations: usize,
/// NOTE(review): not read anywhere in this file; presumably toggles memory profiling — confirm.
pub profile_memory: bool,
/// Operation names to run; `execute_operation` accepts "parse", "index",
/// "query" and "conversion" and rejects anything else.
pub operations: Vec<String>,
}
impl PerformanceMonitor {
    /// Creates a monitor with no recorded timings or counters and default
    /// memory statistics.
    pub fn new(config: PerformanceConfig) -> Self {
        Self {
            timings: HashMap::new(),
            memory_stats: MemoryStats::default(),
            counters: HashMap::new(),
            config,
        }
    }

    /// Starts timing `operation`; the elapsed time is recorded on this
    /// monitor when the returned handle is dropped.
    ///
    /// The handle stores a raw pointer to this monitor, so it must be dropped
    /// while the monitor is still alive and has not been moved.
    pub fn start_timing(&mut self, operation: &str) -> TimingHandle {
        TimingHandle {
            operation: operation.to_string(),
            start_time: Instant::now(),
            monitor: self as *mut PerformanceMonitor,
        }
    }

    /// Appends `duration` to the samples of `operation`.
    ///
    /// Does nothing when `config.enable_timing` is `false`.
    pub fn record_timing(&mut self, operation: &str, duration: Duration) {
        if self.config.enable_timing {
            self.timings
                .entry(operation.to_string())
                .or_default()
                .push(duration);
        }
    }

    /// Adds one to the counter named `counter`, creating it at zero first if needed.
    pub fn increment_counter(&mut self, counter: &str) {
        *self.counters.entry(counter.to_string()).or_insert(0) += 1;
    }

    /// Records `memory_mb` as the current figure for `operation`, raising the
    /// peak if it is exceeded.
    ///
    /// Does nothing when `config.track_memory` is `false`.
    pub fn update_memory_stats(&mut self, operation: &str, memory_mb: f64) {
        if !self.config.track_memory {
            return;
        }
        let stats = &mut self.memory_stats;
        stats.current_memory_mb = memory_mb;
        if memory_mb > stats.peak_memory_mb {
            stats.peak_memory_mb = memory_mb;
        }
        stats
            .memory_by_operation
            .insert(operation.to_string(), memory_mb);
        stats.allocation_count += 1;
    }

    /// Returns the mean of the samples recorded for `operation`.
    ///
    /// Returns `None` when the operation is unknown or has no samples
    /// (previously an empty sample list caused a division-by-zero panic).
    pub fn get_average_timing(&self, operation: &str) -> Option<Duration> {
        self.timings
            .get(operation)
            .and_then(|times| Self::mean_duration(times))
    }

    /// Returns the sample at the given `percentile` (0–100) for `operation`.
    ///
    /// The sorted sample at index `floor(percentile / 100 * n)` is used,
    /// clamped to the last sample. Returns `None` when the operation is
    /// unknown or has no samples.
    pub fn get_percentile_timing(&self, operation: &str, percentile: f64) -> Option<Duration> {
        let times = self.timings.get(operation)?;
        let mut sorted = times.clone();
        sorted.sort_unstable();
        Self::percentile_of_sorted(&sorted, percentile)
    }

    /// Builds a snapshot report: per-operation statistics for every operation
    /// with at least one sample, plus copies of the memory stats and counters.
    pub fn generate_report(&self) -> PerformanceReport {
        let mut operation_stats = HashMap::new();
        for (operation, times) in &self.timings {
            // Sort once; min, max and all percentiles are read from this copy.
            let mut sorted = times.clone();
            sorted.sort_unstable();
            let (min, max, avg) = match (
                sorted.first(),
                sorted.last(),
                Self::mean_duration(&sorted),
            ) {
                (Some(&min), Some(&max), Some(avg)) => (min, max, avg),
                // No samples for this operation: nothing to report.
                _ => continue,
            };
            let total: Duration = sorted.iter().sum();
            operation_stats.insert(
                operation.clone(),
                OperationStats {
                    count: sorted.len(),
                    total_duration: total,
                    average_duration: avg,
                    min_duration: min,
                    max_duration: max,
                    p50: Self::percentile_of_sorted(&sorted, 50.0).unwrap_or(avg),
                    p95: Self::percentile_of_sorted(&sorted, 95.0).unwrap_or(max),
                    p99: Self::percentile_of_sorted(&sorted, 99.0).unwrap_or(max),
                },
            );
        }
        PerformanceReport {
            operation_stats,
            memory_stats: self.memory_stats.clone(),
            counters: self.counters.clone(),
            total_operations: self.counters.values().sum(),
        }
    }

    /// Mean of `samples`, or `None` when there are none.
    fn mean_duration(samples: &[Duration]) -> Option<Duration> {
        const NANOS_PER_SEC: u128 = 1_000_000_000;
        if samples.is_empty() {
            return None;
        }
        let total: Duration = samples.iter().sum();
        // Divide in u128 nanoseconds so the sample count is never truncated to u32.
        let mean_nanos = total.as_nanos() / samples.len() as u128;
        Some(Duration::new(
            (mean_nanos / NANOS_PER_SEC) as u64,
            (mean_nanos % NANOS_PER_SEC) as u32,
        ))
    }

    /// Picks the percentile sample from an already sorted slice.
    fn percentile_of_sorted(sorted: &[Duration], percentile: f64) -> Option<Duration> {
        let last = sorted.len().checked_sub(1)?;
        let index = ((percentile / 100.0) * sorted.len() as f64) as usize;
        sorted.get(index.min(last)).copied()
    }
}
/// Guard returned by `PerformanceMonitor::start_timing`; its `Drop`
/// implementation records the elapsed time on the monitor.
pub struct TimingHandle {
/// Name of the operation being timed.
operation: String,
/// Instant at which timing started.
start_time: Instant,
/// Raw pointer to the monitor that created this handle; nothing ties its
/// validity to the handle's lifetime.
monitor: *mut PerformanceMonitor,
}
impl Drop for TimingHandle {
    /// Reports the time elapsed since the handle was created to the monitor
    /// it points at; a null monitor pointer makes this a no-op.
    fn drop(&mut self) {
        let elapsed = self.start_time.elapsed();
        // SAFETY: NOTE(review) — this is only sound if the monitor that created
        // the handle is still alive, has not been moved, and is not otherwise
        // borrowed right now. Nothing in the types enforces that; confirm at
        // every call site.
        if let Some(monitor) = unsafe { self.monitor.as_mut() } {
            monitor.record_timing(&self.operation, elapsed);
        }
    }
}
/// Snapshot produced by `PerformanceMonitor::generate_report`.
#[derive(Debug, Clone)]
pub struct PerformanceReport {
/// Timing statistics for each operation that has at least one sample.
pub operation_stats: HashMap<String, OperationStats>,
/// Copy of the monitor's memory statistics.
pub memory_stats: MemoryStats,
/// Copy of the monitor's counters.
pub counters: HashMap<String, usize>,
/// Sum of all counter values.
pub total_operations: usize,
}
/// Timing statistics for a single operation.
#[derive(Debug, Clone)]
pub struct OperationStats {
/// Number of recorded samples.
pub count: usize,
/// Sum of all samples.
pub total_duration: Duration,
/// Mean sample duration.
pub average_duration: Duration,
/// Shortest sample.
pub min_duration: Duration,
/// Longest sample.
pub max_duration: Duration,
/// 50th-percentile sample.
pub p50: Duration,
/// 95th-percentile sample.
pub p95: Duration,
/// 99th-percentile sample.
pub p99: Duration,
}
impl<T> MemoryPool<T> {
    /// Creates an empty pool that keeps at most `max_size` idle objects and
    /// uses `factory` to build new ones on demand.
    ///
    /// Storage for `max_size` objects is reserved up front.
    pub fn new<F>(factory: F, max_size: usize) -> Self
    where
        F: Fn() -> T + Send + Sync + 'static,
    {
        let pool = Vec::with_capacity(max_size);
        let stats = PoolStats::default();
        Self {
            pool,
            factory: Box::new(factory),
            max_size,
            stats,
        }
    }

    /// Hands out an idle object if one is available, otherwise creates a new
    /// one with the factory.
    pub fn borrow(&mut self) -> T {
        self.stats.borrowed.fetch_add(1, Ordering::Relaxed);
        match self.pool.pop() {
            Some(reused) => reused,
            None => {
                self.stats.created.fetch_add(1, Ordering::Relaxed);
                (self.factory)()
            }
        }
    }

    /// Gives `obj` back to the pool.
    ///
    /// The object is dropped instead of stored when the pool already holds
    /// `max_size` idle objects; the call is counted either way.
    pub fn return_object(&mut self, obj: T) {
        self.stats.returned.fetch_add(1, Ordering::Relaxed);
        if self.pool.len() >= self.max_size {
            return;
        }
        self.pool.push(obj);
        // Raise the high-water mark if the pool just grew past it.
        self.stats
            .peak_size
            .fetch_max(self.pool.len(), Ordering::Relaxed);
    }

    /// Returns a copy of the current counters.
    pub fn get_stats(&self) -> PoolStats {
        let snapshot =
            |counter: &AtomicUsize| AtomicUsize::new(counter.load(Ordering::Relaxed));
        PoolStats {
            borrowed: snapshot(&self.stats.borrowed),
            returned: snapshot(&self.stats.returned),
            created: snapshot(&self.stats.created),
            peak_size: snapshot(&self.stats.peak_size),
        }
    }
}
impl Default for StringInterner {
fn default() -> Self {
Self::new()
}
}
impl StringInterner {
    /// Creates an empty interner with zeroed statistics.
    pub fn new() -> Self {
        Self {
            strings: HashMap::new(),
            stats: InternerStats::default(),
        }
    }

    /// Returns a shared handle for `s`, reusing the existing allocation when
    /// the same contents were interned before and not yet fully released.
    ///
    /// Each call bumps the entry's reference count; balance it with
    /// [`release`](Self::release).
    pub fn intern(&mut self, s: &str) -> Arc<String> {
        if let Some((shared, ref_count)) = self.strings.get_mut(s) {
            *ref_count += 1;
            self.stats.cache_hits += 1;
            // A hit is where deduplication pays off: the caller did not need
            // a second copy of these bytes.
            self.stats.memory_saved_bytes += s.len();
            return shared.clone();
        }
        let shared = Arc::new(s.to_string());
        self.strings
            .insert(s.to_string(), (Arc::clone(&shared), 1));
        self.stats.total_interned += 1;
        shared
    }

    /// Drops one reference to `s`; the entry is removed once its count
    /// reaches zero. Unknown strings are ignored.
    pub fn release(&mut self, s: &str) {
        let exhausted = match self.strings.get_mut(s) {
            Some((_, ref_count)) => {
                *ref_count -= 1;
                *ref_count == 0
            }
            None => false,
        };
        if exhausted {
            self.strings.remove(s);
        }
    }

    /// Returns the interner's usage counters.
    pub fn get_stats(&self) -> &InternerStats {
        &self.stats
    }
}
impl Default for ComputationCache {
fn default() -> Self {
Self::new()
}
}
impl ComputationCache {
    /// Creates a cache with no entries and zeroed statistics.
    pub fn new() -> Self {
        Self {
            fingerprint_cache: HashMap::new(),
            path_cache: HashMap::new(),
            rule_cache: HashMap::new(),
            stats: CacheStats::default(),
        }
    }

    /// Returns the fingerprint stored under `key`, calling `compute_fn` and
    /// storing its result only when no entry exists yet.
    ///
    /// Counts a "fingerprint" hit or miss accordingly.
    pub fn get_fingerprint<F>(&mut self, key: &str, compute_fn: F) -> String
    where
        F: FnOnce() -> String,
    {
        match self.fingerprint_cache.get(key).cloned() {
            Some(cached) => {
                self.record_hit("fingerprint");
                cached
            }
            None => {
                // The miss is counted before the computation runs.
                self.record_miss("fingerprint");
                let computed = compute_fn();
                self.fingerprint_cache
                    .insert(key.to_owned(), computed.clone());
                computed
            }
        }
    }

    /// Returns the path stored under `key`, calling `resolve_fn` and storing
    /// its result only when no entry exists yet.
    ///
    /// Counts a "path" hit or miss accordingly.
    pub fn get_path<F>(&mut self, key: &str, resolve_fn: F) -> String
    where
        F: FnOnce() -> String,
    {
        match self.path_cache.get(key).cloned() {
            Some(cached) => {
                self.record_hit("path");
                cached
            }
            None => {
                // The miss is counted before the resolution runs.
                self.record_miss("path");
                let resolved = resolve_fn();
                self.path_cache.insert(key.to_owned(), resolved.clone());
                resolved
            }
        }
    }

    /// Empties all three caches; the hit/miss statistics are kept.
    pub fn clear(&mut self) {
        self.rule_cache.clear();
        self.path_cache.clear();
        self.fingerprint_cache.clear();
    }

    /// Returns the hit/miss statistics.
    pub fn get_stats(&self) -> &CacheStats {
        &self.stats
    }

    /// Adds one hit for `cache_type`.
    fn record_hit(&mut self, cache_type: &str) {
        let hits = self
            .stats
            .hits_by_type
            .entry(cache_type.to_owned())
            .or_default();
        *hits += 1;
    }

    /// Adds one miss for `cache_type`.
    fn record_miss(&mut self, cache_type: &str) {
        let misses = self
            .stats
            .misses_by_type
            .entry(cache_type.to_owned())
            .or_default();
        *misses += 1;
    }
}
impl SarifBenchmark {
    /// Creates a benchmark with no datasets and no results.
    pub fn new(config: BenchmarkConfig) -> Self {
        Self {
            test_datasets: Vec::new(),
            results: Vec::new(),
            config,
        }
    }

    /// Registers a dataset to be benchmarked.
    pub fn add_dataset(&mut self, dataset: BenchmarkDataset) {
        self.test_datasets.push(dataset);
    }

    /// Runs every configured operation against every registered dataset.
    ///
    /// On success the results are stored in `self.results` and also returned.
    ///
    /// # Errors
    ///
    /// Returns the first error raised by an operation; `self.results` is left
    /// untouched in that case.
    pub fn run_benchmarks(&mut self) -> ParseResult<Vec<BenchmarkResult>> {
        let mut all_results = Vec::new();
        for dataset in &self.test_datasets {
            for operation in &self.config.operations {
                all_results.push(self.run_single_benchmark(operation, dataset)?);
            }
        }
        self.results = all_results.clone();
        Ok(all_results)
    }

    /// Benchmarks one operation against one dataset: runs the warm-up
    /// iterations, then the measured iterations, and averages the outcome.
    ///
    /// # Errors
    ///
    /// Fails when `config.iterations` is zero (there is nothing to average;
    /// this previously panicked with a division by zero) or when the
    /// operation itself fails.
    fn run_single_benchmark(
        &self,
        operation: &str,
        dataset: &BenchmarkDataset,
    ) -> ParseResult<BenchmarkResult> {
        let iterations = self.config.iterations;
        if iterations == 0 {
            return Err(SarifError::custom(format!(
                "Cannot benchmark operation '{}' with zero iterations",
                operation
            )));
        }
        for _ in 0..self.config.warmup_iterations {
            self.execute_operation(operation, dataset)?;
        }
        let mut total_duration = Duration::ZERO;
        let mut memory_usage = 0.0;
        for _ in 0..iterations {
            let start_memory = self.get_memory_usage();
            let start_time = Instant::now();
            self.execute_operation(operation, dataset)?;
            let elapsed = start_time.elapsed();
            let end_memory = self.get_memory_usage();
            total_duration += elapsed;
            memory_usage += end_memory - start_memory;
        }
        let avg_duration = Self::mean_duration(total_duration, iterations);
        let avg_memory = memory_usage / iterations as f64;
        let seconds = avg_duration.as_secs_f64();
        let traits = &dataset.characteristics;
        let throughput = ThroughputMetrics {
            results_per_second: Self::per_second(traits.result_count as f64, seconds),
            files_per_second: Self::per_second(traits.file_count as f64, seconds),
            mb_per_second: Self::per_second(traits.size_mb, seconds),
        };
        Ok(BenchmarkResult {
            operation: operation.to_string(),
            dataset: dataset.name.clone(),
            duration: avg_duration,
            memory_usage_mb: avg_memory,
            throughput,
            custom_metrics: HashMap::new(),
        })
    }

    /// Executes a single named operation ("parse", "index", "query" or
    /// "conversion") against `dataset`, discarding its output.
    ///
    /// # Errors
    ///
    /// Fails for an unknown operation name or when the operation itself fails.
    fn execute_operation(&self, operation: &str, dataset: &BenchmarkDataset) -> ParseResult<()> {
        match operation {
            "parse" => {
                // Round-trip through JSON so that deserialization is exercised.
                let json = serde_json::to_string(&dataset.log)
                    .map_err(|e| SarifError::custom(format!("Serialization error: {}", e)))?;
                let _: SarifLog = serde_json::from_str(&json)
                    .map_err(|e| SarifError::custom(format!("Deserialization error: {}", e)))?;
            }
            "index" => {
                let index = crate::utils::indexing::SarifIndex::from_sarif_log(&dataset.log);
                let _ = index.stats.result_count;
            }
            "query" => {
                let index = crate::utils::indexing::SarifIndex::from_sarif_log(&dataset.log);
                let executor =
                    crate::utils::query::SarifQueryExecutor::from_index(index, dataset.log.clone());
                let query = crate::utils::query::SarifQuery::default();
                let _ = executor.execute(&query)?;
            }
            "conversion" => {
                let converter = crate::utils::conversion::CsvConverter::new();
                let _ = converter.convert_to_csv(&dataset.log)?;
            }
            _ => {
                return Err(SarifError::custom(format!(
                    "Unknown operation: {}",
                    operation
                )));
            }
        }
        Ok(())
    }

    /// Placeholder memory probe: always reports `0.0`, so every measured
    /// memory delta is zero.
    fn get_memory_usage(&self) -> f64 {
        0.0
    }

    /// Builds a report from the stored results: the raw results, a per-operation
    /// summary and pairwise dataset comparisons.
    pub fn generate_report(&self) -> BenchmarkReport {
        BenchmarkReport {
            results: self.results.clone(),
            summary: self.generate_summary(),
            comparisons: self.generate_comparisons(),
        }
    }

    /// Averages duration, result throughput and memory per operation across
    /// all stored results.
    fn generate_summary(&self) -> BenchmarkSummary {
        let mut operation_summaries = HashMap::new();
        for result in &self.results {
            // The avg_* fields hold running sums until the division pass below.
            let summary = operation_summaries
                .entry(result.operation.clone())
                .or_insert_with(|| OperationSummary {
                    operation: result.operation.clone(),
                    total_runs: 0,
                    avg_duration: Duration::ZERO,
                    avg_throughput: 0.0,
                    avg_memory_mb: 0.0,
                });
            summary.total_runs += 1;
            summary.avg_duration += result.duration;
            summary.avg_throughput += result.throughput.results_per_second;
            summary.avg_memory_mb += result.memory_usage_mb;
        }
        for summary in operation_summaries.values_mut() {
            // total_runs is at least 1 for every entry created above.
            summary.avg_duration = Self::mean_duration(summary.avg_duration, summary.total_runs);
            summary.avg_throughput /= summary.total_runs as f64;
            summary.avg_memory_mb /= summary.total_runs as f64;
        }
        BenchmarkSummary {
            operation_summaries,
            total_benchmarks: self.results.len(),
            fastest_operation: self.find_fastest_operation(),
            slowest_operation: self.find_slowest_operation(),
        }
    }

    /// For each configured operation, compares every pair of stored results
    /// (earlier result as baseline, later one as comparison).
    fn generate_comparisons(&self) -> Vec<BenchmarkComparison> {
        let mut comparisons = Vec::new();
        for operation in &self.config.operations {
            let operation_results: Vec<_> = self
                .results
                .iter()
                .filter(|r| r.operation == *operation)
                .collect();
            for (i, baseline) in operation_results.iter().enumerate() {
                for comparison in &operation_results[i + 1..] {
                    comparisons.push(BenchmarkComparison {
                        operation: operation.clone(),
                        baseline_dataset: baseline.dataset.clone(),
                        comparison_dataset: comparison.dataset.clone(),
                        duration_ratio: Self::ratio(
                            comparison.duration.as_secs_f64(),
                            baseline.duration.as_secs_f64(),
                        ),
                        throughput_ratio: Self::ratio(
                            comparison.throughput.results_per_second,
                            baseline.throughput.results_per_second,
                        ),
                        memory_ratio: Self::ratio(
                            comparison.memory_usage_mb,
                            baseline.memory_usage_mb,
                        ),
                    });
                }
            }
        }
        comparisons
    }

    /// Operation name of the stored result with the shortest duration.
    fn find_fastest_operation(&self) -> Option<String> {
        self.results
            .iter()
            .min_by_key(|r| r.duration)
            .map(|r| r.operation.clone())
    }

    /// Operation name of the stored result with the longest duration.
    fn find_slowest_operation(&self) -> Option<String> {
        self.results
            .iter()
            .max_by_key(|r| r.duration)
            .map(|r| r.operation.clone())
    }

    /// `total / count`, computed in nanoseconds so the count is never
    /// truncated to `u32`. Callers guarantee `count > 0`.
    fn mean_duration(total: Duration, count: usize) -> Duration {
        const NANOS_PER_SEC: u128 = 1_000_000_000;
        let mean_nanos = total.as_nanos() / count as u128;
        Duration::new(
            (mean_nanos / NANOS_PER_SEC) as u64,
            (mean_nanos % NANOS_PER_SEC) as u32,
        )
    }

    /// `amount / seconds`, or `0.0` when no measurable time elapsed, so that
    /// throughput never becomes infinite or NaN.
    fn per_second(amount: f64, seconds: f64) -> f64 {
        if seconds > 0.0 {
            amount / seconds
        } else {
            0.0
        }
    }

    /// `comparison / baseline`, with two zero measurements treated as equal
    /// (ratio `1.0`) instead of yielding NaN.
    fn ratio(comparison: f64, baseline: f64) -> f64 {
        if comparison == 0.0 && baseline == 0.0 {
            1.0
        } else {
            comparison / baseline
        }
    }
}
/// Report produced by `SarifBenchmark::generate_report`.
#[derive(Debug, Clone)]
pub struct BenchmarkReport {
/// Copy of the stored benchmark results.
pub results: Vec<BenchmarkResult>,
/// Per-operation averages and overall extremes.
pub summary: BenchmarkSummary,
/// Pairwise comparisons between results of the same operation.
pub comparisons: Vec<BenchmarkComparison>,
}
/// Aggregated view over all stored benchmark results.
#[derive(Debug, Clone)]
pub struct BenchmarkSummary {
/// Averages keyed by operation name.
pub operation_summaries: HashMap<String, OperationSummary>,
/// Number of stored results.
pub total_benchmarks: usize,
/// Operation of the result with the shortest duration, if any results exist.
pub fastest_operation: Option<String>,
/// Operation of the result with the longest duration, if any results exist.
pub slowest_operation: Option<String>,
}
/// Averages for one operation across all results recorded for it.
#[derive(Debug, Clone)]
pub struct OperationSummary {
/// Operation name.
pub operation: String,
/// Number of results that contributed to the averages.
pub total_runs: usize,
/// Mean of the results' durations.
pub avg_duration: Duration,
/// Mean of the results' `results_per_second` values.
pub avg_throughput: f64,
/// Mean of the results' memory usage, in megabytes.
pub avg_memory_mb: f64,
}
/// Comparison of two results of the same operation; every ratio is the
/// comparison value divided by the baseline value.
#[derive(Debug, Clone)]
pub struct BenchmarkComparison {
/// Operation both results belong to.
pub operation: String,
/// Dataset name of the baseline result.
pub baseline_dataset: String,
/// Dataset name of the result being compared against the baseline.
pub comparison_dataset: String,
/// Ratio of the durations in seconds.
pub duration_ratio: f64,
/// Ratio of the `results_per_second` values.
pub throughput_ratio: f64,
/// Ratio of the memory usage values.
pub memory_ratio: f64,
}
impl Default for PerformanceConfig {
fn default() -> Self {
Self {
track_memory: true,
enable_timing: true,
memory_threshold_mb: 1024.0, batch_size: 1000,
enable_parallel: true,
cache_config: CacheConfig::default(),
}
}
}
impl Default for CacheConfig {
fn default() -> Self {
Self {
max_logs: 10,
max_indexed_results: 100000,
max_query_results: 10000,
ttl_seconds: 3600, }
}
}
impl Default for BenchmarkConfig {
fn default() -> Self {
Self {
iterations: 10,
warmup_iterations: 3,
profile_memory: true,
operations: vec![
"parse".to_string(),
"index".to_string(),
"query".to_string(),
"conversion".to_string(),
],
}
}
}
/// Builds a synthetic benchmark dataset named `name` with `run_count` runs of
/// `results_per_run` results each.
///
/// Results cycle through at most 10 file paths (`test/file0.rs` …
/// `test/file9.rs`), at most 50 rule ids (`RULE000` … `RULE049`) and line
/// numbers 1–100. The reported `file_count` and `rule_count` reflect how many
/// of those are actually used, so small datasets are described accurately;
/// an empty dataset reports zero files, zero rules and an average of `0.0`
/// results per file.
pub fn create_test_dataset(
    name: &str,
    run_count: usize,
    results_per_run: usize,
) -> BenchmarkDataset {
    use crate::builder::SarifLogBuilder;

    // Number of distinct file paths / rule ids the generator cycles through.
    const DISTINCT_FILES: usize = 10;
    const DISTINCT_RULES: usize = 50;

    let mut log_builder = SarifLogBuilder::new();
    for run_idx in 0..run_count {
        let tool_name = format!("test-tool-{}", run_idx);
        let mut run_builder = crate::builder::RunBuilder::with_tool(&tool_name, Some("1.0.0"));
        for result_idx in 0..results_per_run {
            let message = format!("Test result {} from run {}", result_idx, run_idx);
            let file_path = format!("test/file{}.rs", result_idx % DISTINCT_FILES);
            let line = (result_idx % 100) as i32 + 1;
            let result = crate::builder::ResultBuilder::with_text_message(&message)
                .with_rule_id(format!("RULE{:03}", result_idx % DISTINCT_RULES))
                .add_file_location(&file_path, line, 1)
                .build();
            run_builder = run_builder.add_result(result);
        }
        log_builder = log_builder.add_run(run_builder.build());
    }
    let log = log_builder.build_unchecked();

    let result_count = run_count * results_per_run;
    // Every run reuses the same paths and rule ids, so the distinct counts
    // depend only on how many results a single run has.
    let (file_count, rule_count) = if run_count == 0 {
        (0, 0)
    } else {
        (
            results_per_run.min(DISTINCT_FILES),
            results_per_run.min(DISTINCT_RULES),
        )
    };
    let avg_results_per_file = if file_count == 0 {
        0.0
    } else {
        result_count as f64 / file_count as f64
    };
    let characteristics = DatasetCharacteristics {
        run_count,
        result_count,
        file_count,
        rule_count,
        avg_results_per_file,
        size_mb: estimate_log_size(&log),
    };
    BenchmarkDataset {
        name: name.to_string(),
        log,
        characteristics,
    }
}
/// Estimates the size of `log` as the length of its JSON serialization in
/// mebibytes; yields `0.0` when serialization fails.
fn estimate_log_size(log: &SarifLog) -> f64 {
    const BYTES_PER_MIB: f64 = 1024.0 * 1024.0;
    serde_json::to_string(log)
        .map(|serialized| serialized.len() as f64 / BYTES_PER_MIB)
        .unwrap_or(0.0)
}
// Unit tests for the monitor, pool, interner, cache and benchmark helpers.
#[cfg(test)]
mod tests {
use super::*;
// Timing handle, counters and memory stats all show up in the report.
#[test]
fn test_performance_monitor() {
let config = PerformanceConfig::default();
let mut monitor = PerformanceMonitor::new(config);
{
// The handle records its timing when it is dropped at the end of this scope.
let _handle = monitor.start_timing("test_operation");
std::thread::sleep(std::time::Duration::from_millis(10));
}
monitor.increment_counter("test_counter");
monitor.increment_counter("test_counter");
monitor.update_memory_stats("test_operation", 100.0);
let report = monitor.generate_report();
assert_eq!(report.counters["test_counter"], 2);
assert!(report.operation_stats.contains_key("test_operation"));
assert_eq!(report.memory_stats.current_memory_mb, 100.0);
}
// Borrow and return calls are both counted.
#[test]
fn test_memory_pool() {
let mut pool = MemoryPool::new(|| String::new(), 5);
let obj1 = pool.borrow();
let obj2 = pool.borrow();
pool.return_object(obj1);
pool.return_object(obj2);
let stats = pool.get_stats();
assert_eq!(stats.borrowed.load(Ordering::Relaxed), 2);
assert_eq!(stats.returned.load(Ordering::Relaxed), 2);
}
// Interning the same contents twice yields the same allocation.
#[test]
fn test_string_interner() {
let mut interner = StringInterner::new();
let str1 = interner.intern("test");
let str2 = interner.intern("test");
assert!(Arc::ptr_eq(&str1, &str2));
let stats = interner.get_stats();
assert_eq!(stats.total_interned, 1);
assert_eq!(stats.cache_hits, 1);
}
// The second lookup is served from the cache, so its closure result is ignored.
#[test]
fn test_computation_cache() {
let mut cache = ComputationCache::new();
let fp1 = cache.get_fingerprint("key1", || "fingerprint1".to_string());
let fp2 = cache.get_fingerprint("key1", || "fingerprint2".to_string());
assert_eq!(fp1, "fingerprint1");
assert_eq!(fp2, "fingerprint1");
let stats = cache.get_stats();
assert_eq!(stats.hits_by_type.get("fingerprint"), Some(&1));
assert_eq!(stats.misses_by_type.get("fingerprint"), Some(&1));
}
// Generated dataset reports the requested run and result counts.
#[test]
fn test_benchmark_dataset_creation() {
let dataset = create_test_dataset("test", 2, 10);
assert_eq!(dataset.name, "test");
assert_eq!(dataset.characteristics.run_count, 2);
assert_eq!(dataset.characteristics.result_count, 20);
assert_eq!(dataset.log.runs.len(), 2);
}
// One operation against one dataset produces exactly one result.
#[test]
fn test_benchmark_execution() {
let config = BenchmarkConfig {
iterations: 2,
warmup_iterations: 1,
profile_memory: false,
operations: vec!["parse".to_string()],
};
let mut benchmark = SarifBenchmark::new(config);
let dataset = create_test_dataset("small", 1, 5);
benchmark.add_dataset(dataset);
let results = benchmark.run_benchmarks().unwrap();
assert_eq!(results.len(), 1);
assert_eq!(results[0].operation, "parse");
assert_eq!(results[0].dataset, "small");
}
}