Skip to main content

oxirs_embed/
utils_types.rs

//! Common types, enums, and config structs for embedding utilities
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::time::Duration;
6
/// Dataset split result.
///
/// Groups three independent lists of string triples, one per partition.
/// This type only stores the partitions; nothing here determines how the
/// triples were assigned to them.
// NOTE(review): each tuple is presumably (subject, predicate, object) — confirm
// against the code that builds the split.
#[derive(Debug, Clone)]
pub struct DatasetSplit {
    /// Triples placed in the training partition.
    pub train: Vec<(String, String, String)>,
    /// Triples placed in the validation partition.
    pub validation: Vec<(String, String, String)>,
    /// Triples placed in the test partition.
    pub test: Vec<(String, String, String)>,
}
14
15/// Statistics about a dataset
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct DatasetStatistics {
18    pub num_triples: usize,
19    pub num_entities: usize,
20    pub num_relations: usize,
21    pub entity_frequency: HashMap<String, usize>,
22    pub relation_frequency: HashMap<String, usize>,
23    pub avg_degree: f64,
24    pub density: f64,
25}
26
/// Embedding distribution statistics.
///
/// Plain summary record (central tendency, spread, extremes) for a collection
/// of `f64` values, together with how many values were summarized.
// NOTE(review): presumably computed over every scalar embedding parameter of a
// model — confirm against the code that produces this struct.
#[derive(Debug, Clone)]
pub struct EmbeddingDistributionStats {
    /// Mean of the summarized values.
    pub mean: f64,
    /// Standard deviation of the summarized values.
    pub std_dev: f64,
    /// Variance of the summarized values.
    pub variance: f64,
    /// Smallest summarized value.
    pub min: f64,
    /// Largest summarized value.
    pub max: f64,
    /// Median of the summarized values.
    pub median: f64,
    /// Number of parameters the statistics refer to.
    pub num_parameters: usize,
}
38
/// Similarity statistics.
///
/// Summary record for a batch of similarity scores: mean, extremes, median,
/// and the number of comparisons the summary covers.
// NOTE(review): the similarity measure (and therefore the value range) is not
// visible in this file — confirm against the code that produces this struct.
#[derive(Debug, Clone)]
pub struct SimilarityStats {
    /// Mean similarity score.
    pub mean_similarity: f64,
    /// Lowest similarity score.
    pub min_similarity: f64,
    /// Highest similarity score.
    pub max_similarity: f64,
    /// Median similarity score.
    pub median_similarity: f64,
    /// Number of comparisons summarized.
    pub num_comparisons: usize,
}
48
49/// Graph metrics
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct GraphMetrics {
52    pub num_entities: usize,
53    pub num_relations: usize,
54    pub num_triples: usize,
55    pub avg_degree: f64,
56    pub max_degree: usize,
57    pub min_degree: usize,
58    pub density: f64,
59}
60
61/// Benchmarking configuration
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct BenchmarkConfig {
64    /// Number of warmup iterations
65    pub warmup_iterations: usize,
66    /// Number of measurement iterations
67    pub measurement_iterations: usize,
68    /// Target confidence level (0.0-1.0)
69    pub confidence_level: f64,
70    /// Enable memory profiling
71    pub enable_memory_profiling: bool,
72    /// Enable detailed timing analysis
73    pub enable_detailed_timing: bool,
74}
75
76impl Default for BenchmarkConfig {
77    fn default() -> Self {
78        Self {
79            warmup_iterations: 100,
80            measurement_iterations: 1000,
81            confidence_level: 0.95,
82            enable_memory_profiling: true,
83            enable_detailed_timing: true,
84        }
85    }
86}
87
88/// Memory usage statistics
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct MemoryStats {
91    /// Peak memory usage (bytes)
92    pub peak_memory_bytes: usize,
93    /// Average memory usage (bytes)
94    pub avg_memory_bytes: usize,
95    /// Memory allocations count
96    pub allocations: usize,
97    /// Memory deallocations count
98    pub deallocations: usize,
99}
100
101/// Individual benchmark result for a specific operation
102#[derive(Debug, Clone, Serialize, Deserialize)]
103pub struct BenchmarkResult {
104    /// Operation name
105    pub operation: String,
106    /// Total number of iterations
107    pub iterations: usize,
108    /// Total elapsed time
109    pub total_duration: Duration,
110    /// Average time per operation
111    pub avg_duration: Duration,
112    /// Minimum time observed
113    pub min_duration: Duration,
114    /// Maximum time observed
115    pub max_duration: Duration,
116    /// Standard deviation of durations
117    pub std_deviation: Duration,
118    /// Operations per second
119    pub ops_per_second: f64,
120    /// Memory usage statistics
121    pub memory_stats: MemoryStats,
122    /// Additional metrics
123    pub custom_metrics: HashMap<String, f64>,
124}
125
126/// Overall benchmark summary
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct BenchmarkSummary {
129    /// Total benchmark duration
130    pub total_duration: Duration,
131    /// Number of operations benchmarked
132    pub total_operations: usize,
133    /// Overall throughput (ops/sec)
134    pub overall_throughput: f64,
135    /// Performance efficiency score (0.0-1.0)
136    pub efficiency_score: f64,
137    /// Bottleneck analysis
138    pub bottlenecks: Vec<String>,
139}
140
141/// Benchmark comparison result
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct BenchmarkComparison {
144    pub baseline_name: String,
145    pub comparison_name: String,
146    pub throughput_improvement: f64,
147    pub latency_improvement: f64,
148    pub consistency_improvement: f64,
149    pub is_improvement: bool,
150}
151
152/// Performance regression analysis
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct RegressionAnalysis {
155    pub throughput_change: f64,
156    pub is_regression: bool,
157    pub confidence_level: f64,
158    pub analysis_notes: Vec<String>,
159}
160
161impl Default for RegressionAnalysis {
162    fn default() -> Self {
163        Self {
164            throughput_change: 0.0,
165            is_regression: false,
166            confidence_level: 0.0,
167            analysis_notes: vec!["No historical data available".to_string()],
168        }
169    }
170}