1pub mod enhanced_lazy;
2pub mod lazy_loading;
3pub mod limits;
4pub mod memory;
5pub mod parallel;
6pub mod progress;
7pub mod streaming;
8
9use parking_lot::RwLock;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::time::{Duration, Instant};
13
14pub use limits::{
16 ConcurrencyGuard, ParserSlot, PerformanceGuard, PerformanceLimits, PerformanceViolation,
17 RecursionGuard,
18};
19
/// Tunable knobs for the performance subsystem.
///
/// The values produced by [`Default`] are listed on each field. How the
/// individual settings are enforced is decided by the submodules that consume
/// this struct and is not visible from here.
#[derive(Debug, Clone)]
pub struct PerformanceConfig {
    /// Memory budget in megabytes (default: 512).
    pub max_memory_mb: usize,

    /// Whether parallel processing is switched on (default: `true`).
    pub enable_parallel: bool,

    /// Explicit worker thread count (default: `None`).
    // NOTE(review): `None` presumably lets the parallel backend pick a count;
    // confirm against the `parallel` module.
    pub worker_threads: Option<usize>,

    /// Whether lazy loading is switched on (default: `true`).
    pub enable_lazy_loading: bool,

    /// Chunk size used for streaming (default: 8192).
    // NOTE(review): unit is presumably bytes; confirm against `streaming`.
    pub stream_chunk_size: usize,

    /// Interval between progress updates (default: 100 ms).
    pub progress_interval: Duration,
}

impl Default for PerformanceConfig {
    /// Builds the stock configuration: 512 MB budget, parallelism and lazy
    /// loading enabled, no explicit thread count, 8192-sized stream chunks and
    /// a 100 ms progress interval.
    fn default() -> Self {
        const DEFAULT_MEMORY_MB: usize = 512;
        const DEFAULT_CHUNK_SIZE: usize = 8192;
        const DEFAULT_PROGRESS_MS: u64 = 100;

        PerformanceConfig {
            max_memory_mb: DEFAULT_MEMORY_MB,
            enable_parallel: true,
            worker_threads: None,
            enable_lazy_loading: true,
            stream_chunk_size: DEFAULT_CHUNK_SIZE,
            progress_interval: Duration::from_millis(DEFAULT_PROGRESS_MS),
        }
    }
}
54
55#[derive(Debug, Default, Clone, Serialize, Deserialize)]
57pub struct PerformanceStats {
58 pub bytes_processed: u64,
59 pub objects_parsed: u64,
60 pub pages_processed: u64,
61 pub memory_peak_mb: usize,
62 pub memory_current_mb: usize,
63 pub parallel_tasks_spawned: u64,
64 pub lazy_loads_performed: u64,
65 pub parse_time_ms: u64,
66 pub filter_time_ms: u64,
67 pub validation_time_ms: u64,
68 pub serialization_time_ms: u64,
69 pub cache_hits: u64,
70 pub cache_misses: u64,
71 pub io_operations: u64,
72 pub compression_ratio: f64,
73 pub operation_times: HashMap<String, Vec<u64>>,
74}
75
76use std::sync::OnceLock;
77
78static PERF_STATS_INSTANCE: OnceLock<RwLock<PerformanceStats>> = OnceLock::new();
79
80fn get_perf_stats() -> &'static RwLock<PerformanceStats> {
81 PERF_STATS_INSTANCE.get_or_init(|| RwLock::new(PerformanceStats::default()))
82}
83
84pub fn get_performance_stats() -> PerformanceStats {
85 get_perf_stats().read().clone()
86}
87
88pub fn increment_bytes_processed(bytes: u64) {
89 get_perf_stats().write().bytes_processed += bytes;
90}
91
92pub fn increment_objects_parsed(count: u64) {
93 get_perf_stats().write().objects_parsed += count;
94}
95
96pub fn increment_pages_processed(count: u64) {
97 get_perf_stats().write().pages_processed += count;
98}
99
100pub fn update_memory_peak(mb: usize) {
101 let mut stats = get_perf_stats().write();
102 if mb > stats.memory_peak_mb {
103 stats.memory_peak_mb = mb;
104 }
105}
106
107pub fn increment_parallel_tasks() {
108 get_perf_stats().write().parallel_tasks_spawned += 1;
109}
110
111pub fn increment_lazy_loads() {
112 get_perf_stats().write().lazy_loads_performed += 1;
113}
114
115pub fn reset_performance_stats() {
116 *get_perf_stats().write() = PerformanceStats::default();
117}
118
119pub fn update_current_memory(mb: usize) {
120 get_perf_stats().write().memory_current_mb = mb;
121 update_memory_peak(mb);
122}
123
124pub fn add_parse_time(ms: u64) {
125 get_perf_stats().write().parse_time_ms += ms;
126}
127
128pub fn add_filter_time(ms: u64) {
129 get_perf_stats().write().filter_time_ms += ms;
130}
131
132pub fn add_validation_time(ms: u64) {
133 get_perf_stats().write().validation_time_ms += ms;
134}
135
136pub fn add_serialization_time(ms: u64) {
137 get_perf_stats().write().serialization_time_ms += ms;
138}
139
140pub fn increment_cache_hits() {
141 get_perf_stats().write().cache_hits += 1;
142}
143
144pub fn increment_cache_misses() {
145 get_perf_stats().write().cache_misses += 1;
146}
147
148pub fn increment_io_operations() {
149 get_perf_stats().write().io_operations += 1;
150}
151
152pub fn update_compression_ratio(ratio: f64) {
153 let mut stats = get_perf_stats().write();
154 stats.compression_ratio = (stats.compression_ratio + ratio) / 2.0;
155}
156
157pub fn record_operation_time(operation: &str, ms: u64) {
158 let mut stats = get_perf_stats().write();
159 stats
160 .operation_times
161 .entry(operation.to_string())
162 .or_default()
163 .push(ms);
164}
165
166#[derive(Debug)]
167pub struct PerformanceTimer {
168 operation: String,
169 start: Instant,
170}
171
172impl PerformanceTimer {
173 pub fn new(operation: &str) -> Self {
174 Self {
175 operation: operation.to_string(),
176 start: Instant::now(),
177 }
178 }
179
180 pub fn finish(self) -> u64 {
181 let elapsed = self.start.elapsed().as_millis() as u64;
182 record_operation_time(&self.operation, elapsed);
183 elapsed
184 }
185}
186
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct PerformanceReport {
189 pub stats: PerformanceStats,
190 pub analysis: PerformanceAnalysis,
191 pub recommendations: Vec<String>,
192 pub bottlenecks: Vec<Bottleneck>,
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct PerformanceAnalysis {
197 pub throughput_mb_per_sec: f64,
198 pub objects_per_sec: f64,
199 pub memory_efficiency: f64,
200 pub cache_hit_ratio: f64,
201 pub parallel_efficiency: f64,
202 pub operation_breakdown: HashMap<String, OperationStats>,
203}
204
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct OperationStats {
207 pub total_time_ms: u64,
208 pub avg_time_ms: f64,
209 pub min_time_ms: u64,
210 pub max_time_ms: u64,
211 pub count: usize,
212 pub percentage_of_total: f64,
213}
214
215#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct Bottleneck {
217 pub category: String,
218 pub description: String,
219 pub severity: BottleneckSeverity,
220 pub impact_score: f64,
221 pub suggested_fix: String,
222}
223
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub enum BottleneckSeverity {
226 Low,
227 Medium,
228 High,
229 Critical,
230}
231
232pub struct PerformanceAnalyzer;
233
234impl PerformanceAnalyzer {
235 pub fn generate_report() -> PerformanceReport {
236 let stats = get_performance_stats();
237 let analysis = Self::analyze_performance(&stats);
238 let recommendations = Self::generate_recommendations(&stats, &analysis);
239 let bottlenecks = Self::identify_bottlenecks(&stats, &analysis);
240
241 PerformanceReport {
242 stats,
243 analysis,
244 recommendations,
245 bottlenecks,
246 }
247 }
248
249 fn analyze_performance(stats: &PerformanceStats) -> PerformanceAnalysis {
250 let total_time = stats.parse_time_ms
251 + stats.filter_time_ms
252 + stats.validation_time_ms
253 + stats.serialization_time_ms;
254
255 let throughput = if total_time > 0 {
256 (stats.bytes_processed as f64 / 1_048_576.0) / (total_time as f64 / 1000.0)
257 } else {
258 0.0
259 };
260
261 let objects_per_sec = if total_time > 0 {
262 stats.objects_parsed as f64 / (total_time as f64 / 1000.0)
263 } else {
264 0.0
265 };
266
267 let memory_efficiency = if stats.bytes_processed > 0 {
268 (stats.bytes_processed as f64 / 1_048_576.0) / stats.memory_peak_mb as f64
269 } else {
270 0.0
271 };
272
273 let cache_hit_ratio = if stats.cache_hits + stats.cache_misses > 0 {
274 stats.cache_hits as f64 / (stats.cache_hits + stats.cache_misses) as f64
275 } else {
276 0.0
277 };
278
279 let parallel_efficiency = if stats.parallel_tasks_spawned > 0 {
280 stats.objects_parsed as f64 / stats.parallel_tasks_spawned as f64
281 } else {
282 0.0
283 };
284
285 let mut operation_breakdown = HashMap::new();
286
287 for (operation, times) in &stats.operation_times {
288 if !times.is_empty() {
289 let total: u64 = times.iter().sum();
290 let avg = total as f64 / times.len() as f64;
291 let min = *times.iter().min().unwrap_or(&0);
292 let max = *times.iter().max().unwrap_or(&0);
293 let percentage = if total_time > 0 {
294 total as f64 / total_time as f64 * 100.0
295 } else {
296 0.0
297 };
298
299 operation_breakdown.insert(
300 operation.clone(),
301 OperationStats {
302 total_time_ms: total,
303 avg_time_ms: avg,
304 min_time_ms: min,
305 max_time_ms: max,
306 count: times.len(),
307 percentage_of_total: percentage,
308 },
309 );
310 }
311 }
312
313 PerformanceAnalysis {
314 throughput_mb_per_sec: throughput,
315 objects_per_sec,
316 memory_efficiency,
317 cache_hit_ratio,
318 parallel_efficiency,
319 operation_breakdown,
320 }
321 }
322
323 fn generate_recommendations(
324 stats: &PerformanceStats,
325 analysis: &PerformanceAnalysis,
326 ) -> Vec<String> {
327 let mut recommendations = Vec::new();
328
329 if analysis.throughput_mb_per_sec < 10.0 {
330 recommendations
331 .push("Consider enabling parallel processing to improve throughput".to_string());
332 }
333
334 if analysis.cache_hit_ratio < 0.5 {
335 recommendations
336 .push("Low cache hit ratio detected. Consider increasing cache size".to_string());
337 }
338
339 if stats.memory_peak_mb > 1024 {
340 recommendations
341 .push("High memory usage detected. Consider enabling streaming mode".to_string());
342 }
343
344 if analysis.parallel_efficiency < 5.0 && stats.parallel_tasks_spawned > 0 {
345 recommendations.push(
346 "Parallel processing efficiency is low. Consider reducing thread count".to_string(),
347 );
348 }
349
350 if stats.lazy_loads_performed == 0 && stats.bytes_processed > 50_000_000 {
351 recommendations
352 .push("Enable lazy loading for large documents to reduce memory usage".to_string());
353 }
354
355 recommendations
356 }
357
358 fn identify_bottlenecks(
359 stats: &PerformanceStats,
360 analysis: &PerformanceAnalysis,
361 ) -> Vec<Bottleneck> {
362 let mut bottlenecks = Vec::new();
363
364 if analysis.memory_efficiency < 1.0 {
365 bottlenecks.push(Bottleneck {
366 category: "Memory".to_string(),
367 description: "Poor memory efficiency detected".to_string(),
368 severity: BottleneckSeverity::High,
369 impact_score: 1.0 - analysis.memory_efficiency,
370 suggested_fix: "Enable streaming mode or increase chunk sizes".to_string(),
371 });
372 }
373
374 if analysis.cache_hit_ratio < 0.3 {
375 bottlenecks.push(Bottleneck {
376 category: "Caching".to_string(),
377 description: "Very low cache hit ratio".to_string(),
378 severity: BottleneckSeverity::Medium,
379 impact_score: 0.3 - analysis.cache_hit_ratio,
380 suggested_fix: "Increase cache size or implement better caching strategy"
381 .to_string(),
382 });
383 }
384
385 for (operation, op_stats) in &analysis.operation_breakdown {
386 if op_stats.percentage_of_total > 50.0 {
387 bottlenecks.push(Bottleneck {
388 category: "Operation".to_string(),
389 description: format!(
390 "{} operation consuming {}% of total time",
391 operation, op_stats.percentage_of_total
392 ),
393 severity: BottleneckSeverity::High,
394 impact_score: op_stats.percentage_of_total / 100.0,
395 suggested_fix: format!("Optimize {} operation or parallelize it", operation),
396 });
397 }
398 }
399
400 if stats.io_operations > stats.objects_parsed * 2 {
401 bottlenecks.push(Bottleneck {
402 category: "I/O".to_string(),
403 description: "Excessive I/O operations detected".to_string(),
404 severity: BottleneckSeverity::Medium,
405 impact_score: 0.5,
406 suggested_fix: "Implement better buffering or batch I/O operations".to_string(),
407 });
408 }
409
410 bottlenecks
411 }
412}
413
414pub fn start_timer(operation: &str) -> PerformanceTimer {
415 PerformanceTimer::new(operation)
416}
417
/// Returns this process's resident memory in megabytes, or `0` when it cannot
/// be determined.
///
/// * Linux: reads the `VmRSS:` line of `/proc/self/status`.
/// * macOS: runs `ps -o rss= -p <pid>` and parses its output.
/// * Elsewhere: always `0`.
///
/// In both supported cases the parsed number is treated as kilobytes and
/// integer-divided by 1024.
pub fn get_memory_usage() -> usize {
    #[cfg(target_os = "linux")]
    {
        let rss_mb = std::fs::read_to_string("/proc/self/status")
            .ok()
            .and_then(|status| {
                status
                    .lines()
                    .filter(|line| line.starts_with("VmRSS:"))
                    // The value is the second whitespace-separated token.
                    .find_map(|line| line.split_whitespace().nth(1)?.parse::<usize>().ok())
            })
            .map(|kb| kb / 1024);

        if let Some(mb) = rss_mb {
            return mb;
        }
    }

    #[cfg(target_os = "macos")]
    {
        let pid = std::process::id().to_string();
        let rss_mb = std::process::Command::new("ps")
            .args(["-o", "rss=", "-p", &pid])
            .output()
            .ok()
            .and_then(|out| String::from_utf8(out.stdout).ok())
            .and_then(|text| text.trim().parse::<usize>().ok())
            .map(|kb| kb / 1024);

        if let Some(mb) = rss_mb {
            return mb;
        }
    }

    0
}
453
454pub fn monitor_memory() {
455 let current_mb = get_memory_usage();
456 update_current_memory(current_mb);
457}