rumdl_lib/
performance.rs

1use crate::lint_context::LintContext;
2use crate::rule::Rule;
3use memory_stats::memory_stats as get_mem_stats;
4use std::collections::HashMap;
5/// Performance benchmarking framework for rumdl
6///
7/// This module provides comprehensive performance testing capabilities to measure
8/// rule execution times, memory usage, and overall linting performance.
9use std::time::{Duration, Instant};
10
/// Process-memory readings collected around a benchmarked rule run.
///
/// Every value is expressed in megabytes (bytes divided by 1,048,576).
#[derive(Clone, Debug)]
pub struct MemoryStats {
    /// Largest reading found in `memory_samples`.
    pub peak_memory_mb: f64,
    /// Arithmetic mean of the readings in `memory_samples`.
    pub average_memory_mb: f64,
    /// Individual readings, in the order they were taken.
    pub memory_samples: Vec<f64>,
}
18
19/// Performance results for a single rule
20#[derive(Debug, Clone)]
21pub struct RulePerformanceResult {
22    pub rule_name: String,
23    pub execution_time: Duration,
24    pub warnings_count: usize,
25    pub memory_stats: Option<MemoryStats>,
26    pub content_size_bytes: usize,
27    pub lines_processed: usize,
28}
29
30/// Aggregate performance results for all rules
31#[derive(Debug, Clone)]
32pub struct AggregatePerformanceResult {
33    pub total_execution_time: Duration,
34    pub rule_results: Vec<RulePerformanceResult>,
35    pub total_warnings: usize,
36    pub content_size_bytes: usize,
37    pub lines_processed: usize,
38    pub rules_per_second: f64,
39    pub lines_per_second: f64,
40    pub bytes_per_second: f64,
41}
42
/// Factory for synthetic Markdown documents used as benchmark input.
///
/// Four fixtures of increasing size are available. Each one is built
/// deterministically from fixed text and loop counters, so repeated calls
/// return identical strings.
pub struct ContentGenerator;

impl ContentGenerator {
    /// Build the smallest fixture (well under 1KB).
    ///
    /// It contains one heading hierarchy, a short nested list, a fenced Rust
    /// code block, an inline link, a bare URL and an e-mail address.
    pub fn small_content() -> String {
        [
            "# Small Test Document\n\n",
            "This is a small test document with various markdown elements.\n\n",
            "## Lists\n\n",
            "- Item 1\n",
            "- Item 2\n",
            "  - Nested item\n\n",
            "## Code\n\n",
            "```rust\nfn main() {\n    println!(\"Hello, world!\");\n}\n```\n\n",
            "## Links\n\n",
            "Visit [example.com](https://example.com) for more info.\n",
            "Bare URL: https://example.com/bare\n\n",
            "Contact: user@example.com\n",
        ]
        .concat()
    }

    /// Build the medium fixture: a title followed by 20 numbered sections.
    ///
    /// Every section has a list, a links paragraph and a formatting paragraph;
    /// every third section additionally has a fenced JavaScript block.
    pub fn medium_content() -> String {
        let mut doc = String::from("# Medium Test Document\n\n");

        for section in 1..=20 {
            // Section heading followed by a one-line introduction.
            doc += &format!("## Section {section}\n\n");
            doc += &format!("This is section {section} with some content.\n\n");

            // Five bullets; the even-numbered ones get a single nested child.
            doc += "### Lists\n\n";
            for item in 1..=5 {
                doc += &format!("- List item {item} in section {section}\n");
                if item % 2 == 0 {
                    doc += &format!("  - Nested item {section}.{item}\n");
                }
            }
            doc += "\n";

            // Sections 3, 6, 9, ... also carry a fenced code example.
            if section % 3 == 0 {
                doc += "### Code Example\n\n";
                doc += "```javascript\n";
                doc += &format!("function section{section}() {{\n");
                doc += &format!("    console.log('Section {section}');\n");
                doc += "    return true;\n";
                doc += "}\n";
                doc += "```\n\n";
            }

            // Inline link, bare URL and e-mail address.
            doc += "### Links\n\n";
            doc += &format!("Visit [section {section}](https://example.com/section{section}) for details.\n");
            doc += &format!("Bare URL: https://example{section}.com/path\n");
            doc += &format!("Email: section{section}@example.com\n\n");

            // Bold, italic and inline-code spans.
            doc += "### Formatting\n\n";
            doc += &format!("This is **bold text** in section {section}.\n");
            doc += &format!("This is *italic text* in section {section}.\n");
            doc += &format!("This is `inline code` in section {section}.\n\n");
        }

        doc
    }

    /// Build the large fixture: a table of contents plus 50 sections of
    /// three subsections each.
    ///
    /// Every subsection has prose, a three-level list and a references list.
    /// Subsections whose `section + subsection` index is a multiple of 3 get a
    /// fenced Rust block; those where it is a multiple of 4 get a data table.
    pub fn large_content() -> String {
        let mut doc = String::new();
        doc += "# Large Test Document\n\n";
        doc += "This is a comprehensive test document with extensive markdown content.\n\n";

        // Table of contents linking to every section heading.
        doc += "## Table of Contents\n\n";
        for section in 1..=50 {
            doc += &format!("- [Section {section}](#section-{section})\n");
        }
        doc += "\n";

        for section in 1..=50 {
            doc += &format!("## Section {section}\n\n");
            doc += &format!("This is section {section} with comprehensive content.\n\n");

            for sub in 1..=3 {
                // Subsection heading and filler prose.
                doc += &format!("### Subsection {section}.{sub}\n\n");
                doc += &format!("Content for subsection {section}.{sub} with multiple paragraphs.\n\n");
                doc += "Lorem ipsum dolor sit amet, consectetur adipiscing elit. ";
                doc += "Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n\n";

                // Eight bullets: even ones nest once, multiples of four nest twice.
                doc += "#### Lists\n\n";
                for item in 1..=8 {
                    doc += &format!("- Item {item} in subsection {section}.{sub}\n");
                    if item % 2 != 0 {
                        continue;
                    }
                    doc += &format!("  - Nested item {section}.{sub}.{item}\n");
                    if item % 4 == 0 {
                        doc += &format!("    - Deep nested item {section}.{sub}.{item}\n");
                    }
                }
                doc += "\n";

                // Fenced Rust example. The `{}` pairs in the println! line are
                // literal text of the generated document, not placeholders.
                if (section + sub) % 3 == 0 {
                    let product = section * sub;
                    doc += "#### Code Example\n\n";
                    doc += "```rust\n";
                    doc += &format!("fn section_{section}_{sub}_function() {{\n");
                    doc += &format!("    let value = {product};\n");
                    doc += "    println!(\"Processing section {}.{}\", value);\n";
                    doc += "    \n";
                    doc += "    // Complex logic here\n";
                    doc += "    for idx in 0..value {\n";
                    doc += "        process_item(idx);\n";
                    doc += "    }\n";
                    doc += "}\n";
                    doc += "```\n\n";
                }

                // Five-row pipe table with alternating status values.
                if (section + sub) % 4 == 0 {
                    doc += "#### Data Table\n\n";
                    doc += "| Column 1 | Column 2 | Column 3 | Column 4 |\n";
                    doc += "|----------|----------|----------|----------|\n";
                    for row in 1..=5 {
                        let status = if row % 2 == 0 { "OK" } else { "PENDING" };
                        let value = row * 10;
                        let outcome = row * 100;
                        doc += &format!(
                            "| Data {section}.{sub}.{row} | Value {value} | Result {outcome} | Status {status} |\n"
                        );
                    }
                    doc += "\n";
                }

                // Inline links, a bare URL and an e-mail address.
                doc += "#### References\n\n";
                doc += &format!("- [Official docs](https://docs.example.com/section{section}/subsection{sub})\n");
                doc += &format!("- [API reference](https://api.example.com/v{sub}/section{section})\n");
                doc += &format!("- Bare URL: https://example{section}.com/path/{sub}\n");
                doc += &format!("- Contact: section{section}@example{sub}.com\n");
                doc += "\n";
            }
        }

        doc
    }

    /// Build the stress-test fixture: a short preamble followed by five
    /// numbered parts, each containing a full copy of [`Self::large_content`].
    pub fn huge_content() -> String {
        // Generated once and appended five times.
        let repeated = Self::large_content();

        let mut doc = String::from("# Huge Test Document\n\n");
        doc += "This is an extremely large test document for stress testing.\n\n";

        for part in 1..=5 {
            doc += &format!("# Part {part} of Huge Document\n\n");
            doc += &repeated;
            doc += "\n\n";
        }

        doc
    }
}
219
220/// Performance benchmark runner
221pub struct PerformanceBenchmark {
222    rules: Vec<Box<dyn Rule>>,
223    measure_memory: bool,
224}
225
226impl PerformanceBenchmark {
227    pub fn new(rules: Vec<Box<dyn Rule>>) -> Self {
228        Self {
229            rules,
230            measure_memory: false,
231        }
232    }
233
234    pub fn with_memory_measurement(mut self) -> Self {
235        self.measure_memory = true;
236        self
237    }
238
239    /// Get current memory usage in MB (platform-specific)
240    /// Returns None if memory measurement is not available on the platform
241    ///
242    /// Uses physical memory (RSS on Linux/macOS, Working Set on Windows) rather than
243    /// virtual memory for more accurate performance benchmarking, as it represents
244    /// actual RAM usage rather than address space allocation.
245    fn get_memory_usage_mb() -> Option<f64> {
246        const BYTES_IN_MB: f64 = 1048576.0;
247        get_mem_stats().map(|stats| stats.physical_mem as f64 / BYTES_IN_MB)
248    }
249
250    /// Benchmark a single rule with given content
251    pub fn benchmark_rule(&self, rule: &dyn Rule, content: &str) -> RulePerformanceResult {
252        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
253        let content_size = content.len();
254        let lines_count = content.lines().count();
255
256        // Warm up
257        let _ = rule.check(&ctx);
258
259        // Collect memory samples
260        let mut memory_samples = Vec::new();
261
262        // Take initial memory reading
263        if let Some(initial_mem) = Self::get_memory_usage_mb() {
264            memory_samples.push(initial_mem);
265        }
266
267        // Measure execution time
268        let start = Instant::now();
269        let warnings = rule.check(&ctx).unwrap_or_else(|_| vec![]);
270        let execution_time = start.elapsed();
271
272        // Take final memory reading
273        if let Some(final_mem) = Self::get_memory_usage_mb() {
274            memory_samples.push(final_mem);
275        }
276
277        // Calculate memory stats if we have samples
278        let memory_stats = if !memory_samples.is_empty() {
279            let peak = memory_samples.iter().cloned().fold(f64::MIN, f64::max);
280            let average = memory_samples.iter().sum::<f64>() / memory_samples.len() as f64;
281            Some(MemoryStats {
282                peak_memory_mb: peak,
283                average_memory_mb: average,
284                memory_samples,
285            })
286        } else {
287            None
288        };
289
290        RulePerformanceResult {
291            rule_name: rule.name().to_string(),
292            execution_time,
293            warnings_count: warnings.len(),
294            memory_stats,
295            content_size_bytes: content_size,
296            lines_processed: lines_count,
297        }
298    }
299
300    /// Benchmark all rules with given content
301    pub fn benchmark_all_rules(&self, content: &str) -> AggregatePerformanceResult {
302        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard, None);
303        let content_size = content.len();
304        let lines_count = content.lines().count();
305        let mut rule_results = Vec::new();
306        let mut total_warnings = 0;
307
308        // Warm up all rules
309        for rule in &self.rules {
310            let _ = rule.check(&ctx);
311        }
312
313        // Measure total execution time
314        let total_start = Instant::now();
315
316        // Benchmark each rule individually
317        for rule in &self.rules {
318            let result = self.benchmark_rule(rule.as_ref(), content);
319            total_warnings += result.warnings_count;
320            rule_results.push(result);
321        }
322
323        let total_execution_time = total_start.elapsed();
324
325        // Calculate performance metrics
326        let rules_per_second = self.rules.len() as f64 / total_execution_time.as_secs_f64();
327        let lines_per_second = lines_count as f64 / total_execution_time.as_secs_f64();
328        let bytes_per_second = content_size as f64 / total_execution_time.as_secs_f64();
329
330        AggregatePerformanceResult {
331            total_execution_time,
332            rule_results,
333            total_warnings,
334            content_size_bytes: content_size,
335            lines_processed: lines_count,
336            rules_per_second,
337            lines_per_second,
338            bytes_per_second,
339        }
340    }
341
342    /// Run comprehensive performance tests with different content sizes
343    pub fn run_comprehensive_benchmark(&self) -> HashMap<String, AggregatePerformanceResult> {
344        let mut results = HashMap::new();
345
346        println!("Running comprehensive performance benchmark...");
347
348        // Test with different content sizes
349        let test_cases = vec![
350            ("small", ContentGenerator::small_content()),
351            ("medium", ContentGenerator::medium_content()),
352            ("large", ContentGenerator::large_content()),
353            ("huge", ContentGenerator::huge_content()),
354        ];
355
356        for (size_name, content) in test_cases {
357            println!(
358                "Benchmarking {} content ({} bytes, {} lines)...",
359                size_name,
360                content.len(),
361                content.lines().count()
362            );
363
364            let result = self.benchmark_all_rules(&content);
365            results.insert(size_name.to_string(), result);
366        }
367
368        results
369    }
370
371    /// Print detailed performance report
372    pub fn print_performance_report(&self, results: &HashMap<String, AggregatePerformanceResult>) {
373        println!("\n=== RUMDL PERFORMANCE BENCHMARK REPORT ===\n");
374
375        for (size_name, result) in results {
376            println!("📊 {} Content Performance:", size_name.to_uppercase());
377            println!(
378                "   Content size: {} bytes ({} lines)",
379                result.content_size_bytes, result.lines_processed
380            );
381            println!(
382                "   Total execution time: {:.3}ms",
383                result.total_execution_time.as_secs_f64() * 1000.0
384            );
385            println!("   Total warnings found: {}", result.total_warnings);
386            println!("   Performance metrics:");
387            println!("     - Rules per second: {:.1}", result.rules_per_second);
388            println!("     - Lines per second: {:.0}", result.lines_per_second);
389            println!("     - Bytes per second: {:.0}", result.bytes_per_second);
390            println!();
391
392            // Show top 10 slowest rules
393            let mut sorted_rules = result.rule_results.clone();
394            sorted_rules.sort_by(|a, b| b.execution_time.cmp(&a.execution_time));
395
396            println!("   Top 10 slowest rules:");
397            for (i, rule_result) in sorted_rules.iter().take(10).enumerate() {
398                let percentage =
399                    (rule_result.execution_time.as_secs_f64() / result.total_execution_time.as_secs_f64()) * 100.0;
400                println!(
401                    "     {}. {} - {:.3}ms ({:.1}%) - {} warnings",
402                    i + 1,
403                    rule_result.rule_name,
404                    rule_result.execution_time.as_secs_f64() * 1000.0,
405                    percentage,
406                    rule_result.warnings_count
407                );
408            }
409            println!();
410        }
411
412        // Summary comparison
413        println!("📈 Performance Scaling Summary:");
414        if let (Some(small), Some(large)) = (results.get("small"), results.get("large")) {
415            let size_ratio = large.content_size_bytes as f64 / small.content_size_bytes as f64;
416            let time_ratio = large.total_execution_time.as_secs_f64() / small.total_execution_time.as_secs_f64();
417            println!("   Content size ratio (large/small): {size_ratio:.1}x");
418            println!("   Execution time ratio (large/small): {time_ratio:.1}x");
419            println!(
420                "   Scaling efficiency: {:.1}% (lower is better)",
421                (time_ratio / size_ratio) * 100.0
422            );
423        }
424        println!();
425    }
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// The fixtures must grow from small to large, and the small one must
    /// contain the basic Markdown elements the rules look at.
    #[test]
    fn test_content_generators() {
        let small = ContentGenerator::small_content();
        let medium = ContentGenerator::medium_content();
        let large = ContentGenerator::large_content();

        // Compare measured sizes rather than hard-coded byte counts.
        let (small_len, medium_len, large_len) = (small.len(), medium.len(), large.len());
        assert!(
            small_len < 1024,
            "Small content should be < 1KB, got {} bytes",
            small_len
        );
        assert!(
            medium_len >= 1024,
            "Medium content should be >= 1KB, got {} bytes",
            medium_len
        );
        assert!(
            large_len >= medium_len,
            "Large content should be >= medium content, got {} vs {} bytes",
            large_len,
            medium_len
        );

        // Each marker stands for one kind of Markdown element.
        let expected_markers = [
            ("# ", "Should contain headings"),
            ("- ", "Should contain lists"),
            ("```", "Should contain code blocks"),
            ("http", "Should contain URLs"),
        ];
        for (marker, message) in expected_markers.iter() {
            assert!(small.contains(marker), "{}", message);
        }
    }

    /// A memory reading must be obtainable and positive on the platforms the
    /// memory-stats crate supports (Linux, macOS, Windows).
    #[test]
    fn test_memory_measurement() {
        let reading = PerformanceBenchmark::get_memory_usage_mb();

        assert!(
            reading.is_some(),
            "Memory measurement should work on all supported platforms"
        );
        for mb in reading {
            assert!(mb > 0.0, "Memory usage should be positive");
        }
    }

    /// Benchmarking one real rule with memory measurement enabled must fill
    /// in every field of the result, including the memory statistics.
    #[test]
    fn test_benchmark_rule_with_memory() {
        let config = crate::config::Config::default();
        let all_rules = crate::rules::all_rules(&config);
        let monitor = PerformanceBenchmark::new(all_rules.clone()).with_memory_measurement();

        // Only the first rule is exercised; nothing runs if there are none.
        for rule in all_rules.iter().take(1) {
            let content = "# Test\n\nSome content";
            let result = monitor.benchmark_rule(rule.as_ref(), content);

            // Basic properties
            assert!(!result.rule_name.is_empty());
            assert!(result.execution_time.as_nanos() > 0);
            assert_eq!(result.content_size_bytes, content.len());
            assert_eq!(result.lines_processed, 3);

            // Memory stats should be available on all platforms (Linux, macOS, Windows)
            assert!(
                result.memory_stats.is_some(),
                "Should have memory stats on all platforms"
            );
            for stats in result.memory_stats {
                assert!(stats.peak_memory_mb > 0.0, "Peak memory should be positive");
                assert!(stats.average_memory_mb > 0.0, "Average memory should be positive");
                assert!(!stats.memory_samples.is_empty(), "Should have memory samples");
            }
        }
    }

    /// `new` starts with memory measurement off; the builder step turns it on.
    #[test]
    fn test_performance_benchmark_creation() {
        let no_rules: Vec<Box<dyn Rule>> = Vec::new();

        let plain = PerformanceBenchmark::new(no_rules);
        assert!(!plain.measure_memory);

        let with_memory = plain.with_memory_measurement();
        assert!(with_memory.measure_memory);
    }
}