rumdl_lib/
performance.rs

1use crate::lint_context::LintContext;
2use crate::rule::Rule;
3use std::collections::HashMap;
4/// Performance benchmarking framework for rumdl
5///
6/// This module provides comprehensive performance testing capabilities to measure
7/// rule execution times, memory usage, and overall linting performance.
8use std::time::{Duration, Instant};
9
/// Memory usage statistics gathered around one benchmarked rule run.
///
/// All values are in megabytes, as produced by the benchmark's memory probe.
#[derive(Debug, Clone)]
pub struct MemoryStats {
    /// Largest reading found in `memory_samples`.
    pub peak_memory_mb: f64,
    /// Arithmetic mean of the readings in `memory_samples`.
    pub average_memory_mb: f64,
    /// Raw readings, in the order they were taken.
    pub memory_samples: Vec<f64>,
}
17
18/// Performance results for a single rule
19#[derive(Debug, Clone)]
20pub struct RulePerformanceResult {
21    pub rule_name: String,
22    pub execution_time: Duration,
23    pub warnings_count: usize,
24    pub memory_stats: Option<MemoryStats>,
25    pub content_size_bytes: usize,
26    pub lines_processed: usize,
27}
28
29/// Aggregate performance results for all rules
30#[derive(Debug, Clone)]
31pub struct AggregatePerformanceResult {
32    pub total_execution_time: Duration,
33    pub rule_results: Vec<RulePerformanceResult>,
34    pub total_warnings: usize,
35    pub content_size_bytes: usize,
36    pub lines_processed: usize,
37    pub rules_per_second: f64,
38    pub lines_per_second: f64,
39    pub bytes_per_second: f64,
40}
41
/// Namespace type whose associated functions build synthetic Markdown documents
/// of increasing size, used as benchmark input.
pub struct ContentGenerator;
44
45impl ContentGenerator {
46    /// Generate small test content (<1KB)
47    pub fn small_content() -> String {
48        let mut content = String::new();
49        content.push_str("# Small Test Document\n\n");
50        content.push_str("This is a small test document with various markdown elements.\n\n");
51        content.push_str("## Lists\n\n");
52        content.push_str("- Item 1\n");
53        content.push_str("- Item 2\n");
54        content.push_str("  - Nested item\n\n");
55        content.push_str("## Code\n\n");
56        content.push_str("```rust\nfn main() {\n    println!(\"Hello, world!\");\n}\n```\n\n");
57        content.push_str("## Links\n\n");
58        content.push_str("Visit [example.com](https://example.com) for more info.\n");
59        content.push_str("Bare URL: https://example.com/bare\n\n");
60        content.push_str("Contact: user@example.com\n");
61        content
62    }
63
64    /// Generate medium test content (1-10KB)
65    pub fn medium_content() -> String {
66        let mut content = String::new();
67        content.push_str("# Medium Test Document\n\n");
68
69        // Add multiple sections with various markdown elements
70        for i in 1..=20 {
71            content.push_str(&format!("## Section {i}\n\n"));
72            content.push_str(&format!("This is section {i} with some content.\n\n"));
73
74            // Add lists
75            content.push_str("### Lists\n\n");
76            for j in 1..=5 {
77                content.push_str(&format!("- List item {j} in section {i}\n"));
78                if j % 2 == 0 {
79                    content.push_str(&format!("  - Nested item {i}.{j}\n"));
80                }
81            }
82            content.push('\n');
83
84            // Add code blocks
85            if i % 3 == 0 {
86                content.push_str("### Code Example\n\n");
87                content.push_str("```javascript\n");
88                content.push_str(&format!("function section{i}() {{\n"));
89                content.push_str(&format!("    console.log('Section {i}');\n"));
90                content.push_str("    return true;\n");
91                content.push_str("}\n");
92                content.push_str("```\n\n");
93            }
94
95            // Add links and URLs
96            content.push_str("### Links\n\n");
97            content.push_str(&format!(
98                "Visit [section {i}](https://example.com/section{i}) for details.\n"
99            ));
100            content.push_str(&format!("Bare URL: https://example{i}.com/path\n"));
101            content.push_str(&format!("Email: section{i}@example.com\n\n"));
102
103            // Add emphasis and formatting
104            content.push_str("### Formatting\n\n");
105            content.push_str(&format!("This is **bold text** in section {i}.\n"));
106            content.push_str(&format!("This is *italic text* in section {i}.\n"));
107            content.push_str(&format!("This is `inline code` in section {i}.\n\n"));
108        }
109
110        content
111    }
112
113    /// Generate large test content (10-100KB)
114    pub fn large_content() -> String {
115        let mut content = String::new();
116        content.push_str("# Large Test Document\n\n");
117        content.push_str("This is a comprehensive test document with extensive markdown content.\n\n");
118
119        // Add table of contents
120        content.push_str("## Table of Contents\n\n");
121        for i in 1..=50 {
122            content.push_str(&format!("- [Section {i}](#section-{i})\n"));
123        }
124        content.push('\n');
125
126        // Add many sections with various content
127        for i in 1..=50 {
128            content.push_str(&format!("## Section {i}\n\n"));
129            content.push_str(&format!("This is section {i} with comprehensive content.\n\n"));
130
131            // Add subsections
132            for j in 1..=3 {
133                content.push_str(&format!("### Subsection {i}.{j}\n\n"));
134                content.push_str(&format!("Content for subsection {i}.{j} with multiple paragraphs.\n\n"));
135                content.push_str("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ");
136                content.push_str("Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\n\n");
137
138                // Add lists with multiple levels
139                content.push_str("#### Lists\n\n");
140                for k in 1..=8 {
141                    content.push_str(&format!("- Item {k} in subsection {i}.{j}\n"));
142                    if k % 2 == 0 {
143                        content.push_str(&format!("  - Nested item {i}.{j}.{k}\n"));
144                        if k % 4 == 0 {
145                            content.push_str(&format!("    - Deep nested item {i}.{j}.{k}\n"));
146                        }
147                    }
148                }
149                content.push('\n');
150
151                // Add code blocks
152                if (i + j) % 3 == 0 {
153                    content.push_str("#### Code Example\n\n");
154                    content.push_str("```rust\n");
155                    content.push_str(&format!("fn section_{i}_{j}_function() {{\n"));
156                    content.push_str(&format!("    let value = {};\n", i * j));
157                    content.push_str("    println!(\"Processing section {}.{}\", value);\n");
158                    content.push_str("    \n");
159                    content.push_str("    // Complex logic here\n");
160                    content.push_str("    for idx in 0..value {\n");
161                    content.push_str("        process_item(idx);\n");
162                    content.push_str("    }\n");
163                    content.push_str("}\n");
164                    content.push_str("```\n\n");
165                }
166
167                // Add tables
168                if (i + j) % 4 == 0 {
169                    content.push_str("#### Data Table\n\n");
170                    content.push_str("| Column 1 | Column 2 | Column 3 | Column 4 |\n");
171                    content.push_str("|----------|----------|----------|----------|\n");
172                    for row in 1..=5 {
173                        content.push_str(&format!(
174                            "| Data {}.{}.{} | Value {} | Result {} | Status {} |\n",
175                            i,
176                            j,
177                            row,
178                            row * 10,
179                            row * 100,
180                            if row % 2 == 0 { "OK" } else { "PENDING" }
181                        ));
182                    }
183                    content.push('\n');
184                }
185
186                // Add links and URLs
187                content.push_str("#### References\n\n");
188                content.push_str(&format!(
189                    "- [Official docs](https://docs.example.com/section{i}/subsection{j})\n"
190                ));
191                content.push_str(&format!("- [API reference](https://api.example.com/v{j}/section{i})\n"));
192                content.push_str(&format!("- Bare URL: https://example{i}.com/path/{j}\n"));
193                content.push_str(&format!("- Contact: section{i}@example{j}.com\n"));
194                content.push('\n');
195            }
196        }
197
198        content
199    }
200
201    /// Generate huge test content (>100KB)
202    pub fn huge_content() -> String {
203        let mut content = String::new();
204        content.push_str("# Huge Test Document\n\n");
205        content.push_str("This is an extremely large test document for stress testing.\n\n");
206
207        // Generate the large content multiple times
208        let base_content = Self::large_content();
209        for i in 1..=5 {
210            content.push_str(&format!("# Part {i} of Huge Document\n\n"));
211            content.push_str(&base_content);
212            content.push_str("\n\n");
213        }
214
215        content
216    }
217}
218
219/// Performance benchmark runner
220pub struct PerformanceBenchmark {
221    rules: Vec<Box<dyn Rule>>,
222    measure_memory: bool,
223}
224
225impl PerformanceBenchmark {
226    pub fn new(rules: Vec<Box<dyn Rule>>) -> Self {
227        Self {
228            rules,
229            measure_memory: false,
230        }
231    }
232
233    pub fn with_memory_measurement(mut self) -> Self {
234        self.measure_memory = true;
235        self
236    }
237
238    /// Get current memory usage in MB (platform-specific)
239    /// Returns None if memory measurement is not available on the platform
240    fn get_memory_usage_mb() -> Option<f64> {
241        #[cfg(target_os = "linux")]
242        {
243            // Try to read from /proc/self/status
244            if let Ok(status) = std::fs::read_to_string("/proc/self/status") {
245                for line in status.lines() {
246                    if line.starts_with("VmRSS:")
247                        && let Some(kb_str) = line.split_whitespace().nth(1)
248                        && let Ok(kb) = kb_str.parse::<f64>()
249                    {
250                        return Some(kb / 1024.0); // Convert KB to MB
251                    }
252                }
253            }
254        }
255
256        // For other platforms, return None for now
257        // This can be enhanced with platform-specific implementations in the future
258        #[cfg(not(target_os = "linux"))]
259        {
260            // Memory measurement not implemented for this platform yet
261            // Could add support for macOS (using libc), Windows (using winapi), etc.
262        }
263
264        None
265    }
266
267    /// Benchmark a single rule with given content
268    pub fn benchmark_rule(&self, rule: &dyn Rule, content: &str) -> RulePerformanceResult {
269        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
270        let content_size = content.len();
271        let lines_count = content.lines().count();
272
273        // Warm up
274        let _ = rule.check(&ctx);
275
276        // Collect memory samples
277        let mut memory_samples = Vec::new();
278
279        // Take initial memory reading
280        if let Some(initial_mem) = Self::get_memory_usage_mb() {
281            memory_samples.push(initial_mem);
282        }
283
284        // Measure execution time
285        let start = Instant::now();
286        let warnings = rule.check(&ctx).unwrap_or_else(|_| vec![]);
287        let execution_time = start.elapsed();
288
289        // Take final memory reading
290        if let Some(final_mem) = Self::get_memory_usage_mb() {
291            memory_samples.push(final_mem);
292        }
293
294        // Calculate memory stats if we have samples
295        let memory_stats = if !memory_samples.is_empty() {
296            let peak = memory_samples.iter().cloned().fold(f64::MIN, f64::max);
297            let average = memory_samples.iter().sum::<f64>() / memory_samples.len() as f64;
298            Some(MemoryStats {
299                peak_memory_mb: peak,
300                average_memory_mb: average,
301                memory_samples,
302            })
303        } else {
304            None
305        };
306
307        RulePerformanceResult {
308            rule_name: rule.name().to_string(),
309            execution_time,
310            warnings_count: warnings.len(),
311            memory_stats,
312            content_size_bytes: content_size,
313            lines_processed: lines_count,
314        }
315    }
316
317    /// Benchmark all rules with given content
318    pub fn benchmark_all_rules(&self, content: &str) -> AggregatePerformanceResult {
319        let ctx = LintContext::new(content, crate::config::MarkdownFlavor::Standard);
320        let content_size = content.len();
321        let lines_count = content.lines().count();
322        let mut rule_results = Vec::new();
323        let mut total_warnings = 0;
324
325        // Warm up all rules
326        for rule in &self.rules {
327            let _ = rule.check(&ctx);
328        }
329
330        // Measure total execution time
331        let total_start = Instant::now();
332
333        // Benchmark each rule individually
334        for rule in &self.rules {
335            let result = self.benchmark_rule(rule.as_ref(), content);
336            total_warnings += result.warnings_count;
337            rule_results.push(result);
338        }
339
340        let total_execution_time = total_start.elapsed();
341
342        // Calculate performance metrics
343        let rules_per_second = self.rules.len() as f64 / total_execution_time.as_secs_f64();
344        let lines_per_second = lines_count as f64 / total_execution_time.as_secs_f64();
345        let bytes_per_second = content_size as f64 / total_execution_time.as_secs_f64();
346
347        AggregatePerformanceResult {
348            total_execution_time,
349            rule_results,
350            total_warnings,
351            content_size_bytes: content_size,
352            lines_processed: lines_count,
353            rules_per_second,
354            lines_per_second,
355            bytes_per_second,
356        }
357    }
358
359    /// Run comprehensive performance tests with different content sizes
360    pub fn run_comprehensive_benchmark(&self) -> HashMap<String, AggregatePerformanceResult> {
361        let mut results = HashMap::new();
362
363        println!("Running comprehensive performance benchmark...");
364
365        // Test with different content sizes
366        let test_cases = vec![
367            ("small", ContentGenerator::small_content()),
368            ("medium", ContentGenerator::medium_content()),
369            ("large", ContentGenerator::large_content()),
370            ("huge", ContentGenerator::huge_content()),
371        ];
372
373        for (size_name, content) in test_cases {
374            println!(
375                "Benchmarking {} content ({} bytes, {} lines)...",
376                size_name,
377                content.len(),
378                content.lines().count()
379            );
380
381            let result = self.benchmark_all_rules(&content);
382            results.insert(size_name.to_string(), result);
383        }
384
385        results
386    }
387
388    /// Print detailed performance report
389    pub fn print_performance_report(&self, results: &HashMap<String, AggregatePerformanceResult>) {
390        println!("\n=== RUMDL PERFORMANCE BENCHMARK REPORT ===\n");
391
392        for (size_name, result) in results {
393            println!("📊 {} Content Performance:", size_name.to_uppercase());
394            println!(
395                "   Content size: {} bytes ({} lines)",
396                result.content_size_bytes, result.lines_processed
397            );
398            println!(
399                "   Total execution time: {:.3}ms",
400                result.total_execution_time.as_secs_f64() * 1000.0
401            );
402            println!("   Total warnings found: {}", result.total_warnings);
403            println!("   Performance metrics:");
404            println!("     - Rules per second: {:.1}", result.rules_per_second);
405            println!("     - Lines per second: {:.0}", result.lines_per_second);
406            println!("     - Bytes per second: {:.0}", result.bytes_per_second);
407            println!();
408
409            // Show top 10 slowest rules
410            let mut sorted_rules = result.rule_results.clone();
411            sorted_rules.sort_by(|a, b| b.execution_time.cmp(&a.execution_time));
412
413            println!("   Top 10 slowest rules:");
414            for (i, rule_result) in sorted_rules.iter().take(10).enumerate() {
415                let percentage =
416                    (rule_result.execution_time.as_secs_f64() / result.total_execution_time.as_secs_f64()) * 100.0;
417                println!(
418                    "     {}. {} - {:.3}ms ({:.1}%) - {} warnings",
419                    i + 1,
420                    rule_result.rule_name,
421                    rule_result.execution_time.as_secs_f64() * 1000.0,
422                    percentage,
423                    rule_result.warnings_count
424                );
425            }
426            println!();
427        }
428
429        // Summary comparison
430        println!("📈 Performance Scaling Summary:");
431        if let (Some(small), Some(large)) = (results.get("small"), results.get("large")) {
432            let size_ratio = large.content_size_bytes as f64 / small.content_size_bytes as f64;
433            let time_ratio = large.total_execution_time.as_secs_f64() / small.total_execution_time.as_secs_f64();
434            println!("   Content size ratio (large/small): {size_ratio:.1}x");
435            println!("   Execution time ratio (large/small): {time_ratio:.1}x");
436            println!(
437                "   Scaling efficiency: {:.1}% (lower is better)",
438                (time_ratio / size_ratio) * 100.0
439            );
440        }
441        println!();
442    }
443}
444
#[cfg(test)]
mod tests {
    use super::*;

    /// Generated documents respect the expected size ordering, and the small one
    /// contains each kind of Markdown construct the benchmarks rely on.
    #[test]
    fn test_content_generators() {
        let small = ContentGenerator::small_content();
        let medium = ContentGenerator::medium_content();
        let large = ContentGenerator::large_content();
        let (small_len, medium_len, large_len) = (small.len(), medium.len(), large.len());

        // Sizes are compared against each other and the 1KB boundary only
        assert!(small_len < 1024, "Small content should be < 1KB, got {} bytes", small_len);
        assert!(
            medium_len >= 1024,
            "Medium content should be >= 1KB, got {} bytes",
            medium_len
        );
        assert!(
            large_len >= medium_len,
            "Large content should be >= medium content, got {} vs {} bytes",
            large_len,
            medium_len
        );

        // Verify content has various markdown elements
        let expected_markers = [
            ("# ", "Should contain headings"),
            ("- ", "Should contain lists"),
            ("```", "Should contain code blocks"),
            ("http", "Should contain URLs"),
        ];
        for (marker, message) in expected_markers {
            assert!(small.contains(marker), "{}", message);
        }
    }

    /// The memory probe never panics and reports per the platform's capabilities.
    #[test]
    fn test_memory_measurement() {
        let memory = PerformanceBenchmark::get_memory_usage_mb();

        // On Linux a reading is expected whenever /proc/self/status exists
        #[cfg(target_os = "linux")]
        {
            if std::path::Path::new("/proc/self/status").exists() {
                assert!(memory.is_some(), "Memory measurement should work on Linux");
                if let Some(mb) = memory {
                    assert!(mb > 0.0, "Memory usage should be positive");
                }
            }
        }

        // Other platforms currently have no implementation
        #[cfg(not(target_os = "linux"))]
        {
            assert!(memory.is_none(), "Memory measurement not implemented for this platform");
        }
    }

    /// Benchmarking one rule with memory measurement enabled fills in the basic
    /// result fields (and memory stats where the platform supports them).
    #[test]
    fn test_benchmark_rule_with_memory() {
        // Load the full default rule set and benchmark its first rule
        let config = crate::config::Config::default();
        let all_rules = crate::rules::all_rules(&config);
        let monitor = PerformanceBenchmark::new(all_rules.clone()).with_memory_measurement();

        // Nothing to check when no rules are available
        let Some(rule) = all_rules.first() else {
            return;
        };

        let content = "# Test\n\nSome content";
        let result = monitor.benchmark_rule(rule.as_ref(), content);

        // Check basic properties
        assert!(!result.rule_name.is_empty());
        assert!(result.execution_time.as_nanos() > 0);
        assert_eq!(result.content_size_bytes, content.len());
        assert_eq!(result.lines_processed, 3);

        // Memory stats might be None on unsupported platforms
        #[cfg(target_os = "linux")]
        {
            if std::path::Path::new("/proc/self/status").exists() {
                assert!(result.memory_stats.is_some(), "Should have memory stats on Linux");
            }
        }
    }

    /// The memory flag is off by default and switched on by the builder method.
    #[test]
    fn test_performance_benchmark_creation() {
        let no_rules: Vec<Box<dyn Rule>> = Vec::new();

        let plain = PerformanceBenchmark::new(no_rules);
        assert!(!plain.measure_memory);

        let with_memory = plain.with_memory_measurement();
        assert!(with_memory.measure_memory);
    }
}