ddex_builder/
caching.rs

1//! Caching optimizations for DDEX Builder
2//!
3//! This module provides multi-level caching for schemas, validation results,
4//! hash computations, and compiled templates to eliminate redundant work.
5
6use crate::error::BuildError;
7use crate::optimized_strings::OptimizedString;
8use blake3::Hasher as Blake3Hasher;
9use indexmap::IndexMap;
10use once_cell::sync::Lazy;
11use std::hash::{Hash, Hasher};
12use std::sync::{Arc, RwLock};
13use std::time::{Duration, Instant};
14
/// Global cache instance for schema and validation data.
///
/// Built lazily on first access. All `CacheManager` functions in this file
/// go through this single `RwLock`, so the sub-caches and the statistics are
/// always locked together.
static GLOBAL_CACHE: Lazy<Arc<RwLock<GlobalCache>>> =
    Lazy::new(|| Arc::new(RwLock::new(GlobalCache::new())));
18
/// Multi-level cache for DDEX Builder operations.
///
/// Groups the schema, validation, hash and template caches with a set of
/// hit/miss counters so they can be cleared and pruned together.
#[derive(Debug)]
pub struct GlobalCache {
    /// Schema cache (compiled schemas keyed by version and profile)
    schemas: SchemaCache,
    /// Validation results cache (keyed by content hash)
    validation_cache: ValidationCache,
    /// Hash computation cache
    hash_cache: HashCache,
    /// Template cache for common patterns
    template_cache: TemplateCache,
    /// Hit/miss statistics; reset to defaults by `clear_all`
    stats: CacheStats,
}
33
34impl GlobalCache {
35    /// Create a new global cache
36    pub fn new() -> Self {
37        Self {
38            schemas: SchemaCache::new(),
39            validation_cache: ValidationCache::new(),
40            hash_cache: HashCache::new(),
41            template_cache: TemplateCache::new(),
42            stats: CacheStats::default(),
43        }
44    }
45
46    /// Clear all caches
47    pub fn clear_all(&mut self) {
48        self.schemas.clear();
49        self.validation_cache.clear();
50        self.hash_cache.clear();
51        self.template_cache.clear();
52        self.stats = CacheStats::default();
53    }
54
55    /// Get cache statistics
56    pub fn stats(&self) -> &CacheStats {
57        &self.stats
58    }
59
60    /// Prune expired entries from all caches
61    pub fn prune_expired(&mut self) {
62        self.validation_cache.prune_expired();
63        self.hash_cache.prune_expired();
64        self.template_cache.prune_expired();
65    }
66}
67
/// Cache for compiled schemas.
///
/// Entries are never expired or evicted; they are only removed by `clear`.
#[derive(Debug)]
pub struct SchemaCache {
    /// Compiled schemas by version and profile
    schemas: IndexMap<SchemaKey, CachedSchema>,
    /// Per-schema usage metadata, stored under the same key as `schemas`
    metadata: IndexMap<SchemaKey, SchemaMetadata>,
}
76
77impl SchemaCache {
78    /// Create new schema cache
79    pub fn new() -> Self {
80        Self {
81            schemas: IndexMap::new(),
82            metadata: IndexMap::new(),
83        }
84    }
85
86    /// Get or compile a schema
87    pub fn get_or_compile(
88        &mut self,
89        version: &str,
90        profile: Option<&str>,
91        compiler: impl FnOnce() -> Result<CompiledSchema, BuildError>,
92    ) -> Result<&CompiledSchema, BuildError> {
93        let key = SchemaKey {
94            version: version.to_string(),
95            profile: profile.map(|p| p.to_string()),
96        };
97
98        if !self.schemas.contains_key(&key) {
99            let start_time = Instant::now();
100            let schema = compiler()?;
101            let compile_time = start_time.elapsed();
102
103            self.metadata.insert(
104                key.clone(),
105                SchemaMetadata {
106                    compile_time,
107                    last_used: Instant::now(),
108                    use_count: 0,
109                },
110            );
111
112            self.schemas.insert(
113                key.clone(),
114                CachedSchema {
115                    schema,
116                    created_at: Instant::now(),
117                },
118            );
119        }
120
121        // Update usage statistics
122        if let Some(metadata) = self.metadata.get_mut(&key) {
123            metadata.last_used = Instant::now();
124            metadata.use_count += 1;
125        }
126
127        Ok(&self.schemas.get(&key).unwrap().schema)
128    }
129
130    /// Check if schema is cached
131    pub fn contains(&self, version: &str, profile: Option<&str>) -> bool {
132        let key = SchemaKey {
133            version: version.to_string(),
134            profile: profile.map(|p| p.to_string()),
135        };
136        self.schemas.contains_key(&key)
137    }
138
139    /// Clear all schemas
140    pub fn clear(&mut self) {
141        self.schemas.clear();
142        self.metadata.clear();
143    }
144
145    /// Get memory usage
146    pub fn memory_usage(&self) -> usize {
147        self.schemas
148            .values()
149            .map(|cached| cached.schema.memory_footprint())
150            .sum()
151    }
152}
153
/// Cache key for schemas: a version string plus an optional profile.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SchemaKey {
    /// Schema version, e.g. "4.3"
    version: String,
    /// Profile name; `None` when no profile was requested
    profile: Option<String>,
}
160
/// Cached schema with metadata
#[derive(Debug)]
// `created_at` is written but not read anywhere in this file.
#[allow(dead_code)]
struct CachedSchema {
    /// The compiled schema handed back to callers
    schema: CompiledSchema,
    /// When the schema was inserted into the cache
    created_at: Instant,
}
168
/// Schema metadata for statistics
#[derive(Debug)]
// The fields are written by `SchemaCache::get_or_compile` but not read
// anywhere in this file.
#[allow(dead_code)]
struct SchemaMetadata {
    /// How long the compiler closure took to produce the schema
    compile_time: Duration,
    /// Time of the most recent `get_or_compile` call for this schema
    last_used: Instant,
    /// Number of `get_or_compile` calls served for this schema
    use_count: usize,
}
177
/// Compiled schema representation.
///
/// This is the value stored in `SchemaCache`; it is `Clone` because
/// `CacheManager::get_schema` hands out copies rather than references.
#[derive(Debug, Clone)]
pub struct CompiledSchema {
    /// Version identifier
    pub version: String,
    /// Profile identifier (`None` when the schema is not profile-specific)
    pub profile: Option<String>,
    /// Validation rules
    pub rules: Vec<ValidationRule>,
    /// Required elements
    pub required_elements: Vec<String>,
    /// Element constraints, keyed by a string identifying the element
    pub element_constraints: IndexMap<String, ElementConstraint>,
}
192
193impl CompiledSchema {
194    /// Calculate memory footprint
195    pub fn memory_footprint(&self) -> usize {
196        std::mem::size_of::<Self>()
197            + self.version.len()
198            + self.profile.as_ref().map_or(0, |p| p.len())
199            + self.rules.len() * std::mem::size_of::<ValidationRule>()
200            + self
201                .required_elements
202                .iter()
203                .map(|e| e.len())
204                .sum::<usize>()
205            + self
206                .element_constraints
207                .keys()
208                .map(|k| k.len())
209                .sum::<usize>()
210    }
211}
212
/// Validation rule for cached schemas.
///
/// Stored in `CompiledSchema::rules`.
#[derive(Debug, Clone)]
pub struct ValidationRule {
    /// Path to element being validated
    pub element_path: String,
    /// Type of validation rule
    pub rule_type: RuleType,
    /// Rule parameters (interpretation is not defined in this file)
    pub parameters: Vec<String>,
}
223
/// Type of validation rule
#[derive(Debug, Clone)]
pub enum RuleType {
    /// Field is required
    Required,
    /// Must match pattern (the pattern syntax is not defined in this file)
    Pattern(String),
    /// Numeric range — presumably `(min, max)`; confirm against the validator
    Range(f64, f64),
    /// String length range — presumably `(min, max)`; confirm against the validator
    Length(usize, usize),
    /// Custom validation, identified by a string
    Custom(String),
}
238
/// Schema constraints for a single element.
///
/// Stored as the values of `CompiledSchema::element_constraints`.
#[derive(Debug, Clone)]
pub struct ElementConstraint {
    /// Minimum occurrences
    pub min_occurs: usize,
    /// Maximum occurrences (None = unbounded)
    pub max_occurs: Option<usize>,
    /// Data type name
    pub data_type: String,
}
249
/// Cache for validation results.
///
/// Bounded by `config.max_entries` and expired after `config.ttl`.
#[derive(Debug)]
pub struct ValidationCache {
    /// Validation results by content hash
    results: IndexMap<String, CachedValidationResult>,
    /// Cache configuration (capacity and time-to-live)
    config: ValidationCacheConfig,
}
258
259impl ValidationCache {
260    /// Create new validation cache
261    pub fn new() -> Self {
262        Self {
263            results: IndexMap::new(),
264            config: ValidationCacheConfig::default(),
265        }
266    }
267
268    /// Get cached validation result
269    pub fn get(&mut self, content_hash: &str) -> Option<ValidationResult> {
270        if let Some(cached) = self.results.get_mut(content_hash) {
271            // Check if expired
272            if cached.created_at.elapsed() > self.config.ttl {
273                self.results.shift_remove(content_hash);
274                return None;
275            }
276
277            cached.last_accessed = Instant::now();
278            cached.access_count += 1;
279            Some(cached.result.clone())
280        } else {
281            None
282        }
283    }
284
285    /// Cache validation result
286    pub fn insert(&mut self, content_hash: String, result: ValidationResult) {
287        // Evict old entries if cache is full
288        if self.results.len() >= self.config.max_entries {
289            self.evict_lru();
290        }
291
292        self.results.insert(
293            content_hash,
294            CachedValidationResult {
295                result,
296                created_at: Instant::now(),
297                last_accessed: Instant::now(),
298                access_count: 0,
299            },
300        );
301    }
302
303    /// Evict least recently used entry
304    fn evict_lru(&mut self) {
305        if let Some((key, _)) = self
306            .results
307            .iter()
308            .min_by_key(|(_, cached)| cached.last_accessed)
309            .map(|(k, v)| (k.clone(), v.last_accessed))
310        {
311            self.results.shift_remove(&key);
312        }
313    }
314
315    /// Prune expired entries
316    pub fn prune_expired(&mut self) {
317        let ttl = self.config.ttl;
318        self.results
319            .retain(|_, cached| cached.created_at.elapsed() <= ttl);
320    }
321
322    /// Clear all validation results
323    pub fn clear(&mut self) {
324        self.results.clear();
325    }
326
327    /// Get cache hit rate
328    pub fn hit_rate(&self) -> f64 {
329        if self.results.is_empty() {
330            0.0
331        } else {
332            let total_accesses: usize = self
333                .results
334                .values()
335                .map(|cached| cached.access_count)
336                .sum();
337            if total_accesses == 0 {
338                0.0
339            } else {
340                self.results.len() as f64 / total_accesses as f64
341            }
342        }
343    }
344}
345
/// Cached validation result together with its bookkeeping
#[derive(Debug)]
struct CachedValidationResult {
    /// The stored validation outcome
    result: ValidationResult,
    /// Insertion time; compared against the TTL for expiry
    created_at: Instant,
    /// Time of the latest successful `get`; used to pick the LRU victim
    last_accessed: Instant,
    /// Number of successful `get` calls for this entry
    access_count: usize,
}
354
/// Limits applied by the validation cache.
#[derive(Debug)]
struct ValidationCacheConfig {
    /// Capacity: an insert at this size evicts an entry first
    max_entries: usize,
    /// Age after which an entry counts as expired
    ttl: Duration,
}

impl Default for ValidationCacheConfig {
    /// 1000 entries, expiring five minutes after insertion.
    fn default() -> Self {
        const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60);

        Self {
            max_entries: 1_000,
            ttl: FIVE_MINUTES,
        }
    }
}
370
/// Validation result.
///
/// This is the value stored in, and cloned out of, `ValidationCache`.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Whether validation passed
    pub is_valid: bool,
    /// List of errors
    pub errors: Vec<String>,
    /// List of warnings
    pub warnings: Vec<String>,
    /// Time taken to validate
    pub validation_time: Duration,
}
383
/// Cache for hash computations.
///
/// Bounded by `config.max_entries` and expired after `config.ttl`.
#[derive(Debug)]
pub struct HashCache {
    /// Computed hashes, looked up by `HashKey`
    hashes: IndexMap<HashKey, CachedHash>,
    /// Configuration (capacity and time-to-live)
    config: HashCacheConfig,
}
392
393impl HashCache {
394    /// Create new hash cache
395    pub fn new() -> Self {
396        Self {
397            hashes: IndexMap::new(),
398            config: HashCacheConfig::default(),
399        }
400    }
401
402    /// Get or compute hash
403    pub fn get_or_compute<T: Hash>(
404        &mut self,
405        key: &HashKey,
406        value: &T,
407        hasher_fn: impl FnOnce(&T) -> String,
408    ) -> String {
409        if let Some(cached) = self.hashes.get_mut(key) {
410            if cached.created_at.elapsed() <= self.config.ttl {
411                cached.access_count += 1;
412                return cached.hash.clone();
413            } else {
414                // Expired, remove and recompute
415                self.hashes.shift_remove(key);
416            }
417        }
418
419        // Compute new hash
420        let hash = hasher_fn(value);
421
422        // Evict if necessary
423        if self.hashes.len() >= self.config.max_entries {
424            self.evict_random();
425        }
426
427        self.hashes.insert(
428            key.clone(),
429            CachedHash {
430                hash: hash.clone(),
431                created_at: Instant::now(),
432                access_count: 1,
433            },
434        );
435
436        hash
437    }
438
439    /// Evict a random entry (simple eviction strategy)
440    fn evict_random(&mut self) {
441        if let Some(key) = self.hashes.keys().next().cloned() {
442            self.hashes.shift_remove(&key);
443        }
444    }
445
446    /// Prune expired entries
447    pub fn prune_expired(&mut self) {
448        let ttl = self.config.ttl;
449        self.hashes
450            .retain(|_, cached| cached.created_at.elapsed() <= ttl);
451    }
452
453    /// Clear all hashes
454    pub fn clear(&mut self) {
455        self.hashes.clear();
456    }
457}
458
/// Hash key for caching.
///
/// All three fields take part in equality and hashing, so two keys match
/// only when the strings are identical (comparison is case-sensitive).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HashKey {
    /// Hashing algorithm name, stored as given by the caller (e.g., "blake3")
    pub algorithm: String,
    /// Type of content being hashed (e.g., "track")
    pub content_type: String,
    /// Identifier of the content item being hashed (e.g., "T001"); this is
    /// part of the lookup key, not the computed hash
    pub content_id: String,
}
469
/// Cached hash result together with its bookkeeping
#[derive(Debug)]
struct CachedHash {
    /// The computed hash string returned to callers
    hash: String,
    /// Insertion time; compared against the TTL for expiry
    created_at: Instant,
    /// Number of times this entry was returned, counting the initial compute
    access_count: usize,
}
477
/// Limits applied by the hash cache.
#[derive(Debug)]
struct HashCacheConfig {
    /// Capacity: an insert at this size evicts an entry first
    max_entries: usize,
    /// Age after which an entry counts as expired
    ttl: Duration,
}

impl Default for HashCacheConfig {
    /// 500 entries, expiring ten minutes after insertion.
    fn default() -> Self {
        const TEN_MINUTES: Duration = Duration::from_secs(10 * 60);

        Self {
            max_entries: 500,
            ttl: TEN_MINUTES,
        }
    }
}
493
/// Cache for XML templates and patterns.
///
/// Bounded by `config.max_entries` and expired after `config.ttl`.
#[derive(Debug)]
pub struct TemplateCache {
    /// Compiled templates, looked up by `TemplateKey`
    templates: IndexMap<TemplateKey, CachedTemplate>,
    /// Configuration (capacity and time-to-live)
    config: TemplateCacheConfig,
}
502
503impl TemplateCache {
504    /// Create new template cache
505    pub fn new() -> Self {
506        Self {
507            templates: IndexMap::new(),
508            config: TemplateCacheConfig::default(),
509        }
510    }
511
512    /// Get or compile template
513    pub fn get_or_compile(
514        &mut self,
515        key: &TemplateKey,
516        compiler: impl FnOnce() -> CompiledTemplate,
517    ) -> &CompiledTemplate {
518        if !self.templates.contains_key(key) {
519            if self.templates.len() >= self.config.max_entries {
520                self.evict_lru();
521            }
522
523            let template = compiler();
524            self.templates.insert(
525                key.clone(),
526                CachedTemplate {
527                    template,
528                    created_at: Instant::now(),
529                    last_used: Instant::now(),
530                    use_count: 0,
531                },
532            );
533        }
534
535        // Update usage
536        if let Some(cached) = self.templates.get_mut(key) {
537            cached.last_used = Instant::now();
538            cached.use_count += 1;
539        }
540
541        &self.templates.get(key).unwrap().template
542    }
543
544    /// Evict least recently used template
545    fn evict_lru(&mut self) {
546        if let Some((key, _)) = self
547            .templates
548            .iter()
549            .min_by_key(|(_, cached)| cached.last_used)
550            .map(|(k, v)| (k.clone(), v.last_used))
551        {
552            self.templates.shift_remove(&key);
553        }
554    }
555
556    /// Prune expired templates
557    pub fn prune_expired(&mut self) {
558        let ttl = self.config.ttl;
559        self.templates
560            .retain(|_, cached| cached.created_at.elapsed() <= ttl);
561    }
562
563    /// Clear all templates
564    pub fn clear(&mut self) {
565        self.templates.clear();
566    }
567}
568
/// Template key used to look up entries in `TemplateCache`.
///
/// All three fields take part in equality and hashing.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TemplateKey {
    /// Element type name (e.g., "SoundRecording")
    pub element_type: String,
    /// DDEX version string (e.g., "4.3")
    pub version: String,
    /// Optional variant identifier
    pub variant: Option<String>,
}
579
/// Cached template together with its bookkeeping
#[derive(Debug)]
struct CachedTemplate {
    /// The compiled template handed back to callers
    template: CompiledTemplate,
    /// Insertion time; compared against the TTL when pruning
    created_at: Instant,
    /// Time of the latest `get_or_compile` call; used to pick the LRU victim
    last_used: Instant,
    /// Number of `get_or_compile` calls served by this entry
    use_count: usize,
}
588
/// Compiled template for fast XML generation.
///
/// This is the value stored in `TemplateCache`.
#[derive(Debug, Clone)]
pub struct CompiledTemplate {
    /// Template parts (static strings and placeholders), in output order
    pub parts: Vec<TemplatePart>,
    /// Required fields
    pub required_fields: Vec<String>,
    /// Estimated output size (unit not stated here; presumably bytes)
    pub estimated_size: usize,
}
599
/// Template part (static or dynamic)
#[derive(Debug, Clone)]
pub enum TemplatePart {
    /// Static string value (e.g., an opening or closing tag)
    Static(OptimizedString),
    /// Placeholder for dynamic field name (e.g., "title")
    Placeholder(String),
}
608
/// Limits applied by the template cache.
#[derive(Debug)]
struct TemplateCacheConfig {
    /// Capacity: an insert at this size evicts an entry first
    max_entries: usize,
    /// Age after which an entry counts as expired
    ttl: Duration,
}

impl Default for TemplateCacheConfig {
    /// 100 entries, expiring thirty minutes after insertion.
    fn default() -> Self {
        const THIRTY_MINUTES: Duration = Duration::from_secs(30 * 60);

        Self {
            max_entries: 100,
            ttl: THIRTY_MINUTES,
        }
    }
}
624
/// Cache statistics: hit and miss counters for each of the four caches.
#[derive(Debug, Default, Clone)]
pub struct CacheStats {
    /// Schema cache hits
    pub schema_hits: usize,
    /// Schema cache misses
    pub schema_misses: usize,
    /// Validation cache hits
    pub validation_hits: usize,
    /// Validation cache misses
    pub validation_misses: usize,
    /// Hash cache hits
    pub hash_hits: usize,
    /// Hash cache misses
    pub hash_misses: usize,
    /// Template cache hits
    pub template_hits: usize,
    /// Template cache misses
    pub template_misses: usize,
}

impl CacheStats {
    /// Calculate overall hit rate across all four caches.
    ///
    /// Returns a fraction in `[0.0, 1.0]`, or `0.0` when nothing has been
    /// recorded yet.
    pub fn overall_hit_rate(&self) -> f64 {
        let hits = self.schema_hits + self.validation_hits + self.hash_hits + self.template_hits;
        let misses = self.schema_misses
            + self.validation_misses
            + self.hash_misses
            + self.template_misses;

        Self::hit_ratio(hits, misses)
    }

    /// Get cache efficiency summary.
    ///
    /// One line containing the overall and per-cache hit rates as
    /// percentages with one decimal place.
    pub fn summary(&self) -> String {
        format!(
            "Cache Hit Rate: {:.1}% (Schema: {:.1}%, Validation: {:.1}%, Hash: {:.1}%, Template: {:.1}%)",
            self.overall_hit_rate() * 100.0,
            self.schema_hit_rate() * 100.0,
            self.validation_hit_rate() * 100.0,
            self.hash_hit_rate() * 100.0,
            self.template_hit_rate() * 100.0,
        )
    }

    /// Hit rate of the schema cache
    fn schema_hit_rate(&self) -> f64 {
        Self::hit_ratio(self.schema_hits, self.schema_misses)
    }

    /// Hit rate of the validation cache
    fn validation_hit_rate(&self) -> f64 {
        Self::hit_ratio(self.validation_hits, self.validation_misses)
    }

    /// Hit rate of the hash cache
    fn hash_hit_rate(&self) -> f64 {
        Self::hit_ratio(self.hash_hits, self.hash_misses)
    }

    /// Hit rate of the template cache
    fn template_hit_rate(&self) -> f64 {
        Self::hit_ratio(self.template_hits, self.template_misses)
    }

    /// Fraction of requests that were hits; `0.0` when there were no
    /// requests, so callers never divide by zero.
    fn hit_ratio(hits: usize, misses: usize) -> f64 {
        let total = hits + misses;
        if total == 0 {
            0.0
        } else {
            hits as f64 / total as f64
        }
    }
}
712
713/// Public API for cache operations
714pub struct CacheManager;
715
716impl CacheManager {
717    /// Get global cache statistics
718    pub fn stats() -> CacheStats {
719        GLOBAL_CACHE.read().unwrap().stats().clone()
720    }
721
722    /// Clear all global caches
723    pub fn clear_all() {
724        GLOBAL_CACHE.write().unwrap().clear_all();
725    }
726
727    /// Prune expired entries
728    pub fn prune_expired() {
729        GLOBAL_CACHE.write().unwrap().prune_expired();
730    }
731
732    /// Get schema from cache or compile
733    pub fn get_schema(
734        version: &str,
735        profile: Option<&str>,
736        compiler: impl FnOnce() -> Result<CompiledSchema, BuildError>,
737    ) -> Result<CompiledSchema, BuildError> {
738        let mut cache = GLOBAL_CACHE.write().unwrap();
739        let schema = cache.schemas.get_or_compile(version, profile, compiler)?;
740        Ok(schema.clone())
741    }
742
743    /// Fast hash computation with caching
744    pub fn fast_hash<T: Hash + std::fmt::Debug>(
745        algorithm: &str,
746        content_type: &str,
747        content_id: &str,
748        value: &T,
749    ) -> String {
750        let key = HashKey {
751            algorithm: algorithm.to_string(),
752            content_type: content_type.to_string(),
753            content_id: content_id.to_string(),
754        };
755
756        let mut cache = GLOBAL_CACHE.write().unwrap();
757        cache.hash_cache.get_or_compute(&key, value, |v| {
758            match algorithm {
759                "blake3" => {
760                    let mut hasher = Blake3Hasher::new();
761                    let bytes = format!("{:?}", v); // Simple serialization for hashing
762                    hasher.update(bytes.as_bytes());
763                    hasher.finalize().to_hex().to_string()
764                }
765                _ => {
766                    // Fallback to default hasher
767                    let mut hasher = std::hash::DefaultHasher::new();
768                    v.hash(&mut hasher);
769                    format!("{:016x}", hasher.finish())
770                }
771            }
772        })
773    }
774}
775
/// Unit tests covering the hit and miss paths of each cache.
#[cfg(test)]
mod tests {
    use super::*;

    /// A schema is compiled once; the second lookup must not call the compiler.
    #[test]
    fn test_schema_cache() {
        let mut cache = SchemaCache::new();

        // Test cache miss and compilation
        let schema = cache
            .get_or_compile("4.3", None, || {
                Ok(CompiledSchema {
                    version: "4.3".to_string(),
                    profile: None,
                    rules: vec![],
                    required_elements: vec!["MessageHeader".to_string()],
                    element_constraints: IndexMap::new(),
                })
            })
            .unwrap();

        assert_eq!(schema.version, "4.3");
        assert!(cache.contains("4.3", None));

        // Test cache hit: the compiler closure panics if it is ever invoked
        let schema2 = cache
            .get_or_compile("4.3", None, || panic!("Should not compile again"))
            .unwrap();

        assert_eq!(schema2.version, "4.3");
    }

    /// A validation result can be read back after it was inserted.
    #[test]
    fn test_validation_cache() {
        let mut cache = ValidationCache::new();
        let hash = "test_hash".to_string();

        // Cache miss
        assert!(cache.get(&hash).is_none());

        // Insert result
        let result = ValidationResult {
            is_valid: true,
            errors: vec![],
            warnings: vec![],
            validation_time: Duration::from_millis(10),
        };
        cache.insert(hash.clone(), result);

        // Cache hit
        let cached = cache.get(&hash).unwrap();
        assert!(cached.is_valid);
    }

    /// A hash is computed once; the second lookup must not call the hasher.
    #[test]
    fn test_hash_cache() {
        let mut cache = HashCache::new();
        let key = HashKey {
            algorithm: "blake3".to_string(),
            content_type: "track".to_string(),
            content_id: "T001".to_string(),
        };

        let test_value = "test content";

        // First computation
        let hash1 = cache.get_or_compute(&key, &test_value, |v| format!("hash_{}", v));

        // Second computation (should be cached)
        let hash2 = cache.get_or_compute(&key, &test_value, |_| panic!("Should not compute again"));

        assert_eq!(hash1, hash2);
        assert_eq!(hash1, "hash_test content");
    }

    /// Exercises the global cache through `CacheManager`.
    #[test]
    fn test_cache_manager() {
        // Start from a clean global state so the statistics are zeroed.
        CacheManager::clear_all();
        let stats = CacheManager::stats();
        assert_eq!(stats.overall_hit_rate(), 0.0);

        // Test fast hash
        let hash1 = CacheManager::fast_hash("blake3", "test", "item1", &"content");
        let hash2 = CacheManager::fast_hash("blake3", "test", "item1", &"content");
        assert_eq!(hash1, hash2); // Should be cached
    }

    /// A template is compiled once; the second lookup must not call the compiler.
    #[test]
    fn test_template_cache() {
        let mut cache = TemplateCache::new();
        let key = TemplateKey {
            element_type: "SoundRecording".to_string(),
            version: "4.3".to_string(),
            variant: None,
        };

        // First access - compile template
        let template = cache.get_or_compile(&key, || CompiledTemplate {
            parts: vec![
                TemplatePart::Static(OptimizedString::new("<SoundRecording>")),
                TemplatePart::Placeholder("title".to_string()),
                TemplatePart::Static(OptimizedString::new("</SoundRecording>")),
            ],
            required_fields: vec!["title".to_string()],
            estimated_size: 100,
        });

        assert_eq!(template.required_fields.len(), 1);

        // Second access - should use cached
        let template2 = cache.get_or_compile(&key, || panic!("Should not compile again"));

        assert_eq!(template2.required_fields.len(), 1);
    }
}