ddex_builder/
caching.rs

//! Caching optimizations for DDEX Builder
//!
//! This module provides multi-level caching for schemas, validation results,
//! hash computations, and compiled templates to eliminate redundant work.
5
6use crate::error::BuildError;
7use crate::optimized_strings::OptimizedString;
8use indexmap::IndexMap;
9use std::hash::{Hash, Hasher};
10use std::sync::{Arc, RwLock};
11use std::time::{Duration, Instant};
12use once_cell::sync::Lazy;
13use blake3::Hasher as Blake3Hasher;
14
15/// Global cache instance for schema and validation data
16static GLOBAL_CACHE: Lazy<Arc<RwLock<GlobalCache>>> = Lazy::new(|| {
17    Arc::new(RwLock::new(GlobalCache::new()))
18});
19
20/// Multi-level cache for DDEX Builder operations
21#[derive(Debug)]
22pub struct GlobalCache {
23    /// Schema cache
24    schemas: SchemaCache,
25    /// Validation results cache
26    validation_cache: ValidationCache,
27    /// Hash computation cache
28    hash_cache: HashCache,
29    /// Template cache for common patterns
30    template_cache: TemplateCache,
31    /// Statistics
32    stats: CacheStats,
33}
34
35impl GlobalCache {
36    /// Create a new global cache
37    pub fn new() -> Self {
38        Self {
39            schemas: SchemaCache::new(),
40            validation_cache: ValidationCache::new(),
41            hash_cache: HashCache::new(),
42            template_cache: TemplateCache::new(),
43            stats: CacheStats::default(),
44        }
45    }
46    
47    /// Clear all caches
48    pub fn clear_all(&mut self) {
49        self.schemas.clear();
50        self.validation_cache.clear();
51        self.hash_cache.clear();
52        self.template_cache.clear();
53        self.stats = CacheStats::default();
54    }
55    
56    /// Get cache statistics
57    pub fn stats(&self) -> &CacheStats {
58        &self.stats
59    }
60    
61    /// Prune expired entries from all caches
62    pub fn prune_expired(&mut self) {
63        self.validation_cache.prune_expired();
64        self.hash_cache.prune_expired();
65        self.template_cache.prune_expired();
66    }
67}
68
69/// Cache for compiled schemas
70#[derive(Debug)]
71pub struct SchemaCache {
72    /// Compiled schemas by version and profile
73    schemas: IndexMap<SchemaKey, CachedSchema>,
74    /// Schema metadata
75    metadata: IndexMap<SchemaKey, SchemaMetadata>,
76}
77
78impl SchemaCache {
79    /// Create new schema cache
80    pub fn new() -> Self {
81        Self {
82            schemas: IndexMap::new(),
83            metadata: IndexMap::new(),
84        }
85    }
86    
87    /// Get or compile a schema
88    pub fn get_or_compile(
89        &mut self,
90        version: &str,
91        profile: Option<&str>,
92        compiler: impl FnOnce() -> Result<CompiledSchema, BuildError>,
93    ) -> Result<&CompiledSchema, BuildError> {
94        let key = SchemaKey {
95            version: version.to_string(),
96            profile: profile.map(|p| p.to_string()),
97        };
98        
99        if !self.schemas.contains_key(&key) {
100            let start_time = Instant::now();
101            let schema = compiler()?;
102            let compile_time = start_time.elapsed();
103            
104            self.metadata.insert(key.clone(), SchemaMetadata {
105                compile_time,
106                last_used: Instant::now(),
107                use_count: 0,
108            });
109            
110            self.schemas.insert(key.clone(), CachedSchema {
111                schema,
112                created_at: Instant::now(),
113            });
114        }
115        
116        // Update usage statistics
117        if let Some(metadata) = self.metadata.get_mut(&key) {
118            metadata.last_used = Instant::now();
119            metadata.use_count += 1;
120        }
121        
122        Ok(&self.schemas.get(&key).unwrap().schema)
123    }
124    
125    /// Check if schema is cached
126    pub fn contains(&self, version: &str, profile: Option<&str>) -> bool {
127        let key = SchemaKey {
128            version: version.to_string(),
129            profile: profile.map(|p| p.to_string()),
130        };
131        self.schemas.contains_key(&key)
132    }
133    
134    /// Clear all schemas
135    pub fn clear(&mut self) {
136        self.schemas.clear();
137        self.metadata.clear();
138    }
139    
140    /// Get memory usage
141    pub fn memory_usage(&self) -> usize {
142        self.schemas.values()
143            .map(|cached| cached.schema.memory_footprint())
144            .sum()
145    }
146}
147
/// Lookup key for the schema cache: a version plus an optional profile.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct SchemaKey {
    /// Version string exactly as supplied by the caller (e.g. "4.3")
    version: String,
    /// Profile name, or `None` when no profile was supplied
    profile: Option<String>,
}
154
155/// Cached schema with metadata
156#[derive(Debug)]
157struct CachedSchema {
158    schema: CompiledSchema,
159    created_at: Instant,
160}
161
/// Usage bookkeeping recorded for each cached schema.
#[derive(Debug)]
struct SchemaMetadata {
    /// How long the compiler callback took to produce the schema
    compile_time: Duration,
    /// Time of the most recent lookup
    last_used: Instant,
    /// Number of lookups served, including the one that compiled it
    use_count: usize,
}
169
170/// Compiled schema representation
171#[derive(Debug, Clone)]
172pub struct CompiledSchema {
173    /// Version identifier
174    pub version: String,
175    /// Profile identifier
176    pub profile: Option<String>,
177    /// Validation rules
178    pub rules: Vec<ValidationRule>,
179    /// Required elements
180    pub required_elements: Vec<String>,
181    /// Element constraints
182    pub element_constraints: IndexMap<String, ElementConstraint>,
183}
184
185impl CompiledSchema {
186    /// Calculate memory footprint
187    pub fn memory_footprint(&self) -> usize {
188        std::mem::size_of::<Self>() + 
189        self.version.len() +
190        self.profile.as_ref().map_or(0, |p| p.len()) +
191        self.rules.len() * std::mem::size_of::<ValidationRule>() +
192        self.required_elements.iter().map(|e| e.len()).sum::<usize>() +
193        self.element_constraints.keys().map(|k| k.len()).sum::<usize>()
194    }
195}
196
197/// Validation rule
198#[derive(Debug, Clone)]
199pub struct ValidationRule {
200    pub element_path: String,
201    pub rule_type: RuleType,
202    pub parameters: Vec<String>,
203}
204
/// Kinds of validation rule.
#[derive(Debug, Clone)]
pub enum RuleType {
    /// No payload
    Required,
    /// Carries a pattern string (pattern syntax is not defined here)
    Pattern(String),
    /// Carries two `f64` bounds — presumably (min, max); confirm against the validator
    Range(f64, f64),
    /// Carries two `usize` bounds — presumably (min, max); confirm against the validator
    Length(usize, usize),
    /// Carries a free-form string identifying a custom rule
    Custom(String),
}
214
/// Occurrence and type constraint for one element.
#[derive(Debug, Clone)]
pub struct ElementConstraint {
    /// Minimum number of occurrences
    pub min_occurs: usize,
    /// Maximum number of occurrences; `None` presumably means unbounded (confirm)
    pub max_occurs: Option<usize>,
    /// Name of the element's data type
    pub data_type: String,
}
222
223/// Cache for validation results
224#[derive(Debug)]
225pub struct ValidationCache {
226    /// Validation results by content hash
227    results: IndexMap<String, CachedValidationResult>,
228    /// Cache configuration
229    config: ValidationCacheConfig,
230}
231
232impl ValidationCache {
233    /// Create new validation cache
234    pub fn new() -> Self {
235        Self {
236            results: IndexMap::new(),
237            config: ValidationCacheConfig::default(),
238        }
239    }
240    
241    /// Get cached validation result
242    pub fn get(&mut self, content_hash: &str) -> Option<ValidationResult> {
243        if let Some(cached) = self.results.get_mut(content_hash) {
244            // Check if expired
245            if cached.created_at.elapsed() > self.config.ttl {
246                self.results.remove(content_hash);
247                return None;
248            }
249            
250            cached.last_accessed = Instant::now();
251            cached.access_count += 1;
252            Some(cached.result.clone())
253        } else {
254            None
255        }
256    }
257    
258    /// Cache validation result
259    pub fn insert(&mut self, content_hash: String, result: ValidationResult) {
260        // Evict old entries if cache is full
261        if self.results.len() >= self.config.max_entries {
262            self.evict_lru();
263        }
264        
265        self.results.insert(content_hash, CachedValidationResult {
266            result,
267            created_at: Instant::now(),
268            last_accessed: Instant::now(),
269            access_count: 0,
270        });
271    }
272    
273    /// Evict least recently used entry
274    fn evict_lru(&mut self) {
275        if let Some((key, _)) = self.results.iter()
276            .min_by_key(|(_, cached)| cached.last_accessed)
277            .map(|(k, v)| (k.clone(), v.last_accessed))
278        {
279            self.results.remove(&key);
280        }
281    }
282    
283    /// Prune expired entries
284    pub fn prune_expired(&mut self) {
285        let ttl = self.config.ttl;
286        self.results.retain(|_, cached| cached.created_at.elapsed() <= ttl);
287    }
288    
289    /// Clear all validation results
290    pub fn clear(&mut self) {
291        self.results.clear();
292    }
293    
294    /// Get cache hit rate
295    pub fn hit_rate(&self) -> f64 {
296        if self.results.is_empty() {
297            0.0
298        } else {
299            let total_accesses: usize = self.results.values()
300                .map(|cached| cached.access_count)
301                .sum();
302            if total_accesses == 0 {
303                0.0
304            } else {
305                self.results.len() as f64 / total_accesses as f64
306            }
307        }
308    }
309}
310
311/// Cached validation result
312#[derive(Debug)]
313struct CachedValidationResult {
314    result: ValidationResult,
315    created_at: Instant,
316    last_accessed: Instant,
317    access_count: usize,
318}
319
/// Settings for the validation cache.
#[derive(Debug)]
struct ValidationCacheConfig {
    /// Entry count at which inserts start evicting
    max_entries: usize,
    /// Maximum age of an entry before it is treated as expired
    ttl: Duration,
}

impl Default for ValidationCacheConfig {
    /// Defaults: 1000 entries, 5-minute TTL.
    fn default() -> Self {
        const DEFAULT_MAX_ENTRIES: usize = 1000;
        const DEFAULT_TTL: Duration = Duration::from_secs(300); // 5 minutes

        Self {
            max_entries: DEFAULT_MAX_ENTRIES,
            ttl: DEFAULT_TTL,
        }
    }
}
335
/// Outcome of a validation run.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Whether validation passed
    pub is_valid: bool,
    /// Error messages
    pub errors: Vec<String>,
    /// Warning messages
    pub warnings: Vec<String>,
    /// Time spent validating
    pub validation_time: Duration,
}
344
345/// Cache for hash computations
346#[derive(Debug)]
347pub struct HashCache {
348    /// Computed hashes
349    hashes: IndexMap<HashKey, CachedHash>,
350    /// Configuration
351    config: HashCacheConfig,
352}
353
354impl HashCache {
355    /// Create new hash cache
356    pub fn new() -> Self {
357        Self {
358            hashes: IndexMap::new(),
359            config: HashCacheConfig::default(),
360        }
361    }
362    
363    /// Get or compute hash
364    pub fn get_or_compute<T: Hash>(
365        &mut self,
366        key: &HashKey,
367        value: &T,
368        hasher_fn: impl FnOnce(&T) -> String,
369    ) -> String {
370        if let Some(cached) = self.hashes.get_mut(key) {
371            if cached.created_at.elapsed() <= self.config.ttl {
372                cached.access_count += 1;
373                return cached.hash.clone();
374            } else {
375                // Expired, remove and recompute
376                self.hashes.remove(key);
377            }
378        }
379        
380        // Compute new hash
381        let hash = hasher_fn(value);
382        
383        // Evict if necessary
384        if self.hashes.len() >= self.config.max_entries {
385            self.evict_random();
386        }
387        
388        self.hashes.insert(key.clone(), CachedHash {
389            hash: hash.clone(),
390            created_at: Instant::now(),
391            access_count: 1,
392        });
393        
394        hash
395    }
396    
397    /// Evict a random entry (simple eviction strategy)
398    fn evict_random(&mut self) {
399        if let Some(key) = self.hashes.keys().next().cloned() {
400            self.hashes.remove(&key);
401        }
402    }
403    
404    /// Prune expired entries
405    pub fn prune_expired(&mut self) {
406        let ttl = self.config.ttl;
407        self.hashes.retain(|_, cached| cached.created_at.elapsed() <= ttl);
408    }
409    
410    /// Clear all hashes
411    pub fn clear(&mut self) {
412        self.hashes.clear();
413    }
414}
415
/// Lookup key for the hash cache.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct HashKey {
    /// Name of the hash algorithm (e.g. "blake3")
    pub algorithm: String,
    /// Kind of content being hashed (e.g. "track")
    pub content_type: String,
    /// Identifier of the content being hashed
    pub content_id: String,
}
423
/// A computed hash plus the bookkeeping the hash cache keeps for it.
#[derive(Debug)]
struct CachedHash {
    /// The hash string produced by the hasher callback
    hash: String,
    /// Insertion time; compared against the TTL to decide expiry
    created_at: Instant,
    /// Number of times this entry has been served, counting the initial computation
    access_count: usize,
}
431
/// Settings for the hash cache.
#[derive(Debug)]
struct HashCacheConfig {
    /// Entry count at which inserts start evicting
    max_entries: usize,
    /// Maximum age of an entry before it is treated as expired
    ttl: Duration,
}

impl Default for HashCacheConfig {
    /// Defaults: 500 entries, 10-minute TTL.
    fn default() -> Self {
        const DEFAULT_MAX_ENTRIES: usize = 500;
        const DEFAULT_TTL: Duration = Duration::from_secs(600); // 10 minutes

        Self {
            max_entries: DEFAULT_MAX_ENTRIES,
            ttl: DEFAULT_TTL,
        }
    }
}
447
448/// Cache for XML templates and patterns
449#[derive(Debug)]
450pub struct TemplateCache {
451    /// Compiled templates
452    templates: IndexMap<TemplateKey, CachedTemplate>,
453    /// Configuration
454    config: TemplateCacheConfig,
455}
456
457impl TemplateCache {
458    /// Create new template cache
459    pub fn new() -> Self {
460        Self {
461            templates: IndexMap::new(),
462            config: TemplateCacheConfig::default(),
463        }
464    }
465    
466    /// Get or compile template
467    pub fn get_or_compile(
468        &mut self,
469        key: &TemplateKey,
470        compiler: impl FnOnce() -> CompiledTemplate,
471    ) -> &CompiledTemplate {
472        if !self.templates.contains_key(key) {
473            if self.templates.len() >= self.config.max_entries {
474                self.evict_lru();
475            }
476            
477            let template = compiler();
478            self.templates.insert(key.clone(), CachedTemplate {
479                template,
480                created_at: Instant::now(),
481                last_used: Instant::now(),
482                use_count: 0,
483            });
484        }
485        
486        // Update usage
487        if let Some(cached) = self.templates.get_mut(key) {
488            cached.last_used = Instant::now();
489            cached.use_count += 1;
490        }
491        
492        &self.templates.get(key).unwrap().template
493    }
494    
495    /// Evict least recently used template
496    fn evict_lru(&mut self) {
497        if let Some((key, _)) = self.templates.iter()
498            .min_by_key(|(_, cached)| cached.last_used)
499            .map(|(k, v)| (k.clone(), v.last_used))
500        {
501            self.templates.remove(&key);
502        }
503    }
504    
505    /// Prune expired templates
506    pub fn prune_expired(&mut self) {
507        let ttl = self.config.ttl;
508        self.templates.retain(|_, cached| cached.created_at.elapsed() <= ttl);
509    }
510    
511    /// Clear all templates
512    pub fn clear(&mut self) {
513        self.templates.clear();
514    }
515}
516
/// Lookup key for the template cache.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TemplateKey {
    /// Element type the template produces (e.g. "SoundRecording")
    pub element_type: String,
    /// Version string (e.g. "4.3")
    pub version: String,
    /// Optional variant name distinguishing templates of the same type and version
    pub variant: Option<String>,
}
524
525/// Cached template
526#[derive(Debug)]
527struct CachedTemplate {
528    template: CompiledTemplate,
529    created_at: Instant,
530    last_used: Instant,
531    use_count: usize,
532}
533
534/// Compiled template for fast XML generation
535#[derive(Debug, Clone)]
536pub struct CompiledTemplate {
537    /// Template parts (static strings and placeholders)
538    pub parts: Vec<TemplatePart>,
539    /// Required fields
540    pub required_fields: Vec<String>,
541    /// Estimated output size
542    pub estimated_size: usize,
543}
544
545/// Template part (static or dynamic)
546#[derive(Debug, Clone)]
547pub enum TemplatePart {
548    Static(OptimizedString),
549    Placeholder(String), // Field name
550}
551
/// Settings for the template cache.
#[derive(Debug)]
struct TemplateCacheConfig {
    /// Entry count at which inserts start evicting
    max_entries: usize,
    /// Maximum age of an entry before pruning removes it
    ttl: Duration,
}

impl Default for TemplateCacheConfig {
    /// Defaults: 100 entries, 30-minute TTL.
    fn default() -> Self {
        const DEFAULT_MAX_ENTRIES: usize = 100;
        const DEFAULT_TTL: Duration = Duration::from_secs(1800); // 30 minutes

        Self {
            max_entries: DEFAULT_MAX_ENTRIES,
            ttl: DEFAULT_TTL,
        }
    }
}
567
/// Hit and miss counters for each cache level.
#[derive(Debug, Default, Clone)]
pub struct CacheStats {
    /// Schema cache hits
    pub schema_hits: usize,
    /// Schema cache misses
    pub schema_misses: usize,
    /// Validation cache hits
    pub validation_hits: usize,
    /// Validation cache misses
    pub validation_misses: usize,
    /// Hash cache hits
    pub hash_hits: usize,
    /// Hash cache misses
    pub hash_misses: usize,
    /// Template cache hits
    pub template_hits: usize,
    /// Template cache misses
    pub template_misses: usize,
}

impl CacheStats {
    /// `hits / (hits + misses)`, or `0.0` when there were no requests at all.
    fn ratio(hits: usize, misses: usize) -> f64 {
        match hits + misses {
            0 => 0.0,
            requests => hits as f64 / requests as f64,
        }
    }

    /// Hit rate across all four caches combined, as a fraction in `0.0..=1.0`.
    pub fn overall_hit_rate(&self) -> f64 {
        let hits = self.schema_hits + self.validation_hits + self.hash_hits + self.template_hits;
        let misses = self.schema_misses
            + self.validation_misses
            + self.hash_misses
            + self.template_misses;

        Self::ratio(hits, misses)
    }

    /// One-line, human-readable summary of the overall and per-cache hit rates,
    /// each shown as a percentage with one decimal place.
    pub fn summary(&self) -> String {
        let percent = |rate: f64| rate * 100.0;

        format!(
            "Cache Hit Rate: {:.1}% (Schema: {:.1}%, Validation: {:.1}%, Hash: {:.1}%, Template: {:.1}%)",
            percent(self.overall_hit_rate()),
            percent(self.schema_hit_rate()),
            percent(self.validation_hit_rate()),
            percent(self.hash_hit_rate()),
            percent(self.template_hit_rate()),
        )
    }

    /// Hit rate of the schema cache alone.
    fn schema_hit_rate(&self) -> f64 {
        Self::ratio(self.schema_hits, self.schema_misses)
    }

    /// Hit rate of the validation cache alone.
    fn validation_hit_rate(&self) -> f64 {
        Self::ratio(self.validation_hits, self.validation_misses)
    }

    /// Hit rate of the hash cache alone.
    fn hash_hit_rate(&self) -> f64 {
        Self::ratio(self.hash_hits, self.hash_misses)
    }

    /// Hit rate of the template cache alone.
    fn template_hit_rate(&self) -> f64 {
        Self::ratio(self.template_hits, self.template_misses)
    }
}
627
628/// Public API for cache operations
629pub struct CacheManager;
630
631impl CacheManager {
632    /// Get global cache statistics
633    pub fn stats() -> CacheStats {
634        GLOBAL_CACHE.read().unwrap().stats().clone()
635    }
636    
637    /// Clear all global caches
638    pub fn clear_all() {
639        GLOBAL_CACHE.write().unwrap().clear_all();
640    }
641    
642    /// Prune expired entries
643    pub fn prune_expired() {
644        GLOBAL_CACHE.write().unwrap().prune_expired();
645    }
646    
647    /// Get schema from cache or compile
648    pub fn get_schema(
649        version: &str,
650        profile: Option<&str>,
651        compiler: impl FnOnce() -> Result<CompiledSchema, BuildError>,
652    ) -> Result<CompiledSchema, BuildError> {
653        let mut cache = GLOBAL_CACHE.write().unwrap();
654        let schema = cache.schemas.get_or_compile(version, profile, compiler)?;
655        Ok(schema.clone())
656    }
657    
658    /// Fast hash computation with caching
659    pub fn fast_hash<T: Hash + std::fmt::Debug>(algorithm: &str, content_type: &str, content_id: &str, value: &T) -> String {
660        let key = HashKey {
661            algorithm: algorithm.to_string(),
662            content_type: content_type.to_string(),
663            content_id: content_id.to_string(),
664        };
665        
666        let mut cache = GLOBAL_CACHE.write().unwrap();
667        cache.hash_cache.get_or_compute(&key, value, |v| {
668            match algorithm {
669                "blake3" => {
670                    let mut hasher = Blake3Hasher::new();
671                    let bytes = format!("{:?}", v); // Simple serialization for hashing
672                    hasher.update(bytes.as_bytes());
673                    hasher.finalize().to_hex().to_string()
674                }
675                _ => {
676                    // Fallback to default hasher
677                    let mut hasher = std::hash::DefaultHasher::new();
678                    v.hash(&mut hasher);
679                    format!("{:016x}", hasher.finish())
680                }
681            }
682        })
683    }
684}
685
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal "4.3" schema with a single required element.
    fn sample_schema() -> CompiledSchema {
        CompiledSchema {
            version: "4.3".to_string(),
            profile: None,
            rules: vec![],
            required_elements: vec!["MessageHeader".to_string()],
            element_constraints: IndexMap::new(),
        }
    }

    /// Small three-part template with one required field.
    fn sample_template() -> CompiledTemplate {
        CompiledTemplate {
            parts: vec![
                TemplatePart::Static(OptimizedString::new("<SoundRecording>")),
                TemplatePart::Placeholder("title".to_string()),
                TemplatePart::Static(OptimizedString::new("</SoundRecording>")),
            ],
            required_fields: vec!["title".to_string()],
            estimated_size: 100,
        }
    }

    #[test]
    fn test_schema_cache() {
        let mut cache = SchemaCache::new();

        // A miss runs the compiler and stores the result.
        let first = cache
            .get_or_compile("4.3", None, || Ok(sample_schema()))
            .unwrap();
        assert_eq!(first.version, "4.3");
        assert!(cache.contains("4.3", None));

        // A hit must not run the compiler again.
        let second = cache
            .get_or_compile("4.3", None, || panic!("Should not compile again"))
            .unwrap();
        assert_eq!(second.version, "4.3");
    }

    #[test]
    fn test_validation_cache() {
        let mut cache = ValidationCache::new();
        let hash = "test_hash".to_string();

        // Nothing stored yet.
        assert!(cache.get(&hash).is_none());

        let result = ValidationResult {
            is_valid: true,
            errors: vec![],
            warnings: vec![],
            validation_time: Duration::from_millis(10),
        };
        cache.insert(hash.clone(), result);

        // The stored result comes back.
        let cached = cache.get(&hash).unwrap();
        assert!(cached.is_valid);
    }

    #[test]
    fn test_hash_cache() {
        let mut cache = HashCache::new();
        let key = HashKey {
            algorithm: "blake3".to_string(),
            content_type: "track".to_string(),
            content_id: "T001".to_string(),
        };
        let test_value = "test content";

        // The first call computes the hash.
        let hash1 = cache.get_or_compute(&key, &test_value, |v| format!("hash_{}", v));

        // The second call is served from the cache.
        let hash2 = cache.get_or_compute(&key, &test_value, |_| panic!("Should not compute again"));

        assert_eq!(hash1, hash2);
        assert_eq!(hash1, "hash_test content");
    }

    #[test]
    fn test_cache_manager() {
        CacheManager::clear_all();
        let stats = CacheManager::stats();
        assert_eq!(stats.overall_hit_rate(), 0.0);

        // Hashing the same key twice yields the same (cached) hash.
        let hash1 = CacheManager::fast_hash("blake3", "test", "item1", &"content");
        let hash2 = CacheManager::fast_hash("blake3", "test", "item1", &"content");
        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_template_cache() {
        let mut cache = TemplateCache::new();
        let key = TemplateKey {
            element_type: "SoundRecording".to_string(),
            version: "4.3".to_string(),
            variant: None,
        };

        // The first access compiles the template.
        let template = cache.get_or_compile(&key, sample_template);
        assert_eq!(template.required_fields.len(), 1);

        // The second access reuses the cached template.
        let template2 = cache.get_or_compile(&key, || panic!("Should not compile again"));
        assert_eq!(template2.required_fields.len(), 1);
    }
}