// ddex_builder/streaming/reference_manager.rs

1//! Reference management for streaming DDEX XML generation
2//!
3//! Manages stable reference generation and validation during streaming
4//! to ensure proper linking between releases and resources.
5
6use crate::error::BuildError;
7use crate::id_generator::{StableHashConfig, StableHashGenerator};
8use indexmap::{IndexMap, IndexSet};
9
/// Settings that steer how the streaming reference manager builds reference IDs.
///
/// When `deterministic` is set, each reference ID is the configured prefix
/// followed by the first 8 hex characters of the SHA-256 digest of the source
/// identifier; otherwise it is the prefix followed by a zero-padded sequence
/// counter. `max_cache_size` is the number of tracked entries above which the
/// manager starts discarding its oldest entries.
///
/// # Example
/// ```
/// use ddex_builder::streaming::reference_manager::ReferenceConfig;
///
/// let config = ReferenceConfig {
///     deterministic: true,
///     resource_prefix: "AUDIO".to_string(),
///     release_prefix: "ALBUM".to_string(),
///     deal_prefix: "DEAL".to_string(),
///     max_cache_size: 50_000, // Smaller cache for memory-constrained environments
/// };
/// assert_eq!(config.release_prefix, "ALBUM");
/// ```
#[derive(Clone, Debug)]
pub struct ReferenceConfig {
    /// Derive reference IDs from a hash of the source identifier instead of a counter
    pub deterministic: bool,
    /// Text prepended to every resource reference (default: "R")
    pub resource_prefix: String,
    /// Text prepended to every release reference (default: "REL")
    pub release_prefix: String,
    /// Text prepended to every deal reference (default: "D")
    pub deal_prefix: String,
    /// Number of cached entries above which the oldest entries are discarded
    pub max_cache_size: usize,
}

impl Default for ReferenceConfig {
    /// Deterministic IDs, prefixes `R` / `REL` / `D`, and a 100 000-entry cache.
    fn default() -> Self {
        ReferenceConfig {
            deterministic: true,
            resource_prefix: String::from("R"),
            release_prefix: String::from("REL"),
            deal_prefix: String::from("D"),
            // Sized so that large catalogs fit without triggering cleanup.
            max_cache_size: 100_000,
        }
    }
}
52
/// Metadata recorded for one resource reference.
///
/// Pairs the generated reference ID with the resource's own identifier and
/// descriptive fields, plus the position at which the metadata was stored.
///
/// # Example
/// ```
/// use ddex_builder::streaming::reference_manager::ResourceReference;
///
/// let resource_ref = ResourceReference {
///     reference_id: "R12345678".to_string(),
///     resource_id: "USUM71504847".to_string(), // ISRC
///     title: "Bohemian Rhapsody".to_string(),
///     artist: "Queen".to_string(),
///     resource_type: "SoundRecording".to_string(),
///     sequence_number: 1,
/// };
/// assert_eq!(resource_ref.sequence_number, 1);
/// ```
#[derive(Clone, Debug)]
pub struct ResourceReference {
    /// Reference ID generated for this resource within the DDEX message
    pub reference_id: String,
    /// Identifier the resource was registered under (e.g., ISRC, proprietary ID)
    pub resource_id: String,
    /// Title of the resource/track
    pub title: String,
    /// Artist credited on this resource
    pub artist: String,
    /// Kind of resource (SoundRecording, Video, Image, Text, etc.)
    pub resource_type: String,
    /// 1-based position assigned when the metadata was stored
    pub sequence_number: usize,
}
86
/// Metadata recorded for one release reference.
///
/// Pairs the generated reference ID with the release's own identifier, its
/// descriptive fields, the resource references it links to, and the position
/// at which the metadata was stored.
///
/// # Example
/// ```
/// use ddex_builder::streaming::reference_manager::ReleaseReference;
///
/// let release_ref = ReleaseReference {
///     reference_id: "REL87654321".to_string(),
///     release_id: "GRid:A1-12345678901234567890123456789012".to_string(),
///     title: "Greatest Hits".to_string(),
///     artist: "The Beatles".to_string(),
///     resource_references: vec!["R12345678".to_string(), "R87654321".to_string()],
///     sequence_number: 1,
/// };
/// assert_eq!(release_ref.resource_references.len(), 2);
/// ```
#[derive(Clone, Debug)]
pub struct ReleaseReference {
    /// Reference ID generated for this release within the DDEX message
    pub reference_id: String,
    /// Identifier the release was registered under (e.g., GRid, UPC, proprietary ID)
    pub release_id: String,
    /// Title of the release
    pub title: String,
    /// Main artist credited on this release
    pub artist: String,
    /// Reference IDs of the resources (tracks) that belong to this release
    pub resource_references: Vec<String>,
    /// 1-based position assigned when the metadata was stored
    pub sequence_number: usize,
}
120
121/// Manages references during streaming operations
122pub struct StreamingReferenceManager {
123    config: ReferenceConfig,
124    #[allow(dead_code)]
125    hash_generator: StableHashGenerator,
126
127    // Resource tracking
128    resource_references: IndexMap<String, String>, // resource_id -> reference_id
129    resource_metadata: IndexMap<String, ResourceReference>,
130    resource_sequence: usize,
131
132    // Release tracking
133    release_references: IndexMap<String, String>, // release_id -> reference_id
134    release_metadata: IndexMap<String, ReleaseReference>,
135    release_sequence: usize,
136
137    // Deal tracking
138    deal_references: IndexMap<String, String>, // deal_id -> reference_id
139    deal_sequence: usize,
140
141    // Validation tracking
142    used_references: IndexSet<String>,
143    orphaned_references: Vec<String>,
144    duplicate_resource_ids: IndexSet<String>,
145    duplicate_release_ids: IndexSet<String>,
146
147    // Memory management
148    references_generated: usize,
149}
150
151impl StreamingReferenceManager {
152    /// Create a new streaming reference manager
153    pub fn new() -> Self {
154        Self::new_with_config(ReferenceConfig::default())
155    }
156
157    /// Create a new streaming reference manager with custom configuration
158    pub fn new_with_config(config: ReferenceConfig) -> Self {
159        let hash_config = StableHashConfig::default();
160
161        StreamingReferenceManager {
162            config,
163            hash_generator: StableHashGenerator::new(hash_config),
164            resource_references: IndexMap::new(),
165            resource_metadata: IndexMap::new(),
166            resource_sequence: 1,
167            release_references: IndexMap::new(),
168            release_metadata: IndexMap::new(),
169            release_sequence: 1,
170            deal_references: IndexMap::new(),
171            deal_sequence: 1,
172            used_references: IndexSet::new(),
173            orphaned_references: Vec::new(),
174            duplicate_resource_ids: IndexSet::new(),
175            duplicate_release_ids: IndexSet::new(),
176            references_generated: 0,
177        }
178    }
179
180    /// Generate a stable reference for a resource
181    pub fn generate_resource_reference(&mut self, resource_id: &str) -> Result<String, BuildError> {
182        // Check for duplicate resource ID
183        if self.resource_references.contains_key(resource_id) {
184            self.duplicate_resource_ids.insert(resource_id.to_string());
185            return Ok(self.resource_references[resource_id].clone());
186        }
187
188        // Generate stable reference
189        let reference_id = if self.config.deterministic {
190            // Use a simplified hash approach for now
191            use sha2::{Digest, Sha256};
192            let mut hasher = Sha256::new();
193            hasher.update(resource_id.as_bytes());
194            let hash = format!("{:x}", hasher.finalize());
195            format!("{}{}", self.config.resource_prefix, &hash[..8])
196        } else {
197            format!(
198                "{}{:06}",
199                self.config.resource_prefix, self.resource_sequence
200            )
201        };
202
203        // Check for reference collision
204        if self.used_references.contains(&reference_id) {
205            return Err(BuildError::InvalidReference {
206                reference: reference_id,
207            });
208        }
209
210        // Store the mapping
211        self.resource_references
212            .insert(resource_id.to_string(), reference_id.clone());
213        self.used_references.insert(reference_id.clone());
214        self.resource_sequence += 1;
215        self.references_generated += 1;
216
217        // Manage memory usage
218        self.manage_cache_size()?;
219
220        Ok(reference_id)
221    }
222
223    /// Generate a stable reference for a release
224    pub fn generate_release_reference(&mut self, release_id: &str) -> Result<String, BuildError> {
225        // Check for duplicate release ID
226        if self.release_references.contains_key(release_id) {
227            self.duplicate_release_ids.insert(release_id.to_string());
228            return Ok(self.release_references[release_id].clone());
229        }
230
231        // Generate stable reference
232        let reference_id = if self.config.deterministic {
233            // Use a simplified hash approach for now
234            use sha2::{Digest, Sha256};
235            let mut hasher = Sha256::new();
236            hasher.update(release_id.as_bytes());
237            let hash = format!("{:x}", hasher.finalize());
238            format!("{}{}", self.config.release_prefix, &hash[..8])
239        } else {
240            format!("{}{:06}", self.config.release_prefix, self.release_sequence)
241        };
242
243        // Check for reference collision
244        if self.used_references.contains(&reference_id) {
245            return Err(BuildError::InvalidReference {
246                reference: reference_id,
247            });
248        }
249
250        // Store the mapping
251        self.release_references
252            .insert(release_id.to_string(), reference_id.clone());
253        self.used_references.insert(reference_id.clone());
254        self.release_sequence += 1;
255        self.references_generated += 1;
256
257        // Manage memory usage
258        self.manage_cache_size()?;
259
260        Ok(reference_id)
261    }
262
263    /// Generate a stable reference for a deal
264    pub fn generate_deal_reference(&mut self, deal_id: &str) -> Result<String, BuildError> {
265        // Check for existing mapping
266        if let Some(existing_ref) = self.deal_references.get(deal_id) {
267            return Ok(existing_ref.clone());
268        }
269
270        // Generate stable reference
271        let reference_id = if self.config.deterministic {
272            // Use a simplified hash approach for now
273            use sha2::{Digest, Sha256};
274            let mut hasher = Sha256::new();
275            hasher.update(deal_id.as_bytes());
276            let hash = format!("{:x}", hasher.finalize());
277            format!("{}{}", self.config.deal_prefix, &hash[..8])
278        } else {
279            format!("{}{:06}", self.config.deal_prefix, self.deal_sequence)
280        };
281
282        // Check for reference collision
283        if self.used_references.contains(&reference_id) {
284            return Err(BuildError::InvalidReference {
285                reference: reference_id,
286            });
287        }
288
289        // Store the mapping
290        self.deal_references
291            .insert(deal_id.to_string(), reference_id.clone());
292        self.used_references.insert(reference_id.clone());
293        self.deal_sequence += 1;
294        self.references_generated += 1;
295
296        // Manage memory usage
297        self.manage_cache_size()?;
298
299        Ok(reference_id)
300    }
301
302    /// Store metadata for a resource reference
303    pub fn store_resource_metadata(
304        &mut self,
305        resource_id: &str,
306        title: &str,
307        artist: &str,
308        resource_type: &str,
309    ) -> Result<(), BuildError> {
310        let reference_id = self
311            .resource_references
312            .get(resource_id)
313            .ok_or_else(|| BuildError::InvalidReference {
314                reference: format!("Resource {} not found", resource_id),
315            })?
316            .clone();
317
318        let metadata = ResourceReference {
319            reference_id: reference_id.clone(),
320            resource_id: resource_id.to_string(),
321            title: title.to_string(),
322            artist: artist.to_string(),
323            resource_type: resource_type.to_string(),
324            sequence_number: self.resource_metadata.len() + 1,
325        };
326
327        self.resource_metadata.insert(reference_id, metadata);
328        Ok(())
329    }
330
331    /// Store metadata for a release reference
332    pub fn store_release_metadata(
333        &mut self,
334        release_id: &str,
335        title: &str,
336        artist: &str,
337        resource_references: Vec<String>,
338    ) -> Result<(), BuildError> {
339        let reference_id = self
340            .release_references
341            .get(release_id)
342            .ok_or_else(|| BuildError::InvalidReference {
343                reference: format!("Release {} not found", release_id),
344            })?
345            .clone();
346
347        // Validate that all resource references exist
348        for resource_ref in &resource_references {
349            if !self.used_references.contains(resource_ref) {
350                self.orphaned_references.push(resource_ref.clone());
351            }
352        }
353
354        let metadata = ReleaseReference {
355            reference_id: reference_id.clone(),
356            release_id: release_id.to_string(),
357            title: title.to_string(),
358            artist: artist.to_string(),
359            resource_references,
360            sequence_number: self.release_metadata.len() + 1,
361        };
362
363        self.release_metadata.insert(reference_id, metadata);
364        Ok(())
365    }
366
367    /// Validate all references at the end of streaming
368    pub fn validate_references(&self) -> ReferenceValidationResult {
369        let mut errors = Vec::new();
370        let mut warnings = Vec::new();
371
372        // Check for orphaned references
373        if !self.orphaned_references.is_empty() {
374            warnings.push(format!(
375                "Found {} orphaned resource references",
376                self.orphaned_references.len()
377            ));
378        }
379
380        // Check for duplicate resource IDs
381        if !self.duplicate_resource_ids.is_empty() {
382            warnings.push(format!(
383                "Found {} duplicate resource IDs",
384                self.duplicate_resource_ids.len()
385            ));
386        }
387
388        // Check for duplicate release IDs
389        if !self.duplicate_release_ids.is_empty() {
390            warnings.push(format!(
391                "Found {} duplicate release IDs",
392                self.duplicate_release_ids.len()
393            ));
394        }
395
396        // Check reference consistency
397        for (resource_id, reference_id) in &self.resource_references {
398            if !self.used_references.contains(reference_id) {
399                errors.push(format!(
400                    "Resource reference {} for resource {} not properly tracked",
401                    reference_id, resource_id
402                ));
403            }
404        }
405
406        for (release_id, reference_id) in &self.release_references {
407            if !self.used_references.contains(reference_id) {
408                errors.push(format!(
409                    "Release reference {} for release {} not properly tracked",
410                    reference_id, release_id
411                ));
412            }
413        }
414
415        ReferenceValidationResult {
416            is_valid: errors.is_empty(),
417            errors,
418            warnings,
419            total_references: self.references_generated,
420            resource_count: self.resource_references.len(),
421            release_count: self.release_references.len(),
422            deal_count: self.deal_references.len(),
423        }
424    }
425
426    /// Get statistics about reference management
427    pub fn get_stats(&self) -> ReferenceStats {
428        ReferenceStats {
429            total_references_generated: self.references_generated,
430            resource_references: self.resource_references.len(),
431            release_references: self.release_references.len(),
432            deal_references: self.deal_references.len(),
433            cache_size: self.current_cache_size(),
434            duplicate_resource_ids: self.duplicate_resource_ids.len(),
435            duplicate_release_ids: self.duplicate_release_ids.len(),
436            orphaned_references: self.orphaned_references.len(),
437        }
438    }
439
440    /// Get a resource reference by resource ID
441    pub fn get_resource_reference(&self, resource_id: &str) -> Option<&str> {
442        self.resource_references
443            .get(resource_id)
444            .map(|s| s.as_str())
445    }
446
447    /// Get a release reference by release ID
448    pub fn get_release_reference(&self, release_id: &str) -> Option<&str> {
449        self.release_references.get(release_id).map(|s| s.as_str())
450    }
451
452    /// Clear old references to manage memory usage
453    fn manage_cache_size(&mut self) -> Result<(), BuildError> {
454        let current_size = self.current_cache_size();
455
456        if current_size > self.config.max_cache_size {
457            // Remove oldest 25% of entries to free up memory
458            let to_remove = current_size / 4;
459
460            // Remove oldest resource references
461            let resource_to_remove =
462                std::cmp::min(to_remove / 2, self.resource_references.len() / 2);
463            for _ in 0..resource_to_remove {
464                if let Some((_resource_id, reference_id)) =
465                    self.resource_references.shift_remove_index(0)
466                {
467                    self.resource_metadata.shift_remove(&reference_id);
468                    self.used_references.shift_remove(&reference_id);
469                }
470            }
471
472            // Remove oldest release references
473            let release_to_remove = std::cmp::min(to_remove / 2, self.release_references.len() / 2);
474            for _ in 0..release_to_remove {
475                if let Some((_release_id, reference_id)) =
476                    self.release_references.shift_remove_index(0)
477                {
478                    self.release_metadata.shift_remove(&reference_id);
479                    self.used_references.shift_remove(&reference_id);
480                }
481            }
482        }
483
484        Ok(())
485    }
486
487    fn current_cache_size(&self) -> usize {
488        self.resource_references.len()
489            + self.release_references.len()
490            + self.deal_references.len()
491            + self.resource_metadata.len()
492            + self.release_metadata.len()
493    }
494}
495
496impl Default for StreamingReferenceManager {
497    fn default() -> Self {
498        Self::new()
499    }
500}
501
/// Reference validation report
///
/// Carries the same fields as [`ReferenceValidationResult`]. Note that
/// `StreamingReferenceManager::validate_references` returns
/// [`ReferenceValidationResult`], not this type; nothing in this module
/// constructs a `ReferenceValidation`.
///
/// # Example
/// ```
/// use ddex_builder::streaming::reference_manager::ReferenceValidation;
///
/// let validation = ReferenceValidation {
///     is_valid: false,
///     errors: vec!["Resource reference R1 for resource a not properly tracked".to_string()],
///     warnings: vec!["Found 1 duplicate resource IDs".to_string()],
///     total_references: 3,
///     resource_count: 1,
///     release_count: 1,
///     deal_count: 1,
/// };
///
/// if !validation.is_valid {
///     for error in &validation.errors {
///         eprintln!("Validation error: {}", error);
///     }
/// }
///
/// for warning in &validation.warnings {
///     println!("Warning: {}", warning);
/// }
///
/// println!("Validated {} total references ({} resources, {} releases, {} deals)",
///          validation.total_references,
///          validation.resource_count,
///          validation.release_count,
///          validation.deal_count);
/// ```
#[derive(Debug, Clone)]
pub struct ReferenceValidation {
    /// Whether all references are valid (no errors found)
    pub is_valid: bool,
    /// List of validation errors found during reference checking
    pub errors: Vec<String>,
    /// List of warnings generated (non-fatal issues)
    pub warnings: Vec<String>,
    /// Total number of references checked during validation
    pub total_references: usize,
    /// Number of resource references validated
    pub resource_count: usize,
    /// Number of release references validated
    pub release_count: usize,
    /// Number of deal references validated
    pub deal_count: usize,
}
548
/// Outcome of validating the references tracked by the streaming manager.
///
/// The result is valid exactly when `errors` is empty; `warnings` never
/// affect validity.
#[derive(Clone, Debug)]
pub struct ReferenceValidationResult {
    /// `true` when no validation errors were found
    pub is_valid: bool,
    /// Messages for references that failed validation
    pub errors: Vec<String>,
    /// Messages for non-fatal findings
    pub warnings: Vec<String>,
    /// Total number of references generated so far
    pub total_references: usize,
    /// Number of resource references currently tracked
    pub resource_count: usize,
    /// Number of release references currently tracked
    pub release_count: usize,
    /// Number of deal references currently tracked
    pub deal_count: usize,
}
567
/// Counters describing what the streaming reference manager has done so far.
///
/// Covers how many references were generated, how many of each kind are
/// currently tracked, how full the cache is, and how many validation
/// findings have been recorded.
///
/// # Example
/// ```
/// use ddex_builder::streaming::reference_manager::StreamingReferenceManager;
///
/// let manager = StreamingReferenceManager::new();
/// let stats = manager.get_stats();
///
/// println!("Generated {} references total", stats.total_references_generated);
/// println!("Cache usage: {}/{}", stats.cache_size, 100_000);
///
/// if stats.duplicate_resource_ids > 0 {
///     println!("Warning: {} duplicate resource IDs found", stats.duplicate_resource_ids);
/// }
/// ```
#[derive(Default, Debug)]
pub struct ReferenceStats {
    /// References generated across all kinds since the manager was created
    pub total_references_generated: usize,
    /// Resource references currently tracked
    pub resource_references: usize,
    /// Release references currently tracked
    pub release_references: usize,
    /// Deal references currently tracked
    pub deal_references: usize,
    /// Number of entries currently held in the reference cache
    pub cache_size: usize,
    /// Distinct resource IDs that were requested more than once
    pub duplicate_resource_ids: usize,
    /// Distinct release IDs that were requested more than once
    pub duplicate_release_ids: usize,
    /// Orphaned references recorded (references without valid targets)
    pub orphaned_references: usize,
}
606
#[cfg(test)]
mod tests {
    use super::*;

    /// Distinct resource IDs yield distinct `R`-prefixed references, and a
    /// repeated ID yields the reference it was given the first time.
    #[test]
    fn test_resource_reference_generation() {
        let mut manager = StreamingReferenceManager::new();

        let first = manager.generate_resource_reference("resource1").unwrap();
        let second = manager.generate_resource_reference("resource2").unwrap();

        assert_ne!(first, second);
        for reference in [&first, &second] {
            assert!(reference.starts_with("R"));
        }

        // Asking again for an already-known resource must not mint a new reference.
        let repeated = manager.generate_resource_reference("resource1").unwrap();
        assert_eq!(first, repeated);
    }

    /// Distinct release IDs yield distinct `REL`-prefixed references.
    #[test]
    fn test_release_reference_generation() {
        let mut manager = StreamingReferenceManager::new();

        let first = manager.generate_release_reference("release1").unwrap();
        let second = manager.generate_release_reference("release2").unwrap();

        assert_ne!(first, second);
        for reference in [&first, &second] {
            assert!(reference.starts_with("REL"));
        }
    }

    /// Metadata stored for a resource is retrievable under its reference ID.
    #[test]
    fn test_metadata_storage() {
        let mut manager = StreamingReferenceManager::new();

        let reference_id = manager.generate_resource_reference("resource1").unwrap();
        let stored =
            manager.store_resource_metadata("resource1", "Title", "Artist", "SoundRecording");
        stored.unwrap();

        let entry = manager.resource_metadata.get(&reference_id).unwrap();
        assert_eq!(entry.title, "Title");
        assert_eq!(entry.artist, "Artist");
    }

    /// A release that links only to known resource references validates cleanly.
    #[test]
    fn test_reference_validation() {
        let mut manager = StreamingReferenceManager::new();

        // Register one resource and one release.
        let resource_ref = manager.generate_resource_reference("resource1").unwrap();
        let _release_ref = manager.generate_release_reference("release1").unwrap();

        // Link the release to the resource reference that was just generated.
        let linked = vec![resource_ref];
        manager
            .store_release_metadata("release1", "Album Title", "Artist", linked)
            .unwrap();

        let report = manager.validate_references();
        assert!(report.is_valid);
        assert_eq!(report.resource_count, 1);
        assert_eq!(report.release_count, 1);
    }

    /// A release that links to an unknown resource reference produces a warning.
    #[test]
    fn test_orphaned_references() {
        let mut manager = StreamingReferenceManager::new();

        let _release_ref = manager.generate_release_reference("release1").unwrap();

        // "R999999" was never generated, so it has no target.
        let unknown = vec!["R999999".to_string()];
        manager
            .store_release_metadata("release1", "Album Title", "Artist", unknown)
            .unwrap();

        let report = manager.validate_references();
        assert!(!report.warnings.is_empty());
    }
}