ddex_builder/
id_generator.rs

1// packages/ddex-builder/src/id_generator.rs
2//! Stable hash-based ID generation for deterministic DDEX messages
3
4use blake3;
5use indexmap::IndexMap;
6use serde::{Deserialize, Serialize};
7use sha2::{Digest, Sha256};
8use unicode_normalization::UnicodeNormalization;
9
10/// Stable hash configuration
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct StableHashConfig {
13    /// Recipe version to use
14    pub recipe: String,
15
16    /// Hash algorithm
17    pub algorithm: HashAlgorithm,
18
19    /// Whether to cache generated IDs
20    pub use_cache: bool,
21
22    /// Salt for hash generation
23    pub salt: Option<String>,
24}
25
26impl Default for StableHashConfig {
27    fn default() -> Self {
28        Self {
29            recipe: "v1".to_string(),
30            algorithm: HashAlgorithm::Blake3,
31            use_cache: true,
32            salt: None,
33        }
34    }
35}
36
37/// Hash algorithm for stable ID generation
38#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
39pub enum HashAlgorithm {
40    /// SHA-256
41    Sha256,
42    /// Blake3 (faster, more secure)
43    Blake3,
44}
45
46/// Recipe for stable hash generation
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct HashRecipe {
49    /// Fields to include in hash
50    pub fields: Vec<String>,
51
52    /// Normalization options
53    pub normalize: NormalizeOptions,
54
55    /// Salt for this entity type
56    pub salt: String,
57}
58
59/// Normalization options for stable hashing
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct NormalizeOptions {
62    /// Unicode normalization form
63    pub unicode: UnicodeForm,
64
65    /// Whether to trim whitespace
66    pub trim: bool,
67
68    /// Case normalization
69    pub case: CaseNormalization,
70}
71
72/// Unicode normalization form for ID generation
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74pub enum UnicodeForm {
75    /// Canonical Decomposition, followed by Canonical Composition
76    NFC,
77    /// Canonical Decomposition
78    NFD,
79    /// Compatibility Decomposition, followed by Canonical Composition
80    NFKC,
81    /// Compatibility Decomposition
82    NFKD,
83}
84
85/// Case normalization strategy for ID generation
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
87pub enum CaseNormalization {
88    /// Keep original case
89    AsIs,
90    /// Convert to lowercase
91    Lower,
92    /// Convert to uppercase
93    Upper,
94}
95
96/// Stable hash ID generator
97pub struct StableHashGenerator {
98    config: StableHashConfig,
99    recipes: IndexMap<String, HashRecipe>,
100    cache: IndexMap<String, String>,
101}
102
103impl StableHashGenerator {
104    /// Create new generator with config
105    pub fn new(config: StableHashConfig) -> Self {
106        Self {
107            config,
108            recipes: Self::load_recipes(),
109            cache: IndexMap::new(),
110        }
111    }
112
113    /// Generate stable ID for a release
114    pub fn generate_release_id(
115        &mut self,
116        upc: &str,
117        release_type: &str,
118        track_isrcs: &[String],
119        territory_set: &[String],
120    ) -> Result<String, super::error::BuildError> {
121        let materials = ReleaseHashMaterials {
122            upc: upc.to_string(),
123            release_type: release_type.to_string(),
124            track_isrcs: track_isrcs.to_vec(),
125            territory_set: territory_set.to_vec(),
126        };
127
128        self.generate("Release", &materials)
129    }
130
131    /// Generate stable ID for a resource
132    pub fn generate_resource_id(
133        &mut self,
134        isrc: &str,
135        duration: u32,
136        file_hash: Option<&str>,
137    ) -> Result<String, super::error::BuildError> {
138        let materials = ResourceHashMaterials {
139            isrc: isrc.to_string(),
140            duration,
141            file_hash: file_hash.map(|s| s.to_string()),
142        };
143
144        self.generate("Resource", &materials)
145    }
146
147    /// Generate stable ID for a party
148    pub fn generate_party_id(
149        &mut self,
150        name: &str,
151        role: &str,
152        identifiers: &[String],
153    ) -> Result<String, super::error::BuildError> {
154        let materials = PartyHashMaterials {
155            name: name.to_string(),
156            role: role.to_string(),
157            identifiers: identifiers.to_vec(),
158        };
159
160        self.generate("Party", &materials)
161    }
162
163    /// Generic stable ID generation
164    fn generate<T: Serialize>(
165        &mut self,
166        entity_type: &str,
167        materials: &T,
168    ) -> Result<String, super::error::BuildError> {
169        // Create cache key
170        let cache_key = format!("{}:{}", entity_type, serde_json::to_string(materials)?);
171
172        // Check cache
173        if self.config.use_cache {
174            if let Some(cached) = self.cache.get(&cache_key) {
175                return Ok(cached.clone());
176            }
177        }
178
179        // Get recipe
180        let recipe = self
181            .recipes
182            .get(&format!("{}.{}", entity_type, self.config.recipe))
183            .ok_or_else(|| super::error::BuildError::InvalidFormat {
184                field: "recipe".to_string(),
185                message: format!("No recipe for {}.{}", entity_type, self.config.recipe),
186            })?;
187
188        // Normalize and concatenate fields
189        let normalized = self.normalize_materials(materials, recipe)?;
190
191        // Generate hash
192        let id = match self.config.algorithm {
193            HashAlgorithm::Sha256 => self.hash_sha256(&normalized, &recipe.salt),
194            HashAlgorithm::Blake3 => self.hash_blake3(&normalized, &recipe.salt),
195        };
196
197        // Cache result
198        if self.config.use_cache {
199            self.cache.insert(cache_key, id.clone());
200        }
201
202        Ok(id)
203    }
204
205    fn normalize_materials<T: Serialize>(
206        &self,
207        materials: &T,
208        recipe: &HashRecipe,
209    ) -> Result<String, super::error::BuildError> {
210        let json = serde_json::to_value(materials)?;
211        let mut parts = Vec::new();
212
213        for field in &recipe.fields {
214            if let Some(value) = json.get(field) {
215                let normalized = self.normalize_value(value, &recipe.normalize)?;
216                parts.push(normalized);
217            }
218        }
219
220        Ok(parts.join("|"))
221    }
222
223    fn normalize_value(
224        &self,
225        value: &serde_json::Value,
226        options: &NormalizeOptions,
227    ) -> Result<String, super::error::BuildError> {
228        let text = match value {
229            serde_json::Value::String(s) => s.clone(),
230            serde_json::Value::Array(arr) => {
231                let strings: Vec<String> = arr
232                    .iter()
233                    .map(|v| self.normalize_value(v, options))
234                    .collect::<Result<Vec<_>, _>>()?;
235                strings.join(",")
236            }
237            _ => serde_json::to_string(value)?,
238        };
239
240        // Apply normalization
241        let mut normalized = text;
242
243        // Unicode normalization
244        normalized = match options.unicode {
245            UnicodeForm::NFC => normalized.nfc().collect(),
246            UnicodeForm::NFD => normalized.nfd().collect(),
247            UnicodeForm::NFKC => normalized.nfkc().collect(),
248            UnicodeForm::NFKD => normalized.nfkd().collect(),
249        };
250
251        // Trim
252        if options.trim {
253            normalized = normalized.trim().to_string();
254        }
255
256        // Case normalization
257        normalized = match options.case {
258            CaseNormalization::AsIs => normalized,
259            CaseNormalization::Lower => normalized.to_lowercase(),
260            CaseNormalization::Upper => normalized.to_uppercase(),
261        };
262
263        Ok(normalized)
264    }
265
266    fn hash_sha256(&self, input: &str, salt: &str) -> String {
267        let mut hasher = Sha256::new();
268        hasher.update(salt.as_bytes());
269        hasher.update(input.as_bytes());
270        if let Some(global_salt) = &self.config.salt {
271            hasher.update(global_salt.as_bytes());
272        }
273        let result = hasher.finalize();
274        format!("SHA256:{:x}", result)
275    }
276
277    fn hash_blake3(&self, input: &str, salt: &str) -> String {
278        let mut hasher = blake3::Hasher::new();
279        hasher.update(salt.as_bytes());
280        hasher.update(input.as_bytes());
281        if let Some(global_salt) = &self.config.salt {
282            hasher.update(global_salt.as_bytes());
283        }
284        let hash = hasher.finalize();
285        format!("B3:{}", hash.to_hex())
286    }
287
288    fn load_recipes() -> IndexMap<String, HashRecipe> {
289        let mut recipes = IndexMap::new();
290
291        // Release v1 recipe
292        recipes.insert(
293            "Release.v1".to_string(),
294            HashRecipe {
295                fields: vec![
296                    "upc".to_string(),
297                    "release_type".to_string(),
298                    "track_isrcs".to_string(),
299                    "territory_set".to_string(),
300                ],
301                normalize: NormalizeOptions {
302                    unicode: UnicodeForm::NFC,
303                    trim: true,
304                    case: CaseNormalization::AsIs,
305                },
306                salt: "REL@1".to_string(),
307            },
308        );
309
310        // Resource v1 recipe
311        recipes.insert(
312            "Resource.v1".to_string(),
313            HashRecipe {
314                fields: vec![
315                    "isrc".to_string(),
316                    "duration".to_string(),
317                    "file_hash".to_string(),
318                ],
319                normalize: NormalizeOptions {
320                    unicode: UnicodeForm::NFC,
321                    trim: true,
322                    case: CaseNormalization::AsIs,
323                },
324                salt: "RES@1".to_string(),
325            },
326        );
327
328        // Party v1 recipe
329        recipes.insert(
330            "Party.v1".to_string(),
331            HashRecipe {
332                fields: vec![
333                    "name".to_string(),
334                    "role".to_string(),
335                    "identifiers".to_string(),
336                ],
337                normalize: NormalizeOptions {
338                    unicode: UnicodeForm::NFC,
339                    trim: true,
340                    case: CaseNormalization::Lower,
341                },
342                salt: "PTY@1".to_string(),
343            },
344        );
345
346        recipes
347    }
348}
349
350// Hash material structures
351#[derive(Debug, Serialize)]
352struct ReleaseHashMaterials {
353    upc: String,
354    release_type: String,
355    track_isrcs: Vec<String>,
356    territory_set: Vec<String>,
357}
358
359#[derive(Debug, Serialize)]
360struct ResourceHashMaterials {
361    isrc: String,
362    duration: u32,
363    file_hash: Option<String>,
364}
365
366#[derive(Debug, Serialize)]
367struct PartyHashMaterials {
368    name: String,
369    role: String,
370    identifiers: Vec<String>,
371}