// ddex_builder/namespace_minimizer.rs

1//! # Namespace Minimizer for DDEX Builder
2//! 
3//! This module provides functionality to minimize namespace declarations in generated XML,
4//! hoisting declarations to the root when possible and applying locked prefixes.
5
6use ddex_core::namespace::{NamespaceRegistry, NamespaceScope, ConflictResolution};
7use ddex_core::models::versions::ERNVersion;
8use crate::canonical::rules::CanonicalNamespaceManager;
9use crate::ast::{AST, Element, Node};
10use indexmap::{IndexMap, IndexSet};
11use std::collections::HashMap;
12use tracing::{debug, warn};
13
14/// Namespace minimization result
15#[derive(Debug, Clone)]
16pub struct MinimizationResult {
17    /// Minimal namespace declarations for root element
18    pub root_namespaces: IndexMap<String, String>,
19    /// Updated AST with minimized namespace declarations
20    pub optimized_ast: AST,
21    /// Mapping of old prefixes to new prefixes
22    pub prefix_mapping: IndexMap<String, String>,
23    /// Warnings about namespace changes
24    pub warnings: Vec<String>,
25}
26
27/// Namespace usage analysis
28#[derive(Debug, Clone)]
29pub struct NamespaceUsage {
30    /// All namespaces used in the document
31    pub used_namespaces: IndexSet<String>,
32    /// Elements that use each namespace
33    pub namespace_elements: HashMap<String, IndexSet<String>>,
34    /// Attribute namespaces
35    pub attribute_namespaces: IndexSet<String>,
36}
37
38/// Comprehensive namespace minimizer
39pub struct NamespaceMinimizer {
40    /// Namespace registry for known namespaces
41    registry: NamespaceRegistry,
42    /// Canonical namespace manager
43    canonical_manager: CanonicalNamespaceManager,
44    /// ERN version for version-specific rules
45    version: ERNVersion,
46    /// Conflict resolution strategy
47    conflict_resolution: ConflictResolution,
48}
49
50impl NamespaceMinimizer {
51    /// Create new namespace minimizer
52    pub fn new(version: ERNVersion) -> Self {
53        Self {
54            registry: NamespaceRegistry::new(),
55            canonical_manager: CanonicalNamespaceManager::new(),
56            version,
57            conflict_resolution: ConflictResolution::GenerateUnique,
58        }
59    }
60
61    /// Create namespace minimizer with specific conflict resolution
62    pub fn with_conflict_resolution(mut self, strategy: ConflictResolution) -> Self {
63        self.conflict_resolution = strategy;
64        self
65    }
66
67    /// Minimize namespace declarations in AST
68    pub fn minimize(&self, ast: AST) -> Result<MinimizationResult, String> {
69        debug!("Starting namespace minimization for ERN {:?}", self.version);
70        
71        // Step 1: Analyze namespace usage throughout the document
72        let usage = self.analyze_namespace_usage(&ast)?;
73        debug!("Found {} used namespaces", usage.used_namespaces.len());
74        
75        // Step 2: Create optimal namespace declarations for root
76        let root_namespaces = self.create_minimal_root_declarations(&usage)?;
77        
78        // Step 3: Apply canonical namespace transformations
79        let canonical_namespaces = self.apply_canonical_rules(&root_namespaces)?;
80        
81        // Step 4: Update AST with optimized namespace declarations
82        let (optimized_ast, prefix_mapping) = self.apply_namespace_minimization(ast, &canonical_namespaces)?;
83        
84        // Step 5: Validate the result
85        let warnings = self.validate_minimization(&optimized_ast, &canonical_namespaces);
86        
87        Ok(MinimizationResult {
88            root_namespaces: canonical_namespaces,
89            optimized_ast,
90            prefix_mapping,
91            warnings,
92        })
93    }
94
95    /// Analyze namespace usage throughout the document
96    fn analyze_namespace_usage(&self, ast: &AST) -> Result<NamespaceUsage, String> {
97        let mut used_namespaces = IndexSet::new();
98        let mut namespace_elements = HashMap::new();
99        let mut attribute_namespaces = IndexSet::new();
100
101        // Add namespaces already declared in AST
102        for (prefix, uri) in &ast.namespaces {
103            used_namespaces.insert(uri.clone());
104            namespace_elements.entry(uri.clone()).or_insert_with(IndexSet::new);
105        }
106
107        // Analyze namespace usage in element tree
108        self.analyze_element_usage(&ast.root, &mut used_namespaces, &mut namespace_elements, &mut attribute_namespaces);
109
110        // Add required namespaces for the ERN version
111        let required_namespaces = self.registry.get_version_namespaces(&self.version);
112        for ns_uri in required_namespaces {
113            used_namespaces.insert(ns_uri);
114        }
115
116        Ok(NamespaceUsage {
117            used_namespaces,
118            namespace_elements,
119            attribute_namespaces,
120        })
121    }
122
123    /// Recursively analyze namespace usage in elements
124    fn analyze_element_usage(
125        &self,
126        element: &Element,
127        used_namespaces: &mut IndexSet<String>,
128        namespace_elements: &mut HashMap<String, IndexSet<String>>,
129        attribute_namespaces: &mut IndexSet<String>,
130    ) {
131        // Check element namespace
132        if let Some(ref ns) = element.namespace {
133            used_namespaces.insert(ns.clone());
134            namespace_elements
135                .entry(ns.clone())
136                .or_insert_with(IndexSet::new)
137                .insert(element.name.clone());
138        }
139
140        // Check attribute namespaces
141        for (attr_name, _) in &element.attributes {
142            if attr_name.contains(':') && !attr_name.starts_with("xmlns") {
143                // Extract namespace prefix from qualified attribute name
144                if let Some(prefix) = attr_name.split(':').next() {
145                    // This would need the namespace URI, but for now just track the usage
146                    debug!("Found namespaced attribute: {} with prefix: {}", attr_name, prefix);
147                }
148            }
149        }
150
151        // Recursively analyze children
152        for child in &element.children {
153            if let Node::Element(child_element) = child {
154                self.analyze_element_usage(child_element, used_namespaces, namespace_elements, attribute_namespaces);
155            }
156        }
157    }
158
159    /// Create minimal namespace declarations for root element
160    fn create_minimal_root_declarations(&self, usage: &NamespaceUsage) -> Result<IndexMap<String, String>, String> {
161        let mut declarations = IndexMap::new();
162        
163        // Create declarations for all used namespaces
164        for uri in &usage.used_namespaces {
165            if let Some(preferred_prefix) = self.registry.get_preferred_prefix(uri) {
166                declarations.insert(preferred_prefix.to_string(), uri.clone());
167            } else {
168                // For unknown namespaces, generate a prefix
169                let generated_prefix = self.generate_prefix_for_uri(uri);
170                declarations.insert(generated_prefix, uri.clone());
171            }
172        }
173
174        Ok(declarations)
175    }
176
177    /// Apply canonical namespace rules
178    fn apply_canonical_rules(&self, declarations: &IndexMap<String, String>) -> Result<IndexMap<String, String>, String> {
179        let version_str = match self.version {
180            ERNVersion::V3_8_2 => "3.8.2",
181            ERNVersion::V4_2 => "4.2",
182            ERNVersion::V4_3 => "4.3",
183        };
184
185        Ok(self.canonical_manager.canonicalize_namespaces(declarations, version_str))
186    }
187
188    /// Apply namespace minimization to AST
189    fn apply_namespace_minimization(
190        &self,
191        mut ast: AST,
192        canonical_namespaces: &IndexMap<String, String>,
193    ) -> Result<(AST, IndexMap<String, String>), String> {
194        // Update AST namespaces with canonical declarations
195        ast.namespaces = canonical_namespaces.clone();
196
197        // Create prefix mapping for any changes
198        let mut prefix_mapping = IndexMap::new();
199        
200        // For now, assume no prefix changes (would be more complex in full implementation)
201        for (prefix, _) in canonical_namespaces {
202            prefix_mapping.insert(prefix.clone(), prefix.clone());
203        }
204
205        // Update element prefixes if needed (recursive through tree)
206        self.update_element_prefixes(&mut ast.root, &prefix_mapping);
207
208        Ok((ast, prefix_mapping))
209    }
210
211    /// Update element prefixes based on mapping
212    fn update_element_prefixes(&self, element: &mut Element, _prefix_mapping: &IndexMap<String, String>) {
213        // This would update element names and attributes based on prefix changes
214        // For now, keep existing prefixes
215        
216        // Recursively update children
217        for child in &mut element.children {
218            if let Node::Element(child_element) = child {
219                self.update_element_prefixes(child_element, _prefix_mapping);
220            }
221        }
222    }
223
224    /// Generate a prefix for an unknown URI
225    fn generate_prefix_for_uri(&self, uri: &str) -> String {
226        // Simple heuristic: use domain name or create generic prefix
227        if let Some(domain_start) = uri.find("://") {
228            if let Some(domain_part) = uri[domain_start + 3..].split('/').next() {
229                let domain_clean = domain_part.replace('.', "").replace('-', "");
230                if !domain_clean.is_empty() && domain_clean.len() <= 8 {
231                    return format!("ns{}", domain_clean.chars().take(3).collect::<String>());
232                }
233            }
234        }
235        
236        // Fallback: generate based on hash
237        use std::collections::hash_map::DefaultHasher;
238        use std::hash::{Hash, Hasher};
239        let mut hasher = DefaultHasher::new();
240        uri.hash(&mut hasher);
241        let hash = hasher.finish();
242        format!("ns{}", hash % 1000)
243    }
244
245    /// Validate the minimization result
246    fn validate_minimization(&self, ast: &AST, _namespaces: &IndexMap<String, String>) -> Vec<String> {
247        let mut warnings = Vec::new();
248
249        // Check for unused namespace declarations
250        let declared_uris: IndexSet<_> = ast.namespaces.values().cloned().collect();
251        let usage = match self.analyze_namespace_usage(ast) {
252            Ok(usage) => usage,
253            Err(e) => {
254                warnings.push(format!("Failed to re-analyze namespace usage: {}", e));
255                return warnings;
256            }
257        };
258
259        for uri in &declared_uris {
260            if !usage.used_namespaces.contains(uri) {
261                warnings.push(format!("Declared but unused namespace: {}", uri));
262            }
263        }
264
265        // Check for missing required namespaces
266        let required_namespaces = self.registry.get_version_namespaces(&self.version);
267        for required_uri in required_namespaces {
268            if !declared_uris.contains(&required_uri) {
269                warnings.push(format!("Missing required namespace: {}", required_uri));
270            }
271        }
272
273        warnings
274    }
275
276    /// Hoist namespace declarations to root element when beneficial
277    pub fn hoist_namespaces(&self, mut ast: AST) -> Result<AST, String> {
278        // This would analyze which namespace declarations can be moved to root
279        // For now, assume all namespaces are already at root level
280        debug!("Hoisting namespaces to root level");
281        
282        // Remove duplicate namespace declarations from child elements
283        self.remove_duplicate_declarations(&mut ast.root, &ast.namespaces);
284        
285        Ok(ast)
286    }
287
288    /// Remove duplicate namespace declarations from child elements
289    fn remove_duplicate_declarations(&self, element: &mut Element, root_namespaces: &IndexMap<String, String>) {
290        // Remove xmlns attributes that duplicate root declarations
291        let xmlns_keys: Vec<String> = element.attributes.keys()
292            .filter(|k| k.starts_with("xmlns"))
293            .cloned()
294            .collect();
295            
296        for xmlns_key in xmlns_keys {
297            if let Some(uri) = element.attributes.get(&xmlns_key) {
298                let prefix = if xmlns_key == "xmlns" {
299                    ""
300                } else {
301                    xmlns_key.strip_prefix("xmlns:").unwrap_or("")
302                };
303                
304                // If this namespace is already declared at root with same prefix, remove it
305                if root_namespaces.get(prefix).map(|root_uri| root_uri == uri).unwrap_or(false) {
306                    element.attributes.remove(&xmlns_key);
307                    debug!("Removed duplicate namespace declaration: {} from element {}", xmlns_key, element.name);
308                }
309            }
310        }
311
312        // Recursively process children
313        for child in &mut element.children {
314            if let Node::Element(child_element) = child {
315                self.remove_duplicate_declarations(child_element, root_namespaces);
316            }
317        }
318    }
319}
320
/// Namespace optimization strategies.
///
/// [`OptimizationStrategy::Minimal`] is the default, available through
/// `OptimizationStrategy::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OptimizationStrategy {
    /// Minimal declarations (default)
    Minimal,
    /// Hoist all possible declarations to root
    HoistAll,
    /// Conservative approach, keep existing structure
    Conservative,
}

impl Default for OptimizationStrategy {
    /// Returns [`OptimizationStrategy::Minimal`], the strategy documented as
    /// the default.
    fn default() -> Self {
        OptimizationStrategy::Minimal
    }
}
331
332/// Advanced namespace minimizer with optimization strategies
333pub struct AdvancedNamespaceMinimizer {
334    base_minimizer: NamespaceMinimizer,
335    strategy: OptimizationStrategy,
336}
337
338impl AdvancedNamespaceMinimizer {
339    pub fn new(version: ERNVersion, strategy: OptimizationStrategy) -> Self {
340        Self {
341            base_minimizer: NamespaceMinimizer::new(version),
342            strategy,
343        }
344    }
345
346    pub fn minimize(&self, ast: AST) -> Result<MinimizationResult, String> {
347        match self.strategy {
348            OptimizationStrategy::Minimal => self.base_minimizer.minimize(ast),
349            OptimizationStrategy::HoistAll => {
350                let minimized = self.base_minimizer.minimize(ast)?;
351                let hoisted_ast = self.base_minimizer.hoist_namespaces(minimized.optimized_ast)?;
352                Ok(MinimizationResult {
353                    optimized_ast: hoisted_ast,
354                    ..minimized
355                })
356            },
357            OptimizationStrategy::Conservative => {
358                // Conservative approach: minimal changes
359                let mut result = self.base_minimizer.minimize(ast)?;
360                result.warnings.push("Conservative mode: minimal namespace optimization applied".to_string());
361                Ok(result)
362            },
363        }
364    }
365}
366
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Element;

    /// The constructor stores the requested ERN version.
    #[test]
    fn test_namespace_minimizer_creation() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);
        assert!(matches!(minimizer.version, ERNVersion::V4_3));
    }

    /// A namespace carried by elements and declared on the AST is reported as used.
    #[test]
    fn test_namespace_usage_analysis() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        // Create test AST
        let mut root = Element::new("NewReleaseMessage")
            .with_namespace("http://ddex.net/xml/ern/43");
        root.add_child(Element::new("MessageHeader")
            .with_namespace("http://ddex.net/xml/ern/43"));

        // The AST is only read by the analysis, so it does not need to be mutable.
        let ast = AST {
            root,
            namespaces: {
                let mut ns = IndexMap::new();
                ns.insert("ern".to_string(), "http://ddex.net/xml/ern/43".to_string());
                ns
            },
            schema_location: None,
        };

        let usage = minimizer.analyze_namespace_usage(&ast).unwrap();
        assert!(usage.used_namespaces.contains("http://ddex.net/xml/ern/43"));
    }

    /// Generated prefixes start with `ns` and stay short.
    #[test]
    fn test_prefix_generation() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        let prefix = minimizer.generate_prefix_for_uri("http://example.com/custom");
        assert!(prefix.starts_with("ns"));
        assert!(prefix.len() <= 10); // Reasonable length
    }

    /// Known DDEX namespaces are declared under `ern` and `avs`.
    #[test]
    fn test_minimal_declarations() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        let mut usage = NamespaceUsage {
            used_namespaces: IndexSet::new(),
            namespace_elements: HashMap::new(),
            attribute_namespaces: IndexSet::new(),
        };

        usage.used_namespaces.insert("http://ddex.net/xml/ern/43".to_string());
        usage.used_namespaces.insert("http://ddex.net/xml/avs".to_string());

        let declarations = minimizer.create_minimal_root_declarations(&usage).unwrap();
        assert!(declarations.contains_key("ern"));
        assert!(declarations.contains_key("avs"));
    }
}