ddex_builder/
namespace_minimizer.rs

1//! # Namespace Minimizer for DDEX Builder
2//!
3//! This module provides functionality to minimize namespace declarations in generated XML,
4//! hoisting declarations to the root when possible and applying locked prefixes.
5
6use crate::ast::{Element, Node, AST};
7use crate::canonical::rules::CanonicalNamespaceManager;
8use ddex_core::models::versions::ERNVersion;
9use ddex_core::namespace::{ConflictResolution, NamespaceRegistry};
10use indexmap::{IndexMap, IndexSet};
11use std::collections::HashMap;
12use tracing::debug;
13
14/// Namespace minimization result
15#[derive(Debug, Clone)]
16pub struct MinimizationResult {
17    /// Minimal namespace declarations for root element
18    pub root_namespaces: IndexMap<String, String>,
19    /// Updated AST with minimized namespace declarations
20    pub optimized_ast: AST,
21    /// Mapping of old prefixes to new prefixes
22    pub prefix_mapping: IndexMap<String, String>,
23    /// Warnings about namespace changes
24    pub warnings: Vec<String>,
25}
26
27/// Namespace usage analysis
28#[derive(Debug, Clone)]
29pub struct NamespaceUsage {
30    /// All namespaces used in the document
31    pub used_namespaces: IndexSet<String>,
32    /// Elements that use each namespace
33    pub namespace_elements: HashMap<String, IndexSet<String>>,
34    /// Attribute namespaces
35    pub attribute_namespaces: IndexSet<String>,
36}
37
38/// Comprehensive namespace minimizer
39pub struct NamespaceMinimizer {
40    /// Namespace registry for known namespaces
41    registry: NamespaceRegistry,
42    /// Canonical namespace manager
43    canonical_manager: CanonicalNamespaceManager,
44    /// ERN version for version-specific rules
45    version: ERNVersion,
46    /// Conflict resolution strategy
47    conflict_resolution: ConflictResolution,
48}
49
50impl NamespaceMinimizer {
51    /// Create new namespace minimizer
52    pub fn new(version: ERNVersion) -> Self {
53        Self {
54            registry: NamespaceRegistry::new(),
55            canonical_manager: CanonicalNamespaceManager::new(),
56            version,
57            conflict_resolution: ConflictResolution::GenerateUnique,
58        }
59    }
60
61    /// Create namespace minimizer with specific conflict resolution
62    pub fn with_conflict_resolution(mut self, strategy: ConflictResolution) -> Self {
63        self.conflict_resolution = strategy;
64        self
65    }
66
67    /// Minimize namespace declarations in AST
68    pub fn minimize(&self, ast: AST) -> Result<MinimizationResult, String> {
69        debug!("Starting namespace minimization for ERN {:?}", self.version);
70
71        // Step 1: Analyze namespace usage throughout the document
72        let usage = self.analyze_namespace_usage(&ast)?;
73        debug!("Found {} used namespaces", usage.used_namespaces.len());
74
75        // Step 2: Create optimal namespace declarations for root
76        let root_namespaces = self.create_minimal_root_declarations(&usage)?;
77
78        // Step 3: Apply canonical namespace transformations
79        let canonical_namespaces = self.apply_canonical_rules(&root_namespaces)?;
80
81        // Step 4: Update AST with optimized namespace declarations
82        let (optimized_ast, prefix_mapping) =
83            self.apply_namespace_minimization(ast, &canonical_namespaces)?;
84
85        // Step 5: Validate the result
86        let warnings = self.validate_minimization(&optimized_ast, &canonical_namespaces);
87
88        Ok(MinimizationResult {
89            root_namespaces: canonical_namespaces,
90            optimized_ast,
91            prefix_mapping,
92            warnings,
93        })
94    }
95
96    /// Analyze namespace usage throughout the document
97    fn analyze_namespace_usage(&self, ast: &AST) -> Result<NamespaceUsage, String> {
98        let mut used_namespaces = IndexSet::new();
99        let mut namespace_elements = HashMap::new();
100        let mut attribute_namespaces = IndexSet::new();
101
102        // Add namespaces already declared in AST
103        for (_prefix, uri) in &ast.namespaces {
104            used_namespaces.insert(uri.clone());
105            namespace_elements
106                .entry(uri.clone())
107                .or_insert_with(IndexSet::new);
108        }
109
110        // Analyze namespace usage in element tree
111        self.analyze_element_usage(
112            &ast.root,
113            &mut used_namespaces,
114            &mut namespace_elements,
115            &mut attribute_namespaces,
116        );
117
118        // Add required namespaces for the ERN version
119        let required_namespaces = self.registry.get_version_namespaces(&self.version);
120        for ns_uri in required_namespaces {
121            used_namespaces.insert(ns_uri);
122        }
123
124        Ok(NamespaceUsage {
125            used_namespaces,
126            namespace_elements,
127            attribute_namespaces,
128        })
129    }
130
131    /// Recursively analyze namespace usage in elements
132    fn analyze_element_usage(
133        &self,
134        element: &Element,
135        used_namespaces: &mut IndexSet<String>,
136        namespace_elements: &mut HashMap<String, IndexSet<String>>,
137        attribute_namespaces: &mut IndexSet<String>,
138    ) {
139        // Check element namespace
140        if let Some(ref ns) = element.namespace {
141            used_namespaces.insert(ns.clone());
142            namespace_elements
143                .entry(ns.clone())
144                .or_insert_with(IndexSet::new)
145                .insert(element.name.clone());
146        }
147
148        // Check attribute namespaces
149        for (attr_name, _) in &element.attributes {
150            if attr_name.contains(':') && !attr_name.starts_with("xmlns") {
151                // Extract namespace prefix from qualified attribute name
152                if let Some(prefix) = attr_name.split(':').next() {
153                    // This would need the namespace URI, but for now just track the usage
154                    debug!(
155                        "Found namespaced attribute: {} with prefix: {}",
156                        attr_name, prefix
157                    );
158                }
159            }
160        }
161
162        // Recursively analyze children
163        for child in &element.children {
164            if let Node::Element(child_element) = child {
165                self.analyze_element_usage(
166                    child_element,
167                    used_namespaces,
168                    namespace_elements,
169                    attribute_namespaces,
170                );
171            }
172        }
173    }
174
175    /// Create minimal namespace declarations for root element
176    fn create_minimal_root_declarations(
177        &self,
178        usage: &NamespaceUsage,
179    ) -> Result<IndexMap<String, String>, String> {
180        let mut declarations = IndexMap::new();
181
182        // Create declarations for all used namespaces
183        for uri in &usage.used_namespaces {
184            if let Some(preferred_prefix) = self.registry.get_preferred_prefix(uri) {
185                declarations.insert(preferred_prefix.to_string(), uri.clone());
186            } else {
187                // For unknown namespaces, generate a prefix
188                let generated_prefix = self.generate_prefix_for_uri(uri);
189                declarations.insert(generated_prefix, uri.clone());
190            }
191        }
192
193        Ok(declarations)
194    }
195
196    /// Apply canonical namespace rules
197    fn apply_canonical_rules(
198        &self,
199        declarations: &IndexMap<String, String>,
200    ) -> Result<IndexMap<String, String>, String> {
201        let version_str = match self.version {
202            ERNVersion::V3_8_2 => "3.8.2",
203            ERNVersion::V4_2 => "4.2",
204            ERNVersion::V4_3 => "4.3",
205        };
206
207        Ok(self
208            .canonical_manager
209            .canonicalize_namespaces(declarations, version_str))
210    }
211
212    /// Apply namespace minimization to AST
213    fn apply_namespace_minimization(
214        &self,
215        mut ast: AST,
216        canonical_namespaces: &IndexMap<String, String>,
217    ) -> Result<(AST, IndexMap<String, String>), String> {
218        // Update AST namespaces with canonical declarations
219        ast.namespaces = canonical_namespaces.clone();
220
221        // Create prefix mapping for any changes
222        let mut prefix_mapping = IndexMap::new();
223
224        // For now, assume no prefix changes (would be more complex in full implementation)
225        for (prefix, _) in canonical_namespaces {
226            prefix_mapping.insert(prefix.clone(), prefix.clone());
227        }
228
229        // Update element prefixes if needed (recursive through tree)
230        self.update_element_prefixes(&mut ast.root, &prefix_mapping);
231
232        Ok((ast, prefix_mapping))
233    }
234
235    /// Update element prefixes based on mapping
236    fn update_element_prefixes(
237        &self,
238        element: &mut Element,
239        _prefix_mapping: &IndexMap<String, String>,
240    ) {
241        // This would update element names and attributes based on prefix changes
242        // For now, keep existing prefixes
243
244        // Recursively update children
245        for child in &mut element.children {
246            if let Node::Element(child_element) = child {
247                self.update_element_prefixes(child_element, _prefix_mapping);
248            }
249        }
250    }
251
252    /// Generate a prefix for an unknown URI
253    fn generate_prefix_for_uri(&self, uri: &str) -> String {
254        // Simple heuristic: use domain name or create generic prefix
255        if let Some(domain_start) = uri.find("://") {
256            if let Some(domain_part) = uri[domain_start + 3..].split('/').next() {
257                let domain_clean = domain_part.replace('.', "").replace('-', "");
258                if !domain_clean.is_empty() && domain_clean.len() <= 8 {
259                    return format!("ns{}", domain_clean.chars().take(3).collect::<String>());
260                }
261            }
262        }
263
264        // Fallback: generate based on hash
265        use std::collections::hash_map::DefaultHasher;
266        use std::hash::{Hash, Hasher};
267        let mut hasher = DefaultHasher::new();
268        uri.hash(&mut hasher);
269        let hash = hasher.finish();
270        format!("ns{}", hash % 1000)
271    }
272
273    /// Validate the minimization result
274    fn validate_minimization(
275        &self,
276        ast: &AST,
277        _namespaces: &IndexMap<String, String>,
278    ) -> Vec<String> {
279        let mut warnings = Vec::new();
280
281        // Check for unused namespace declarations
282        let declared_uris: IndexSet<_> = ast.namespaces.values().cloned().collect();
283        let usage = match self.analyze_namespace_usage(ast) {
284            Ok(usage) => usage,
285            Err(e) => {
286                warnings.push(format!("Failed to re-analyze namespace usage: {}", e));
287                return warnings;
288            }
289        };
290
291        for uri in &declared_uris {
292            if !usage.used_namespaces.contains(uri) {
293                warnings.push(format!("Declared but unused namespace: {}", uri));
294            }
295        }
296
297        // Check for missing required namespaces
298        let required_namespaces = self.registry.get_version_namespaces(&self.version);
299        for required_uri in required_namespaces {
300            if !declared_uris.contains(&required_uri) {
301                warnings.push(format!("Missing required namespace: {}", required_uri));
302            }
303        }
304
305        warnings
306    }
307
308    /// Hoist namespace declarations to root element when beneficial
309    pub fn hoist_namespaces(&self, mut ast: AST) -> Result<AST, String> {
310        // This would analyze which namespace declarations can be moved to root
311        // For now, assume all namespaces are already at root level
312        debug!("Hoisting namespaces to root level");
313
314        // Remove duplicate namespace declarations from child elements
315        self.remove_duplicate_declarations(&mut ast.root, &ast.namespaces);
316
317        Ok(ast)
318    }
319
320    /// Remove duplicate namespace declarations from child elements
321    fn remove_duplicate_declarations(
322        &self,
323        element: &mut Element,
324        root_namespaces: &IndexMap<String, String>,
325    ) {
326        // Remove xmlns attributes that duplicate root declarations
327        let xmlns_keys: Vec<String> = element
328            .attributes
329            .keys()
330            .filter(|k| k.starts_with("xmlns"))
331            .cloned()
332            .collect();
333
334        for xmlns_key in xmlns_keys {
335            if let Some(uri) = element.attributes.get(&xmlns_key) {
336                let prefix = if xmlns_key == "xmlns" {
337                    ""
338                } else {
339                    xmlns_key.strip_prefix("xmlns:").unwrap_or("")
340                };
341
342                // If this namespace is already declared at root with same prefix, remove it
343                if root_namespaces
344                    .get(prefix)
345                    .map(|root_uri| root_uri == uri)
346                    .unwrap_or(false)
347                {
348                    element.attributes.shift_remove(&xmlns_key);
349                    debug!(
350                        "Removed duplicate namespace declaration: {} from element {}",
351                        xmlns_key, element.name
352                    );
353                }
354            }
355        }
356
357        // Recursively process children
358        for child in &mut element.children {
359            if let Node::Element(child_element) = child {
360                self.remove_duplicate_declarations(child_element, root_namespaces);
361            }
362        }
363    }
364}
365
/// Namespace optimization strategies.
///
/// [`OptimizationStrategy::Minimal`] is the default and is what
/// `OptimizationStrategy::default()` returns. The type is `Copy` and can be
/// compared with `==`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum OptimizationStrategy {
    /// Minimal declarations (default)
    #[default]
    Minimal,
    /// Hoist all possible declarations to root
    HoistAll,
    /// Conservative approach, keep existing structure
    Conservative,
}
376
377/// Advanced namespace minimizer with optimization strategies
378pub struct AdvancedNamespaceMinimizer {
379    base_minimizer: NamespaceMinimizer,
380    strategy: OptimizationStrategy,
381}
382
383impl AdvancedNamespaceMinimizer {
384    /// Create a new namespace minimizer with specified version and strategy
385    pub fn new(version: ERNVersion, strategy: OptimizationStrategy) -> Self {
386        Self {
387            base_minimizer: NamespaceMinimizer::new(version),
388            strategy,
389        }
390    }
391
392    /// Minimize namespaces in the AST according to the optimization strategy
393    pub fn minimize(&self, ast: AST) -> Result<MinimizationResult, String> {
394        match self.strategy {
395            OptimizationStrategy::Minimal => self.base_minimizer.minimize(ast),
396            OptimizationStrategy::HoistAll => {
397                let minimized = self.base_minimizer.minimize(ast)?;
398                let hoisted_ast = self
399                    .base_minimizer
400                    .hoist_namespaces(minimized.optimized_ast)?;
401                Ok(MinimizationResult {
402                    optimized_ast: hoisted_ast,
403                    ..minimized
404                })
405            }
406            OptimizationStrategy::Conservative => {
407                // Conservative approach: minimal changes
408                let mut result = self.base_minimizer.minimize(ast)?;
409                result
410                    .warnings
411                    .push("Conservative mode: minimal namespace optimization applied".to_string());
412                Ok(result)
413            }
414        }
415    }
416}
417
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Element;

    /// Namespace URI of ERN 4.3, shared by several tests.
    const ERN_43_URI: &str = "http://ddex.net/xml/ern/43";

    /// A freshly built minimizer remembers the version it was given.
    #[test]
    fn test_namespace_minimizer_creation() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);
        assert!(matches!(minimizer.version, ERNVersion::V4_3));
    }

    /// A namespace carried by elements and declared on the AST is reported as used.
    #[test]
    fn test_namespace_usage_analysis() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        // Root message with a single child, both in the ERN namespace.
        let mut root = Element::new("NewReleaseMessage").with_namespace(ERN_43_URI);
        root.add_child(Element::new("MessageHeader").with_namespace(ERN_43_URI));

        let namespaces: IndexMap<String, String> =
            std::iter::once(("ern".to_string(), ERN_43_URI.to_string())).collect();
        let ast = AST {
            root,
            namespaces,
            schema_location: None,
        };

        let usage = minimizer.analyze_namespace_usage(&ast).unwrap();
        assert!(usage.used_namespaces.contains(ERN_43_URI));
    }

    /// Generated prefixes start with "ns" and stay short.
    #[test]
    fn test_prefix_generation() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        let prefix = minimizer.generate_prefix_for_uri("http://example.com/custom");
        assert!(prefix.starts_with("ns"));
        assert!(prefix.len() <= 10); // Reasonable length
    }

    /// Known DDEX namespaces are declared under their preferred prefixes.
    #[test]
    fn test_minimal_declarations() {
        let minimizer = NamespaceMinimizer::new(ERNVersion::V4_3);

        let used_namespaces: IndexSet<String> = [ERN_43_URI, "http://ddex.net/xml/avs"]
            .iter()
            .map(|uri| uri.to_string())
            .collect();
        let usage = NamespaceUsage {
            used_namespaces,
            namespace_elements: HashMap::new(),
            attribute_namespaces: IndexSet::new(),
        };

        let declarations = minimizer.create_minimal_root_declarations(&usage).unwrap();
        assert!(declarations.contains_key("ern"));
        assert!(declarations.contains_key("avs"));
    }
}