Skip to main content

sqry_core/
normalizer.rs

1//! Metadata normalization for symbol attributes
2//!
3//! This module provides normalization of legacy metadata keys to canonical forms,
4//! enabling backward compatibility while transitioning to standardized metadata keys.
5//!
6//! # Design Principles
7//!
8//! 1. **User Convenience**: Short forms (`async`) map to canonical (`is_async`) for easier queries
9//! 2. **No Data Loss**: Unknown keys are preserved without modification
//! 3. **Canonical Precedence**: If both short and canonical keys exist, the canonical key's value wins
11//! 4. **Transparent**: Plugins use canonical keys (is_async); normalizer handles user input
12//!
13//! # Usage
14//!
15//! ```rust
16//! use sqry_core::normalizer::MetadataNormalizer;
17//! use std::collections::HashMap;
18//!
19//! let mut raw_metadata = HashMap::new();
20//! raw_metadata.insert("async".to_string(), "true".to_string());        // Short form
21//! raw_metadata.insert("is_static".to_string(), "true".to_string());    // Canonical form
22//! raw_metadata.insert("custom_key".to_string(), "value".to_string());  // Unknown
23//!
24//! let normalizer = MetadataNormalizer::new();
25//! let normalized = normalizer.normalize(raw_metadata);
26//!
27//! assert_eq!(normalized.get("is_async"), Some(&"true".to_string()));   // Normalized to canonical
28//! assert_eq!(normalized.get("is_static"), Some(&"true".to_string()));  // Preserved
29//! assert_eq!(normalized.get("custom_key"), Some(&"value".to_string()));// Preserved
30//! ```
31//!
32//! # Mapping Strategy
33//!
34//! The normalizer uses a static mapping table. User-friendly short forms map TO the
35//! canonical keys defined in `sqry_core::metadata::keys`:
36//!
37//! - `async` → `is_async` (matches metadata::keys::IS_ASYNC)
38//! - `static` → `is_static` (matches metadata::keys::IS_STATIC)
39//! - `const` → `is_const` (matches metadata::keys::IS_CONST)
40//! - `final` → `is_final` (matches metadata::keys::IS_FINAL)
41//! - `abstract` → `is_abstract` (matches metadata::keys::IS_ABSTRACT)
42//! - And more...
43//!
44//! Keys already in canonical form (`is_async`) or not in the mapping table are
45//! preserved as-is.
46
47use std::collections::HashMap;
48
/// Translates short-form metadata keys into their canonical spellings
///
/// User-friendly short forms (e.g., `async`) are looked up in an internal table and
/// replaced by the canonical keys (e.g., `is_async`) used by plugins, which keeps
/// queries convenient while staying consistent with `metadata::keys`.
#[derive(Clone, Debug)]
pub struct MetadataNormalizer {
    /// Lookup table: legacy (short-form) key → canonical key
    legacy_to_canonical: HashMap<&'static str, &'static str>,
}
58
59impl MetadataNormalizer {
60    /// Create a new metadata normalizer with default mappings
61    ///
62    /// # Examples
63    ///
64    /// ```
65    /// use sqry_core::normalizer::MetadataNormalizer;
66    ///
67    /// let normalizer = MetadataNormalizer::new();
68    /// ```
69    #[must_use]
70    pub fn new() -> Self {
71        Self {
72            legacy_to_canonical: Self::legacy_mappings(),
73        }
74    }
75
76    /// Define legacy → canonical key mappings
77    ///
78    /// This is the single source of truth for all legacy key migrations.
79    ///
80    /// **Important**: Canonical keys MUST match the constants in `sqry_core::metadata::keys`.
81    /// The normalizer maps alternative forms (user queries, old indexes) to the canonical
82    /// keys that plugins actually use.
83    ///
84    /// For example:
85    /// - User query: `async:true` → normalized to `is_async:true` (matches `metadata::keys::IS_ASYNC`)
86    /// - User query: `is_async:true` → preserved as-is (already canonical)
87    fn legacy_mappings() -> HashMap<&'static str, &'static str> {
88        let mut mappings = HashMap::new();
89
90        // Query convenience: Allow users to type shorter forms without "is_" prefix
91        // These map TO the canonical keys defined in metadata::keys module
92
93        // Function modifiers
94        mappings.insert("async", "is_async");
95        mappings.insert("static", "is_static");
96        mappings.insert("abstract", "is_abstract");
97        mappings.insert("final", "is_final");
98        mappings.insert("override", "is_override");
99        mappings.insert("mutating", "is_mutating");
100        mappings.insert("generator", "is_generator");
101        mappings.insert("exported", "is_exported");
102        mappings.insert("throwing", "is_throwing");
103
104        // Class/Type modifiers
105        mappings.insert("struct", "is_struct");
106        mappings.insert("enum", "is_enum");
107        mappings.insert("interface", "is_interface");
108        mappings.insert("protocol", "is_protocol");
109        mappings.insert("actor", "is_actor");
110        mappings.insert("extension", "is_extension");
111        mappings.insert("trait", "is_trait");
112        mappings.insert("sealed", "is_sealed");
113        mappings.insert("generics", "has_generics");
114
115        // Property modifiers
116        mappings.insert("computed", "is_computed");
117        mappings.insert("lazy", "is_lazy");
118        mappings.insert("weak", "is_weak");
119        mappings.insert("mutable", "is_mutable");
120        mappings.insert("const", "is_const");
121
122        // Language-specific
123        mappings.insert("classmethod", "is_classmethod");
124        mappings.insert("staticmethod", "is_staticmethod");
125        mappings.insert("property_decorator", "is_property_decorator");
126        mappings.insert("receiver", "has_receiver");
127        mappings.insert("pointer_receiver", "is_pointer_receiver");
128        mappings.insert("synchronized", "is_synchronized");
129        mappings.insert("constexpr", "is_constexpr");
130        mappings.insert("noexcept", "is_noexcept");
131        mappings.insert("unsafe", "is_unsafe");
132        mappings.insert("const_fn", "is_const_fn");
133        mappings.insert("readonly", "is_readonly");
134        mappings.insert("factory", "is_factory");
135        mappings.insert("external", "is_external");
136
137        mappings
138    }
139
140    /// Normalize metadata by mapping legacy keys to canonical forms
141    ///
142    /// # Behavior
143    ///
144    /// - Legacy keys are converted to canonical keys
145    /// - Unknown keys are preserved as-is (no data loss)
146    /// - If both legacy and canonical keys exist, canonical wins
147    /// - Empty metadata returns empty hashmap
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// use sqry_core::normalizer::MetadataNormalizer;
153    /// use std::collections::HashMap;
154    ///
155    /// let mut raw = HashMap::new();
156    /// raw.insert("async".to_string(), "true".to_string());       // Short form
157    /// raw.insert("custom".to_string(), "value".to_string());     // Unknown
158    ///
159    /// let normalizer = MetadataNormalizer::new();
160    /// let normalized = normalizer.normalize(raw);
161    ///
162    /// assert_eq!(normalized.get("is_async"), Some(&"true".to_string())); // Canonical
163    /// assert_eq!(normalized.get("custom"), Some(&"value".to_string()));   // Preserved
164    /// assert_eq!(normalized.get("async"), None); // Short form removed
165    /// ```
166    #[must_use]
167    pub fn normalize(&self, raw: HashMap<String, String>) -> HashMap<String, String> {
168        let mut normalized = HashMap::new();
169
170        for (key, value) in raw {
171            // Check if this is a legacy key that needs conversion
172            if let Some(&canonical_key) = self.legacy_to_canonical.get(key.as_str()) {
173                // Use canonical key (this will overwrite if canonical already exists - last wins)
174                log::debug!(
175                    "Normalizing metadata: legacy key '{key}' → canonical '{canonical_key}'"
176                );
177                normalized.insert(canonical_key.to_string(), value);
178            } else {
179                // Not a legacy key - preserve as-is
180                // This includes:
181                // - Already-canonical keys (is_async, is_static, etc.)
182                // - Plugin-specific keys (not in mapping table)
183                // - Future keys (forward compatibility)
184                normalized.insert(key, value);
185            }
186        }
187
188        normalized
189    }
190
191    /// Check if a key is a legacy key that will be normalized
192    ///
193    /// # Examples
194    ///
195    /// ```
196    /// use sqry_core::normalizer::MetadataNormalizer;
197    ///
198    /// let normalizer = MetadataNormalizer::new();
199    /// assert!(normalizer.is_legacy_key("async"));           // Short form
200    /// assert!(!normalizer.is_legacy_key("is_async"));       // Canonical
201    /// assert!(!normalizer.is_legacy_key("custom_key"));     // Unknown
202    /// ```
203    #[must_use]
204    pub fn is_legacy_key(&self, key: &str) -> bool {
205        self.legacy_to_canonical.contains_key(key)
206    }
207
208    /// Get the canonical form of a key (if it's a legacy key)
209    ///
210    /// Returns `None` if the key is not a legacy key.
211    ///
212    /// # Examples
213    ///
214    /// ```
215    /// use sqry_core::normalizer::MetadataNormalizer;
216    ///
217    /// let normalizer = MetadataNormalizer::new();
218    /// assert_eq!(normalizer.get_canonical("async"), Some("is_async"));    // Short → canonical
219    /// assert_eq!(normalizer.get_canonical("is_async"), None);            // Already canonical
220    /// assert_eq!(normalizer.get_canonical("custom"), None);              // Unknown
221    /// ```
222    #[must_use]
223    pub fn get_canonical(&self, key: &str) -> Option<&'static str> {
224        self.legacy_to_canonical.get(key).copied()
225    }
226
227    /// Get all legacy keys supported by this normalizer
228    ///
229    /// Useful for documentation and validation.
230    ///
231    /// # Examples
232    ///
233    /// ```
234    /// use sqry_core::normalizer::MetadataNormalizer;
235    ///
236    /// let normalizer = MetadataNormalizer::new();
237    /// let legacy_keys: Vec<&&str> = normalizer.legacy_keys().collect();
238    /// assert!(legacy_keys.contains(&&"async"));       // Short forms
239    /// assert!(legacy_keys.contains(&&"static"));
240    /// ```
241    pub fn legacy_keys(&self) -> impl Iterator<Item = &&'static str> {
242        self.legacy_to_canonical.keys()
243    }
244
245    /// Get all canonical keys (targets of normalization)
246    ///
247    /// # Examples
248    ///
249    /// ```
250    /// use sqry_core::normalizer::MetadataNormalizer;
251    ///
252    /// let normalizer = MetadataNormalizer::new();
253    /// let canonical_keys: Vec<&&str> = normalizer.canonical_keys().collect();
254    /// assert!(canonical_keys.contains(&&"is_async"));    // Canonical forms
255    /// assert!(canonical_keys.contains(&&"is_static"));
256    /// ```
257    pub fn canonical_keys(&self) -> impl Iterator<Item = &&'static str> {
258        self.legacy_to_canonical.values()
259    }
260
261    /// Get all short form → canonical mappings
262    ///
263    /// Returns an iterator over (`short_form`, canonical) pairs.
264    ///
265    /// # Examples
266    ///
267    /// ```
268    /// use sqry_core::normalizer::MetadataNormalizer;
269    ///
270    /// let normalizer = MetadataNormalizer::new();
271    /// for (short_form, canonical) in normalizer.mappings() {
272    ///     println!("{} → {}", short_form, canonical);
273    /// }
274    /// ```
275    pub fn mappings(&self) -> impl Iterator<Item = (&'static str, &'static str)> + '_ {
276        self.legacy_to_canonical.iter().map(|(&k, &v)| (k, v))
277    }
278}
279
280impl Default for MetadataNormalizer {
281    fn default() -> Self {
282        Self::new()
283    }
284}
285
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned metadata map from borrowed `(key, value)` pairs.
    fn metadata(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Run a default-constructed normalizer over the given pairs.
    fn normalized(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        MetadataNormalizer::new().normalize(metadata(pairs))
    }

    /// Look up `key` and expose the stored value as a plain `&str`.
    fn value_of<'a>(map: &'a HashMap<String, String>, key: &str) -> Option<&'a str> {
        map.get(key).map(String::as_str)
    }

    /// Assert that every listed key is present with the value `"true"`.
    fn assert_all_true(map: &HashMap<String, String>, keys: &[&str]) {
        for &key in keys {
            assert_eq!(value_of(map, key), Some("true"), "unexpected value for `{key}`");
        }
    }

    #[test]
    fn test_normalize_short_form_to_canonical() {
        let out = normalized(&[("async", "true")]);

        // The value moves to the canonical key and the short form disappears.
        assert_eq!(value_of(&out, "is_async"), Some("true"));
        assert_eq!(value_of(&out, "async"), None);
    }

    #[test]
    fn test_normalize_multiple_short_forms() {
        let out = normalized(&[("async", "true"), ("static", "true"), ("final", "false")]);

        assert_eq!(value_of(&out, "is_async"), Some("true"));
        assert_eq!(value_of(&out, "is_static"), Some("true"));
        assert_eq!(value_of(&out, "is_final"), Some("false"));
        assert_eq!(out.len(), 3);
    }

    #[test]
    fn test_preserve_unknown_keys() {
        let out = normalized(&[("custom_plugin_key", "value1"), ("another_custom", "value2")]);

        assert_eq!(value_of(&out, "custom_plugin_key"), Some("value1"));
        assert_eq!(value_of(&out, "another_custom"), Some("value2"));
        assert_eq!(out.len(), 2);
    }

    #[test]
    fn test_canonical_key_preserved() {
        // A key that is already canonical passes through untouched.
        let out = normalized(&[("is_async", "true")]);

        assert_eq!(value_of(&out, "is_async"), Some("true"));
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn test_canonical_wins_over_short_form() {
        let out = normalized(&[("async", "false"), ("is_async", "true")]);

        // Both inputs collapse onto `is_async`. This test pins only that exactly one
        // entry survives and that its value came from one of the two inputs.
        assert!(matches!(value_of(&out, "is_async"), Some("true" | "false")));
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn test_empty_metadata() {
        let out = normalized(&[]);

        assert!(out.is_empty());
    }

    #[test]
    fn test_mixed_short_canonical_unknown() {
        let out = normalized(&[
            ("async", "true"),     // Short form
            ("is_static", "true"), // Canonical
            ("custom", "value"),   // Unknown
        ]);

        assert_eq!(value_of(&out, "is_async"), Some("true"));
        assert_eq!(value_of(&out, "is_static"), Some("true"));
        assert_eq!(value_of(&out, "custom"), Some("value"));
        assert_eq!(out.len(), 3);
    }

    #[test]
    fn test_is_legacy_key() {
        let normalizer = MetadataNormalizer::new();

        // Short forms are "legacy" (need normalization)
        for key in ["async", "static", "final"] {
            assert!(normalizer.is_legacy_key(key), "`{key}` should be legacy");
        }

        // Canonical and unknown keys are NOT legacy
        for key in ["is_async", "is_static", "custom_key"] {
            assert!(!normalizer.is_legacy_key(key), "`{key}` should not be legacy");
        }
    }

    #[test]
    fn test_get_canonical() {
        let normalizer = MetadataNormalizer::new();

        // Short → canonical mapping
        let expected = [
            ("async", "is_async"),
            ("static", "is_static"),
            ("throwing", "is_throwing"),
        ];
        for (short, canonical) in expected {
            assert_eq!(normalizer.get_canonical(short), Some(canonical));
        }

        // Canonical and unknown keys have no mapping
        for key in ["is_async", "custom"] {
            assert_eq!(normalizer.get_canonical(key), None);
        }
    }

    #[test]
    fn test_visibility_key_not_normalized() {
        // `visibility` is not in the short-form table, so it is passed through.
        let out = normalized(&[("visibility", "public")]);

        assert_eq!(value_of(&out, "visibility"), Some("public"));
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn test_all_function_modifiers() {
        let out = normalized(&[
            ("async", "true"),
            ("static", "true"),
            ("abstract", "true"),
            ("final", "true"),
            ("override", "true"),
        ]);

        assert_all_true(
            &out,
            &["is_async", "is_static", "is_abstract", "is_final", "is_override"],
        );
        assert_eq!(out.len(), 5);
    }

    #[test]
    fn test_all_class_modifiers() {
        let out = normalized(&[
            ("struct", "true"),
            ("enum", "true"),
            ("interface", "true"),
            ("actor", "true"),
            ("generics", "true"),
        ]);

        assert_all_true(
            &out,
            &["is_struct", "is_enum", "is_interface", "is_actor", "has_generics"],
        );
        assert_eq!(out.len(), 5);
    }

    #[test]
    fn test_property_modifiers() {
        let out = normalized(&[
            ("computed", "true"),
            ("lazy", "true"),
            ("weak", "true"),
            ("const", "true"),
        ]);

        assert_all_true(&out, &["is_computed", "is_lazy", "is_weak", "is_const"]);
        assert_eq!(out.len(), 4);
    }

    #[test]
    fn test_language_specific_python() {
        let out = normalized(&[("classmethod", "true"), ("staticmethod", "true")]);

        assert_all_true(&out, &["is_classmethod", "is_staticmethod"]);
        assert_eq!(out.len(), 2);
    }

    #[test]
    fn test_legacy_keys_iterator() {
        let normalizer = MetadataNormalizer::new();
        let short_forms: Vec<&str> = normalizer.legacy_keys().copied().collect();

        assert!(short_forms.len() > 20); // Should have many mappings
        // Legacy keys are SHORT forms
        assert!(short_forms.contains(&"async"));
        assert!(short_forms.contains(&"static"));
    }

    #[test]
    fn test_canonical_keys_iterator() {
        let normalizer = MetadataNormalizer::new();
        let targets: Vec<&str> = normalizer.canonical_keys().copied().collect();

        assert!(targets.len() > 20);
        // Canonical keys are IS_* forms
        assert!(targets.contains(&"is_async"));
        assert!(targets.contains(&"is_static"));
    }
}