Skip to main content

depyler_tooling/library_mapping/
mod.rs

1//! DEPYLER-0903: Enterprise Library Mapping System
2//!
3//! Deterministic, extensible system for mapping Python external libraries to Rust equivalents.
4//! Uses O(1) hash table lookup with priority chain: overrides > extensions > core.
5//!
6//! # Architecture
7//!
8//! ```text
9//! ┌─────────────────────────────────────────────────────┐
10//! │                  MappingRegistry                     │
11//! ├─────────────────────────────────────────────────────┤
12//! │  Priority 1: User Overrides (highest)               │
13//! │  Priority 2: Enterprise Extensions                  │
14//! │  Priority 3: Core Mappings (shipped with depyler)   │
15//! └─────────────────────────────────────────────────────┘
16//! ```
17//!
18//! # References
19//!
20//! - [1] CLRS: O(1) hash table lookup
21//! - [26] Parnas: Information hiding principle
22//! - [27] Fredman et al.: Perfect hashing
23
24// Allow deprecated Template variant for backwards compatibility
25#![allow(deprecated)]
26
27use serde::{Deserialize, Serialize};
28use std::collections::HashMap;
29
30pub mod toml_plugin;
31
32#[cfg(test)]
33mod tests;
34
35// ============================================================================
36// Core Data Structures (Section 2.1 of spec)
37// ============================================================================
38
39/// A deterministic mapping from Python library to Rust equivalent.
40///
41/// This is a pure function: f(python_module, python_item) → rust_equivalent
42/// No randomness, no learning, no approximation.
43///
44/// Design follows Parnas's information hiding principle [26].
45#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
46pub struct LibraryMapping {
47    /// Python module path (e.g., "pandas", "numpy.linalg")
48    pub python_module: String,
49
50    /// Rust crate and module path (e.g., "polars", "ndarray::linalg")
51    pub rust_crate: String,
52
53    /// Python version requirement (e.g., ">=3.8" or "*")
54    pub python_version_req: String,
55
56    /// Rust crate version constraint (semver)
57    pub rust_crate_version: String,
58
59    /// Item-level mappings: Python name → Rust mapping
60    pub items: HashMap<String, ItemMapping>,
61
62    /// Required Cargo.toml features
63    pub features: Vec<String>,
64
65    /// Mapping confidence level
66    pub confidence: MappingConfidence,
67
68    /// Source of mapping (documentation URL, RFC, etc.)
69    pub provenance: String,
70}
71
72/// Individual item mapping within a library
73#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
74pub struct ItemMapping {
75    /// Rust equivalent name
76    pub rust_name: String,
77
78    /// Transformation pattern
79    pub pattern: TransformPattern,
80
81    /// Type signature transformation (optional)
82    pub type_transform: Option<TypeTransform>,
83}
84
85/// Transformation patterns for Python→Rust mapping
86#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
87#[serde(tag = "type")]
88pub enum TransformPattern {
89    /// Direct 1:1 rename
90    #[default]
91    Direct,
92
93    /// Method call with extra arguments
94    MethodCall { extra_args: Vec<String> },
95
96    /// Property to method conversion
97    PropertyToMethod,
98
99    /// Constructor pattern (e.g., DataFrame() → DataFrame::new())
100    Constructor { method: String },
101
102    /// Argument reordering [31]
103    ReorderArgs { indices: Vec<usize> },
104
105    /// Type-safe template with validation [32, 33]
106    TypedTemplate {
107        pattern: String,
108        params: Vec<String>,
109        param_types: Vec<ParamType>,
110    },
111
112    /// Legacy template (deprecated)
113    #[deprecated(note = "Use TypedTemplate for type-safe templates")]
114    Template { template: String },
115}
116
117/// Parameter types for TypedTemplate validation (Poka-Yoke)
118#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
119pub enum ParamType {
120    Expr,
121    String,
122    Number,
123    Bytes,
124    Bool,
125    Path,
126    List,
127    Dict,
128}
129
130/// Type transformation hints
131#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
132pub struct TypeTransform {
133    /// Python type hint
134    pub python_type: String,
135    /// Rust type equivalent
136    pub rust_type: String,
137}
138
139/// Confidence level for mappings [36]
140#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
141pub enum MappingConfidence {
142    /// Verified against official documentation and tests
143    Verified,
144    /// Community-contributed, reviewed
145    Community,
146    /// Experimental, may have edge cases
147    #[default]
148    Experimental,
149}
150
151// ============================================================================
152// Registry Architecture (Section 2.2 of spec)
153// ============================================================================
154
155/// Enterprise-extensible mapping registry with O(1) lookup [1, 27]
156///
157/// Priority chain: overrides > extensions > core
158#[derive(Debug, Default)]
159pub struct MappingRegistry {
160    /// Core mappings (shipped with depyler)
161    core: HashMap<String, LibraryMapping>,
162
163    /// Enterprise extensions (loaded from plugins)
164    extensions: HashMap<String, LibraryMapping>,
165
166    /// User overrides (highest priority)
167    overrides: HashMap<String, LibraryMapping>,
168}
169
170impl MappingRegistry {
171    /// Create a new empty registry
172    pub fn new() -> Self {
173        Self::default()
174    }
175
176    /// Create registry with default core mappings
177    pub fn with_defaults() -> Self {
178        let mut registry = Self::new();
179        registry.register_core_defaults();
180        registry
181    }
182
183    /// Lookup item with priority: overrides > extensions > core
184    ///
185    /// Complexity: O(1) amortized [1]
186    pub fn lookup(&self, module: &str, item: &str) -> Option<&ItemMapping> {
187        self.overrides
188            .get(module)
189            .or_else(|| self.extensions.get(module))
190            .or_else(|| self.core.get(module))
191            .and_then(|m| m.items.get(item))
192    }
193
194    /// Lookup full library mapping with priority
195    pub fn lookup_module(&self, module: &str) -> Option<&LibraryMapping> {
196        self.overrides
197            .get(module)
198            .or_else(|| self.extensions.get(module))
199            .or_else(|| self.core.get(module))
200    }
201
202    /// Register a core mapping (lowest priority)
203    pub fn register_core(&mut self, mapping: LibraryMapping) {
204        self.core.insert(mapping.python_module.clone(), mapping);
205    }
206
207    /// Register an extension mapping (medium priority)
208    pub fn register_extension(&mut self, mapping: LibraryMapping) {
209        self.extensions
210            .insert(mapping.python_module.clone(), mapping);
211    }
212
213    /// Register a user override (highest priority)
214    pub fn register_override(&mut self, mapping: LibraryMapping) {
215        self.overrides
216            .insert(mapping.python_module.clone(), mapping);
217    }
218
219    /// Get count of all registered modules
220    pub fn module_count(&self) -> usize {
221        let mut seen = std::collections::HashSet::new();
222        for key in self.core.keys() {
223            seen.insert(key.as_str());
224        }
225        for key in self.extensions.keys() {
226            seen.insert(key.as_str());
227        }
228        for key in self.overrides.keys() {
229            seen.insert(key.as_str());
230        }
231        seen.len()
232    }
233
234    /// Register default core mappings (Section 3 of spec)
235    fn register_core_defaults(&mut self) {
236        // json → serde_json
237        self.register_core(LibraryMapping {
238            python_module: "json".to_string(),
239            rust_crate: "serde_json".to_string(),
240            python_version_req: "*".to_string(),
241            rust_crate_version: "1.0".to_string(),
242            items: HashMap::from([
243                (
244                    "loads".to_string(),
245                    ItemMapping {
246                        rust_name: "from_str".to_string(),
247                        pattern: TransformPattern::Direct,
248                        type_transform: None,
249                    },
250                ),
251                (
252                    "dumps".to_string(),
253                    ItemMapping {
254                        rust_name: "to_string".to_string(),
255                        pattern: TransformPattern::Direct,
256                        type_transform: None,
257                    },
258                ),
259            ]),
260            features: vec![],
261            confidence: MappingConfidence::Verified,
262            provenance: "https://docs.rs/serde_json/".to_string(),
263        });
264
265        // os → std
266        self.register_core(LibraryMapping {
267            python_module: "os".to_string(),
268            rust_crate: "std".to_string(),
269            python_version_req: "*".to_string(),
270            rust_crate_version: "*".to_string(),
271            items: HashMap::from([
272                (
273                    "getcwd".to_string(),
274                    ItemMapping {
275                        rust_name: "env::current_dir".to_string(),
276                        pattern: TransformPattern::Direct,
277                        type_transform: None,
278                    },
279                ),
280                (
281                    "getenv".to_string(),
282                    ItemMapping {
283                        rust_name: "env::var".to_string(),
284                        pattern: TransformPattern::Direct,
285                        type_transform: None,
286                    },
287                ),
288            ]),
289            features: vec![],
290            confidence: MappingConfidence::Verified,
291            provenance: "https://doc.rust-lang.org/std/".to_string(),
292        });
293
294        // re → regex
295        self.register_core(LibraryMapping {
296            python_module: "re".to_string(),
297            rust_crate: "regex".to_string(),
298            python_version_req: "*".to_string(),
299            rust_crate_version: "1.0".to_string(),
300            items: HashMap::from([
301                (
302                    "compile".to_string(),
303                    ItemMapping {
304                        rust_name: "Regex::new".to_string(),
305                        pattern: TransformPattern::Constructor {
306                            method: "new".to_string(),
307                        },
308                        type_transform: None,
309                    },
310                ),
311                (
312                    "match".to_string(),
313                    ItemMapping {
314                        rust_name: "is_match".to_string(),
315                        pattern: TransformPattern::MethodCall { extra_args: vec![] },
316                        type_transform: None,
317                    },
318                ),
319            ]),
320            features: vec![],
321            confidence: MappingConfidence::Verified,
322            provenance: "https://docs.rs/regex/".to_string(),
323        });
324    }
325}
326
327// ============================================================================
328// Plugin Architecture (Section 2.3 of spec)
329// ============================================================================
330
331/// Enterprise plugin interface for custom library mappings
332pub trait MappingPlugin: Send + Sync {
333    /// Plugin identifier (e.g., "netflix-internal", "google-cloud")
334    fn id(&self) -> &str;
335
336    /// Plugin version
337    fn version(&self) -> &str;
338
339    /// Register mappings into the registry
340    fn register(&self, registry: &mut MappingRegistry);
341
342    /// Optional: Validate that mappings are correct
343    fn validate(&self) -> Result<(), ValidationError> {
344        Ok(())
345    }
346}
347
/// Error reported when a plugin mapping or a transform pattern fails validation.
#[derive(Debug, Clone)]
pub struct ValidationError {
    /// Human-readable description of what is wrong.
    pub message: String,
    /// Optional context string; presumably names the mapping that failed
    /// (the validators in this module always leave it `None`) — confirm
    /// against plugin code.
    pub mapping: Option<String>,
}

impl std::fmt::Display for ValidationError {
    /// Renders `Validation error: <message>`, followed by
    /// ` (mapping: <mapping>)` when `mapping` is set, so the context carried
    /// by the error is not lost when it is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Validation error: {}", self.message)?;
        if let Some(mapping) = &self.mapping {
            write!(f, " (mapping: {})", mapping)?;
        }
        Ok(())
    }
}

impl std::error::Error for ValidationError {}
362
363// ============================================================================
364// Transform Pattern Utilities
365// ============================================================================
366
367impl TransformPattern {
368    /// Validate a ReorderArgs pattern
369    ///
370    /// Indices must be a valid permutation (0..n where n = indices.len())
371    pub fn validate_reorder_args(indices: &[usize]) -> Result<(), ValidationError> {
372        let n = indices.len();
373        let mut seen = vec![false; n];
374
375        for &idx in indices {
376            if idx >= n {
377                return Err(ValidationError {
378                    message: format!("Index {} out of bounds for {} args", idx, n),
379                    mapping: None,
380                });
381            }
382            if seen[idx] {
383                return Err(ValidationError {
384                    message: format!("Duplicate index {} in permutation", idx),
385                    mapping: None,
386                });
387            }
388            seen[idx] = true;
389        }
390
391        Ok(())
392    }
393
394    /// Validate a TypedTemplate pattern
395    ///
396    /// Params must match placeholders in pattern, and lengths must match
397    pub fn validate_typed_template(
398        pattern: &str,
399        params: &[String],
400        param_types: &[ParamType],
401    ) -> Result<(), ValidationError> {
402        // Check param/type count match
403        if params.len() != param_types.len() {
404            return Err(ValidationError {
405                message: format!(
406                    "Param count {} != type count {}",
407                    params.len(),
408                    param_types.len()
409                ),
410                mapping: None,
411            });
412        }
413
414        // Check all params appear in pattern
415        for param in params {
416            let placeholder = format!("{{{}}}", param);
417            if !pattern.contains(&placeholder) {
418                return Err(ValidationError {
419                    message: format!("Param '{}' not found in pattern", param),
420                    mapping: None,
421                });
422            }
423        }
424
425        Ok(())
426    }
427}