Skip to main content

depyler_tooling/
module_mapper_phf.rs

1//! PHF-based Module Mapping with O(1) Worst-Case Lookup
2//!
3//! DEPYLER-O1MAP-001: Compile-time perfect hash function implementation
4//!
5//! This module provides a static, compile-time generated perfect hash map
6//! for Python-to-Rust module mappings, guaranteeing O(1) worst-case lookup
7//! with zero runtime allocation.
8//!
9//! ## Usage
10//!
11//! Enable with feature flag:
12//! ```toml
13//! [dependencies]
14//! depyler-core = { version = "3.21", features = ["phf-lookup"] }
15//! ```
16//!
17//! ## Performance
18//!
19//! | Metric | HashMap | PHF |
20//! |--------|---------|-----|
//! | Lookup | O(1) average-case | O(1) worst-case |
22//! | Memory | ~25 KB heap | ~8 KB .rodata |
23//! | Init | Runtime | Compile-time |
24
25#[cfg(feature = "phf-lookup")]
26use phf::phf_map;
27
/// Static module mapping entry for PHF lookup.
///
/// Every field is `'static` data, so the value is `Copy` and can live in a
/// compile-time table. The type also implements `PartialEq`, `Eq` and `Hash`
/// so whole mappings can be compared (e.g. with `assert_eq!`) or used as
/// set/map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StaticModuleMapping {
    /// Rust crate or module path the Python module maps to
    /// (may be empty, as for the `typing` entry in the module table).
    pub rust_path: &'static str,
    /// Whether this is an external crate (as opposed to a `std` path).
    pub is_external: bool,
    /// Cargo.toml version requirement; `None` when no dependency entry applies.
    pub version: Option<&'static str>,
}
38
/// Static item mapping for function/type lookups.
///
/// Pairs one Python item name with its Rust counterpart. Implements
/// `PartialEq`, `Eq` and `Hash` in addition to `Debug`/`Clone`/`Copy` so
/// pairs can be compared directly or stored in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StaticItemMapping {
    /// Python item name
    pub python_name: &'static str,
    /// Rust equivalent name
    pub rust_name: &'static str,
}
47
48// ============================================================================
49// PHF Compile-Time Module Map
50// ============================================================================
51
// Perfect-hash table built by `phf_map!`: Python module name -> Rust target.
// Standard-library targets carry `is_external: false` and no version; crate
// targets carry `is_external: true` plus the version requirement string.
#[cfg(feature = "phf-lookup")]
static MODULE_MAP: phf::Map<&'static str, StaticModuleMapping> = phf_map! {
    // ---- Python stdlib modules that map onto Rust `std` ----
    "os" => StaticModuleMapping {
        rust_path: "std",
        is_external: false,
        version: None,
    },
    "os.path" => StaticModuleMapping {
        rust_path: "std::path",
        is_external: false,
        version: None,
    },
    "sys" => StaticModuleMapping {
        rust_path: "std",
        is_external: false,
        version: None,
    },
    "io" => StaticModuleMapping {
        rust_path: "std::io",
        is_external: false,
        version: None,
    },
    "math" => StaticModuleMapping {
        rust_path: "std::f64",
        is_external: false,
        version: None,
    },
    "collections" => StaticModuleMapping {
        rust_path: "std::collections",
        is_external: false,
        version: None,
    },
    // `typing` has an empty Rust path.
    "typing" => StaticModuleMapping {
        rust_path: "",
        is_external: false,
        version: None,
    },
    "pathlib" => StaticModuleMapping {
        rust_path: "std::path",
        is_external: false,
        version: None,
    },
    "functools" => StaticModuleMapping {
        rust_path: "std",
        is_external: false,
        version: None,
    },
    "subprocess" => StaticModuleMapping {
        rust_path: "std::process",
        is_external: false,
        version: None,
    },
    "threading" => StaticModuleMapping {
        rust_path: "std::thread",
        is_external: false,
        version: None,
    },

    // ---- Data serialization crates ----
    "json" => StaticModuleMapping {
        rust_path: "serde_json",
        is_external: true,
        version: Some("1.0"),
    },
    "csv" => StaticModuleMapping {
        rust_path: "csv",
        is_external: true,
        version: Some("1.3"),
    },

    // ---- Text processing ----
    "re" => StaticModuleMapping {
        rust_path: "regex",
        is_external: true,
        version: Some("1.10"),
    },

    // ---- Date/time ----
    "datetime" => StaticModuleMapping {
        rust_path: "chrono",
        is_external: true,
        version: Some("0.4"),
    },

    // ---- Random / crypto / encoding ----
    "random" => StaticModuleMapping {
        rust_path: "rand",
        is_external: true,
        version: Some("0.8"),
    },
    "hashlib" => StaticModuleMapping {
        rust_path: "sha2",
        is_external: true,
        version: Some("0.10"),
    },
    "base64" => StaticModuleMapping {
        rust_path: "base64",
        is_external: true,
        version: Some("0.21"),
    },

    // ---- Command-line parsing ----
    "argparse" => StaticModuleMapping {
        rust_path: "clap",
        is_external: true,
        version: Some("4.5"),
    },

    // ---- Iteration helpers ----
    "itertools" => StaticModuleMapping {
        rust_path: "itertools",
        is_external: true,
        version: Some("0.12"),
    },

    // ---- Async runtime ----
    "asyncio" => StaticModuleMapping {
        rust_path: "tokio",
        is_external: true,
        version: Some("1.35"),
    },

    // ---- Binary packing ----
    "struct" => StaticModuleMapping {
        rust_path: "byteorder",
        is_external: true,
        version: Some("1.5"),
    },

    // ---- Statistics ----
    "statistics" => StaticModuleMapping {
        rust_path: "statrs",
        is_external: true,
        version: Some("0.16"),
    },

    // ---- Temporary files ----
    "tempfile" => StaticModuleMapping {
        rust_path: "tempfile",
        is_external: true,
        version: Some("3.0"),
    },

    // ---- URL handling ----
    "urllib.parse" => StaticModuleMapping {
        rust_path: "url",
        is_external: true,
        version: Some("2.5"),
    },

    // ---- Batuta stack: NumPy -> Trueno ----
    "numpy" => StaticModuleMapping {
        rust_path: "trueno",
        is_external: true,
        version: Some("0.7"),
    },
    "numpy.linalg" => StaticModuleMapping {
        rust_path: "trueno::linalg",
        is_external: true,
        version: Some("0.7"),
    },

    // ---- Batuta stack: scikit-learn -> Aprender ----
    "sklearn.linear_model" => StaticModuleMapping {
        rust_path: "aprender::linear",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.cluster" => StaticModuleMapping {
        rust_path: "aprender::cluster",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.tree" => StaticModuleMapping {
        rust_path: "aprender::tree",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.ensemble" => StaticModuleMapping {
        rust_path: "aprender::ensemble",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.preprocessing" => StaticModuleMapping {
        rust_path: "aprender::preprocessing",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.decomposition" => StaticModuleMapping {
        rust_path: "aprender::decomposition",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.model_selection" => StaticModuleMapping {
        rust_path: "aprender::model_selection",
        is_external: true,
        version: Some("0.14"),
    },
    "sklearn.metrics" => StaticModuleMapping {
        rust_path: "aprender::metrics",
        is_external: true,
        version: Some("0.14"),
    },
};
117
118// ============================================================================
119// PHF Item Maps (Function/Type Mappings)
120// ============================================================================
121
// Item names of Python's `json` module -> Rust function names.
// (`json` itself is mapped to `serde_json` in `MODULE_MAP`.)
#[cfg(feature = "phf-lookup")]
static JSON_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    // String-based variants
    "loads" => "from_str",
    "dumps" => "to_string",
    // Stream-based variants
    "load"  => "from_reader",
    "dump"  => "to_writer",
};
129
// Item names of Python's `math` module -> Rust names.
// (`math` is mapped to `std::f64` in `MODULE_MAP`; the `consts::` values
// look like paths relative to that module — confirm against the consumer.)
#[cfg(feature = "phf-lookup")]
static MATH_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    // Functions whose name is unchanged
    "sqrt"  => "sqrt",
    "sin"   => "sin",
    "cos"   => "cos",
    "tan"   => "tan",
    "floor" => "floor",
    "ceil"  => "ceil",
    "abs"   => "abs",
    // Renamed function
    "pow"   => "powf",
    // Constants
    "pi"    => "consts::PI",
    "e"     => "consts::E",
    // DEPYLER-0771: isqrt is handled specially in expr_gen.rs (not a direct method call)
    "isqrt" => "isqrt",
};
145
// Item names of Python's `os` module -> Rust paths.
// (`os` is mapped to `std` in `MODULE_MAP`; values are written as
// `module::item` paths — presumably resolved under that root, TODO confirm.)
#[cfg(feature = "phf-lookup")]
static OS_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    "getcwd"  => "env::current_dir",
    "environ" => "env::vars",
    "path"    => "path::Path",
    "getenv"  => "env::var",
};
153
// Item names of Python's `sys` module -> Rust paths.
// (`sys` is mapped to `std` in `MODULE_MAP`.)
#[cfg(feature = "phf-lookup")]
static SYS_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    // Process arguments and termination
    "argv"   => "env::args",
    "exit"   => "process::exit",
    // Standard streams
    "stdin"  => "io::stdin",
    "stdout" => "io::stdout",
    "stderr" => "io::stderr",
};
162
// Item names of Python's `re` module -> Rust names.
// (`re` is mapped to the `regex` crate in `MODULE_MAP`.)
//
// NOTE(review): some Python items share a single target here
// (`findall`/`finditer`, `sub`/`subn`); confirm that callers handle any
// difference in return shape between the Python and Rust sides.
#[cfg(feature = "phf-lookup")]
static RE_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    // Functions
    "compile"    => "Regex::new",
    "search"     => "Regex::find",
    "match"      => "Regex::is_match",
    "findall"    => "Regex::find_iter",
    "finditer"   => "Regex::find_iter",
    "sub"        => "Regex::replace_all",
    "subn"       => "Regex::replace_all",
    "split"      => "Regex::split",
    // Types
    "Pattern"    => "Regex",
    // Flag constants (long and short spellings) map to inline flag groups
    "IGNORECASE" => "(?i)",
    "I"          => "(?i)",
    "MULTILINE"  => "(?m)",
    "M"          => "(?m)",
};
179
// Item names of Python's `random` module -> Rust names.
// (`random` is mapped to the `rand` crate in `MODULE_MAP`.)
// `randint`, `uniform` and `randrange` all share the `gen_range` target.
#[cfg(feature = "phf-lookup")]
static RANDOM_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    "random"    => "random",
    "randint"   => "gen_range",
    "choice"    => "choose",
    "shuffle"   => "shuffle",
    "uniform"   => "gen_range",
    "seed"      => "SeedableRng::seed_from_u64",
    "randrange" => "gen_range",
    "sample"    => "choose_multiple",
};
191
// Item names of Python's `numpy` module -> Rust names.
// (`numpy` is mapped to the `trueno` crate in `MODULE_MAP`.)
#[cfg(feature = "phf-lookup")]
static NUMPY_ITEMS: phf::Map<&'static str, &'static str> = phf_map! {
    // Construction (`empty` shares the `zeros` target)
    "array"    => "Vector::from_slice",
    "zeros"    => "Vector::zeros",
    "ones"     => "Vector::ones",
    "empty"    => "Vector::zeros",
    "arange"   => "Vector::arange",
    "linspace" => "Vector::linspace",

    // Element-wise arithmetic
    "add"      => "Vector::add",
    "subtract" => "Vector::sub",
    "multiply" => "Vector::mul",
    "divide"   => "Vector::div",

    // Element-wise math functions
    "sqrt"     => "Vector::sqrt",
    "exp"      => "Vector::exp",
    "log"      => "Vector::ln",
    "sin"      => "Vector::sin",
    "cos"      => "Vector::cos",
    "abs"      => "Vector::abs",

    // Products
    "dot"      => "Vector::dot",
    "matmul"   => "Matrix::matmul",

    // Reductions
    "sum"      => "Vector::sum",
    "mean"     => "Vector::mean",
    "max"      => "Vector::max",
    "min"      => "Vector::min",
    "std"      => "Vector::std",
    "var"      => "Vector::var",
    "argmax"   => "Vector::argmax",
    "argmin"   => "Vector::argmin",
};
221
222// ============================================================================
223// Public API
224// ============================================================================
225
/// Looks up the Rust mapping for a Python module in the compile-time PHF table.
///
/// Returns `None` when `module` has no entry in the table.
///
/// # Example
///
/// ```rust,ignore
/// use depyler_core::module_mapper_phf::get_module_mapping;
///
/// let mapping = get_module_mapping("json").expect("json is in the table");
/// assert_eq!(mapping.rust_path, "serde_json");
/// assert!(mapping.is_external);
/// ```
#[cfg(feature = "phf-lookup")]
pub fn get_module_mapping(module: &str) -> Option<&'static StaticModuleMapping> {
    // Fetch the (key, value) entry and keep only the value half.
    MODULE_MAP.get_entry(module).map(|(_, mapping)| mapping)
}
242
/// Looks up the Rust name for a Python item (function, type, constant) of a module.
///
/// Returns `None` when `module` has no item table or when `item` is not
/// listed in that module's table. Only `json`, `math`, `os`, `sys`, `re`,
/// `random` and `numpy` have item tables.
///
/// # Example
///
/// ```rust,ignore
/// use depyler_core::module_mapper_phf::get_item_mapping;
///
/// assert_eq!(get_item_mapping("json", "loads"), Some("from_str"));
/// assert_eq!(get_item_mapping("json", "no_such_item"), None);
/// ```
#[cfg(feature = "phf-lookup")]
pub fn get_item_mapping(module: &str, item: &str) -> Option<&'static str> {
    // Step 1: pick the per-module table; unknown modules have none.
    let items: &'static phf::Map<&'static str, &'static str> = match module {
        "json" => &JSON_ITEMS,
        "math" => &MATH_ITEMS,
        "os" => &OS_ITEMS,
        "sys" => &SYS_ITEMS,
        "re" => &RE_ITEMS,
        "random" => &RANDOM_ITEMS,
        "numpy" => &NUMPY_ITEMS,
        _ => return None,
    };

    // Step 2: a single lookup in the chosen table.
    items.get(item).copied()
}
267
/// Reports whether `module` has an entry in the PHF module table.
#[cfg(feature = "phf-lookup")]
pub fn is_module_supported(module: &str) -> bool {
    MODULE_MAP.get(module).is_some()
}
273
/// Iterates over the name of every module in the PHF module table
/// (intended for diagnostics).
#[cfg(feature = "phf-lookup")]
pub fn supported_modules() -> impl Iterator<Item = &'static str> {
    // Walk the table entries and project out the key of each one.
    MODULE_MAP.entries().map(|(name, _)| *name)
}
279
280// ============================================================================
281// Fallback for non-PHF builds
282// ============================================================================
283
284#[cfg(not(feature = "phf-lookup"))]
285pub fn get_module_mapping(_module: &str) -> Option<&'static StaticModuleMapping> {
286    None // Use HashMap-based ModuleMapper instead
287}
288
/// Fallback used when the `phf-lookup` feature is disabled.
///
/// There are no static item tables in this configuration, so the result is
/// always `None`; use the HashMap-based `ModuleMapper` instead.
#[cfg(not(feature = "phf-lookup"))]
pub fn get_item_mapping(_module: &str, _item: &str) -> Option<&'static str> {
    None
}
293
/// Fallback used when the `phf-lookup` feature is disabled.
///
/// No module is served by PHF lookup in this configuration, so this always
/// returns `false`.
#[cfg(not(feature = "phf-lookup"))]
pub fn is_module_supported(_module: &str) -> bool {
    false
}
298
/// Fallback used when the `phf-lookup` feature is disabled.
///
/// Yields no module names, since there is no PHF table in this configuration.
#[cfg(not(feature = "phf-lookup"))]
pub fn supported_modules() -> impl Iterator<Item = &'static str> {
    std::iter::empty::<&'static str>()
}
303
304// ============================================================================
305// Tests
306// ============================================================================
307
#[cfg(test)]
mod fallback_tests {
    use super::*;

    // This module is always compiled under `cfg(test)`. The four
    // `test_fallback_*` cases exercise the stub API and are therefore only
    // built when the `phf-lookup` feature is off; the struct tests at the
    // bottom run in every configuration.

    #[cfg(not(feature = "phf-lookup"))]
    #[test]
    fn test_fallback_get_module_mapping() {
        for module in ["json", "os"] {
            assert!(get_module_mapping(module).is_none());
        }
    }

    #[cfg(not(feature = "phf-lookup"))]
    #[test]
    fn test_fallback_get_item_mapping() {
        for (module, item) in [("json", "loads"), ("math", "sqrt")] {
            assert!(get_item_mapping(module, item).is_none());
        }
    }

    #[cfg(not(feature = "phf-lookup"))]
    #[test]
    fn test_fallback_is_module_supported() {
        for module in ["json", "os"] {
            assert!(!is_module_supported(module));
        }
    }

    #[cfg(not(feature = "phf-lookup"))]
    #[test]
    fn test_fallback_supported_modules() {
        let total = supported_modules().count();
        assert_eq!(total, 0);
    }

    // Plain-data checks: fields read back exactly as they were written.

    #[test]
    fn test_static_module_mapping_struct() {
        let StaticModuleMapping {
            rust_path,
            is_external,
            version,
        } = StaticModuleMapping {
            rust_path: "serde_json",
            is_external: true,
            version: Some("1.0"),
        };
        assert_eq!(rust_path, "serde_json");
        assert!(is_external);
        assert_eq!(version, Some("1.0"));
    }

    #[test]
    fn test_static_item_mapping_struct() {
        let StaticItemMapping {
            python_name,
            rust_name,
        } = StaticItemMapping {
            python_name: "loads",
            rust_name: "from_str",
        };
        assert_eq!(python_name, "loads");
        assert_eq!(rust_name, "from_str");
    }
}
363
// Tests for the PHF-backed API; only built when `phf-lookup` is enabled.
#[cfg(all(test, feature = "phf-lookup"))]
mod tests {
    use super::*;

    // ---- Module lookups ----

    #[test]
    fn test_module_lookup_json() {
        let json = get_module_mapping("json").expect("json should be mapped");
        assert_eq!(json.rust_path, "serde_json");
        assert!(json.is_external);
        assert_eq!(json.version, Some("1.0"));
    }

    #[test]
    fn test_module_lookup_stdlib() {
        let os = get_module_mapping("os").expect("os should be mapped");
        assert_eq!(os.rust_path, "std");
        assert!(!os.is_external);
        assert_eq!(os.version, None);
    }

    #[test]
    fn test_module_lookup_numpy() {
        let numpy = get_module_mapping("numpy").expect("numpy should be mapped");
        assert_eq!(numpy.rust_path, "trueno");
        assert!(numpy.is_external);
    }

    #[test]
    fn test_module_lookup_sklearn() {
        let linear_model = get_module_mapping("sklearn.linear_model")
            .expect("sklearn.linear_model should be mapped");
        assert_eq!(linear_model.rust_path, "aprender::linear");
        assert!(linear_model.is_external);
    }

    // ---- Item lookups ----

    #[test]
    fn test_item_lookup_json() {
        for (python, rust) in [("loads", "from_str"), ("dumps", "to_string")] {
            assert_eq!(get_item_mapping("json", python), Some(rust));
        }
    }

    #[test]
    fn test_item_lookup_math() {
        for (python, rust) in [("sqrt", "sqrt"), ("pi", "consts::PI")] {
            assert_eq!(get_item_mapping("math", python), Some(rust));
        }
    }

    #[test]
    fn test_item_lookup_numpy() {
        for (python, rust) in [("array", "Vector::from_slice"), ("sum", "Vector::sum")] {
            assert_eq!(get_item_mapping("numpy", python), Some(rust));
        }
    }

    // ---- Misses ----

    #[test]
    fn test_unknown_module() {
        let missing = get_module_mapping("unknown_module");
        assert!(missing.is_none());
    }

    #[test]
    fn test_unknown_item() {
        let missing = get_item_mapping("json", "unknown_func");
        assert!(missing.is_none());
    }

    // ---- Table size ----

    #[test]
    fn test_supported_modules_count() {
        let count = supported_modules().count();
        assert!(
            count >= 30,
            "Should have at least 30 modules, got {}",
            count
        );
    }
}