Skip to main content

depyler_tooling/
typeshed_ingest.rs

1//! Typeshed Stub Ingestion for Auto-Generating Module Mappings
2//!
3//! DEPYLER-O1MAP-001 Section 5: Automation Strategy
4//!
5//! This module parses Python `.pyi` stub files from typeshed and automatically
6//! generates `ModuleMapping` structs for the module mapper.
7//!
8//! ## Design Principles (Toyota Way)
9//!
10//! - **Genchi Genbutsu**: Parse actual typeshed stubs, not assumptions
11//! - **Jidoka**: Validate mappings against known Rust equivalents
12//! - **Kaizen**: Incrementally expand coverage from json → stdlib → ecosystem
13
14use crate::module_mapper::ModuleMapping;
15use std::collections::HashMap;
16
/// A single function signature extracted from a `.pyi` stub.
///
/// Every field holds text as it appeared in the stub; annotations are kept
/// as Python type strings and are not translated to Rust types here.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` (all fields are strings),
/// so signatures can be de-duplicated in a `HashSet` or used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ParsedFunction {
    /// Python function name
    pub name: String,
    /// Parameters in declaration order as `(name, type string)` pairs
    pub params: Vec<(String, String)>,
    /// Return type string
    pub return_type: String,
}
27
/// Lookup table used to translate Python type names into Rust type names.
#[derive(Debug, Clone)]
pub struct TypeMappingConfig {
    /// Keyed by Python type name; the value is the Rust type name.
    pub type_map: HashMap<String, String>,
}

impl Default for TypeMappingConfig {
    /// Builds the built-in table of primitive, dynamic and container types.
    fn default() -> Self {
        // (Python type, Rust type). Container entries name only the outer
        // type; generic arguments are not expanded by this table.
        const BUILTIN_TYPES: &[(&str, &str)] = &[
            // Primitive types
            ("str", "String"),
            ("int", "i64"),
            ("float", "f64"),
            ("bool", "bool"),
            ("None", "()"),
            ("bytes", "Vec<u8>"),
            // Dynamically typed values
            ("Any", "serde_json::Value"),
            ("object", "serde_json::Value"),
            // Built-in containers
            ("list", "Vec"),
            ("dict", "HashMap"),
            ("set", "HashSet"),
            ("tuple", "tuple"),
            // `typing` aliases
            ("List", "Vec"),
            ("Dict", "HashMap"),
            ("Set", "HashSet"),
            ("Tuple", "tuple"),
            ("Optional", "Option"),
        ];

        let type_map = BUILTIN_TYPES
            .iter()
            .map(|&(python, rust)| (python.to_string(), rust.to_string()))
            .collect();

        Self { type_map }
    }
}
62
/// Table of Python modules and the Rust crate (or std path) that stands in
/// for each of them.
#[derive(Debug, Clone)]
pub struct CrateMappingConfig {
    /// Python module -> (Rust crate path, is_external, version)
    pub crate_map: HashMap<String, (String, bool, Option<String>)>,
}

impl Default for CrateMappingConfig {
    /// Builds the built-in module table.
    fn default() -> Self {
        // (Python module, Rust path, is_external, version). Entries that
        // point into `std` carry no version and are marked non-external.
        const KNOWN_CRATES: &[(&str, &str, bool, Option<&str>)] = &[
            ("json", "serde_json", true, Some("1.0")),
            ("os", "std", false, None),
            ("sys", "std", false, None),
            ("math", "std::f64", false, None),
            ("re", "regex", true, Some("1.10")),
            ("random", "rand", true, Some("0.8")),
            ("datetime", "chrono", true, Some("0.4")),
            ("collections", "std::collections", false, None),
            ("itertools", "itertools", true, Some("0.12")),
            ("hashlib", "sha2", true, Some("0.10")),
            ("base64", "base64", true, Some("0.21")),
            ("csv", "csv", true, Some("1.3")),
            ("pathlib", "std::path", false, None),
            ("tempfile", "tempfile", true, Some("3.0")),
        ];

        let crate_map = KNOWN_CRATES
            .iter()
            .map(|&(module, rust_path, is_external, version)| {
                let target = (
                    rust_path.to_string(),
                    is_external,
                    version.map(String::from),
                );
                (module.to_string(), target)
            })
            .collect();

        Self { crate_map }
    }
}
125
/// Per-module table of Python function names and their Rust counterparts.
/// This is the "semantic bridge" consulted when a function's Rust name
/// differs from its Python name.
#[derive(Debug, Clone)]
pub struct FunctionMappingConfig {
    /// (module, python_func) -> rust_func
    pub func_map: HashMap<(String, String), String>,
}

impl Default for FunctionMappingConfig {
    /// Builds the built-in table for the `json`, `os`, `math` and `re` modules.
    fn default() -> Self {
        // (Python module, Python function, Rust function)
        const KNOWN_FUNCTIONS: &[(&str, &str, &str)] = &[
            // json module
            ("json", "loads", "from_str"),
            ("json", "dumps", "to_string"),
            ("json", "load", "from_reader"),
            ("json", "dump", "to_writer"),
            // os module
            ("os", "getcwd", "env::current_dir"),
            ("os", "getenv", "env::var"),
            ("os", "listdir", "fs::read_dir"),
            // math module
            ("math", "sqrt", "sqrt"),
            ("math", "sin", "sin"),
            ("math", "cos", "cos"),
            ("math", "floor", "floor"),
            ("math", "ceil", "ceil"),
            ("math", "abs", "abs"),
            ("math", "pow", "powf"),
            // re module
            ("re", "compile", "Regex::new"),
            ("re", "match", "Regex::is_match"),
            ("re", "search", "Regex::find"),
            ("re", "findall", "Regex::find_iter"),
            ("re", "sub", "Regex::replace_all"),
        ];

        let func_map = KNOWN_FUNCTIONS
            .iter()
            .map(|&(module, python_func, rust_func)| {
                (
                    (module.to_string(), python_func.to_string()),
                    rust_func.to_string(),
                )
            })
            .collect();

        Self { func_map }
    }
}
207
208/// Parse a .pyi stub file content and extract function signatures
209///
210/// # Arguments
211/// * `content` - The content of the .pyi file
212/// * `module_name` - The Python module name (e.g., "json", "os")
213///
214/// # Returns
215/// A `ModuleMapping` ready for use in the module mapper
216pub fn parse_pyi(content: &str, module_name: &str) -> ModuleMapping {
217    parse_pyi_with_config(
218        content,
219        module_name,
220        &TypeMappingConfig::default(),
221        &CrateMappingConfig::default(),
222        &FunctionMappingConfig::default(),
223    )
224}
225
226/// Parse a .pyi stub with custom configuration
227pub fn parse_pyi_with_config(
228    content: &str,
229    module_name: &str,
230    _type_config: &TypeMappingConfig,
231    crate_config: &CrateMappingConfig,
232    func_config: &FunctionMappingConfig,
233) -> ModuleMapping {
234    let functions = extract_function_signatures(content);
235
236    // Get crate mapping for this module
237    let (rust_path, is_external, version) = crate_config
238        .crate_map
239        .get(module_name)
240        .cloned()
241        .unwrap_or_else(|| (module_name.to_string(), true, None));
242
243    // Build item map from parsed functions + known mappings
244    let mut item_map = HashMap::new();
245
246    for func in &functions {
247        // Check if we have a known mapping for this function
248        if let Some(rust_func) = func_config
249            .func_map
250            .get(&(module_name.to_string(), func.name.clone()))
251        {
252            item_map.insert(func.name.clone(), rust_func.clone());
253        } else {
254            // Default: use same name (snake_case preserved)
255            item_map.insert(func.name.clone(), func.name.clone());
256        }
257    }
258
259    ModuleMapping {
260        rust_path,
261        is_external,
262        version,
263        item_map,
264        constructor_patterns: HashMap::new(),
265    }
266}
267
268/// Extract function signatures from .pyi content
269///
270/// Parses lines like:
271/// - `def loads(s: str) -> Any: ...`
272/// - `def dumps(obj: Any, indent: int = None) -> str: ...`
273/// - Multiline function definitions (joined before parsing)
274fn extract_function_signatures(content: &str) -> Vec<ParsedFunction> {
275    let mut functions = Vec::new();
276
277    // First, join multiline function definitions into single lines
278    let normalized = normalize_multiline_functions(content);
279
280    // Simple regex-like parsing for function definitions
281    // Pattern: def <name>(<params>) -> <return_type>: ...
282    for line in normalized.lines() {
283        let line = line.trim();
284
285        // Skip non-function lines
286        if !line.starts_with("def ") {
287            continue;
288        }
289
290        // Skip private/dunder methods for now
291        if line.starts_with("def _") && !line.starts_with("def __init__") {
292            continue;
293        }
294
295        if let Some(func) = parse_function_line(line) {
296            functions.push(func);
297        }
298    }
299
300    functions
301}
302
303/// Normalize multiline function definitions into single lines
304fn normalize_multiline_functions(content: &str) -> String {
305    let mut result = String::new();
306    let mut current_def = String::new();
307    let mut in_def = false;
308    let mut paren_depth = 0;
309
310    for line in content.lines() {
311        let trimmed = line.trim();
312
313        // Skip empty lines and non-function content when not in a def
314        if trimmed.is_empty() && !in_def {
315            continue;
316        }
317
318        if trimmed.starts_with("def ") {
319            // If we were in a def, flush it first (shouldn't happen normally)
320            if in_def && !current_def.is_empty() {
321                result.push_str(&current_def);
322                result.push('\n');
323            }
324            // Start of a new function definition
325            in_def = true;
326            current_def = trimmed.to_string();
327            paren_depth = count_parens(trimmed);
328        } else if in_def {
329            // Continuation of function definition
330            current_def.push(' ');
331            current_def.push_str(trimmed);
332            paren_depth += count_parens(trimmed);
333        }
334
335        // Check if function definition is complete (parens balanced and has closing pattern)
336        if in_def
337            && paren_depth == 0
338            && (current_def.ends_with(": ...") || current_def.ends_with("):"))
339        {
340            result.push_str(&current_def);
341            result.push('\n');
342            current_def.clear();
343            in_def = false;
344        }
345    }
346
347    // Flush any remaining definition
348    if !current_def.is_empty() {
349        result.push_str(&current_def);
350        result.push('\n');
351    }
352
353    result
354}
355
/// Net change in parenthesis depth across `s`
///
/// Each `(` contributes `+1` and each `)` contributes `-1`; every other
/// character is ignored. The result is negative when closers outnumber openers.
fn count_parens(s: &str) -> i32 {
    s.chars().fold(0, |net, ch| match ch {
        '(' => net + 1,
        ')' => net - 1,
        _ => net,
    })
}
368
369/// Parse a single function definition line
370fn parse_function_line(line: &str) -> Option<ParsedFunction> {
371    // Remove "def " prefix
372    let line = line.strip_prefix("def ")?;
373
374    // Find function name (up to open paren)
375    let paren_idx = line.find('(')?;
376    let name = line[..paren_idx].trim().to_string();
377
378    // Find the closing paren and return type
379    let close_paren_idx = line.rfind(')')?;
380    let params_str = &line[paren_idx + 1..close_paren_idx];
381
382    // Parse return type (after "->")
383    let return_type = if let Some(arrow_idx) = line.find("->") {
384        let ret_part = &line[arrow_idx + 2..];
385        // Remove trailing ": ..." if present
386        let ret_type = ret_part.trim().trim_end_matches(": ...");
387        ret_type.trim().to_string()
388    } else {
389        "None".to_string()
390    };
391
392    // Parse parameters
393    let params = parse_params(params_str);
394
395    Some(ParsedFunction {
396        name,
397        params,
398        return_type,
399    })
400}
401
/// Parse the text between a signature's parentheses into `(name, type)` pairs
///
/// Parameters are split on commas that sit outside brackets and outside
/// string literals, so generic annotations (`tuple[str, str]`) and literal
/// defaults (`sep: str = ","`) stay in one piece.
///
/// Skipped entirely: `self`, anything starting with `*` (`*`, `*args`,
/// `**kwargs`), the positional-only marker `/`, and empty segments.
///
/// Default values are dropped from both typed (`x: int = 1`) and untyped
/// (`x=1`) parameters. Untyped parameters are reported with type `"Any"`.
fn parse_params(params_str: &str) -> Vec<(String, String)> {
    /// Byte offsets of every `target` char in `text` that lies outside string
    /// literals and outside any bracket pair.
    fn top_level_positions(text: &str, target: char) -> Vec<usize> {
        let mut found = Vec::new();
        let mut depth = 0usize;
        let mut quote: Option<char> = None;
        let mut escaped = false;

        for (idx, ch) in text.char_indices() {
            if let Some(open) = quote {
                // Inside a string literal: only look for its closing quote.
                if escaped {
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == open {
                    quote = None;
                }
                continue;
            }
            match ch {
                '"' | '\'' => quote = Some(ch),
                '(' | '[' | '{' => depth += 1,
                ')' | ']' | '}' => depth = depth.saturating_sub(1),
                _ if ch == target && depth == 0 => found.push(idx),
                _ => {}
            }
        }
        found
    }

    // Text before the first top-level `=`, trimmed (i.e. minus any default).
    let without_default = |text: &str| -> String {
        let end = top_level_positions(text, '=')
            .first()
            .copied()
            .unwrap_or(text.len());
        text[..end].trim().to_string()
    };

    let mut params = Vec::new();

    // Segment boundaries: every top-level comma plus the end of the input.
    let mut boundaries = top_level_positions(params_str, ',');
    boundaries.push(params_str.len());

    let mut start = 0;
    for end in boundaries {
        let part = params_str[start..end].trim();
        start = end + 1;

        // Skip receivers, variadics, bare markers and empty segments
        if part.is_empty() || part == "self" || part == "/" || part.starts_with('*') {
            continue;
        }

        // Handle: name: type, name: type = default, name, name = default
        match top_level_positions(part, ':').first() {
            Some(&colon_idx) => {
                let param_name = part[..colon_idx].trim().to_string();
                let param_type = without_default(&part[colon_idx + 1..]);
                params.push((param_name, param_type));
            }
            None => params.push((without_default(part), "Any".to_string())),
        }
    }

    params
}
465
466#[cfg(test)]
467mod tests {
468    use super::*;
469
470    /// TDD RED: Test ingestion of json.pyi stub
471    /// This is the primary acceptance test for the typeshed ingestion system
472    #[test]
473    fn test_ingest_json_stub() {
474        // Mock json.pyi content (simplified from actual typeshed)
475        let json_pyi = r#"
476from typing import Any, IO, Optional
477
478def loads(
479    s: str,
480    *,
481    cls: Optional[type] = None,
482    object_hook: Optional[Any] = None,
483    parse_float: Optional[Any] = None,
484    parse_int: Optional[Any] = None,
485    parse_constant: Optional[Any] = None,
486    object_pairs_hook: Optional[Any] = None,
487) -> Any: ...
488
489def dumps(
490    obj: Any,
491    *,
492    skipkeys: bool = False,
493    ensure_ascii: bool = True,
494    check_circular: bool = True,
495    allow_nan: bool = True,
496    cls: Optional[type] = None,
497    indent: Optional[int] = None,
498    separators: Optional[tuple[str, str]] = None,
499    default: Optional[Any] = None,
500    sort_keys: bool = False,
501) -> str: ...
502
503def load(
504    fp: IO[str],
505    *,
506    cls: Optional[type] = None,
507    object_hook: Optional[Any] = None,
508    parse_float: Optional[Any] = None,
509    parse_int: Optional[Any] = None,
510    parse_constant: Optional[Any] = None,
511    object_pairs_hook: Optional[Any] = None,
512) -> Any: ...
513
514def dump(
515    obj: Any,
516    fp: IO[str],
517    *,
518    skipkeys: bool = False,
519    ensure_ascii: bool = True,
520    check_circular: bool = True,
521    allow_nan: bool = True,
522    cls: Optional[type] = None,
523    indent: Optional[int] = None,
524    separators: Optional[tuple[str, str]] = None,
525    default: Optional[Any] = None,
526    sort_keys: bool = False,
527) -> None: ...
528"#;
529
530        let mapping = parse_pyi(json_pyi, "json");
531
532        // Verify crate mapping
533        assert_eq!(mapping.rust_path, "serde_json");
534        assert!(mapping.is_external);
535        assert_eq!(mapping.version, Some("1.0".to_string()));
536
537        // Verify function mappings
538        assert_eq!(mapping.item_map.get("loads"), Some(&"from_str".to_string()));
539        assert_eq!(
540            mapping.item_map.get("dumps"),
541            Some(&"to_string".to_string())
542        );
543        assert_eq!(
544            mapping.item_map.get("load"),
545            Some(&"from_reader".to_string())
546        );
547        assert_eq!(mapping.item_map.get("dump"), Some(&"to_writer".to_string()));
548    }
549
550    #[test]
551    fn test_parse_simple_function() {
552        let line = "def sqrt(x: float) -> float: ...";
553        let func = parse_function_line(line).unwrap();
554
555        assert_eq!(func.name, "sqrt");
556        assert_eq!(func.params.len(), 1);
557        assert_eq!(func.params[0], ("x".to_string(), "float".to_string()));
558        assert_eq!(func.return_type, "float");
559    }
560
561    #[test]
562    fn test_parse_function_with_defaults() {
563        let line = "def round(number: float, ndigits: int = None) -> float: ...";
564        let func = parse_function_line(line).unwrap();
565
566        assert_eq!(func.name, "round");
567        assert_eq!(func.params.len(), 2);
568        assert_eq!(func.params[0], ("number".to_string(), "float".to_string()));
569        assert_eq!(func.params[1], ("ndigits".to_string(), "int".to_string()));
570        assert_eq!(func.return_type, "float");
571    }
572
573    #[test]
574    fn test_parse_function_with_generic_return() {
575        let line = "def keys(self) -> list[str]: ...";
576        let func = parse_function_line(line).unwrap();
577
578        assert_eq!(func.name, "keys");
579        assert_eq!(func.return_type, "list[str]");
580    }
581
582    #[test]
583    fn test_extract_multiple_functions() {
584        let content = r#"
585def func_a(x: int) -> int: ...
586def func_b(s: str) -> str: ...
587def _private() -> None: ...
588"#;
589
590        let funcs = extract_function_signatures(content);
591
592        // Should have 2 functions (private one excluded)
593        assert_eq!(funcs.len(), 2);
594        assert_eq!(funcs[0].name, "func_a");
595        assert_eq!(funcs[1].name, "func_b");
596    }
597
598    #[test]
599    fn test_ingest_math_stub() {
600        let math_pyi = r#"
601def sqrt(x: float) -> float: ...
602def sin(x: float) -> float: ...
603def cos(x: float) -> float: ...
604def floor(x: float) -> int: ...
605def ceil(x: float) -> int: ...
606def pow(x: float, y: float) -> float: ...
607pi: float
608e: float
609"#;
610
611        let mapping = parse_pyi(math_pyi, "math");
612
613        assert_eq!(mapping.rust_path, "std::f64");
614        assert!(!mapping.is_external);
615
616        // Verify known mappings applied
617        assert_eq!(mapping.item_map.get("sqrt"), Some(&"sqrt".to_string()));
618        assert_eq!(mapping.item_map.get("sin"), Some(&"sin".to_string()));
619        assert_eq!(mapping.item_map.get("cos"), Some(&"cos".to_string()));
620        assert_eq!(mapping.item_map.get("pow"), Some(&"powf".to_string()));
621    }
622
623    #[test]
624    fn test_ingest_os_stub() {
625        let os_pyi = r#"
626def getcwd() -> str: ...
627def getenv(key: str, default: str = None) -> str: ...
628def listdir(path: str = ".") -> list[str]: ...
629"#;
630
631        let mapping = parse_pyi(os_pyi, "os");
632
633        assert_eq!(mapping.rust_path, "std");
634        assert!(!mapping.is_external);
635
636        assert_eq!(
637            mapping.item_map.get("getcwd"),
638            Some(&"env::current_dir".to_string())
639        );
640        assert_eq!(
641            mapping.item_map.get("getenv"),
642            Some(&"env::var".to_string())
643        );
644        assert_eq!(
645            mapping.item_map.get("listdir"),
646            Some(&"fs::read_dir".to_string())
647        );
648    }
649
650    #[test]
651    fn test_unknown_module_fallback() {
652        let unknown_pyi = r#"
653def custom_func(x: int) -> int: ...
654"#;
655
656        let mapping = parse_pyi(unknown_pyi, "unknown_module");
657
658        // Should fallback to module name as crate
659        assert_eq!(mapping.rust_path, "unknown_module");
660        assert!(mapping.is_external);
661
662        // Unknown function should map to itself
663        assert_eq!(
664            mapping.item_map.get("custom_func"),
665            Some(&"custom_func".to_string())
666        );
667    }
668
669    // ============================================================
670    // DEPYLER-COVERAGE-95: Additional comprehensive tests
671    // ============================================================
672
673    #[test]
674    fn test_type_mapping_config_default() {
675        let config = TypeMappingConfig::default();
676
677        // Primitive types
678        assert_eq!(config.type_map.get("str"), Some(&"String".to_string()));
679        assert_eq!(config.type_map.get("int"), Some(&"i64".to_string()));
680        assert_eq!(config.type_map.get("float"), Some(&"f64".to_string()));
681        assert_eq!(config.type_map.get("bool"), Some(&"bool".to_string()));
682        assert_eq!(config.type_map.get("None"), Some(&"()".to_string()));
683        assert_eq!(config.type_map.get("bytes"), Some(&"Vec<u8>".to_string()));
684    }
685
686    #[test]
687    fn test_type_mapping_config_generic_types() {
688        let config = TypeMappingConfig::default();
689
690        assert_eq!(
691            config.type_map.get("Any"),
692            Some(&"serde_json::Value".to_string())
693        );
694        assert_eq!(
695            config.type_map.get("object"),
696            Some(&"serde_json::Value".to_string())
697        );
698    }
699
700    #[test]
701    fn test_type_mapping_config_containers() {
702        let config = TypeMappingConfig::default();
703
704        assert_eq!(config.type_map.get("list"), Some(&"Vec".to_string()));
705        assert_eq!(config.type_map.get("dict"), Some(&"HashMap".to_string()));
706        assert_eq!(config.type_map.get("set"), Some(&"HashSet".to_string()));
707        assert_eq!(config.type_map.get("tuple"), Some(&"tuple".to_string()));
708        assert_eq!(config.type_map.get("Optional"), Some(&"Option".to_string()));
709    }
710
711    #[test]
712    fn test_type_mapping_config_capitalized_containers() {
713        let config = TypeMappingConfig::default();
714
715        assert_eq!(config.type_map.get("List"), Some(&"Vec".to_string()));
716        assert_eq!(config.type_map.get("Dict"), Some(&"HashMap".to_string()));
717        assert_eq!(config.type_map.get("Set"), Some(&"HashSet".to_string()));
718        assert_eq!(config.type_map.get("Tuple"), Some(&"tuple".to_string()));
719    }
720
721    #[test]
722    fn test_crate_mapping_config_default() {
723        let config = CrateMappingConfig::default();
724
725        // Check stdlib modules
726        let (path, is_ext, _) = config.crate_map.get("os").unwrap();
727        assert_eq!(path, "std");
728        assert!(!is_ext);
729    }
730
731    #[test]
732    fn test_crate_mapping_config_external_crates() {
733        let config = CrateMappingConfig::default();
734
735        let (path, is_ext, version) = config.crate_map.get("json").unwrap();
736        assert_eq!(path, "serde_json");
737        assert!(is_ext);
738        assert_eq!(version, &Some("1.0".to_string()));
739    }
740
741    #[test]
742    fn test_crate_mapping_config_regex() {
743        let config = CrateMappingConfig::default();
744
745        let (path, is_ext, version) = config.crate_map.get("re").unwrap();
746        assert_eq!(path, "regex");
747        assert!(is_ext);
748        assert_eq!(version, &Some("1.10".to_string()));
749    }
750
751    #[test]
752    fn test_crate_mapping_config_random() {
753        let config = CrateMappingConfig::default();
754
755        let (path, is_ext, version) = config.crate_map.get("random").unwrap();
756        assert_eq!(path, "rand");
757        assert!(is_ext);
758        assert_eq!(version, &Some("0.8".to_string()));
759    }
760
761    #[test]
762    fn test_crate_mapping_config_datetime() {
763        let config = CrateMappingConfig::default();
764
765        let (path, is_ext, version) = config.crate_map.get("datetime").unwrap();
766        assert_eq!(path, "chrono");
767        assert!(is_ext);
768        assert_eq!(version, &Some("0.4".to_string()));
769    }
770
771    #[test]
772    fn test_crate_mapping_config_itertools() {
773        let config = CrateMappingConfig::default();
774
775        let (path, is_ext, version) = config.crate_map.get("itertools").unwrap();
776        assert_eq!(path, "itertools");
777        assert!(is_ext);
778        assert_eq!(version, &Some("0.12".to_string()));
779    }
780
781    #[test]
782    fn test_crate_mapping_config_hashlib() {
783        let config = CrateMappingConfig::default();
784
785        let (path, is_ext, version) = config.crate_map.get("hashlib").unwrap();
786        assert_eq!(path, "sha2");
787        assert!(is_ext);
788        assert_eq!(version, &Some("0.10".to_string()));
789    }
790
791    #[test]
792    fn test_crate_mapping_config_base64() {
793        let config = CrateMappingConfig::default();
794
795        let (path, is_ext, version) = config.crate_map.get("base64").unwrap();
796        assert_eq!(path, "base64");
797        assert!(is_ext);
798        assert_eq!(version, &Some("0.21".to_string()));
799    }
800
801    #[test]
802    fn test_crate_mapping_config_csv() {
803        let config = CrateMappingConfig::default();
804
805        let (path, is_ext, version) = config.crate_map.get("csv").unwrap();
806        assert_eq!(path, "csv");
807        assert!(is_ext);
808        assert_eq!(version, &Some("1.3".to_string()));
809    }
810
811    #[test]
812    fn test_crate_mapping_config_pathlib() {
813        let config = CrateMappingConfig::default();
814
815        let (path, is_ext, _) = config.crate_map.get("pathlib").unwrap();
816        assert_eq!(path, "std::path");
817        assert!(!is_ext);
818    }
819
820    #[test]
821    fn test_crate_mapping_config_tempfile() {
822        let config = CrateMappingConfig::default();
823
824        let (path, is_ext, version) = config.crate_map.get("tempfile").unwrap();
825        assert_eq!(path, "tempfile");
826        assert!(is_ext);
827        assert_eq!(version, &Some("3.0".to_string()));
828    }
829
830    #[test]
831    fn test_crate_mapping_config_sys() {
832        let config = CrateMappingConfig::default();
833
834        let (path, is_ext, _) = config.crate_map.get("sys").unwrap();
835        assert_eq!(path, "std");
836        assert!(!is_ext);
837    }
838
839    #[test]
840    fn test_crate_mapping_config_math() {
841        let config = CrateMappingConfig::default();
842
843        let (path, is_ext, _) = config.crate_map.get("math").unwrap();
844        assert_eq!(path, "std::f64");
845        assert!(!is_ext);
846    }
847
848    #[test]
849    fn test_crate_mapping_config_collections() {
850        let config = CrateMappingConfig::default();
851
852        let (path, is_ext, _) = config.crate_map.get("collections").unwrap();
853        assert_eq!(path, "std::collections");
854        assert!(!is_ext);
855    }
856
857    #[test]
858    fn test_function_mapping_config_json() {
859        let config = FunctionMappingConfig::default();
860
861        assert_eq!(
862            config
863                .func_map
864                .get(&("json".to_string(), "loads".to_string())),
865            Some(&"from_str".to_string())
866        );
867        assert_eq!(
868            config
869                .func_map
870                .get(&("json".to_string(), "dumps".to_string())),
871            Some(&"to_string".to_string())
872        );
873        assert_eq!(
874            config
875                .func_map
876                .get(&("json".to_string(), "load".to_string())),
877            Some(&"from_reader".to_string())
878        );
879        assert_eq!(
880            config
881                .func_map
882                .get(&("json".to_string(), "dump".to_string())),
883            Some(&"to_writer".to_string())
884        );
885    }
886
887    #[test]
888    fn test_function_mapping_config_os() {
889        let config = FunctionMappingConfig::default();
890
891        assert_eq!(
892            config
893                .func_map
894                .get(&("os".to_string(), "getcwd".to_string())),
895            Some(&"env::current_dir".to_string())
896        );
897        assert_eq!(
898            config
899                .func_map
900                .get(&("os".to_string(), "getenv".to_string())),
901            Some(&"env::var".to_string())
902        );
903        assert_eq!(
904            config
905                .func_map
906                .get(&("os".to_string(), "listdir".to_string())),
907            Some(&"fs::read_dir".to_string())
908        );
909    }
910
911    #[test]
912    fn test_function_mapping_config_math() {
913        let config = FunctionMappingConfig::default();
914
915        assert_eq!(
916            config
917                .func_map
918                .get(&("math".to_string(), "sqrt".to_string())),
919            Some(&"sqrt".to_string())
920        );
921        assert_eq!(
922            config
923                .func_map
924                .get(&("math".to_string(), "sin".to_string())),
925            Some(&"sin".to_string())
926        );
927        assert_eq!(
928            config
929                .func_map
930                .get(&("math".to_string(), "cos".to_string())),
931            Some(&"cos".to_string())
932        );
933        assert_eq!(
934            config
935                .func_map
936                .get(&("math".to_string(), "floor".to_string())),
937            Some(&"floor".to_string())
938        );
939        assert_eq!(
940            config
941                .func_map
942                .get(&("math".to_string(), "ceil".to_string())),
943            Some(&"ceil".to_string())
944        );
945        assert_eq!(
946            config
947                .func_map
948                .get(&("math".to_string(), "abs".to_string())),
949            Some(&"abs".to_string())
950        );
951        assert_eq!(
952            config
953                .func_map
954                .get(&("math".to_string(), "pow".to_string())),
955            Some(&"powf".to_string())
956        );
957    }
958
959    #[test]
960    fn test_function_mapping_config_re() {
961        let config = FunctionMappingConfig::default();
962
963        assert_eq!(
964            config
965                .func_map
966                .get(&("re".to_string(), "compile".to_string())),
967            Some(&"Regex::new".to_string())
968        );
969        assert_eq!(
970            config
971                .func_map
972                .get(&("re".to_string(), "match".to_string())),
973            Some(&"Regex::is_match".to_string())
974        );
975        assert_eq!(
976            config
977                .func_map
978                .get(&("re".to_string(), "search".to_string())),
979            Some(&"Regex::find".to_string())
980        );
981        assert_eq!(
982            config
983                .func_map
984                .get(&("re".to_string(), "findall".to_string())),
985            Some(&"Regex::find_iter".to_string())
986        );
987        assert_eq!(
988            config.func_map.get(&("re".to_string(), "sub".to_string())),
989            Some(&"Regex::replace_all".to_string())
990        );
991    }
992
993    #[test]
994    fn test_count_parens_balanced() {
995        assert_eq!(count_parens("()"), 0);
996        assert_eq!(count_parens("(())"), 0);
997        assert_eq!(count_parens("((()))"), 0);
998    }
999
1000    #[test]
1001    fn test_count_parens_unbalanced() {
1002        assert_eq!(count_parens("("), 1);
1003        assert_eq!(count_parens("(("), 2);
1004        assert_eq!(count_parens(")"), -1);
1005        assert_eq!(count_parens("))"), -2);
1006    }
1007
1008    #[test]
1009    fn test_count_parens_with_content() {
1010        assert_eq!(count_parens("def foo(x: int,"), 1);
1011        assert_eq!(count_parens("y: str) -> int:"), -1);
1012    }
1013
1014    #[test]
1015    fn test_parse_params_empty() {
1016        let params = parse_params("");
1017        assert!(params.is_empty());
1018    }
1019
1020    #[test]
1021    fn test_parse_params_single() {
1022        let params = parse_params("x: int");
1023        assert_eq!(params.len(), 1);
1024        assert_eq!(params[0], ("x".to_string(), "int".to_string()));
1025    }
1026
1027    #[test]
1028    fn test_parse_params_multiple() {
1029        let params = parse_params("x: int, y: str, z: float");
1030        assert_eq!(params.len(), 3);
1031        assert_eq!(params[0], ("x".to_string(), "int".to_string()));
1032        assert_eq!(params[1], ("y".to_string(), "str".to_string()));
1033        assert_eq!(params[2], ("z".to_string(), "float".to_string()));
1034    }
1035
1036    #[test]
1037    fn test_parse_params_with_defaults() {
1038        let params = parse_params("x: int = 0, y: str = \"\"");
1039        assert_eq!(params.len(), 2);
1040        assert_eq!(params[0], ("x".to_string(), "int".to_string()));
1041        assert_eq!(params[1], ("y".to_string(), "str".to_string()));
1042    }
1043
1044    #[test]
1045    fn test_parse_params_skip_self() {
1046        let params = parse_params("self, x: int");
1047        assert_eq!(params.len(), 1);
1048        assert_eq!(params[0], ("x".to_string(), "int".to_string()));
1049    }
1050
1051    #[test]
1052    fn test_parse_params_skip_args_kwargs() {
1053        let params = parse_params("x: int, *args, **kwargs");
1054        assert_eq!(params.len(), 1);
1055        assert_eq!(params[0], ("x".to_string(), "int".to_string()));
1056    }
1057
1058    #[test]
1059    fn test_parse_params_untyped() {
1060        let params = parse_params("x");
1061        assert_eq!(params.len(), 1);
1062        assert_eq!(params[0], ("x".to_string(), "Any".to_string()));
1063    }
1064
1065    #[test]
1066    fn test_parse_params_generic() {
1067        let params = parse_params("x: list[int], y: dict[str, int]");
1068        assert_eq!(params.len(), 2);
1069        assert_eq!(params[0], ("x".to_string(), "list[int]".to_string()));
1070        assert_eq!(params[1], ("y".to_string(), "dict[str, int]".to_string()));
1071    }
1072
1073    #[test]
1074    fn test_parse_function_line_no_return_type() {
1075        let func = parse_function_line("def foo(x: int):").unwrap();
1076        assert_eq!(func.name, "foo");
1077        assert_eq!(func.return_type, "None");
1078    }
1079
1080    #[test]
1081    fn test_parse_function_line_no_params() {
1082        let func = parse_function_line("def foo() -> int: ...").unwrap();
1083        assert_eq!(func.name, "foo");
1084        assert!(func.params.is_empty());
1085        assert_eq!(func.return_type, "int");
1086    }
1087
1088    #[test]
1089    fn test_parse_function_line_invalid() {
1090        assert!(parse_function_line("not a function").is_none());
1091        assert!(parse_function_line("class Foo:").is_none());
1092    }
1093
1094    #[test]
1095    fn test_normalize_multiline_single_line() {
1096        let content = "def foo(x: int) -> int: ...";
1097        let normalized = normalize_multiline_functions(content);
1098        assert!(normalized.contains("def foo(x: int) -> int: ..."));
1099    }
1100
1101    #[test]
1102    fn test_normalize_multiline_actual_multiline() {
1103        let content = r#"def foo(
1104    x: int,
1105    y: str
1106) -> int: ..."#;
1107        let normalized = normalize_multiline_functions(content);
1108        assert!(normalized.contains("def foo("));
1109        assert!(normalized.contains("x: int,"));
1110        // Should be joined into a single logical line
1111    }
1112
1113    #[test]
1114    fn test_extract_function_signatures_empty() {
1115        let content = "";
1116        let funcs = extract_function_signatures(content);
1117        assert!(funcs.is_empty());
1118    }
1119
1120    #[test]
1121    fn test_extract_function_signatures_skip_private() {
1122        let content = r#"
1123def public_func() -> None: ...
1124def _private_func() -> None: ...
1125def __dunder_func() -> None: ...
1126"#;
1127        let funcs = extract_function_signatures(content);
1128        // Should only have public_func
1129        assert_eq!(funcs.len(), 1);
1130        assert_eq!(funcs[0].name, "public_func");
1131    }
1132
1133    #[test]
1134    fn test_extract_function_signatures_include_init() {
1135        let content = r#"
1136def __init__(self, x: int) -> None: ...
1137"#;
1138        let funcs = extract_function_signatures(content);
1139        assert_eq!(funcs.len(), 1);
1140        assert_eq!(funcs[0].name, "__init__");
1141    }
1142
1143    #[test]
1144    fn test_parsed_function_struct() {
1145        let func = ParsedFunction {
1146            name: "test".to_string(),
1147            params: vec![("x".to_string(), "int".to_string())],
1148            return_type: "str".to_string(),
1149        };
1150
1151        assert_eq!(func.name, "test");
1152        assert_eq!(func.params.len(), 1);
1153        assert_eq!(func.return_type, "str");
1154    }
1155
1156    #[test]
1157    fn test_parsed_function_equality() {
1158        let func1 = ParsedFunction {
1159            name: "test".to_string(),
1160            params: vec![],
1161            return_type: "int".to_string(),
1162        };
1163        let func2 = ParsedFunction {
1164            name: "test".to_string(),
1165            params: vec![],
1166            return_type: "int".to_string(),
1167        };
1168        assert_eq!(func1, func2);
1169    }
1170
1171    #[test]
1172    fn test_parse_pyi_with_config_custom_crate() {
1173        let content = "def custom_fn() -> None: ...";
1174        let type_config = TypeMappingConfig::default();
1175        let mut crate_config = CrateMappingConfig::default();
1176        crate_config.crate_map.insert(
1177            "custom".to_string(),
1178            ("my_crate".to_string(), true, Some("2.0".to_string())),
1179        );
1180        let func_config = FunctionMappingConfig::default();
1181
1182        let mapping =
1183            parse_pyi_with_config(content, "custom", &type_config, &crate_config, &func_config);
1184
1185        assert_eq!(mapping.rust_path, "my_crate");
1186        assert!(mapping.is_external);
1187        assert_eq!(mapping.version, Some("2.0".to_string()));
1188    }
1189
1190    #[test]
1191    fn test_ingest_re_stub() {
1192        let re_pyi = r#"
1193def compile(pattern: str) -> Pattern: ...
1194def match(pattern: str, string: str) -> Match: ...
1195def search(pattern: str, string: str) -> Match: ...
1196def findall(pattern: str, string: str) -> list[str]: ...
1197def sub(pattern: str, repl: str, string: str) -> str: ...
1198"#;
1199
1200        let mapping = parse_pyi(re_pyi, "re");
1201
1202        assert_eq!(mapping.rust_path, "regex");
1203        assert!(mapping.is_external);
1204        assert_eq!(mapping.version, Some("1.10".to_string()));
1205
1206        assert_eq!(
1207            mapping.item_map.get("compile"),
1208            Some(&"Regex::new".to_string())
1209        );
1210        assert_eq!(
1211            mapping.item_map.get("match"),
1212            Some(&"Regex::is_match".to_string())
1213        );
1214        assert_eq!(
1215            mapping.item_map.get("search"),
1216            Some(&"Regex::find".to_string())
1217        );
1218        assert_eq!(
1219            mapping.item_map.get("findall"),
1220            Some(&"Regex::find_iter".to_string())
1221        );
1222        assert_eq!(
1223            mapping.item_map.get("sub"),
1224            Some(&"Regex::replace_all".to_string())
1225        );
1226    }
1227
1228    #[test]
1229    fn test_module_mapping_constructor_patterns() {
1230        let mapping = parse_pyi("def foo() -> None: ...", "test");
1231        // Constructor patterns should be empty by default
1232        assert!(mapping.constructor_patterns.is_empty());
1233    }
1234
1235    #[test]
1236    fn test_type_mapping_config_clone() {
1237        let config = TypeMappingConfig::default();
1238        let cloned = config.clone();
1239        assert_eq!(config.type_map.len(), cloned.type_map.len());
1240    }
1241
1242    #[test]
1243    fn test_crate_mapping_config_clone() {
1244        let config = CrateMappingConfig::default();
1245        let cloned = config.clone();
1246        assert_eq!(config.crate_map.len(), cloned.crate_map.len());
1247    }
1248
1249    #[test]
1250    fn test_function_mapping_config_clone() {
1251        let config = FunctionMappingConfig::default();
1252        let cloned = config.clone();
1253        assert_eq!(config.func_map.len(), cloned.func_map.len());
1254    }
1255
1256    #[test]
1257    fn test_parsed_function_clone() {
1258        let func = ParsedFunction {
1259            name: "test".to_string(),
1260            params: vec![("x".to_string(), "int".to_string())],
1261            return_type: "str".to_string(),
1262        };
1263        let cloned = func.clone();
1264        assert_eq!(func, cloned);
1265    }
1266}