py_import_helper/utils/
parsing.rs

1//! Import statement parsing utilities
2//!
3//! This module provides functions for parsing Python import statements
4//! and extracting relevant information such as package names and imported items.
5
6use crate::types::{ImportCategory, ImportStatement, ImportType};
7
/// Extract the package (module path) named by an import statement.
///
/// * `from <package> import ...` yields `<package>`, trimmed.
/// * `import <module> ...` yields the first module path; an `as` alias and any
///   further comma-separated modules are ignored.
///
/// The input is returned unchanged when it matches neither form, or when the
/// package part turns out to be empty.
///
/// # Examples
///
/// ```
/// use py_import_helper::utils::parsing::extract_package;
///
/// assert_eq!(extract_package("from typing import Any"), "typing");
/// assert_eq!(extract_package("import json"), "json");
/// assert_eq!(extract_package("from collections.abc import Mapping"), "collections.abc");
/// assert_eq!(extract_package("import os, sys"), "os");
/// ```
#[must_use]
pub fn extract_package(import_statement: &str) -> String {
    let package = if let Some(from_part) = import_statement.strip_prefix("from ") {
        // Everything between `from ` and ` import ` is the package path.
        from_part
            .split_once(" import ")
            .map(|(package, _)| package.trim())
    } else if let Some(import_part) = import_statement.strip_prefix("import ") {
        // Python allows `import os, sys`, so the first module path ends at
        // whitespace *or* a comma; splitting on whitespace alone would keep
        // the comma and yield `os,`.
        import_part
            .split(|c: char| c.is_whitespace() || c == ',')
            .find(|token| !token.is_empty())
    } else {
        None
    };

    match package {
        Some(pkg) if !pkg.is_empty() => pkg.to_string(),
        // Not a recognisable import (or an empty package): hand the input back.
        _ => import_statement.to_string(),
    }
}
47
48/// Extract imported items from an import statement
49///
50/// Items are automatically sorted with `ALL_CAPS` names first, then mixed case alphabetically.
51///
52/// # Examples
53///
54/// ```
55/// use py_import_helper::utils::parsing::extract_items;
56///
57/// let items = extract_items("from typing import Any, Optional");
58/// assert_eq!(items, vec!["Any", "Optional"]);
59///
60/// let items = extract_items("from typing import TYPE_CHECKING, Any");
61/// assert_eq!(items, vec!["TYPE_CHECKING", "Any"]);
62/// ```
63#[must_use]
64pub fn extract_items(import_statement: &str) -> Vec<String> {
65    if let Some(from_part) = import_statement.strip_prefix("from ") {
66        // Use split_once for Unicode-safe splitting
67        if let Some((_, items_part)) = from_part.split_once(" import ") {
68            // Unicode-safe character replacement in single pass
69            let cleaned: String = items_part
70                .chars()
71                .map(|c| match c {
72                    '(' | ')' | ',' => ' ',
73                    _ => c,
74                })
75                .collect();
76            let mut items: Vec<String> = cleaned
77                .split_whitespace()
78                .map(|s| s.trim().to_string())
79                .filter(|s| !s.is_empty())
80                .collect();
81
82            // Sort items with ALL_CAPS first, then mixed case alphabetically
83            items.sort_by(|a, b| custom_import_sort(a, b));
84            return items;
85        }
86    } else if let Some(import_part) = import_statement.strip_prefix("import ") {
87        // For direct imports, the "item" is the module itself
88        return vec![import_part.trim().to_string()];
89    }
90    Vec::new()
91}
92
/// Ordering for import item names: `ALL_CAPS` names first, all other names
/// after them, and the wildcard `*` last.
///
/// A name counts as `ALL_CAPS` when it contains at least one alphabetic
/// character and every alphabetic character is uppercase, so `TYPE_CHECKING`
/// and `_CONST` qualify while `_private`, `_` and the empty string do not.
///
/// Within each group names are compared case-insensitively; names that are
/// equal ignoring case fall back to a case-sensitive comparison so the order
/// is total and deterministic.
#[must_use]
pub fn custom_import_sort(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    /// True when `name` has at least one letter and all of its letters are uppercase.
    fn is_all_caps(name: &str) -> bool {
        let mut letters = name.chars().filter(|c| c.is_alphabetic()).peekable();
        // `all` on an empty iterator is vacuously true, so require a first
        // letter explicitly; otherwise names like `_` would count as ALL_CAPS.
        letters.peek().is_some() && letters.all(char::is_uppercase)
    }

    // Wildcard imports always come last
    match (a, b) {
        ("*", "*") => return Ordering::Equal,
        ("*", _) => return Ordering::Greater,
        (_, "*") => return Ordering::Less,
        _ => {}
    }

    match (is_all_caps(a), is_all_caps(b)) {
        // a is ALL_CAPS, b is not - a comes first
        (true, false) => Ordering::Less,
        // b is ALL_CAPS, a is not - b comes first
        (false, true) => Ordering::Greater,
        // Same group: case-insensitive order, case-sensitive tiebreaker
        _ => a
            .to_lowercase()
            .cmp(&b.to_lowercase())
            .then_with(|| a.cmp(b)),
    }
}
135
136/// Parse an import statement and categorize it
137#[must_use]
138pub fn parse_import(import_statement: &str, category: ImportCategory) -> Option<ImportStatement> {
139    let trimmed = import_statement.trim();
140    if trimmed.is_empty() {
141        return None;
142    }
143
144    let import_type = if trimmed.starts_with("from ") {
145        ImportType::From
146    } else {
147        ImportType::Direct
148    };
149
150    let package = extract_package(trimmed);
151    let items = extract_items(trimmed);
152    let is_multiline = trimmed.contains('(') || trimmed.contains(')');
153
154    // Reconstruct the statement with sorted items for from imports
155    let statement = if import_type == ImportType::From && !items.is_empty() {
156        format!("from {} import {}", package, items.join(", "))
157    } else {
158        trimmed.to_string()
159    };
160
161    Some(ImportStatement {
162        statement,
163        category,
164        import_type,
165        package,
166        items,
167        is_multiline,
168    })
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// `extract_package` yields the module path for `from` and direct imports.
    #[test]
    fn test_extract_package() {
        let cases = [
            ("from typing import Any", "typing"),
            ("import json", "json"),
            ("from collections.abc import Mapping", "collections.abc"),
        ];
        for &(statement, expected) in &cases {
            assert_eq!(
                extract_package(statement),
                expected,
                "statement: {}",
                statement
            );
        }
    }

    /// `extract_items` returns the imported names with `ALL_CAPS` names first.
    #[test]
    fn test_extract_items() {
        assert_eq!(
            extract_items("from typing import Any, Optional"),
            vec!["Any", "Optional"]
        );
        assert_eq!(
            extract_items("from typing import TYPE_CHECKING, Any"),
            vec!["TYPE_CHECKING", "Any"]
        );
    }

    /// Sorting with `custom_import_sort` puts `ALL_CAPS` names ahead of mixed-case ones.
    #[test]
    fn test_custom_import_sort() {
        let mut names = ["Any", "TYPE_CHECKING", "Optional", "LITERAL"];
        names.sort_by(|lhs, rhs| custom_import_sort(lhs, rhs));
        assert_eq!(names, ["LITERAL", "TYPE_CHECKING", "Any", "Optional"]);
    }
}