py_import_helper/utils/
parsing.rs

1//! Import statement parsing utilities
2//!
3//! This module provides functions for parsing Python import statements
4//! and extracting relevant information such as package names and imported items.
5
6use crate::types::{ImportCategory, ImportStatement, ImportType};
7
/// Extract the package name from an import statement
///
/// * `from <package> import ...` yields the text between `from` and `import`
///   (relative packages such as `.` or `..utils` are returned as written).
/// * `import <module> ...` yields the first module listed, without any `as`
///   alias and without the further modules of a comma-separated list.
///
/// When no package can be identified (unrecognised statement, missing
/// ` import ` part, or an empty package), the input is returned unchanged.
///
/// # Examples
///
/// ```
/// use py_import_helper::utils::parsing::extract_package;
///
/// assert_eq!(extract_package("from typing import Any"), "typing");
/// assert_eq!(extract_package("import json"), "json");
/// assert_eq!(extract_package("from collections.abc import Mapping"), "collections.abc");
/// assert_eq!(extract_package("import os, sys"), "os");
/// ```
#[must_use]
pub fn extract_package(import_statement: &str) -> String {
    let package = if let Some(from_part) = import_statement.strip_prefix("from ") {
        from_part
            .split_once(" import ")
            .map(|(package, _)| package.trim())
    } else if let Some(import_part) = import_statement.strip_prefix("import ") {
        // The first whitespace-delimited token is the module path; cut it at the
        // first comma so `import os, sys` and `import os,sys` both give `os`
        // rather than `os,` / `os,sys`.
        import_part
            .split_whitespace()
            .next()
            .and_then(|first| first.split(',').next())
    } else {
        None
    };

    match package {
        Some(pkg) if !pkg.is_empty() => pkg.to_string(),
        // Nothing usable was found: hand the statement back untouched
        _ => import_statement.to_string(),
    }
}
47
48/// Extract imported items from an import statement
49///
50/// Items are automatically sorted with `ALL_CAPS` names first, then mixed case alphabetically.
51///
52/// # Examples
53///
54/// ```
55/// use py_import_helper::utils::parsing::extract_items;
56///
57/// let items = extract_items("from typing import Any, Optional");
58/// assert_eq!(items, vec!["Any", "Optional"]);
59///
60/// let items = extract_items("from typing import TYPE_CHECKING, Any");
61/// assert_eq!(items, vec!["TYPE_CHECKING", "Any"]);
62/// ```
63#[must_use]
64pub fn extract_items(import_statement: &str) -> Vec<String> {
65    if let Some(from_part) = import_statement.strip_prefix("from ") {
66        // Use split_once for Unicode-safe splitting
67        if let Some((_, items_part)) = from_part.split_once(" import ") {
68            // Unicode-safe character replacement in single pass
69            let cleaned: String = items_part
70                .chars()
71                .map(|c| match c {
72                    '(' | ')' | ',' => ' ',
73                    _ => c,
74                })
75                .collect();
76            let mut items: Vec<String> = cleaned
77                .split_whitespace()
78                .map(|s| s.trim().to_string())
79                .filter(|s| !s.is_empty())
80                .collect();
81
82            // Sort items with ALL_CAPS first, then mixed case alphabetically
83            items.sort_by(|a, b| custom_import_sort(a, b));
84            return items;
85        }
86    } else if let Some(import_part) = import_statement.strip_prefix("import ") {
87        // For direct imports, the "item" is the module itself
88        return vec![import_part.trim().to_string()];
89    }
90    Vec::new()
91}
92
/// Custom sorting for import items: `ALL_CAPS` first (alphabetically), then mixed case (alphabetically)
///
/// A name counts as `ALL_CAPS` when it contains at least one alphabetic
/// character and every alphabetic character is uppercase; digits and
/// underscores are ignored. Names without any letters (such as `_`) are
/// therefore grouped with the mixed-case names.
///
/// Within each group names are compared with plain string ordering.
/// Wildcard imports (`*`) always come last.
///
/// Listing constants ahead of other names resembles isort's type-based
/// ordering of imported names.
#[must_use]
pub fn custom_import_sort(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;

    /// True when `name` has at least one letter and all of its letters are uppercase.
    fn is_all_caps(name: &str) -> bool {
        let mut letters = name.chars().filter(|c| c.is_alphabetic()).peekable();
        // `all` on an empty iterator is vacuously true, so insist on one letter first
        letters.peek().is_some() && letters.all(char::is_uppercase)
    }

    // Wildcard imports always come last
    match (a, b) {
        ("*", "*") => return Ordering::Equal,
        ("*", _) => return Ordering::Greater,
        (_, "*") => return Ordering::Less,
        _ => {}
    }

    match (is_all_caps(a), is_all_caps(b)) {
        // Same group on both sides - sort alphabetically
        (true, true) | (false, false) => a.cmp(b),
        // a is ALL_CAPS, b is mixed case - a comes first
        (true, false) => Ordering::Less,
        // a is mixed case, b is ALL_CAPS - b comes first
        (false, true) => Ordering::Greater,
    }
}
127
128/// Parse an import statement and categorize it
129#[must_use]
130pub fn parse_import(import_statement: &str, category: ImportCategory) -> Option<ImportStatement> {
131    let trimmed = import_statement.trim();
132    if trimmed.is_empty() {
133        return None;
134    }
135
136    let import_type = if trimmed.starts_with("from ") {
137        ImportType::From
138    } else {
139        ImportType::Direct
140    };
141
142    let package = extract_package(trimmed);
143    let items = extract_items(trimmed);
144    let is_multiline = trimmed.contains('(') || trimmed.contains(')');
145
146    // Reconstruct the statement with sorted items for from imports
147    let statement = if import_type == ImportType::From && !items.is_empty() {
148        format!("from {} import {}", package, items.join(", "))
149    } else {
150        trimmed.to_string()
151    };
152
153    Some(ImportStatement {
154        statement,
155        category,
156        import_type,
157        package,
158        items,
159        is_multiline,
160    })
161}
162
#[cfg(test)]
mod tests {
    use super::*;

    /// Package extraction for `from` imports, plain imports and dotted paths.
    #[test]
    fn test_extract_package() {
        let cases = [
            ("from typing import Any", "typing"),
            ("import json", "json"),
            ("from collections.abc import Mapping", "collections.abc"),
        ];
        for &(statement, expected) in &cases {
            assert_eq!(extract_package(statement), expected);
        }
    }

    /// Item extraction keeps alphabetical order and moves `ALL_CAPS` names first.
    #[test]
    fn test_extract_items() {
        let cases = [
            ("from typing import Any, Optional", vec!["Any", "Optional"]),
            (
                "from typing import TYPE_CHECKING, Any",
                vec!["TYPE_CHECKING", "Any"],
            ),
        ];
        for (statement, expected) in &cases {
            assert_eq!(&extract_items(statement), expected);
        }
    }

    /// Sorting with the comparator groups `ALL_CAPS` names ahead of mixed case.
    #[test]
    fn test_custom_import_sort() {
        let mut names = vec!["Any", "TYPE_CHECKING", "Optional", "LITERAL"];
        names.sort_by(|left, right| custom_import_sort(left, right));
        assert_eq!(names, vec!["LITERAL", "TYPE_CHECKING", "Any", "Optional"]);
    }
}