Skip to main content

perl_module/reference/
mod.rs

1//! Cursor-aware Perl module reference extraction.
2//!
3//! Given source text and a cursor offset, identify module references used
4//! by `use`/`require` statements.
5
6use crate::name::normalize_package_separator;
7use crate::token_parser::parse_module_token;
8use perl_parser_core::text_line::{is_keyword_boundary, line_bounds_at, skip_ascii_whitespace};
9
/// Statement kind for a parsed module reference.
///
/// Records which keyword introduced the reference. Modules found inside
/// `use parent ...` / `use base ...` argument lists are also reported as
/// [`ModuleReferenceKind::Use`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModuleReferenceKind {
    /// The reference was introduced by `use`, e.g. `use Module::Name;`.
    Use,
    /// The reference was introduced by `require`, e.g. `require Module::Name;`.
    Require,
}
18
/// Module reference found at a cursor location.
///
/// The reference borrows from the text that was searched: `module_name` is a
/// slice of that text, and `module_start..module_end` is the byte range the
/// slice occupies in it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModuleReference<'a> {
    /// Statement kind (`use` or `require`) that introduced the reference.
    pub kind: ModuleReferenceKind,
    /// Raw module token text exactly as written in source (not normalized).
    pub module_name: &'a str,
    /// Inclusive byte start offset of `module_name` in the input text.
    pub module_start: usize,
    /// Exclusive byte end offset of `module_name` in the input text.
    pub module_end: usize,
}
31
32impl ModuleReference<'_> {
33    /// Return the module name normalized to canonical `::` separators.
34    #[must_use]
35    pub fn canonical_module_name(&self) -> String {
36        normalize_package_separator(self.module_name).into_owned()
37    }
38}
39
40/// Find a `use`/`require` module reference at `cursor_pos`.
41#[must_use]
42pub fn find_module_reference(text: &str, cursor_pos: usize) -> Option<ModuleReference<'_>> {
43    if text.is_empty() || cursor_pos > text.len() {
44        return None;
45    }
46
47    let (line_start, line_end) = line_bounds_at(text, cursor_pos);
48    let line = &text[line_start..line_end];
49    let cursor_in_line = cursor_pos.saturating_sub(line_start);
50
51    find_in_line(line, line_start, cursor_in_line)
52}
53
54/// Find a module reference inside `use parent`/`use base` argument lists.
55///
56/// When the cursor is on a quoted module name inside `use parent 'Foo::Bar'`
57/// or `use base qw(Foo::Bar)`, this returns the referenced module name.
58/// For direct `use`/`require` statements, delegates to [`find_module_reference`].
59#[must_use]
60pub fn find_module_reference_extended(
61    text: &str,
62    cursor_pos: usize,
63) -> Option<ModuleReference<'_>> {
64    if let Some(reference) = find_module_reference(text, cursor_pos) {
65        return Some(reference);
66    }
67
68    if text.is_empty() || cursor_pos > text.len() {
69        return None;
70    }
71
72    let (line_start, line_end) = line_bounds_at(text, cursor_pos);
73    let line = &text[line_start..line_end];
74    let cursor_in_line = cursor_pos.saturating_sub(line_start);
75
76    find_parent_base_module_in_line(line, line_start, cursor_in_line)
77}
78
79/// Extract a module reference at `cursor_pos` as a canonical module name.
80#[must_use]
81pub fn extract_module_reference(text: &str, cursor_pos: usize) -> Option<String> {
82    find_module_reference(text, cursor_pos).map(|reference| reference.canonical_module_name())
83}
84
85/// Extract a module reference at `cursor_pos` as a canonical module name,
86/// including `use parent`/`use base` argument modules.
87#[must_use]
88pub fn extract_module_reference_extended(text: &str, cursor_pos: usize) -> Option<String> {
89    find_module_reference_extended(text, cursor_pos)
90        .map(|reference| reference.canonical_module_name())
91}
92
93fn find_in_line(
94    line: &str,
95    line_offset: usize,
96    cursor_in_line: usize,
97) -> Option<ModuleReference<'_>> {
98    find_in_line_for_keyword(line, line_offset, cursor_in_line, "use", ModuleReferenceKind::Use)
99        .or_else(|| {
100            find_in_line_for_keyword(
101                line,
102                line_offset,
103                cursor_in_line,
104                "require",
105                ModuleReferenceKind::Require,
106            )
107        })
108}
109
110fn find_parent_base_module_in_line<'a>(
111    line: &'a str,
112    line_offset: usize,
113    cursor_in_line: usize,
114) -> Option<ModuleReference<'a>> {
115    let trimmed = line.trim_start();
116    let leading_ws = line.len().saturating_sub(trimmed.len());
117
118    let rest = trimmed.strip_prefix("use")?;
119    if !rest.starts_with(|c: char| c.is_whitespace()) {
120        return None;
121    }
122    let rest = rest.trim_start();
123
124    let is_parent = rest.starts_with("parent");
125    let is_base = rest.starts_with("base");
126    if !is_parent && !is_base {
127        return None;
128    }
129
130    let keyword = if is_parent { "parent" } else { "base" };
131    let after_keyword = &rest[keyword.len()..];
132    if !after_keyword.is_empty() && !after_keyword.starts_with(|c: char| c.is_whitespace()) {
133        return None;
134    }
135
136    let args_area = after_keyword;
137    let args_start_in_line = leading_ws + "use ".len() + (rest.len() - after_keyword.len());
138
139    let bytes = args_area.as_bytes();
140    let mut i = 0;
141    while i < bytes.len() {
142        let b = bytes[i];
143
144        if !is_module_start_byte(b) {
145            i += 1;
146            continue;
147        }
148
149        let token_start_in_args = i;
150        let token_end_in_args = scan_canonical_module_token(bytes, i);
151        let token_start_in_line = args_start_in_line + token_start_in_args;
152        let token_end_in_line = args_start_in_line + token_end_in_args;
153        let module_name = &args_area[token_start_in_args..token_end_in_args];
154
155        let is_module_like = module_name.contains("::")
156            || module_name.as_bytes().first().is_some_and(u8::is_ascii_uppercase);
157
158        if is_module_like
159            && cursor_in_line >= token_start_in_line
160            && cursor_in_line <= token_end_in_line
161        {
162            return Some(ModuleReference {
163                kind: ModuleReferenceKind::Use,
164                module_name,
165                module_start: line_offset + token_start_in_line,
166                module_end: line_offset + token_end_in_line,
167            });
168        }
169
170        i = token_end_in_args;
171    }
172
173    None
174}
175
176fn scan_canonical_module_token(bytes: &[u8], start: usize) -> usize {
177    let mut i = start;
178
179    loop {
180        while i < bytes.len() && is_identifier_byte(bytes[i]) {
181            i += 1;
182        }
183
184        if i + 1 < bytes.len()
185            && bytes[i] == b':'
186            && bytes[i + 1] == b':'
187            && i + 2 < bytes.len()
188            && is_module_start_byte(bytes[i + 2])
189        {
190            i += 2;
191        } else {
192            break;
193        }
194    }
195
196    i
197}
198
/// True when `b` can begin a module name segment: an ASCII letter or `_`.
fn is_module_start_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
202
/// True when `b` can appear inside a module name segment: an ASCII letter,
/// an ASCII digit, or `_`.
fn is_identifier_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
206
207fn find_in_line_for_keyword<'a>(
208    line: &'a str,
209    line_offset: usize,
210    cursor_in_line: usize,
211    keyword: &'static str,
212    kind: ModuleReferenceKind,
213) -> Option<ModuleReference<'a>> {
214    let keyword_len = keyword.len();
215    let bytes = line.as_bytes();
216    let mut idx = 0usize;
217
218    while idx + keyword_len <= bytes.len() {
219        if !line[idx..].starts_with(keyword) {
220            idx += 1;
221            continue;
222        }
223
224        if !is_keyword_boundary(bytes, idx, keyword_len) {
225            idx += 1;
226            continue;
227        }
228
229        let after_keyword = idx + keyword_len;
230        if after_keyword >= bytes.len() || !bytes[after_keyword].is_ascii_whitespace() {
231            idx += 1;
232            continue;
233        }
234
235        let module_start = skip_ascii_whitespace(bytes, after_keyword);
236        if module_start >= bytes.len() {
237            idx += 1;
238            continue;
239        }
240
241        if let Some(span) = parse_module_token(line, module_start)
242            && cursor_in_line >= module_start
243            && cursor_in_line <= span.end
244        {
245            return Some(ModuleReference {
246                kind,
247                module_name: &line[module_start..span.end],
248                module_start: line_offset + module_start,
249                module_end: line_offset + span.end,
250            });
251        }
252
253        idx += 1;
254    }
255
256    None
257}