Skip to main content

perl_module_reference/
lib.rs

//! Cursor-aware Perl module reference extraction.
//!
//! This crate has one responsibility: given source text and a cursor offset,
//! identify module references used by `use`/`require` statements.
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
11use perl_module_name::normalize_package_separator;
12use perl_module_token_parser::parse_module_token;
13use perl_text_line::{is_keyword_boundary, line_bounds_at, skip_ascii_whitespace};
14
/// Which Perl statement introduced a module reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ModuleReferenceKind {
    /// The module was named by a `use Module::Name;` statement.
    Use,
    /// The module was named by a `require Module::Name;` statement.
    Require,
}
23
24/// Module reference found at a cursor location.
25#[derive(Debug, Clone, Copy, PartialEq, Eq)]
26pub struct ModuleReference<'a> {
27    /// Statement kind (`use` or `require`).
28    pub kind: ModuleReferenceKind,
29    /// Raw module token text as written in source.
30    pub module_name: &'a str,
31    /// Inclusive byte start offset of `module_name` in the input text.
32    pub module_start: usize,
33    /// Exclusive byte end offset of `module_name` in the input text.
34    pub module_end: usize,
35}
36
37impl ModuleReference<'_> {
38    /// Return the module name normalized to canonical `::` separators.
39    #[must_use]
40    pub fn canonical_module_name(&self) -> String {
41        normalize_package_separator(self.module_name).into_owned()
42    }
43}
44
45/// Find a `use`/`require` module reference at `cursor_pos`.
46///
47/// Returns [`None`] if the cursor is not over a direct module token in a
48/// `use` or `require` statement.
49#[must_use]
50pub fn find_module_reference(text: &str, cursor_pos: usize) -> Option<ModuleReference<'_>> {
51    if text.is_empty() || cursor_pos > text.len() {
52        return None;
53    }
54
55    let (line_start, line_end) = line_bounds_at(text, cursor_pos);
56    let line = &text[line_start..line_end];
57    let cursor_in_line = cursor_pos.saturating_sub(line_start);
58
59    find_in_line(line, line_start, cursor_in_line)
60}
61
62/// Find a module reference inside `use parent`/`use base` argument lists.
63///
64/// When the cursor is on a quoted module name inside `use parent 'Foo::Bar'`
65/// or `use base qw(Foo::Bar)`, this returns the referenced module name.
66/// For direct `use`/`require` statements, delegates to [`find_module_reference`].
67#[must_use]
68pub fn find_module_reference_extended(
69    text: &str,
70    cursor_pos: usize,
71) -> Option<ModuleReference<'_>> {
72    // First try direct module reference (use Foo::Bar, require Foo::Bar)
73    if let Some(reference) = find_module_reference(text, cursor_pos) {
74        return Some(reference);
75    }
76
77    if text.is_empty() || cursor_pos > text.len() {
78        return None;
79    }
80
81    let (line_start, line_end) = line_bounds_at(text, cursor_pos);
82    let line = &text[line_start..line_end];
83    let cursor_in_line = cursor_pos.saturating_sub(line_start);
84
85    find_parent_base_module_in_line(line, line_start, cursor_in_line)
86}
87
88/// Extract a module reference at `cursor_pos` as a canonical module name.
89///
90/// Returns canonical `::` separators even when source uses legacy `'`
91/// separators.
92#[must_use]
93pub fn extract_module_reference(text: &str, cursor_pos: usize) -> Option<String> {
94    find_module_reference(text, cursor_pos).map(|reference| reference.canonical_module_name())
95}
96
97/// Extract a module reference at `cursor_pos` as a canonical module name,
98/// including `use parent`/`use base` argument modules.
99///
100/// This is the extended version that also resolves quoted module names
101/// inside `use parent 'Module::Name'` and `use base qw(Module::Name)`.
102#[must_use]
103pub fn extract_module_reference_extended(text: &str, cursor_pos: usize) -> Option<String> {
104    find_module_reference_extended(text, cursor_pos)
105        .map(|reference| reference.canonical_module_name())
106}
107
108fn find_in_line(
109    line: &str,
110    line_offset: usize,
111    cursor_in_line: usize,
112) -> Option<ModuleReference<'_>> {
113    find_in_line_for_keyword(line, line_offset, cursor_in_line, "use", ModuleReferenceKind::Use)
114        .or_else(|| {
115            find_in_line_for_keyword(
116                line,
117                line_offset,
118                cursor_in_line,
119                "require",
120                ModuleReferenceKind::Require,
121            )
122        })
123}
124
125/// Check if a line starts with `use parent` or `use base` and extract the
126/// quoted module name under the cursor from the argument list.
127fn find_parent_base_module_in_line<'a>(
128    line: &'a str,
129    line_offset: usize,
130    cursor_in_line: usize,
131) -> Option<ModuleReference<'a>> {
132    let trimmed = line.trim_start();
133    let leading_ws = line.len().saturating_sub(trimmed.len());
134
135    let rest = trimmed.strip_prefix("use")?;
136    if !rest.starts_with(|c: char| c.is_whitespace()) {
137        return None;
138    }
139    let rest = rest.trim_start();
140
141    // Check for parent or base keyword
142    let is_parent = rest.starts_with("parent");
143    let is_base = rest.starts_with("base");
144    if !is_parent && !is_base {
145        return None;
146    }
147
148    let keyword = if is_parent { "parent" } else { "base" };
149    let after_keyword = &rest[keyword.len()..];
150    if !after_keyword.is_empty() && !after_keyword.starts_with(|c: char| c.is_whitespace()) {
151        return None;
152    }
153
154    // Scan the argument area for module-like tokens (Foo::Bar style).
155    // These appear inside quotes ('Module::Name', "Module::Name") or qw(Module::Name).
156    // We use a simple scanner that finds identifiers joined by `::`.
157    let args_area = after_keyword;
158    let args_start_in_line = leading_ws + "use ".len() + (rest.len() - after_keyword.len());
159
160    let bytes = args_area.as_bytes();
161    let mut i = 0;
162    while i < bytes.len() {
163        let b = bytes[i];
164
165        // Skip non-identifier-start characters (quotes, whitespace, parens, commas, etc.)
166        if !is_module_start_byte(b) {
167            i += 1;
168            continue;
169        }
170
171        // Found start of a potential module token -- scan using `::` separators only
172        // (not `'`, which is a string delimiter in this context)
173        let token_start_in_args = i;
174        let token_end_in_args = scan_canonical_module_token(bytes, i);
175        let token_start_in_line = args_start_in_line + token_start_in_args;
176        let token_end_in_line = args_start_in_line + token_end_in_args;
177        let module_name = &args_area[token_start_in_args..token_end_in_args];
178
179        // Only consider tokens that contain `::` (i.e., actual module names)
180        // or start with an uppercase letter (single-segment module names like `Carp`)
181        let is_module_like = module_name.contains("::")
182            || module_name.as_bytes().first().is_some_and(u8::is_ascii_uppercase);
183
184        if is_module_like
185            && cursor_in_line >= token_start_in_line
186            && cursor_in_line <= token_end_in_line
187        {
188            return Some(ModuleReference {
189                kind: ModuleReferenceKind::Use,
190                module_name,
191                module_start: line_offset + token_start_in_line,
192                module_end: line_offset + token_end_in_line,
193            });
194        }
195
196        i = token_end_in_args;
197    }
198
199    None
200}
201
202/// Scan a module token using only canonical `::` separators (not legacy `'`).
203///
204/// Returns the exclusive end offset of the token in the byte slice.
205fn scan_canonical_module_token(bytes: &[u8], start: usize) -> usize {
206    let mut i = start;
207
208    loop {
209        // Scan one identifier segment
210        while i < bytes.len() && is_identifier_byte(bytes[i]) {
211            i += 1;
212        }
213
214        // Check for `::` separator followed by another identifier segment
215        if i + 1 < bytes.len()
216            && bytes[i] == b':'
217            && bytes[i + 1] == b':'
218            && i + 2 < bytes.len()
219            && is_module_start_byte(bytes[i + 2])
220        {
221            i += 2; // skip `::`
222        } else {
223            break;
224        }
225    }
226
227    i
228}
229
/// Report whether `b` may begin a module name segment: an ASCII letter or `_`.
fn is_module_start_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
234
/// Report whether `b` may appear inside an identifier: an ASCII letter, an
/// ASCII digit, or `_`.
fn is_identifier_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
239
240fn find_in_line_for_keyword<'a>(
241    line: &'a str,
242    line_offset: usize,
243    cursor_in_line: usize,
244    keyword: &'static str,
245    kind: ModuleReferenceKind,
246) -> Option<ModuleReference<'a>> {
247    let keyword_len = keyword.len();
248    let bytes = line.as_bytes();
249    let mut idx = 0usize;
250
251    while idx + keyword_len <= bytes.len() {
252        if !line[idx..].starts_with(keyword) {
253            idx += 1;
254            continue;
255        }
256
257        if !is_keyword_boundary(bytes, idx, keyword_len) {
258            idx += 1;
259            continue;
260        }
261
262        let after_keyword = idx + keyword_len;
263        if after_keyword >= bytes.len() || !bytes[after_keyword].is_ascii_whitespace() {
264            idx += 1;
265            continue;
266        }
267
268        let module_start = skip_ascii_whitespace(bytes, after_keyword);
269        if module_start >= bytes.len() {
270            idx += 1;
271            continue;
272        }
273
274        if let Some(span) = parse_module_token(line, module_start)
275            && cursor_in_line >= module_start
276            && cursor_in_line <= span.end
277        {
278            return Some(ModuleReference {
279                kind,
280                module_name: &line[module_start..span.end],
281                module_start: line_offset + module_start,
282                module_end: line_offset + span.end,
283            });
284        }
285
286        idx += 1;
287    }
288
289    None
290}
291
#[cfg(test)]
mod tests {
    use super::{
        ModuleReferenceKind, extract_module_reference, extract_module_reference_extended,
        find_module_reference, find_module_reference_extended,
    };

    /// Byte offset of the first occurrence of `needle` in `text`, or 0 when it
    /// is absent.
    fn offset_of(text: &str, needle: &str) -> usize {
        text.find(needle).unwrap_or(0)
    }

    // Direct `use` / `require` references.

    #[test]
    fn finds_use_module_reference() {
        let text = "use Foo::Bar;";
        let found = find_module_reference(text, offset_of(text, "Bar"));

        assert_eq!(found.map(|r| r.kind), Some(ModuleReferenceKind::Use));
        assert_eq!(found.map(|r| r.module_name), Some("Foo::Bar"));
        assert_eq!(found.map(|r| r.module_start), Some(4));
        assert_eq!(found.map(|r| r.module_end), Some(12));
    }

    #[test]
    fn finds_require_module_reference() {
        let text = "require Foo::Bar;";
        let found = find_module_reference(text, offset_of(text, "Foo"));

        assert_eq!(found.map(|r| r.kind), Some(ModuleReferenceKind::Require));
        assert_eq!(found.map(|r| r.module_name), Some("Foo::Bar"));
    }

    #[test]
    fn canonicalizes_legacy_separator() {
        let text = "use Foo'Bar;";
        let canonical = extract_module_reference(text, offset_of(text, "Bar"));

        assert_eq!(canonical, Some("Foo::Bar".to_string()));
    }

    #[test]
    fn rejects_non_direct_import_forms() {
        let non_direct = [("use parent 'Foo::Bar';", 15), ("require 'Foo/Bar.pm';", 10)];
        for (text, cursor) in non_direct.iter().copied() {
            assert_eq!(find_module_reference(text, cursor), None);
        }
    }

    #[test]
    fn cursor_at_token_end_is_accepted() {
        let text = "use Foo::Bar;";
        let token_end = "use Foo::Bar".len();

        assert_eq!(extract_module_reference(text, token_end), Some("Foo::Bar".to_string()));
    }

    #[test]
    fn ignores_invalid_reference_tokens() {
        for text in ["use Foo::", "use Foo'", "5_10"].iter().copied() {
            assert_eq!(find_module_reference(text, 0), None);
        }
    }

    // Extended lookups covering `use parent` / `use base`.

    #[test]
    fn extended_finds_parent_single_quoted_module() {
        let text = "use parent 'Foo::Bar';";
        let found = find_module_reference_extended(text, offset_of(text, "Foo::Bar"));

        assert_eq!(found.map(|r| r.kind), Some(ModuleReferenceKind::Use));
        assert_eq!(found.map(|r| r.module_name), Some("Foo::Bar"));
    }

    #[test]
    fn extended_finds_base_single_quoted_module() {
        let text = "use base 'Foo::Bar';";
        let found = find_module_reference_extended(text, offset_of(text, "Foo::Bar"));

        assert_eq!(found.map(|r| r.kind), Some(ModuleReferenceKind::Use));
        assert_eq!(found.map(|r| r.module_name), Some("Foo::Bar"));
    }

    #[test]
    fn extended_finds_parent_qw_module() {
        let text = "use parent qw(Foo::Bar Baz::Qux);";
        let found = find_module_reference_extended(text, offset_of(text, "Baz::Qux"));

        assert_eq!(found.map(|r| r.module_name), Some("Baz::Qux"));
    }

    #[test]
    fn extended_finds_parent_double_quoted_module() {
        let text = r#"use parent "Foo::Bar";"#;
        let canonical = extract_module_reference_extended(text, offset_of(text, "Foo::Bar"));

        assert_eq!(canonical, Some("Foo::Bar".to_string()));
    }

    #[test]
    fn extended_still_finds_direct_use() {
        let text = "use File::Basename;";
        let canonical = extract_module_reference_extended(text, offset_of(text, "Basename"));

        assert_eq!(canonical, Some("File::Basename".to_string()));
    }

    #[test]
    fn extended_returns_none_for_cursor_outside_token() {
        let text = "use parent 'Foo::Bar';";
        // Place the cursor on the opening quote rather than on the module name.
        let on_quote = offset_of(text, "'");

        assert_eq!(find_module_reference_extended(text, on_quote), None);
    }

    #[test]
    fn extended_returns_none_for_empty_text() {
        assert_eq!(find_module_reference_extended("", 0), None);
    }

    #[test]
    fn extended_does_not_match_use_parenthetical() {
        // `parentModule` merely starts with `parent`; without a word boundary
        // after the pragma name the line must not be treated as `use parent`.
        let text = "use parentModule 'Foo::Bar';";
        let found = find_module_reference_extended(text, offset_of(text, "Foo::Bar"));

        assert_eq!(found, None);
    }
}