Skip to main content

perl_lsp_selection_range/
lib.rs

1//! textDocument/selectionRange handler - smart selection expansion
2//!
3//! This module provides intelligent selection expansion that grows from
4//! the narrowest syntactic element outward:
5//!
6//! - **Strings**: string content -> full string (with quotes) -> expression
7//! - **Hash access**: key -> subscript `{key}` -> full expression `$h{key}`
8//! - **Function names**: name -> signature -> full sub definition
//! - **General**: word -> trimmed line -> full line -> statement -> block -> function -> file
10
11use lsp_types::{Position, Range, SelectionRange};
12
13// ---------------------------------------------------------------------------
14// Byte / position mapping helpers
15// ---------------------------------------------------------------------------
16
17fn byte_offset(text: &str, pos: Position) -> usize {
18    let mut off = 0usize;
19    for (line, l) in text.split_inclusive('\n').enumerate() {
20        if line as u32 == pos.line {
21            let mut col = 0u32;
22            for (i, ch) in l.char_indices() {
23                if col == pos.character {
24                    return off + i;
25                }
26                col += ch.len_utf16() as u32;
27            }
28            return off + l.len();
29        }
30        off += l.len();
31    }
32    off
33}
34
35fn make_range(text: &str, start: usize, end: usize) -> Range {
36    let start = start.min(text.len());
37    let end = end.min(text.len());
38    let mut line = 0u32;
39    let mut col = 0u32;
40    let mut i = 0usize;
41    let mut s = Position::new(0, 0);
42    let mut e = Position::new(0, 0);
43    let mut found_start = false;
44    let mut found_end = false;
45    for ch in text.chars() {
46        if i == start {
47            s = Position::new(line, col);
48            found_start = true;
49        }
50        if i == end {
51            e = Position::new(line, col);
52            found_end = true;
53            break;
54        }
55        i += ch.len_utf8();
56        if ch == '\n' {
57            line += 1;
58            col = 0;
59        } else {
60            col += ch.len_utf16() as u32;
61        }
62    }
63    if !found_start {
64        s = Position::new(line, col);
65    }
66    if !found_end {
67        e = Position::new(line, col);
68    }
69    Range::new(s, e)
70}
71
72// ---------------------------------------------------------------------------
73// Span finders
74// ---------------------------------------------------------------------------
75
/// Find the word (identifier/variable) span around `off`.
///
/// Returns `(start, end)` as byte offsets into `bytes`, `end` exclusive.
///
/// - Scanning left, identifier bytes (`[A-Za-z0-9_]`) and the sigils `$`,
///   `@`, `%` all count as part of the word.
/// - Scanning right, only identifier bytes are consumed, except that when the
///   cursor sits at the very start of the word any sigils under it are
///   skipped first, so a cursor on the `$` of `$name` yields `$name` rather
///   than an empty span.
/// - `off` is clamped to `bytes.len()`; at end of input the byte just before
///   the cursor decides whether there is a word to extend into.
fn word_span(bytes: &[u8], off: usize) -> (usize, usize) {
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let is_sigil = |b: u8| matches!(b, b'$' | b'@' | b'%');

    let off = off.min(bytes.len());

    // The word starts right after the last non-word byte left of the cursor.
    let start = bytes[..off]
        .iter()
        .rposition(|&b| !(is_ident(b) || is_sigil(b)))
        .map_or(0, |i| i + 1);

    // Only step over sigils when nothing word-like precedes the cursor;
    // otherwise `$a%3` with the cursor after `a` would swallow `%3`.
    let body = if start == off {
        off + bytes[off..].iter().take_while(|&&b| is_sigil(b)).count()
    } else {
        off
    };
    let end = body + bytes[body..].iter().take_while(|&&b| is_ident(b)).count();

    (start, end)
}
95
/// If `off` appears to sit inside a quoted string, returns
/// `(content_start, content_end, full_start, full_end)`.
///
/// The content span excludes the quote characters; the full span includes
/// them. Double quotes are tried before single quotes. For each quote kind
/// the nearest unescaped quote left of `off` and the nearest unescaped quote
/// at or right of `off` are paired; neither search crosses a newline. This
/// is a purely local heuristic: it does not track whether the left quote
/// actually opens a string.
fn string_span(text: &str, off: usize) -> Option<(usize, usize, usize, usize)> {
    let bytes = text.as_bytes();

    // A quote is a delimiter only when an even number of backslashes
    // (possibly zero) immediately precedes it.
    let is_delimiter = |idx: usize, quote: u8| {
        bytes[idx] == quote
            && bytes[..idx]
                .iter()
                .rev()
                .take_while(|&&b| b == b'\\')
                .count()
                .is_multiple_of(2)
    };

    [b'"', b'\''].iter().find_map(|&quote| {
        // Nearest opening candidate to the left, staying on the current line.
        let open_pos = (0..off)
            .rev()
            .take_while(|&i| bytes[i] != b'\n')
            .find(|&i| is_delimiter(i, quote))?;

        // Nearest closing candidate at or after the cursor, same line only.
        let close_pos = (off..bytes.len())
            .take_while(|&i| bytes[i] != b'\n')
            .find(|&i| is_delimiter(i, quote))?;

        Some((open_pos + 1, close_pos, open_pos, close_pos + 1))
    })
}
153
/// If `off` is inside a hash subscript `{...}`, return
/// `(key_start, key_end, subscript_start, subscript_end, expr_start, expr_end)`.
///
/// - key: the text inside `{}`
/// - subscript: `{key}` including braces
/// - expr: `$hash{key}` / `$ref->{key}` including the variable;
///   `expr_end` always equals `subscript_end`
///
/// The enclosing brace pair is found with nesting-aware scans. It is accepted
/// as a subscript only when the `{` (optionally after ASCII whitespace) is
/// preceded by `->`, or by an identifier that itself carries a `$`, `@` or
/// `%` sigil. A bare word before the brace (`sub foo {`, `else {`, `map {`)
/// is a block, not a hash access, and yields `None`. `off` is clamped to the
/// length of `text`.
fn hash_access_span(text: &str, off: usize) -> Option<(usize, usize, usize, usize, usize, usize)> {
    let bytes = text.as_bytes();
    let off = off.min(bytes.len());
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';

    // Innermost `{` left of the cursor that has not been closed again.
    let mut pending_closers = 0usize;
    let open_pos = (0..off).rev().find(|&i| match bytes[i] {
        b'}' => {
            pending_closers += 1;
            false
        }
        b'{' if pending_closers == 0 => true,
        b'{' => {
            pending_closers -= 1;
            false
        }
        _ => false,
    })?;

    // Walk left from the brace over what should be the subscripted expression.
    let mut expr_start = open_pos;
    while expr_start > 0 && bytes[expr_start - 1].is_ascii_whitespace() {
        expr_start -= 1;
    }
    let via_arrow = bytes[..expr_start].ends_with(b"->");
    if via_arrow {
        expr_start -= 2;
    }
    let ident_end = expr_start;
    while expr_start > 0 && is_ident(bytes[expr_start - 1]) {
        expr_start -= 1;
    }
    let has_ident = expr_start < ident_end;
    let has_sigil = expr_start > 0 && matches!(bytes[expr_start - 1], b'$' | b'@' | b'%');
    if has_sigil {
        expr_start -= 1;
    }

    // A bare identifier in front of `{` introduces a block; a sigil directly
    // in front of `{` is a dereference block. Neither is a hash subscript.
    if !via_arrow && !(has_ident && has_sigil) {
        return None;
    }

    // Matching `}` at or right of the cursor.
    let mut pending_openers = 0usize;
    let close_pos = (off..bytes.len()).find(|&i| match bytes[i] {
        b'{' => {
            pending_openers += 1;
            false
        }
        b'}' if pending_openers == 0 => true,
        b'}' => {
            pending_openers -= 1;
            false
        }
        _ => false,
    })?;

    let subscript_end = close_pos + 1;
    Some((open_pos + 1, close_pos, open_pos, subscript_end, expr_start, subscript_end))
}
249
/// If `off` lies within a `sub` definition (from just after the `sub `
/// keyword to the closing brace), return
/// `(name_start, name_end, signature, full_start, full_end)`.
///
/// - `name_start..name_end` is the identifier after `sub` (empty for an
///   anonymous sub).
/// - `signature` is `Some((start, end))` for a balanced parenthesised list
///   that appears between the name and the body, parentheses included;
///   a `(` that first shows up inside the body is not a signature.
/// - `full_start` is the position of the `sub` keyword; `full_end` is one
///   past the brace that closes the body, or `text.len()` when the body is
///   not closed.
///
/// Candidate keywords are examined from the nearest one leftwards, and the
/// first definition whose extent contains `off` wins, so a nested anonymous
/// sub that ended before the cursor does not hide the enclosing sub. A
/// `sub ` that is merely the tail of an identifier or variable (`mysub `,
/// `$sub `) is ignored. Braces and parentheses are counted textually; string
/// and comment contents are not excluded.
fn sub_definition_span(
    text: &str,
    off: usize,
) -> Option<(usize, usize, Option<(usize, usize)>, usize, usize)> {
    let bytes = text.as_bytes();
    let off = off.min(text.len());
    let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let is_keyword_at = |kw: usize| {
        kw == 0 || !(is_ident(bytes[kw - 1]) || matches!(bytes[kw - 1], b'$' | b'@' | b'%'))
    };

    text[..off]
        .rmatch_indices("sub ")
        .map(|(kw, _)| kw)
        .filter(|&kw| is_keyword_at(kw))
        .find_map(|sub_keyword| {
            // Name: first non-whitespace run of identifier bytes after `sub `.
            let after_keyword = sub_keyword + 4;
            let name_start = text[after_keyword..]
                .find(|c: char| !c.is_whitespace())
                .map_or(after_keyword, |i| after_keyword + i);
            let name_end =
                name_start + bytes[name_start..].iter().take_while(|&&b| is_ident(b)).count();

            // Signature: only when `(` comes before the body `{` or a `;`.
            let sig_span = text[name_end..]
                .find(|c: char| matches!(c, '(' | '{' | ';'))
                .map(|i| name_end + i)
                .filter(|&i| bytes[i] == b'(')
                .and_then(|sig_start| {
                    let mut depth = 0usize;
                    bytes[sig_start..]
                        .iter()
                        .position(|&b| {
                            match b {
                                b'(' => depth += 1,
                                b')' => depth -= 1,
                                _ => {}
                            }
                            depth == 0
                        })
                        .map(|i| (sig_start, sig_start + i + 1))
                });

            // Body: brace-match starting after the signature, so braces in
            // default-value expressions are not mistaken for the body.
            let body_scan = sig_span.map_or(name_end, |(_, sig_end)| sig_end);
            let mut depth = 0usize;
            let func_end = bytes[body_scan..]
                .iter()
                .position(|&b| match b {
                    b'{' => {
                        depth += 1;
                        false
                    }
                    b'}' if depth > 0 => {
                        depth -= 1;
                        depth == 0
                    }
                    _ => false,
                })
                .map_or(text.len(), |i| body_scan + i + 1);

            (off <= func_end).then_some((name_start, name_end, sig_span, sub_keyword, func_end))
        })
}
330
331// ---------------------------------------------------------------------------
332// Chain builder
333// ---------------------------------------------------------------------------
334
335/// Build a `SelectionRange` chain from a list of `(start, end)` spans.
336/// Deduplicates ranges with the same LSP positions and ensures each parent
337/// strictly encompasses its child.
338fn build_chain(text: &str, spans: &[(usize, usize)]) -> SelectionRange {
339    // Build ranges from spans, deduplicating
340    let mut ranges: Vec<Range> = Vec::new();
341    for &(s, e) in spans {
342        let r = make_range(text, s, e);
343        if ranges.last().is_none_or(|prev| *prev != r) {
344            ranges.push(r);
345        }
346    }
347
348    // Build nested chain from outermost to innermost
349    let mut chain = SelectionRange { range: Range::default(), parent: None };
350    for r in ranges.into_iter().rev() {
351        chain = SelectionRange { range: r, parent: Some(Box::new(chain)) };
352    }
353    // The outermost `chain` is now the innermost selection; strip the dummy
354    // we may have left at the tail.
355    strip_default_tail(chain)
356}
357
358/// Remove the trailing dummy `Range::default()` node we may have seeded.
359fn strip_default_tail(mut sel: SelectionRange) -> SelectionRange {
360    if sel.parent.is_none() && sel.range == Range::default() {
361        // Shouldn't happen if spans is non-empty, but safety fallback
362        return sel;
363    }
364    if let Some(ref mut p) = sel.parent {
365        if p.parent.is_none() && p.range == Range::default() {
366            sel.parent = None;
367        } else {
368            **p = strip_default_tail(*p.clone());
369        }
370    }
371    sel
372}
373
374// ---------------------------------------------------------------------------
375// Public API
376// ---------------------------------------------------------------------------
377
378/// Generates smart selection ranges for given positions, expanding from the
379/// narrowest syntactic element outward to the file scope.
380///
381/// The expansion chain is context-sensitive:
382///
383/// - **Inside a string**: string content -> full string (with quotes) ->
384///   expression -> statement -> block -> function -> file
385/// - **Inside a hash access**: key -> subscript `{key}` -> full expression
386///   `$h{key}` -> statement -> block -> function -> file
387/// - **On a function name**: name -> signature (if present) -> full sub
388///   definition -> file
389/// - **General**: word -> trimmed line -> full line -> statement -> block ->
390///   function -> file
391pub fn selection_ranges(text: &str, positions: &[Position]) -> Vec<SelectionRange> {
392    positions
393        .iter()
394        .map(|&pos| {
395            let off = byte_offset(text, pos);
396            let bytes = text.as_bytes();
397
398            let mut spans: Vec<(usize, usize)> = Vec::new();
399
400            // 1. Word span (identifier or variable)
401            let (w_start, w_end) = word_span(bytes, off);
402            spans.push((w_start, w_end));
403
404            // 2. Context-specific intermediate ranges
405            //
406            // String content -> full string
407            if let Some((cs, ce, fs, fe)) = string_span(text, off) {
408                // Insert content span before word if narrower
409                if cs <= w_start && ce >= w_end && (cs != w_start || ce != w_end) {
410                    spans.push((cs, ce));
411                }
412                spans.push((fs, fe));
413            }
414
415            // Hash access: key -> subscript -> full expression
416            if let Some((ks, ke, ss, se, es, ee)) = hash_access_span(text, off) {
417                // Key span
418                if ks <= w_start && ke >= w_end && (ks != w_start || ke != w_end) {
419                    spans.push((ks, ke));
420                }
421                spans.push((ss, se));
422                spans.push((es, ee));
423            }
424
425            // 3. Trimmed line
426            let line_start = text[..off].rfind('\n').map(|i| i + 1).unwrap_or(0);
427            let line_end = text[off..].find('\n').map(|i| off + i).unwrap_or(text.len());
428            let line_text = &text[line_start..line_end];
429            let trim_left = line_text.find(|c: char| !c.is_whitespace()).unwrap_or(0);
430            let trim_right = line_text
431                .rfind(|c: char| !c.is_whitespace())
432                .map(|i| i + 1)
433                .unwrap_or(line_text.len());
434            spans.push((line_start + trim_left, line_start + trim_right));
435
436            // 4. Full line
437            spans.push((line_start, line_end));
438
439            // 5. Statement (semicolon boundaries)
440            let stmt_start = text[..off]
441                .rfind(';')
442                .map(|i| {
443                    text[i + 1..]
444                        .chars()
445                        .position(|c| !c.is_whitespace())
446                        .map(|j| i + 1 + j)
447                        .unwrap_or(i + 1)
448                })
449                .unwrap_or(0);
450            let stmt_end = text[off..]
451                .find(';')
452                .map(|i| off + i + 1)
453                .unwrap_or_else(|| text[off..].find('\n').map(|i| off + i).unwrap_or(text.len()));
454            spans.push((stmt_start, stmt_end));
455
456            // 6. Block (brace boundaries)
457            let block_start = text[..off].rfind('{').unwrap_or(0);
458            let block_end = text[off..].find('}').map(|i| off + i + 1).unwrap_or(text.len());
459            if block_end > block_start {
460                spans.push((block_start, block_end));
461            }
462
463            // 7. Function (sub definition)
464            if let Some((name_s, name_e, sig_span, full_s, full_e)) = sub_definition_span(text, off)
465            {
466                // If cursor is on/near the name, add name span first
467                if off >= name_s && off <= name_e {
468                    spans.push((name_s, name_e));
469                }
470                // Add signature if present
471                if let Some((sig_s, sig_e)) = sig_span {
472                    // Name + signature combined
473                    spans.push((name_s, sig_e));
474                    // Just signature
475                    if off >= sig_s && off <= sig_e {
476                        spans.push((sig_s, sig_e));
477                    }
478                }
479                spans.push((full_s, full_e));
480            } else {
481                // Fallback: file-level
482                spans.push((0, text.len()));
483            }
484
485            // 8. File scope (always outermost)
486            spans.push((0, text.len()));
487
488            // Sort spans by size (smallest first), then deduplicate
489            spans.sort_by(|a, b| {
490                let size_a = a.1.saturating_sub(a.0);
491                let size_b = b.1.saturating_sub(b.0);
492                size_a.cmp(&size_b)
493            });
494            spans.dedup();
495
496            // Filter out spans that don't contain the cursor offset
497            spans.retain(|&(s, e)| s <= off && e >= off);
498
499            // Ensure strictly growing containment
500            let mut filtered: Vec<(usize, usize)> = Vec::new();
501            for span in &spans {
502                if let Some(prev) = filtered.last() {
503                    // Must be strictly larger (encompass previous)
504                    if span.0 <= prev.0 && span.1 >= prev.1 && (span.0 < prev.0 || span.1 > prev.1)
505                    {
506                        filtered.push(*span);
507                    }
508                } else {
509                    filtered.push(*span);
510                }
511            }
512
513            if filtered.is_empty() {
514                filtered.push((0, text.len()));
515            }
516
517            build_chain(text, &filtered)
518        })
519        .collect()
520}
521
#[cfg(test)]
mod tests {
    use super::*;

    /// `(start_line, start_col, end_line, end_col)` of one range.
    type Flat = (u32, u32, u32, u32);

    /// Flattens a selection chain, innermost first, into `Flat` tuples.
    fn chain_to_vec(sel: &SelectionRange) -> Vec<Flat> {
        let mut out = Vec::new();
        let mut cur = Some(sel);
        while let Some(node) = cur {
            let r = &node.range;
            out.push((r.start.line, r.start.character, r.end.line, r.end.character));
            cur = node.parent.as_deref();
        }
        out
    }

    /// Asserts that every parent contains its child and differs from it.
    /// Positions are compared as `(line, column)` pairs, so the check is
    /// meaningful on single-line inputs too.
    fn assert_strictly_nested(chain: &[Flat]) {
        for pair in chain.windows(2) {
            let (inner, outer) = (pair[0], pair[1]);
            let starts_before = (outer.0, outer.1) <= (inner.0, inner.1);
            let ends_after = (outer.2, outer.3) >= (inner.2, inner.3);
            assert!(
                starts_before && ends_after && outer != inner,
                "parent {outer:?} must strictly encompass child {inner:?}",
            );
        }
    }

    #[test]
    fn string_content_expands_to_full_string() {
        // Byte layout: the opening quote is at 8, `h` at 9, `e` at 10,
        // the closing quote at 14 and `;` at 15.
        let text = r#"my $x = "hello";"#;
        let pos = Position::new(0, 10);
        let results = selection_ranges(text, &[pos]);
        assert_eq!(results.len(), 1);
        let chain = chain_to_vec(&results[0]);

        assert!(chain.len() >= 3, "expected at least 3 levels for string, got {}", chain.len());
        // String content (here identical to the word), then the quoted string.
        assert_eq!(chain[0], (0, 9, 0, 14));
        assert_eq!(chain[1], (0, 8, 0, 15));
        assert_eq!(chain.last().copied(), Some((0, 0, 0, 16)));
        assert_strictly_nested(&chain);
    }

    #[test]
    fn hash_access_key_expands() {
        // Byte layout: `$` at 8, `{` at 13, `k` at 14, `}` at 17, `;` at 18.
        let text = r#"my $v = $hash{key};"#;
        let pos = Position::new(0, 14);
        let results = selection_ranges(text, &[pos]);
        assert_eq!(results.len(), 1);
        let chain = chain_to_vec(&results[0]);

        assert!(
            chain.len() >= 3,
            "expected at least 3 levels for hash access, got {}",
            chain.len()
        );
        // key -> `{key}` -> `$hash{key}`
        assert_eq!(chain[0], (0, 14, 0, 17));
        assert_eq!(chain[1], (0, 13, 0, 18));
        assert_eq!(chain[2], (0, 8, 0, 18));
        assert_strictly_nested(&chain);
    }

    #[test]
    fn function_name_expands_to_full_sub() {
        let text = "sub greet ($name) {\n    print $name;\n}\n";
        // `greet` occupies bytes 4..9; the cursor is on its `r`.
        let pos = Position::new(0, 5);
        let results = selection_ranges(text, &[pos]);
        assert_eq!(results.len(), 1);
        let chain = chain_to_vec(&results[0]);

        assert!(
            chain.len() >= 2,
            "expected at least 2 levels for function name, got {}",
            chain.len()
        );
        assert_eq!(chain[0], (0, 4, 0, 9), "innermost should be the sub name");
        assert!(
            chain.contains(&(0, 0, 2, 1)),
            "chain should contain the full sub definition, got {chain:?}",
        );
        assert_eq!(chain.last().copied(), Some((0, 0, 3, 0)), "outermost should be the file");
        assert_strictly_nested(&chain);
    }

    #[test]
    fn empty_text_returns_zero_range() {
        let text = "";
        let pos = Position::new(0, 0);
        let results = selection_ranges(text, &[pos]);
        assert_eq!(results.len(), 1);
        assert_eq!(chain_to_vec(&results[0]), vec![(0, 0, 0, 0)]);
    }

    #[test]
    fn multiple_positions() {
        let text = "my $x = 1;\nmy $y = 2;\n";
        let positions = vec![Position::new(0, 3), Position::new(1, 3)];
        let results = selection_ranges(text, &positions);
        assert_eq!(results.len(), 2);

        for result in &results {
            let chain = chain_to_vec(result);
            assert_eq!(chain.last().copied(), Some((0, 0, 2, 0)), "outermost should be the file");
            assert_strictly_nested(&chain);
        }
    }
}