Skip to main content

fresh/services/completion/
provider.rs

//! Completion provider trait and shared types.
//!
//! This module defines the core abstraction for pluggable completion sources.
//! Providers can be implemented in Rust (for performance-critical, buffer-local
//! algorithms) or in TypeScript plugins (for extensibility).
6
7use std::fmt;
8
9/// A single completion candidate produced by a provider.
10#[derive(Debug, Clone)]
11pub struct CompletionCandidate {
12    /// The text to display in the completion popup.
13    pub label: String,
14
15    /// The text to insert when the completion is accepted.
16    /// If `None`, `label` is used as the insert text.
17    pub insert_text: Option<String>,
18
19    /// Optional detail shown alongside the label (e.g., type info).
20    pub detail: Option<String>,
21
22    /// Icon hint for the popup (e.g., "v" for variable, "λ" for function).
23    pub icon: Option<String>,
24
25    /// Provider-assigned relevance score. Higher is better.
26    /// Used by the `CompletionService` to merge and rank results from
27    /// multiple providers.
28    pub score: i64,
29
30    /// Which provider produced this candidate. Set automatically by the
31    /// service; providers should leave this as `None`.
32    pub source: Option<CompletionSourceId>,
33
34    /// If `true`, the insert_text contains LSP-style snippet syntax
35    /// (`$0`, `${1:placeholder}`, etc.).
36    pub is_snippet: bool,
37
38    /// Opaque provider-specific data carried through to acceptance.
39    /// For example, the LSP provider stores the serialised `CompletionItem`
40    /// so it can request `completionItem/resolve` on accept.
41    pub provider_data: Option<String>,
42}
43
44impl CompletionCandidate {
45    /// Create a simple word candidate (no snippet, no extra data).
46    pub fn word(label: String, score: i64) -> Self {
47        Self {
48            label,
49            insert_text: None,
50            detail: None,
51            icon: None,
52            score,
53            source: None,
54            is_snippet: false,
55            provider_data: None,
56        }
57    }
58}
59
/// Name under which a completion provider is registered.
///
/// A newtype over the provider's string id (e.g., `"lsp"`, `"dabbrev"`).
/// It is comparable and hashable, so it can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CompletionSourceId(pub String);
63
64impl fmt::Display for CompletionSourceId {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        f.write_str(&self.0)
67    }
68}
69
/// A window of bytes taken from another open buffer, used for
/// multi-buffer scanning.
///
/// The `CompletionService` supplies these in MRU order (most recently
/// focused first). To keep huge background buffers cheap, each slice is
/// capped to `NORMAL_SCAN_RADIUS` bytes around that buffer's last-known
/// cursor position.
#[derive(Debug, Clone)]
pub struct OtherBufferSlice {
    /// Id of the buffer the bytes came from (for dedup / labelling).
    pub buffer_id: u64,
    /// The bytes already extracted from that buffer.
    pub bytes: Vec<u8>,
    /// Human-readable label (filename or "untitled").
    pub label: String,
}
85
86/// Context passed to every provider when completion is requested.
87///
88/// All byte ranges are clamped to valid buffer positions by the service
89/// before being handed to providers.
90#[derive(Debug, Clone)]
91pub struct CompletionContext {
92    /// The prefix the user has already typed (from word start to cursor).
93    pub prefix: String,
94
95    /// Byte offset of the cursor in the buffer.
96    pub cursor_byte: usize,
97
98    /// Byte offset where the current word starts (for replacement range).
99    pub word_start_byte: usize,
100
101    /// Total buffer size in bytes.
102    pub buffer_len: usize,
103
104    /// Whether this buffer is lazily loaded (multi-gigabyte).
105    pub is_large_file: bool,
106
107    /// The safe scan window: providers MUST NOT read outside this range.
108    /// For normal files this covers a generous region around the cursor.
109    /// For huge files this is clamped to a small neighbourhood.
110    pub scan_range: std::ops::Range<usize>,
111
112    /// Byte position of the first visible line in the viewport.
113    /// Useful for proximity scoring—candidates near the viewport rank higher.
114    pub viewport_top_byte: usize,
115
116    /// Approximate byte position of the last visible line.
117    pub viewport_bottom_byte: usize,
118
119    /// The file extension or language id, if known.
120    pub language_id: Option<String>,
121
122    /// Extra characters (beyond alphanumeric and `_`) that are considered
123    /// part of an identifier in the current language.
124    ///
125    /// Examples:
126    /// - Lisp/Clojure/CSS: `"-"` (kebab-case)
127    /// - PHP/Bash: `"$"` (sigils)
128    /// - Ruby: `"?!"`
129    /// - Rust (default): `""` (only `[A-Za-z0-9_]`)
130    ///
131    /// Populated from `LanguageConfig::word_characters` if set, otherwise
132    /// empty (standard alphanumeric + underscore).
133    pub word_chars_extra: String,
134
135    /// Whether the prefix contains at least one uppercase character.
136    /// When `true`, providers should use **smart-case** matching:
137    /// prefer case-sensitive matches, and penalise case mismatches in scoring
138    /// rather than filtering them out entirely.
139    pub prefix_has_uppercase: bool,
140
141    /// Pre-sliced byte windows from other open buffers, ordered by MRU
142    /// (most recently used first). Enables multi-buffer dabbrev scanning.
143    pub other_buffers: Vec<OtherBufferSlice>,
144}
145
/// Largest distance (in bytes) scanned on each side of the cursor for
/// normal files.
pub const NORMAL_SCAN_RADIUS: usize = 512 * 1024; // 512 KiB

/// Scan radius for large/huge files; kept small so completion stays instant.
pub const LARGE_FILE_SCAN_RADIUS: usize = 32 * 1024; // 32 KiB
151
152impl CompletionContext {
153    /// Compute the scan range for a given cursor position and buffer size.
154    pub fn compute_scan_range(
155        cursor_byte: usize,
156        buffer_len: usize,
157        is_large_file: bool,
158    ) -> std::ops::Range<usize> {
159        let radius = if is_large_file {
160            LARGE_FILE_SCAN_RADIUS
161        } else {
162            NORMAL_SCAN_RADIUS
163        };
164        let start = cursor_byte.saturating_sub(radius);
165        let end = (cursor_byte + radius).min(buffer_len);
166        start..end
167    }
168}
169
170/// Result returned by a provider's `provide` method.
171pub enum ProviderResult {
172    /// Synchronous results, available immediately.
173    Ready(Vec<CompletionCandidate>),
174    /// The provider will deliver results asynchronously (e.g., LSP).
175    /// The `u64` is a request id that will be matched later when results
176    /// arrive via `CompletionService::supply_async_results`.
177    Pending(u64),
178}
179
180/// Trait that all completion providers implement.
181///
182/// # Huge-file contract
183///
184/// Providers MUST honour `ctx.scan_range`. Reading outside that window on a
185/// lazily-loaded buffer will either trigger expensive chunk loads or return
186/// garbage bytes. The `CompletionService` enforces this constraint by
187/// construction, but providers should also be defensive.
188pub trait CompletionProvider: Send {
189    /// Unique, stable identifier for this provider (e.g., `"lsp"`, `"dabbrev"`).
190    fn id(&self) -> CompletionSourceId;
191
192    /// Human-readable name shown in UI (e.g., "Dynamic Abbreviation").
193    fn display_name(&self) -> &str;
194
195    /// Whether this provider should be active for the given context.
196    ///
197    /// Returning `false` skips the provider entirely (no allocation).
198    /// For example, a Rust-only provider might return `false` for markdown
199    /// files, or a heavy provider might decline for huge files.
200    fn is_enabled(&self, ctx: &CompletionContext) -> bool;
201
202    /// Produce completion candidates.
203    ///
204    /// Implementations receive the buffer bytes they need through the
205    /// `buffer_window` slice, which corresponds exactly to `ctx.scan_range`.
206    /// This avoids giving providers direct `Buffer` access (which would be
207    /// unsafe for the huge-file contract).
208    fn provide(&self, ctx: &CompletionContext, buffer_window: &[u8]) -> ProviderResult;
209
210    /// Priority tier for this provider. Lower numbers run first and their
211    /// results are shown higher in the list when scores are equal.
212    /// Convention: 0 = LSP, 10 = ctags/index, 20 = buffer words, 30 = dabbrev.
213    fn priority(&self) -> u32 {
214        20
215    }
216}
217
218// ============================================================================
219// Shared helpers for smart-case matching and language-aware word detection
220// ============================================================================
221
/// Language-aware test for "is `c` part of a word?".
///
/// Returns `true` when `c` is alphanumeric, is an underscore, or appears
/// in `extra` (the language's additional word characters). This replaces
/// the naive `is_alphanumeric() || c == '_'` check.
pub fn is_word_char_for_lang(c: char, extra: &str) -> bool {
    // Universal word characters need no language lookup.
    if c == '_' || c.is_alphanumeric() {
        return true;
    }
    // Otherwise the character counts only if the language opted it in.
    extra.contains(c)
}
229
230/// Check whether a grapheme cluster is a word constituent.
231///
232/// A grapheme is a word constituent if *any* of its characters satisfy
233/// `is_word_char_for_lang`. This handles composed characters (e.g., `é`
234/// as `e` + combining acute) correctly.
235pub fn is_word_grapheme_for_lang(g: &str, extra: &str) -> bool {
236    g.chars().any(|c| is_word_char_for_lang(c, extra))
237}
238
/// Test whether `candidate` starts with `prefix` under the smart-case rule.
///
/// **Smart-case rule**: if the prefix contains any uppercase letter
/// (`prefix_has_upper`), the comparison is case-sensitive. Otherwise both
/// sides are compared case-insensitively.
///
/// The case-insensitive path lowercases both strings character by
/// character while comparing, so it performs no heap allocation per call.
/// It also avoids the context-sensitive final-sigma rule of
/// `str::to_lowercase`, which would make an all-caps candidate ending in
/// `Σ` fail to match its own lowercase spelling typed with `σ`.
///
/// An empty `prefix` matches every candidate.
pub fn smart_case_matches(candidate: &str, prefix: &str, prefix_has_upper: bool) -> bool {
    if prefix_has_upper {
        return candidate.starts_with(prefix);
    }

    // Walk both lowercase streams in lockstep; the candidate must supply
    // every lowercase char of the prefix, in order, from its start.
    let mut folded_candidate = candidate.chars().flat_map(char::to_lowercase);
    prefix
        .chars()
        .flat_map(char::to_lowercase)
        .all(|wanted| folded_candidate.next() == Some(wanted))
}
250
/// Score adjustment for a candidate whose casing differs from the prefix.
///
/// Returns `0` when `prefix_has_upper` is `true` (strict mode: anything
/// that matched is already exact-case) or when `candidate` starts with
/// `prefix` byte-for-byte. Otherwise returns `-50_000`, so the candidate
/// still matches but ranks below an exact-case hit.
pub fn case_mismatch_penalty(candidate: &str, prefix: &str, prefix_has_upper: bool) -> i64 {
    /// Amount added to the score when only the casing differs.
    const CASE_MISMATCH: i64 = -50_000;

    let exact_case = prefix_has_upper || candidate.starts_with(prefix);
    if exact_case {
        0
    } else {
        CASE_MISMATCH
    }
}