fresh/services/completion/provider.rs
//! Completion provider trait and shared types.
//!
//! This module defines the core abstraction for pluggable completion sources.
//! Providers can be implemented in Rust (for performance-critical, buffer-local
//! algorithms) or in TypeScript plugins (for extensibility).
6
7use std::fmt;
8
9/// A single completion candidate produced by a provider.
10#[derive(Debug, Clone)]
11pub struct CompletionCandidate {
12 /// The text to display in the completion popup.
13 pub label: String,
14
15 /// The text to insert when the completion is accepted.
16 /// If `None`, `label` is used as the insert text.
17 pub insert_text: Option<String>,
18
19 /// Optional detail shown alongside the label (e.g., type info).
20 pub detail: Option<String>,
21
22 /// Icon hint for the popup (e.g., "v" for variable, "λ" for function).
23 pub icon: Option<String>,
24
25 /// Provider-assigned relevance score. Higher is better.
26 /// Used by the `CompletionService` to merge and rank results from
27 /// multiple providers.
28 pub score: i64,
29
30 /// Which provider produced this candidate. Set automatically by the
31 /// service; providers should leave this as `None`.
32 pub source: Option<CompletionSourceId>,
33
34 /// If `true`, the insert_text contains LSP-style snippet syntax
35 /// (`$0`, `${1:placeholder}`, etc.).
36 pub is_snippet: bool,
37
38 /// Opaque provider-specific data carried through to acceptance.
39 /// For example, the LSP provider stores the serialised `CompletionItem`
40 /// so it can request `completionItem/resolve` on accept.
41 pub provider_data: Option<String>,
42}
43
44impl CompletionCandidate {
45 /// Create a simple word candidate (no snippet, no extra data).
46 pub fn word(label: String, score: i64) -> Self {
47 Self {
48 label,
49 insert_text: None,
50 detail: None,
51 icon: None,
52 score,
53 source: None,
54 is_snippet: false,
55 provider_data: None,
56 }
57 }
58}
59
/// Name under which a completion provider is registered.
///
/// A newtype around the provider's string id. It can be cloned, compared and
/// hashed, so it is usable as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CompletionSourceId(pub String);
63
64impl fmt::Display for CompletionSourceId {
65 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66 f.write_str(&self.0)
67 }
68}
69
/// A window of bytes taken from another open buffer, used for multi-buffer
/// scanning.
///
/// The `CompletionService` supplies these most-recently-focused first. Every
/// slice is limited to `NORMAL_SCAN_RADIUS` bytes around that buffer's
/// last-known cursor position, which keeps huge background buffers cheap.
#[derive(Debug, Clone)]
pub struct OtherBufferSlice {
    /// Id of the buffer the bytes came from (for dedup / labelling).
    pub buffer_id: u64,
    /// The bytes already extracted from that buffer.
    pub bytes: Vec<u8>,
    /// Human-readable name: the filename, or "untitled".
    pub label: String,
}
85
86/// Context passed to every provider when completion is requested.
87///
88/// All byte ranges are clamped to valid buffer positions by the service
89/// before being handed to providers.
90#[derive(Debug, Clone)]
91pub struct CompletionContext {
92 /// The prefix the user has already typed (from word start to cursor).
93 pub prefix: String,
94
95 /// Byte offset of the cursor in the buffer.
96 pub cursor_byte: usize,
97
98 /// Byte offset where the current word starts (for replacement range).
99 pub word_start_byte: usize,
100
101 /// Total buffer size in bytes.
102 pub buffer_len: usize,
103
104 /// Whether this buffer is lazily loaded (multi-gigabyte).
105 pub is_large_file: bool,
106
107 /// The safe scan window: providers MUST NOT read outside this range.
108 /// For normal files this covers a generous region around the cursor.
109 /// For huge files this is clamped to a small neighbourhood.
110 pub scan_range: std::ops::Range<usize>,
111
112 /// Byte position of the first visible line in the viewport.
113 /// Useful for proximity scoring—candidates near the viewport rank higher.
114 pub viewport_top_byte: usize,
115
116 /// Approximate byte position of the last visible line.
117 pub viewport_bottom_byte: usize,
118
119 /// The file extension or language id, if known.
120 pub language_id: Option<String>,
121
122 /// Extra characters (beyond alphanumeric and `_`) that are considered
123 /// part of an identifier in the current language.
124 ///
125 /// Examples:
126 /// - Lisp/Clojure/CSS: `"-"` (kebab-case)
127 /// - PHP/Bash: `"$"` (sigils)
128 /// - Ruby: `"?!"`
129 /// - Rust (default): `""` (only `[A-Za-z0-9_]`)
130 ///
131 /// Populated from `LanguageConfig::word_characters` if set, otherwise
132 /// empty (standard alphanumeric + underscore).
133 pub word_chars_extra: String,
134
135 /// Whether the prefix contains at least one uppercase character.
136 /// When `true`, providers should use **smart-case** matching:
137 /// prefer case-sensitive matches, and penalise case mismatches in scoring
138 /// rather than filtering them out entirely.
139 pub prefix_has_uppercase: bool,
140
141 /// Pre-sliced byte windows from other open buffers, ordered by MRU
142 /// (most recently used first). Enables multi-buffer dabbrev scanning.
143 pub other_buffers: Vec<OtherBufferSlice>,
144}
145
/// Upper bound, in bytes, on how far from the cursor providers scan in a
/// normal (non-huge) file.
pub const NORMAL_SCAN_RADIUS: usize = 512 * 1024; // 512 KiB
148
/// Upper bound, in bytes, on the scan radius for large/huge files; kept small
/// so completion stays instant.
pub const LARGE_FILE_SCAN_RADIUS: usize = 32 * 1024; // 32 KiB
151
152impl CompletionContext {
153 /// Compute the scan range for a given cursor position and buffer size.
154 pub fn compute_scan_range(
155 cursor_byte: usize,
156 buffer_len: usize,
157 is_large_file: bool,
158 ) -> std::ops::Range<usize> {
159 let radius = if is_large_file {
160 LARGE_FILE_SCAN_RADIUS
161 } else {
162 NORMAL_SCAN_RADIUS
163 };
164 let start = cursor_byte.saturating_sub(radius);
165 let end = (cursor_byte + radius).min(buffer_len);
166 start..end
167 }
168}
169
170/// Result returned by a provider's `provide` method.
171pub enum ProviderResult {
172 /// Synchronous results, available immediately.
173 Ready(Vec<CompletionCandidate>),
174 /// The provider will deliver results asynchronously (e.g., LSP).
175 /// The `u64` is a request id that will be matched later when results
176 /// arrive via `CompletionService::supply_async_results`.
177 Pending(u64),
178}
179
180/// Trait that all completion providers implement.
181///
182/// # Huge-file contract
183///
184/// Providers MUST honour `ctx.scan_range`. Reading outside that window on a
185/// lazily-loaded buffer will either trigger expensive chunk loads or return
186/// garbage bytes. The `CompletionService` enforces this constraint by
187/// construction, but providers should also be defensive.
188pub trait CompletionProvider: Send {
189 /// Unique, stable identifier for this provider (e.g., `"lsp"`, `"dabbrev"`).
190 fn id(&self) -> CompletionSourceId;
191
192 /// Human-readable name shown in UI (e.g., "Dynamic Abbreviation").
193 fn display_name(&self) -> &str;
194
195 /// Whether this provider should be active for the given context.
196 ///
197 /// Returning `false` skips the provider entirely (no allocation).
198 /// For example, a Rust-only provider might return `false` for markdown
199 /// files, or a heavy provider might decline for huge files.
200 fn is_enabled(&self, ctx: &CompletionContext) -> bool;
201
202 /// Produce completion candidates.
203 ///
204 /// Implementations receive the buffer bytes they need through the
205 /// `buffer_window` slice, which corresponds exactly to `ctx.scan_range`.
206 /// This avoids giving providers direct `Buffer` access (which would be
207 /// unsafe for the huge-file contract).
208 fn provide(&self, ctx: &CompletionContext, buffer_window: &[u8]) -> ProviderResult;
209
210 /// Priority tier for this provider. Lower numbers run first and their
211 /// results are shown higher in the list when scores are equal.
212 /// Convention: 0 = LSP, 10 = ctags/index, 20 = buffer words, 30 = dabbrev.
213 fn priority(&self) -> u32 {
214 20
215 }
216}
217
// ============================================================================
// Shared helpers for smart-case matching and language-aware word detection
// ============================================================================
221
/// Decide whether `c` counts as part of a word.
///
/// A character qualifies when it is alphanumeric, is an underscore, or
/// appears in `extra` (the language-specific additions such as `"-"` or
/// `"$"`). Use this instead of a bare `is_alphanumeric() || c == '_'` test so
/// that `word_chars_extra` is respected.
pub fn is_word_char_for_lang(c: char, extra: &str) -> bool {
    if c == '_' || c.is_alphanumeric() {
        return true;
    }
    extra.contains(c)
}
229
230/// Check whether a grapheme cluster is a word constituent.
231///
232/// A grapheme is a word constituent if *any* of its characters satisfy
233/// `is_word_char_for_lang`. This handles composed characters (e.g., `é`
234/// as `e` + combining acute) correctly.
235pub fn is_word_grapheme_for_lang(g: &str, extra: &str) -> bool {
236 g.chars().any(|c| is_word_char_for_lang(c, extra))
237}
238
/// Test whether `candidate` begins with `prefix` under the smart-case rule.
///
/// **Smart-case rule**: when the prefix contains an uppercase letter
/// (`prefix_has_upper` is `true`) the comparison is case-sensitive. Otherwise
/// both strings are lowercased first, making it case-insensitive.
pub fn smart_case_matches(candidate: &str, prefix: &str, prefix_has_upper: bool) -> bool {
    if !prefix_has_upper {
        // Lenient mode: compare the lowercased forms.
        let folded_candidate = candidate.to_lowercase();
        let folded_prefix = prefix.to_lowercase();
        return folded_candidate.starts_with(folded_prefix.as_str());
    }
    // Strict mode: the casing must match exactly.
    candidate.starts_with(prefix)
}
250
/// Score adjustment for a candidate whose casing differs from the prefix.
///
/// Returns `0` when `prefix_has_upper` is `true` (strict mode: anything that
/// matched already has the exact casing) or when `candidate` starts with
/// `prefix` verbatim. Otherwise returns `-50_000`, so the candidate still
/// matches but ranks below an exact-case hit.
///
/// The function does not check that the candidate matches at all: in lenient
/// mode any candidate that does not start with `prefix` byte-for-byte gets
/// the penalty.
pub fn case_mismatch_penalty(candidate: &str, prefix: &str, prefix_has_upper: bool) -> i64 {
    // Short-circuits: in strict mode the prefix comparison is skipped.
    let exact_case = prefix_has_upper || candidate.starts_with(prefix);
    if exact_case {
        0
    } else {
        -50_000
    }
}