fresh-editor 0.3.12

A lightweight, fast terminal-based text editor with LSP support and TypeScript plugins
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
//! The core fuzzy-matching algorithm.
//!
//! Given a [`PreparedPattern`] and a target string, this module scores the
//! match (or rejects it) using two complementary strategies:
//!
//! 1. A **DP pass** ([`find_best_match`]) that tracks the highest-scoring
//!    way to interleave query characters into the target, rewarding
//!    consecutive matches, word boundaries, camelCase transitions, etc.
//!    The DP uses an arena of backpointer nodes instead of cloning
//!    position vectors, keeping each state update O(1).
//!
//! 2. A **contiguous-substring pass** ([`find_contiguous_match`]) that
//!    finds literal-substring occurrences of the query and scores them.
//!    The DP sometimes prefers scattered matches with more word-boundary
//!    bonuses, so we always compare with the substring result and take
//!    whichever scores higher.
//!
//! Early rejection via [`super::pattern::is_subsequence_prepared`] happens
//! before any target-side allocation, so the heavy lifting here only runs
//! for candidates that could plausibly match.
//!
//! # Amortised hot path via [`FuzzyMatcher`]
//!
//! A [`FuzzyMatcher`] owns a [`PreparedPattern`] *and* two reusable
//! `Vec<char>` scratch buffers for the target.  Callers construct one per
//! keystroke and reuse it across all candidate targets:
//!
//! ```ignore
//! let mut matcher = FuzzyMatcher::new(search_query);
//! for file in files {
//!     let m = matcher.match_target(&file.relative_path);
//!     // ...
//! }
//! ```
//!
//! On the first call the scratch buffers grow to the size of the longest
//! target seen; every subsequent call truncates and refills them in place,
//! so the allocator is touched once per keystroke rather than once per
//! candidate.  This is the explicit alternative to stashing the scratch in
//! `thread_local!` state — lifetimes are visible in the signature and
//! future parallelisation can give each worker its own matcher.

use super::pattern::{is_subsequence_prepared, PreparedPattern, PreparedTerm};
use super::{score, FuzzyMatch};

/// A reusable fuzzy matcher that amortises both query preparation and
/// target-side scratch allocation across many calls.
///
/// See the module docs for usage.
#[derive(Debug, Clone)]
pub struct FuzzyMatcher {
    pattern: PreparedPattern,
    /// Scratch buffer: `target.chars().collect()`, cleared and refilled per call.
    target_chars: Vec<char>,
    /// Scratch buffer: lowercased target chars, cleared and refilled per call.
    target_lower: Vec<char>,
}

impl FuzzyMatcher {
    /// Create a new matcher for the given query, with empty scratch buffers.
    ///
    /// The scratch buffers start empty and grow lazily on the first
    /// [`match_target`](Self::match_target) call that gets past rejection.
    pub fn new(query: &str) -> Self {
        Self::from_pattern(PreparedPattern::new(query))
    }

    /// Construct a matcher from an already-built [`PreparedPattern`].
    pub fn from_pattern(pattern: PreparedPattern) -> Self {
        Self {
            pattern,
            target_chars: Vec::new(),
            target_lower: Vec::new(),
        }
    }

    /// Returns `true` if the prepared query has no terms (empty / whitespace).
    pub fn is_empty(&self) -> bool {
        self.pattern.is_empty()
    }

    /// Match a single target string against this matcher's query.
    ///
    /// Reuses the internal `Vec<char>` scratch buffers — no per-call
    /// allocations on the common "passed rejection → scoring" path after
    /// the first call has warmed them up.
    pub fn match_target(&mut self, target: &str) -> FuzzyMatch {
        if self.pattern.is_empty() {
            return FuzzyMatch {
                matched: true,
                score: 0,
                match_positions: Vec::new(),
            };
        }

        // Fast rejection gate.  Runs entirely on `&str`, no allocation.
        // In the multi-term case we must pass the gate for *every* term.
        for term in &self.pattern.terms {
            if !is_subsequence_prepared(term, target) {
                return FuzzyMatch::no_match();
            }
        }

        // Refill scratch buffers in place.  No `malloc`/`realloc` on
        // steady-state — the existing capacity is reused after the first
        // call that made them large enough for the corpus's worst case.
        refill_target(&mut self.target_chars, &mut self.target_lower, target);

        // Split the borrow so the scoring helpers can see both the pattern
        // and the scratch buffers simultaneously without fighting the
        // borrow checker.
        let pattern = &self.pattern;
        let target_chars: &[char] = &self.target_chars;
        let target_lower: &[char] = &self.target_lower;

        if pattern.terms.len() > 1 {
            score_multi_term(pattern, target_chars, target_lower)
        } else {
            score_single_term(&pattern.terms[0], target_chars, target_lower)
        }
    }
}

/// Entry point used by the backward-compatible free functions.
///
/// Creates a temporary `FuzzyMatcher` (which allocates empty scratch Vecs
/// but does not grow them until first use), so callers using the legacy
/// `fuzzy_match_prepared` API still work, just without cross-call
/// amortisation.  Hot-path callers should construct their own
/// `FuzzyMatcher` and reuse it.
pub(super) fn match_prepared(pattern: &PreparedPattern, target: &str) -> FuzzyMatch {
    let mut matcher = FuzzyMatcher::from_pattern(pattern.clone());
    matcher.match_target(target)
}

/// Refill the scratch buffers with the raw and lowercased characters of
/// `target`.  Grows the buffers if needed; otherwise reuses existing
/// capacity, so steady-state calls do zero allocations.
///
/// Note: for non-ASCII chars with expanding case mappings (e.g. Turkish
/// `İ` → `i` + combining dot), `target_lower` can be longer than
/// `target_chars`.  This matches the semantics of the previous
/// `target.to_lowercase().chars().collect()` implementation, including
/// its latent indexing inconsistency — preserving behaviour is out of
/// scope for this change.
fn refill_target(target_chars: &mut Vec<char>, target_lower: &mut Vec<char>, target: &str) {
    target_chars.clear();
    target_lower.clear();
    // `target.len()` is the byte length, which is an upper bound on the
    // character count (chars are 1-4 bytes in UTF-8).  Reserving once
    // avoids any growth checks during the push loop.
    target_chars.reserve(target.len());
    target_lower.reserve(target.len());
    for c in target.chars() {
        target_chars.push(c);
        for lc in c.to_lowercase() {
            target_lower.push(lc);
        }
    }
}

/// Score a single prepared term against a target whose char buffers have
/// already been refilled in the caller's scratch space.
fn score_single_term(
    term: &PreparedTerm,
    target_chars: &[char],
    target_lower: &[char],
) -> FuzzyMatch {
    let query_lower = &term.lower_chars;
    let query_len = query_lower.len();
    let target_len = target_lower.len();

    // Try to find the best matching positions using a DP approach.
    let dp_result = find_best_match(query_lower, target_chars, target_lower);

    // Also check for a contiguous substring match.  The DP may miss this
    // because it optimises per-character bonuses (word boundaries, etc.)
    // which can favour scattered matches over a tight substring.
    let substr_result = find_contiguous_match(query_lower, target_chars, target_lower);

    // Pick the better result.
    let (positions, mut final_score) = match (dp_result, substr_result) {
        (Some(dp), Some(sub)) => {
            if sub.1 >= dp.1 {
                sub
            } else {
                dp
            }
        }
        (Some(dp), None) => dp,
        (None, Some(sub)) => sub,
        (None, None) => return FuzzyMatch::no_match(),
    };

    // Check if all matched positions are consecutive (contiguous substring).
    let is_contiguous =
        positions.len() == query_len && positions.windows(2).all(|w| w[1] == w[0] + 1);

    if is_contiguous {
        final_score += score::CONTIGUOUS_SUBSTRING;
    }

    // Exact match bonus: query matches entire target.
    let mut credited_full_path_prefix = false;
    if query_len == target_len {
        final_score += score::EXACT_MATCH;
    } else if target_len > query_len && is_contiguous {
        // Check if the query is a prefix match (all consecutive from start).
        let is_prefix_match = positions.iter().enumerate().all(|(i, &pos)| pos == i);

        if is_prefix_match && query_len < target_chars.len() {
            let next_char = target_chars[query_len];
            if next_char == '.' {
                // Highest priority: exact basename match (before extension).
                final_score += score::EXACT_MATCH;
                credited_full_path_prefix = true;
            } else if next_char == '-' || next_char == '_' || next_char == ' ' {
                // Second priority: match before word separator.
                final_score += score::EXACT_BASENAME_MATCH;
                credited_full_path_prefix = true;
            }
        }
    }

    // Path-segment prefix bonus.  A contiguous match that begins at the start
    // of a path segment (basename or interior directory) outranks an equally
    // contiguous match that lands mid-segment.  This is what users expect
    // when typing a short query like "ts": a basename starting with "ts"
    // (e.g. `tsconfig.json`) should rank above unrelated files whose only
    // `ts` happens to follow an extension dot (e.g. `pkg.ts`, `finder.ts`).
    //
    // Skipped when the existing `EXACT_*` block above already credited a
    // full-path prefix that ends in a separator — those matches are already
    // in the top tier and would be double-counted otherwise.
    if is_contiguous
        && !credited_full_path_prefix
        && !positions.is_empty()
        && positions.len() == query_len
    {
        let start = positions[0];
        let starts_segment = start == 0 || (start > 0 && target_chars[start - 1] == '/');
        if starts_segment {
            let last_slash = target_chars.iter().rposition(|&c| c == '/');
            let basename_start = last_slash.map(|i| i + 1).unwrap_or(0);
            if start == basename_start {
                final_score += score::BASENAME_PREFIX;
            } else {
                final_score += score::PATH_SEGMENT_PREFIX;
            }
        }
    }

    FuzzyMatch {
        matched: true,
        score: final_score,
        match_positions: positions,
    }
}

/// Score a multi-term pattern against a target whose char buffers have
/// already been refilled in the caller's scratch space.
fn score_multi_term(
    pattern: &PreparedPattern,
    target_chars: &[char],
    target_lower: &[char],
) -> FuzzyMatch {
    let mut total_score = 0;
    let mut all_positions = Vec::new();

    for term in &pattern.terms {
        let result = score_single_term(term, target_chars, target_lower);
        if !result.matched {
            return FuzzyMatch::no_match();
        }
        total_score += result.score;
        all_positions.extend(result.match_positions);
    }

    // Sort and deduplicate positions (terms may have overlapping matches).
    all_positions.sort_unstable();
    all_positions.dedup();

    // Tight-span bonus: reward targets where each term appears as a
    // contiguous substring and all the term matches are packed close
    // together (regardless of what sits between them).  This handles
    // every realistic "the query reconstructs a single name" case
    // uniformly — "/etc/hosts", "save_file.rs", "saveFile.rs",
    // "etcmohosts", even "foo.bar.baz" — without hard-coding a list
    // of separator characters that would miss camelCase, runs-together
    // words, or unusual punctuation.
    //
    // The bonus fires iff:
    //   1. Every term appears as a contiguous substring in `target_lower`,
    //   2. In the order given by the query, and
    //   3. The gap between consecutive term matches is small (we allow
    //      up to 4 chars per gap — generous enough for "etcmohosts"
    //      and "foo/bar/baz" but not for two terms at opposite ends
    //      of a long path).
    if let Some((span_start, span_end)) = find_sequential_term_span(target_lower, &pattern.terms) {
        let term_len_sum: usize = pattern.terms.iter().map(|t| t.lower_chars.len()).sum();
        let span_len = span_end - span_start;
        let gap_excess = span_len.saturating_sub(term_len_sum);
        let num_gaps = pattern.terms.len().saturating_sub(1);
        // Tolerance scales with the number of gaps: 4 chars per gap.
        let max_gap_excess = num_gaps.saturating_mul(4);

        if gap_excess <= max_gap_excess {
            total_score += score::EXACT_MATCH;

            let target_char_count = target_chars.len();
            all_positions = (span_start..span_end)
                .filter(|&i| i < target_char_count)
                .collect();
        }
    }

    FuzzyMatch {
        matched: true,
        score: total_score,
        match_positions: all_positions,
    }
}

/// Greedily find the first sequential occurrence of each term as a
/// contiguous substring in `target_lower`, advancing past each match
/// before searching for the next term.  Returns the `(start, end)`
/// char indices of the overall span covering the first term's start
/// through the last term's end, or `None` if any term can't be found
/// contiguously in the required order.
///
/// "Greedy" here means we take the *earliest* occurrence of each term,
/// not the tightest span — tighter spans would require backtracking
/// and are not worth the cost for a ranking signal.
fn find_sequential_term_span(
    target_lower: &[char],
    terms: &[PreparedTerm],
) -> Option<(usize, usize)> {
    let mut search_from = 0;
    let mut first_start: Option<usize> = None;
    let mut last_end = 0;
    for term in terms {
        let needle = &term.lower_chars;
        if needle.is_empty() {
            continue;
        }
        if search_from + needle.len() > target_lower.len() {
            return None;
        }
        let end_bound = target_lower.len() - needle.len();
        let found =
            (search_from..=end_bound).find(|&s| target_lower[s..s + needle.len()] == *needle)?;
        if first_start.is_none() {
            first_start = Some(found);
        }
        search_from = found + needle.len();
        last_end = search_from;
    }
    first_start.map(|start| (start, last_end))
}

/// Find the best contiguous substring match of `query` in `target`.
///
/// Scans for all occurrences of the query as a substring and picks the
/// one with the highest score (preferring word boundaries, basename, etc.).
fn find_contiguous_match(
    query: &[char],
    target_chars: &[char],
    target_lower: &[char],
) -> Option<(Vec<usize>, i32)> {
    let m = query.len();
    let n = target_lower.len();
    if m == 0 || m > n {
        return None;
    }

    let mut best: Option<(Vec<usize>, i32)> = None;

    for start in 0..=n - m {
        // Check if query matches at this position.
        if target_lower[start..start + m] != *query {
            continue;
        }

        // Score this contiguous match.
        let mut match_score = 0;

        if start == 0 {
            match_score += score::START_OF_STRING;
        }

        // Word boundary bonus for the first character.
        if start > 0 && start < target_chars.len() {
            let prev_char = target_chars[start - 1];
            if prev_char == ' '
                || prev_char == '_'
                || prev_char == '-'
                || prev_char == '/'
                || prev_char == '.'
            {
                match_score += score::WORD_BOUNDARY;
            } else if prev_char.is_lowercase() && target_chars[start].is_uppercase() {
                match_score += score::CAMEL_CASE;
            }
        }

        // Consecutive bonus for chars 1..m.
        match_score += score::CONSECUTIVE * (m as i32 - 1);

        let is_better = match &best {
            None => true,
            Some((_, s)) => match_score > *s,
        };
        if is_better {
            let positions: Vec<usize> = (start..start + m).collect();
            best = Some((positions, match_score));
        }
    }

    best
}

/// A single node in the backpointer arena used by [`find_best_match`].
///
/// Each node records the target index matched for one query character and a
/// link to the node that matched the previous query character (or `None` for
/// the first match).  Walking back from the final node reconstructs the full
/// list of match positions without ever cloning a `Vec<usize>`.
#[derive(Clone, Copy)]
struct ChainNode {
    ti: usize,
    prev: Option<u32>,
}

/// Find the best matching positions for query in target.
///
/// Same greedy DP as the original implementation, but replaces the
/// `Vec<usize>` position-cloning with an arena of linked backpointer nodes.
/// Per-state updates become O(1) (push one node) instead of O(m) (clone the
/// full positions vector), turning the worst-case cost from O(n·m²) into
/// O(n·m) with a single linear walk at the end to reconstruct positions.
///
/// Callers are expected to have already run subsequence rejection via
/// [`super::pattern::is_subsequence_prepared`] before allocating the
/// target buffers this function consumes — the duplicate guard that used
/// to live here was pure overhead on the hot path and has been removed.
fn find_best_match(
    query: &[char],
    target_chars: &[char],
    target_lower: &[char],
) -> Option<(Vec<usize>, i32)> {
    if query.is_empty() {
        return Some((Vec::new(), 0));
    }

    let n = target_lower.len();
    let m = query.len();

    if n < m {
        return None;
    }

    // Arena of backpointer nodes.  `best_node_for_qi[qi]` indexes into the
    // arena for the currently-best chain ending at query index `qi`, or
    // `None` if no match for `query[0..qi]` has been seen yet.  `qi == 0`
    // is always `None` (empty chain).
    let mut arena: Vec<ChainNode> = Vec::with_capacity(m.saturating_mul(4));
    let mut best_score: Vec<Option<i32>> = vec![None; m + 1];
    let mut best_node_for_qi: Vec<Option<u32>> = vec![None; m + 1];
    best_score[0] = Some(0); // empty query matches with score 0

    for ti in 0..n {
        // Process in reverse so we don't use values we just wrote this iteration.
        for qi in (0..m).rev() {
            if target_lower[ti] != query[qi] {
                continue;
            }

            // Can we extend the best chain for query[0..qi]?
            let prev_score = match best_score[qi] {
                Some(s) => s,
                None => continue,
            };
            // `last_match_pos` comes from the previous chain head, or None
            // if `qi == 0` (first character of the query).
            let prev_last_pos = best_node_for_qi[qi].map(|idx| arena[idx as usize].ti);

            // Match positions must be strictly increasing.
            if let Some(lp) = prev_last_pos {
                if ti <= lp {
                    continue;
                }
            }

            // Score the (ti, prev_last_pos) transition.
            let mut match_score = 0;

            // Start of string bonus
            if ti == 0 {
                match_score += score::START_OF_STRING;
            }

            // Word boundary bonus
            if ti > 0 && ti < target_chars.len() {
                let prev_char = target_chars[ti - 1];
                if prev_char == ' '
                    || prev_char == '_'
                    || prev_char == '-'
                    || prev_char == '/'
                    || prev_char == '.'
                {
                    match_score += score::WORD_BOUNDARY;
                } else if prev_char.is_lowercase() && target_chars[ti].is_uppercase() {
                    match_score += score::CAMEL_CASE;
                }
            }

            // Consecutive / gap handling
            if let Some(lp) = prev_last_pos {
                if ti == lp + 1 {
                    match_score += score::CONSECUTIVE;
                } else {
                    let gap_size = ti - lp - 1;
                    match_score += score::GAP_START_PENALTY;
                    match_score += score::GAP_PENALTY * (gap_size as i32 - 1).max(0);
                }
            }

            let new_score = prev_score + match_score;

            let should_update = match best_score[qi + 1] {
                None => true,
                Some(curr) => new_score > curr,
            };

            if should_update {
                let new_idx = arena.len() as u32;
                arena.push(ChainNode {
                    ti,
                    prev: best_node_for_qi[qi],
                });
                best_score[qi + 1] = Some(new_score);
                best_node_for_qi[qi + 1] = Some(new_idx);
            }
        }
    }

    let final_score = best_score[m]?;
    let final_node = best_node_for_qi[m]?;

    // Walk backwards through the arena to recover positions.
    let mut positions = vec![0usize; m];
    let mut cursor = Some(final_node);
    let mut idx = m;
    while let Some(node_idx) = cursor {
        debug_assert!(idx > 0);
        idx -= 1;
        let node = arena[node_idx as usize];
        positions[idx] = node.ti;
        cursor = node.prev;
    }

    Some((positions, final_score))
}