skim 4.5.0

Fuzzy Finder in rust!
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
//! Item representation and management.
//!
//! This module provides the core item types used by skim, including ranked items,
//! item pools for efficient storage, and ranking criteria for sorting matches.
use std::cmp::min;
use std::default::Default;
use std::hash::Hash;
use std::ops::Deref;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

#[cfg(feature = "cli")]
use clap::ValueEnum;
#[cfg(feature = "cli")]
use clap::builder::PossibleValue;

use crate::spinlock::{SpinLock, SpinLockGuard};
use crate::{MatchRange, Rank, SkimItem};
use tokio::sync::Notify;

//------------------------------------------------------------------------------

/// Builder for creating rank values based on configurable criteria
#[derive(Debug)]
pub struct RankBuilder {
    criterion: Vec<RankCriteria>,
}

impl Default for RankBuilder {
    fn default() -> Self {
        Self {
            criterion: vec![RankCriteria::Score, RankCriteria::Begin, RankCriteria::End],
        }
    }
}

impl RankBuilder {
    /// Creates a new rank builder with the given criteria
    #[must_use]
    pub fn new(mut criterion: Vec<RankCriteria>) -> Self {
        if !criterion.contains(&RankCriteria::Score) && !criterion.contains(&RankCriteria::NegScore) {
            criterion.insert(0, RankCriteria::Score);
        }

        criterion.dedup();
        Self { criterion }
    }

    /// Returns the tiebreak criteria slice.
    #[must_use]
    pub fn criteria(&self) -> &[RankCriteria] {
        &self.criterion
    }

    /// Computes the byte offset of the first character after the last path separator
    /// (`/` or `\`) in `text`.  Returns `0` when no separator is present.
    fn path_name_offset(text: &str) -> i32 {
        text.rfind(['/', '\\'])
            .map_or(0, |pos| i32::try_from(pos).unwrap_or(i32::MAX).saturating_add(1))
    }

    /// Builds a `Rank` from raw match measurements.
    ///
    /// The values are stored as-is; the tiebreak ordering and sign-flipping are
    /// applied lazily by [`Rank::sort_key`] at comparison time.
    /// The `index` will be overridden later
    #[must_use]
    pub fn build_rank(&self, score: i32, begin: usize, end: usize, item_text: &str) -> Rank {
        Rank {
            score,
            begin: i32::try_from(begin).unwrap_or(i32::MAX),
            end: i32::try_from(end).unwrap_or(i32::MAX),
            length: i32::try_from(item_text.len()).unwrap_or(i32::MAX),
            index: Default::default(),
            path_name_offset: Self::path_name_offset(item_text),
        }
    }
}

impl Rank {
    /// Computes the ordered sort key for this rank given a slice of tiebreak criteria.
    ///
    /// Each criterion maps to one slot in the returned `[i32; 5]` array. Values are
    /// sign-flipped where necessary so that the array compares lexicographically with
    /// the "best" match sorting first (ascending order).
    #[must_use]
    pub fn sort_key(&self, criteria: &[RankCriteria]) -> [i32; 5] {
        let mut key = [0i32; 5];
        for (priority, criterion) in criteria.iter().take(5).enumerate() {
            key[priority] = match criterion {
                RankCriteria::Score => -self.score,
                RankCriteria::NegScore => self.score,
                RankCriteria::Begin => self.begin,
                RankCriteria::NegBegin => -self.begin,
                RankCriteria::End => self.end,
                RankCriteria::NegEnd => -self.end,
                RankCriteria::Length => self.length,
                RankCriteria::NegLength => -self.length,
                RankCriteria::Index => self.index,
                RankCriteria::NegIndex => -self.index,
                // PathName: prefer matches that fall within the filename portion (i.e. at or
                // after the last path separator).  `path_name_offset - begin` is <= 0 when the
                // match starts inside the filename, and positive when it starts in a directory
                // component.  Lower values sort first, so filename matches rank higher.
                RankCriteria::PathName => self.path_name_offset - self.begin,
                RankCriteria::NegPathName => self.begin - self.path_name_offset,
            };
        }
        key
    }
}

//------------------------------------------------------------------------------
/// An item that has been matched against a query
#[derive(Clone)]
pub struct MatchedItem {
    /// The underlying skim item
    pub item: Arc<dyn SkimItem>,
    /// Raw match measurements
    pub rank: Rank,
    /// Range of characters that matched the pattern
    pub matched_range: Option<MatchRange>,
    /// Sort key precomputed at construction time from `rank` and the tiebreak
    /// criteria.  Caching avoids recomputing it on every comparison during sort.
    sort_key: [i32; 5],
}

impl std::fmt::Debug for MatchedItem {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MatchedItem")
            .field("item", &self.item.text())
            .field("rank", &self.rank)
            .field("matched_range", &self.matched_range)
            .finish_non_exhaustive()
    }
}

impl Hash for MatchedItem {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        state.write_i32(self.rank.index);
        self.text().hash(state);
    }
}

impl Deref for MatchedItem {
    type Target = Arc<dyn SkimItem>;

    fn deref(&self) -> &Self::Target {
        &self.item
    }
}

impl MatchedItem {
    /// Create a new `MatchedItem`, building the `sort_key` from the Rank and `RankBuilder`
    pub fn new(
        item: Arc<dyn SkimItem>,
        rank: Rank,
        matched_range: Option<MatchRange>,
        rank_builder: &RankBuilder,
    ) -> Self {
        Self {
            item,
            rank,
            matched_range,
            sort_key: rank.sort_key(rank_builder.criteria()),
        }
    }
    /// Merge two sorted `Vec<MatchedItem>` lists into one, preserving sort order by rank.
    ///
    /// Both input lists must already be sorted by the same tiebreak criteria (ascending).
    /// The merge is O(n+m).
    #[must_use]
    pub fn sorted_merge(existing: Vec<MatchedItem>, incoming: Vec<MatchedItem>) -> Vec<MatchedItem> {
        if existing.is_empty() {
            return incoming;
        }
        if incoming.is_empty() {
            return existing;
        }

        // Fast path: if all existing <= all incoming, we can append without merging.
        #[allow(clippy::missing_panics_doc)]
        if existing.last().unwrap() <= incoming.first().unwrap() {
            let mut out = existing;
            out.extend(incoming);
            return out;
        }

        // Fast path: if all incoming <= all existing, prepend without complex merge.
        #[allow(clippy::missing_panics_doc)]
        if incoming.last().unwrap() <= existing.first().unwrap() {
            let mut out = incoming;
            out.extend(existing);
            return out;
        }

        let mut merged = Vec::with_capacity(existing.len() + incoming.len());
        let mut a = existing.into_iter().peekable();
        let mut b = incoming.into_iter().peekable();

        loop {
            match (a.peek(), b.peek()) {
                (Some(av), Some(bv)) => {
                    if av <= bv {
                        #[allow(clippy::missing_panics_doc)]
                        merged.push(a.next().unwrap());
                    } else {
                        #[allow(clippy::missing_panics_doc)]
                        merged.push(b.next().unwrap());
                    }
                }
                (Some(_), None) => {
                    merged.extend(a);
                    break;
                }
                (None, _) => {
                    merged.extend(b);
                    break;
                }
            }
        }

        merged
    }

    /// Merge `incoming` into an already-sorted `existing` vector in-place.
    ///
    /// This function chooses between two strategies:
    /// - If `incoming` is small (≤ 256 items), insert them one-by-one using
    ///   binary search to find the insertion point.  Each insert is O(n) due
    ///   to element shifting, giving O(m·n) overall, but the constant factor
    ///   is small for tiny m and avoids any extra allocation.
    /// - Otherwise, perform a backwards in-place merge that writes the result
    ///   directly into `existing`'s buffer (after a single `reserve`).  This
    ///   is O(n+m) time with **zero additional heap allocation** beyond the
    ///   amortised `Vec::reserve`.
    ///
    /// `existing` must be sorted according to the same ordering used by
    /// `MatchedItem::cmp`.
    pub fn merge_into_sorted(existing: &mut Vec<MatchedItem>, incoming: Vec<MatchedItem>) {
        const SMALL_INSERT_THRESHOLD: usize = 256;

        if incoming.is_empty() {
            return;
        }

        // When existing is empty, extend preserves any pre-allocated capacity
        // (e.g. a caller that did `Vec::with_capacity(total)` before a fold).
        if existing.is_empty() {
            existing.extend(incoming);
            return;
        }

        // Fast path: all existing ≤ first incoming — just append.
        #[allow(clippy::missing_panics_doc)]
        if existing.last().unwrap() <= incoming.first().unwrap() {
            existing.extend(incoming);
            return;
        }

        if incoming.len() <= SMALL_INSERT_THRESHOLD {
            for item in incoming {
                let pos = existing.binary_search_by(|e| e.cmp(&item)).unwrap_or_else(|p| p);
                existing.insert(pos, item);
            }
        } else {
            Self::merge_backwards(existing, incoming);
        }
    }

    /// Merges `incoming` into `existing` in-place using a right-to-left merge.
    ///
    /// Both inputs must already be sorted.  After `existing.reserve(b_len)`,
    /// the buffer has room for all elements.  We then merge from the rightmost
    /// end of each run, writing the larger element at the write cursor which
    /// starts at `new_len - 1` and moves left.
    ///
    /// **Key invariant**: the write position is always strictly greater than
    /// the read position in `existing` while both runs have remaining elements
    /// (because `write - ai == remaining B elements > 0`), so
    /// `copy_nonoverlapping` never aliases.  Each element is moved exactly
    /// once.
    ///
    /// # Safety (internal)
    ///
    /// Uses `unsafe` for raw-pointer moves.  `MatchedItem::cmp` compares plain
    /// integer fields and cannot panic, so no element is leaked or
    /// double-dropped.  `incoming`'s backing allocation is freed with length 0
    /// after all its elements have been moved out.
    fn merge_backwards(existing: &mut Vec<MatchedItem>, incoming: Vec<MatchedItem>) {
        let a_len = existing.len();
        let b_len = incoming.len();
        let new_len = a_len + b_len;

        existing.reserve(b_len);

        // Decompose `incoming` so we can move elements out via raw pointers
        // and free the allocation separately.
        let (b_ptr, b_cap) = {
            let mut v = std::mem::ManuallyDrop::new(incoming);
            (v.as_mut_ptr(), v.capacity())
        };

        // SAFETY: see doc-comment above for the aliasing / move proof.
        unsafe {
            let a_ptr = existing.as_mut_ptr();
            let mut write = new_len;
            let mut ai = a_len;
            let mut bi = b_len;

            while ai > 0 && bi > 0 {
                write -= 1;
                if *a_ptr.add(ai - 1) >= *b_ptr.add(bi - 1) {
                    ai -= 1;
                    std::ptr::copy_nonoverlapping(a_ptr.add(ai), a_ptr.add(write), 1);
                } else {
                    bi -= 1;
                    std::ptr::copy_nonoverlapping(b_ptr.add(bi), a_ptr.add(write), 1);
                }
            }

            // Remaining B elements go to the front of existing.
            // (Remaining A elements at 0..ai are already in their final positions.)
            if bi > 0 {
                std::ptr::copy_nonoverlapping(b_ptr, a_ptr, bi);
            }

            existing.set_len(new_len);

            // Free incoming's backing allocation; all elements were moved out.
            drop(Vec::from_raw_parts(b_ptr, 0, b_cap));
        }
    }
}

impl MatchedItem {
    /// Downcast the `MatchedItem` to the corresponding `SkimItem` struct
    #[must_use]
    pub fn downcast_item<T: SkimItem>(&self) -> Option<&T> {
        (*self.item).as_any().downcast_ref::<T>()
    }
}

use std::cmp::Ordering as CmpOrd;

impl PartialEq for MatchedItem {
    fn eq(&self, other: &Self) -> bool {
        self.text().eq(&other.text()) && self.rank.index.eq(&other.rank.index)
    }
}

impl std::cmp::Eq for MatchedItem {}

impl PartialOrd for MatchedItem {
    fn partial_cmp(&self, other: &Self) -> Option<CmpOrd> {
        Some(self.cmp(other))
    }
}

impl Ord for MatchedItem {
    fn cmp(&self, other: &Self) -> CmpOrd {
        self.sort_key
            .cmp(&other.sort_key)
            .then_with(|| self.rank.index.cmp(&other.rank.index))
    }
}

//------------------------------------------------------------------------------
const ITEM_POOL_CAPACITY: usize = 16384;

/// Thread-safe pool for storing and managing items efficiently
pub struct ItemPool {
    /// Total number of items in the pool
    length: AtomicUsize,
    /// The main pool of items
    pool: SpinLock<Vec<Arc<dyn SkimItem>>>,
    /// Number of items that were taken
    taken: AtomicUsize,

    /// Reserved first N lines as header
    reserved_items: SpinLock<Vec<Arc<dyn SkimItem>>>,
    /// Number of lines to reserve as header
    lines_to_reserve: usize,
    /// Reverse the order of items (--tac flag)
    tac: bool,

    /// Notified whenever new items are appended to the pool (async path).
    ///
    /// Listeners (e.g. the TUI event loop) can `await` this to wake up
    /// immediately when items arrive instead of waiting for the next
    /// periodic tick.
    pub items_available: Arc<Notify>,
}

impl Default for ItemPool {
    fn default() -> Self {
        Self {
            length: AtomicUsize::new(0),
            pool: SpinLock::new(Vec::with_capacity(ITEM_POOL_CAPACITY)),
            taken: AtomicUsize::new(0),
            reserved_items: SpinLock::new(Vec::new()),
            lines_to_reserve: 0,
            tac: false,
            items_available: Arc::new(Notify::new()),
        }
    }
}

impl ItemPool {
    /// Creates a new empty item pool
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a new item pool from skim options
    #[must_use]
    pub fn from_options(options: &crate::SkimOptions) -> Self {
        Self {
            length: AtomicUsize::new(0),
            pool: SpinLock::new(Vec::with_capacity(ITEM_POOL_CAPACITY)),
            taken: AtomicUsize::new(0),
            reserved_items: SpinLock::new(Vec::new()),
            lines_to_reserve: options.header_lines,
            tac: options.tac,
            items_available: Arc::new(Notify::new()),
        }
    }

    /// Returns the total number of items in the pool
    pub fn len(&self) -> usize {
        self.length.load(Ordering::SeqCst)
    }

    /// Returns true if the pool contains no items
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the number of items that have not been taken yet
    pub fn num_not_taken(&self) -> usize {
        self.length.load(Ordering::SeqCst) - self.taken.load(Ordering::SeqCst)
    }

    /// Returns the number of items that have been taken
    pub fn num_taken(&self) -> usize {
        self.taken.load(Ordering::SeqCst)
    }

    /// Clears all items from the pool and resets counters
    pub fn clear(&self) {
        let mut items = self.pool.lock();
        items.clear();
        let mut header_items = self.reserved_items.lock();
        header_items.clear();
        self.taken.store(0, Ordering::SeqCst);
        self.length.store(0, Ordering::SeqCst);
    }

    /// Resets the taken counter without clearing items
    pub fn reset(&self) {
        // lock to ensure consistency
        let _items = self.pool.lock();

        self.taken.store(0, Ordering::SeqCst);
    }

    /// append the items and return the `new_size` of the pool
    pub fn append(&self, mut items: Vec<Arc<dyn SkimItem>>) -> usize {
        let len = items.len();
        trace!("item pool, append {len} items");
        let mut pool = self.pool.lock();
        let mut header_items = self.reserved_items.lock();

        let to_reserve = self.lines_to_reserve - header_items.len();
        if to_reserve > 0 {
            let to_reserve = min(to_reserve, items.len());
            // Split items: first part goes to header, rest to main pool
            let remaining = items.split_off(to_reserve);

            // Header items are always in input order, regardless of tac
            header_items.extend(items);

            if self.tac {
                // For --tac, prepend non-header items (newest items go to front)
                for item in remaining {
                    pool.insert(0, item);
                }
            } else {
                pool.extend(remaining);
            }
        } else if self.tac {
            // For --tac, prepend items (newest items go to front)
            for item in items {
                pool.insert(0, item);
            }
        } else {
            pool.extend(items);
        }
        self.length.store(pool.len(), Ordering::SeqCst);
        trace!("item pool, done append {len} items, total: {}", pool.len());
        let new_len = pool.len();
        drop(pool);
        drop(header_items);
        // Wake any listener that is waiting for new items (e.g. the event loop
        // or the filter-mode loop) so it can restart the matcher immediately
        // instead of waiting for the next periodic tick.
        self.items_available.notify_one();

        new_len
    }

    /// Takes items from the pool, copying new items since last take and releasing lock immediately
    pub fn take(&self) -> Vec<Arc<dyn SkimItem>> {
        let guard = self.pool.lock();
        let taken = self.taken.swap(guard.len(), Ordering::SeqCst);
        // Copy the new items out so we can release the lock immediately
        let items = guard[taken..].to_vec();
        drop(guard); // Explicitly release lock
        items
    }

    /// Returns a copy of the reserved header items
    pub fn reserved(&self) -> Vec<Arc<dyn SkimItem>> {
        let guard = self.reserved_items.lock();
        guard.clone()
    }
}

/// Guard for accessing a slice of items from the pool
pub struct ItemPoolGuard<'a, T: Sized + 'a> {
    guard: SpinLockGuard<'a, Vec<T>>,
    start: usize,
}

impl<T: Sized> Deref for ItemPoolGuard<'_, T> {
    type Target = [T];

    fn deref(&self) -> &[T] {
        &self.guard[self.start..]
    }
}

//------------------------------------------------------------------------------
/// Criteria for ranking and sorting matched items
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum RankCriteria {
    /// Sort by match score (lower is better)
    Score,
    /// Sort by match score (higher is better)
    NegScore,
    /// Sort by beginning position of match
    Begin,
    /// Sort by beginning position of match (reversed)
    NegBegin,
    /// Sort by ending position of match
    End,
    /// Sort by ending position of match (reversed)
    NegEnd,
    /// Sort by item length
    Length,
    /// Sort by item length (reversed)
    NegLength,
    /// Sort by item index
    Index,
    /// Sort by item index (reversed)
    NegIndex,
    /// Give a bonus to matches that are after the last path separator (`/` or `\`)
    PathName,
    /// Give a bonus to matches that are after the last path separator (reversed)
    NegPathName,
}

#[cfg(feature = "cli")]
impl ValueEnum for RankCriteria {
    fn value_variants<'a>() -> &'a [Self] {
        use RankCriteria::{
            Begin, End, Index, Length, NegBegin, NegEnd, NegIndex, NegLength, NegPathName, NegScore, PathName, Score,
        };
        &[
            Score,
            NegScore,
            Begin,
            NegBegin,
            End,
            NegEnd,
            Length,
            NegLength,
            Index,
            NegIndex,
            PathName,
            NegPathName,
        ]
    }

    fn to_possible_value(&self) -> Option<clap::builder::PossibleValue> {
        use RankCriteria::{
            Begin, End, Index, Length, NegBegin, NegEnd, NegIndex, NegLength, NegPathName, NegScore, PathName, Score,
        };
        Some(match self {
            Score => PossibleValue::new("score"),
            Begin => PossibleValue::new("begin"),
            End => PossibleValue::new("end"),
            NegScore => PossibleValue::new("-score"),
            NegBegin => PossibleValue::new("-begin"),
            NegEnd => PossibleValue::new("-end"),
            Length => PossibleValue::new("length"),
            NegLength => PossibleValue::new("-length"),
            Index => PossibleValue::new("index"),
            NegIndex => PossibleValue::new("-index"),
            PathName => PossibleValue::new("pathname"),
            NegPathName => PossibleValue::new("-pathname"),
        })
    }
}