lance_index/prefilter.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright The Lance Authors
3
4use std::sync::Arc;
5
6use async_trait::async_trait;
7use lance_core::utils::mask::RowAddrMask;
8use lance_core::Result;
9
10/// A trait to be implemented by anything supplying a prefilter row addr mask
11///
12/// This trait is for internal use only and has no stability guarantees.
13#[async_trait]
14pub trait FilterLoader: Send + 'static {
15 async fn load(self: Box<Self>) -> Result<RowAddrMask>;
16}
17
18/// Filter out row ids that we know are not relevant to the query.
19///
20/// This could be both rows that are deleted or a prefilter
21/// that should be applied to the search
22///
23/// <section class="warning">
24/// Internal use only. No API stability guarantees.
25/// </section>
26#[async_trait]
27pub trait PreFilter: Send + Sync {
28 /// Waits for the prefilter to be fully loaded
29 ///
30 /// The prefilter loads in the background while the rest of the index
31 /// search is running. When you are ready to use the prefilter you
32 /// must first call this method to ensure it is fully loaded. This
33 /// allows `filter_row_ids` to be a synchronous method.
34 async fn wait_for_ready(&self) -> Result<()>;
35
36 /// If the filter is empty.
37 fn is_empty(&self) -> bool;
38
39 /// Get the row addr mask for this prefilter
40 ///
41 /// This method must be called after `wait_for_ready`
42 fn mask(&self) -> Arc<RowAddrMask>;
43
44 /// Check whether a slice of row ids should be included in a query.
45 ///
46 /// Returns a vector of indices into the input slice that should be included,
47 /// also known as a selection vector.
48 ///
49 /// This method must be called after `wait_for_ready`
50 fn filter_row_ids<'a>(&self, row_ids: Box<dyn Iterator<Item = &'a u64> + 'a>) -> Vec<u64>;
51}
52
/// A prefilter that does nothing.
///
/// This is a stateless unit type, so it derives the common traits: it can be
/// debug-printed, freely copied, and built with `NoFilter::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoFilter;
55
56#[async_trait]
57impl PreFilter for NoFilter {
58 async fn wait_for_ready(&self) -> Result<()> {
59 Ok(())
60 }
61
62 fn is_empty(&self) -> bool {
63 true
64 }
65
66 fn mask(&self) -> Arc<RowAddrMask> {
67 Arc::new(RowAddrMask::all_rows())
68 }
69
70 fn filter_row_ids<'a>(&self, row_ids: Box<dyn Iterator<Item = &'a u64> + 'a>) -> Vec<u64> {
71 row_ids.enumerate().map(|(i, _)| i as u64).collect()
72 }
73}