pub struct Scanner { /* private fields */ }
Expand description
Dataset Scanner
ⓘ
let dataset = Dataset::open(uri).await.unwrap();
let stream = dataset.scan()
.project(&["col", "col2.subfield"]).unwrap()
.limit(10, None).unwrap()
.try_into_stream().await.unwrap();
stream
.map(|batch| batch.num_rows())
.buffered(16)
.sum()
Implementations§
source§impl Scanner
impl Scanner
pub fn new(dataset: Arc<Dataset>) -> Self
sourcepub fn project(&mut self, columns: &[&str]) -> Result<&mut Self>
pub fn project(&mut self, columns: &[&str]) -> Result<&mut Self>
Projection.
Only select the specified columns. If not specified, all columns will be scanned.
sourcepub fn limit(&mut self, limit: i64, offset: Option<i64>) -> Result<&mut Self>
pub fn limit(&mut self, limit: i64, offset: Option<i64>) -> Result<&mut Self>
Set limit and offset.
sourcepub fn nearest(
&mut self,
column: &str,
q: &Float32Array,
k: usize
) -> Result<&mut Self>
pub fn nearest(
&mut self,
column: &str,
q: &Float32Array,
k: usize
) -> Result<&mut Self>
Find the k nearest neighbours within the vector column.
pub fn nprobs(&mut self, n: usize) -> &mut Self
sourcepub fn refine(&mut self, factor: u32) -> &mut Self
pub fn refine(&mut self, factor: u32) -> &mut Self
Apply a refine step to the vector search.
A refine step uses the original vector values to re-rank the distances.
sourcepub fn with_row_id(&mut self) -> &mut Self
pub fn with_row_id(&mut self) -> &mut Self
Instruct the scanner to return the `_rowid` meta column from the dataset.
sourcepub async fn try_into_stream(&self) -> Result<ScannerStream>
pub async fn try_into_stream(&self) -> Result<ScannerStream>
Create a stream of this Scanner.
TODO: implement as IntoStream/IntoIterator.