pub struct SemanticIndexer { /* private fields */ }
Implementations§
Source§impl SemanticIndexer
impl SemanticIndexer
pub fn new(embedder_type: &str, data_dir: Option<&Path>) -> Result<Self>
pub fn with_batch_size(self, batch_size: usize) -> Result<Self>
pub fn batch_size(&self) -> usize
pub fn embedder_id(&self) -> &str
pub fn embedder_dimension(&self) -> usize
pub fn embed_messages( &self, messages: &[EmbeddingInput], ) -> Result<Vec<EmbeddedMessage>>
Source
pub fn embed_messages_with_sink(
&self,
messages: &[EmbeddingInput],
sink: &SemanticProgressSink,
) -> Result<Vec<EmbeddedMessage>>
pub fn embed_messages_with_sink( &self, messages: &[EmbeddingInput], sink: &SemanticProgressSink, ) -> Result<Vec<EmbeddedMessage>>
Variant of [embed_messages] that emits embed_batch_* events
into the given JSONL sink. The sink is silent unless
CASS_SEMANTIC_PROGRESS_JSONL is set, so this path is safe to
take in production.
pub fn build_and_save_index<I>(
&self,
embedded_messages: I,
data_dir: &Path,
) -> Result<FsVectorIndex>
where
I: IntoIterator<Item = EmbeddedMessage>,
pub fn build_and_save_index_shards<I>(
&self,
embedded_messages: I,
data_dir: &Path,
plan: SemanticShardBuildPlan,
) -> Result<SemanticShardBuildOutcome>
where
I: IntoIterator<Item = EmbeddedMessage>,
Source
pub fn append_to_index(
&self,
embedded_messages: impl IntoIterator<Item = EmbeddedMessage>,
data_dir: &Path,
) -> Result<usize>
pub fn append_to_index( &self, embedded_messages: impl IntoIterator<Item = EmbeddedMessage>, data_dir: &Path, ) -> Result<usize>
Append new embeddings to an existing FSVI index via the WAL.
Used for incremental semantic indexing in watch mode. Opens the existing index, appends a batch of new embeddings, and compacts if the WAL has grown large enough.
Returns the number of entries appended.
pub fn run_backfill_batch( &self, messages: &[EmbeddingInput], data_dir: &Path, manifest: &mut SemanticManifest, plan: SemanticBackfillBatchPlan, ) -> Result<SemanticBackfillBatchOutcome>
Source
pub fn run_backfill_batch_with_sink(
&self,
messages: &[EmbeddingInput],
data_dir: &Path,
manifest: &mut SemanticManifest,
plan: SemanticBackfillBatchPlan,
last_message_id: Option<i64>,
sink: &SemanticProgressSink,
) -> Result<SemanticBackfillBatchOutcome>
pub fn run_backfill_batch_with_sink( &self, messages: &[EmbeddingInput], data_dir: &Path, manifest: &mut SemanticManifest, plan: SemanticBackfillBatchPlan, last_message_id: Option<i64>, sink: &SemanticProgressSink, ) -> Result<SemanticBackfillBatchOutcome>
Variant of [run_backfill_batch] that emits semantic progress
events to the given JSONL sink and persists last_message_id
into the resumable checkpoint when supplied. The sink is silent
unless CASS_SEMANTIC_PROGRESS_JSONL is set, so this path is
safe to take in production.
pub fn run_backfill_from_storage( &self, storage: &FrankenStorage, data_dir: &Path, manifest: &mut SemanticManifest, plan: SemanticBackfillStoragePlan, ) -> Result<SemanticBackfillBatchOutcome>
Source
pub fn run_backfill_from_storage_with_sink(
&self,
storage: &FrankenStorage,
data_dir: &Path,
manifest: &mut SemanticManifest,
plan: SemanticBackfillStoragePlan,
sink: &SemanticProgressSink,
) -> Result<SemanticBackfillBatchOutcome>
pub fn run_backfill_from_storage_with_sink( &self, storage: &FrankenStorage, data_dir: &Path, manifest: &mut SemanticManifest, plan: SemanticBackfillStoragePlan, sink: &SemanticProgressSink, ) -> Result<SemanticBackfillBatchOutcome>
Variant of [run_backfill_from_storage] that emits semantic
progress events to a JSONL sink and persists last_message_id
in the resumable checkpoint. The sink is silent unless
CASS_SEMANTIC_PROGRESS_JSONL is set.
Source
pub fn run_capped_backfill_from_storage_with_sink(
&self,
storage: &FrankenStorage,
data_dir: &Path,
manifest: &mut SemanticManifest,
plan: SemanticBackfillStoragePlan,
sink: &SemanticProgressSink,
) -> Result<SemanticBackfillBatchOutcome>
pub fn run_capped_backfill_from_storage_with_sink( &self, storage: &FrankenStorage, data_dir: &Path, manifest: &mut SemanticManifest, plan: SemanticBackfillStoragePlan, sink: &SemanticProgressSink, ) -> Result<SemanticBackfillBatchOutcome>
Variant of [run_backfill_from_storage_with_sink] for CLI backfill
runs. It applies operator checkpoint caps from
CASS_SEMANTIC_MAX_MESSAGES_PER_CHECKPOINT and
CASS_SEMANTIC_MAX_BYTES_PER_CHECKPOINT while keeping each selected
conversation whole, so message-cursor resume cannot strand the tail of
a partially selected conversation.
Source
pub fn build_hnsw_index(
&self,
vector_index: &FsVectorIndex,
data_dir: &Path,
m: Option<usize>,
ef_construction: Option<usize>,
) -> Result<PathBuf>
pub fn build_hnsw_index( &self, vector_index: &FsVectorIndex, data_dir: &Path, m: Option<usize>, ef_construction: Option<usize>, ) -> Result<PathBuf>
Build and save an HNSW index for approximate nearest neighbor search.
This creates an HNSW graph structure from the existing VectorIndex,
enabling O(log n) approximate search with the --approximate flag.
§Arguments
vector_index - The VectorIndex to build HNSW from
data_dir - Directory to save the HNSW index
m - Max connections per node (default: 16)
ef_construction - Search width during build (default: 200)
§Returns
Path to the saved HNSW index file
Auto Trait Implementations§
impl Freeze for SemanticIndexer
impl !RefUnwindSafe for SemanticIndexer
impl Send for SemanticIndexer
impl Sync for SemanticIndexer
impl Unpin for SemanticIndexer
impl UnsafeUnpin for SemanticIndexer
impl !UnwindSafe for SemanticIndexer
Blanket Implementations§
Source§impl<'a, T, E> AsTaggedExplicit<'a, E> for T
where
T: 'a,
impl<'a, T, E> AsTaggedExplicit<'a, E> for T
where
T: 'a,
Source§impl<'a, T, E> AsTaggedImplicit<'a, E> for T
where
T: 'a,
impl<'a, T, E> AsTaggedImplicit<'a, E> for T
where
T: 'a,
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> Downcast for T
where
T: Any,
impl<T> Downcast for T
where
T: Any,
Source§fn into_any(self: Box<T>) -> Box<dyn Any>
fn into_any(self: Box<T>) -> Box<dyn Any>
Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>, which can then be
downcast into Box<dyn ConcreteType> where ConcreteType implements Trait.
Source§fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>, which can then be further
downcast into Rc<ConcreteType> where ConcreteType implements Trait.
Source§fn as_any(&self) -> &(dyn Any + 'static)
fn as_any(&self) -> &(dyn Any + 'static)
Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot
generate &Any’s vtable from &Trait’s.
Source§fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot
generate &mut Any’s vtable from &mut Trait’s.
Source§impl<T> Downcast for T
where
T: Any,
impl<T> Downcast for T
where
T: Any,
Source§fn into_any(self: Box<T>) -> Box<dyn Any>
fn into_any(self: Box<T>) -> Box<dyn Any>
Converts Box<dyn Trait> (where Trait: Downcast) to Box<dyn Any>. Box<dyn Any> can
then be further downcast into Box<ConcreteType> where ConcreteType implements Trait.
Source§fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
fn into_any_rc(self: Rc<T>) -> Rc<dyn Any>
Converts Rc<Trait> (where Trait: Downcast) to Rc<Any>. Rc<Any> can then be
further downcast into Rc<ConcreteType> where ConcreteType implements Trait.
Source§fn as_any(&self) -> &(dyn Any + 'static)
fn as_any(&self) -> &(dyn Any + 'static)
Converts &Trait (where Trait: Downcast) to &Any. This is needed since Rust cannot
generate &Any’s vtable from &Trait’s.
Source§fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
fn as_any_mut(&mut self) -> &mut (dyn Any + 'static)
Converts &mut Trait (where Trait: Downcast) to &mut Any. This is needed since Rust cannot
generate &mut Any’s vtable from &mut Trait’s.
Source§impl<T> DowncastSend for T
impl<T> DowncastSend for T
Source§impl<T> DowncastSync for T
impl<T> DowncastSync for T
Source§impl<T> DowncastSync for T
impl<T> DowncastSync for T
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, _span: NoopSpan) -> Self
fn instrument(self, _span: NoopSpan) -> Self
Source§fn in_current_span(self) -> Self
fn in_current_span(self) -> Self
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more