pub struct VectorIndexBuilder<E: VectorExtractor> { /* private fields */ }
Expand description
Builder for constructing vector search indices.
Orchestrates the full pipeline: discover files → extract documents → batch embed → insert into backend.
§Example
use fabryk_vector::{VectorIndexBuilder, MockEmbeddingProvider, MockVectorExtractor};
use std::sync::Arc;
let provider = Arc::new(MockEmbeddingProvider::new(384));
let extractor = MockVectorExtractor;
let (backend, stats) = VectorIndexBuilder::new(extractor)
.with_content_path("/data/concepts")
.with_embedding_provider(provider)
.build()
.await?;
Implementations§
impl<E: VectorExtractor> VectorIndexBuilder<E>
pub fn with_content_path(self, path: impl Into<PathBuf>) -> Self
Sets the content directory path.
pub fn with_embedding_provider(
    self,
    provider: Arc<dyn EmbeddingProvider>,
) -> Self
Sets the embedding provider.
pub fn with_error_handling(self, handling: ErrorHandling) -> Self
Sets the error handling strategy.
pub fn with_batch_size(self, size: usize) -> Self
Sets the batch size for embedding operations.
pub fn with_cache_path(self, path: impl Into<PathBuf>) -> Self
Sets the cache file path for vector index persistence.
When set, the builder will:
- Check if the cache is fresh before building (by comparing content hashes)
- Load from cache on hit (fast path, avoids re-embedding)
- Save to cache after a successful build (for next time)
pub fn skip_cache(self) -> Self
Forces a rebuild even if the cache is fresh.
pub async fn build(self) -> Result<(SimpleVectorBackend, VectorIndexStats)>
Builds the vector index.
Returns a SimpleVectorBackend populated with embedded documents,
plus build statistics.
§Phases
- Discover + Extract: Find content files, parse frontmatter, and call the extractor to produce VectorDocuments.
- Batch Embed + Insert: Embed documents in batches via the provider, then insert into the backend.
pub async fn build_append(
    self,
    backend: &mut SimpleVectorBackend,
) -> Result<VectorIndexStats>
Append documents from a content path into an existing backend.
Unlike build(), this does not create a new backend — it adds
embedded documents to the provided one. Use this to index multiple
content directories (potentially with different extractors) into
a single vector search backend.
§Example
// Build initial index from concept cards
let (mut backend, stats1) = VectorIndexBuilder::new(card_extractor)
.with_content_path(&cards_path)
.with_embedding_provider(provider.clone())
.build()
.await?;
// Append source documents with a different extractor
let stats2 = VectorIndexBuilder::new(source_extractor)
.with_content_path(&sources_path)
.with_embedding_provider(provider)
.build_append(&mut backend)
.await?;