swiftide_core/
indexing_traits.rsuse crate::node::Node;
use crate::Embeddings;
use crate::{
indexing_defaults::IndexingDefaults, indexing_stream::IndexingStream, SparseEmbeddings,
};
use std::fmt::Debug;
use std::sync::Arc;
use crate::prompt::Prompt;
use anyhow::Result;
use async_trait::async_trait;
pub use dyn_clone::DynClone;
#[cfg(feature = "test-utils")]
#[doc(hidden)]
use mockall::{mock, predicate::str};
/// Transforms a single [`Node`] into another [`Node`].
///
/// Implementors must be `Send + Sync` and clonable through [`DynClone`]; the
/// `clone_trait_object!` invocation below makes `Box<dyn Transformer>` cloneable.
#[async_trait]
pub trait Transformer: Send + Sync + DynClone {
    /// Consumes `node` and returns the transformed node, or an error.
    async fn transform_node(&self, node: Node) -> Result<Node>;

    /// Optional concurrency hint for this transformer; `None` unless overridden.
    fn concurrency(&self) -> Option<usize> {
        None
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(Transformer);
// `mockall`-generated mock of `Transformer` (the generated type is `MockTransformer`),
// compiled only when the `test-utils` feature is enabled.
// `Clone` is mocked as well because `Transformer` has `DynClone` as a supertrait.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub Transformer {}
#[async_trait]
impl Transformer for Transformer {
async fn transform_node(&self, node: Node) -> Result<Node>;
fn concurrency(&self) -> Option<usize>;
fn name(&self) -> &'static str;
}
impl Clone for Transformer {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`Transformer`] by forwarding every
/// method to the boxed value.
#[async_trait]
impl Transformer for Box<dyn Transformer> {
    async fn transform_node(&self, node: Node) -> Result<Node> {
        (**self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (**self).concurrency()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`Transformer`] by
/// forwarding every method to the shared value.
#[async_trait]
impl Transformer for Arc<dyn Transformer> {
    async fn transform_node(&self, node: Node) -> Result<Node> {
        (**self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (**self).concurrency()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn Transformer` be used wherever a [`Transformer`] is
/// expected. Every method is forwarded to the referenced trait object.
#[async_trait]
impl Transformer for &dyn Transformer {
    async fn transform_node(&self, node: Node) -> Result<Node> {
        (*self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (*self).concurrency()
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default would
    // derive the name from the `&dyn Transformer` type itself instead of asking
    // the referenced transformer.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
// Lets any `Fn(Node) -> Result<Node>` that is also `Send + Sync + Clone` act as a
// `Transformer`: `transform_node` simply calls it with the node.
// `concurrency` and `name` are not overridden here, so the trait defaults apply.
#[async_trait]
impl<F> Transformer for F
where
F: Fn(Node) -> Result<Node> + Send + Sync + Clone,
{
async fn transform_node(&self, node: Node) -> Result<Node> {
self(node)
}
}
/// Transforms a whole batch of [`Node`]s at once, producing an [`IndexingStream`].
///
/// Implementors must be `Send + Sync` and clonable through [`DynClone`]; the
/// `clone_trait_object!` invocation below makes `Box<dyn BatchableTransformer>`
/// cloneable.
#[async_trait]
pub trait BatchableTransformer: Send + Sync + DynClone {
    /// Consumes `nodes` and returns the resulting stream.
    async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream;

    /// Optional concurrency hint; `None` unless overridden.
    fn concurrency(&self) -> Option<usize> {
        None
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }

    /// Optional batch size hint; `None` unless overridden.
    fn batch_size(&self) -> Option<usize> {
        None
    }
}

dyn_clone::clone_trait_object!(BatchableTransformer);
// `mockall`-generated mock of `BatchableTransformer` (the generated type is
// `MockBatchableTransformer`), compiled only when the `test-utils` feature is enabled.
// `Clone` is mocked as well because the trait has `DynClone` as a supertrait.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub BatchableTransformer {}
#[async_trait]
impl BatchableTransformer for BatchableTransformer {
async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream;
fn name(&self) -> &'static str;
fn batch_size(&self) -> Option<usize>;
fn concurrency(&self) -> Option<usize>;
}
impl Clone for BatchableTransformer {
fn clone(&self) -> Self;
}
}
// Lets any `Fn(Vec<Node>) -> IndexingStream` that is also `Send + Sync + Clone` act as a
// `BatchableTransformer`: `batch_transform` simply calls it with the batch.
// `concurrency`, `name` and `batch_size` are not overridden, so the trait defaults apply.
#[async_trait]
impl<F> BatchableTransformer for F
where
F: Fn(Vec<Node>) -> IndexingStream + Send + Sync + Clone,
{
async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream {
self(nodes)
}
}
/// Lets a boxed trait object be used as a [`BatchableTransformer`] by
/// forwarding every method to the boxed value.
#[async_trait]
impl BatchableTransformer for Box<dyn BatchableTransformer> {
    async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream {
        self.as_ref().batch_transform(nodes).await
    }

    fn concurrency(&self) -> Option<usize> {
        self.as_ref().concurrency()
    }

    fn name(&self) -> &'static str {
        self.as_ref().name()
    }

    // Must be forwarded too: without this override the trait default (`None`)
    // would hide the batch size configured on the boxed transformer.
    fn batch_size(&self) -> Option<usize> {
        self.as_ref().batch_size()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`BatchableTransformer`] by
/// forwarding every method to the shared value.
#[async_trait]
impl BatchableTransformer for Arc<dyn BatchableTransformer> {
    async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream {
        self.as_ref().batch_transform(nodes).await
    }

    fn concurrency(&self) -> Option<usize> {
        self.as_ref().concurrency()
    }

    fn name(&self) -> &'static str {
        self.as_ref().name()
    }

    // Must be forwarded too: without this override the trait default (`None`)
    // would hide the batch size configured on the shared transformer.
    fn batch_size(&self) -> Option<usize> {
        self.as_ref().batch_size()
    }
}
/// Lets a borrowed `&dyn BatchableTransformer` be used wherever a
/// [`BatchableTransformer`] is expected. Every method is forwarded to the
/// referenced trait object.
#[async_trait]
impl BatchableTransformer for &dyn BatchableTransformer {
    async fn batch_transform(&self, nodes: Vec<Node>) -> IndexingStream {
        (*self).batch_transform(nodes).await
    }

    fn concurrency(&self) -> Option<usize> {
        (*self).concurrency()
    }

    // Forwarded explicitly: the trait default would derive the name from the
    // `&dyn BatchableTransformer` type instead of asking the referenced value.
    fn name(&self) -> &'static str {
        (*self).name()
    }

    // Forwarded explicitly: the trait default (`None`) would hide the batch
    // size configured on the referenced transformer.
    fn batch_size(&self) -> Option<usize> {
        (*self).batch_size()
    }
}
/// A source that can be consumed to produce an [`IndexingStream`].
///
/// Implementors must be clonable through [`DynClone`]; the
/// `clone_trait_object!` invocation below makes `Box<dyn Loader>` cloneable.
pub trait Loader: DynClone {
    /// Consumes the loader and returns its stream.
    fn into_stream(self) -> IndexingStream;

    /// Variant of [`Loader::into_stream`] that takes the loader as `Box<Self>`,
    /// which makes it callable on a boxed trait object.
    ///
    /// # Panics
    ///
    /// The default implementation always panics via `unimplemented!`; concrete
    /// loaders are expected to override it.
    fn into_stream_boxed(self: Box<Self>) -> IndexingStream {
        unimplemented!("Please implement into_stream_boxed for your loader, it needs to be implemented on the concrete type")
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(Loader);
// `mockall`-generated mock of `Loader`, compiled only when the `test-utils` feature
// is enabled. `Clone` is mocked as well because `Loader` has `DynClone` as a supertrait.
// NOTE(review): `Loader` has no async methods, so the `#[async_trait]` attribute on the
// mocked impl below looks unnecessary — confirm before removing.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub Loader {}
#[async_trait]
impl Loader for Loader {
fn into_stream(self) -> IndexingStream;
fn into_stream_boxed(self: Box<Self>) -> IndexingStream;
fn name(&self) -> &'static str;
}
impl Clone for Loader {
fn clone(&self) -> Self;
}
}
// Lets a boxed trait object be used as a `Loader`.
impl Loader for Box<dyn Loader> {
// `self` is a `Box<dyn Loader>`, so this call resolves with `Self = dyn Loader` and
// dispatches to the boxed loader's own `into_stream_boxed`.
fn into_stream(self) -> IndexingStream {
Loader::into_stream_boxed(self)
}
// Here `self` is a `Box<Box<dyn Loader>>`; unboxing once yields the `Box<dyn Loader>`,
// whose `into_stream` (above) is then called.
fn into_stream_boxed(self: Box<Self>) -> IndexingStream {
Loader::into_stream(*self)
}
// Reports the name of the boxed loader rather than of the `Box` type.
fn name(&self) -> &'static str {
self.as_ref().name()
}
}
impl Loader for &dyn Loader {
fn into_stream(self) -> IndexingStream {
Loader::into_stream_boxed(Box::new(self))
}
fn into_stream_boxed(self: Box<Self>) -> IndexingStream {
Loader::into_stream(*self)
}
}
/// Turns a single [`Node`] into an [`IndexingStream`].
///
/// Implementors must be `Send + Sync + Debug` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn ChunkerTransformer>` cloneable.
#[async_trait]
pub trait ChunkerTransformer: Send + Sync + Debug + DynClone {
    /// Consumes `node` and returns the resulting stream.
    async fn transform_node(&self, node: Node) -> IndexingStream;

    /// Optional concurrency hint; `None` unless overridden.
    fn concurrency(&self) -> Option<usize> {
        None
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(ChunkerTransformer);
// `mockall`-generated mock of `ChunkerTransformer`, compiled only when the `test-utils`
// feature is enabled. `Debug` is derived and `Clone` is mocked because the trait
// requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub ChunkerTransformer {}
#[async_trait]
impl ChunkerTransformer for ChunkerTransformer {
async fn transform_node(&self, node: Node) -> IndexingStream;
fn name(&self) -> &'static str;
fn concurrency(&self) -> Option<usize>;
}
impl Clone for ChunkerTransformer {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`ChunkerTransformer`] by forwarding
/// every method to the boxed value.
#[async_trait]
impl ChunkerTransformer for Box<dyn ChunkerTransformer> {
    async fn transform_node(&self, node: Node) -> IndexingStream {
        (**self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (**self).concurrency()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`ChunkerTransformer`] by
/// forwarding every method to the shared value.
#[async_trait]
impl ChunkerTransformer for Arc<dyn ChunkerTransformer> {
    async fn transform_node(&self, node: Node) -> IndexingStream {
        (**self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (**self).concurrency()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn ChunkerTransformer` be used wherever a
/// [`ChunkerTransformer`] is expected. Every method is forwarded to the
/// referenced trait object.
#[async_trait]
impl ChunkerTransformer for &dyn ChunkerTransformer {
    async fn transform_node(&self, node: Node) -> IndexingStream {
        (*self).transform_node(node).await
    }

    fn concurrency(&self) -> Option<usize> {
        (*self).concurrency()
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn ChunkerTransformer` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// A cache keyed by [`Node`].
///
/// Implementors must be `Send + Sync + Debug` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn NodeCache>` cloneable.
#[async_trait]
pub trait NodeCache: Send + Sync + Debug + DynClone {
    /// Looks `node` up in the cache and returns a `bool`.
    // NOTE(review): presumably `true` means the node is already cached — confirm
    // against the implementors.
    async fn get(&self, node: &Node) -> bool;

    /// Records `node` in the cache.
    async fn set(&self, node: &Node);

    /// Clears the cache.
    ///
    /// # Panics
    ///
    /// The default implementation always panics via `unimplemented!`.
    async fn clear(&self) -> Result<()> {
        unimplemented!("Clear not implemented")
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(NodeCache);
// `mockall`-generated mock of `NodeCache`, compiled only when the `test-utils`
// feature is enabled. `Debug` is derived and `Clone` is mocked because the trait
// requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub NodeCache {}
#[async_trait]
impl NodeCache for NodeCache {
async fn get(&self, node: &Node) -> bool;
async fn set(&self, node: &Node);
async fn clear(&self) -> Result<()>;
fn name(&self) -> &'static str;
}
impl Clone for NodeCache {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`NodeCache`] by forwarding every
/// method to the boxed value.
#[async_trait]
impl NodeCache for Box<dyn NodeCache> {
    async fn get(&self, node: &Node) -> bool {
        (**self).get(node).await
    }

    async fn set(&self, node: &Node) {
        (**self).set(node).await
    }

    async fn clear(&self) -> Result<()> {
        (**self).clear().await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`NodeCache`] by forwarding
/// every method to the shared value.
#[async_trait]
impl NodeCache for Arc<dyn NodeCache> {
    async fn get(&self, node: &Node) -> bool {
        (**self).get(node).await
    }

    async fn set(&self, node: &Node) {
        (**self).set(node).await
    }

    async fn clear(&self) -> Result<()> {
        (**self).clear().await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn NodeCache` be used wherever a [`NodeCache`] is
/// expected. Every method is forwarded to the referenced trait object.
#[async_trait]
impl NodeCache for &dyn NodeCache {
    async fn get(&self, node: &Node) -> bool {
        (*self).get(node).await
    }

    async fn set(&self, node: &Node) {
        (*self).set(node).await;
    }

    async fn clear(&self) -> Result<()> {
        (*self).clear().await
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn NodeCache` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// Produces [`Embeddings`] for a batch of input strings.
///
/// Implementors must be `Send + Sync + Debug` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn EmbeddingModel>` cloneable.
#[async_trait]
pub trait EmbeddingModel: Send + Sync + Debug + DynClone {
    /// Embeds `input` and returns the embeddings, or an error.
    async fn embed(&self, input: Vec<String>) -> Result<Embeddings>;

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(EmbeddingModel);
// `mockall`-generated mock of `EmbeddingModel`, compiled only when the `test-utils`
// feature is enabled. `Debug` is derived and `Clone` is mocked because the trait
// requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub EmbeddingModel {}
#[async_trait]
impl EmbeddingModel for EmbeddingModel {
async fn embed(&self, input: Vec<String>) -> Result<Embeddings>;
fn name(&self) -> &'static str;
}
impl Clone for EmbeddingModel {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as an [`EmbeddingModel`] by forwarding
/// every method to the boxed value.
#[async_trait]
impl EmbeddingModel for Box<dyn EmbeddingModel> {
    async fn embed(&self, input: Vec<String>) -> Result<Embeddings> {
        (**self).embed(input).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as an [`EmbeddingModel`] by
/// forwarding every method to the shared value.
#[async_trait]
impl EmbeddingModel for Arc<dyn EmbeddingModel> {
    async fn embed(&self, input: Vec<String>) -> Result<Embeddings> {
        (**self).embed(input).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn EmbeddingModel` be used wherever an
/// [`EmbeddingModel`] is expected. Every method is forwarded to the referenced
/// trait object.
#[async_trait]
impl EmbeddingModel for &dyn EmbeddingModel {
    async fn embed(&self, input: Vec<String>) -> Result<Embeddings> {
        (*self).embed(input).await
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn EmbeddingModel` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// Produces [`SparseEmbeddings`] for a batch of input strings.
///
/// Implementors must be `Send + Sync + Debug` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn SparseEmbeddingModel>` cloneable.
#[async_trait]
pub trait SparseEmbeddingModel: Send + Sync + Debug + DynClone {
    /// Embeds `input` and returns the sparse embeddings, or an error.
    async fn sparse_embed(&self, input: Vec<String>) -> Result<SparseEmbeddings>;

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(SparseEmbeddingModel);
// `mockall`-generated mock of `SparseEmbeddingModel`, compiled only when the
// `test-utils` feature is enabled. `Debug` is derived and `Clone` is mocked because
// the trait requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub SparseEmbeddingModel {}
#[async_trait]
impl SparseEmbeddingModel for SparseEmbeddingModel {
async fn sparse_embed(&self, input: Vec<String>) -> Result<SparseEmbeddings>;
fn name(&self) -> &'static str;
}
impl Clone for SparseEmbeddingModel {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`SparseEmbeddingModel`] by
/// forwarding every method to the boxed value.
#[async_trait]
impl SparseEmbeddingModel for Box<dyn SparseEmbeddingModel> {
    async fn sparse_embed(&self, input: Vec<String>) -> Result<SparseEmbeddings> {
        (**self).sparse_embed(input).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`SparseEmbeddingModel`] by
/// forwarding every method to the shared value.
#[async_trait]
impl SparseEmbeddingModel for Arc<dyn SparseEmbeddingModel> {
    async fn sparse_embed(&self, input: Vec<String>) -> Result<SparseEmbeddings> {
        (**self).sparse_embed(input).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn SparseEmbeddingModel` be used wherever a
/// [`SparseEmbeddingModel`] is expected. Every method is forwarded to the
/// referenced trait object.
#[async_trait]
impl SparseEmbeddingModel for &dyn SparseEmbeddingModel {
    async fn sparse_embed(&self, input: Vec<String>) -> Result<SparseEmbeddings> {
        (*self).sparse_embed(input).await
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn SparseEmbeddingModel` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// Answers a [`Prompt`] with a `String`.
///
/// Implementors must be `Debug + Send + Sync` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn SimplePrompt>` cloneable.
#[async_trait]
pub trait SimplePrompt: Debug + Send + Sync + DynClone {
    /// Consumes `prompt` and returns the response text, or an error.
    async fn prompt(&self, prompt: Prompt) -> Result<String>;

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(SimplePrompt);
// `mockall`-generated mock of `SimplePrompt`, compiled only when the `test-utils`
// feature is enabled. `Debug` is derived and `Clone` is mocked because the trait
// requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub SimplePrompt {}
#[async_trait]
impl SimplePrompt for SimplePrompt {
async fn prompt(&self, prompt: Prompt) -> Result<String>;
fn name(&self) -> &'static str;
}
impl Clone for SimplePrompt {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`SimplePrompt`] by forwarding every
/// method to the boxed value.
#[async_trait]
impl SimplePrompt for Box<dyn SimplePrompt> {
    async fn prompt(&self, prompt: Prompt) -> Result<String> {
        (**self).prompt(prompt).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`SimplePrompt`] by
/// forwarding every method to the shared value.
#[async_trait]
impl SimplePrompt for Arc<dyn SimplePrompt> {
    async fn prompt(&self, prompt: Prompt) -> Result<String> {
        (**self).prompt(prompt).await
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn SimplePrompt` be used wherever a [`SimplePrompt`] is
/// expected. Every method is forwarded to the referenced trait object.
#[async_trait]
impl SimplePrompt for &dyn SimplePrompt {
    async fn prompt(&self, prompt: Prompt) -> Result<String> {
        (*self).prompt(prompt).await
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn SimplePrompt` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// Storage backend for [`Node`]s.
///
/// Implementors must be `Debug + Send + Sync` and clonable through
/// [`DynClone`]; the `clone_trait_object!` invocation below makes
/// `Box<dyn Persist>` cloneable.
#[async_trait]
pub trait Persist: Debug + Send + Sync + DynClone {
    /// Prepares the backend; returns an error on failure.
    async fn setup(&self) -> Result<()>;

    /// Stores a single node and returns a node, or an error.
    async fn store(&self, node: Node) -> Result<Node>;

    /// Stores a batch of nodes and returns the resulting stream.
    async fn batch_store(&self, nodes: Vec<Node>) -> IndexingStream;

    /// Optional batch size hint; `None` unless overridden.
    fn batch_size(&self) -> Option<usize> {
        None
    }

    /// Short name of the implementing type: the last `::`-separated segment of
    /// [`std::any::type_name`] for `Self`.
    fn name(&self) -> &'static str {
        let full_path = std::any::type_name::<Self>();
        match full_path.split("::").last() {
            Some(segment) => segment,
            None => full_path,
        }
    }
}

dyn_clone::clone_trait_object!(Persist);
// `mockall`-generated mock of `Persist`, compiled only when the `test-utils`
// feature is enabled. `Debug` is derived and `Clone` is mocked because the trait
// requires `Debug` and `DynClone`.
#[cfg(feature = "test-utils")]
mock! {
#[derive(Debug)]
pub Persist {}
#[async_trait]
impl Persist for Persist {
async fn setup(&self) -> Result<()>;
async fn store(&self, node: Node) -> Result<Node>;
async fn batch_store(&self, nodes: Vec<Node>) -> IndexingStream;
fn batch_size(&self) -> Option<usize>;
fn name(&self) -> &'static str;
}
impl Clone for Persist {
fn clone(&self) -> Self;
}
}
/// Lets a boxed trait object be used as a [`Persist`] backend by forwarding
/// every method to the boxed value.
#[async_trait]
impl Persist for Box<dyn Persist> {
    async fn setup(&self) -> Result<()> {
        (**self).setup().await
    }

    async fn store(&self, node: Node) -> Result<Node> {
        (**self).store(node).await
    }

    async fn batch_store(&self, nodes: Vec<Node>) -> IndexingStream {
        (**self).batch_store(nodes).await
    }

    fn batch_size(&self) -> Option<usize> {
        (**self).batch_size()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a shared (`Arc`) trait object be used as a [`Persist`] backend by
/// forwarding every method to the shared value.
#[async_trait]
impl Persist for Arc<dyn Persist> {
    async fn setup(&self) -> Result<()> {
        (**self).setup().await
    }

    async fn store(&self, node: Node) -> Result<Node> {
        (**self).store(node).await
    }

    async fn batch_store(&self, nodes: Vec<Node>) -> IndexingStream {
        (**self).batch_store(nodes).await
    }

    fn batch_size(&self) -> Option<usize> {
        (**self).batch_size()
    }

    fn name(&self) -> &'static str {
        (**self).name()
    }
}
/// Lets a borrowed `&dyn Persist` be used wherever a [`Persist`] backend is
/// expected. Every method is forwarded to the referenced trait object.
#[async_trait]
impl Persist for &dyn Persist {
    async fn setup(&self) -> Result<()> {
        (*self).setup().await
    }

    async fn store(&self, node: Node) -> Result<Node> {
        (*self).store(node).await
    }

    async fn batch_store(&self, nodes: Vec<Node>) -> IndexingStream {
        (*self).batch_store(nodes).await
    }

    fn batch_size(&self) -> Option<usize> {
        (*self).batch_size()
    }

    // Forwarded explicitly (as the `Box`/`Arc` impls do): the trait default
    // would derive the name from the `&dyn Persist` type itself.
    fn name(&self) -> &'static str {
        (*self).name()
    }
}
/// Hook through which a type can receive [`IndexingDefaults`].
pub trait WithIndexingDefaults {
/// Receives the indexing defaults. The default implementation ignores them.
fn with_indexing_defaults(&mut self, _indexing_defaults: IndexingDefaults) {}
}
/// Hook through which a batch-oriented type can receive [`IndexingDefaults`].
///
/// Declared separately from `WithIndexingDefaults` but with the same method signature.
pub trait WithBatchIndexingDefaults {
/// Receives the indexing defaults. The default implementation ignores them.
fn with_indexing_defaults(&mut self, _indexing_defaults: IndexingDefaults) {}
}
// The trait object itself uses the no-op default, so the defaults are ignored.
impl WithIndexingDefaults for dyn Transformer {}
// A boxed transformer hands the defaults to the `dyn Transformer` inside the box,
// i.e. to the impl directly above.
impl WithIndexingDefaults for Box<dyn Transformer> {
fn with_indexing_defaults(&mut self, indexing_defaults: IndexingDefaults) {
self.as_mut().with_indexing_defaults(indexing_defaults);
}
}
// The trait object itself uses the no-op default, so the defaults are ignored.
impl WithBatchIndexingDefaults for dyn BatchableTransformer {}
// A boxed batch transformer hands the defaults to the `dyn BatchableTransformer`
// inside the box, i.e. to the impl directly above.
impl WithBatchIndexingDefaults for Box<dyn BatchableTransformer> {
fn with_indexing_defaults(&mut self, indexing_defaults: IndexingDefaults) {
self.as_mut().with_indexing_defaults(indexing_defaults);
}
}
// Closures and functions usable as transformers get the no-op default.
impl<F> WithIndexingDefaults for F where F: Fn(Node) -> Result<Node> {}
// Closures and functions usable as batch transformers get the no-op default.
impl<F> WithBatchIndexingDefaults for F where F: Fn(Vec<Node>) -> IndexingStream {}
// The generated mocks (available only with the `test-utils` feature) use the
// no-op defaults as well.
#[cfg(feature = "test-utils")]
impl WithIndexingDefaults for MockTransformer {}
#[cfg(feature = "test-utils")]
impl WithBatchIndexingDefaults for MockBatchableTransformer {}