vectus 0.1.2

A vector database implemented in Rust for learning purposes.
Documentation
#![allow(unused)]
use ndarray::{Array1, Array2};
use std::env;

pub mod document;
pub(crate) mod hnsw;
pub mod model;
pub use hnsw::metric;

use document::{DocBuilder, Document};
use hnsw::{metric::Metric, HNSWInitializer, HNSW};
use model::{Model, ModelType};

/// Selects where a [`Vectus`] instance keeps its embedding index.
///
/// The enum is a plain tag with no payload, so it derives `Clone`/`Copy` and
/// equality in addition to `Debug`; callers can copy and compare it freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StorageType {
    /// Embeddings are inserted into the HNSW index held by the `Vectus` value.
    InMemory,
    /// Not implemented yet: storing an embedding with this variant fails at runtime.
    Persistent,
}

/// A vector store that pairs an embedding model with an HNSW index over documents.
pub struct Vectus {
    /// Model queried for the embeddings of both documents and search queries.
    pub model: Model,
    /// Embedding matrix written by `add_documents`: one row per embedded document,
    /// one column per embedding component. Starts out with shape `(0, 0)`.
    pub embeddings: Array2<f64>,
    /// Documents added so far; HNSW search results are used as indices into this vector.
    pub documents: Vec<Document>,
    /// Selects how embeddings are stored; only `StorageType::InMemory` is implemented.
    storage_type: StorageType,
    /// HNSW index that embeddings are inserted into and searched against.
    hnsw: HNSW,
}

impl Vectus {
    /// Creates an empty store backed by a fresh HNSW index.
    ///
    /// * `model_name` - embedding backend handed to [`Model::new`].
    /// * `storage_type` - where embeddings are stored; only
    ///   [`StorageType::InMemory`] is implemented.
    /// * `metric` - distance metric forwarded to the HNSW index.
    ///
    /// # Panics
    ///
    /// Panics if the `OPENAI_API_KEY` environment variable is not set.
    pub fn new(model_name: ModelType, storage_type: StorageType, metric: Metric) -> Vectus {
        let api_key =
            env::var("OPENAI_API_KEY").expect("Please set the OPENAI_API_KEY environment variable");

        // Honour the caller's choice of backend; this used to be hard-coded to
        // `ModelType::OpenAI`, silently ignoring `model_name`.
        // NOTE(review): the key is always read from OPENAI_API_KEY - confirm this is
        // what non-OpenAI model types (if any) expect.
        let model = Model::new(model_name, api_key);

        // Fixed index construction parameters; see `HNSWInitializer` for their meaning.
        let initializer = HNSWInitializer {
            max_level: 12,
            ef_construction: 350,
            m: 32,
            m_max: 64,
            norm: 3.0,
            entry: None,
            metric,
        };

        Vectus {
            model,
            embeddings: Array2::zeros((0, 0)),
            documents: Vec::new(),
            storage_type,
            hnsw: HNSW::new(initializer),
        }
    }

    /// Returns up to `k` stored documents that the HNSW index ranks closest to `query`.
    ///
    /// The query is embedded with `self.model` and the index is searched with that
    /// embedding. Fewer than `k` documents are returned when the index yields fewer
    /// hits; an empty vector is returned when `k == 0` or no documents have been added.
    ///
    /// # Panics
    ///
    /// Panics if the model fails to produce an embedding for `query` (the return type
    /// leaves no room to report the error).
    pub async fn get_k_relevant_documents(&self, query: &String, k: usize) -> Vec<Document> {
        // Nothing can match: skip the embedding request and the search entirely.
        if k == 0 || self.documents.is_empty() {
            return Vec::new();
        }

        let query_embedding = match self.model.get_embedding(query).await {
            Ok(embedding) => embedding,
            Err(e) => panic!("Error getting embedding: {}", e),
        };

        let query_emb = Array1::from_vec(query_embedding);
        let hits = self.hnsw.search(query_emb, self.hnsw.len(), k);

        // The search may yield fewer than `k` ids, so bound the iteration by what was
        // actually returned instead of indexing `0..k`.
        // Ids are assigned as the index length at insertion time, which `add_documents`
        // keeps equal to the document's position in `self.documents`.
        hits.iter()
            .take(k)
            .map(|&doc_idx| self.documents[doc_idx].clone())
            .collect()
    }

    /// Embeds `docs`, inserts them into the index and appends them to the store.
    ///
    /// All embeddings are fetched and validated before any state is modified, so an
    /// error leaves the store unchanged. On success `self.embeddings` holds one row for
    /// every document in `self.documents`, including those from earlier calls.
    ///
    /// # Errors
    ///
    /// Returns `Err` when `docs` is empty, when the storage type is not implemented,
    /// when the model fails to embed a document, or when the embedding dimensions are
    /// inconsistent with each other or with the embeddings already stored.
    pub async fn add_documents(&mut self, docs: &Vec<Document>) -> Result<(), String> {
        if docs.is_empty() {
            return Err("No documents to add!".to_string());
        }

        // Fail before spending any embedding requests on a backend that cannot store them.
        if let StorageType::Persistent = self.storage_type {
            return Err(format!("{:?} Not implemented yet!", self.storage_type));
        }

        let mut new_embeddings: Vec<Vec<f64>> = Vec::with_capacity(docs.len());
        for doc in docs {
            let embedding: Vec<f64> = self
                .model
                .get_embedding(&doc.page_content)
                .await
                .map_err(|e| format!("Error getting embedding: {}", e))?;
            new_embeddings.push(embedding);
        }

        let dim = new_embeddings[0].len();
        if new_embeddings.iter().any(|embedding| embedding.len() != dim) {
            return Err("Embeddings have inconsistent dimensions".to_string());
        }

        let (stored_rows, stored_cols) = self.embeddings.dim();
        if stored_rows > 0 && stored_cols != dim {
            return Err(format!(
                "Embedding dimension {} does not match stored dimension {}",
                dim, stored_cols
            ));
        }

        // Rebuild the matrix as "existing rows followed by new rows" so it stays aligned
        // with `self.documents` across repeated calls instead of being overwritten.
        let total_rows = stored_rows + new_embeddings.len();
        let mut flat: Vec<f64> = Vec::with_capacity(total_rows * dim);
        flat.extend(self.embeddings.iter().copied());
        for embedding in &new_embeddings {
            flat.extend_from_slice(embedding);
        }
        let matrix = Array2::from_shape_vec((total_rows, dim), flat).map_err(|e| e.to_string())?;

        // Everything is validated; commit. Insertion order matches push order, which
        // keeps index ids equal to positions in `self.documents`.
        for (doc, embedding) in docs.iter().zip(new_embeddings) {
            self.store_emb_db(&Array1::from_vec(embedding));
            self.documents.push(doc.clone());
        }
        self.embeddings = matrix;

        Ok(())
    }

    /// Stores one embedding in the backend selected by `self.storage_type`.
    ///
    /// For `InMemory` the embedding is inserted into the HNSW index, using the index's
    /// current length as its id.
    ///
    /// # Panics
    ///
    /// Panics for `StorageType::Persistent`, which is not implemented.
    fn store_emb_db(&mut self, embedding: &Array1<f64>) {
        match self.storage_type {
            StorageType::InMemory => {
                self.hnsw.insert(embedding, self.hnsw.len());
            }
            StorageType::Persistent => {
                panic!("{:?} Not implemented yet!", self.storage_type);
            }
        }
    }
}