// axiomsync 1.0.0
//
// Core data-processing engine for the AxiomSync local retrieval runtime.
// Documentation
use chrono::Utc;

use crate::embedding::{embed_text, tokenize_set};
use crate::models::SearchFilter;
use crate::uri::AxiomUri;

use super::exact::{ExactQueryKeys, exact_match_score};
use super::rank::{
    LexicalCorpusView, LexicalDocView, cosine, exact_confidence_bonus, lexical_score, path_score,
    recency_score, score_ordering, uri_path_prefix_match,
};
use super::{InMemoryIndex, ScoredRecord};

impl InMemoryIndex {
    /// Scores every indexed record against `query` and returns the best `limit` hits.
    ///
    /// A record is skipped when:
    /// - `target_uri` is given and `uri_path_prefix_match` rejects the record's URI,
    /// - `filter` projects to an allow-list of URIs that does not contain the record, or
    /// - `score_threshold` is given and the record's final score is below it.
    ///
    /// The final score is the sum of the weighted signals
    /// `W_PATH * path + W_RECENCY * recency + W_SPARSE * sparse + W_DENSE * dense
    /// + W_EXACT * exact + W_EXACT_HIGH_CONF_BOOST * exact^3`, plus the value returned by
    /// `exact_confidence_bonus(exact)`.
    ///
    /// # Parameters
    /// - `query`: raw query text; it is embedded, tokenized and lower-cased here.
    /// - `target_uri`: optional URI restricting which records are considered and feeding
    ///   the path signal.
    /// - `limit`: maximum number of hits returned; `0` yields an empty vector.
    /// - `score_threshold`: optional minimum final score.
    /// - `filter`: optional search filter, resolved through `filter_projection_uris`.
    ///
    /// # Returns
    /// Hits ordered by `score_ordering`, truncated to `limit`. Each hit carries the
    /// individual signal values alongside the combined score.
    pub fn search(
        &self,
        query: &str,
        target_uri: Option<&AxiomUri>,
        limit: usize,
        score_threshold: Option<f32>,
        filter: Option<&SearchFilter>,
    ) -> Vec<ScoredRecord> {
        // Nothing could survive the final truncation, so skip scoring the whole index.
        if limit == 0 {
            return Vec::new();
        }

        // Query-side features are derived once and reused for every record.
        let exact_query = ExactQueryKeys::from_query(query);
        let q_embed = embed_text(query);
        let q_tokens = tokenize_set(query);
        let q_token_list = crate::embedding::tokenize_vec(query);
        let query_lower = query.to_lowercase();
        let target_uri_text = target_uri.map(AxiomUri::to_string_uri);
        let target_scope_root =
            target_uri.map(|target| format!("axiom://{}", target.scope().as_str()));
        // Mean document length, floored at 1.0 (and 1.0 for an empty index).
        let avg_doc_length = if self.records.is_empty() {
            1.0
        } else {
            (super::usize_to_f32(self.total_doc_length) / super::usize_to_f32(self.records.len()))
                .max(1.0)
        };
        let filter_projection = self.filter_projection_uris(filter);
        // A single timestamp keeps the recency signal consistent across all records.
        let now = Utc::now();

        let mut scored = Vec::new();
        for (arc_uri, record) in self.records.iter() {
            // Restrict to records accepted by the target URI prefix check.
            if let Some(target) = target_uri_text.as_deref()
                && !uri_path_prefix_match(&record.uri, target)
            {
                continue;
            }
            // Restrict to the URIs allowed by the filter, when it produced an allow-list.
            if let Some(allowed_uris) = filter_projection.as_ref()
                && !allowed_uris.contains(record.uri.as_str())
            {
                continue;
            }

            let uri = record.uri.as_str();
            // A record without a stored vector is compared against an empty slice.
            let dense = cosine(&q_embed, self.vectors.get(uri).map_or(&[], Vec::as_slice));
            let sparse = lexical_score(
                &q_token_list,
                &q_tokens,
                &query_lower,
                LexicalDocView {
                    term_freq: self.term_freqs.get(uri),
                    token_set: self.token_sets.get(uri),
                    text_lower: self.raw_text_lower.get(uri).map(String::as_str),
                    doc_len: self.doc_lengths.get(uri).copied().unwrap_or(0),
                },
                LexicalCorpusView {
                    doc_freqs: &self.doc_freqs,
                    total_docs: self.records.len(),
                    avg_doc_len: avg_doc_length,
                },
            );
            let recency = recency_score(now, record.updated_at);
            let path = path_score(
                uri,
                target_uri_text.as_deref(),
                target_scope_root.as_deref(),
            );
            let exact = exact_match_score(&exact_query, self.exact_keys.get(uri), record);
            // W_EXACT * exact + W_EXACT_HIGH_CONF_BOOST * exact^3: the cubic term adds
            // extra weight that grows with the exact-match score.
            let exact_component = super::W_EXACT.mul_add(
                exact,
                super::W_EXACT_HIGH_CONF_BOOST * exact * exact * exact,
            );
            let exact_bonus = exact_confidence_bonus(exact);

            // Weighted sum of all signals, accumulated innermost-first via fused
            // multiply-add; the nesting order is kept fixed so results stay stable.
            let score = exact_bonus
                + super::W_PATH.mul_add(
                    path,
                    super::W_RECENCY.mul_add(
                        recency,
                        super::W_SPARSE
                            .mul_add(sparse, super::W_DENSE.mul_add(dense, exact_component)),
                    ),
                );
            if let Some(threshold) = score_threshold
                && score < threshold
            {
                continue;
            }

            scored.push(ScoredRecord {
                uri: arc_uri.clone(),
                is_leaf: record.is_leaf,
                depth: record.depth,
                exact,
                dense,
                sparse,
                recency,
                path,
                score,
            });
        }

        scored.sort_by(score_ordering);
        scored.truncate(limit);
        scored
    }

    /// Runs [`InMemoryIndex::search`] without a score threshold and keeps only non-leaf
    /// records (`is_leaf == false`), returning at most `limit` of them.
    ///
    /// The full ranking is requested from `search` rather than a capped over-fetch:
    /// `search` scores and sorts every record regardless of its `limit`, so a cap saves
    /// no work, while it could hide lower-ranked non-leaf records whenever leaf records
    /// fill the capped window.
    ///
    /// # Parameters
    /// - `query`, `target_uri`, `filter`: forwarded unchanged to `search`.
    /// - `limit`: maximum number of non-leaf hits returned; `0` yields an empty vector.
    ///
    /// # Returns
    /// Non-leaf hits in the order produced by `search` (sorted by `score_ordering`).
    pub fn search_directories(
        &self,
        query: &str,
        target_uri: Option<&AxiomUri>,
        limit: usize,
        filter: Option<&SearchFilter>,
    ) -> Vec<ScoredRecord> {
        if limit == 0 {
            return Vec::new();
        }

        // `search` already returns hits sorted by `score_ordering`, and filtering keeps
        // relative order, so no second sort is needed before taking the first `limit`.
        self.search(query, target_uri, usize::MAX, None, filter)
            .into_iter()
            .filter(|hit| !hit.is_leaf)
            .take(limit)
            .collect()
    }
}