// memory-indexer 0.3.1
//
// An in-memory full-text fuzzy search indexer.
// Documentation
use super::types::{
    DocData, DocId, DomainIndex, InMemoryIndex, IndexState, Posting, SNAPSHOT_VERSION,
    SnapshotData, TermDomain, TermId,
};

impl InMemoryIndex {
    pub(super) fn index_state_mut(&mut self, index_name: &str) -> &mut IndexState {
        self.indexes
            .entry(index_name.to_string())
            .or_insert_with(|| IndexState {
                version: SNAPSHOT_VERSION,
                ..Default::default()
            })
    }
}

/// Mutable handle over a single [`IndexState`].
///
/// The methods on this type read and modify the borrowed state in place; the
/// handle itself owns no index data.
pub struct Index<'a> {
    /// The index state this handle operates on, borrowed exclusively for `'a`.
    pub(super) state: &'a mut IndexState,
}

impl<'a> Index<'a> {
    fn ensure_domain(&mut self, domain: TermDomain) -> &mut DomainIndex {
        self.state.domains.entry(domain).or_default()
    }

    pub(super) fn clear(&mut self) {
        *self.state = IndexState::default();
    }

    pub(super) fn import_snapshot(&mut self, snapshot: SnapshotData) {
        self.state.version = snapshot.version;
        self.state.terms = snapshot.terms;
        self.state.docs = snapshot.docs;
        self.state.doc_ids = snapshot.doc_ids;
        self.state.domains = snapshot.domains;
        self.state.total_len = snapshot.total_len;
        self.state.domain_total_len = snapshot.domain_total_len;

        self.rebuild_indexes();
    }

    fn rebuild_indexes(&mut self) {
        self.state.term_index.clear();
        self.state.term_index.reserve(self.state.terms.len());
        for (idx, term) in self.state.terms.iter().enumerate() {
            self.state.term_index.insert(term.clone(), idx as TermId);
        }

        self.state.doc_index.clear();
        self.state.free_docs.clear();
        for (idx, doc_id) in self.state.doc_ids.iter().enumerate() {
            match self.state.docs.get(idx).and_then(|doc| doc.as_ref()) {
                Some(_) => {
                    self.state.doc_index.insert(doc_id.clone(), idx as DocId);
                }
                None => self.state.free_docs.push(idx as DocId),
            }
        }

        self.state.dirty.clear();
        self.state.deleted.clear();
    }

    /// Records `freq` occurrences of `term` for document `doc` in `domain`.
    ///
    /// The domain entry and the term's posting list are created on demand. If
    /// `doc` already has a posting for this term, `freq` is added to it;
    /// otherwise a new posting is appended. A `freq` of zero is a no-op.
    ///
    /// After the postings change, the domain's `aux` data is cleared
    /// (presumably derived data that has to be rebuilt from the postings —
    /// confirm against `DomainIndex`).
    pub(super) fn add_posting(&mut self, term: TermId, domain: TermDomain, doc: DocId, freq: u32) {
        if freq == 0 {
            return;
        }
        let domain_index = self.ensure_domain(domain);
        let postings = domain_index.postings.entry(term).or_default();
        if let Some(existing) = postings.iter_mut().find(|p| p.doc == doc) {
            // Saturate instead of panicking (debug) or wrapping to a small
            // value (release) if the accumulated frequency exceeds `u32::MAX`.
            existing.freq = existing.freq.saturating_add(freq);
        } else {
            postings.push(Posting { doc, freq });
        }

        // Clearing is safe regardless of what state a panicking writer left
        // behind, so recover a poisoned lock rather than skipping the
        // invalidation and leaving stale data in place.
        let mut aux = domain_index
            .aux
            .write()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        aux.clear();
    }

    /// Removes every posting of `doc_id` for the terms listed in `doc_data`.
    ///
    /// For each entry in `doc_data.term_freqs` and each domain yielded by its
    /// `positive_domains()`, the posting belonging to `doc_id` is dropped from
    /// that term's posting list. A posting list that ends up empty is removed
    /// from the domain altogether. Whenever a domain's postings changed, its
    /// `aux` data is cleared.
    ///
    /// Domains and terms that are not present in the index are skipped.
    pub(super) fn remove_doc_terms(&mut self, doc_id: DocId, doc_data: &DocData) {
        for entry in &doc_data.term_freqs {
            for (domain, _) in entry.positive_domains() {
                let Some(domain_index) = self.state.domains.get_mut(&domain) else {
                    continue;
                };
                let Some(postings) = domain_index.postings.get_mut(&entry.term) else {
                    continue;
                };

                let before = postings.len();
                postings.retain(|p| p.doc != doc_id);
                let shrank = postings.len() != before;
                let emptied = postings.is_empty();

                // Drop the term as soon as its list is empty; a repeated
                // (term, domain) pair then simply finds nothing to do.
                if emptied {
                    domain_index.postings.remove(&entry.term);
                }

                if shrank || emptied {
                    // Clearing is safe even after a writer panicked, so
                    // recover a poisoned lock instead of leaving stale data.
                    let mut aux = domain_index
                        .aux
                        .write()
                        .unwrap_or_else(|poisoned| poisoned.into_inner());
                    aux.clear();
                }
            }
        }
    }
}

impl From<Index<'_>> for Option<SnapshotData> {
    /// Captures the index as a snapshot, or returns `None` when no slot in
    /// `docs` holds a document (including when `docs` is empty).
    ///
    /// The snapshot is built from clones of the state's data; the state
    /// itself is left untouched.
    fn from(maps: Index<'_>) -> Self {
        let state = &*maps.state;
        let has_live_doc = state.docs.iter().any(|slot| slot.is_some());

        // Nothing is cloned unless at least one document is present.
        has_live_doc.then(|| SnapshotData {
            version: state.version,
            terms: state.terms.clone(),
            docs: state.docs.clone(),
            doc_ids: state.doc_ids.clone(),
            domains: state.domains.clone(),
            total_len: state.total_len,
            domain_total_len: state.domain_total_len,
        })
    }
}