milli-core 1.15.1

Meilisearch HTTP server
Documentation
use std::io::Write;

use big_s::S;
use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};

use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
use crate::{db_snap, Criterion, FilterableAttributesRule, Index};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
use crate::constants::RESERVED_GEO_FIELD_NAME;

pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let path = tempfile::tempdir().unwrap();
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path, true).unwrap();

    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();

    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_criteria(criteria.to_vec());
    builder.set_filterable_fields(vec![
        FilterableAttributesRule::Field(S("tag")),
        FilterableAttributesRule::Field(S("asc_desc_rank")),
        FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
        FilterableAttributesRule::Field(S("opt1")),
        FilterableAttributesRule::Field(S("opt1.opt2")),
        FilterableAttributesRule::Field(S("tag_in")),
    ]);
    builder.set_sortable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
    });
    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
    });
    builder.set_searchable_fields(vec![S("title"), S("description")]);
    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();

    // index documents
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let rtxn = index.read_txn().unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
    let mut new_fields_ids_map = db_fields_ids_map.clone();

    let embedders = EmbeddingConfigs::default();
    let mut indexer = indexer::DocumentOperation::new();

    let mut file = tempfile::tempfile().unwrap();
    file.write_all(CONTENT.as_bytes()).unwrap();
    file.sync_all().unwrap();
    let payload = unsafe { memmap2::Mmap::map(&file).unwrap() };

    // index documents
    indexer.replace_documents(&payload).unwrap();

    let indexer_alloc = Bump::new();
    let (document_changes, operation_stats, primary_key) = indexer
        .into_changes(
            &indexer_alloc,
            &index,
            &rtxn,
            None,
            &mut new_fields_ids_map,
            &|| false,
            Progress::default(),
        )
        .unwrap();

    if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
        panic!("{error}");
    }

    indexer::index(
        &mut wtxn,
        &index,
        &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
        config.grenad_parameters(),
        &db_fields_ids_map,
        new_fields_ids_map,
        primary_key,
        &document_changes,
        embedders,
        &|| false,
        &Progress::default(),
    )
    .unwrap();

    wtxn.commit().unwrap();
    drop(rtxn);

    index
}

#[test]
fn snapshot_integration_dataset() {
    let index = setup_search_index_with_criteria(&[Criterion::Attribute]);
    db_snap!(index, word_position_docids, @"3c9347a767bceef3beb31465f1e5f3ae");
}