seekstorm 3.2.1

use std::{
    collections::HashMap,
    env::current_exe,
    fs::{self, File},
    path::{Path, PathBuf},
    time::Instant,
};

use itertools::Itertools;
use serde_json::Value;
use std::collections::HashSet;
use utoipa::OpenApi;

use seekstorm::{
    commit::Commit,
    highlighter::highlighter,
    index::{
        AccessType, ApikeyObject, ApikeyQuotaObject, Close, Clustering, CreateIndexRequest,
        DeleteDocument, DeleteDocuments, DeleteDocumentsByQuery, Document, DocumentCompression,
        FileType, FrequentwordType, GetDocumentRequest, GetIteratorRequest, IS_AVX2, IS_NEON,
        IS_SYSTEM_LE, IndexArc, IndexDocument, IndexDocuments, IndexMetaObject,
        IndexResponseObject, LexicalSimilarity, QueryCompletion, SchemaField, SearchRequestObject,
        SearchResultObject, SpellingCorrection, StemmerType, StopwordType, Synonym, TokenizerType,
        UpdateDocument, UpdateDocuments, create_index, open_index,
    },
    ingest::IndexPdfBytes,
    iterator::{GetIterator, IteratorResult},
    search::{Search, SearchMode},
    utils::decode_bytes_from_base64_string,
    vector::Inference,
};

use crate::{VERSION, http_server::calculate_hash};

const APIKEY_PATH: &str = "apikey.json";

/// Save a file via a temporary file followed by a rename.
///
/// `content` is first written to a sibling file that has the same path as `path` but
/// with its extension replaced by `bak`; that file is then renamed onto `path`, so
/// `path` is never opened for writing directly.
///
/// Failures are reported on stdout instead of panicking:
/// - if the temporary file cannot be written, `path` is left untouched;
/// - if the rename fails, `path` is left untouched as well.
///
/// In both failure cases the temporary file is removed on a best-effort basis.
pub(crate) fn save_file_atomically(path: &PathBuf, content: String) {
    let temp_path = path.with_extension("bak");

    if let Err(e) = fs::write(&temp_path, content) {
        println!("error: {e:?}");
        // A failed write may still have created a (partial) temporary file.
        let _ = fs::remove_file(&temp_path);
        return;
    }

    if let Err(e) = fs::rename(&temp_path, path) {
        println!("error: {e:?}");
        // Do not leave a stale temporary file behind when the swap did not happen.
        let _ = fs::remove_file(&temp_path);
    }
}

pub(crate) fn save_apikey_data(apikey: &ApikeyObject, index_path: &PathBuf) {
    let apikey_id: u64 = apikey.id;

    let apikey_id_path = Path::new(&index_path).join(apikey_id.to_string());
    let apikey_persistence_json = serde_json::to_string(&apikey).unwrap();
    let apikey_persistence_path = Path::new(&apikey_id_path).join(APIKEY_PATH);
    save_file_atomically(&apikey_persistence_path, apikey_persistence_json);
}

/// Live
///
/// Returns a live message with the SeekStorm server version.
#[utoipa::path(
    tag = "Info",
    get,
    path = "/api/v1/live",
    responses(
        (status = 200, description = "SeekStorm server is live", body = String),
    )
)]
// Builds the liveness message: server name, version and a suffix naming the SIMD
// flag (`IS_AVX2`, then `IS_NEON`) that is set, or stating that SIMD is disabled.
pub(crate) fn live_api() -> String {
    // `IS_NEON` is only consulted when `IS_AVX2` is false.
    let simd_suffix = match *IS_AVX2 {
        true => " (AVX2 enabled)",
        false if *IS_NEON => " (NEON enabled)",
        false => " (SIMD disabled)",
    };
    format!("SeekStorm server {VERSION}{simd_suffix}")
}

/// Create API Key
///
/// Creates an API key and returns the Base64 encoded API key.  
/// Expects the Base64 encoded master API key in the header.  
/// Use the **master API key displayed** in the server console at startup.
///  
/// WARNING: make sure to set the MASTER_KEY_SECRET environment variable to a secret, otherwise your generated API keys will be compromised.  
/// For development purposes you may also use the SeekStorm server console command 'create' to create a demo API key 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA='.
#[utoipa::path(
    tag = "API Key",
    post,
    path = "/api/v1/apikey",
    params(
        ("apikey" = String, Header, description = "YOUR_MASTER_API_KEY",example="BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB="),
    ),
    request_body = inline(ApikeyQuotaObject),
    responses(
        (status = 200, description = "API key created, returns Base64 encoded API key", body = String),
        (status = UNAUTHORIZED, description = "master_apikey invalid"),
        (status = UNAUTHORIZED, description = "master_apikey missing")
    )
)]
pub(crate) fn create_apikey_api<'a>(
    index_path: &'a PathBuf,
    apikey_quota_request_object: ApikeyQuotaObject,
    apikey: &[u8],
    apikey_list: &'a mut HashMap<u128, ApikeyObject>,
) -> &'a mut ApikeyObject {
    let apikey_hash_u128 = calculate_hash(&apikey) as u128;

    let mut apikey_id: u64 = 0;
    let mut apikey_list_vec: Vec<(&u128, &ApikeyObject)> = apikey_list.iter().collect();
    apikey_list_vec.sort_by_key(|a| a.1.id);
    for value in apikey_list_vec {
        if value.1.id == apikey_id {
            apikey_id = value.1.id + 1;
        } else {
            break;
        }
    }

    let apikey_object = ApikeyObject {
        id: apikey_id,
        apikey_hash: apikey_hash_u128,
        quota: apikey_quota_request_object,
        index_list: HashMap::new(),
    };

    let apikey_id_path = Path::new(&index_path).join(apikey_id.to_string());
    fs::create_dir_all(apikey_id_path).unwrap();

    save_apikey_data(&apikey_object, index_path);

    apikey_list.insert(apikey_hash_u128, apikey_object);
    apikey_list.get_mut(&apikey_hash_u128).unwrap()
}

/// Delete API Key
///
/// Deletes an API key and returns the number of remaining API keys.
/// Expects the Base64 encoded master API key in the header.
/// WARNING: This will delete all indices and documents associated with the API key.
#[utoipa::path(
    delete,
    tag = "API Key",
    path = "/api/v1/apikey",
    params(
        ("apikey" = String, Header, description = "YOUR_MASTER_API_KEY",example="BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB="),
    ),
    responses(
        (status = 200, description = "API key deleted, returns number of remaining API keys", body = u64),
        (status = UNAUTHORIZED, description = "master_apikey invalid"),
        (status = UNAUTHORIZED, description = "master_apikey missing")
    )
)]
pub(crate) fn delete_apikey_api(
    index_path: &PathBuf,
    apikey_list: &mut HashMap<u128, ApikeyObject>,
    apikey_hash: u128,
) -> Result<u64, String> {
    if let Some(apikey_object) = apikey_list.get(&apikey_hash) {
        let apikey_id_path = Path::new(&index_path).join(apikey_object.id.to_string());
        println!("delete path {}", apikey_id_path.to_string_lossy());
        fs::remove_dir_all(&apikey_id_path).unwrap();

        apikey_list.remove(&apikey_hash);
        Ok(apikey_list.len() as u64)
    } else {
        Err("not found".to_string())
    }
}

/// Open all indices below a single apikey
pub(crate) async fn open_all_indices(
    index_path: &PathBuf,
    index_list: &mut HashMap<u64, IndexArc>,
) {
    if !Path::exists(index_path) {
        fs::create_dir_all(index_path).unwrap();
    }

    for result in fs::read_dir(index_path).unwrap() {
        let path = result.unwrap();
        if path.path().is_dir() {
            let single_index_path = path.path();

            let index_arc = match open_index(&single_index_path).await {
                Ok(index_arc) => index_arc,
                Err(err) => {
                    println!("{} {}", err, single_index_path.display());
                    continue;
                }
            };

            let index_id = index_arc.read().await.meta.id;

            index_list.insert(index_id, index_arc);
        }
    }
}

/// Open api key
pub(crate) async fn open_apikey(
    index_path: &PathBuf,
    apikey_list: &mut HashMap<u128, ApikeyObject>,
) -> bool {
    let apikey_path = Path::new(&index_path).join(APIKEY_PATH);
    match fs::read_to_string(apikey_path) {
        Ok(apikey_string) => {
            let mut apikey_object: ApikeyObject = serde_json::from_str(&apikey_string).unwrap();

            open_all_indices(index_path, &mut apikey_object.index_list).await;
            apikey_list.insert(apikey_object.apikey_hash, apikey_object);

            true
        }
        Err(_) => false,
    }
}

/// Open all apikeys in the specified path
pub(crate) async fn open_all_apikeys(
    index_path: &PathBuf,
    apikey_list: &mut HashMap<u128, ApikeyObject>,
) -> bool {
    let mut test_index_flag = false;
    if !Path::exists(index_path) {
        println!("index path not found: {} ", index_path.to_string_lossy());
        fs::create_dir_all(index_path).unwrap();
    }

    for result in fs::read_dir(index_path).unwrap() {
        let path = result.unwrap();
        if path.path().is_dir() {
            let single_index_path = path.path();
            test_index_flag |= open_apikey(&single_index_path, apikey_list).await;
        }
    }
    test_index_flag
}

/// Create Index
///
/// Create an index within the directory associated with the specified API key and return the index_id.
#[utoipa::path(
    post,
    tag = "Index",
    path = "/api/v1/index",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
    ),


    request_body(
        content = CreateIndexRequest,
        examples(
            ("Example: Lexical index" = (value = json!({
				"schema":[{
					"field": "title", 
					"field_type": "Text", 
					"store": true, 
					"index_lexical": true,
					"boost":10.0
				},
					{
					"field": "body",
					"field_type": "Text", 
					"store": true, 
					"index_lexical": true,
					"longest": true
				},
				{
					"field": "url", 
					"field_type": "String32", 
					"store": true, 
					"index_lexical": false
				}],
				"index_name": "wikipedia",
				"similarity": "Bm25fProximity",
				"tokenizer": "UnicodeAlphanumeric"
			}))),
			("Example: Hybrid index" = (value = json!({
				"schema":[{
					"field": "title", 
					"field_type": "Text", 
					"store": true, 
					"index_lexical": true,
					"index_vector": true
				},
				{
					"field": "body",
					"field_type": "Text", 
					"store": true, 
					"index_lexical": true,
					"index_vector": true
				},
					{
					"field": "url", 
					"field_type": "String32", 
					"store": true, 
					"index_lexical": false,
					"index_vector": false
				}],
				"index_name": "wikipedia",
				"similarity": "Bm25fProximity",
				"tokenizer": "UnicodeAlphanumeric",
				"clustering": "Auto",
				"inference": {"Model2Vec": { "model": "PotionBase2M", "chunk_size": 1000, "quantization": "ScalarQuantizationI8" }}
			}))),
            ("Example: Vector index" = (value = json!({
				"schema":[
				{
					"field":"vector",
					"field_type":"Json",
					"store":false,
					"index_lexical":false,
					"index_vector":true
				},
				{
					"field":"index",
					"field_type":"Text",
					"store":true,
					"index_lexical":false,
					"index_vector":false
				}],
				"index_name": "sift1m",
				"clustering": "Auto",
				"inference": {
					"External": { "dimensions": 128, "precision": "F32", "quantization": "ScalarQuantizationI8", "similarity": "Euclidean" }
				}
			})))
        )
    ),

    responses(
        (status = OK, description = "Index created, returns the index_id", body = u64),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "API key does not exists"),
        (status = UNAUTHORIZED, description = "API key is missing"),
        (status = UNAUTHORIZED, description = "API key does not exists")
    )
)]
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_index_api<'a>(
    index_path: &'a PathBuf,
    index_name: String,
    schema: Vec<SchemaField>,
    lexical_similarity: LexicalSimilarity,
    tokenizer: TokenizerType,
    stemmer: StemmerType,
    stop_words: StopwordType,
    frequent_words: FrequentwordType,
    ngram_indexing: u8,
    document_compression: DocumentCompression,
    synonyms: Vec<Synonym>,
    force_shard_number: Option<usize>,
    apikey_object: &'a mut ApikeyObject,
    spelling_correction: Option<SpellingCorrection>,
    query_completion: Option<QueryCompletion>,
    mute: bool,
    clustering: Clustering,
    inference: Inference,
) -> u64 {
    let mut index_id: u64 = 0;
    for id in apikey_object.index_list.keys().sorted() {
        if *id == index_id {
            index_id = id + 1;
        } else {
            break;
        }
    }

    let index_id_path = Path::new(&index_path)
        .join(apikey_object.id.to_string())
        .join(index_id.to_string());
    fs::create_dir_all(&index_id_path).unwrap();

    let meta = IndexMetaObject {
        id: index_id,
        name: index_name,
        lexical_similarity,
        tokenizer,
        stemmer,
        stop_words,
        frequent_words,
        ngram_indexing,
        document_compression,
        access_type: AccessType::Mmap,
        spelling_correction,
        query_completion,
        clustering,
        inference,
    };

    let index_arc = create_index(
        &index_id_path,
        meta,
        &schema,
        &synonyms,
        11,
        mute,
        force_shard_number,
    )
    .await
    .unwrap();

    apikey_object.index_list.insert(index_id, index_arc);

    index_id
}

/// Delete Index
///
/// Delete an index within the directory associated with the specified API key and return the number of remaining indices.
#[utoipa::path(
    delete,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (status = 200, description = "Index deleted, returns the number of indices", body = u64),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = NOT_FOUND, description = "Index_id does not exists"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
// Deletes the index with `index_id` and removes it from `index_list`.
// Returns the number of indices left in `index_list`, or
// `Err("index_id not found")` if the id is not registered.
pub(crate) async fn delete_index_api(
    index_id: u64,
    index_list: &mut HashMap<u64, IndexArc>,
) -> Result<u64, String> {
    let Some(index_arc) = index_list.get(&index_id) else {
        return Err("index_id not found".to_string());
    };

    // The write guard is a temporary: it is released at the end of this statement,
    // before the entry is removed from the map.
    index_arc.write().await.delete_index();
    index_list.remove(&index_id);

    Ok(index_list.len() as u64)
}

/// Commit Index
///
/// Commit moves indexed documents from the intermediate uncompressed data structure (array lists/HashMap, queryable by realtime search) in RAM
/// to the final compressed data structure (roaring bitmap) on Mmap or disk -
/// which is persistent, more compact, with lower query latency and allows search with realtime=false.
/// Commit is invoked automatically each time 64K documents are newly indexed **per shard** as well as on close_index (e.g. server quit).
/// There is no way to prevent this automatic commit by not manually invoking it.
/// But commit can also be invoked manually at any time at any number of newly indexed documents.
/// commit is a **hard commit** for persistence on disk. A **soft commit** for searchability
/// is invoked implicitly with every index_doc,
/// i.e. the document can immediately be searched and included in the search results
/// if it matches the query AND the query parameter realtime=true is enabled.
/// **Use commit with caution, as it is an expensive operation**.
/// **Usually, there is no need to invoke it manually**, as it is invoked automatically every 64k documents **per shard** and when the index is closed with close_index.
/// Before terminating the program, always call close_index (commit), otherwise all documents indexed since last (manual or automatic) commit are lost.
/// There are only 2 reasons that justify a manual commit:
/// 1. if you want to search newly indexed documents without using realtime=true for search performance reasons or
/// 2. if after indexing new documents there won't be more documents indexed (for some time),
///    so there won't be (soon) a commit invoked automatically at the next 64k threshold **per shard** or close_index,
///    but you still need immediate persistence guarantees on disk to protect against data loss in the event of a crash.
#[utoipa::path(
    patch,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (status = 200, description = "Index committed, returns the number of committed documents", body = u64),
        (status = BAD_REQUEST, description = "Index id invalid or missing"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
// Commits the index and returns the indexed document count that was read
// immediately before the commit was started.
pub(crate) async fn commit_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    // The read guard lives only inside this block, so it is released before `commit`.
    let indexed_doc_count = {
        let index_guard = index_arc.read().await;
        index_guard.indexed_doc_count().await
    };

    index_arc.clone().commit().await;

    Ok(indexed_doc_count as u64)
}

/// Close the index and return the indexed document count read right before closing.
pub(crate) async fn close_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    let count_before_close = {
        let index_guard = index_arc.read().await;
        index_guard.indexed_doc_count().await
    };

    index_arc.close().await;

    Ok(count_before_close as u64)
}

/// Forward `synonyms` to `set_synonyms` on the index while holding its write lock,
/// and return that call's result unchanged.
pub(crate) async fn set_synonyms_api(
    index_arc: &IndexArc,
    synonyms: Vec<Synonym>,
) -> Result<usize, String> {
    index_arc.write().await.set_synonyms(&synonyms)
}

/// Forward `synonyms` to `add_synonyms` on the index while holding its write lock,
/// and return that call's result unchanged.
pub(crate) async fn add_synonyms_api(
    index_arc: &IndexArc,
    synonyms: Vec<Synonym>,
) -> Result<usize, String> {
    index_arc.write().await.add_synonyms(&synonyms)
}

/// Return the result of `get_synonyms` on the index, called under its read lock.
pub(crate) async fn get_synonyms_api(index_arc: &IndexArc) -> Result<Vec<Synonym>, String> {
    index_arc.read().await.get_synonyms()
}

/// Get Index Info
///
/// Get index Info from index with index_id
#[utoipa::path(
    get,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (
            status = 200, description = "Index found, returns the index info", 
            body = IndexResponseObject,
        ),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
// Collects the info record for the index registered under `index_id`.
// Returns `Err("index_id not found")` if the id is not in `index_list`.
// `operations_count` and `query_count` are always reported as 0 here.
pub(crate) async fn get_index_info_api(
    index_id: u64,
    index_list: &HashMap<u64, IndexArc>,
) -> Result<IndexResponseObject, String> {
    let Some(index_arc) = index_list.get(&index_id) else {
        return Err("index_id not found".to_string());
    };

    // All values are read under one read guard.
    let index_ref = index_arc.read().await;
    let indexed_doc_count = index_ref.indexed_doc_count().await;
    let committed_doc_count = index_ref.committed_doc_count().await;
    let facets_minmax = index_ref.index_facets_minmax().await;

    Ok(IndexResponseObject {
        version: VERSION.to_string(),
        schema: index_ref.schema_map.clone(),
        id: index_ref.meta.id,
        name: index_ref.meta.name.clone(),
        indexed_doc_count,
        committed_doc_count,
        operations_count: 0,
        query_count: 0,
        facets_minmax,
    })
}

/// Get API Key Info
///
/// Get info about all indices associated with the specified API key
#[utoipa::path(
    get,
    tag = "API Key",
    path = "/api/v1/apikey",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
    ),
    responses(
        (
            status = 200, description = "Indices found, returns a list of index info", 
            body = Vec<IndexResponseObject>,
        ),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index ID or API key missing"),
        (status = UNAUTHORIZED, description = "API key does not exists"),
    )
)]
// Collects one info record per index in `index_list`, in the map's iteration order.
// `operations_count` and `query_count` are always reported as 0 here.
pub(crate) async fn get_apikey_indices_info_api(
    index_list: &HashMap<u64, IndexArc>,
) -> Result<Vec<IndexResponseObject>, String> {
    let mut index_infos: Vec<IndexResponseObject> = Vec::with_capacity(index_list.len());

    for index_arc in index_list.values() {
        // One read guard per index, released at the end of each iteration.
        let index_ref = index_arc.read().await;
        let indexed_doc_count = index_ref.indexed_doc_count().await;
        let committed_doc_count = index_ref.committed_doc_count().await;
        let facets_minmax = index_ref.index_facets_minmax().await;

        index_infos.push(IndexResponseObject {
            version: VERSION.to_string(),
            schema: index_ref.schema_map.clone(),
            id: index_ref.meta.id,
            name: index_ref.meta.name.clone(),
            indexed_doc_count,
            committed_doc_count,
            operations_count: 0,
            query_count: 0,
            facets_minmax,
        });
    }

    Ok(index_infos)
}

/// Index Document(s)
///
/// Index a JSON document or an array of JSON documents (bulk), each consisting of arbitrary key-value pairs to the index with the specified apikey and index_id, and return the number of indexed docs.
/// Index documents enables true real-time search (as opposed to near realtime.search):
/// When in query_index the parameter `realtime` is set to `true` then indexed, but uncommitted documents are immediately included in the search results, without requiring a commit or refresh.
/// Therefore an explicit commit_index is almost never required, as it is invoked automatically after 64k documents are indexed **per shard** or on close_index for persistence.
#[utoipa::path(
    post,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),


 	request_body(
        content = HashMap<String, Value>, description = "JSON document or array of JSON documents, each consisting of key-value pairs",content_type = "application/json", 
        examples(
    ("Example: Single Lexical/Hybrid document" = (value = json!(  {
        "title": "title1 test",
        "body": "body1",
        "url": "url1"
		} ))),
	("Example: Multiple Lexical/Hybrid documents" = (value = json!(  [
{
    "title":"title2",
    "body":"body2 test",
    "url":"url2"
},
{
    "title":"title3 test",
    "body":"body3 test",
    "url":"url3"
}
]   ))),
    ("Example: Multiple Vector documents" = (value = json!(  [
    {"vector":[0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.010, 0.011, 0.012, 0.013, 0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.020, 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.028, 0.029, 0.030, 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039, 0.040, 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.050, 0.051, 0.052, 0.053, 0.054, 0.055, 0.056, 0.057, 0.058, 0.059, 0.060, 0.061, 0.062, 0.063, 0.064, 0.065, 0.066, 0.067, 0.068, 0.069, 0.070, 0.071, 0.072, 0.073, 0.074, 0.075, 0.076, 0.077, 0.078, 0.079, 0.080, 0.081, 0.082, 0.083, 0.084, 0.085, 0.086, 0.087, 0.088, 0.089, 0.090, 0.091, 0.092, 0.093, 0.094, 0.095, 0.096, 0.097, 0.098, 0.099, 0.100, 0.101, 0.102, 0.103, 0.104, 0.105, 0.106, 0.107, 0.108, 0.109, 0.110, 0.111, 0.112, 0.113, 0.114, 0.115, 0.116, 0.117, 0.118, 0.119, 0.120, 0.121, 0.122, 0.123, 0.124, 0.125, 0.126, 0.127, 0.128],"index":"0"},
    {"vector":[0.129, 0.130, 0.131, 0.132, 0.133, 0.134, 0.135, 0.136, 0.137, 0.138, 0.139, 0.140, 0.141, 0.142, 0.143, 0.144, 0.145, 0.146, 0.147, 0.148, 0.149, 0.150, 0.151, 0.152, 0.153, 0.154, 0.155, 0.156, 0.157, 0.158, 0.159, 0.160, 0.161, 0.162, 0.163, 0.164, 0.165, 0.166, 0.167, 0.168, 0.169, 0.170, 0.171, 0.172, 0.173, 0.174, 0.175, 0.176, 0.177, 0.178, 0.179, 0.180, 0.181, 0.182, 0.183, 0.184, 0.185, 0.186, 0.187, 0.188, 0.189, 0.190, 0.191, 0.192, 0.193, 0.194, 0.195, 0.196, 0.197, 0.198, 0.199, 0.200, 0.201, 0.202, 0.203, 0.204, 0.205, 0.206, 0.207, 0.208, 0.209, 0.210, 0.211, 0.212, 0.213, 0.214, 0.215, 0.216, 0.217, 0.218, 0.219, 0.220, 0.221, 0.222, 0.223, 0.224, 0.225, 0.226, 0.227, 0.228, 0.229, 0.230, 0.231, 0.232, 0.233, 0.234, 0.235, 0.236, 0.237, 0.238, 0.239, 0.240, 0.241, 0.242, 0.243, 0.244, 0.245, 0.246, 0.247, 0.248, 0.249, 0.250, 0.251, 0.252, 0.253, 0.254, 0.255, 0.256],"index":"1"},
    {"vector":[0.257, 0.258, 0.259, 0.260, 0.261, 0.262, 0.263, 0.264, 0.265, 0.266, 0.267, 0.268, 0.269, 0.270, 0.271, 0.272, 0.273, 0.274, 0.275, 0.276, 0.277, 0.278, 0.279, 0.280, 0.281, 0.282, 0.283, 0.284, 0.285, 0.286, 0.287, 0.288, 0.289, 0.290, 0.291, 0.292, 0.293, 0.294, 0.295, 0.296, 0.297, 0.298, 0.299, 0.300, 0.301, 0.302, 0.303, 0.304, 0.305, 0.306, 0.307, 0.308, 0.309, 0.310, 0.311, 0.312, 0.313, 0.314, 0.315, 0.316, 0.317, 0.318, 0.319, 0.320, 0.321, 0.322, 0.323, 0.324, 0.325, 0.326, 0.327, 0.328, 0.329, 0.330, 0.331, 0.332, 0.333, 0.334, 0.335, 0.336, 0.337, 0.338, 0.339, 0.340, 0.341, 0.342, 0.343, 0.344, 0.345, 0.346, 0.347, 0.348, 0.349, 0.350, 0.351, 0.352, 0.353, 0.354, 0.355, 0.356, 0.357, 0.358, 0.359, 0.360, 0.361, 0.362, 0.363, 0.364, 0.365, 0.366, 0.367, 0.368, 0.369, 0.370, 0.371, 0.372, 0.373, 0.374, 0.375, 0.376, 0.377, 0.378, 0.379, 0.380, 0.381, 0.382, 0.383, 0.384],"index":"2"}
]   )))
        )
    ),
    responses(
        (status = 200, description = "Document indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
// Indexes a single document (with `FileType::None`) and returns the indexed
// document count of the index as read afterwards.
pub(crate) async fn index_document_api(
    index_arc: &IndexArc,
    document: Document,
) -> Result<usize, String> {
    index_arc.index_document(document, FileType::None).await;

    let indexed_doc_count = {
        let index_guard = index_arc.read().await;
        index_guard.indexed_doc_count().await
    };
    Ok(indexed_doc_count)
}

/// Index PDF file
///
/// Index PDF file (byte array) to the index with the specified apikey and index_id, and return the number of indexed docs.
/// - Converts PDF to a JSON document with "title", "body", "url" and "date" fields and indexes it.
/// - extracts title from metatag, or first line of text, or from filename
/// - extracts creation date from metatag, or from file creation date (Unix timestamp: the number of seconds since 1 January 1970)
/// - copies all ingested pdf files to "files" subdirectory in index
#[utoipa::path(
    post,
    tag = "PDF File",
    path = "/api/v1/index/{index_id}/file",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("file" = String, Header, description = "filepath from header for JSON 'url' field"),
        ("date" = String, Header, description = "date (timestamp) from header, as fallback for JSON 'date' field, if PDF date meta tag unaivailable"),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body = inline(&[u8]),
    responses(
        (status = 200, description = "PDF file indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
// Indexes the PDF given as raw bytes. On success returns the indexed document
// count of the index as read afterwards; an error from `index_pdf_bytes` is passed
// through unchanged.
pub(crate) async fn index_file_api(
    index_arc: &IndexArc,
    file_path: &Path,
    file_date: i64,
    document: &[u8],
) -> Result<usize, String> {
    index_arc
        .index_pdf_bytes(file_path, file_date, document)
        .await?;

    Ok(index_arc.read().await.indexed_doc_count().await)
}

/// Get PDF file
///
/// Get PDF file from index with index_id
/// ⚠️ Use search or get_iterator first to obtain a valid doc_id. Document IDs are not guaranteed to be continuous and gapless!
#[utoipa::path(
    get,
    tag = "PDF File",
    path = "/api/v1/index/{index_id}/file/{document_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
        ("document_id" = u64, Path, description = "document id"),
    ),
    responses(
        (status = 200, description = "PDF file found, returns the PDF file as byte array", body = [u8]),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
// Returns the stored file bytes for `document_id`, or `None` if the index has no
// stored field names or `get_file` reports an error.
pub(crate) async fn get_file_api(index_arc: &IndexArc, document_id: usize) -> Option<Vec<u8>> {
    // The guard taken for this check is released before the lookup below.
    if index_arc.read().await.stored_field_names.is_empty() {
        return None;
    }

    index_arc.read().await.get_file(document_id).await.ok()
}

/// Index a batch of documents and return the indexed document count of the index
/// as read afterwards.
pub(crate) async fn index_documents_api(
    index_arc: &IndexArc,
    document_vec: Vec<Document>,
) -> Result<usize, String> {
    index_arc.index_documents(document_vec).await;

    let indexed_doc_count = {
        let index_guard = index_arc.read().await;
        index_guard.indexed_doc_count().await
    };
    Ok(indexed_doc_count)
}

/// Get Document
///
/// Get document from index with index_id
/// ⚠️ Use search or get_iterator first to obtain a valid doc_id. Document IDs are not guaranteed to be continuous and gapless!
#[utoipa::path(
    get,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc/{document_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
        ("document_id" = u64, Path, description = "document id"),
    ),
    request_body(content = GetDocumentRequest, example=json!({
        "query_terms": ["test"],
        "fields": ["title", "body"],
        "highlights": [
        { "field": "title", "fragment_number": 0, "fragment_size": 1000, "highlight_markup": true},
        { "field": "body", "fragment_number": 2, "fragment_size": 160, "highlight_markup": true},
        { "field": "body", "name": "body2", "fragment_number": 0, "fragment_size": 4000, "highlight_markup": true}]
    })),
    responses(
        (status = 200, description = "Document found, returns the JSON document consisting of arbitrary key-value pairs", body = HashMap<String, Value>),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_document_api(
    index_arc: &IndexArc,
    document_id: usize,
    get_document_request: GetDocumentRequest,
) -> Option<Document> {
    if !index_arc.read().await.stored_field_names.is_empty() {
        let highlighter_option = if get_document_request.highlights.is_empty()
            || get_document_request.query_terms.is_empty()
        {
            None
        } else {
            Some(
                highlighter(
                    index_arc,
                    get_document_request.highlights,
                    get_document_request.query_terms,
                )
                .await,
            )
        };

        index_arc
            .read()
            .await
            .get_document(
                document_id,
                true,
                &highlighter_option,
                &HashSet::from_iter(get_document_request.fields),
                &get_document_request.distance_fields,
            )
            .await
            .ok()
    } else {
        None
    }
}

/// Update Document(s)
///
/// Update a JSON document or an array of JSON documents (bulk), each consisting of arbitrary key-value pairs to the index with the specified apikey and index_id, and return the number of indexed docs.
/// Update document is a combination of delete_document and index_document.
/// All current limitations of delete_document apply.
/// Update documents enables true real-time search (as opposed to near-real-time search):
/// When in query_index the parameter `realtime` is set to `true` then indexed, but uncommitted documents are immediately included in the search results, without requiring a commit or refresh.
/// Therefore an explicit commit_index is almost never required, as it is invoked automatically after 64k documents are indexed **per shard** or on close_index for persistence.
#[utoipa::path(
    patch,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = (u64, HashMap<String, Value>), description = "Tuple of (doc_id, JSON document) or array of tuples (doc_id, JSON documents), each JSON document consisting of arbitrary key-value pairs", content_type = "application/json", example=json!([0,{"title":"title1 test","body":"body1","url":"url1"}])),
    responses(
        (status = 200, description = "Document indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn update_document_api(
    index_arc: &IndexArc,
    id_document: (u64, Document),
) -> Result<u64, String> {
    // Apply the update first, then report the index's current document count.
    index_arc.update_document(id_document).await;

    let index_ref = index_arc.read().await;
    let indexed_docs = index_ref.indexed_doc_count().await;
    Ok(indexed_docs as u64)
}

/// Bulk variant of `update_document_api`: updates every `(doc_id, document)` pair
/// and returns the index's indexed document count afterwards.
pub(crate) async fn update_documents_api(
    index_arc: &IndexArc,
    id_document_vec: Vec<(u64, Document)>,
) -> Result<u64, String> {
    index_arc.update_documents(id_document_vec).await;

    let index_ref = index_arc.read().await;
    let indexed_docs = index_ref.indexed_doc_count().await;
    Ok(indexed_docs as u64)
}

/// Delete Document
///
/// Delete document by document_id from index with index_id
/// ⚠️ Use search or get_iterator first to obtain a valid doc_id. Document IDs are not guaranteed to be continuous and gapless!
/// Immediately effective, independent of commit.
/// Index space used by deleted documents is not reclaimed (until compaction is implemented), but result_count_total is updated.
/// By manually deleting the delete.bin file the deleted documents can be recovered (until compaction).
/// Deleted documents impact performance, especially but not limited to counting (Count, TopKCount). They also increase the size of the index (until compaction is implemented).
/// For minimal query latency delete index and reindexing documents is preferred over deleting documents (until compaction is implemented).
/// BM25 scores are not updated (until compaction is implemented), but the impact is minimal.
#[utoipa::path(
    delete,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc/{document_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
        ("document_id" = u64, Path, description = "document id"),
    ),
    responses(
        (status = 200, description = "Document deleted, returns indexed documents count", body = usize),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn delete_document_by_parameter_api(
    index_arc: &IndexArc,
    document_id: u64,
) -> Result<u64, String> {
    // Delete first, then report the index's current document count.
    index_arc.delete_document(document_id).await;

    let index_ref = index_arc.read().await;
    let indexed_docs = index_ref.indexed_doc_count().await;
    Ok(indexed_docs as u64)
}

/// Delete Document(s) by Request Object
///
/// Delete document by document_id, by array of document_id (bulk), by query (SearchRequestObject) from index with index_id, or clear all documents from index.
/// Immediately effective, independent of commit.
/// Index space used by deleted documents is not reclaimed (until compaction is implemented), but result_count_total is updated.
/// By manually deleting the delete.bin file the deleted documents can be recovered (until compaction).
/// Deleted documents impact performance, especially but not limited to counting (Count, TopKCount). They also increase the size of the index (until compaction is implemented).
/// For minimal query latency delete index and reindexing documents is preferred over deleting documents (until compaction is implemented).
/// BM25 scores are not updated (until compaction is implemented), but the impact is minimal.
/// Document ID can be obtained by search. When deleting by query (SearchRequestObject), it is advised to perform a dry run search first, to see which documents will be deleted.
#[utoipa::path(
    delete,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = SearchRequestObject, description = "Specifies the document(s) to delete by different request objects\n- 'clear' : delete all documents in index (clear index)\n- u64 : delete single doc ID\n- [u64] : delete array of doc ID \n- SearchRequestObject : delete documents by query", content_type = "application/json", example=json!({
        "query":"test",
        "offset":0,
        "length":10,
        "realtime": false,
        "field_filter": ["title", "body"]
    })),

    responses(
        (status = 200, description = "Document deleted, returns indexed documents count", body = usize),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn delete_document_by_object_api(
    index_arc: &IndexArc,
    document_id: u64,
) -> Result<u64, String> {
    index_arc.delete_document(document_id).await;
    Ok(index_arc.read().await.indexed_doc_count().await as u64)
}

/// Bulk delete: removes every document whose ID is listed in `document_id_vec`
/// and returns the index's indexed document count afterwards.
pub(crate) async fn delete_documents_by_object_api(
    index_arc: &IndexArc,
    document_id_vec: Vec<u64>,
) -> Result<u64, String> {
    index_arc.delete_documents(document_id_vec).await;

    let index_ref = index_arc.read().await;
    let indexed_docs = index_ref.indexed_doc_count().await;
    Ok(indexed_docs as u64)
}

pub(crate) async fn delete_documents_by_query_api(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> Result<u64, String> {
    index_arc
        .delete_documents_by_query(
            search_request.query_string.to_owned(),
            search_request.query_type_default,
            search_request.offset,
            search_request.length,
            search_request.realtime,
            search_request.field_filter,
            search_request.facet_filter,
            search_request.result_sort,
        )
        .await;

    Ok(index_arc.read().await.indexed_doc_count().await as u64)
}

/// Document iterator
///
/// Document iterator via GET and POST are identical, only the way parameters are passed differs.
/// The document iterator allows to iterate over all document IDs and documents in the entire index, forward or backward.
/// It enables efficient sequential access to every document, even in very large indexes, without running a search.
/// Paging through the index works without collecting document IDs to Min-heap in size-limited RAM first.
/// The iterator guarantees that only valid document IDs are returned, even though document IDs are not strictly continuous.
/// Document IDs can also be fetched in batches, reducing round trips and significantly improving performance, especially when using the REST API.
/// Typical use cases include index export, conversion, analytics, audits, and inspection.
/// Explanation of "eventually continuous" docid:
/// In SeekStorm, document IDs become continuous over time. In a multi-sharded index, each shard maintains its own document ID space.
/// Because documents are distributed across shards in a non-deterministic, load-dependent way, shard-local document IDs advance at different rates.
/// When these are mapped to global document IDs, temporary gaps can appear.
/// As a result, simply iterating from 0 to the total document count may encounter invalid IDs near the end.
/// The Document Iterator abstracts this complexity and reliably returns only valid document IDs.
/// # Parameters
/// - docid=None, take>0: **skip first s document IDs**, then **take next t document IDs** of an index.
/// - docid=None, take<0: **skip last s document IDs**, then **take previous t document IDs** of an index.
/// - docid=Some, take>0: **skip next s document IDs**, then **take next t document IDs** of an index, relative to a given document ID, with end-of-index indicator.
/// - docid=Some, take<0: **skip previous s document IDs**, then **take previous t document IDs**, relative to a given document ID, with start-of-index indicator.
/// - take=0: does not make sense, that defies the purpose of get_iterator.
/// - The sign of take indicates the direction of iteration: positive take for forward iteration, negative take for backward iteration.
/// - The skip parameter is always positive, indicating the number of document IDs to skip before taking document IDs. The skip direction is determined by the sign of take too.
/// - include_document: if true, the documents are also retrieved along with their document IDs.
/// Next page:     take last  docid from previous result set, skip=1, take=+page_size
/// Previous page: take first docid from previous result set, skip=1, take=-page_size
/// Returns an IteratorResult, consisting of the number of actually skipped document IDs, and a list of taken document IDs and documents (sorted ascending).
/// Detect end/begin of index during iteration: if returned vec.len() < requested take || if returned skip < requested skip
#[utoipa::path(
    get,
    tag = "Iterator",
    path = "/api/v1/index/{index_id}/doc_id",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id", example=0),
        ("document_id" = Option<u64>, Query,  description = "document id"),
        ("skip" = u64, Query,  description = "skip document IDs", minimum = 0, example=0),
        ("take" = i64, Query,  description = "take document IDs",  example=-1),
        ("include_deleted" = bool, Query,  description = "include deleted document IDs in results", example=false),
        ("include_document" = bool, Query,  description = "include documents in results", example=false),
        ("fields" = Vec<String>, Query,  description = "fields to include in document. If not specified, all fields are included", example=json!(["title","body"]) ),
    ),
    responses(
        (status = 200, description = "Document ID found, returning an IteratorResult", body = IteratorResult),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_iterator_api_get(
    index_arc: &IndexArc,
    document_id: Option<u64>,
    skip: usize,
    take: isize,
    include_deleted: bool,
    include_document: bool,
    fields: Vec<String>,
) -> IteratorResult {
    // The OpenAPI parameter list above mirrors this signature: `document_id` is
    // optional and `take` is signed (its sign selects the iteration direction).
    // All arguments are forwarded unchanged to the index iterator.
    index_arc
        .get_iterator(
            document_id,
            skip,
            take,
            include_deleted,
            include_document,
            fields,
        )
        .await
}

/// Document iterator
///
/// Document iterator via GET and POST are identical, only the way parameters are passed differs.
/// The document iterator allows to iterate over all document IDs and documents in the entire index, forward or backward.
/// It enables efficient sequential access to every document, even in very large indexes, without running a search.
/// Paging through the index works without collecting document IDs to Min-heap in size-limited RAM first.
/// The iterator guarantees that only valid document IDs are returned, even though document IDs are not strictly continuous.
/// Document IDs can also be fetched in batches, reducing round trips and significantly improving performance, especially when using the REST API.
/// Typical use cases include index export, conversion, analytics, audits, and inspection.
/// Explanation of "eventually continuous" docid:
/// In SeekStorm, document IDs become continuous over time. In a multi-sharded index, each shard maintains its own document ID space.
/// Because documents are distributed across shards in a non-deterministic, load-dependent way, shard-local document IDs advance at different rates.
/// When these are mapped to global document IDs, temporary gaps can appear.
/// As a result, simply iterating from 0 to the total document count may encounter invalid IDs near the end.
/// The Document Iterator abstracts this complexity and reliably returns only valid document IDs.
/// # Parameters
/// - docid=None, take>0: **skip first s document IDs**, then **take next t document IDs** of an index.
/// - docid=None, take<0: **skip last s document IDs**, then **take previous t document IDs** of an index.
/// - docid=Some, take>0: **skip next s document IDs**, then **take next t document IDs** of an index, relative to a given document ID, with end-of-index indicator.
/// - docid=Some, take<0: **skip previous s document IDs**, then **take previous t document IDs**, relative to a given document ID, with start-of-index indicator.
/// - take=0: does not make sense, that defies the purpose of get_iterator.
/// - The sign of take indicates the direction of iteration: positive take for forward iteration, negative take for backward iteration.
/// - The skip parameter is always positive, indicating the number of document IDs to skip before taking document IDs. The skip direction is determined by the sign of take too.
/// - include_document: if true, the documents are also retrieved along with their document IDs.
/// Next page:     take last  docid from previous result set, skip=1, take=+page_size
/// Previous page: take first docid from previous result set, skip=1, take=-page_size
/// Returns an IteratorResult, consisting of the number of actually skipped document IDs, and a list of taken document IDs and documents (sorted ascending).
/// Detect end/begin of index during iteration: if returned vec.len() < requested take || if returned skip < requested skip
#[utoipa::path(
    post,
    tag = "Iterator",
    path = "/api/v1/index/{index_id}/doc_id",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = GetIteratorRequest, example=json!({
        "document_id": null,
        "skip": 0,
        "take": -1,
    })),
    responses(
        (status = 200, description = "Document ID found, returning an IteratorResult", body = IteratorResult),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_iterator_api_post(
    index_arc: &IndexArc,
    document_id: Option<u64>,
    skip: usize,
    take: isize,
    include_deleted: bool,
    include_document: bool,
    fields: Vec<String>,
) -> IteratorResult {
    // POST variant of the document iterator: the body is the same as the GET
    // variant and forwards every argument unchanged to `get_iterator`.
    index_arc
        .get_iterator(
            document_id,
            skip,
            take,
            include_deleted,
            include_document,
            fields,
        )
        .await
}

/// Clears the index and returns its indexed document count afterwards.
///
/// The write lock is held for both the clear and the subsequent count.
pub(crate) async fn clear_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    let mut index_mut = index_arc.write().await;
    index_mut.clear_index().await;

    let remaining_docs = index_mut.indexed_doc_count().await;
    Ok(remaining_docs as u64)
}

/// Query Index
///
/// Query results from index with index_id
/// The following parameters are supported:
/// - Result type
/// - Result sorting
/// - Realtime search
/// - Field filter
/// - Fields to include in search results
/// - Distance fields: derived fields from distance calculations
/// - Highlights: keyword-in-context snippets and term highlighting
/// - Query facets: which facets fields to calculate and return at query time
/// - Facet filter: filter facets by field and value
/// - Result sort: sort results by field and direction
/// - Query type default: default query type, if not specified in query
#[utoipa::path(
    post,
    tag = "Query",
    path = "/api/v1/index/{index_id}/query",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),


    request_body(
        content = SearchRequestObject,
        examples(
            ("Example: Lexical search" = (value = json!(  {
        "query": "detroit",
        "query_vector": null,
        "enable_empty_query": false,
        "offset": 0,
        "length": 10,
        "result_type": "TopkCount",
        "realtime": true,
        "highlights": [
            {
                "field": "",
                "name": "",
                "fragment_number": 0,
                "fragment_size": 0,
                "highlight_markup": true,
                "pre_tags": "",
                "post_tags": ""
            }
        ],
        "field_filter": ["title"],
        "fields": [],
        "query_type_default": "Intersection",
        "query_rewriting": "SearchOnly",
        "search_mode": "Lexical"
    }   ))),
	("Example: Hybrid search" = (value = json!(  {
        "query": "detroit",
        "query_vector": null,
        "enable_empty_query": false,
        "offset": 0,
        "length": 10,
        "result_type": "TopkCount",
        "realtime": true,
        "highlights": [
            {
                "field": "",
                "name": "",
                "fragment_number": 0,
                "fragment_size": 0,
                "highlight_markup": true,
                "pre_tags": "",
                "post_tags": ""
            }
        ],
        "field_filter": ["title"],
        "fields": [],
        "query_type_default": "Intersection",
        "query_rewriting": "SearchOnly",
        "search_mode": {
            "Hybrid": {
                "similarity_threshold": 0.7,
                "ann_mode": {
                    "Nprobe": 55
                }
            }
        }
    }  ))),
("Example: Vector search" = (value = json!(  {
    "query":"",
    "query_vector": [
        0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.010, 0.011, 0.012, 0.013,
        0.014, 0.015, 0.016, 0.017, 0.018, 0.019, 0.020, 0.021, 0.022, 0.023, 0.024, 0.025, 0.026,
        0.027, 0.028, 0.029, 0.030, 0.031, 0.032, 0.033, 0.034, 0.035, 0.036, 0.037, 0.038, 0.039,
        0.040, 0.041, 0.042, 0.043, 0.044, 0.045, 0.046, 0.047, 0.048, 0.049, 0.050, 0.051, 0.052,
        0.053, 0.054, 0.055, 0.056, 0.057, 0.058, 0.059, 0.060, 0.061, 0.062, 0.063, 0.064, 0.065,
        0.066, 0.067, 0.068, 0.069, 0.070, 0.071, 0.072, 0.073, 0.074, 0.075, 0.076, 0.077, 0.078,
        0.079, 0.080, 0.081, 0.082, 0.083, 0.084, 0.085, 0.086, 0.087, 0.088, 0.089, 0.090, 0.091,
        0.092, 0.093, 0.094, 0.095, 0.096, 0.097, 0.098, 0.099, 0.100, 0.101, 0.102, 0.103, 0.104,
        0.105, 0.106, 0.107, 0.108, 0.109, 0.110, 0.111, 0.112, 0.113, 0.114, 0.115, 0.116, 0.117,
        0.118, 0.119, 0.120, 0.121, 0.122, 0.123, 0.124, 0.125, 0.126, 0.127, 0.128
    ],
    "offset":0,
    "length":10,
    "result_type": "Topk",
    "realtime": false,
    "search_mode": {
        "Vector":{
            "similarity_threshold": 0.7, 
            "ann_mode": {"Nprobe":55}
        }
    }

}  )))
        )
    ),
    responses(
        (status = 200, description = "Results found, returns the SearchResultObject", body = SearchResultObject),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn query_index_api_post(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    // Thin OpenAPI-annotated entry point for POST; the shared implementation
    // lives in `query_index_api`.
    query_index_api(index_arc, search_request).await
}

/// Query Index
///
/// Query results from index with index_id.
/// Query index via GET is a convenience function, that **offers only a limited set of parameters compared to Query Index via POST**.
/// Always use Query Index via POST for the full set of parameters and maximum flexibility.
/// Query Index via GET is provided for simple queries and quick testing, and to be easily callable from browser address bar, but it is not intended for production use.
#[utoipa::path(
    get,
    tag = "Query",
    path = "/api/v1/index/{index_id}/query",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id", example=0),
        ("query" = String, Query,  description = "query string", example="hello"),
        ("offset" = u64, Query,  description = "result offset", minimum = 0, example=0),
        ("length" = u64, Query,  description = "result length", minimum = 1, example=10),
        ("realtime" = bool, Query,  description = "include uncommitted documents", example=false),
        ("enable_empty_query" = bool, Query,  description = "allow empty query", example=false)
    ),
    responses(
        (status = 200, description = "Results found, returns the SearchResultObject", body = SearchResultObject),
        (status = BAD_REQUEST, description = "No query specified"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn query_index_api_get(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    // Thin OpenAPI-annotated entry point for GET; the shared implementation
    // lives in `query_index_api`.
    query_index_api(index_arc, search_request).await
}

use seekstorm::vector::{embedding_from_bytes_be, embedding_from_json};

/// Runs a search request against the index and assembles the REST response.
///
/// Steps:
/// 1. If a `query_vector` is supplied and the search mode is not `Lexical`, it is
///    decoded into an embedding: a JSON string is treated as base64-encoded bytes,
///    a JSON array is converted via `embedding_from_json`; any other JSON value
///    (or a failed decode) results in no query vector.
/// 2. The search is executed; `time` in the response covers everything up to the
///    end of the search call (in nanoseconds), not the document loading below.
/// 3. If the index has stored fields, the document of every result is loaded
///    (optionally highlighted and restricted to `fields`) and enriched with
///    `_id` and `_score`.
///
/// Results whose document cannot be loaded are skipped, so `results` may contain
/// fewer entries than `count`.
pub(crate) async fn query_index_api(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    let start_time = Instant::now();

    let query_vector = if let Some(value) = search_request.query_vector
        && search_request.search_mode != SearchMode::Lexical
    {
        // Read both vector settings under ONE short-lived read guard. Awaiting a
        // second `read()` while the first guard is still alive (as temporaries in
        // the same call expression) can deadlock on a fair/write-preferring async
        // RwLock if a writer queues up between the two acquisitions.
        let (vector_precision, vector_dimensions_original) = {
            let index_ref = index_arc.read().await;
            (
                index_ref.vector_precision,
                index_ref.vector_dimensions_original,
            )
        };

        match &value {
            // Base64 string: decode to bytes, then interpret as an embedding.
            Value::String(string_base64) => decode_bytes_from_base64_string(string_base64)
                .ok()
                .and_then(|bytes| {
                    embedding_from_bytes_be(
                        &bytes,
                        vector_precision,
                        vector_dimensions_original,
                        *IS_SYSTEM_LE,
                    )
                }),
            // JSON array of numbers.
            Value::Array(_) => {
                embedding_from_json(&value, vector_precision, vector_dimensions_original)
            }
            _ => None,
        }
    } else {
        None
    };

    let result_object = index_arc
        .search(
            search_request.query_string.to_owned(),
            query_vector,
            search_request.query_type_default,
            search_request.search_mode,
            search_request.enable_empty_query,
            search_request.offset,
            search_request.length,
            search_request.result_type,
            search_request.realtime,
            search_request.field_filter,
            search_request.query_facets,
            search_request.facet_filter,
            search_request.result_sort,
            search_request.query_rewriting,
        )
        .await;

    let elapsed_time = start_time.elapsed().as_nanos();

    // At most one document per search result.
    let mut results: Vec<Document> = Vec::with_capacity(result_object.results.len());

    if !index_arc.read().await.stored_field_names.is_empty() {
        let return_fields_filter = HashSet::from_iter(search_request.fields);

        let highlighter_option = if search_request.highlights.is_empty() {
            None
        } else {
            Some(
                highlighter(
                    index_arc,
                    search_request.highlights,
                    result_object.query_terms.clone(),
                )
                .await,
            )
        };

        for result in result_object.results.iter() {
            // The read lock is re-acquired per result rather than held across the
            // whole loop. Load failures are skipped on purpose (best effort).
            if let Ok(mut doc) = index_arc
                .read()
                .await
                .get_document(
                    result.doc_id,
                    search_request.realtime,
                    &highlighter_option,
                    &return_fields_filter,
                    &search_request.distance_fields,
                )
                .await
            {
                doc.insert("_id".to_string(), result.doc_id.into());
                doc.insert("_score".to_string(), result.score.into());

                results.push(doc);
            }
        }
    }

    SearchResultObject {
        original_query: result_object.original_query.to_owned(),
        query: result_object.query.to_owned(),
        time: elapsed_time,
        offset: search_request.offset,
        length: search_request.length,
        count: result_object.results.len(),
        count_total: result_object.result_count_total,
        query_terms: result_object.query_terms,
        results,
        facets: result_object.facets,
        suggestions: result_object.suggestions,
    }
}

// Root of the generated OpenAPI document.
// `paths(...)` lists the endpoint functions annotated with `#[utoipa::path]`;
// endpoint helpers without that annotation (e.g. the bulk variants) are not listed.
// `tags(...)` declares the tag groups referenced by the `tag = "..."` entries of the endpoints.
#[derive(OpenApi, Default)]
#[openapi(paths(
    live_api,
    create_apikey_api,
    get_apikey_indices_info_api,
    delete_apikey_api,
    create_index_api,
    get_index_info_api,
    commit_index_api,
    delete_index_api,
    get_iterator_api_post,
    get_iterator_api_get,
    index_document_api,
    update_document_api,
    index_file_api,
    get_document_api,
    get_file_api,
    delete_document_by_parameter_api,
    delete_document_by_object_api,
    query_index_api_post,
    query_index_api_get,
),
tags(
    (name="Info", description="Return info about the server"),
    (name="API Key", description="Create and delete API keys"),
    (name="Index", description="Create and delete indices"),
    (name="Iterator", description="Iterate through document IDs and documents"),
    (name="Document", description="Index, update, get and delete documents"),
    (name="PDF File", description="Index, and get PDF file"),
    (name="Query", description="Query an index"),
)
)]
// Document title and the default server entry of the generated specification.
#[openapi(info(title = "SeekStorm REST API documentation"))]
#[openapi(servers((url = "http://127.0.0.1", description = "Local SeekStorm server")))]
struct ApiDoc;

pub fn generate_openapi() {
    let openapi = ApiDoc::openapi();

    println!("{}", openapi.to_pretty_json().unwrap());

    let mut path = current_exe().unwrap();
    path.pop();
    let path_json = path.join("openapi.json");
    let path_yml = path.join("openapi.yml");

    serde_json::to_writer_pretty(&File::create(path_json.clone()).unwrap(), &openapi).unwrap();
    fs::write(path_yml.clone(), openapi.to_yaml().unwrap()).unwrap();

    println!(
        "OpenAPI documents generated: {} {}",
        path_json.to_string_lossy(),
        path_yml.to_string_lossy()
    );
}