use std::{
collections::HashMap,
env::current_exe,
fs::{self, File},
path::{Path, PathBuf},
time::Instant,
};
use ahash::AHashMap;
use itertools::Itertools;
use serde_json::Value;
use std::collections::HashSet;
use utoipa::{OpenApi, ToSchema};
use seekstorm::{
commit::Commit,
highlighter::{Highlight, highlighter},
index::{
AccessType, Close, Clustering, DeleteDocument, DeleteDocuments, DeleteDocumentsByQuery,
DistanceField, Document, DocumentCompression, Facet, FileType, FrequentwordType,
IS_SYSTEM_LE, IndexArc, IndexDocument, IndexDocuments, IndexMetaObject, LexicalSimilarity,
MinMaxFieldJson, NgramSet, QueryCompletion, SchemaField, SpellingCorrection, StemmerType,
StopwordType, Synonym, TokenizerType, UpdateDocument, UpdateDocuments, create_index,
open_index,
},
ingest::IndexPdfBytes,
iterator::{GetIterator, IteratorResult},
search::{
FacetFilter, QueryFacet, QueryRewriting, QueryType, ResultSort, ResultType, Search,
SearchMode,
},
utils::decode_bytes_from_base64_string,
vector::Inference,
};
use serde::{Deserialize, Serialize};
use crate::{
VERSION,
http_server::calculate_hash,
multi_tenancy::{ApikeyObject, ApikeyQuotaObject},
};
/// File name of the persisted API key metadata; it is joined onto an API key's directory
/// both when saving (`save_apikey_data`) and when loading (`open_apikey`).
const APIKEY_PATH: &str = "apikey.json";
/// Request object for a search query; also used to select documents for delete-by-query.
///
/// Every field except `query` has a serde default and may be omitted from the JSON body.
#[derive(Deserialize, Serialize, Clone, ToSchema, Debug)]
pub struct SearchRequestObject {
/// Query string; (de)serialized under the JSON key `query`.
#[serde(rename = "query")]
pub query_string: String,
/// Optional query vector given as an arbitrary JSON value; `None` when omitted.
#[serde(default)]
pub query_vector: Option<Value>,
/// Allow an empty query; defaults to `false`.
#[serde(default)]
#[schema(required = false, default = false, example = false)]
pub enable_empty_query: bool,
/// Result offset; defaults to 0.
#[serde(default)]
#[schema(required = false, minimum = 0, default = 0, example = 0)]
pub offset: usize,
/// Result length; defaults to 10 (see `length_api`).
#[serde(default = "length_api")]
#[schema(required = false, minimum = 1, default = 10, example = 10)]
pub length: usize,
/// Result type; falls back to `ResultType::default()` when omitted.
#[serde(default)]
pub result_type: ResultType,
/// Include uncommitted documents; defaults to `false`.
#[serde(default)]
pub realtime: bool,
/// Highlight specifications; empty when omitted.
#[serde(default)]
pub highlights: Vec<Highlight>,
/// Names of the fields used as field filter; empty when omitted.
#[schema(required = false, example = json!(["title"]))]
#[serde(default)]
pub field_filter: Vec<String>,
/// Field names; empty when omitted.
/// NOTE(review): presumably the fields returned per result document — confirm.
#[serde(default)]
pub fields: Vec<String>,
/// Distance field specifications; empty when omitted.
#[serde(default)]
pub distance_fields: Vec<DistanceField>,
/// Facets requested for the query; empty when omitted.
#[serde(default)]
pub query_facets: Vec<QueryFacet>,
/// Facet filters; empty when omitted.
#[serde(default)]
pub facet_filter: Vec<FacetFilter>,
/// Result sort criteria; empty when omitted.
#[schema(required = false, example = json!([{"field": "date", "order": "Ascending", "base": "None" }]))]
#[serde(default)]
pub result_sort: Vec<ResultSort>,
/// Default query type; `QueryType::Intersection` when omitted (see `query_type_api`).
#[schema(required = false, example = QueryType::Intersection)]
#[serde(default = "query_type_api")]
pub query_type_default: QueryType,
/// Query rewriting mode; `QueryRewriting::SearchOnly` when omitted (see `query_rewriting_api`).
#[schema(required = false, example = QueryRewriting::SearchOnly)]
#[serde(default = "query_rewriting_api")]
pub query_rewriting: QueryRewriting,
/// Search mode; `SearchMode::Lexical` when omitted (see `search_mode_api`).
#[schema(required = false, example = SearchMode::Lexical)]
#[serde(default = "search_mode_api")]
pub search_mode: SearchMode,
}
/// Serde default for `SearchRequestObject::search_mode`: `SearchMode::Lexical`.
fn search_mode_api() -> SearchMode {
SearchMode::Lexical
}
/// Serde default for `SearchRequestObject::query_type_default`: `QueryType::Intersection`.
fn query_type_api() -> QueryType {
QueryType::Intersection
}
/// Serde default for `SearchRequestObject::query_rewriting`: `QueryRewriting::SearchOnly`.
fn query_rewriting_api() -> QueryRewriting {
QueryRewriting::SearchOnly
}
/// Serde default for `SearchRequestObject::length`.
fn length_api() -> usize {
    /// Result length applied when the request does not specify one.
    const DEFAULT_RESULT_LENGTH: usize = 10;
    DEFAULT_RESULT_LENGTH
}
/// Response object returned by the query endpoints.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct SearchResultObject {
/// Time reported for the query.
/// NOTE(review): the unit is not visible in this section — confirm.
pub time: u128,
/// Query string as originally submitted.
/// NOTE(review): presumably differs from `query` only when the query was rewritten — confirm.
pub original_query: String,
/// Query string the results refer to.
pub query: String,
/// Result offset.
pub offset: usize,
/// Result length.
pub length: usize,
/// Result count.
/// NOTE(review): presumably the number of entries in `results` — confirm.
pub count: usize,
/// Total result count.
/// NOTE(review): presumably the number of all matching documents — confirm.
pub count_total: usize,
/// Terms of the query.
pub query_terms: Vec<String>,
/// Result documents; documented in the OpenAPI schema as a list of JSON key-value maps.
#[schema(value_type=Vec<HashMap<String, serde_json::Value>>)]
pub results: Vec<Document>,
/// Facets keyed by name; documented in the OpenAPI schema as name -> list of (value, count).
#[schema(value_type=HashMap<String, Vec<(String, usize)>>)]
pub facets: AHashMap<String, Facet>,
/// Query suggestions.
pub suggestions: Vec<String>,
}
/// Request object for creating an index.
///
/// Every field except `index_name` has a serde default and may be omitted from the JSON body.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct CreateIndexRequest {
/// Name of the index to create.
#[schema(example = "demo_index")]
pub index_name: String,
/// Field definitions of the index; empty when omitted.
#[schema(required = true, example = json!([
{"field":"title","field_type":"Text","store":true,"index_lexical":true,"boost":10.0},
{"field":"body","field_type":"Text","store":true,"index_lexical":true,"longest":true},
{"field":"url","field_type":"Text","store":true,"index_lexical":false},
{"field":"date","field_type":"Timestamp","store":true,"index_lexical":false,"facet":true}]))]
#[serde(default)]
pub schema: Vec<SchemaField>,
/// Lexical similarity; `LexicalSimilarity::Bm25fProximity` when omitted.
#[serde(default = "similarity_type_api")]
pub similarity: LexicalSimilarity,
/// Tokenizer; `TokenizerType::UnicodeAlphanumeric` when omitted.
#[serde(default = "tokenizer_type_api")]
pub tokenizer: TokenizerType,
/// Stemmer; `StemmerType::default()` when omitted.
#[serde(default)]
pub stemmer: StemmerType,
/// Stop word handling; `StopwordType::default()` when omitted.
#[serde(default)]
pub stop_words: StopwordType,
/// Frequent word handling; `FrequentwordType::default()` when omitted.
#[serde(default)]
pub frequent_words: FrequentwordType,
/// N-gram indexing bit flags; `NgramSet::NgramFF | NgramSet::NgramFFF` when omitted.
#[serde(default = "ngram_indexing_api")]
pub ngram_indexing: u8,
/// Document compression; `DocumentCompression::Snappy` when omitted.
#[serde(default = "document_compression_api")]
pub document_compression: DocumentCompression,
/// Synonym definitions; empty when omitted.
#[schema(required = true, example = json!([{"terms":["berry","lingonberry","blueberry","gooseberry"],"multiway":false}]))]
#[serde(default)]
pub synonyms: Vec<Synonym>,
/// Optional shard number that is passed through to `create_index`; `None` when omitted.
#[serde(default)]
pub force_shard_number: Option<usize>,
/// Optional spelling correction settings; `None` when omitted.
#[serde(default)]
pub spelling_correction: Option<SpellingCorrection>,
/// Optional query completion settings; `None` when omitted.
#[serde(default)]
pub query_completion: Option<QueryCompletion>,
/// Clustering settings; `Clustering::default()` when omitted.
#[serde(default)]
pub clustering: Clustering,
/// Inference settings; `Inference::default()` when omitted.
#[serde(default)]
pub inference: Inference,
}
/// Serde default for `CreateIndexRequest::similarity`: `LexicalSimilarity::Bm25fProximity`.
fn similarity_type_api() -> LexicalSimilarity {
LexicalSimilarity::Bm25fProximity
}
/// Serde default for `CreateIndexRequest::tokenizer`: `TokenizerType::UnicodeAlphanumeric`.
fn tokenizer_type_api() -> TokenizerType {
TokenizerType::UnicodeAlphanumeric
}
/// Serde default for `CreateIndexRequest::ngram_indexing`: the `NgramFF` and `NgramFFF`
/// flags of `NgramSet` combined into one bit mask.
fn ngram_indexing_api() -> u8 {
    let ff_flag = NgramSet::NgramFF as u8;
    let fff_flag = NgramSet::NgramFFF as u8;
    ff_flag | fff_flag
}
/// Serde default for `CreateIndexRequest::document_compression`: `DocumentCompression::Snappy`.
fn document_compression_api() -> DocumentCompression {
DocumentCompression::Snappy
}
/// Request object for deleting an API key.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DeleteApikeyRequest {
/// The API key to delete, Base64 encoded.
pub apikey_base64: String,
}
/// Request object for the document ID iterator endpoint.
///
/// All fields have serde defaults and may be omitted from the JSON body.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct GetIteratorRequest {
/// Optional document ID; `None` when omitted.
/// NOTE(review): presumably the ID the iteration starts from — confirm.
#[serde(default)]
pub document_id: Option<u64>,
/// Number of document IDs to skip; defaults to 0.
#[serde(default)]
pub skip: usize,
/// Number of document IDs to take; defaults to 1. The value is signed — the API examples
/// in this file pass -1.
#[serde(default = "default_1usize")]
pub take: isize,
/// Include deleted document IDs in the results; defaults to `false`.
#[serde(default)]
pub include_deleted: bool,
/// Include documents in the results; defaults to `false`.
#[serde(default)]
pub include_document: bool,
/// Fields to include in the document. If not specified, all fields are included.
#[serde(default)]
pub fields: Vec<String>,
}
/// Serde default for `GetIteratorRequest::take`.
///
/// Despite the `usize` in its name, the function returns an `isize`, matching the field type.
fn default_1usize() -> isize {
    /// Number of document IDs taken when the request does not specify `take`.
    const DEFAULT_TAKE: isize = 1;
    DEFAULT_TAKE
}
/// Request object for retrieving a single document.
///
/// All fields have serde defaults and may be omitted from the JSON body.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub struct GetDocumentRequest {
/// Query terms handed to the highlighter; highlighting is skipped when this list is empty.
#[serde(default)]
pub query_terms: Vec<String>,
/// Highlight specifications; highlighting is skipped when this list is empty.
#[serde(default)]
pub highlights: Vec<Highlight>,
/// Field names passed to the document lookup as a set; empty when omitted.
#[serde(default)]
pub fields: Vec<String>,
/// Distance field specifications passed to the document lookup; empty when omitted.
#[serde(default)]
pub distance_fields: Vec<DistanceField>,
}
/// Index information returned by the index info endpoints.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
pub(crate) struct IndexResponseObject {
/// Index ID, taken from the index metadata.
pub id: u64,
/// Index name, taken from the index metadata.
#[schema(example = "demo_index")]
pub name: String,
/// Schema of the index, keyed by field name.
#[schema(example = json!({
"title":{
"field":"title",
"store":true,
"index_lexical":true,
"field_type":"Text",
"boost":10.0,
"field_id":0
},
"body":{
"field":"body",
"store":true,
"index_lexical":true,
"field_type":"Text",
"field_id":1
},
"url":{
"field":"url",
"store":true,
"index_lexical":false,
"field_type":"Text",
"field_id":2
},
"date":{
"field":"date",
"store":true,
"index_lexical":false,
"field_type":"Timestamp",
"facet":true,
"field_id":3
}
}))]
pub schema: HashMap<String, SchemaField>,
/// Number of indexed documents.
pub indexed_doc_count: usize,
/// Number of committed documents.
pub committed_doc_count: usize,
/// Operations counter; the constructors visible in this file always set it to 0.
pub operations_count: u64,
/// Query counter; the constructors visible in this file always set it to 0.
pub query_count: u64,
/// Server version string (`VERSION`).
#[schema(example = "0.11.1")]
pub version: String,
/// Minimum and maximum values per facet field, keyed by field name.
#[schema(example = json!({"date":{"min":831306011,"max":1730901447}}))]
pub facets_minmax: HashMap<String, MinMaxFieldJson>,
}
/// Writes `content` to `path` by way of a sibling staging file.
///
/// The content is first written to `path` with its extension replaced by `bak`; that file
/// is then renamed onto `path`, replacing any previous file.
///
/// # Panics
///
/// Panics if the staging file cannot be written. A failing rename is only printed.
pub(crate) fn save_file_atomically(path: &PathBuf, content: String) {
    let staging_path = path.with_extension("bak");
    fs::write(&staging_path, content).unwrap();

    if let Err(e) = fs::rename(staging_path, path) {
        println!("error: {e:?}");
    }
}
pub(crate) fn save_apikey_data(apikey: &ApikeyObject, index_path: &PathBuf) {
let apikey_id: u64 = apikey.id;
let apikey_id_path = Path::new(&index_path).join(apikey_id.to_string());
let apikey_persistence_json = serde_json::to_string(&apikey).unwrap();
let apikey_persistence_path = Path::new(&apikey_id_path).join(APIKEY_PATH);
save_file_atomically(&apikey_persistence_path, apikey_persistence_json);
}
/// Live
///
/// Returns a liveness message consisting of the server name followed by the server version.
#[utoipa::path(
    tag = "Info",
    get,
    path = "/api/v1/live",
    responses(
        (status = 200, description = "SeekStorm server is live", body = String),
    )
)]
pub(crate) fn live_api() -> String {
    let mut banner = String::from("SeekStorm server ");
    banner.push_str(VERSION);
    banner
}
#[utoipa::path(
tag = "API Key",
post,
path = "/api/v1/apikey",
params(
("apikey" = String, Header, description = "YOUR_MASTER_API_KEY",example="BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB="),
),
request_body = inline(ApikeyQuotaObject),
responses(
(status = 200, description = "API key created, returns Base64 encoded API key", body = String),
(status = UNAUTHORIZED, description = "master_apikey invalid"),
(status = UNAUTHORIZED, description = "master_apikey missing")
)
)]
pub(crate) fn create_apikey_api<'a>(
index_path: &'a PathBuf,
apikey_quota_request_object: ApikeyQuotaObject,
apikey: &[u8],
apikey_list: &'a mut HashMap<u128, ApikeyObject>,
) -> &'a mut ApikeyObject {
let apikey_hash_u128 = calculate_hash(&apikey) as u128;
let mut apikey_id: u64 = 0;
let mut apikey_list_vec: Vec<(&u128, &ApikeyObject)> = apikey_list.iter().collect();
apikey_list_vec.sort_by_key(|a| a.1.id);
for value in apikey_list_vec {
if value.1.id == apikey_id {
apikey_id = value.1.id + 1;
} else {
break;
}
}
let apikey_object = ApikeyObject {
id: apikey_id,
apikey_hash: apikey_hash_u128,
quota: apikey_quota_request_object,
index_list: HashMap::new(),
};
let apikey_id_path = Path::new(&index_path).join(apikey_id.to_string());
fs::create_dir_all(apikey_id_path).unwrap();
save_apikey_data(&apikey_object, index_path);
apikey_list.insert(apikey_hash_u128, apikey_object);
apikey_list.get_mut(&apikey_hash_u128).unwrap()
}
/// Delete API key
///
/// Removes the directory of the API key identified by `apikey_hash` and then drops the key
/// from `apikey_list`. Returns the number of remaining API keys.
///
/// # Errors
///
/// Returns `Err("not found")` if no API key with this hash is registered, and an error
/// message if the key's directory exists but cannot be removed. In the latter case the
/// key stays registered, so the in-memory list keeps matching what is on disk.
#[utoipa::path(
    delete,
    tag = "API Key",
    path = "/api/v1/apikey",
    params(
        ("apikey" = String, Header, description = "YOUR_MASTER_API_KEY",example="BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB="),
    ),
    responses(
        (status = 200, description = "API key deleted, returns number of remaining API keys", body = u64),
        (status = UNAUTHORIZED, description = "master_apikey invalid"),
        (status = UNAUTHORIZED, description = "master_apikey missing")
    )
)]
pub(crate) fn delete_apikey_api(
    index_path: &PathBuf,
    apikey_list: &mut HashMap<u128, ApikeyObject>,
    apikey_hash: u128,
) -> Result<u64, String> {
    let apikey_id = match apikey_list.get(&apikey_hash) {
        Some(apikey_object) => apikey_object.id,
        None => return Err("not found".to_string()),
    };

    let apikey_id_path = Path::new(&index_path).join(apikey_id.to_string());
    println!("delete path {}", apikey_id_path.to_string_lossy());

    match fs::remove_dir_all(&apikey_id_path) {
        Ok(()) => {}
        // A directory that is already gone must not keep the key from being removed.
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        // Report every other I/O failure to the caller instead of panicking.
        Err(e) => {
            return Err(format!(
                "could not delete {}: {e}",
                apikey_id_path.to_string_lossy()
            ));
        }
    }

    apikey_list.remove(&apikey_hash);
    Ok(apikey_list.len() as u64)
}
pub(crate) async fn open_all_indices(
index_path: &PathBuf,
index_list: &mut HashMap<u64, IndexArc>,
) {
if !Path::exists(index_path) {
fs::create_dir_all(index_path).unwrap();
}
for result in fs::read_dir(index_path).unwrap() {
let path = result.unwrap();
if path.path().is_dir() {
let single_index_path = path.path();
let index_arc = match open_index(&single_index_path, false).await {
Ok(index_arc) => index_arc,
Err(err) => {
println!("{} {}", err, single_index_path.display());
continue;
}
};
let index_id = index_arc.read().await.meta.id;
index_list.insert(index_id, index_arc);
}
}
}
pub(crate) async fn open_apikey(
index_path: &PathBuf,
apikey_list: &mut HashMap<u128, ApikeyObject>,
) -> bool {
let apikey_path = Path::new(&index_path).join(APIKEY_PATH);
match fs::read_to_string(apikey_path) {
Ok(apikey_string) => {
let mut apikey_object: ApikeyObject = serde_json::from_str(&apikey_string).unwrap();
open_all_indices(index_path, &mut apikey_object.index_list).await;
apikey_list.insert(apikey_object.apikey_hash, apikey_object);
true
}
Err(_) => false,
}
}
pub(crate) async fn open_all_apikeys(
index_path: &PathBuf,
apikey_list: &mut HashMap<u128, ApikeyObject>,
) -> bool {
let mut test_index_flag = false;
if !Path::exists(index_path) {
println!("index path not found: {} ", index_path.to_string_lossy());
fs::create_dir_all(index_path).unwrap();
}
for result in fs::read_dir(index_path).unwrap() {
let path = result.unwrap();
if path.path().is_dir() {
let single_index_path = path.path();
test_index_flag |= open_apikey(&single_index_path, apikey_list).await;
}
}
test_index_flag
}
#[utoipa::path(
post,
tag = "Index",
path = "/api/v1/index",
params(
("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
),
request_body = inline(CreateIndexRequest),
responses(
(status = OK, description = "Index created, returns the index_id", body = u64),
(status = BAD_REQUEST, description = "Request object incorrect"),
(status = NOT_FOUND, description = "API key does not exists"),
(status = UNAUTHORIZED, description = "API key is missing"),
(status = UNAUTHORIZED, description = "API key does not exists")
)
)]
#[allow(clippy::too_many_arguments)]
pub(crate) async fn create_index_api<'a>(
index_path: &'a PathBuf,
index_name: String,
schema: Vec<SchemaField>,
lexical_similarity: LexicalSimilarity,
tokenizer: TokenizerType,
stemmer: StemmerType,
stop_words: StopwordType,
frequent_words: FrequentwordType,
ngram_indexing: u8,
document_compression: DocumentCompression,
synonyms: Vec<Synonym>,
force_shard_number: Option<usize>,
apikey_object: &'a mut ApikeyObject,
spelling_correction: Option<SpellingCorrection>,
query_completion: Option<QueryCompletion>,
mute: bool,
clustering: Clustering,
inference: Inference,
) -> u64 {
let mut index_id: u64 = 0;
for id in apikey_object.index_list.keys().sorted() {
if *id == index_id {
index_id = id + 1;
} else {
break;
}
}
let index_id_path = Path::new(&index_path)
.join(apikey_object.id.to_string())
.join(index_id.to_string());
fs::create_dir_all(&index_id_path).unwrap();
let meta = IndexMetaObject {
id: index_id,
name: index_name,
lexical_similarity,
tokenizer,
stemmer,
stop_words,
frequent_words,
ngram_indexing,
document_compression,
access_type: AccessType::Mmap,
spelling_correction,
query_completion,
clustering,
inference,
};
let index_arc = create_index(
&index_id_path,
meta,
&schema,
&synonyms,
11,
mute,
force_shard_number,
)
.await
.unwrap();
apikey_object.index_list.insert(index_id, index_arc);
index_id
}
/// Delete Index
///
/// Deletes the index with the given ID and removes it from `index_list`. Returns the number
/// of remaining indices, or an error if the ID is not in the list.
#[utoipa::path(
    delete,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (status = 200, description = "Index deleted, returns the number of indices", body = u64),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = NOT_FOUND, description = "Index_id does not exists"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn delete_index_api(
    index_id: u64,
    index_list: &mut HashMap<u64, IndexArc>,
) -> Result<u64, String> {
    match index_list.get(&index_id) {
        None => Err("index_id not found".to_string()),
        Some(index_arc) => {
            // The write guard is a temporary and is released at the end of this statement,
            // before the entry is removed from the list.
            index_arc.write().await.delete_index();
            index_list.remove(&index_id);
            Ok(index_list.len() as u64)
        }
    }
}
/// Commit Index
///
/// Reads the indexed document count, then commits the index. Returns the count that was
/// read before the commit.
#[utoipa::path(
    patch,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (status = 200, description = "Index committed, returns the number of committed documents", body = u64),
        (status = BAD_REQUEST, description = "Index id invalid or missing"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn commit_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    // The read guard is a temporary of this statement, so it is released before the commit.
    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    index_arc.commit().await;
    Ok(indexed_doc_count as u64)
}
/// Reads the indexed document count, then closes the index. Returns the count that was read
/// before closing.
pub(crate) async fn close_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    let index_ref = index_arc.read().await;
    let indexed_doc_count = index_ref.indexed_doc_count().await;
    // Release the read guard before closing the index.
    drop(index_ref);

    index_arc.close().await;
    Ok(indexed_doc_count as u64)
}
/// Calls `set_synonyms` on the write-locked index with the given synonyms and returns its
/// result unchanged.
pub(crate) async fn set_synonyms_api(
    index_arc: &IndexArc,
    synonyms: Vec<Synonym>,
) -> Result<usize, String> {
    index_arc.write().await.set_synonyms(&synonyms)
}
/// Calls `add_synonyms` on the write-locked index with the given synonyms and returns its
/// result unchanged.
pub(crate) async fn add_synonyms_api(
    index_arc: &IndexArc,
    synonyms: Vec<Synonym>,
) -> Result<usize, String> {
    index_arc.write().await.add_synonyms(&synonyms)
}
/// Calls `get_synonyms` on the read-locked index and returns its result unchanged.
pub(crate) async fn get_synonyms_api(index_arc: &IndexArc) -> Result<Vec<Synonym>, String> {
    index_arc.read().await.get_synonyms()
}
/// Get Index Info
///
/// Returns the info of the index with the given ID: schema, ID, name, document counts,
/// facet min/max values and the server version. The operation and query counters are
/// always reported as 0. Fails if the ID is not in `index_list`.
#[utoipa::path(
    get,
    tag = "Index",
    path = "/api/v1/index/{index_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    responses(
        (
            status = 200, description = "Index found, returns the index info",
            body = IndexResponseObject,
        ),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_index_info_api(
    index_id: u64,
    index_list: &HashMap<u64, IndexArc>,
) -> Result<IndexResponseObject, String> {
    let index_arc = match index_list.get(&index_id) {
        Some(found) => found,
        None => return Err("index_id not found".to_string()),
    };

    let index_ref = index_arc.read().await;
    let indexed_doc_count = index_ref.indexed_doc_count().await;
    let committed_doc_count = index_ref.committed_doc_count().await;
    let facets_minmax = index_ref.index_facets_minmax().await;

    Ok(IndexResponseObject {
        id: index_ref.meta.id,
        name: index_ref.meta.name.clone(),
        schema: index_ref.schema_map.clone(),
        indexed_doc_count,
        committed_doc_count,
        operations_count: 0,
        query_count: 0,
        version: VERSION.to_string(),
        facets_minmax,
    })
}
/// Get API Key Info
///
/// Returns the info of every index in `index_list`, in the iteration order of the map.
/// The operation and query counters are always reported as 0.
#[utoipa::path(
    get,
    tag = "API Key",
    path = "/api/v1/apikey",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
    ),
    responses(
        (
            status = 200, description = "Indices found, returns a list of index info",
            body = Vec<IndexResponseObject>,
        ),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index ID or API key missing"),
        (status = UNAUTHORIZED, description = "API key does not exists"),
    )
)]
pub(crate) async fn get_apikey_indices_info_api(
    index_list: &HashMap<u64, IndexArc>,
) -> Result<Vec<IndexResponseObject>, String> {
    let mut index_infos: Vec<IndexResponseObject> = Vec::with_capacity(index_list.len());

    for index_arc in index_list.values() {
        let index_ref = index_arc.read().await;
        let indexed_doc_count = index_ref.indexed_doc_count().await;
        let committed_doc_count = index_ref.committed_doc_count().await;
        let facets_minmax = index_ref.index_facets_minmax().await;

        index_infos.push(IndexResponseObject {
            id: index_ref.meta.id,
            name: index_ref.meta.name.clone(),
            schema: index_ref.schema_map.clone(),
            indexed_doc_count,
            committed_doc_count,
            operations_count: 0,
            query_count: 0,
            version: VERSION.to_string(),
            facets_minmax,
        });
    }

    Ok(index_infos)
}
/// Index Document(s)
///
/// Indexes a single document with `FileType::None` and returns the indexed document count
/// read afterwards.
#[utoipa::path(
    post,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = HashMap<String, Value>, description = "JSON document or array of JSON documents, each consisting of key-value pairs", content_type = "application/json", example=json!({"title":"title1 test","body":"body1","url":"url1"})),
    responses(
        (status = 200, description = "Document indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn index_document_api(
    index_arc: &IndexArc,
    document: Document,
) -> Result<usize, String> {
    index_arc.index_document(document, FileType::None).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count)
}
/// Index PDF file
///
/// Indexes the PDF given as raw bytes, passing `file_path` and `file_date` along, and
/// returns the indexed document count read afterwards.
///
/// # Errors
///
/// Returns the error of `index_pdf_bytes` unchanged.
#[utoipa::path(
    post,
    tag = "PDF File",
    path = "/api/v1/index/{index_id}/file",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("file" = String, Header, description = "filepath from header for JSON 'url' field"),
        ("date" = String, Header, description = "date (timestamp) from header, as fallback for JSON 'date' field, if PDF date meta tag unaivailable"),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body = inline(&[u8]),
    responses(
        (status = 200, description = "PDF file indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn index_file_api(
    index_arc: &IndexArc,
    file_path: &Path,
    file_date: i64,
    document: &[u8],
) -> Result<usize, String> {
    index_arc
        .index_pdf_bytes(file_path, file_date, document)
        .await?;

    Ok(index_arc.read().await.indexed_doc_count().await)
}
/// Get PDF file
///
/// Returns the file bytes stored for `document_id`, or `None` if the index has no stored
/// field names or the lookup fails.
#[utoipa::path(
    get,
    tag = "PDF File",
    path = "/api/v1/index/{index_id}/file/{document_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
        ("document_id" = u64, Path, description = "document id"),
    ),
    responses(
        (status = 200, description = "PDF file found, returns the PDF file as byte array", body = [u8]),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_file_api(index_arc: &IndexArc, document_id: usize) -> Option<Vec<u8>> {
    let nothing_stored = index_arc.read().await.stored_field_names.is_empty();
    if nothing_stored {
        return None;
    }

    let file_lookup = index_arc.read().await.get_file(document_id).await;
    file_lookup.ok()
}
/// Indexes a list of documents and returns the indexed document count read afterwards.
pub(crate) async fn index_documents_api(
    index_arc: &IndexArc,
    document_vec: Vec<Document>,
) -> Result<usize, String> {
    index_arc.index_documents(document_vec).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count)
}
#[utoipa::path(
get,
tag = "Document",
path = "/api/v1/index/{index_id}/doc/{document_id}",
params(
("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
("index_id" = u64, Path, description = "index id"),
("document_id" = u64, Path, description = "document id"),
),
request_body(content = GetDocumentRequest, example=json!({
"query_terms": ["test"],
"fields": ["title", "body"],
"highlights": [
{ "field": "title", "fragment_number": 0, "fragment_size": 1000, "highlight_markup": true},
{ "field": "body", "fragment_number": 2, "fragment_size": 160, "highlight_markup": true},
{ "field": "body", "name": "body2", "fragment_number": 0, "fragment_size": 4000, "highlight_markup": true}]
})),
responses(
(status = 200, description = "Document found, returns the JSON document consisting of arbitrary key-value pairs", body = HashMap<String, Value>),
(status = BAD_REQUEST, description = "index_id invalid or missing"),
(status = BAD_REQUEST, description = "doc_id invalid or missing"),
(status = BAD_REQUEST, description = "Request object incorrect"),
(status = NOT_FOUND, description = "Index id does not exist"),
(status = NOT_FOUND, description = "Document id does not exist"),
(status = NOT_FOUND, description = "api_key does not exists"),
(status = UNAUTHORIZED, description = "api_key does not exists"),
(status = UNAUTHORIZED, description = "api_key missing"),
)
)]
pub(crate) async fn get_document_api(
index_arc: &IndexArc,
document_id: usize,
get_document_request: GetDocumentRequest,
) -> Option<Document> {
if !index_arc.read().await.stored_field_names.is_empty() {
let highlighter_option = if get_document_request.highlights.is_empty()
|| get_document_request.query_terms.is_empty()
{
None
} else {
Some(
highlighter(
index_arc,
get_document_request.highlights,
get_document_request.query_terms,
)
.await,
)
};
index_arc
.read()
.await
.get_document(
document_id,
true,
&highlighter_option,
&HashSet::from_iter(get_document_request.fields),
&get_document_request.distance_fields,
)
.await
.ok()
} else {
None
}
}
/// Update Document(s)
///
/// Updates a single document given as a `(document ID, document)` tuple and returns the
/// indexed document count read afterwards.
#[utoipa::path(
    patch,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = (u64, HashMap<String, Value>), description = "Tuple of (doc_id, JSON document) or array of tuples (doc_id, JSON documents), each JSON document consisting of arbitrary key-value pairs", content_type = "application/json", example=json!([0,{"title":"title1 test","body":"body1","url":"url1"}])),
    responses(
        (status = 200, description = "Document indexed, returns the number of indexed documents", body = usize),
        (status = BAD_REQUEST, description = "Document object invalid"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing")
    )
)]
pub(crate) async fn update_document_api(
    index_arc: &IndexArc,
    id_document: (u64, Document),
) -> Result<u64, String> {
    index_arc.update_document(id_document).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count as u64)
}
/// Updates a list of documents given as `(document ID, document)` tuples and returns the
/// indexed document count read afterwards.
pub(crate) async fn update_documents_api(
    index_arc: &IndexArc,
    id_document_vec: Vec<(u64, Document)>,
) -> Result<u64, String> {
    index_arc.update_documents(id_document_vec).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count as u64)
}
/// Delete Document
///
/// Deletes the document whose ID is given as a path parameter and returns the indexed
/// document count read afterwards.
#[utoipa::path(
    delete,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc/{document_id}",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
        ("document_id" = u64, Path, description = "document id"),
    ),
    responses(
        (status = 200, description = "Document deleted, returns indexed documents count", body = usize),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn delete_document_by_parameter_api(
    index_arc: &IndexArc,
    document_id: u64,
) -> Result<u64, String> {
    index_arc.delete_document(document_id).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count as u64)
}
/// Delete Document(s) by Request Object
///
/// Deletes the document whose ID was given in the request body and returns the indexed
/// document count read afterwards.
#[utoipa::path(
    delete,
    tag = "Document",
    path = "/api/v1/index/{index_id}/doc",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body(content = SearchRequestObject, description = "Specifies the document(s) to delete by different request objects\n- 'clear' : delete all documents in index (clear index)\n- u64 : delete single doc ID\n- [u64] : delete array of doc ID \n- SearchRequestObject : delete documents by query", content_type = "application/json", example=json!({
        "query":"test",
        "offset":0,
        "length":10,
        "realtime": false,
        "field_filter": ["title", "body"]
    })),
    responses(
        (status = 200, description = "Document deleted, returns indexed documents count", body = usize),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "doc_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "Document id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn delete_document_by_object_api(
    index_arc: &IndexArc,
    document_id: u64,
) -> Result<u64, String> {
    index_arc.delete_document(document_id).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count as u64)
}
/// Deletes the documents with the given IDs and returns the indexed document count read
/// afterwards.
pub(crate) async fn delete_documents_by_object_api(
    index_arc: &IndexArc,
    document_id_vec: Vec<u64>,
) -> Result<u64, String> {
    index_arc.delete_documents(document_id_vec).await;

    let indexed_doc_count = index_arc.read().await.indexed_doc_count().await;
    Ok(indexed_doc_count as u64)
}
pub(crate) async fn delete_documents_by_query_api(
index_arc: &IndexArc,
search_request: SearchRequestObject,
) -> Result<u64, String> {
index_arc
.delete_documents_by_query(
search_request.query_string.to_owned(),
search_request.query_type_default,
search_request.offset,
search_request.length,
search_request.realtime,
search_request.field_filter,
search_request.facet_filter,
search_request.result_sort,
)
.await;
Ok(index_arc.read().await.indexed_doc_count().await as u64)
}
/// Document iterator via GET
///
/// Passes the iterator parameters of a GET request straight through to `get_iterator` on
/// the index and returns its `IteratorResult`.
///
/// The OpenAPI attribute declares a single `params` list (a redundant second list has been
/// merged into it) and documents `take` as a signed integer, matching the `isize`
/// parameter and the `-1` example.
#[utoipa::path(
    get,
    tag = "Iterator",
    path = "/api/v1/index/{index_id}/doc_id",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id", example=0),
        ("document_id" = u64, Query, description = "document id"),
        ("skip" = u64, Query, description = "skip document IDs", minimum = 0, example=0),
        ("take" = i64, Query, description = "take document IDs", example=-1),
        ("include_deleted" = bool, Query, description = "include deleted document IDs in results", example=false),
        ("include_document" = bool, Query, description = "include documents in results", example=false),
        ("fields" = Vec<String>, Query, description = "fields to include in document. If not specified, all fields are included", example=json!(["title","body"]) ),
    ),
    responses(
        (status = 200, description = "Document ID found, returning an IteratorResult", body = IteratorResult),
        (status = BAD_REQUEST, description = "index_id invalid or missing"),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn get_iterator_api_get(
    index_arc: &IndexArc,
    document_id: Option<u64>,
    skip: usize,
    take: isize,
    include_deleted: bool,
    include_document: bool,
    fields: Vec<String>,
) -> IteratorResult {
    index_arc
        .get_iterator(
            document_id,
            skip,
            take,
            include_deleted,
            include_document,
            fields,
        )
        .await
}
/// Document iterator via POST
///
/// Passes the iterator parameters of a POST request straight through to `get_iterator` on
/// the index and returns its `IteratorResult`.
#[utoipa::path(
post,
tag = "Iterator",
path = "/api/v1/index/{index_id}/doc_id",
params(
("apikey" = String, Header, description = "YOUR_SECRET_API_KEY",example="AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
("index_id" = u64, Path, description = "index id"),
),
request_body(content = GetIteratorRequest, example=json!({
"document_id": null,
"skip": 0,
"take": -1,
})),
responses(
(status = 200, description = "Document ID found, returning an IteratorResult", body = IteratorResult),
(status = BAD_REQUEST, description = "index_id invalid or missing"),
(status = BAD_REQUEST, description = "Request object incorrect"),
(status = NOT_FOUND, description = "Index id does not exist"),
(status = NOT_FOUND, description = "api_key does not exists"),
(status = UNAUTHORIZED, description = "api_key does not exists"),
(status = UNAUTHORIZED, description = "api_key missing"),
)
)]
pub(crate) async fn get_iterator_api_post(
index_arc: &IndexArc,
document_id: Option<u64>,
skip: usize,
take: isize,
include_deleted: bool,
include_document: bool,
fields: Vec<String>,
) -> IteratorResult {
// Same pass-through as `get_iterator_api_get`; only the HTTP method and the OpenAPI
// description differ.
index_arc
.get_iterator(
document_id,
skip,
take,
include_deleted,
include_document,
fields,
)
.await
}
/// Clears the index while holding its write lock and returns the indexed document count
/// read afterwards under the same lock.
pub(crate) async fn clear_index_api(index_arc: &IndexArc) -> Result<u64, String> {
    let mut index_mut = index_arc.write().await;
    index_mut.clear_index().await;

    let remaining_doc_count = index_mut.indexed_doc_count().await;
    Ok(remaining_doc_count as u64)
}
// POST variant of the query endpoint; the request body is an inline
// `SearchRequestObject`.
// Delegates to `query_index_api`, which performs the search and builds the
// `SearchResultObject`.
// Plain `//` comments on purpose: utoipa copies `///` docs of a
// `#[utoipa::path]` handler into the generated OpenAPI summary/description.
#[utoipa::path(
    post,
    tag = "Query",
    path = "/api/v1/index/{index_id}/query",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY", example = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id"),
    ),
    request_body = inline(SearchRequestObject),
    responses(
        (status = 200, description = "Results found, returns the SearchResultObject", body = SearchResultObject),
        (status = BAD_REQUEST, description = "Request object incorrect"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn query_index_api_post(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    let search = query_index_api(index_arc, search_request);
    search.await
}
// GET variant of the query endpoint; the documented inputs are query-string
// parameters (`query`, `offset`, `length`, `realtime`, `enable_empty_query`).
// Delegates to `query_index_api`, which performs the search and builds the
// `SearchResultObject`.
// Plain `//` comments on purpose: utoipa copies `///` docs of a
// `#[utoipa::path]` handler into the generated OpenAPI summary/description.
#[utoipa::path(
    get,
    tag = "Query",
    path = "/api/v1/index/{index_id}/query",
    params(
        ("apikey" = String, Header, description = "YOUR_SECRET_API_KEY", example = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="),
        ("index_id" = u64, Path, description = "index id", example = 0),
        ("query" = String, Query, description = "query string", example = "hello"),
        ("offset" = u64, Query, description = "result offset", minimum = 0, example = 0),
        ("length" = u64, Query, description = "result length", minimum = 1, example = 10),
        ("realtime" = bool, Query, description = "include uncommitted documents", example = false),
        ("enable_empty_query" = bool, Query, description = "allow empty query", example = false)
    ),
    responses(
        (status = 200, description = "Results found, returns the SearchResultObject", body = SearchResultObject),
        (status = BAD_REQUEST, description = "No query specified"),
        (status = NOT_FOUND, description = "Index id does not exist"),
        (status = NOT_FOUND, description = "API key does not exist"),
        (status = UNAUTHORIZED, description = "api_key does not exists"),
        (status = UNAUTHORIZED, description = "api_key missing"),
    )
)]
pub(crate) async fn query_index_api_get(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    let search = query_index_api(index_arc, search_request);
    search.await
}
use seekstorm::vector::{embedding_from_bytes_be, embedding_from_json};
/// Executes a search request against an index and assembles the response.
///
/// Steps, in order:
/// 1. If `query_vector` is present and the search mode is not
///    `SearchMode::Lexical`, turn it into an embedding: a JSON string is
///    decoded as base64 bytes and passed to `embedding_from_bytes_be`, a JSON
///    array is passed to `embedding_from_json`. Any other JSON value, or a
///    failed decode, results in no embedding.
/// 2. Run the search with the request parameters.
/// 3. If the index has stored fields, load the document for every hit and add
///    `_id` and `_score` entries to it. Hits whose document cannot be loaded
///    are skipped.
///
/// In the returned object, `time` is the elapsed time in nanoseconds from
/// function entry until the search returned (document loading is not
/// included), and `count` is the number of hits returned by the search, which
/// may be larger than `results.len()` when documents were skipped or the index
/// has no stored fields.
pub(crate) async fn query_index_api(
    index_arc: &IndexArc,
    search_request: SearchRequestObject,
) -> SearchResultObject {
    let start_time = Instant::now();

    let query_vector = if let Some(value) = search_request.query_vector
        && search_request.search_mode != SearchMode::Lexical
    {
        // Read both vector settings under ONE short-lived read guard.
        // Calling `index_arc.read().await` twice inside a single call
        // expression keeps the first guard alive while the second is awaited;
        // on a write-preferring async RwLock (NOTE(review): assumed for
        // `IndexArc`, e.g. tokio's) a writer queued in between would deadlock
        // the task.
        let (vector_precision, vector_dimensions) = {
            let index = index_arc.read().await;
            (index.vector_precision, index.vector_dimensions)
        };

        match &value {
            Value::String(string_base64) => decode_bytes_from_base64_string(string_base64)
                .ok()
                .and_then(|bytes| {
                    embedding_from_bytes_be(
                        &bytes,
                        vector_precision,
                        vector_dimensions,
                        *IS_SYSTEM_LE,
                    )
                }),
            Value::Array(_) => embedding_from_json(&value, vector_precision, vector_dimensions),
            _ => None,
        }
    } else {
        None
    };

    let result_object = index_arc
        .search(
            // The request is owned, so the query string can be moved instead of cloned.
            search_request.query_string,
            query_vector,
            search_request.query_type_default,
            search_request.search_mode,
            search_request.enable_empty_query,
            search_request.offset,
            search_request.length,
            search_request.result_type,
            search_request.realtime,
            search_request.field_filter,
            search_request.query_facets,
            search_request.facet_filter,
            search_request.result_sort,
            search_request.query_rewriting,
        )
        .await;
    // Measured before document loading, so `time` reflects the search only.
    let elapsed_time = start_time.elapsed().as_nanos();

    let return_fields_filter = HashSet::from_iter(search_request.fields);
    let mut results: Vec<Document> = Vec::new();

    // Documents can only be returned when the index stores at least one field.
    if !index_arc.read().await.stored_field_names.is_empty() {
        let highlighter_option = if search_request.highlights.is_empty() {
            None
        } else {
            Some(
                highlighter(
                    index_arc,
                    search_request.highlights,
                    result_object.query_terms.clone(),
                )
                .await,
            )
        };

        results.reserve(result_object.results.len());
        for result in result_object.results.iter() {
            // Best effort: a hit whose document cannot be loaded is left out
            // of `results` rather than failing the whole request.
            if let Ok(mut doc) = index_arc
                .read()
                .await
                .get_document(
                    result.doc_id,
                    search_request.realtime,
                    &highlighter_option,
                    &return_fields_filter,
                    &search_request.distance_fields,
                )
                .await
            {
                doc.insert("_id".to_string(), result.doc_id.into());
                doc.insert("_score".to_string(), result.score.into());
                results.push(doc);
            }
        }
    }

    SearchResultObject {
        original_query: result_object.original_query.to_owned(),
        query: result_object.query.to_owned(),
        time: elapsed_time,
        offset: search_request.offset,
        length: search_request.length,
        count: result_object.results.len(),
        count_total: result_object.result_count_total,
        query_terms: result_object.query_terms,
        results,
        facets: result_object.facets,
        suggestions: result_object.suggestions,
    }
}
// OpenAPI document definition for the REST API, built by the `OpenApi` derive.
// - `paths(...)` lists the handler functions whose `#[utoipa::path]` metadata
//   is included in the document.
// - `tags(...)` gives the description for each tag name used by those handlers.
// - The two additional `#[openapi(...)]` attributes set the document title and
//   the server URL entry.
// `generate_openapi` obtains the finished document through `ApiDoc::openapi()`.
// Plain `//` comments are used so that nothing here can leak into the
// generated document.
#[derive(OpenApi, Default)]
#[openapi(paths(
live_api,
create_apikey_api,
get_apikey_indices_info_api,
delete_apikey_api,
create_index_api,
get_index_info_api,
commit_index_api,
delete_index_api,
get_iterator_api_post,
get_iterator_api_get,
index_document_api,
update_document_api,
index_file_api,
get_document_api,
get_file_api,
delete_document_by_parameter_api,
delete_document_by_object_api,
query_index_api_post,
query_index_api_get,
),
tags(
(name="Info", description="Return info about the server"),
(name="API Key", description="Create and delete API keys"),
(name="Index", description="Create and delete indices"),
(name="Iterator", description="Iterate through document IDs and documents"),
(name="Document", description="Index, update, get and delete documents"),
(name="PDF File", description="Index, and get PDF file"),
(name="Query", description="Query an index"),
)
)]
#[openapi(info(title = "SeekStorm REST API documentation"))]
#[openapi(servers((url = "http://127.0.0.1", description = "Local SeekStorm server")))]
struct ApiDoc;
pub fn generate_openapi() {
let openapi = ApiDoc::openapi();
println!("{}", openapi.to_pretty_json().unwrap());
let mut path = current_exe().unwrap();
path.pop();
let path_json = path.join("openapi.json");
let path_yml = path.join("openapi.yml");
serde_json::to_writer_pretty(&File::create(path_json.clone()).unwrap(), &openapi).unwrap();
fs::write(path_yml.clone(), openapi.to_yaml().unwrap()).unwrap();
println!(
"OpenAPI documents generated: {} {}",
path_json.to_string_lossy(),
path_yml.to_string_lossy()
);
}