Crate seekstorm

source ·
Expand description

§seekstorm

SeekStorm is an open-source, sub-millisecond full-text search library & multi-tenancy server written in Rust. The SeekStorm library can be embedded into your program, while the SeekStorm server is a standalone search server to be accessed via HTTP.

§Add required crates to your project

cargo add seekstorm
cargo add tokio
cargo add serde_json
use std::{collections::HashSet, error::Error, path::Path, sync::Arc};
use seekstorm::{index::*,search::*,highlighter::*};
use tokio::sync::RwLock;

§use an asynchronous Rust runtime

#[tokio::main]
async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {

§create index

let index_path=Path::new("C:/index/");
let schema_json = r#"
[{"field_name":"title","field_type":"Text","field_stored":false,"field_indexed":false},
{"field_name":"body","field_type":"Text","field_stored":true,"field_indexed":true},
{"field_name":"url","field_type":"Text","field_stored":false,"field_indexed":false}]"#;
let schema=serde_json::from_str(schema_json).unwrap();
let meta = IndexMetaObject {
id: 0,
name: "test_index".to_string(),
similarity:SimilarityType::Bm25f,
tokenizer:TokenizerType::AsciiAlphabetic,
access_type: AccessType::Mmap,
};
let segment_number_bits1=11;
let serialize_schema=true;
let index=create_index(index_path,meta,&schema,serialize_schema,segment_number_bits1).unwrap();
let _index_arc = Arc::new(RwLock::new(index));

§open index (as an alternative to creating a new index)

let index_path=Path::new("C:/index/");
let index_arc=open_index(index_path).await.unwrap();

§index documents

let documents_json = r#"
[{"title":"title1 test","body":"body1","url":"url1"},
{"title":"title2","body":"body2 test","url":"url2"},
{"title":"title3 test","body":"body3 test","url":"url3"}]"#;
let documents_vec=serde_json::from_str(documents_json).unwrap();
index_arc.index_documents(documents_vec).await;

§search index

let query="test".to_string();
let offset=10;
let length=10;
let query_type=QueryType::Intersection;
let result_type=ResultType::TopkCount;
let include_uncommitted=false;
let field_filter=Vec::new();
let result_list = index_arc.search(query, query_type, offset, length, result_type,include_uncommitted,field_filter).await;

§display results

let highlights:Vec<Highlight>= vec![
Highlight {
    field: "body".to_string(),
    name:String::new(),
    fragment_number: 2,
    fragment_size: 160,
    highlight_markup: true,
},
];    
let highlighter=Some(highlighter(highlights, result_list.query_term_strings));
let fields_hashset= HashSet::new();
let mut index=index_arc.write().await;
for result in result_list.results.iter() {
  let doc=index.get_document(result.doc_id,false,&highlighter,&fields_hashset).unwrap();
  println!("result {} rank {} body field {:?}" , result.doc_id,result.score, doc.get("body"));
}

§clear index

index.clear_index();

§delete index

index.delete_index();

§close index

index.close_index();

§seekstorm library version string

let version=version();
println!("version {}",version);

§end of main function

   Ok(())
}

Modules§

  • Extracts the most relevant fragments (snippets, summaries) from specified fields of the document to provide a “keyword in context” (KWIC) functionality. With highlight_markup, the matching query terms within the fragments can be highlighted with HTML markup.
  • Operate the index: create_index, open_index, clear_index, close_index, delete_index, index_document(s)
  • Search the index for all indexed documents, both committed and uncommitted. The latter enables true realtime search: documents are available for search in exactly the same millisecond they are indexed.