Skip to main content

hermes_core/
lib.rs

1//! Hermes - A minimal async search engine library
2//!
3//! Features:
4//! - Fully async IO with Directory abstraction for network/local/memory storage
5//! - SSTable-based term dictionary with hot cache and lazy loading
6//! - Bitpacked posting lists with block-level skip info
7//! - Document store with Zstd compression
8//! - Multiple segments with merge support
9//! - Text and numeric field support
10//! - Term, boolean, and boost queries
11//! - BlockWAND / MaxScore query optimizations
12
13pub mod compression;
14pub mod directories;
15pub mod dsl;
16pub mod error;
17pub mod index;
18pub mod merge;
19pub mod query;
20pub mod segment;
21pub mod structures;
22pub mod tokenizer;
23
24// Re-exports from dsl
25pub use dsl::{
26    Document, Field, FieldDef, FieldEntry, FieldType, FieldValue, IndexDef, QueryLanguageParser,
27    Schema, SchemaBuilder, SdlParser, parse_sdl, parse_single_index,
28};
29
30// Re-exports from structures
31pub use structures::{
32    AsyncSSTableReader, BlockPostingList, HorizontalBP128Iterator, HorizontalBP128PostingList,
33    PostingList, PostingListIterator, SSTableValue, TERMINATED, TermInfo,
34};
35
36// Re-exports from directories
37#[cfg(feature = "native")]
38pub use directories::FsDirectory;
39#[cfg(feature = "http")]
40pub use directories::HttpDirectory;
41#[cfg(feature = "native")]
42pub use directories::MmapDirectory;
43pub use directories::{
44    AsyncFileRead, CachingDirectory, Directory, DirectoryWriter, FileSlice, LazyFileHandle,
45    LazyFileSlice, OwnedBytes, RamDirectory, SliceCacheStats, SliceCachingDirectory,
46};
47
48/// Default directory type for native builds - uses memory-mapped files for efficient access
49#[cfg(feature = "native")]
50pub type DefaultDirectory = MmapDirectory;
51
52// Re-exports from segment
53pub use segment::{
54    AsyncSegmentReader, AsyncStoreReader, FieldStats, SegmentId, SegmentMeta, SegmentReader,
55};
56#[cfg(feature = "native")]
57pub use segment::{SegmentBuilder, SegmentBuilderConfig, SegmentBuilderStats};
58
59// Re-exports from query
60pub use query::{
61    BlockMaxScoreExecutor, Bm25Params, BooleanQuery, BoostQuery, Query, ScoredDoc, Scorer,
62    SearchHit, SearchResponse, SearchResult, TermQuery, TopKCollector, WandExecutor, WandOrQuery,
63    search_segment,
64};
65
66// Re-exports from tokenizer
67pub use tokenizer::{
68    BoxedTokenizer, Language, LanguageAwareTokenizer, LowercaseTokenizer, MultiLanguageStemmer,
69    SimpleTokenizer, StemmerTokenizer, Token, Tokenizer, TokenizerRegistry, parse_language,
70};
71
72// Re-exports from other modules
73pub use directories::SLICE_CACHE_EXTENSION;
74pub use error::{Error, Result};
75pub use index::Searcher;
76#[cfg(feature = "native")]
77pub use index::{Index, IndexReader, IndexWriter};
78pub use index::{IndexConfig, IndexMetadata, SLICE_CACHE_FILENAME};
79#[cfg(feature = "native")]
80pub use index::{
81    IndexingStats, SchemaConfig, SchemaFieldConfig, create_index_at_path, create_index_from_sdl,
82    index_documents_from_reader, index_json_document, parse_schema,
83};
84
85// Re-exports from merge
86#[cfg(feature = "native")]
87pub use merge::SegmentManager;
88pub use merge::{MergeCandidate, MergePolicy, NoMergePolicy, SegmentInfo, TieredMergePolicy};
89
/// Alias for `u32`.
/// NOTE(review): presumably the identifier of a document within the index — confirm against usage.
90pub type DocId = u32;
/// Alias for `u32`.
/// NOTE(review): presumably a term-frequency count stored in posting lists — confirm against usage.
91pub type TermFreq = u32;
/// Alias for `f32`.
/// NOTE(review): presumably the relevance score produced by query scoring — confirm against usage.
92pub type Score = f32;
93
/// Returns the default indexing thread count: half of the CPU count reported by
/// `num_cpus::get()`, but never less than one.
///
/// Kept in a single place so every config shares the same definition.
#[cfg(feature = "native")]
pub fn default_indexing_threads() -> usize {
    let half_of_cpus = num_cpus::get() / 2;
    // Integer division yields 0 on a single-CPU host; clamp up to one thread.
    std::cmp::max(half_of_cpus, 1)
}
100
/// Returns the default compression thread count: a quarter of the CPU count
/// reported by `num_cpus::get()`, but never less than one.
///
/// Kept in a single place so every config shares the same definition.
#[cfg(feature = "native")]
pub fn default_compression_threads() -> usize {
    let quarter_of_cpus = num_cpus::get() / 4;
    // Integer division yields 0 on hosts with fewer than four CPUs; clamp up to one thread.
    std::cmp::max(quarter_of_cpus, 1)
}