Skip to main content

hermes_core/segment/builder/
config.rs

//! Configuration and statistics types for the segment builder.
2
3use std::path::PathBuf;
4
5use crate::compression::CompressionLevel;
6
7/// Statistics about segment builder state
8#[derive(Debug, Clone, Default)]
9pub struct SegmentBuilderStats {
10    /// Number of documents indexed
11    pub num_docs: u32,
12    /// Number of unique terms in the inverted index
13    pub unique_terms: usize,
14    /// Total postings in memory (across all terms)
15    pub postings_in_memory: usize,
16    /// Number of interned strings
17    pub interned_strings: usize,
18    /// Size of doc_field_lengths vector
19    pub doc_field_lengths_size: usize,
20    /// Estimated total memory usage in bytes
21    pub estimated_memory_bytes: usize,
22    /// Memory breakdown by component
23    pub memory_breakdown: MemoryBreakdown,
24}
25
/// Detailed memory breakdown by component.
///
/// All fields are plain `usize` counters, so `Default` yields a breakdown
/// with every field set to zero. `PartialEq`/`Eq` are derived so two
/// breakdowns can be compared directly with `==` (for example in tests or
/// when checking whether usage changed between two measurements).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MemoryBreakdown {
    /// Postings memory (`CompactPosting` structs).
    pub postings_bytes: usize,
    /// Overhead of the inverted index `HashMap`.
    pub index_overhead_bytes: usize,
    /// Memory used by the term interner.
    pub interner_bytes: usize,
    /// Memory used by document field lengths.
    pub field_lengths_bytes: usize,
    /// Dense vector storage.
    pub dense_vectors_bytes: usize,
    /// Number of dense vectors (a count, not a byte size).
    pub dense_vector_count: usize,
    /// Sparse vector storage.
    pub sparse_vectors_bytes: usize,
    /// Position index storage.
    pub position_index_bytes: usize,
}
46
47/// Configuration for segment builder
48#[derive(Clone)]
49pub struct SegmentBuilderConfig {
50    /// Directory for temporary spill files
51    pub temp_dir: PathBuf,
52    /// Compression level for document store
53    pub compression_level: CompressionLevel,
54    /// Number of threads for parallel compression
55    pub num_compression_threads: usize,
56    /// Initial capacity for term interner
57    pub interner_capacity: usize,
58    /// Initial capacity for posting lists hashmap
59    pub posting_map_capacity: usize,
60}
61
62impl Default for SegmentBuilderConfig {
63    fn default() -> Self {
64        Self {
65            temp_dir: std::env::temp_dir(),
66            compression_level: CompressionLevel(7),
67            num_compression_threads: num_cpus::get(),
68            interner_capacity: 1_000_000,
69            posting_map_capacity: 500_000,
70        }
71    }
72}