1use std::collections::HashMap;
11
12use serde::{Deserialize, Serialize};
13
14use crate::schema::Field;
15use crate::SegmentComponent;
16
/// A number of bytes. Every size reported by this module uses this alias.
pub type ByteCount = usize;
19
20pub enum ComponentSpaceUsage {
22 PerField(PerFieldSpaceUsage),
24 Store(StoreSpaceUsage),
26 Basic(ByteCount),
28}
29
/// Combined space usage of a searcher: one entry per segment plus an overall total.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SearcherSpaceUsage {
    /// Space usage of each segment, in the order the segments were added.
    segments: Vec<SegmentSpaceUsage>,
    /// Running sum of the segments' totals, in bytes; updated by `add_segment`.
    total: ByteCount,
}
36
37impl SearcherSpaceUsage {
38 pub(crate) fn new() -> SearcherSpaceUsage {
39 SearcherSpaceUsage {
40 segments: Vec::new(),
41 total: 0,
42 }
43 }
44
45 pub(crate) fn add_segment(&mut self, segment: SegmentSpaceUsage) {
48 self.total += segment.total();
49 self.segments.push(segment);
50 }
51
52 pub fn segments(&self) -> &[SegmentSpaceUsage] {
54 &self.segments[..]
55 }
56
57 pub fn total(&self) -> ByteCount {
60 self.total
61 }
62}
63
/// Space usage of a single segment, broken down by component.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SegmentSpaceUsage {
    /// Document count reported for the segment.
    num_docs: u32,

    /// Per-field usage of the term dictionary.
    termdict: PerFieldSpaceUsage,
    /// Per-field usage of the postings.
    postings: PerFieldSpaceUsage,
    /// Per-field usage of the positions.
    positions: PerFieldSpaceUsage,
    /// Per-field usage of the fast fields.
    fast_fields: PerFieldSpaceUsage,
    /// Per-field usage of the field norms.
    fieldnorms: PerFieldSpaceUsage,

    /// Usage of the store.
    store: StoreSpaceUsage,

    /// Bytes attributed to deletes.
    deletes: ByteCount,

    /// Sum of every component above, in bytes; computed once in `new`.
    total: ByteCount,
}
81
82impl SegmentSpaceUsage {
83 #[allow(clippy::too_many_arguments)]
84 pub(crate) fn new(
85 num_docs: u32,
86 termdict: PerFieldSpaceUsage,
87 postings: PerFieldSpaceUsage,
88 positions: PerFieldSpaceUsage,
89 fast_fields: PerFieldSpaceUsage,
90 fieldnorms: PerFieldSpaceUsage,
91 store: StoreSpaceUsage,
92 deletes: ByteCount,
93 ) -> SegmentSpaceUsage {
94 let total = termdict.total()
95 + postings.total()
96 + positions.total()
97 + fast_fields.total()
98 + fieldnorms.total()
99 + store.total()
100 + deletes;
101 SegmentSpaceUsage {
102 num_docs,
103 termdict,
104 postings,
105 positions,
106 fast_fields,
107 fieldnorms,
108 store,
109 deletes,
110 total,
111 }
112 }
113
114 pub fn component(&self, component: SegmentComponent) -> ComponentSpaceUsage {
119 use self::ComponentSpaceUsage::*;
120 use crate::SegmentComponent::*;
121 match component {
122 Postings => PerField(self.postings().clone()),
123 Positions => PerField(self.positions().clone()),
124 FastFields => PerField(self.fast_fields().clone()),
125 FieldNorms => PerField(self.fieldnorms().clone()),
126 Terms => PerField(self.termdict().clone()),
127 SegmentComponent::Store => ComponentSpaceUsage::Store(self.store().clone()),
128 SegmentComponent::TempStore => ComponentSpaceUsage::Store(self.store().clone()),
129 Delete => Basic(self.deletes()),
130 }
131 }
132
133 pub fn num_docs(&self) -> u32 {
135 self.num_docs
136 }
137
138 pub fn termdict(&self) -> &PerFieldSpaceUsage {
140 &self.termdict
141 }
142
143 pub fn postings(&self) -> &PerFieldSpaceUsage {
145 &self.postings
146 }
147
148 pub fn positions(&self) -> &PerFieldSpaceUsage {
150 &self.positions
151 }
152
153 pub fn fast_fields(&self) -> &PerFieldSpaceUsage {
155 &self.fast_fields
156 }
157
158 pub fn fieldnorms(&self) -> &PerFieldSpaceUsage {
160 &self.fieldnorms
161 }
162
163 pub fn store(&self) -> &StoreSpaceUsage {
165 &self.store
166 }
167
168 pub fn deletes(&self) -> ByteCount {
170 self.deletes
171 }
172
173 pub fn total(&self) -> ByteCount {
175 self.total
176 }
177}
178
/// Space usage of a store, split into two byte counts: `data` and `offsets`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct StoreSpaceUsage {
    /// Bytes counted as store data.
    data: ByteCount,
    /// Bytes counted as store offsets.
    offsets: ByteCount,
}
189
190impl StoreSpaceUsage {
191 pub(crate) fn new(data: ByteCount, offsets: ByteCount) -> StoreSpaceUsage {
192 StoreSpaceUsage { data, offsets }
193 }
194
195 pub fn data_usage(&self) -> ByteCount {
197 self.data
198 }
199
200 pub fn offsets_usage(&self) -> ByteCount {
202 self.offsets
203 }
204
205 pub fn total(&self) -> ByteCount {
207 self.data + self.offsets
208 }
209}
210
/// Space usage of one component broken down per field, together with the sum over
/// all fields.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PerFieldSpaceUsage {
    /// Usage recorded for each field.
    fields: HashMap<Field, FieldUsage>,
    /// Sum of the totals of every entry in `fields`, computed once in `new`.
    total: ByteCount,
}
220
221impl PerFieldSpaceUsage {
222 pub(crate) fn new(fields: HashMap<Field, FieldUsage>) -> PerFieldSpaceUsage {
223 let total = fields.values().map(FieldUsage::total).sum();
224 PerFieldSpaceUsage { fields, total }
225 }
226
227 pub fn fields(&self) -> impl Iterator<Item = (&Field, &FieldUsage)> {
229 self.fields.iter()
230 }
231
232 pub fn total(&self) -> ByteCount {
234 self.total
235 }
236}
237
/// Space usage of one field, with a breakdown by index.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct FieldUsage {
    /// The field this usage refers to.
    field: Field,
    /// Total bytes recorded for the field; incremented by every `add_field_idx` call.
    num_bytes: ByteCount,
    /// Bytes recorded per index, positioned by index. `None` marks an index for
    /// which no size has been recorded.
    sub_num_bytes: Vec<Option<ByteCount>>,
}
251
252impl FieldUsage {
253 pub(crate) fn empty(field: Field) -> FieldUsage {
254 FieldUsage {
255 field,
256 num_bytes: 0,
257 sub_num_bytes: Vec::new(),
258 }
259 }
260
261 pub(crate) fn add_field_idx(&mut self, idx: usize, size: ByteCount) {
262 if self.sub_num_bytes.len() < idx + 1 {
263 self.sub_num_bytes.resize(idx + 1, None);
264 }
265 assert!(self.sub_num_bytes[idx].is_none());
266 self.sub_num_bytes[idx] = Some(size);
267 self.num_bytes += size
268 }
269
270 pub fn field(&self) -> Field {
272 self.field
273 }
274
275 pub fn sub_num_bytes(&self) -> &[Option<ByteCount>] {
277 &self.sub_num_bytes[..]
278 }
279
280 pub fn total(&self) -> ByteCount {
282 self.num_bytes
283 }
284}
285
#[cfg(test)]
mod test {
    use crate::core::Index;
    use crate::schema::{Field, Schema, FAST, INDEXED, STORED, TEXT};
    use crate::space_usage::{ByteCount, PerFieldSpaceUsage};
    use crate::Term;

    /// Text documents shared by the text and store tests, in insertion order.
    const SAMPLE_TEXTS: [&str; 4] = [
        "hi",
        "this is a test",
        "some more documents with some word overlap with the other test",
        "hello hi goodbye",
    ];

    /// An index with an empty schema and no documents reports zero bytes.
    #[test]
    fn test_empty() {
        let index = Index::create_in_ram(Schema::builder().build());
        let reader = index.reader().unwrap();
        let usage = reader.searcher().space_usage().unwrap();
        assert_eq!(0, usage.total());
    }

    /// Asserts that `field_space` holds usage for exactly one field, `field`, and
    /// that its total lies within `min_size..=max_size`.
    fn expect_single_field(
        field_space: &PerFieldSpaceUsage,
        field: &Field,
        min_size: ByteCount,
        max_size: ByteCount,
    ) {
        assert!(field_space.total() >= min_size);
        assert!(field_space.total() <= max_size);
        let observed: Vec<(&Field, ByteCount)> = field_space
            .fields()
            .map(|(seen_field, usage)| (seen_field, usage.total()))
            .collect();
        assert_eq!(vec![(field, field_space.total())], observed);
    }

    /// A fast, indexed u64 field uses every per-field component except positions.
    #[test]
    fn test_fast_indexed() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let name = schema_builder.add_u64_field("name", FAST | INDEXED);
        let index = Index::create_in_ram(schema_builder.build());

        {
            let mut index_writer = index.writer_for_tests()?;
            for &value in &[1u64, 2u64, 10u64, 20u64] {
                index_writer.add_document(doc!(name => value))?;
            }
            index_writer.commit()?;
        }

        let reader = index.reader()?;
        let searcher = reader.searcher();
        let searcher_space_usage = searcher.space_usage()?;
        assert!(searcher_space_usage.total() > 0);
        let segments = searcher_space_usage.segments();
        assert_eq!(1, segments.len());

        let segment = &segments[0];
        assert!(segment.total() > 0);

        assert_eq!(4, segment.num_docs());

        expect_single_field(segment.termdict(), &name, 1, 512);
        expect_single_field(segment.postings(), &name, 1, 512);
        assert_eq!(0, segment.positions().total());
        expect_single_field(segment.fast_fields(), &name, 1, 512);
        expect_single_field(segment.fieldnorms(), &name, 1, 512);
        assert_eq!(0, segment.deletes());
        Ok(())
    }

    /// A `TEXT` field uses every per-field component except fast fields.
    #[test]
    fn test_text() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let name = schema_builder.add_text_field("name", TEXT);
        let index = Index::create_in_ram(schema_builder.build());

        {
            let mut index_writer = index.writer_for_tests()?;
            for &text in SAMPLE_TEXTS.iter() {
                index_writer.add_document(doc!(name => text))?;
            }
            index_writer.commit()?;
        }

        let reader = index.reader()?;
        let searcher = reader.searcher();
        let searcher_space_usage = searcher.space_usage()?;
        assert!(searcher_space_usage.total() > 0);
        let segments = searcher_space_usage.segments();
        assert_eq!(1, segments.len());

        let segment = &segments[0];
        assert!(segment.total() > 0);

        assert_eq!(4, segment.num_docs());

        expect_single_field(segment.termdict(), &name, 1, 512);
        expect_single_field(segment.postings(), &name, 1, 512);
        expect_single_field(segment.positions(), &name, 1, 512);
        assert_eq!(0, segment.fast_fields().total());
        expect_single_field(segment.fieldnorms(), &name, 1, 512);
        assert_eq!(0, segment.deletes());
        Ok(())
    }

    /// A `STORED`-only field uses the store and none of the per-field components.
    #[test]
    fn test_store() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let name = schema_builder.add_text_field("name", STORED);
        let index = Index::create_in_ram(schema_builder.build());

        {
            let mut index_writer = index.writer_for_tests()?;
            for &text in SAMPLE_TEXTS.iter() {
                index_writer.add_document(doc!(name => text))?;
            }
            index_writer.commit()?;
        }

        let reader = index.reader()?;
        let searcher = reader.searcher();
        let searcher_space_usage = searcher.space_usage()?;
        assert!(searcher_space_usage.total() > 0);
        let segments = searcher_space_usage.segments();
        assert_eq!(1, segments.len());

        let segment = &segments[0];
        assert!(segment.total() > 0);

        assert_eq!(4, segment.num_docs());

        // Checked in the same order as before: termdict, postings, positions,
        // fast fields, field norms.
        let per_field_components = [
            segment.termdict(),
            segment.postings(),
            segment.positions(),
            segment.fast_fields(),
            segment.fieldnorms(),
        ];
        for per_field in per_field_components.iter() {
            assert_eq!(0, per_field.total());
        }
        assert!(segment.store().total() > 0);
        assert!(segment.store().total() < 512);
        assert_eq!(0, segment.deletes());
        Ok(())
    }

    /// Deleting documents lowers `num_docs` and produces a non-zero deletes usage.
    #[test]
    fn test_deletes() -> crate::Result<()> {
        let mut schema_builder = Schema::builder();
        let name = schema_builder.add_u64_field("name", INDEXED);
        let index = Index::create_in_ram(schema_builder.build());

        {
            let mut index_writer = index.writer_for_tests()?;
            for &value in &[1u64, 2u64, 3u64, 4u64] {
                index_writer.add_document(doc!(name => value))?;
            }
            index_writer.commit()?;
        }

        {
            // A second writer deletes two of the four documents.
            let mut index_writer2 = index.writer(50_000_000)?;
            for &deleted in &[2u64, 3u64] {
                index_writer2.delete_term(Term::from_field_u64(name, deleted));
            }
            index_writer2.commit()?;
        }

        let reader = index.reader()?;
        let searcher = reader.searcher();
        let searcher_space_usage = searcher.space_usage()?;
        assert!(searcher_space_usage.total() > 0);
        let segments = searcher_space_usage.segments();
        assert_eq!(1, segments.len());

        let segment_space_usage = &segments[0];
        assert!(segment_space_usage.total() > 0);

        assert_eq!(2, segment_space_usage.num_docs());

        expect_single_field(segment_space_usage.termdict(), &name, 1, 512);
        expect_single_field(segment_space_usage.postings(), &name, 1, 512);
        assert_eq!(0, segment_space_usage.positions().total());
        assert_eq!(0, segment_space_usage.fast_fields().total());
        expect_single_field(segment_space_usage.fieldnorms(), &name, 1, 512);
        assert!(segment_space_usage.deletes() > 0);
        Ok(())
    }
}