use std::fmt;
use std::io;
use crate::index::segment_infos;
use crate::index::segment_reader::SegmentReader;
use crate::store::Directory;
/// One segment ("leaf") of a [`DirectoryReader`], together with its position
/// inside the composite reader.
#[derive(Debug)]
pub struct LeafReaderContext {
/// Zero-based position of this leaf in the reader's segment list.
pub ord: usize,
/// Sum of `max_doc` over all leaves that precede this one in the reader.
pub doc_base: i32,
/// Reader opened for this segment.
pub reader: SegmentReader,
}
/// Composite reader over the segments of a single commit in a directory.
pub struct DirectoryReader {
// Leaf contexts, stored in the order the segments were listed when opened.
segments: Box<[LeafReaderContext]>,
// Total of `max_doc` across every leaf in `segments`.
max_doc: i32,
}
impl fmt::Debug for DirectoryReader {
    /// Writes a compact summary (segment count and total `max_doc`) instead
    /// of dumping every per-segment reader.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let num_segments = self.segments.len();
        let mut summary = f.debug_struct("DirectoryReader");
        summary.field("num_segments", &num_segments);
        summary.field("max_doc", &self.max_doc);
        summary.finish()
    }
}
impl DirectoryReader {
    /// Opens a reader over the most recent commit found in `directory`.
    ///
    /// The directory is listed, the latest segments file is resolved and
    /// read, and one [`SegmentReader`] is opened per segment in the order the
    /// segments appear in that file. Each leaf's `doc_base` is the running
    /// sum of `max_doc` over the leaves before it.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while listing the directory, locating or
    /// reading the segments file, or opening a segment. Returns an error of
    /// kind [`io::ErrorKind::InvalidData`] if the combined `max_doc` of all
    /// segments does not fit in an `i32`.
    pub fn open(directory: &dyn Directory) -> io::Result<Self> {
        let files = directory.list_all()?;
        let segments_file = segment_infos::get_last_commit_segments_file_name(&files)?;
        let infos = segment_infos::read(directory, &segments_file)?;

        let mut segments = Vec::with_capacity(infos.segments.len());
        let mut doc_base = 0i32;
        for (ord, seg) in infos.segments.iter().enumerate() {
            let reader = SegmentReader::open(directory, &seg.name, &seg.id)?;
            // Checked addition: a plain `+=` would panic in debug builds and
            // silently wrap to a negative doc base in release builds.
            let next_base = doc_base.checked_add(reader.max_doc()).ok_or_else(|| {
                io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!("too many documents: total max_doc overflows i32 at segment ord {ord}"),
                )
            })?;
            segments.push(LeafReaderContext {
                ord,
                doc_base,
                reader,
            });
            doc_base = next_base;
        }

        Ok(Self {
            // After the loop the running base equals the total across all leaves.
            max_doc: doc_base,
            segments: segments.into_boxed_slice(),
        })
    }

    /// Returns the total `max_doc` summed over all leaves.
    pub fn max_doc(&self) -> i32 {
        self.max_doc
    }

    /// Returns the number of documents in the reader.
    ///
    /// NOTE(review): this currently returns the same value as
    /// [`max_doc`](Self::max_doc); presumably deletions are not tracked yet.
    /// Confirm before relying on the two being distinct.
    pub fn num_docs(&self) -> i32 {
        self.max_doc
    }

    /// Returns the leaf contexts in the order the segments were opened.
    pub fn leaves(&self) -> &[LeafReaderContext] {
        &self.segments
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::document::{self, Document};
    use crate::index::{IndexWriter, IndexWriterConfig};
    use crate::store::MemoryDirectory;
    use assertables::*;

    /// Indexes `num_docs` small text documents, commits them, and copies the
    /// resulting segment files into a fresh in-memory directory.
    fn write_index(num_docs: usize, compound: bool) -> Box<dyn Directory> {
        let index_writer =
            IndexWriter::with_config(IndexWriterConfig::new().set_use_compound_file(compound));
        for i in 0..num_docs {
            let mut record = Document::new();
            record.add(document::text_field("content", &format!("doc {i}")));
            index_writer.add_document(record).unwrap();
        }

        let commit = index_writer.commit().unwrap();
        let committed_files = commit.into_segment_files().unwrap();

        let mut memory = MemoryDirectory::new();
        for file in &committed_files {
            memory.write_file(&file.name, &file.data).unwrap();
        }
        Box::new(memory)
    }

    /// A small non-compound index opens as exactly one leaf at base 0.
    #[test]
    fn test_open_single_segment() {
        let index_dir = write_index(5, false);
        let dir_reader = DirectoryReader::open(index_dir.as_ref()).unwrap();

        assert_eq!(dir_reader.max_doc(), 5);
        assert_eq!(dir_reader.num_docs(), 5);
        assert_eq!(dir_reader.leaves().len(), 1);

        let only_leaf = &dir_reader.leaves()[0];
        assert_eq!(only_leaf.ord, 0);
        assert_eq!(only_leaf.doc_base, 0);
        assert_eq!(only_leaf.reader.max_doc(), 5);
    }

    /// A compound-file index opens the same way as a plain one.
    #[test]
    fn test_open_compound() {
        let index_dir = write_index(3, true);
        let dir_reader = DirectoryReader::open(index_dir.as_ref()).unwrap();

        assert_eq!(dir_reader.max_doc(), 3);
        assert_eq!(dir_reader.leaves().len(), 1);
    }

    /// With a small buffer limit the writer emits several segments; ordinals
    /// and doc bases must line up across them.
    #[test]
    fn test_multi_segment() {
        let cfg = IndexWriterConfig::new()
            .set_use_compound_file(false)
            .set_max_buffered_docs(2);
        let index_writer = IndexWriter::with_config(cfg);
        for i in 0..5 {
            let mut record = Document::new();
            record.add(document::text_field("content", &format!("doc {i}")));
            index_writer.add_document(record).unwrap();
        }

        let commit = index_writer.commit().unwrap();
        let committed_files = commit.into_segment_files().unwrap();
        let mut memory = MemoryDirectory::new();
        for file in &committed_files {
            memory.write_file(&file.name, &file.data).unwrap();
        }

        let index_dir = Box::new(memory) as Box<dyn Directory>;
        let dir_reader = DirectoryReader::open(index_dir.as_ref()).unwrap();
        assert_eq!(dir_reader.max_doc(), 5);
        assert_gt!(dir_reader.leaves().len(), 1);

        // Walk the leaves, accumulating the base each one is expected to start at.
        let mut running_base = 0;
        for (position, ctx) in dir_reader.leaves().iter().enumerate() {
            assert_eq!(ctx.ord, position);
            assert_eq!(ctx.doc_base, running_base);
            running_base += ctx.reader.max_doc();
        }
        assert_eq!(running_base, 5);
    }

    /// Stored fields of a document are reachable through a leaf's segment reader.
    #[test]
    fn test_leaf_reader_access() {
        let index_writer =
            IndexWriter::with_config(IndexWriterConfig::new().set_use_compound_file(false));
        let mut record = Document::new();
        record.add(document::string_field("path", "/test.txt", true));
        index_writer.add_document(record).unwrap();

        let commit = index_writer.commit().unwrap();
        let committed_files = commit.into_segment_files().unwrap();
        let mut memory = MemoryDirectory::new();
        for file in &committed_files {
            memory.write_file(&file.name, &file.data).unwrap();
        }

        let mut dir_reader = DirectoryReader::open(&memory).unwrap();
        let first_leaf = &mut dir_reader.segments[0];
        let stored_fields = first_leaf.reader.get_fields_reader().unwrap();
        let doc_fields = stored_fields.document(0).unwrap();
        assert!(!doc_fields.is_empty());
    }

    /// Opening a directory with no commit must report an error.
    #[test]
    fn test_empty_directory_fails() {
        let empty_dir = Box::new(MemoryDirectory::new()) as Box<dyn Directory>;
        let outcome = DirectoryReader::open(empty_dir.as_ref());
        assert_err!(outcome);
    }
}