use std::collections::HashMap;
use std::io;
use crate::codecs::competitive_impact::NormsLookup;
use crate::document::IndexOptions;
use crate::index::indexing_chain::PerFieldData;
use crate::index::{FieldInfos, SegmentInfo};
use crate::store::SharedDirectory;
use super::blocktree_writer::BlockTreeTermsWriter;
// --- Postings block geometry -------------------------------------------------
// NOTE(review): the names suggest these mirror the block / level-1 skip layout
// of the Lucene 103 postings format — confirm against the matching reader.

/// Block size used by the postings writer (128).
pub const BLOCK_SIZE: usize = 128;
/// Multiplier applied to [`BLOCK_SIZE`] to obtain [`LEVEL1_NUM_DOCS`] (32).
pub const LEVEL1_FACTOR: usize = 32;
/// `LEVEL1_FACTOR * BLOCK_SIZE`, i.e. 32 * 128 = 4096.
pub const LEVEL1_NUM_DOCS: usize = LEVEL1_FACTOR * BLOCK_SIZE;

// --- Postings format versions ------------------------------------------------

/// First postings format version number.
pub const VERSION_START: i32 = 0;
/// Current postings format version; presently identical to [`VERSION_START`].
pub const VERSION_CURRENT: i32 = VERSION_START;

// --- Postings file extensions and codec names --------------------------------
// NOTE(review): presumably the extensions name the per-segment postings files
// and the `*_CODEC` strings are written into their headers — their use is not
// visible in this part of the file; confirm at the call sites.

/// Extension string for the postings metadata file.
pub const META_EXTENSION: &str = "psm";
/// Extension string for the doc file.
pub const DOC_EXTENSION: &str = "doc";
/// Extension string for the positions file.
pub const POS_EXTENSION: &str = "pos";
/// Codec name string associated with terms.
pub const TERMS_CODEC: &str = "Lucene103PostingsWriterTerms";
/// Codec name string associated with postings metadata.
pub const META_CODEC: &str = "Lucene103PostingsWriterMeta";
/// Codec name string associated with the doc file.
pub const DOC_CODEC: &str = "Lucene103PostingsWriterDoc";
/// Codec name string associated with the positions file.
pub const POS_CODEC: &str = "Lucene103PostingsWriterPos";

// --- Block-tree terms dictionary ---------------------------------------------
// NOTE(review): presumably consumed by `BlockTreeTermsWriter`; confirm there.

/// Extension string for the terms dictionary file.
pub const TERMS_EXTENSION: &str = "tim";
/// Extension string for the terms index file.
pub const TERMS_INDEX_EXTENSION: &str = "tip";
/// Extension string for the terms metadata file.
pub const TERMS_META_EXTENSION: &str = "tmd";
/// Codec name string for the terms dictionary.
pub const TERMS_CODEC_NAME: &str = "BlockTreeTermsDict";
/// Codec name string for the terms index.
pub const TERMS_INDEX_CODEC_NAME: &str = "BlockTreeTermsIndex";
/// Codec name string for the terms metadata.
pub const TERMS_META_CODEC_NAME: &str = "BlockTreeTermsMeta";
/// First block-tree format version number.
pub const BLOCKTREE_VERSION_START: i32 = 0;
/// Current block-tree format version; presently identical to
/// [`BLOCKTREE_VERSION_START`].
pub const BLOCKTREE_VERSION_CURRENT: i32 = BLOCKTREE_VERSION_START;
/// Default minimum block size for the block-tree terms dictionary (25).
pub const DEFAULT_MIN_BLOCK_SIZE: usize = 25;
/// Default maximum block size for the block-tree terms dictionary (48).
pub const DEFAULT_MAX_BLOCK_SIZE: usize = 48;
/// Plain-data record of per-term counters and offsets.
///
/// NOTE(review): the field descriptions below are inferred from the field
/// names (they resemble Lucene's `IntBlockTermState`); confirm against the
/// code that fills them in.
#[derive(Clone, Copy, Debug)]
pub struct IntBlockTermState {
    /// Presumably the number of documents containing the term.
    pub doc_freq: i32,
    /// Presumably the total number of occurrences of the term.
    pub total_term_freq: i64,
    /// Presumably the starting file pointer of the term's data in the doc file.
    pub doc_start_fp: i64,
    /// Presumably the starting file pointer of the term's data in the positions file.
    pub pos_start_fp: i64,
    /// Initialised to `-1`; presumably `-1` means "not set" — TODO confirm.
    pub last_pos_block_offset: i64,
    /// Initialised to `-1`; presumably `-1` means "not a singleton" — TODO confirm.
    pub singleton_doc_id: i32,
}

impl IntBlockTermState {
    /// Creates a state with every counter and file pointer at `0` and both
    /// `last_pos_block_offset` and `singleton_doc_id` at `-1`.
    ///
    /// Equivalent to [`IntBlockTermState::default`].
    pub fn new() -> Self {
        <Self as Default>::default()
    }
}

impl Default for IntBlockTermState {
    /// Returns the same initial state as [`IntBlockTermState::new`].
    fn default() -> Self {
        IntBlockTermState {
            // Counters and file pointers start at zero.
            doc_freq: 0,
            total_term_freq: 0,
            doc_start_fp: 0,
            pos_start_fp: 0,
            // These two start at -1 rather than 0.
            last_pos_block_offset: -1,
            singleton_doc_id: -1,
        }
    }
}
/// Writes the postings of every indexed field through a [`BlockTreeTermsWriter`].
///
/// Fields are taken from `field_infos`, skipping those whose index options are
/// [`IndexOptions::None`], and are processed in ascending field-name order. A
/// field is written only when `per_field` holds an entry for it and that entry
/// reports `has_postings()`.
///
/// # Parameters
/// - `directory`: passed to the terms writer on construction.
/// - `segment_info`: supplies the segment `name` and `id` passed to the terms writer.
/// - `segment_suffix`: passed to the terms writer on construction.
/// - `field_infos`: the field metadata to iterate; also handed to the terms writer.
/// - `per_field`: per-field indexing data keyed by field name.
///
/// # Returns
/// Whatever `BlockTreeTermsWriter::finish` returns — presumably the names of
/// the files that were written (TODO confirm in `blocktree_writer`).
///
/// # Errors
/// Propagates any I/O error from creating the terms writer, writing a field,
/// or finishing the writer. Processing stops at the first failure.
pub fn write(
    directory: &SharedDirectory,
    segment_info: &SegmentInfo,
    segment_suffix: &str,
    field_infos: &FieldInfos,
    per_field: &HashMap<String, PerFieldData>,
) -> io::Result<Vec<String>> {
    let mut terms_writer = BlockTreeTermsWriter::new(
        directory,
        &segment_info.name,
        segment_suffix,
        &segment_info.id,
        field_infos,
    )?;

    let mut indexed_fields: Vec<_> = field_infos
        .iter()
        .filter(|fi| fi.index_options() != IndexOptions::None)
        .collect();
    // Compare the borrowed names directly. `sort_by_key` with a `to_string()`
    // key would allocate a new `String` every time the key is evaluated.
    // `sort_by` is stable, so the resulting order is identical.
    indexed_fields.sort_by(|a, b| a.name().cmp(b.name()));

    for fi in indexed_fields {
        // Fields with no per-field data, or with no postings, are skipped.
        let Some(pfd) = per_field.get(fi.name()) else {
            continue;
        };
        if !pfd.has_postings() {
            continue;
        }

        let sorted = pfd.sorted_postings();
        let norms = if fi.omit_norms() {
            NormsLookup::no_norms()
        } else {
            NormsLookup::new(&pfd.norms, &pfd.norms_docs)
        };
        terms_writer.write_field(fi, &sorted, &pfd.postings, &norms)?;
    }

    terms_writer.finish()
}