use std::io::{self, Write};
use fastfield_codecs::VecColumn;
use crate::fastfield::serializer::CompositeFastFieldSerializer;
use crate::fastfield::MultivalueStartIndex;
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::schema::{Document, Field, Value};
use crate::DocId;
/// Accumulates the byte payloads of a single bytes fast field, one payload
/// per document.
///
/// Payloads are appended back to back into one buffer (`vals`), while
/// `doc_index` records the offset at which each document's payload starts.
/// A document's payload therefore spans from its own start offset up to the
/// next document's start offset (or the end of `vals` for the last document).
pub struct BytesFastFieldWriter {
/// Field this writer was created for; passed to the serializer on `serialize`.
field: Field,
/// Concatenated payload bytes of every document registered so far.
vals: Vec<u8>,
/// `doc_index[i]` is the offset into `vals` where document `i`'s bytes begin.
doc_index: Vec<u64>,
}
impl BytesFastFieldWriter {
    /// Creates an empty writer bound to `field`.
    pub fn new(field: Field) -> Self {
        Self {
            field,
            vals: Vec::new(),
            doc_index: Vec::new(),
        }
    }

    /// Returns the number of bytes currently reserved by the internal buffers
    /// (payload bytes plus the `u64` start offsets), based on their capacity.
    pub fn mem_usage(&self) -> usize {
        let offsets_bytes = self.doc_index.capacity() * std::mem::size_of::<u64>();
        let payload_bytes = self.vals.capacity();
        payload_bytes + offsets_bytes
    }

    /// Returns the field this writer is bound to.
    pub fn field(&self) -> Field {
        self.field
    }

    /// Registers a new document by recording the current end of the payload
    /// buffer as that document's start offset.
    pub(crate) fn next_doc(&mut self) {
        let start_offset = self.vals.len() as u64;
        self.doc_index.push(start_offset);
    }

    /// Registers `doc` and stores its payload for this writer's field.
    ///
    /// Only the first `Value::Bytes` value found for the field is stored;
    /// any further values are ignored. A document without a bytes value for
    /// the field is still registered, with an empty payload.
    ///
    /// This implementation always returns `Ok(())`.
    pub fn add_document(&mut self, doc: &Document) -> crate::Result<()> {
        self.next_doc();
        let first_bytes = doc.get_all(self.field).find_map(|value| match value {
            Value::Bytes(bytes) => Some(bytes),
            _ => None,
        });
        if let Some(bytes) = first_bytes {
            self.vals.extend_from_slice(bytes);
        }
        Ok(())
    }

    /// Registers a new document whose payload is `val` and returns the id
    /// assigned to it, i.e. the number of documents registered before the call.
    pub fn add_document_val(&mut self, val: &[u8]) -> DocId {
        let assigned_doc_id = self.doc_index.len() as DocId;
        self.next_doc();
        self.vals.extend_from_slice(val);
        assigned_doc_id
    }

    /// Returns an iterator over the per-document payloads.
    ///
    /// With a `doc_id_map`, payloads are yielded in the order of
    /// `doc_id_map.iter_old_doc_ids()`; without one, they are yielded for
    /// ids `0..doc_index.len()`.
    ///
    /// NOTE(review): `serialize` appends one extra end offset to `doc_index`,
    /// so calling this with `None` afterwards would yield one additional,
    /// empty trailing slice. `serialize` itself only calls it with `Some`.
    fn get_ordered_values<'a: 'b, 'b>(
        &'a self,
        doc_id_map: Option<&'b DocIdMapping>,
    ) -> impl Iterator<Item = &'b [u8]> {
        let old_doc_ids: Box<dyn Iterator<Item = u32>> = match doc_id_map {
            Some(mapping) => Box::new(mapping.iter_old_doc_ids()),
            None => {
                let max_doc = self.doc_index.len() as u32;
                Box::new(0..max_doc)
            }
        };
        old_doc_ids.map(move |old_doc_id| self.get_values_for_doc_id(old_doc_id))
    }

    /// Returns the payload stored for `doc_id`.
    ///
    /// # Panics
    ///
    /// Panics if `doc_id` is not a valid index into `doc_index`.
    fn get_values_for_doc_id(&self, doc_id: u32) -> &[u8] {
        let doc_pos = doc_id as usize;
        let start_pos = self.doc_index[doc_pos] as usize;
        // The payload ends where the next document starts; the last entry has
        // no successor, so it runs to the end of the payload buffer.
        let end_pos = match self.doc_index.get(doc_pos + 1) {
            Some(&next_start) => next_start as usize,
            None => self.vals.len(),
        };
        &self.vals[start_pos..end_pos]
    }

    /// Consumes the writer and writes its content through `serializer`.
    ///
    /// The start offsets are written first (followed by one final offset equal
    /// to the total payload length), then the payload bytes. When a
    /// `doc_id_map` is given, offsets are wrapped in a `MultivalueStartIndex`
    /// built from that mapping and payloads are written document by document
    /// in the mapping's old-doc-id order; otherwise the payload buffer is
    /// written in one go.
    ///
    /// # Errors
    ///
    /// Propagates any `io::Error` returned by the serializer calls or by
    /// writing the payload bytes.
    pub fn serialize(
        mut self,
        serializer: &mut CompositeFastFieldSerializer,
        doc_id_map: Option<&DocIdMapping>,
    ) -> io::Result<()> {
        // Offsets: close the last document's range with the total length.
        let total_len = self.vals.len() as u64;
        self.doc_index.push(total_len);
        {
            // Scoped so the borrow of `doc_index` ends before the payload pass.
            let offsets = VecColumn::from(&self.doc_index[..]);
            match doc_id_map {
                Some(mapping) => {
                    let remapped_offsets = MultivalueStartIndex::new(&offsets, mapping);
                    serializer.create_auto_detect_u64_fast_field_with_idx(
                        self.field,
                        remapped_offsets,
                        0,
                    )?;
                }
                None => {
                    serializer.create_auto_detect_u64_fast_field_with_idx(
                        self.field, offsets, 0,
                    )?;
                }
            }
        }

        // Payload bytes.
        let mut value_serializer = serializer.new_bytes_fast_field(self.field);
        match doc_id_map {
            Some(mapping) => {
                for doc_bytes in self.get_ordered_values(Some(mapping)) {
                    value_serializer.write_all(doc_bytes)?;
                }
            }
            // No remapping: the buffer is already in document order.
            None => value_serializer.write_all(&self.vals)?,
        }
        Ok(())
    }
}