use crate::error::Result;
use crate::interner::{InternedValue, StringInterner};
use crate::schema::BpsvSchema;
use crate::value::BpsvValue;
use std::sync::Arc;
/// A BPSV document whose row values are stored as [`InternedValue`]s.
///
/// Instances are built from a parsed `crate::document::BpsvDocument`; the
/// interner that was used during the conversion is kept alongside the rows so
/// that its statistics can be queried later.
#[derive(Debug, Clone)]
pub struct InternedBpsvDocument {
/// Schema of the document, held behind an `Arc`.
schema: Arc<BpsvSchema>,
/// Data rows, in the order they were yielded by the source document.
rows: Vec<InternedRow>,
/// Sequence number copied from the source document, if it had one.
sequence_number: Option<u32>,
/// Interner that was passed to `InternedValue::from_bpsv_value` for every
/// value stored in `rows`.
interner: StringInterner,
}
/// A single row of an [`InternedBpsvDocument`].
#[derive(Debug, Clone)]
pub struct InternedRow {
/// The row's values, indexed by position (looked up via a schema field's
/// `index` when accessed by name).
values: Vec<InternedValue>,
}
impl InternedBpsvDocument {
    /// Converts a parsed `BpsvDocument` into its interned representation,
    /// consuming the document.
    ///
    /// The schema is cloned into an `Arc` and the sequence number is copied.
    /// For every row:
    ///
    /// * if the row already carries typed values, those are used as-is;
    /// * otherwise each raw value is parsed against the type of the schema
    ///   field at the same position. A value that fails to parse is stored as
    ///   `BpsvValue::Empty` rather than aborting the conversion. Raw values
    ///   and schema fields are paired positionally, so surplus entries on
    ///   either side are ignored.
    ///
    /// Each resulting value is then converted with
    /// `InternedValue::from_bpsv_value` using one interner shared by the whole
    /// document.
    pub fn from_document(doc: crate::document::BpsvDocument<'_>) -> Self {
        let interner = StringInterner::with_capacity(100);
        let schema = Arc::new(doc.schema().clone());
        let sequence_number = doc.sequence_number();
        let mut rows: Vec<InternedRow> = Vec::with_capacity(doc.rows().len());

        for row in doc.into_owned_rows() {
            // Read the width before `typed_values` is moved out of the row.
            let width = row.len();

            let typed_values = match row.typed_values {
                Some(already_typed) => already_typed,
                None => row
                    .raw_values
                    .iter()
                    .zip(schema.fields())
                    .map(|(raw, field)| {
                        // Best effort: an unparsable cell degrades to `Empty`.
                        BpsvValue::parse(raw, &field.field_type).unwrap_or(BpsvValue::Empty)
                    })
                    .collect::<Vec<_>>(),
            };

            let mut values = Vec::with_capacity(width);
            values.extend(
                typed_values
                    .into_iter()
                    .map(|value| InternedValue::from_bpsv_value(value, &interner)),
            );
            rows.push(InternedRow { values });
        }

        Self {
            schema,
            rows,
            sequence_number,
            interner,
        }
    }

    /// Parses BPSV text and converts the result with [`Self::from_document`].
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `BpsvDocument::parse`.
    pub fn parse(data: &str) -> Result<Self> {
        Ok(Self::from_document(crate::document::BpsvDocument::parse(
            data,
        )?))
    }

    /// Returns the document's schema.
    pub fn schema(&self) -> &BpsvSchema {
        self.schema.as_ref()
    }

    /// Returns all rows as a slice.
    pub fn rows(&self) -> &[InternedRow] {
        self.rows.as_slice()
    }

    /// Returns the sequence number copied from the source document, if any.
    pub fn sequence_number(&self) -> Option<u32> {
        self.sequence_number
    }

    /// Returns the memory usage statistics reported by the document's interner.
    pub fn memory_stats(&self) -> crate::interner::MemoryStats {
        self.interner.memory_usage()
    }

    /// Returns the hit rate reported by the document's interner.
    pub fn interner_hit_rate(&self) -> f64 {
        self.interner.hit_rate()
    }

    /// Returns every row whose value in the field `field_name` has a string
    /// form equal to `value`.
    ///
    /// The result is empty when the schema has no field with that name. Rows
    /// that have no value at the field's index, or whose value yields `None`
    /// from `as_str`, never match.
    pub fn find_rows(&self, field_name: &str, value: &str) -> Vec<&InternedRow> {
        match self.schema.get_field(field_name) {
            None => Vec::new(),
            Some(field) => {
                let column = field.index;
                self.rows
                    .iter()
                    .filter(|row| {
                        row.values
                            .get(column)
                            .and_then(|cell| cell.as_str())
                            .map_or(false, |text| text == value)
                    })
                    .collect()
            }
        }
    }

    /// Returns the row at `index`, or `None` if `index` is out of bounds.
    pub fn get_row(&self, index: usize) -> Option<&InternedRow> {
        self.rows.get(index)
    }

    /// Returns the number of rows in the document.
    pub fn row_count(&self) -> usize {
        self.rows.len()
    }

    /// Returns `true` when the document contains no rows.
    pub fn is_empty(&self) -> bool {
        self.rows.is_empty()
    }
}
impl InternedRow {
    /// Returns the value at position `index`, or `None` if the row has no
    /// value there.
    pub fn get(&self, index: usize) -> Option<&InternedValue> {
        self.values.get(index)
    }

    /// Looks up a value by field name, using `schema` to translate the name
    /// into a positional index.
    ///
    /// Returns `None` when the schema has no such field or when the row has no
    /// value at that field's index.
    pub fn get_by_name(&self, field_name: &str, schema: &BpsvSchema) -> Option<&InternedValue> {
        let field = schema.get_field(field_name)?;
        self.get(field.index)
    }

    /// Returns all values of the row as a slice.
    pub fn values(&self) -> &[InternedValue] {
        self.values.as_slice()
    }

    /// Returns the number of values in the row.
    pub fn len(&self) -> usize {
        self.values.len()
    }

    /// Returns `true` when the row holds no values.
    pub fn is_empty(&self) -> bool {
        self.values.is_empty()
    }
}