ngdp_bpsv/
interned_document.rs1use crate::error::Result;
4use crate::interner::{InternedValue, StringInterner};
5use crate::schema::BpsvSchema;
6use crate::value::BpsvValue;
7use std::sync::Arc;
8
9#[derive(Debug, Clone)]
14pub struct InternedBpsvDocument {
15 schema: Arc<BpsvSchema>,
17 rows: Vec<InternedRow>,
19 sequence_number: Option<u32>,
21 interner: StringInterner,
23}
24
25#[derive(Debug, Clone)]
27pub struct InternedRow {
28 values: Vec<InternedValue>,
29}
30
31impl InternedBpsvDocument {
32 pub fn from_document(doc: crate::document::BpsvDocument<'_>) -> Self {
34 let interner = StringInterner::with_capacity(100);
35 let mut interned_rows = Vec::with_capacity(doc.rows().len());
36
37 let schema = Arc::new(doc.schema().clone());
39 let sequence_number = doc.sequence_number();
40
41 for row in doc.into_owned_rows() {
43 let mut interned_values = Vec::with_capacity(row.len());
44
45 let typed_values = if let Some(typed) = row.typed_values {
47 typed
48 } else {
49 let mut typed = Vec::new();
51 for (value, field) in row.raw_values.iter().zip(schema.fields()) {
52 if let Ok(typed_value) = BpsvValue::parse(value, &field.field_type) {
53 typed.push(typed_value);
54 } else {
55 typed.push(BpsvValue::Empty);
56 }
57 }
58 typed
59 };
60
61 for value in typed_values {
63 interned_values.push(InternedValue::from_bpsv_value(value, &interner));
64 }
65
66 interned_rows.push(InternedRow {
67 values: interned_values,
68 });
69 }
70
71 Self {
72 schema,
73 rows: interned_rows,
74 sequence_number,
75 interner,
76 }
77 }
78
79 pub fn parse(data: &str) -> Result<Self> {
81 let doc = crate::document::BpsvDocument::parse(data)?;
82 Ok(Self::from_document(doc))
83 }
84
85 pub fn schema(&self) -> &BpsvSchema {
87 &self.schema
88 }
89
90 pub fn rows(&self) -> &[InternedRow] {
92 &self.rows
93 }
94
95 pub fn sequence_number(&self) -> Option<u32> {
97 self.sequence_number
98 }
99
100 pub fn memory_stats(&self) -> crate::interner::MemoryStats {
102 self.interner.memory_usage()
103 }
104
105 pub fn interner_hit_rate(&self) -> f64 {
107 self.interner.hit_rate()
108 }
109
110 pub fn find_rows(&self, field_name: &str, value: &str) -> Vec<&InternedRow> {
112 let field_index = match self.schema.get_field(field_name) {
113 Some(field) => field.index,
114 None => return vec![],
115 };
116
117 self.rows
118 .iter()
119 .filter(|row| {
120 row.values
121 .get(field_index)
122 .and_then(|v| v.as_str())
123 .map(|s| s == value)
124 .unwrap_or(false)
125 })
126 .collect()
127 }
128
129 pub fn get_row(&self, index: usize) -> Option<&InternedRow> {
131 self.rows.get(index)
132 }
133
134 pub fn row_count(&self) -> usize {
136 self.rows.len()
137 }
138
139 pub fn is_empty(&self) -> bool {
141 self.rows.is_empty()
142 }
143}
144
145impl InternedRow {
146 pub fn get(&self, index: usize) -> Option<&InternedValue> {
148 self.values.get(index)
149 }
150
151 pub fn get_by_name(&self, field_name: &str, schema: &BpsvSchema) -> Option<&InternedValue> {
153 schema
154 .get_field(field_name)
155 .and_then(|field| self.get(field.index))
156 }
157
158 pub fn values(&self) -> &[InternedValue] {
160 &self.values
161 }
162
163 pub fn len(&self) -> usize {
165 self.values.len()
166 }
167
168 pub fn is_empty(&self) -> bool {
170 self.values.is_empty()
171 }
172}