1use anyhow::Result;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10use crate::{DomainInfo, PredicateInfo, StringInterner, SymbolTable};
11
12#[derive(Clone, Debug, Serialize, Deserialize)]
16pub struct CompactSchema {
17 strings: Vec<String>,
19 domains: Vec<CompactDomain>,
21 predicates: Vec<CompactPredicate>,
23 variables: Vec<(usize, usize)>,
25}
26
27#[derive(Clone, Debug, Serialize, Deserialize)]
29struct CompactDomain {
30 name_id: usize,
32 cardinality: usize,
34 description_id: Option<usize>,
36}
37
38#[derive(Clone, Debug, Serialize, Deserialize)]
40struct CompactPredicate {
41 name_id: usize,
43 arg_domain_ids: Vec<usize>,
45 description_id: Option<usize>,
47}
48
49impl CompactSchema {
50 pub fn from_symbol_table(table: &SymbolTable) -> Self {
66 let mut interner = StringInterner::new();
67 let mut string_to_id = HashMap::new();
68
69 let mut intern = |s: &str| -> usize {
71 if let Some(&id) = string_to_id.get(s) {
72 id
73 } else {
74 let id = interner.intern(s);
75 string_to_id.insert(s.to_string(), id);
76 id
77 }
78 };
79
80 let domains: Vec<_> = table
82 .domains
83 .values()
84 .map(|domain| {
85 let name_id = intern(&domain.name);
86 let description_id = domain.description.as_ref().map(|d| intern(d));
87
88 CompactDomain {
89 name_id,
90 cardinality: domain.cardinality,
91 description_id,
92 }
93 })
94 .collect();
95
96 let predicates: Vec<_> = table
98 .predicates
99 .values()
100 .map(|pred| {
101 let name_id = intern(&pred.name);
102 let arg_domain_ids: Vec<_> = pred.arg_domains.iter().map(|d| intern(d)).collect();
103 let description_id = pred.description.as_ref().map(|d| intern(d));
104
105 CompactPredicate {
106 name_id,
107 arg_domain_ids,
108 description_id,
109 }
110 })
111 .collect();
112
113 let variables: Vec<_> = table
115 .variables
116 .iter()
117 .map(|(var, domain)| {
118 let var_id = intern(var);
119 let domain_id = intern(domain);
120 (var_id, domain_id)
121 })
122 .collect();
123
124 let strings: Vec<_> = (0..interner.len())
126 .filter_map(|id| interner.resolve(id).map(|s| s.to_string()))
127 .collect();
128
129 CompactSchema {
130 strings,
131 domains,
132 predicates,
133 variables,
134 }
135 }
136
137 pub fn to_symbol_table(&self) -> Result<SymbolTable> {
139 let mut table = SymbolTable::new();
140
141 for compact in &self.domains {
143 let name = self.strings.get(compact.name_id).ok_or_else(|| {
144 anyhow::anyhow!("Invalid string ID {} for domain name", compact.name_id)
145 })?;
146
147 let mut domain = DomainInfo::new(name.clone(), compact.cardinality);
148
149 if let Some(desc_id) = compact.description_id {
150 let description = self.strings.get(desc_id).ok_or_else(|| {
151 anyhow::anyhow!("Invalid string ID {} for description", desc_id)
152 })?;
153 domain.description = Some(description.clone());
154 }
155
156 table.add_domain(domain)?;
157 }
158
159 for compact in &self.predicates {
161 let name = self.strings.get(compact.name_id).ok_or_else(|| {
162 anyhow::anyhow!("Invalid string ID {} for predicate name", compact.name_id)
163 })?;
164
165 let arg_domains: Result<Vec<_>> = compact
166 .arg_domain_ids
167 .iter()
168 .map(|&id| {
169 self.strings
170 .get(id)
171 .cloned()
172 .ok_or_else(|| anyhow::anyhow!("Invalid string ID {} for arg domain", id))
173 })
174 .collect();
175
176 let mut pred = PredicateInfo::new(name.clone(), arg_domains?);
177
178 if let Some(desc_id) = compact.description_id {
179 let description = self.strings.get(desc_id).ok_or_else(|| {
180 anyhow::anyhow!("Invalid string ID {} for description", desc_id)
181 })?;
182 pred.description = Some(description.clone());
183 }
184
185 table.add_predicate(pred)?;
186 }
187
188 for &(var_id, domain_id) in &self.variables {
190 let var = self
191 .strings
192 .get(var_id)
193 .ok_or_else(|| anyhow::anyhow!("Invalid string ID {} for variable", var_id))?;
194
195 let domain = self.strings.get(domain_id).ok_or_else(|| {
196 anyhow::anyhow!("Invalid string ID {} for variable domain", domain_id)
197 })?;
198
199 table.bind_variable(var, domain)?;
200 }
201
202 Ok(table)
203 }
204
205 pub fn to_binary(&self) -> Result<Vec<u8>> {
207 oxicode::serde::encode_to_vec(self, oxicode::config::standard())
208 .map_err(|e| anyhow::anyhow!("Bincode encode error: {}", e))
209 }
210
211 pub fn from_binary(data: &[u8]) -> Result<Self> {
213 let (result, _): (Self, usize) =
214 oxicode::serde::decode_from_slice(data, oxicode::config::standard())
215 .map_err(|e| anyhow::anyhow!("Bincode decode error: {}", e))?;
216 Ok(result)
217 }
218
219 pub fn string_count(&self) -> usize {
221 self.strings.len()
222 }
223
224 pub fn compression_stats(&self) -> CompressionStats {
226 let string_bytes: usize = self.strings.iter().map(|s| s.len()).sum();
227 let domain_count = self.domains.len();
228 let predicate_count = self.predicates.len();
229 let variable_count = self.variables.len();
230
231 let avg_string_len = if !self.strings.is_empty() {
233 string_bytes / self.strings.len()
234 } else {
235 0
236 };
237
238 let estimated_original_size = domain_count * (avg_string_len + 16) + predicate_count * (avg_string_len + 16) + variable_count * (avg_string_len * 2); CompressionStats {
243 unique_strings: self.strings.len(),
244 total_string_bytes: string_bytes,
245 domain_count,
246 predicate_count,
247 variable_count,
248 estimated_original_size,
249 compact_size: string_bytes
250 + domain_count * 24
251 + predicate_count * 24
252 + variable_count * 16,
253 }
254 }
255}
256
/// Size statistics describing a compact schema.
///
/// All sizes are in bytes and are estimates supplied by whoever builds the
/// value; this type only stores them and derives ratios from them.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CompressionStats {
    /// Number of distinct strings in the string table.
    pub unique_strings: usize,
    /// Total length in bytes of all strings in the string table.
    pub total_string_bytes: usize,
    /// Number of domains in the schema.
    pub domain_count: usize,
    /// Number of predicates in the schema.
    pub predicate_count: usize,
    /// Number of variable bindings in the schema.
    pub variable_count: usize,
    /// Estimated size of the equivalent non-compact representation.
    pub estimated_original_size: usize,
    /// Estimated size of the compact representation.
    pub compact_size: usize,
}

impl CompressionStats {
    /// Ratio of compact size to estimated original size.
    ///
    /// Values below `1.0` mean the compact form is smaller. Returns `1.0`
    /// when the estimated original size is zero, so the ratio is always
    /// defined.
    #[must_use]
    pub fn compression_ratio(&self) -> f64 {
        if self.estimated_original_size == 0 {
            return 1.0;
        }
        self.compact_size as f64 / self.estimated_original_size as f64
    }

    /// Space saved by the compact form, as a percentage of the estimated
    /// original size.
    ///
    /// Negative when the compact form is larger than the estimate.
    #[must_use]
    pub fn space_savings(&self) -> f64 {
        (1.0 - self.compression_ratio()) * 100.0
    }
}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// A full round trip must preserve contents, not just collection sizes.
    #[test]
    fn test_compact_round_trip() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table.add_domain(DomainInfo::new("Location", 50)).unwrap();
        table
            .add_predicate(PredicateInfo::new(
                "at",
                vec!["Person".to_string(), "Location".to_string()],
            ))
            .unwrap();
        table.bind_variable("x", "Person").unwrap();

        let compact = CompactSchema::from_symbol_table(&table);
        let recovered = compact.to_symbol_table().unwrap();

        assert_eq!(table.domains.len(), recovered.domains.len());
        assert_eq!(table.predicates.len(), recovered.predicates.len());
        assert_eq!(table.variables.len(), recovered.variables.len());

        // Length checks alone would not notice strings mapped to the wrong ID.
        assert!(recovered
            .domains
            .values()
            .any(|d| d.name == "Person" && d.cardinality == 100));
        assert!(recovered
            .domains
            .values()
            .any(|d| d.name == "Location" && d.cardinality == 50));
        assert!(recovered
            .predicates
            .values()
            .any(|p| p.name == "at" && p.arg_domains == ["Person", "Location"]));
        assert!(recovered
            .variables
            .iter()
            .any(|(var, domain)| var.as_str() == "x" && domain.as_str() == "Person"));
    }

    /// Optional descriptions travel through the string table as well.
    #[test]
    fn test_description_round_trip() {
        let mut person = DomainInfo::new("Person", 100);
        person.description = Some("A human".to_string());

        let mut table = SymbolTable::new();
        table.add_domain(person).unwrap();

        let compact = CompactSchema::from_symbol_table(&table);
        assert_eq!(compact.string_count(), 2);

        let recovered = compact.to_symbol_table().unwrap();
        assert!(recovered
            .domains
            .values()
            .any(|d| d.name == "Person" && d.description.as_deref() == Some("A human")));
    }

    /// "Person" is used three times but must be stored once.
    #[test]
    fn test_string_deduplication() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table
            .add_predicate(PredicateInfo::new("knows", vec!["Person".to_string()]))
            .unwrap();
        table
            .add_predicate(PredicateInfo::new("likes", vec!["Person".to_string()]))
            .unwrap();

        let compact = CompactSchema::from_symbol_table(&table);

        // "Person", "knows", "likes"
        assert_eq!(compact.string_count(), 3);
    }

    #[test]
    fn test_binary_serialization() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();

        let compact = CompactSchema::from_symbol_table(&table);
        let binary = compact.to_binary().unwrap();
        let recovered = CompactSchema::from_binary(&binary).unwrap();
        assert_eq!(recovered.string_count(), compact.string_count());

        let table2 = recovered.to_symbol_table().unwrap();
        assert_eq!(table.domains.len(), table2.domains.len());
        assert!(table2
            .domains
            .values()
            .any(|d| d.name == "Person" && d.cardinality == 100));
    }

    #[test]
    fn test_compression_stats() {
        let mut table = SymbolTable::new();
        table.add_domain(DomainInfo::new("Person", 100)).unwrap();
        table.add_domain(DomainInfo::new("Location", 50)).unwrap();

        let compact = CompactSchema::from_symbol_table(&table);
        let stats = compact.compression_stats();

        assert_eq!(stats.domain_count, 2);
        assert_eq!(stats.predicate_count, 0);
        assert_eq!(stats.variable_count, 0);
        // "Person" (6 bytes) + "Location" (8 bytes), integer average 7.
        assert_eq!(stats.unique_strings, 2);
        assert_eq!(stats.total_string_bytes, 14);
        assert_eq!(stats.estimated_original_size, 2 * (7 + 16));
        assert_eq!(stats.compact_size, 14 + 2 * 24);
        assert!(stats.compression_ratio() > 0.0);
        assert!(stats.space_savings() > -200.0);
    }

    #[test]
    fn test_empty_table() {
        let table = SymbolTable::new();
        let compact = CompactSchema::from_symbol_table(&table);
        assert_eq!(compact.string_count(), 0);

        let recovered = compact.to_symbol_table().unwrap();

        assert_eq!(recovered.domains.len(), 0);
        assert_eq!(recovered.predicates.len(), 0);
        assert_eq!(recovered.variables.len(), 0);
    }

    /// A schema that references a missing string must fail instead of panicking.
    #[test]
    fn test_invalid_string_id_is_reported() {
        let broken = CompactSchema {
            strings: Vec::new(),
            domains: vec![CompactDomain {
                name_id: 7,
                cardinality: 1,
                description_id: None,
            }],
            predicates: Vec::new(),
            variables: Vec::new(),
        };

        let err = match broken.to_symbol_table() {
            Ok(_) => panic!("schema with a dangling string ID must not convert"),
            Err(err) => err,
        };
        assert!(err
            .to_string()
            .contains("Invalid string ID 7 for domain name"));
    }
}
373}