Skip to main content

tensorlogic_adapters/
compact.rs

//! Compact schema representation for efficient storage and transmission.
//!
//! This module provides a space-efficient encoding for symbol tables based on
//! string interning: each distinct string is stored once and referenced by index.
5
6use anyhow::Result;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10use crate::{DomainInfo, PredicateInfo, StringInterner, SymbolTable};
11
12/// Compact representation of a symbol table.
13///
14/// Uses string interning and delta encoding to minimize size.
15#[derive(Clone, Debug, Serialize, Deserialize)]
16pub struct CompactSchema {
17    /// String interner for deduplication.
18    strings: Vec<String>,
19    /// Compact domain representations.
20    domains: Vec<CompactDomain>,
21    /// Compact predicate representations.
22    predicates: Vec<CompactPredicate>,
23    /// Variable bindings (name_id, domain_id).
24    variables: Vec<(usize, usize)>,
25}
26
27/// Compact domain representation.
28#[derive(Clone, Debug, Serialize, Deserialize)]
29struct CompactDomain {
30    /// String ID for name.
31    name_id: usize,
32    /// Cardinality.
33    cardinality: usize,
34    /// Optional description ID.
35    description_id: Option<usize>,
36}
37
38/// Compact predicate representation.
39#[derive(Clone, Debug, Serialize, Deserialize)]
40struct CompactPredicate {
41    /// String ID for name.
42    name_id: usize,
43    /// String IDs for argument domains.
44    arg_domain_ids: Vec<usize>,
45    /// Optional description ID.
46    description_id: Option<usize>,
47}
48
49impl CompactSchema {
50    /// Create a compact schema from a symbol table.
51    ///
52    /// # Example
53    ///
54    /// ```rust
55    /// use tensorlogic_adapters::{SymbolTable, DomainInfo, CompactSchema};
56    ///
57    /// let mut table = SymbolTable::new();
58    /// table.add_domain(DomainInfo::new("Person", 100)).unwrap();
59    ///
60    /// let compact = CompactSchema::from_symbol_table(&table);
61    /// let recovered = compact.to_symbol_table().unwrap();
62    ///
63    /// assert_eq!(table.domains.len(), recovered.domains.len());
64    /// ```
65    pub fn from_symbol_table(table: &SymbolTable) -> Self {
66        let mut interner = StringInterner::new();
67        let mut string_to_id = HashMap::new();
68
69        // Helper to intern a string
70        let mut intern = |s: &str| -> usize {
71            if let Some(&id) = string_to_id.get(s) {
72                id
73            } else {
74                let id = interner.intern(s);
75                string_to_id.insert(s.to_string(), id);
76                id
77            }
78        };
79
80        // Compact domains
81        let domains: Vec<_> = table
82            .domains
83            .values()
84            .map(|domain| {
85                let name_id = intern(&domain.name);
86                let description_id = domain.description.as_ref().map(|d| intern(d));
87
88                CompactDomain {
89                    name_id,
90                    cardinality: domain.cardinality,
91                    description_id,
92                }
93            })
94            .collect();
95
96        // Compact predicates
97        let predicates: Vec<_> = table
98            .predicates
99            .values()
100            .map(|pred| {
101                let name_id = intern(&pred.name);
102                let arg_domain_ids: Vec<_> = pred.arg_domains.iter().map(|d| intern(d)).collect();
103                let description_id = pred.description.as_ref().map(|d| intern(d));
104
105                CompactPredicate {
106                    name_id,
107                    arg_domain_ids,
108                    description_id,
109                }
110            })
111            .collect();
112
113        // Compact variables
114        let variables: Vec<_> = table
115            .variables
116            .iter()
117            .map(|(var, domain)| {
118                let var_id = intern(var);
119                let domain_id = intern(domain);
120                (var_id, domain_id)
121            })
122            .collect();
123
124        // Extract interned strings
125        let strings: Vec<_> = (0..interner.len())
126            .filter_map(|id| interner.resolve(id).map(|s| s.to_string()))
127            .collect();
128
129        CompactSchema {
130            strings,
131            domains,
132            predicates,
133            variables,
134        }
135    }
136
137    /// Convert compact schema back to a symbol table.
138    pub fn to_symbol_table(&self) -> Result<SymbolTable> {
139        let mut table = SymbolTable::new();
140
141        // Reconstruct domains
142        for compact in &self.domains {
143            let name = self.strings.get(compact.name_id).ok_or_else(|| {
144                anyhow::anyhow!("Invalid string ID {} for domain name", compact.name_id)
145            })?;
146
147            let mut domain = DomainInfo::new(name.clone(), compact.cardinality);
148
149            if let Some(desc_id) = compact.description_id {
150                let description = self.strings.get(desc_id).ok_or_else(|| {
151                    anyhow::anyhow!("Invalid string ID {} for description", desc_id)
152                })?;
153                domain.description = Some(description.clone());
154            }
155
156            table.add_domain(domain)?;
157        }
158
159        // Reconstruct predicates
160        for compact in &self.predicates {
161            let name = self.strings.get(compact.name_id).ok_or_else(|| {
162                anyhow::anyhow!("Invalid string ID {} for predicate name", compact.name_id)
163            })?;
164
165            let arg_domains: Result<Vec<_>> = compact
166                .arg_domain_ids
167                .iter()
168                .map(|&id| {
169                    self.strings
170                        .get(id)
171                        .cloned()
172                        .ok_or_else(|| anyhow::anyhow!("Invalid string ID {} for arg domain", id))
173                })
174                .collect();
175
176            let mut pred = PredicateInfo::new(name.clone(), arg_domains?);
177
178            if let Some(desc_id) = compact.description_id {
179                let description = self.strings.get(desc_id).ok_or_else(|| {
180                    anyhow::anyhow!("Invalid string ID {} for description", desc_id)
181                })?;
182                pred.description = Some(description.clone());
183            }
184
185            table.add_predicate(pred)?;
186        }
187
188        // Reconstruct variables
189        for &(var_id, domain_id) in &self.variables {
190            let var = self
191                .strings
192                .get(var_id)
193                .ok_or_else(|| anyhow::anyhow!("Invalid string ID {} for variable", var_id))?;
194
195            let domain = self.strings.get(domain_id).ok_or_else(|| {
196                anyhow::anyhow!("Invalid string ID {} for variable domain", domain_id)
197            })?;
198
199            table.bind_variable(var, domain)?;
200        }
201
202        Ok(table)
203    }
204
205    /// Serialize to compact binary format.
206    pub fn to_binary(&self) -> Result<Vec<u8>> {
207        oxicode::serde::encode_to_vec(self, oxicode::config::standard())
208            .map_err(|e| anyhow::anyhow!("Bincode encode error: {}", e))
209    }
210
211    /// Deserialize from compact binary format.
212    pub fn from_binary(data: &[u8]) -> Result<Self> {
213        let (result, _): (Self, usize) =
214            oxicode::serde::decode_from_slice(data, oxicode::config::standard())
215                .map_err(|e| anyhow::anyhow!("Bincode decode error: {}", e))?;
216        Ok(result)
217    }
218
219    /// Get the number of unique strings.
220    pub fn string_count(&self) -> usize {
221        self.strings.len()
222    }
223
224    /// Get statistics about compression.
225    pub fn compression_stats(&self) -> CompressionStats {
226        let string_bytes: usize = self.strings.iter().map(|s| s.len()).sum();
227        let domain_count = self.domains.len();
228        let predicate_count = self.predicates.len();
229        let variable_count = self.variables.len();
230
231        // Estimate original size (rough approximation)
232        let avg_string_len = if !self.strings.is_empty() {
233            string_bytes / self.strings.len()
234        } else {
235            0
236        };
237
238        let estimated_original_size = domain_count * (avg_string_len + 16) // name + cardinality + overhead
239            + predicate_count * (avg_string_len + 16) // name + args overhead
240            + variable_count * (avg_string_len * 2); // var name + domain name
241
242        CompressionStats {
243            unique_strings: self.strings.len(),
244            total_string_bytes: string_bytes,
245            domain_count,
246            predicate_count,
247            variable_count,
248            estimated_original_size,
249            compact_size: string_bytes
250                + domain_count * 24
251                + predicate_count * 24
252                + variable_count * 16,
253        }
254    }
255}
256
/// Size statistics describing a compact schema.
#[derive(Debug, Clone)]
pub struct CompressionStats {
    /// How many distinct strings the schema stores.
    pub unique_strings: usize,
    /// Combined length in bytes of all stored strings.
    pub total_string_bytes: usize,
    /// How many domains the schema holds.
    pub domain_count: usize,
    /// How many predicates the schema holds.
    pub predicate_count: usize,
    /// How many variable bindings the schema holds.
    pub variable_count: usize,
    /// Estimated size in bytes of the uncompacted representation.
    pub estimated_original_size: usize,
    /// Size in bytes attributed to the compact representation.
    pub compact_size: usize,
}

impl CompressionStats {
    /// Ratio of `compact_size` to `estimated_original_size`.
    ///
    /// Values below `1.0` mean the compact form is smaller. Returns `1.0`
    /// when the estimated original size is zero, avoiding a division by zero.
    pub fn compression_ratio(&self) -> f64 {
        match self.estimated_original_size {
            0 => 1.0,
            original => self.compact_size as f64 / original as f64,
        }
    }

    /// Space saved relative to the estimated original size, in percent.
    ///
    /// Negative when the compact form is larger than the estimate.
    pub fn space_savings(&self) -> f64 {
        let ratio = self.compression_ratio();
        (1.0 - ratio) * 100.0
    }
}
291
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a symbol table holding the given `(name, cardinality)` domains,
    /// added in slice order.
    fn table_with_domains(specs: &[(&'static str, usize)]) -> SymbolTable {
        let mut table = SymbolTable::new();
        for &(name, cardinality) in specs {
            table.add_domain(DomainInfo::new(name, cardinality)).unwrap();
        }
        table
    }

    /// Converting to the compact form and back keeps the entry counts.
    #[test]
    fn test_compact_round_trip() {
        let mut original = table_with_domains(&[("Person", 100), ("Location", 50)]);
        let at = PredicateInfo::new("at", vec!["Person".to_string(), "Location".to_string()]);
        original.add_predicate(at).unwrap();
        original.bind_variable("x", "Person").unwrap();

        let schema = CompactSchema::from_symbol_table(&original);
        let restored = schema.to_symbol_table().unwrap();

        assert_eq!(original.domains.len(), restored.domains.len());
        assert_eq!(original.predicates.len(), restored.predicates.len());
        assert_eq!(original.variables.len(), restored.variables.len());
    }

    /// A string referenced from several entries is stored only once.
    #[test]
    fn test_string_deduplication() {
        let mut source = table_with_domains(&[("Person", 100)]);
        for predicate_name in ["knows", "likes"] {
            source
                .add_predicate(PredicateInfo::new(predicate_name, vec!["Person".to_string()]))
                .unwrap();
        }

        let schema = CompactSchema::from_symbol_table(&source);

        // Expected strings: "Person", "knows", "likes".
        assert_eq!(schema.string_count(), 3);
    }

    /// A schema survives encoding to bytes and decoding again.
    #[test]
    fn test_binary_serialization() {
        let source = table_with_domains(&[("Person", 100)]);

        let bytes = CompactSchema::from_symbol_table(&source)
            .to_binary()
            .unwrap();
        let decoded = CompactSchema::from_binary(&bytes).unwrap();

        let restored = decoded.to_symbol_table().unwrap();
        assert_eq!(source.domains.len(), restored.domains.len());
    }

    /// Statistics report the right counts and stay within sane bounds.
    #[test]
    fn test_compression_stats() {
        let source = table_with_domains(&[("Person", 100), ("Location", 50)]);

        let stats = CompactSchema::from_symbol_table(&source).compression_stats();

        assert_eq!(stats.domain_count, 2);
        // For small schemas the ratio may exceed 1.0 because of fixed overhead.
        assert!(stats.compression_ratio() > 0.0);
        // Savings may therefore be negative for very small schemas.
        assert!(stats.space_savings() > -200.0);
    }

    /// An empty symbol table round-trips to an empty symbol table.
    #[test]
    fn test_empty_table() {
        let empty = SymbolTable::new();
        let schema = CompactSchema::from_symbol_table(&empty);
        let restored = schema.to_symbol_table().unwrap();

        assert_eq!(restored.domains.len(), 0);
        assert_eq!(restored.predicates.len(), 0);
    }
}