Skip to main content

xlog_ir/
metadata.rs

1//! Metadata for RIR nodes (cardinality, memory estimates, skew)
2
3use xlog_core::Schema;
4
/// Physical layout preference consulted when selecting how a relation is stored.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum LayoutHint {
    /// Plain cuDF table: the baseline layout, and the default hint.
    #[default]
    CudfTable,
    /// Indexed storage in the style of HISA, intended for recursion.
    HisaIndexed,
    /// Columnar storage in the style of VFLog, intended for bandwidth workloads.
    VflogColumnar,
}
16
/// Describes how skewed a key distribution is, for use by join optimization.
#[derive(Clone, Debug)]
pub struct SkewSignature {
    /// The top-k hot keys.
    pub hot_keys: Vec<u64>,
    /// Shannon entropy of the key distribution, in bits.
    pub entropy: f64,
}

impl SkewSignature {
    /// Reports whether the data counts as skewed.
    ///
    /// Returns `true` exactly when `entropy` is strictly below 3.0 bits. An entropy
    /// of exactly 3.0, or a NaN entropy (every comparison with NaN is false), is
    /// reported as not skewed.
    pub fn is_skewed(&self) -> bool {
        // Entropy cutoff in bits; anything strictly below it is treated as skewed.
        const SKEW_CUTOFF_BITS: f64 = 3.0;
        self.entropy < SKEW_CUTOFF_BITS
    }
}
32
33/// Metadata attached to each RIR node
34#[derive(Debug, Clone)]
35pub struct RirMeta {
36    /// Schema of output relation
37    pub schema: Schema,
38    /// Estimated row count range (min, max)
39    pub est_rows: (u64, u64),
40    /// Estimated memory bytes range (min, max)
41    pub est_bytes: (u64, u64),
42    /// Optional skew signature
43    pub skew: Option<SkewSignature>,
44    /// Whether this node produces deterministic output
45    pub deterministic: bool,
46    /// Layout hint for physical storage
47    pub layout_hint: LayoutHint,
48}
49
50impl Default for RirMeta {
51    fn default() -> Self {
52        Self {
53            schema: Schema::new(vec![]),
54            est_rows: (0, 0),
55            est_bytes: (0, 0),
56            skew: None,
57            deterministic: true,
58            layout_hint: LayoutHint::default(),
59        }
60    }
61}
62
63impl RirMeta {
64    /// Create metadata with schema
65    pub fn with_schema(schema: Schema) -> Self {
66        Self {
67            schema,
68            ..Default::default()
69        }
70    }
71
72    /// Set estimated rows
73    pub fn with_rows(mut self, min: u64, max: u64) -> Self {
74        self.est_rows = (min, max);
75        self.est_bytes = (
76            min * self.schema.row_size_bytes() as u64,
77            max * self.schema.row_size_bytes() as u64,
78        );
79        self
80    }
81
82    /// Set layout hint
83    pub fn with_layout(mut self, hint: LayoutHint) -> Self {
84        self.layout_hint = hint;
85        self
86    }
87}
88
#[cfg(test)]
mod tests {
    use super::*;
    use xlog_core::ScalarType;

    /// Default metadata has zeroed row estimates and is flagged deterministic.
    #[test]
    fn test_rir_meta_default() {
        let RirMeta {
            est_rows,
            deterministic,
            ..
        } = RirMeta::default();
        assert_eq!(est_rows, (0, 0));
        assert!(deterministic);
    }

    /// The default layout hint is the cuDF table.
    #[test]
    fn test_layout_hint_default() {
        assert_eq!(LayoutHint::default(), LayoutHint::CudfTable);
    }

    /// An entropy of 2.5 bits is below the skew threshold.
    #[test]
    fn test_skew_signature() {
        let low_entropy = SkewSignature {
            entropy: 2.5,
            hot_keys: vec![42, 100],
        };
        assert!(low_entropy.is_skewed());
    }

    /// Row estimates are stored as given and byte estimates scale with the row size.
    #[test]
    fn test_meta_with_rows() {
        let columns = vec![
            ("a".to_string(), ScalarType::U32),
            ("b".to_string(), ScalarType::U32),
        ];
        let meta = RirMeta::with_schema(Schema::new(columns)).with_rows(100, 200);
        assert_eq!(meta.est_rows, (100, 200));
        // The expected byte counts correspond to 8 bytes per row.
        assert_eq!(meta.est_bytes, (800, 1600));
    }
}