// zer_core/record_pool.rs

1use crate::{
2    record::{FieldValue, Record, RecordId},
3    schema::Schema,
4    traits::RecordStore,
5};
6
7/// Column-major record store: `columns[field_idx][record_idx]`.
8///
9/// All field values are stored as UTF-8 strings in schema field order.
10/// Missing or non-text values become empty strings (treated as
11/// `ComparisonLevel::None` by every comparator).
12///
13/// The `RecordId` for record `r` is `ids[r]`.
14#[derive(Debug, Clone)]
15pub struct RecordPool {
16    pub ids:      Vec<RecordId>,
17    /// `columns[field_idx][record_idx]` = UTF-8 text value.
18    pub columns:  Vec<Vec<String>>,
19    pub n_fields: usize,
20}
21
22impl RecordPool {
23    pub fn new(n_fields: usize) -> Self {
24        Self {
25            ids:      Vec::new(),
26            columns:  vec![Vec::new(); n_fields],
27            n_fields,
28        }
29    }
30
31    pub fn from_records(records: &[Record], schema: &Schema) -> Self {
32        let mut pool = Self::with_capacity(records.len(), schema.fields.len());
33        for r in records {
34            pool.push(r, schema);
35        }
36        pool
37    }
38
39    /// Build a pool from a [`RecordStore`], loading only the records with IDs
40    /// listed in `ids`.  Records are inserted in `ids` order; pool position `i`
41    /// corresponds to `ids[i]`.
42    pub fn from_store(store: &dyn RecordStore, ids: &[RecordId], schema: &Schema) -> Self {
43        let records: Vec<Record> = ids
44            .iter()
45            .filter_map(|id| store.get(*id).map(|c| c.into_owned()))
46            .collect();
47        Self::from_records(&records, schema)
48    }
49
50    /// Build a pool from `(Record, Record)` pairs: record `2*i` is side A of
51    /// pair `i`, record `2*i+1` is side B.  Allows `compare_batch(&pairs)` to
52    /// build a pool once and delegate to `compare_batch_from_pool`.
53    pub fn from_pairs(pairs: &[(Record, Record)], schema: &Schema) -> Self {
54        let mut pool = Self::with_capacity(pairs.len() * 2, schema.fields.len());
55        for (a, b) in pairs {
56            pool.push(a, schema);
57            pool.push(b, schema);
58        }
59        pool
60    }
61
62    pub fn with_capacity(cap: usize, n_fields: usize) -> Self {
63        Self {
64            ids:      Vec::with_capacity(cap),
65            columns:  vec![Vec::with_capacity(cap); n_fields],
66            n_fields,
67        }
68    }
69
70    /// Append one record.  Fields are stored in schema order.
71    pub fn push(&mut self, record: &Record, schema: &Schema) {
72        self.ids.push(record.id);
73        for (fi, field) in schema.fields.iter().enumerate() {
74            self.columns[fi].push(field_value_to_string(record.fields.get(&field.name)));
75        }
76    }
77
78    /// Direct column access: bytes of field `f` for record `r`.
79    #[inline]
80    pub fn get(&self, field_idx: usize, record_idx: usize) -> &str {
81        &self.columns[field_idx][record_idx]
82    }
83
84    pub fn len(&self) -> usize {
85        self.ids.len()
86    }
87
88    pub fn is_empty(&self) -> bool {
89        self.ids.is_empty()
90    }
91}
92
93fn field_value_to_string(v: Option<&FieldValue>) -> String {
94    match v {
95        Some(FieldValue::Text(s))   => s.clone(),
96        Some(FieldValue::Int(i))    => i.to_string(),
97        Some(FieldValue::UInt(u))   => u.to_string(),
98        Some(FieldValue::Float(f))  => f.to_string(),
99        Some(FieldValue::Bool(b))   => b.to_string(),
100        Some(FieldValue::Bytes(_))  => String::new(),
101        Some(FieldValue::Null) | None => String::new(),
102    }
103}
104
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        record::FieldValue,
        schema::{FieldKind, SchemaBuilder},
    };

    /// Two-field schema (`naam`, `dob`) shared by every test in this module.
    fn person_schema() -> Schema {
        SchemaBuilder::new()
            .field("naam", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap()
    }

    /// Shorthand for a text field value.
    fn text(value: &str) -> FieldValue {
        FieldValue::Text(value.into())
    }

    #[test]
    fn pool_from_records_stores_in_column_order() {
        let alice = Record::new(1)
            .insert("naam", text("Alice"))
            .insert("dob", text("1990-01-01"));
        let bob = Record::new(2)
            .insert("naam", text("Bob"))
            .insert("dob", text("1985-06-15"));
        let pool = RecordPool::from_records(&[alice, bob], &person_schema());

        assert_eq!(pool.len(), 2);
        assert_eq!(pool.ids, vec![1, 2]);

        // Outer index = field, inner index = record, mirroring the pool layout.
        let expected = [["Alice", "Bob"], ["1990-01-01", "1985-06-15"]];
        for (field_idx, column) in expected.iter().enumerate() {
            for (record_idx, want) in column.iter().enumerate() {
                assert_eq!(pool.get(field_idx, record_idx), *want);
            }
        }
    }

    #[test]
    fn pool_missing_field_is_empty_string() {
        // The record carries `naam` but no `dob`.
        let only_name = Record::new(1).insert("naam", text("Alice"));
        let pool = RecordPool::from_records(&[only_name], &person_schema());

        assert_eq!(pool.get(0, 0), "Alice");
        assert_eq!(pool.get(1, 0), "");
    }

    #[test]
    fn pool_null_field_is_empty_string() {
        let null_name = Record::new(1).insert("naam", FieldValue::Null);
        let pool = RecordPool::from_records(&[null_name], &person_schema());

        assert_eq!(pool.get(0, 0), "");
    }

    #[test]
    fn pool_push_incremental() {
        let schema = person_schema();
        let mut pool = RecordPool::new(schema.fields.len());

        let first = Record::new(10).insert("naam", text("X"));
        let second = Record::new(20).insert("naam", text("Y"));
        pool.push(&first, &schema);
        pool.push(&second, &schema);

        assert_eq!(pool.len(), 2);
        assert_eq!(pool.ids, vec![10, 20]);
        assert_eq!(pool.get(0, 0), "X");
        assert_eq!(pool.get(0, 1), "Y");
    }
}