Skip to main content

grafeo_core/execution/
factorized_vector.rs

1//! Factorized vector for avoiding Cartesian product materialization.
2//!
3//! A `FactorizedVector` can represent data in two states:
4//! - **Flat**: One value per logical row (same as `ValueVector`)
5//! - **Unflat**: Multiple values per parent row, with offset arrays tracking boundaries
6//!
7//! This enables multi-hop graph traversals without duplicating source columns.
8//!
9//! # Example
10//!
11//! For a 2-hop traversal where node A has neighbors `[B1, B2]` and node A' has neighbor `[B3]`:
12//!
13//! ```text
14//! Flat representation (current):
15//!   Row 0: (A, B1)
16//!   Row 1: (A, B2)
17//!   Row 2: (A', B3)
18//!   -> 3 rows, A duplicated twice
19//!
20//! Factorized representation:
21//!   Level 0 (flat): [A, A']           (2 values)
22//!   Level 1 (unflat): [B1, B2, B3]    (3 values)
23//!   Offsets: [0, 2, 3]                (A's neighbors at 0..2, A's at 2..3)
24//!   -> 5 values total, no duplication
25//! ```
26
27use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31/// A vector that can represent nested/repeated values without duplication.
32///
33/// In flat mode, this behaves like a regular `ValueVector` - one value per logical row.
34/// In unflat mode, values are grouped by parent, with offset arrays tracking boundaries.
35#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37    /// The underlying data storage.
38    data: ValueVector,
39    /// The factorization state.
40    state: FactorizedState,
41}
42
43/// The factorization state of a vector.
44#[derive(Debug, Clone)]
45pub enum FactorizedState {
46    /// Flat: one value per logical row. `data.len() == logical_row_count`.
47    Flat,
48    /// Unflat: multiple values per logical row, grouped by parent.
49    Unflat(UnflatMetadata),
50}
51
52/// Metadata for unflat vectors.
53///
54/// Tracks how values are grouped by their parent rows using an offset array.
55#[derive(Debug, Clone)]
56pub struct UnflatMetadata {
57    /// Offset array: `offsets[i]` is the start index in data for parent `i`.
58    /// Length is `parent_count + 1`, where the last element equals `data.len()`.
59    offsets: Vec<u32>,
60    /// Number of parent rows this vector is grouped by.
61    parent_count: usize,
62}
63
64impl FactorizedVector {
65    /// Creates a new flat factorized vector from a `ValueVector`.
66    ///
67    /// In flat mode, there's one value per logical row.
68    #[must_use]
69    pub fn flat(data: ValueVector) -> Self {
70        Self {
71            data,
72            state: FactorizedState::Flat,
73        }
74    }
75
76    /// Creates a new unflat factorized vector.
77    ///
78    /// # Arguments
79    ///
80    /// * `data` - The underlying values
81    /// * `offsets` - Offset array where `offsets[i]` is the start index for parent `i`.
82    ///   Must have length `parent_count + 1`.
83    /// * `parent_count` - Number of parent rows
84    ///
85    /// # Panics
86    ///
87    /// Panics if `offsets.len() != parent_count + 1` or if offsets are invalid.
88    #[must_use]
89    pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
90        debug_assert_eq!(
91            offsets.len(),
92            parent_count + 1,
93            "offsets must have length parent_count + 1"
94        );
95        debug_assert!(
96            offsets.last().copied() == Some(data.len() as u32),
97            "last offset must equal data length"
98        );
99
100        Self {
101            data,
102            state: FactorizedState::Unflat(UnflatMetadata {
103                offsets,
104                parent_count,
105            }),
106        }
107    }
108
109    /// Creates an empty flat factorized vector with the given type.
110    #[must_use]
111    pub fn empty(data_type: LogicalType) -> Self {
112        Self::flat(ValueVector::with_type(data_type))
113    }
114
115    /// Returns true if this vector is in flat state.
116    #[must_use]
117    pub fn is_flat(&self) -> bool {
118        matches!(self.state, FactorizedState::Flat)
119    }
120
121    /// Returns true if this vector is in unflat state.
122    #[must_use]
123    pub fn is_unflat(&self) -> bool {
124        matches!(self.state, FactorizedState::Unflat(_))
125    }
126
127    /// Returns the number of physical values stored.
128    ///
129    /// This is the actual storage size, not the logical row count.
130    #[must_use]
131    pub fn physical_len(&self) -> usize {
132        self.data.len()
133    }
134
135    /// Returns the logical type of the underlying data.
136    #[must_use]
137    pub fn data_type(&self) -> LogicalType {
138        self.data.logical_type()
139    }
140
141    /// Returns a reference to the underlying `ValueVector`.
142    #[must_use]
143    pub fn data(&self) -> &ValueVector {
144        &self.data
145    }
146
147    /// Returns a mutable reference to the underlying `ValueVector`.
148    ///
149    /// Use with caution - modifying the data directly may invalidate offsets.
150    pub fn data_mut(&mut self) -> &mut ValueVector {
151        &mut self.data
152    }
153
154    /// Returns the offset array for unflat vectors, or `None` for flat vectors.
155    #[must_use]
156    pub fn offsets(&self) -> Option<&[u32]> {
157        match &self.state {
158            FactorizedState::Flat => None,
159            FactorizedState::Unflat(meta) => Some(&meta.offsets),
160        }
161    }
162
163    /// Returns the parent count for unflat vectors.
164    ///
165    /// For flat vectors, returns the data length (each value is its own parent).
166    #[must_use]
167    pub fn parent_count(&self) -> usize {
168        match &self.state {
169            FactorizedState::Flat => self.data.len(),
170            FactorizedState::Unflat(meta) => meta.parent_count,
171        }
172    }
173
174    /// Gets the count of values for a specific parent index.
175    ///
176    /// For flat vectors, always returns 1.
177    #[must_use]
178    pub fn count_for_parent(&self, parent_idx: usize) -> usize {
179        match &self.state {
180            FactorizedState::Flat => 1,
181            FactorizedState::Unflat(meta) => {
182                if parent_idx >= meta.parent_count {
183                    return 0;
184                }
185                let start = meta.offsets[parent_idx] as usize;
186                let end = meta.offsets[parent_idx + 1] as usize;
187                end - start
188            }
189        }
190    }
191
192    /// Gets the start and end indices for a specific parent.
193    ///
194    /// Returns `(start, end)` where values for this parent are at indices `start..end`.
195    #[must_use]
196    pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
197        match &self.state {
198            FactorizedState::Flat => (parent_idx, parent_idx + 1),
199            FactorizedState::Unflat(meta) => {
200                if parent_idx >= meta.parent_count {
201                    return (0, 0);
202                }
203                let start = meta.offsets[parent_idx] as usize;
204                let end = meta.offsets[parent_idx + 1] as usize;
205                (start, end)
206            }
207        }
208    }
209
210    /// Gets a value at a physical index.
211    #[must_use]
212    pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
213        self.data.get_value(physical_idx)
214    }
215
216    /// Gets a NodeId at a physical index.
217    #[must_use]
218    pub fn get_node_id_physical(
219        &self,
220        physical_idx: usize,
221    ) -> Option<grafeo_common::types::NodeId> {
222        self.data.get_node_id(physical_idx)
223    }
224
225    /// Gets an EdgeId at a physical index.
226    #[must_use]
227    pub fn get_edge_id_physical(
228        &self,
229        physical_idx: usize,
230    ) -> Option<grafeo_common::types::EdgeId> {
231        self.data.get_edge_id(physical_idx)
232    }
233
234    /// Gets a value for a parent at a relative offset within that parent's values.
235    ///
236    /// For flat vectors, `relative_idx` must be 0.
237    #[must_use]
238    pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
239        let (start, end) = self.range_for_parent(parent_idx);
240        let physical_idx = start + relative_idx;
241        if physical_idx >= end {
242            return None;
243        }
244        self.data.get_value(physical_idx)
245    }
246
247    /// Computes the total logical row count given parent multiplicities.
248    ///
249    /// For a flat vector at level 0, this is just `data.len()`.
250    /// For an unflat vector, this is the sum of values for each parent,
251    /// multiplied by that parent's multiplicity.
252    ///
253    /// # Arguments
254    ///
255    /// * `parent_multiplicities` - For each parent index, how many logical rows it represents.
256    ///   If `None`, each parent represents 1 logical row.
257    #[must_use]
258    pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
259        match &self.state {
260            FactorizedState::Flat => self.data.len(),
261            FactorizedState::Unflat(meta) => {
262                let mut total = 0;
263                for i in 0..meta.parent_count {
264                    let count = self.count_for_parent(i);
265                    let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
266                    total += count * mult;
267                }
268                total
269            }
270        }
271    }
272
273    /// Flattens this vector to a regular `ValueVector`.
274    ///
275    /// For flat vectors, returns a clone of the underlying data.
276    /// For unflat vectors, duplicates values according to parent multiplicities.
277    ///
278    /// # Arguments
279    ///
280    /// * `parent_multiplicities` - For each parent, how many times to repeat its values.
281    ///   If `None`, each parent's values appear once.
282    #[must_use]
283    pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
284        match &self.state {
285            FactorizedState::Flat => {
286                // If multiplicities are provided, we need to duplicate values
287                if let Some(mults) = parent_multiplicities {
288                    let capacity = mults.iter().sum();
289                    let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
290                    for (i, &mult) in mults.iter().enumerate() {
291                        if let Some(value) = self.data.get_value(i) {
292                            for _ in 0..mult {
293                                result.push_value(value.clone());
294                            }
295                        }
296                    }
297                    result
298                } else {
299                    self.data.clone()
300                }
301            }
302            FactorizedState::Unflat(meta) => {
303                let capacity = self.logical_row_count(parent_multiplicities);
304                let mut result =
305                    ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
306
307                for parent_idx in 0..meta.parent_count {
308                    let mult = parent_multiplicities
309                        .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
310                    let (start, end) = self.range_for_parent(parent_idx);
311
312                    for _ in 0..mult {
313                        for phys_idx in start..end {
314                            if let Some(value) = self.data.get_value(phys_idx) {
315                                result.push_value(value);
316                            }
317                        }
318                    }
319                }
320                result
321            }
322        }
323    }
324
325    /// Creates an iterator over (parent_idx, physical_idx, value) tuples.
326    pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
327        FactorizedVectorIter {
328            vector: self,
329            parent_idx: 0,
330            physical_idx: 0,
331        }
332    }
333}
334
335/// Iterator over factorized vector values with parent indices.
336struct FactorizedVectorIter<'a> {
337    vector: &'a FactorizedVector,
338    parent_idx: usize,
339    physical_idx: usize,
340}
341
342impl Iterator for FactorizedVectorIter<'_> {
343    type Item = (usize, usize, Value);
344
345    fn next(&mut self) -> Option<Self::Item> {
346        match &self.vector.state {
347            FactorizedState::Flat => {
348                if self.physical_idx >= self.vector.data.len() {
349                    return None;
350                }
351                let value = self.vector.data.get_value(self.physical_idx)?;
352                let result = (self.physical_idx, self.physical_idx, value);
353                self.physical_idx += 1;
354                self.parent_idx += 1;
355                Some(result)
356            }
357            FactorizedState::Unflat(meta) => {
358                // Skip empty parents
359                while self.parent_idx < meta.parent_count {
360                    let (_start, end) = self.vector.range_for_parent(self.parent_idx);
361                    if self.physical_idx < end {
362                        let value = self.vector.data.get_value(self.physical_idx)?;
363                        let result = (self.parent_idx, self.physical_idx, value);
364                        self.physical_idx += 1;
365                        return Some(result);
366                    }
367                    self.parent_idx += 1;
368                    if self.parent_idx < meta.parent_count {
369                        let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
370                        self.physical_idx = new_start;
371                    }
372                }
373                None
374            }
375        }
376    }
377}
378
379#[cfg(test)]
380mod tests {
381    use grafeo_common::types::{EdgeId, NodeId};
382
383    use super::*;
384
385    #[test]
386    fn test_flat_vector() {
387        let mut data = ValueVector::with_type(LogicalType::Int64);
388        data.push_int64(1);
389        data.push_int64(2);
390        data.push_int64(3);
391
392        let vec = FactorizedVector::flat(data);
393
394        assert!(vec.is_flat());
395        assert!(!vec.is_unflat());
396        assert_eq!(vec.physical_len(), 3);
397        assert_eq!(vec.parent_count(), 3);
398        assert_eq!(vec.count_for_parent(0), 1);
399        assert_eq!(vec.count_for_parent(1), 1);
400        assert_eq!(vec.count_for_parent(2), 1);
401    }
402
403    #[test]
404    fn test_unflat_vector() {
405        // Create neighbors: parent 0 has [10, 20], parent 1 has [30]
406        let mut data = ValueVector::with_type(LogicalType::Int64);
407        data.push_int64(10);
408        data.push_int64(20);
409        data.push_int64(30);
410
411        let offsets = vec![0, 2, 3]; // parent 0: 0..2, parent 1: 2..3
412        let vec = FactorizedVector::unflat(data, offsets, 2);
413
414        assert!(!vec.is_flat());
415        assert!(vec.is_unflat());
416        assert_eq!(vec.physical_len(), 3);
417        assert_eq!(vec.parent_count(), 2);
418        assert_eq!(vec.count_for_parent(0), 2);
419        assert_eq!(vec.count_for_parent(1), 1);
420        assert_eq!(vec.range_for_parent(0), (0, 2));
421        assert_eq!(vec.range_for_parent(1), (2, 3));
422    }
423
424    #[test]
425    fn test_empty_vector() {
426        let vec = FactorizedVector::empty(LogicalType::Int64);
427
428        assert!(vec.is_flat());
429        assert_eq!(vec.physical_len(), 0);
430        assert_eq!(vec.data_type(), LogicalType::Int64);
431    }
432
433    #[test]
434    fn test_data_type() {
435        let mut data = ValueVector::with_type(LogicalType::String);
436        data.push_string("hello");
437
438        let vec = FactorizedVector::flat(data);
439        assert_eq!(vec.data_type(), LogicalType::String);
440    }
441
442    #[test]
443    fn test_data_access() {
444        let mut data = ValueVector::with_type(LogicalType::Int64);
445        data.push_int64(42);
446
447        let vec = FactorizedVector::flat(data);
448
449        // Test immutable access
450        assert_eq!(vec.data().len(), 1);
451        assert_eq!(vec.data().get_int64(0), Some(42));
452    }
453
454    #[test]
455    fn test_data_mut() {
456        let mut data = ValueVector::with_type(LogicalType::Int64);
457        data.push_int64(42);
458
459        let mut vec = FactorizedVector::flat(data);
460
461        // Modify via mutable access
462        vec.data_mut().push_int64(100);
463        assert_eq!(vec.physical_len(), 2);
464    }
465
466    #[test]
467    fn test_offsets_flat() {
468        let mut data = ValueVector::with_type(LogicalType::Int64);
469        data.push_int64(1);
470
471        let vec = FactorizedVector::flat(data);
472        assert!(vec.offsets().is_none());
473    }
474
475    #[test]
476    fn test_offsets_unflat() {
477        let mut data = ValueVector::with_type(LogicalType::Int64);
478        data.push_int64(10);
479        data.push_int64(20);
480        data.push_int64(30);
481
482        let offsets = vec![0, 2, 3];
483        let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
484
485        assert_eq!(vec.offsets(), Some(offsets.as_slice()));
486    }
487
488    #[test]
489    fn test_get_physical() {
490        let mut data = ValueVector::with_type(LogicalType::Int64);
491        data.push_int64(10);
492        data.push_int64(20);
493
494        let vec = FactorizedVector::flat(data);
495
496        assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
497        assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
498        assert_eq!(vec.get_physical(2), None);
499    }
500
501    #[test]
502    fn test_get_node_id_physical() {
503        let mut data = ValueVector::with_type(LogicalType::Node);
504        data.push_node_id(NodeId::new(100));
505        data.push_node_id(NodeId::new(200));
506
507        let vec = FactorizedVector::flat(data);
508
509        assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
510        assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
511        assert_eq!(vec.get_node_id_physical(2), None);
512    }
513
514    #[test]
515    fn test_get_edge_id_physical() {
516        let mut data = ValueVector::with_type(LogicalType::Edge);
517        data.push_edge_id(EdgeId::new(100));
518        data.push_edge_id(EdgeId::new(200));
519
520        let vec = FactorizedVector::flat(data);
521
522        assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
523        assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
524        assert_eq!(vec.get_edge_id_physical(2), None);
525    }
526
527    #[test]
528    fn test_range_for_parent_flat() {
529        let mut data = ValueVector::with_type(LogicalType::Int64);
530        data.push_int64(1);
531        data.push_int64(2);
532        data.push_int64(3);
533
534        let vec = FactorizedVector::flat(data);
535
536        assert_eq!(vec.range_for_parent(0), (0, 1));
537        assert_eq!(vec.range_for_parent(1), (1, 2));
538        assert_eq!(vec.range_for_parent(2), (2, 3));
539    }
540
541    #[test]
542    fn test_range_for_parent_out_of_bounds() {
543        let mut data = ValueVector::with_type(LogicalType::Int64);
544        data.push_int64(10);
545        data.push_int64(20);
546
547        let offsets = vec![0, 2];
548        let vec = FactorizedVector::unflat(data, offsets, 1);
549
550        // Out of bounds should return (0, 0)
551        assert_eq!(vec.range_for_parent(10), (0, 0));
552    }
553
554    #[test]
555    fn test_count_for_parent_out_of_bounds() {
556        let mut data = ValueVector::with_type(LogicalType::Int64);
557        data.push_int64(10);
558        data.push_int64(20);
559
560        let offsets = vec![0, 2];
561        let vec = FactorizedVector::unflat(data, offsets, 1);
562
563        // Out of bounds should return 0
564        assert_eq!(vec.count_for_parent(10), 0);
565    }
566
567    #[test]
568    fn test_get_for_parent() {
569        let mut data = ValueVector::with_type(LogicalType::Int64);
570        data.push_int64(10);
571        data.push_int64(20);
572        data.push_int64(30);
573
574        let offsets = vec![0, 2, 3];
575        let vec = FactorizedVector::unflat(data, offsets, 2);
576
577        // Parent 0's values
578        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
579        assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
580        assert_eq!(vec.get_for_parent(0, 2), None); // Out of range
581
582        // Parent 1's values
583        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
584        assert_eq!(vec.get_for_parent(1, 1), None); // Out of range
585    }
586
587    #[test]
588    fn test_get_for_parent_flat() {
589        let mut data = ValueVector::with_type(LogicalType::Int64);
590        data.push_int64(10);
591        data.push_int64(20);
592
593        let vec = FactorizedVector::flat(data);
594
595        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
596        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
597        assert_eq!(vec.get_for_parent(0, 1), None); // Only one value per parent in flat mode
598    }
599
600    #[test]
601    fn test_flatten_flat_no_mults() {
602        let mut data = ValueVector::with_type(LogicalType::Int64);
603        data.push_int64(1);
604        data.push_int64(2);
605        data.push_int64(3);
606
607        let vec = FactorizedVector::flat(data);
608        let flat = vec.flatten(None);
609
610        assert_eq!(flat.len(), 3);
611        assert_eq!(flat.get_int64(0), Some(1));
612        assert_eq!(flat.get_int64(2), Some(3));
613    }
614
615    #[test]
616    fn test_flatten_flat_with_mults() {
617        let mut data = ValueVector::with_type(LogicalType::Int64);
618        data.push_int64(10);
619        data.push_int64(20);
620
621        let vec = FactorizedVector::flat(data);
622
623        // Duplicate first value 3 times, second value 2 times
624        let mults = [3, 2];
625        let flat = vec.flatten(Some(&mults));
626
627        assert_eq!(flat.len(), 5);
628        assert_eq!(flat.get_int64(0), Some(10));
629        assert_eq!(flat.get_int64(1), Some(10));
630        assert_eq!(flat.get_int64(2), Some(10));
631        assert_eq!(flat.get_int64(3), Some(20));
632        assert_eq!(flat.get_int64(4), Some(20));
633    }
634
635    #[test]
636    fn test_flatten_unflat() {
637        let mut data = ValueVector::with_type(LogicalType::Int64);
638        data.push_int64(10);
639        data.push_int64(20);
640        data.push_int64(30);
641
642        let offsets = vec![0, 2, 3];
643        let vec = FactorizedVector::unflat(data, offsets, 2);
644
645        // Flatten without multiplicities
646        let flat = vec.flatten(None);
647        assert_eq!(flat.len(), 3);
648        assert_eq!(flat.get_int64(0), Some(10));
649        assert_eq!(flat.get_int64(1), Some(20));
650        assert_eq!(flat.get_int64(2), Some(30));
651    }
652
653    #[test]
654    fn test_flatten_with_multiplicities() {
655        let mut data = ValueVector::with_type(LogicalType::Int64);
656        data.push_int64(10);
657        data.push_int64(20);
658        data.push_int64(30);
659
660        let offsets = vec![0, 2, 3];
661        let vec = FactorizedVector::unflat(data, offsets, 2);
662
663        // Parent 0 has mult 2, parent 1 has mult 1
664        // So: [10,20], [10,20], [30]
665        let mults = [2, 1];
666        let flat = vec.flatten(Some(&mults));
667
668        assert_eq!(flat.len(), 5);
669        assert_eq!(flat.get_int64(0), Some(10));
670        assert_eq!(flat.get_int64(1), Some(20));
671        assert_eq!(flat.get_int64(2), Some(10));
672        assert_eq!(flat.get_int64(3), Some(20));
673        assert_eq!(flat.get_int64(4), Some(30));
674    }
675
676    #[test]
677    fn test_logical_row_count_flat() {
678        let mut data = ValueVector::with_type(LogicalType::Int64);
679        data.push_int64(1);
680        data.push_int64(2);
681        data.push_int64(3);
682
683        let vec = FactorizedVector::flat(data);
684
685        assert_eq!(vec.logical_row_count(None), 3);
686    }
687
688    #[test]
689    fn test_logical_row_count() {
690        let mut data = ValueVector::with_type(LogicalType::Int64);
691        data.push_int64(10);
692        data.push_int64(20);
693        data.push_int64(30);
694
695        let offsets = vec![0, 2, 3];
696        let vec = FactorizedVector::unflat(data, offsets, 2);
697
698        // Without multiplicities: 2 + 1 = 3
699        assert_eq!(vec.logical_row_count(None), 3);
700
701        // With multiplicities [2, 3]: 2*2 + 1*3 = 7
702        let mults = [2, 3];
703        assert_eq!(vec.logical_row_count(Some(&mults)), 7);
704    }
705
706    #[test]
707    fn test_logical_row_count_missing_mult() {
708        let mut data = ValueVector::with_type(LogicalType::Int64);
709        data.push_int64(10);
710        data.push_int64(20);
711        data.push_int64(30);
712
713        let offsets = vec![0, 2, 3];
714        let vec = FactorizedVector::unflat(data, offsets, 2);
715
716        // Only provide one multiplicity - second should default to 1
717        let mults = [2];
718        assert_eq!(vec.logical_row_count(Some(&mults)), 5); // 2*2 + 1*1 = 5
719    }
720
721    #[test]
722    fn test_iter_with_parent_flat() {
723        let mut data = ValueVector::with_type(LogicalType::Int64);
724        data.push_int64(1);
725        data.push_int64(2);
726
727        let vec = FactorizedVector::flat(data);
728        let items: Vec<_> = vec.iter_with_parent().collect();
729
730        assert_eq!(items.len(), 2);
731        assert_eq!(items[0], (0, 0, Value::Int64(1)));
732        assert_eq!(items[1], (1, 1, Value::Int64(2)));
733    }
734
735    #[test]
736    fn test_iter_with_parent_unflat() {
737        let mut data = ValueVector::with_type(LogicalType::Int64);
738        data.push_int64(10);
739        data.push_int64(20);
740        data.push_int64(30);
741
742        let offsets = vec![0, 2, 3];
743        let vec = FactorizedVector::unflat(data, offsets, 2);
744        let items: Vec<_> = vec.iter_with_parent().collect();
745
746        assert_eq!(items.len(), 3);
747        assert_eq!(items[0], (0, 0, Value::Int64(10)));
748        assert_eq!(items[1], (0, 1, Value::Int64(20)));
749        assert_eq!(items[2], (1, 2, Value::Int64(30)));
750    }
751
752    #[test]
753    fn test_iter_with_parent_empty_parents() {
754        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
755        let mut data = ValueVector::with_type(LogicalType::Int64);
756        data.push_int64(10);
757        data.push_int64(20);
758
759        let offsets = vec![0, 0, 2, 2];
760        let vec = FactorizedVector::unflat(data, offsets, 3);
761        let items: Vec<_> = vec.iter_with_parent().collect();
762
763        // Should skip empty parents
764        assert_eq!(items.len(), 2);
765        assert_eq!(items[0], (1, 0, Value::Int64(10)));
766        assert_eq!(items[1], (1, 1, Value::Int64(20)));
767    }
768
769    #[test]
770    fn test_iter_with_parent_empty() {
771        let data = ValueVector::with_type(LogicalType::Int64);
772        let vec = FactorizedVector::flat(data);
773        let items: Vec<_> = vec.iter_with_parent().collect();
774
775        assert!(items.is_empty());
776    }
777
778    #[test]
779    fn test_empty_parents() {
780        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
781        let mut data = ValueVector::with_type(LogicalType::Int64);
782        data.push_int64(10);
783        data.push_int64(20);
784
785        let offsets = vec![0, 0, 2, 2]; // parent 0: empty, parent 1: 0..2, parent 2: empty
786        let vec = FactorizedVector::unflat(data, offsets, 3);
787
788        assert_eq!(vec.count_for_parent(0), 0);
789        assert_eq!(vec.count_for_parent(1), 2);
790        assert_eq!(vec.count_for_parent(2), 0);
791        assert_eq!(vec.logical_row_count(None), 2);
792    }
793
794    #[test]
795    fn test_node_id_vector() {
796        let mut data = ValueVector::with_type(LogicalType::Node);
797        data.push_node_id(NodeId::new(100));
798        data.push_node_id(NodeId::new(200));
799        data.push_node_id(NodeId::new(300));
800
801        let offsets = vec![0, 2, 3];
802        let vec = FactorizedVector::unflat(data, offsets, 2);
803
804        assert_eq!(vec.count_for_parent(0), 2);
805        assert_eq!(vec.count_for_parent(1), 1);
806
807        // Check physical access
808        let flat = vec.flatten(None);
809        assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
810        assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
811        assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
812    }
813
814    #[test]
815    fn test_factorized_state_debug() {
816        let state_flat = FactorizedState::Flat;
817        assert!(format!("{state_flat:?}").contains("Flat"));
818
819        let state_unflat = FactorizedState::Unflat(UnflatMetadata {
820            offsets: vec![0, 1],
821            parent_count: 1,
822        });
823        assert!(format!("{state_unflat:?}").contains("Unflat"));
824    }
825
826    #[test]
827    fn test_unflat_metadata_debug() {
828        let meta = UnflatMetadata {
829            offsets: vec![0, 2, 5],
830            parent_count: 2,
831        };
832        let debug = format!("{meta:?}");
833        assert!(debug.contains("offsets"));
834        assert!(debug.contains("parent_count"));
835    }
836
837    #[test]
838    fn test_factorized_vector_clone() {
839        let mut data = ValueVector::with_type(LogicalType::Int64);
840        data.push_int64(1);
841        data.push_int64(2);
842
843        let vec = FactorizedVector::flat(data);
844        let cloned = vec.clone();
845
846        assert_eq!(vec.physical_len(), cloned.physical_len());
847        assert_eq!(vec.is_flat(), cloned.is_flat());
848    }
849}