Skip to main content

grafeo_core/execution/
factorized_vector.rs

1//! Factorized vector for avoiding Cartesian product materialization.
2//!
3//! A `FactorizedVector` can represent data in two states:
4//! - **Flat**: One value per logical row (same as `ValueVector`)
5//! - **Unflat**: Multiple values per parent row, with offset arrays tracking boundaries
6//!
7//! This enables multi-hop graph traversals without duplicating source columns.
8//!
9//! # Example
10//!
11//! For a 2-hop traversal where node A has neighbors `[B1, B2]` and node A' has neighbor `[B3]`:
12//!
13//! ```text
14//! Flat representation (current):
15//!   Row 0: (A, B1)
16//!   Row 1: (A, B2)
17//!   Row 2: (A', B3)
18//!   -> 3 rows, A duplicated twice
19//!
20//! Factorized representation:
21//!   Level 0 (flat): [A, A']           (2 values)
22//!   Level 1 (unflat): [B1, B2, B3]    (3 values)
23//!   Offsets: [0, 2, 3]                (A's neighbors at 0..2, A's at 2..3)
24//!   -> 5 values total, no duplication
25//! ```
26
27use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31/// A vector that can represent nested/repeated values without duplication.
32///
33/// In flat mode, this behaves like a regular `ValueVector` - one value per logical row.
34/// In unflat mode, values are grouped by parent, with offset arrays tracking boundaries.
35#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37    /// The underlying data storage.
38    data: ValueVector,
39    /// The factorization state.
40    state: FactorizedState,
41}
42
43/// The factorization state of a vector.
44#[derive(Debug, Clone)]
45#[non_exhaustive]
46pub enum FactorizedState {
47    /// Flat: one value per logical row. `data.len() == logical_row_count`.
48    Flat,
49    /// Unflat: multiple values per logical row, grouped by parent.
50    Unflat(UnflatMetadata),
51}
52
53/// Metadata for unflat vectors.
54///
55/// Tracks how values are grouped by their parent rows using an offset array.
56#[derive(Debug, Clone)]
57pub struct UnflatMetadata {
58    /// Offset array: `offsets[i]` is the start index in data for parent `i`.
59    /// Length is `parent_count + 1`, where the last element equals `data.len()`.
60    offsets: Vec<u32>,
61    /// Number of parent rows this vector is grouped by.
62    parent_count: usize,
63}
64
65impl FactorizedVector {
66    /// Creates a new flat factorized vector from a `ValueVector`.
67    ///
68    /// In flat mode, there's one value per logical row.
69    #[must_use]
70    pub fn flat(data: ValueVector) -> Self {
71        Self {
72            data,
73            state: FactorizedState::Flat,
74        }
75    }
76
77    /// Creates a new unflat factorized vector.
78    ///
79    /// # Arguments
80    ///
81    /// * `data` - The underlying values
82    /// * `offsets` - Offset array where `offsets[i]` is the start index for parent `i`.
83    ///   Must have length `parent_count + 1`.
84    /// * `parent_count` - Number of parent rows
85    ///
86    /// # Panics
87    ///
88    /// Panics if `offsets.len() != parent_count + 1` or if offsets are invalid.
89    #[must_use]
90    pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
91        debug_assert_eq!(
92            offsets.len(),
93            parent_count + 1,
94            "offsets must have length parent_count + 1"
95        );
96        debug_assert!(
97            offsets.last().copied() == Some(data.len() as u32),
98            "last offset must equal data length"
99        );
100
101        Self {
102            data,
103            state: FactorizedState::Unflat(UnflatMetadata {
104                offsets,
105                parent_count,
106            }),
107        }
108    }
109
110    /// Creates an empty flat factorized vector with the given type.
111    #[must_use]
112    pub fn empty(data_type: LogicalType) -> Self {
113        Self::flat(ValueVector::with_type(data_type))
114    }
115
116    /// Returns true if this vector is in flat state.
117    #[must_use]
118    pub fn is_flat(&self) -> bool {
119        matches!(self.state, FactorizedState::Flat)
120    }
121
122    /// Returns true if this vector is in unflat state.
123    #[must_use]
124    pub fn is_unflat(&self) -> bool {
125        matches!(self.state, FactorizedState::Unflat(_))
126    }
127
128    /// Returns the number of physical values stored.
129    ///
130    /// This is the actual storage size, not the logical row count.
131    #[must_use]
132    pub fn physical_len(&self) -> usize {
133        self.data.len()
134    }
135
136    /// Returns the logical type of the underlying data.
137    #[must_use]
138    pub fn data_type(&self) -> LogicalType {
139        self.data.logical_type()
140    }
141
142    /// Returns a reference to the underlying `ValueVector`.
143    #[must_use]
144    pub fn data(&self) -> &ValueVector {
145        &self.data
146    }
147
148    /// Returns a mutable reference to the underlying `ValueVector`.
149    ///
150    /// Use with caution - modifying the data directly may invalidate offsets.
151    pub fn data_mut(&mut self) -> &mut ValueVector {
152        &mut self.data
153    }
154
155    /// Returns the offset array for unflat vectors, or `None` for flat vectors.
156    #[must_use]
157    pub fn offsets(&self) -> Option<&[u32]> {
158        match &self.state {
159            FactorizedState::Flat => None,
160            FactorizedState::Unflat(meta) => Some(&meta.offsets),
161        }
162    }
163
164    /// Returns the parent count for unflat vectors.
165    ///
166    /// For flat vectors, returns the data length (each value is its own parent).
167    #[must_use]
168    pub fn parent_count(&self) -> usize {
169        match &self.state {
170            FactorizedState::Flat => self.data.len(),
171            FactorizedState::Unflat(meta) => meta.parent_count,
172        }
173    }
174
175    /// Gets the count of values for a specific parent index.
176    ///
177    /// For flat vectors, always returns 1.
178    #[must_use]
179    pub fn count_for_parent(&self, parent_idx: usize) -> usize {
180        match &self.state {
181            FactorizedState::Flat => 1,
182            FactorizedState::Unflat(meta) => {
183                if parent_idx >= meta.parent_count {
184                    return 0;
185                }
186                let start = meta.offsets[parent_idx] as usize;
187                let end = meta.offsets[parent_idx + 1] as usize;
188                end - start
189            }
190        }
191    }
192
193    /// Gets the start and end indices for a specific parent.
194    ///
195    /// Returns `(start, end)` where values for this parent are at indices `start..end`.
196    #[must_use]
197    pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
198        match &self.state {
199            FactorizedState::Flat => (parent_idx, parent_idx + 1),
200            FactorizedState::Unflat(meta) => {
201                if parent_idx >= meta.parent_count {
202                    return (0, 0);
203                }
204                let start = meta.offsets[parent_idx] as usize;
205                let end = meta.offsets[parent_idx + 1] as usize;
206                (start, end)
207            }
208        }
209    }
210
211    /// Gets a value at a physical index.
212    #[must_use]
213    pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
214        self.data.get_value(physical_idx)
215    }
216
217    /// Gets a NodeId at a physical index.
218    #[must_use]
219    pub fn get_node_id_physical(
220        &self,
221        physical_idx: usize,
222    ) -> Option<grafeo_common::types::NodeId> {
223        self.data.get_node_id(physical_idx)
224    }
225
226    /// Gets an EdgeId at a physical index.
227    #[must_use]
228    pub fn get_edge_id_physical(
229        &self,
230        physical_idx: usize,
231    ) -> Option<grafeo_common::types::EdgeId> {
232        self.data.get_edge_id(physical_idx)
233    }
234
235    /// Gets a value for a parent at a relative offset within that parent's values.
236    ///
237    /// For flat vectors, `relative_idx` must be 0.
238    #[must_use]
239    pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
240        let (start, end) = self.range_for_parent(parent_idx);
241        let physical_idx = start + relative_idx;
242        if physical_idx >= end {
243            return None;
244        }
245        self.data.get_value(physical_idx)
246    }
247
248    /// Computes the total logical row count given parent multiplicities.
249    ///
250    /// For a flat vector at level 0, this is just `data.len()`.
251    /// For an unflat vector, this is the sum of values for each parent,
252    /// multiplied by that parent's multiplicity.
253    ///
254    /// # Arguments
255    ///
256    /// * `parent_multiplicities` - For each parent index, how many logical rows it represents.
257    ///   If `None`, each parent represents 1 logical row.
258    #[must_use]
259    pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
260        match &self.state {
261            FactorizedState::Flat => self.data.len(),
262            FactorizedState::Unflat(meta) => {
263                let mut total = 0;
264                for i in 0..meta.parent_count {
265                    let count = self.count_for_parent(i);
266                    let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
267                    total += count * mult;
268                }
269                total
270            }
271        }
272    }
273
274    /// Flattens this vector to a regular `ValueVector`.
275    ///
276    /// For flat vectors, returns a clone of the underlying data.
277    /// For unflat vectors, duplicates values according to parent multiplicities.
278    ///
279    /// # Arguments
280    ///
281    /// * `parent_multiplicities` - For each parent, how many times to repeat its values.
282    ///   If `None`, each parent's values appear once.
283    #[must_use]
284    pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
285        match &self.state {
286            FactorizedState::Flat => {
287                // If multiplicities are provided, we need to duplicate values
288                if let Some(mults) = parent_multiplicities {
289                    let capacity = mults.iter().sum();
290                    let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
291                    for (i, &mult) in mults.iter().enumerate() {
292                        if let Some(value) = self.data.get_value(i) {
293                            for _ in 0..mult {
294                                result.push_value(value.clone());
295                            }
296                        }
297                    }
298                    result
299                } else {
300                    self.data.clone()
301                }
302            }
303            FactorizedState::Unflat(meta) => {
304                let capacity = self.logical_row_count(parent_multiplicities);
305                let mut result =
306                    ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
307
308                for parent_idx in 0..meta.parent_count {
309                    let mult = parent_multiplicities
310                        .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
311                    let (start, end) = self.range_for_parent(parent_idx);
312
313                    for _ in 0..mult {
314                        for phys_idx in start..end {
315                            if let Some(value) = self.data.get_value(phys_idx) {
316                                result.push_value(value);
317                            }
318                        }
319                    }
320                }
321                result
322            }
323        }
324    }
325
326    /// Creates an iterator over (parent_idx, physical_idx, value) tuples.
327    pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
328        FactorizedVectorIter {
329            vector: self,
330            parent_idx: 0,
331            physical_idx: 0,
332        }
333    }
334}
335
336/// Iterator over factorized vector values with parent indices.
337struct FactorizedVectorIter<'a> {
338    vector: &'a FactorizedVector,
339    parent_idx: usize,
340    physical_idx: usize,
341}
342
343impl Iterator for FactorizedVectorIter<'_> {
344    type Item = (usize, usize, Value);
345
346    fn next(&mut self) -> Option<Self::Item> {
347        match &self.vector.state {
348            FactorizedState::Flat => {
349                if self.physical_idx >= self.vector.data.len() {
350                    return None;
351                }
352                let value = self.vector.data.get_value(self.physical_idx)?;
353                let result = (self.physical_idx, self.physical_idx, value);
354                self.physical_idx += 1;
355                self.parent_idx += 1;
356                Some(result)
357            }
358            FactorizedState::Unflat(meta) => {
359                // Skip empty parents
360                while self.parent_idx < meta.parent_count {
361                    let (_start, end) = self.vector.range_for_parent(self.parent_idx);
362                    if self.physical_idx < end {
363                        let value = self.vector.data.get_value(self.physical_idx)?;
364                        let result = (self.parent_idx, self.physical_idx, value);
365                        self.physical_idx += 1;
366                        return Some(result);
367                    }
368                    self.parent_idx += 1;
369                    if self.parent_idx < meta.parent_count {
370                        let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
371                        self.physical_idx = new_start;
372                    }
373                }
374                None
375            }
376        }
377    }
378}
379
380#[cfg(test)]
381mod tests {
382    use grafeo_common::types::{EdgeId, NodeId};
383
384    use super::*;
385
386    #[test]
387    fn test_flat_vector() {
388        let mut data = ValueVector::with_type(LogicalType::Int64);
389        data.push_int64(1);
390        data.push_int64(2);
391        data.push_int64(3);
392
393        let vec = FactorizedVector::flat(data);
394
395        assert!(vec.is_flat());
396        assert!(!vec.is_unflat());
397        assert_eq!(vec.physical_len(), 3);
398        assert_eq!(vec.parent_count(), 3);
399        assert_eq!(vec.count_for_parent(0), 1);
400        assert_eq!(vec.count_for_parent(1), 1);
401        assert_eq!(vec.count_for_parent(2), 1);
402    }
403
404    #[test]
405    fn test_unflat_vector() {
406        // Create neighbors: parent 0 has [10, 20], parent 1 has [30]
407        let mut data = ValueVector::with_type(LogicalType::Int64);
408        data.push_int64(10);
409        data.push_int64(20);
410        data.push_int64(30);
411
412        let offsets = vec![0, 2, 3]; // parent 0: 0..2, parent 1: 2..3
413        let vec = FactorizedVector::unflat(data, offsets, 2);
414
415        assert!(!vec.is_flat());
416        assert!(vec.is_unflat());
417        assert_eq!(vec.physical_len(), 3);
418        assert_eq!(vec.parent_count(), 2);
419        assert_eq!(vec.count_for_parent(0), 2);
420        assert_eq!(vec.count_for_parent(1), 1);
421        assert_eq!(vec.range_for_parent(0), (0, 2));
422        assert_eq!(vec.range_for_parent(1), (2, 3));
423    }
424
425    #[test]
426    fn test_empty_vector() {
427        let vec = FactorizedVector::empty(LogicalType::Int64);
428
429        assert!(vec.is_flat());
430        assert_eq!(vec.physical_len(), 0);
431        assert_eq!(vec.data_type(), LogicalType::Int64);
432    }
433
434    #[test]
435    fn test_data_type() {
436        let mut data = ValueVector::with_type(LogicalType::String);
437        data.push_string("hello");
438
439        let vec = FactorizedVector::flat(data);
440        assert_eq!(vec.data_type(), LogicalType::String);
441    }
442
443    #[test]
444    fn test_data_access() {
445        let mut data = ValueVector::with_type(LogicalType::Int64);
446        data.push_int64(42);
447
448        let vec = FactorizedVector::flat(data);
449
450        // Test immutable access
451        assert_eq!(vec.data().len(), 1);
452        assert_eq!(vec.data().get_int64(0), Some(42));
453    }
454
455    #[test]
456    fn test_data_mut() {
457        let mut data = ValueVector::with_type(LogicalType::Int64);
458        data.push_int64(42);
459
460        let mut vec = FactorizedVector::flat(data);
461
462        // Modify via mutable access
463        vec.data_mut().push_int64(100);
464        assert_eq!(vec.physical_len(), 2);
465    }
466
467    #[test]
468    fn test_offsets_flat() {
469        let mut data = ValueVector::with_type(LogicalType::Int64);
470        data.push_int64(1);
471
472        let vec = FactorizedVector::flat(data);
473        assert!(vec.offsets().is_none());
474    }
475
476    #[test]
477    fn test_offsets_unflat() {
478        let mut data = ValueVector::with_type(LogicalType::Int64);
479        data.push_int64(10);
480        data.push_int64(20);
481        data.push_int64(30);
482
483        let offsets = vec![0, 2, 3];
484        let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
485
486        assert_eq!(vec.offsets(), Some(offsets.as_slice()));
487    }
488
489    #[test]
490    fn test_get_physical() {
491        let mut data = ValueVector::with_type(LogicalType::Int64);
492        data.push_int64(10);
493        data.push_int64(20);
494
495        let vec = FactorizedVector::flat(data);
496
497        assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
498        assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
499        assert_eq!(vec.get_physical(2), None);
500    }
501
502    #[test]
503    fn test_get_node_id_physical() {
504        let mut data = ValueVector::with_type(LogicalType::Node);
505        data.push_node_id(NodeId::new(100));
506        data.push_node_id(NodeId::new(200));
507
508        let vec = FactorizedVector::flat(data);
509
510        assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
511        assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
512        assert_eq!(vec.get_node_id_physical(2), None);
513    }
514
515    #[test]
516    fn test_get_edge_id_physical() {
517        let mut data = ValueVector::with_type(LogicalType::Edge);
518        data.push_edge_id(EdgeId::new(100));
519        data.push_edge_id(EdgeId::new(200));
520
521        let vec = FactorizedVector::flat(data);
522
523        assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
524        assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
525        assert_eq!(vec.get_edge_id_physical(2), None);
526    }
527
528    #[test]
529    fn test_range_for_parent_flat() {
530        let mut data = ValueVector::with_type(LogicalType::Int64);
531        data.push_int64(1);
532        data.push_int64(2);
533        data.push_int64(3);
534
535        let vec = FactorizedVector::flat(data);
536
537        assert_eq!(vec.range_for_parent(0), (0, 1));
538        assert_eq!(vec.range_for_parent(1), (1, 2));
539        assert_eq!(vec.range_for_parent(2), (2, 3));
540    }
541
542    #[test]
543    fn test_range_for_parent_out_of_bounds() {
544        let mut data = ValueVector::with_type(LogicalType::Int64);
545        data.push_int64(10);
546        data.push_int64(20);
547
548        let offsets = vec![0, 2];
549        let vec = FactorizedVector::unflat(data, offsets, 1);
550
551        // Out of bounds should return (0, 0)
552        assert_eq!(vec.range_for_parent(10), (0, 0));
553    }
554
555    #[test]
556    fn test_count_for_parent_out_of_bounds() {
557        let mut data = ValueVector::with_type(LogicalType::Int64);
558        data.push_int64(10);
559        data.push_int64(20);
560
561        let offsets = vec![0, 2];
562        let vec = FactorizedVector::unflat(data, offsets, 1);
563
564        // Out of bounds should return 0
565        assert_eq!(vec.count_for_parent(10), 0);
566    }
567
568    #[test]
569    fn test_get_for_parent() {
570        let mut data = ValueVector::with_type(LogicalType::Int64);
571        data.push_int64(10);
572        data.push_int64(20);
573        data.push_int64(30);
574
575        let offsets = vec![0, 2, 3];
576        let vec = FactorizedVector::unflat(data, offsets, 2);
577
578        // Parent 0's values
579        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
580        assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
581        assert_eq!(vec.get_for_parent(0, 2), None); // Out of range
582
583        // Parent 1's values
584        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
585        assert_eq!(vec.get_for_parent(1, 1), None); // Out of range
586    }
587
588    #[test]
589    fn test_get_for_parent_flat() {
590        let mut data = ValueVector::with_type(LogicalType::Int64);
591        data.push_int64(10);
592        data.push_int64(20);
593
594        let vec = FactorizedVector::flat(data);
595
596        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
597        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
598        assert_eq!(vec.get_for_parent(0, 1), None); // Only one value per parent in flat mode
599    }
600
601    #[test]
602    fn test_flatten_flat_no_mults() {
603        let mut data = ValueVector::with_type(LogicalType::Int64);
604        data.push_int64(1);
605        data.push_int64(2);
606        data.push_int64(3);
607
608        let vec = FactorizedVector::flat(data);
609        let flat = vec.flatten(None);
610
611        assert_eq!(flat.len(), 3);
612        assert_eq!(flat.get_int64(0), Some(1));
613        assert_eq!(flat.get_int64(2), Some(3));
614    }
615
616    #[test]
617    fn test_flatten_flat_with_mults() {
618        let mut data = ValueVector::with_type(LogicalType::Int64);
619        data.push_int64(10);
620        data.push_int64(20);
621
622        let vec = FactorizedVector::flat(data);
623
624        // Duplicate first value 3 times, second value 2 times
625        let mults = [3, 2];
626        let flat = vec.flatten(Some(&mults));
627
628        assert_eq!(flat.len(), 5);
629        assert_eq!(flat.get_int64(0), Some(10));
630        assert_eq!(flat.get_int64(1), Some(10));
631        assert_eq!(flat.get_int64(2), Some(10));
632        assert_eq!(flat.get_int64(3), Some(20));
633        assert_eq!(flat.get_int64(4), Some(20));
634    }
635
636    #[test]
637    fn test_flatten_unflat() {
638        let mut data = ValueVector::with_type(LogicalType::Int64);
639        data.push_int64(10);
640        data.push_int64(20);
641        data.push_int64(30);
642
643        let offsets = vec![0, 2, 3];
644        let vec = FactorizedVector::unflat(data, offsets, 2);
645
646        // Flatten without multiplicities
647        let flat = vec.flatten(None);
648        assert_eq!(flat.len(), 3);
649        assert_eq!(flat.get_int64(0), Some(10));
650        assert_eq!(flat.get_int64(1), Some(20));
651        assert_eq!(flat.get_int64(2), Some(30));
652    }
653
654    #[test]
655    fn test_flatten_with_multiplicities() {
656        let mut data = ValueVector::with_type(LogicalType::Int64);
657        data.push_int64(10);
658        data.push_int64(20);
659        data.push_int64(30);
660
661        let offsets = vec![0, 2, 3];
662        let vec = FactorizedVector::unflat(data, offsets, 2);
663
664        // Parent 0 has mult 2, parent 1 has mult 1
665        // So: [10,20], [10,20], [30]
666        let mults = [2, 1];
667        let flat = vec.flatten(Some(&mults));
668
669        assert_eq!(flat.len(), 5);
670        assert_eq!(flat.get_int64(0), Some(10));
671        assert_eq!(flat.get_int64(1), Some(20));
672        assert_eq!(flat.get_int64(2), Some(10));
673        assert_eq!(flat.get_int64(3), Some(20));
674        assert_eq!(flat.get_int64(4), Some(30));
675    }
676
677    #[test]
678    fn test_logical_row_count_flat() {
679        let mut data = ValueVector::with_type(LogicalType::Int64);
680        data.push_int64(1);
681        data.push_int64(2);
682        data.push_int64(3);
683
684        let vec = FactorizedVector::flat(data);
685
686        assert_eq!(vec.logical_row_count(None), 3);
687    }
688
689    #[test]
690    fn test_logical_row_count() {
691        let mut data = ValueVector::with_type(LogicalType::Int64);
692        data.push_int64(10);
693        data.push_int64(20);
694        data.push_int64(30);
695
696        let offsets = vec![0, 2, 3];
697        let vec = FactorizedVector::unflat(data, offsets, 2);
698
699        // Without multiplicities: 2 + 1 = 3
700        assert_eq!(vec.logical_row_count(None), 3);
701
702        // With multiplicities [2, 3]: 2*2 + 1*3 = 7
703        let mults = [2, 3];
704        assert_eq!(vec.logical_row_count(Some(&mults)), 7);
705    }
706
707    #[test]
708    fn test_logical_row_count_missing_mult() {
709        let mut data = ValueVector::with_type(LogicalType::Int64);
710        data.push_int64(10);
711        data.push_int64(20);
712        data.push_int64(30);
713
714        let offsets = vec![0, 2, 3];
715        let vec = FactorizedVector::unflat(data, offsets, 2);
716
717        // Only provide one multiplicity - second should default to 1
718        let mults = [2];
719        assert_eq!(vec.logical_row_count(Some(&mults)), 5); // 2*2 + 1*1 = 5
720    }
721
722    #[test]
723    fn test_iter_with_parent_flat() {
724        let mut data = ValueVector::with_type(LogicalType::Int64);
725        data.push_int64(1);
726        data.push_int64(2);
727
728        let vec = FactorizedVector::flat(data);
729        let items: Vec<_> = vec.iter_with_parent().collect();
730
731        assert_eq!(items.len(), 2);
732        assert_eq!(items[0], (0, 0, Value::Int64(1)));
733        assert_eq!(items[1], (1, 1, Value::Int64(2)));
734    }
735
736    #[test]
737    fn test_iter_with_parent_unflat() {
738        let mut data = ValueVector::with_type(LogicalType::Int64);
739        data.push_int64(10);
740        data.push_int64(20);
741        data.push_int64(30);
742
743        let offsets = vec![0, 2, 3];
744        let vec = FactorizedVector::unflat(data, offsets, 2);
745        let items: Vec<_> = vec.iter_with_parent().collect();
746
747        assert_eq!(items.len(), 3);
748        assert_eq!(items[0], (0, 0, Value::Int64(10)));
749        assert_eq!(items[1], (0, 1, Value::Int64(20)));
750        assert_eq!(items[2], (1, 2, Value::Int64(30)));
751    }
752
753    #[test]
754    fn test_iter_with_parent_empty_parents() {
755        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
756        let mut data = ValueVector::with_type(LogicalType::Int64);
757        data.push_int64(10);
758        data.push_int64(20);
759
760        let offsets = vec![0, 0, 2, 2];
761        let vec = FactorizedVector::unflat(data, offsets, 3);
762        let items: Vec<_> = vec.iter_with_parent().collect();
763
764        // Should skip empty parents
765        assert_eq!(items.len(), 2);
766        assert_eq!(items[0], (1, 0, Value::Int64(10)));
767        assert_eq!(items[1], (1, 1, Value::Int64(20)));
768    }
769
770    #[test]
771    fn test_iter_with_parent_empty() {
772        let data = ValueVector::with_type(LogicalType::Int64);
773        let vec = FactorizedVector::flat(data);
774        let items: Vec<_> = vec.iter_with_parent().collect();
775
776        assert!(items.is_empty());
777    }
778
779    #[test]
780    fn test_empty_parents() {
781        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
782        let mut data = ValueVector::with_type(LogicalType::Int64);
783        data.push_int64(10);
784        data.push_int64(20);
785
786        let offsets = vec![0, 0, 2, 2]; // parent 0: empty, parent 1: 0..2, parent 2: empty
787        let vec = FactorizedVector::unflat(data, offsets, 3);
788
789        assert_eq!(vec.count_for_parent(0), 0);
790        assert_eq!(vec.count_for_parent(1), 2);
791        assert_eq!(vec.count_for_parent(2), 0);
792        assert_eq!(vec.logical_row_count(None), 2);
793    }
794
795    #[test]
796    fn test_node_id_vector() {
797        let mut data = ValueVector::with_type(LogicalType::Node);
798        data.push_node_id(NodeId::new(100));
799        data.push_node_id(NodeId::new(200));
800        data.push_node_id(NodeId::new(300));
801
802        let offsets = vec![0, 2, 3];
803        let vec = FactorizedVector::unflat(data, offsets, 2);
804
805        assert_eq!(vec.count_for_parent(0), 2);
806        assert_eq!(vec.count_for_parent(1), 1);
807
808        // Check physical access
809        let flat = vec.flatten(None);
810        assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
811        assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
812        assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
813    }
814
815    #[test]
816    fn test_factorized_state_debug() {
817        let state_flat = FactorizedState::Flat;
818        assert!(format!("{state_flat:?}").contains("Flat"));
819
820        let state_unflat = FactorizedState::Unflat(UnflatMetadata {
821            offsets: vec![0, 1],
822            parent_count: 1,
823        });
824        assert!(format!("{state_unflat:?}").contains("Unflat"));
825    }
826
827    #[test]
828    fn test_unflat_metadata_debug() {
829        let meta = UnflatMetadata {
830            offsets: vec![0, 2, 5],
831            parent_count: 2,
832        };
833        let debug = format!("{meta:?}");
834        assert!(debug.contains("offsets"));
835        assert!(debug.contains("parent_count"));
836    }
837
838    #[test]
839    fn test_factorized_vector_clone() {
840        let mut data = ValueVector::with_type(LogicalType::Int64);
841        data.push_int64(1);
842        data.push_int64(2);
843
844        let vec = FactorizedVector::flat(data);
845        let cloned = vec.clone();
846
847        assert_eq!(vec.physical_len(), cloned.physical_len());
848        assert_eq!(vec.is_flat(), cloned.is_flat());
849    }
850}