Skip to main content

grafeo_core/execution/
factorized_vector.rs

1//! Factorized vector for avoiding Cartesian product materialization.
2//!
3//! A `FactorizedVector` can represent data in two states:
4//! - **Flat**: One value per logical row (same as `ValueVector`)
5//! - **Unflat**: Multiple values per parent row, with offset arrays tracking boundaries
6//!
7//! This enables multi-hop graph traversals without duplicating source columns.
8//!
9//! # Example
10//!
11//! For a 2-hop traversal where node A has neighbors `[B1, B2]` and node A' has neighbor `[B3]`:
12//!
13//! ```text
14//! Flat representation (current):
15//!   Row 0: (A, B1)
16//!   Row 1: (A, B2)
17//!   Row 2: (A', B3)
18//!   -> 3 rows, A duplicated twice
19//!
20//! Factorized representation:
21//!   Level 0 (flat): [A, A']           (2 values)
22//!   Level 1 (unflat): [B1, B2, B3]    (3 values)
23//!   Offsets: [0, 2, 3]                (A's neighbors at 0..2, A's at 2..3)
24//!   -> 5 values total, no duplication
25//! ```
26
27use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31/// A vector that can represent nested/repeated values without duplication.
32///
33/// In flat mode, this behaves like a regular `ValueVector` - one value per logical row.
34/// In unflat mode, values are grouped by parent, with offset arrays tracking boundaries.
35#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37    /// The underlying data storage.
38    data: ValueVector,
39    /// The factorization state.
40    state: FactorizedState,
41}
42
43/// The factorization state of a vector.
44#[derive(Debug, Clone)]
45#[non_exhaustive]
46pub enum FactorizedState {
47    /// Flat: one value per logical row. `data.len() == logical_row_count`.
48    Flat,
49    /// Unflat: multiple values per logical row, grouped by parent.
50    Unflat(UnflatMetadata),
51}
52
53/// Metadata for unflat vectors.
54///
55/// Tracks how values are grouped by their parent rows using an offset array.
56#[derive(Debug, Clone)]
57pub struct UnflatMetadata {
58    /// Offset array: `offsets[i]` is the start index in data for parent `i`.
59    /// Length is `parent_count + 1`, where the last element equals `data.len()`.
60    offsets: Vec<u32>,
61    /// Number of parent rows this vector is grouped by.
62    parent_count: usize,
63}
64
65impl FactorizedVector {
66    /// Creates a new flat factorized vector from a `ValueVector`.
67    ///
68    /// In flat mode, there's one value per logical row.
69    #[must_use]
70    pub fn flat(data: ValueVector) -> Self {
71        Self {
72            data,
73            state: FactorizedState::Flat,
74        }
75    }
76
77    /// Creates a new unflat factorized vector.
78    ///
79    /// # Arguments
80    ///
81    /// * `data` - The underlying values
82    /// * `offsets` - Offset array where `offsets[i]` is the start index for parent `i`.
83    ///   Must have length `parent_count + 1`.
84    /// * `parent_count` - Number of parent rows
85    ///
86    /// # Panics
87    ///
88    /// Panics if `offsets.len() != parent_count + 1` or if offsets are invalid.
89    #[must_use]
90    pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
91        debug_assert_eq!(
92            offsets.len(),
93            parent_count + 1,
94            "offsets must have length parent_count + 1"
95        );
96        // reason: data length in factorized vectors is bounded by chunk size, fits u32
97        #[allow(clippy::cast_possible_truncation)]
98        let data_len_u32 = data.len() as u32;
99        debug_assert!(
100            offsets.last().copied() == Some(data_len_u32),
101            "last offset must equal data length"
102        );
103
104        Self {
105            data,
106            state: FactorizedState::Unflat(UnflatMetadata {
107                offsets,
108                parent_count,
109            }),
110        }
111    }
112
113    /// Creates an empty flat factorized vector with the given type.
114    #[must_use]
115    pub fn empty(data_type: LogicalType) -> Self {
116        Self::flat(ValueVector::with_type(data_type))
117    }
118
119    /// Returns true if this vector is in flat state.
120    #[must_use]
121    pub fn is_flat(&self) -> bool {
122        matches!(self.state, FactorizedState::Flat)
123    }
124
125    /// Returns true if this vector is in unflat state.
126    #[must_use]
127    pub fn is_unflat(&self) -> bool {
128        matches!(self.state, FactorizedState::Unflat(_))
129    }
130
131    /// Returns the number of physical values stored.
132    ///
133    /// This is the actual storage size, not the logical row count.
134    #[must_use]
135    pub fn physical_len(&self) -> usize {
136        self.data.len()
137    }
138
139    /// Returns the logical type of the underlying data.
140    #[must_use]
141    pub fn data_type(&self) -> LogicalType {
142        self.data.logical_type()
143    }
144
145    /// Returns a reference to the underlying `ValueVector`.
146    #[must_use]
147    pub fn data(&self) -> &ValueVector {
148        &self.data
149    }
150
151    /// Returns a mutable reference to the underlying `ValueVector`.
152    ///
153    /// Use with caution - modifying the data directly may invalidate offsets.
154    pub fn data_mut(&mut self) -> &mut ValueVector {
155        &mut self.data
156    }
157
158    /// Returns the offset array for unflat vectors, or `None` for flat vectors.
159    #[must_use]
160    pub fn offsets(&self) -> Option<&[u32]> {
161        match &self.state {
162            FactorizedState::Flat => None,
163            FactorizedState::Unflat(meta) => Some(&meta.offsets),
164        }
165    }
166
167    /// Returns the parent count for unflat vectors.
168    ///
169    /// For flat vectors, returns the data length (each value is its own parent).
170    #[must_use]
171    pub fn parent_count(&self) -> usize {
172        match &self.state {
173            FactorizedState::Flat => self.data.len(),
174            FactorizedState::Unflat(meta) => meta.parent_count,
175        }
176    }
177
178    /// Gets the count of values for a specific parent index.
179    ///
180    /// For flat vectors, always returns 1.
181    #[must_use]
182    pub fn count_for_parent(&self, parent_idx: usize) -> usize {
183        match &self.state {
184            FactorizedState::Flat => 1,
185            FactorizedState::Unflat(meta) => {
186                if parent_idx >= meta.parent_count {
187                    return 0;
188                }
189                let start = meta.offsets[parent_idx] as usize;
190                let end = meta.offsets[parent_idx + 1] as usize;
191                end - start
192            }
193        }
194    }
195
196    /// Gets the start and end indices for a specific parent.
197    ///
198    /// Returns `(start, end)` where values for this parent are at indices `start..end`.
199    #[must_use]
200    pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
201        match &self.state {
202            FactorizedState::Flat => (parent_idx, parent_idx + 1),
203            FactorizedState::Unflat(meta) => {
204                if parent_idx >= meta.parent_count {
205                    return (0, 0);
206                }
207                let start = meta.offsets[parent_idx] as usize;
208                let end = meta.offsets[parent_idx + 1] as usize;
209                (start, end)
210            }
211        }
212    }
213
214    /// Gets a value at a physical index.
215    #[must_use]
216    pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
217        self.data.get_value(physical_idx)
218    }
219
220    /// Gets a NodeId at a physical index.
221    #[must_use]
222    pub fn get_node_id_physical(
223        &self,
224        physical_idx: usize,
225    ) -> Option<grafeo_common::types::NodeId> {
226        self.data.get_node_id(physical_idx)
227    }
228
229    /// Gets an EdgeId at a physical index.
230    #[must_use]
231    pub fn get_edge_id_physical(
232        &self,
233        physical_idx: usize,
234    ) -> Option<grafeo_common::types::EdgeId> {
235        self.data.get_edge_id(physical_idx)
236    }
237
238    /// Gets a value for a parent at a relative offset within that parent's values.
239    ///
240    /// For flat vectors, `relative_idx` must be 0.
241    #[must_use]
242    pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
243        let (start, end) = self.range_for_parent(parent_idx);
244        let physical_idx = start + relative_idx;
245        if physical_idx >= end {
246            return None;
247        }
248        self.data.get_value(physical_idx)
249    }
250
251    /// Computes the total logical row count given parent multiplicities.
252    ///
253    /// For a flat vector at level 0, this is just `data.len()`.
254    /// For an unflat vector, this is the sum of values for each parent,
255    /// multiplied by that parent's multiplicity.
256    ///
257    /// # Arguments
258    ///
259    /// * `parent_multiplicities` - For each parent index, how many logical rows it represents.
260    ///   If `None`, each parent represents 1 logical row.
261    #[must_use]
262    pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
263        match &self.state {
264            FactorizedState::Flat => self.data.len(),
265            FactorizedState::Unflat(meta) => {
266                let mut total = 0;
267                for i in 0..meta.parent_count {
268                    let count = self.count_for_parent(i);
269                    let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
270                    total += count * mult;
271                }
272                total
273            }
274        }
275    }
276
277    /// Flattens this vector to a regular `ValueVector`.
278    ///
279    /// For flat vectors, returns a clone of the underlying data.
280    /// For unflat vectors, duplicates values according to parent multiplicities.
281    ///
282    /// # Arguments
283    ///
284    /// * `parent_multiplicities` - For each parent, how many times to repeat its values.
285    ///   If `None`, each parent's values appear once.
286    #[must_use]
287    pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
288        match &self.state {
289            FactorizedState::Flat => {
290                // If multiplicities are provided, we need to duplicate values
291                if let Some(mults) = parent_multiplicities {
292                    let capacity = mults.iter().sum();
293                    let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
294                    for (i, &mult) in mults.iter().enumerate() {
295                        if let Some(value) = self.data.get_value(i) {
296                            for _ in 0..mult {
297                                result.push_value(value.clone());
298                            }
299                        }
300                    }
301                    result
302                } else {
303                    self.data.clone()
304                }
305            }
306            FactorizedState::Unflat(meta) => {
307                let capacity = self.logical_row_count(parent_multiplicities);
308                let mut result =
309                    ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
310
311                for parent_idx in 0..meta.parent_count {
312                    let mult = parent_multiplicities
313                        .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
314                    let (start, end) = self.range_for_parent(parent_idx);
315
316                    for _ in 0..mult {
317                        for phys_idx in start..end {
318                            if let Some(value) = self.data.get_value(phys_idx) {
319                                result.push_value(value);
320                            }
321                        }
322                    }
323                }
324                result
325            }
326        }
327    }
328
329    /// Creates an iterator over (parent_idx, physical_idx, value) tuples.
330    pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
331        FactorizedVectorIter {
332            vector: self,
333            parent_idx: 0,
334            physical_idx: 0,
335        }
336    }
337}
338
339/// Iterator over factorized vector values with parent indices.
340struct FactorizedVectorIter<'a> {
341    vector: &'a FactorizedVector,
342    parent_idx: usize,
343    physical_idx: usize,
344}
345
346impl Iterator for FactorizedVectorIter<'_> {
347    type Item = (usize, usize, Value);
348
349    fn next(&mut self) -> Option<Self::Item> {
350        match &self.vector.state {
351            FactorizedState::Flat => {
352                if self.physical_idx >= self.vector.data.len() {
353                    return None;
354                }
355                let value = self.vector.data.get_value(self.physical_idx)?;
356                let result = (self.physical_idx, self.physical_idx, value);
357                self.physical_idx += 1;
358                self.parent_idx += 1;
359                Some(result)
360            }
361            FactorizedState::Unflat(meta) => {
362                // Skip empty parents
363                while self.parent_idx < meta.parent_count {
364                    let (_start, end) = self.vector.range_for_parent(self.parent_idx);
365                    if self.physical_idx < end {
366                        let value = self.vector.data.get_value(self.physical_idx)?;
367                        let result = (self.parent_idx, self.physical_idx, value);
368                        self.physical_idx += 1;
369                        return Some(result);
370                    }
371                    self.parent_idx += 1;
372                    if self.parent_idx < meta.parent_count {
373                        let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
374                        self.physical_idx = new_start;
375                    }
376                }
377                None
378            }
379        }
380    }
381}
382
383#[cfg(test)]
384mod tests {
385    use grafeo_common::types::{EdgeId, NodeId};
386
387    use super::*;
388
389    #[test]
390    fn test_flat_vector() {
391        let mut data = ValueVector::with_type(LogicalType::Int64);
392        data.push_int64(1);
393        data.push_int64(2);
394        data.push_int64(3);
395
396        let vec = FactorizedVector::flat(data);
397
398        assert!(vec.is_flat());
399        assert!(!vec.is_unflat());
400        assert_eq!(vec.physical_len(), 3);
401        assert_eq!(vec.parent_count(), 3);
402        assert_eq!(vec.count_for_parent(0), 1);
403        assert_eq!(vec.count_for_parent(1), 1);
404        assert_eq!(vec.count_for_parent(2), 1);
405    }
406
407    #[test]
408    fn test_unflat_vector() {
409        // Create neighbors: parent 0 has [10, 20], parent 1 has [30]
410        let mut data = ValueVector::with_type(LogicalType::Int64);
411        data.push_int64(10);
412        data.push_int64(20);
413        data.push_int64(30);
414
415        let offsets = vec![0, 2, 3]; // parent 0: 0..2, parent 1: 2..3
416        let vec = FactorizedVector::unflat(data, offsets, 2);
417
418        assert!(!vec.is_flat());
419        assert!(vec.is_unflat());
420        assert_eq!(vec.physical_len(), 3);
421        assert_eq!(vec.parent_count(), 2);
422        assert_eq!(vec.count_for_parent(0), 2);
423        assert_eq!(vec.count_for_parent(1), 1);
424        assert_eq!(vec.range_for_parent(0), (0, 2));
425        assert_eq!(vec.range_for_parent(1), (2, 3));
426    }
427
428    #[test]
429    fn test_empty_vector() {
430        let vec = FactorizedVector::empty(LogicalType::Int64);
431
432        assert!(vec.is_flat());
433        assert_eq!(vec.physical_len(), 0);
434        assert_eq!(vec.data_type(), LogicalType::Int64);
435    }
436
437    #[test]
438    fn test_data_type() {
439        let mut data = ValueVector::with_type(LogicalType::String);
440        data.push_string("hello");
441
442        let vec = FactorizedVector::flat(data);
443        assert_eq!(vec.data_type(), LogicalType::String);
444    }
445
446    #[test]
447    fn test_data_access() {
448        let mut data = ValueVector::with_type(LogicalType::Int64);
449        data.push_int64(42);
450
451        let vec = FactorizedVector::flat(data);
452
453        // Test immutable access
454        assert_eq!(vec.data().len(), 1);
455        assert_eq!(vec.data().get_int64(0), Some(42));
456    }
457
458    #[test]
459    fn test_data_mut() {
460        let mut data = ValueVector::with_type(LogicalType::Int64);
461        data.push_int64(42);
462
463        let mut vec = FactorizedVector::flat(data);
464
465        // Modify via mutable access
466        vec.data_mut().push_int64(100);
467        assert_eq!(vec.physical_len(), 2);
468    }
469
470    #[test]
471    fn test_offsets_flat() {
472        let mut data = ValueVector::with_type(LogicalType::Int64);
473        data.push_int64(1);
474
475        let vec = FactorizedVector::flat(data);
476        assert!(vec.offsets().is_none());
477    }
478
479    #[test]
480    fn test_offsets_unflat() {
481        let mut data = ValueVector::with_type(LogicalType::Int64);
482        data.push_int64(10);
483        data.push_int64(20);
484        data.push_int64(30);
485
486        let offsets = vec![0, 2, 3];
487        let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
488
489        assert_eq!(vec.offsets(), Some(offsets.as_slice()));
490    }
491
492    #[test]
493    fn test_get_physical() {
494        let mut data = ValueVector::with_type(LogicalType::Int64);
495        data.push_int64(10);
496        data.push_int64(20);
497
498        let vec = FactorizedVector::flat(data);
499
500        assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
501        assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
502        assert_eq!(vec.get_physical(2), None);
503    }
504
505    #[test]
506    fn test_get_node_id_physical() {
507        let mut data = ValueVector::with_type(LogicalType::Node);
508        data.push_node_id(NodeId::new(100));
509        data.push_node_id(NodeId::new(200));
510
511        let vec = FactorizedVector::flat(data);
512
513        assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
514        assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
515        assert_eq!(vec.get_node_id_physical(2), None);
516    }
517
518    #[test]
519    fn test_get_edge_id_physical() {
520        let mut data = ValueVector::with_type(LogicalType::Edge);
521        data.push_edge_id(EdgeId::new(100));
522        data.push_edge_id(EdgeId::new(200));
523
524        let vec = FactorizedVector::flat(data);
525
526        assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
527        assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
528        assert_eq!(vec.get_edge_id_physical(2), None);
529    }
530
531    #[test]
532    fn test_range_for_parent_flat() {
533        let mut data = ValueVector::with_type(LogicalType::Int64);
534        data.push_int64(1);
535        data.push_int64(2);
536        data.push_int64(3);
537
538        let vec = FactorizedVector::flat(data);
539
540        assert_eq!(vec.range_for_parent(0), (0, 1));
541        assert_eq!(vec.range_for_parent(1), (1, 2));
542        assert_eq!(vec.range_for_parent(2), (2, 3));
543    }
544
545    #[test]
546    fn test_range_for_parent_out_of_bounds() {
547        let mut data = ValueVector::with_type(LogicalType::Int64);
548        data.push_int64(10);
549        data.push_int64(20);
550
551        let offsets = vec![0, 2];
552        let vec = FactorizedVector::unflat(data, offsets, 1);
553
554        // Out of bounds should return (0, 0)
555        assert_eq!(vec.range_for_parent(10), (0, 0));
556    }
557
558    #[test]
559    fn test_count_for_parent_out_of_bounds() {
560        let mut data = ValueVector::with_type(LogicalType::Int64);
561        data.push_int64(10);
562        data.push_int64(20);
563
564        let offsets = vec![0, 2];
565        let vec = FactorizedVector::unflat(data, offsets, 1);
566
567        // Out of bounds should return 0
568        assert_eq!(vec.count_for_parent(10), 0);
569    }
570
571    #[test]
572    fn test_get_for_parent() {
573        let mut data = ValueVector::with_type(LogicalType::Int64);
574        data.push_int64(10);
575        data.push_int64(20);
576        data.push_int64(30);
577
578        let offsets = vec![0, 2, 3];
579        let vec = FactorizedVector::unflat(data, offsets, 2);
580
581        // Parent 0's values
582        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
583        assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
584        assert_eq!(vec.get_for_parent(0, 2), None); // Out of range
585
586        // Parent 1's values
587        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
588        assert_eq!(vec.get_for_parent(1, 1), None); // Out of range
589    }
590
591    #[test]
592    fn test_get_for_parent_flat() {
593        let mut data = ValueVector::with_type(LogicalType::Int64);
594        data.push_int64(10);
595        data.push_int64(20);
596
597        let vec = FactorizedVector::flat(data);
598
599        assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
600        assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
601        assert_eq!(vec.get_for_parent(0, 1), None); // Only one value per parent in flat mode
602    }
603
604    #[test]
605    fn test_flatten_flat_no_mults() {
606        let mut data = ValueVector::with_type(LogicalType::Int64);
607        data.push_int64(1);
608        data.push_int64(2);
609        data.push_int64(3);
610
611        let vec = FactorizedVector::flat(data);
612        let flat = vec.flatten(None);
613
614        assert_eq!(flat.len(), 3);
615        assert_eq!(flat.get_int64(0), Some(1));
616        assert_eq!(flat.get_int64(2), Some(3));
617    }
618
619    #[test]
620    fn test_flatten_flat_with_mults() {
621        let mut data = ValueVector::with_type(LogicalType::Int64);
622        data.push_int64(10);
623        data.push_int64(20);
624
625        let vec = FactorizedVector::flat(data);
626
627        // Duplicate first value 3 times, second value 2 times
628        let mults = [3, 2];
629        let flat = vec.flatten(Some(&mults));
630
631        assert_eq!(flat.len(), 5);
632        assert_eq!(flat.get_int64(0), Some(10));
633        assert_eq!(flat.get_int64(1), Some(10));
634        assert_eq!(flat.get_int64(2), Some(10));
635        assert_eq!(flat.get_int64(3), Some(20));
636        assert_eq!(flat.get_int64(4), Some(20));
637    }
638
639    #[test]
640    fn test_flatten_unflat() {
641        let mut data = ValueVector::with_type(LogicalType::Int64);
642        data.push_int64(10);
643        data.push_int64(20);
644        data.push_int64(30);
645
646        let offsets = vec![0, 2, 3];
647        let vec = FactorizedVector::unflat(data, offsets, 2);
648
649        // Flatten without multiplicities
650        let flat = vec.flatten(None);
651        assert_eq!(flat.len(), 3);
652        assert_eq!(flat.get_int64(0), Some(10));
653        assert_eq!(flat.get_int64(1), Some(20));
654        assert_eq!(flat.get_int64(2), Some(30));
655    }
656
657    #[test]
658    fn test_flatten_with_multiplicities() {
659        let mut data = ValueVector::with_type(LogicalType::Int64);
660        data.push_int64(10);
661        data.push_int64(20);
662        data.push_int64(30);
663
664        let offsets = vec![0, 2, 3];
665        let vec = FactorizedVector::unflat(data, offsets, 2);
666
667        // Parent 0 has mult 2, parent 1 has mult 1
668        // So: [10,20], [10,20], [30]
669        let mults = [2, 1];
670        let flat = vec.flatten(Some(&mults));
671
672        assert_eq!(flat.len(), 5);
673        assert_eq!(flat.get_int64(0), Some(10));
674        assert_eq!(flat.get_int64(1), Some(20));
675        assert_eq!(flat.get_int64(2), Some(10));
676        assert_eq!(flat.get_int64(3), Some(20));
677        assert_eq!(flat.get_int64(4), Some(30));
678    }
679
680    #[test]
681    fn test_logical_row_count_flat() {
682        let mut data = ValueVector::with_type(LogicalType::Int64);
683        data.push_int64(1);
684        data.push_int64(2);
685        data.push_int64(3);
686
687        let vec = FactorizedVector::flat(data);
688
689        assert_eq!(vec.logical_row_count(None), 3);
690    }
691
692    #[test]
693    fn test_logical_row_count() {
694        let mut data = ValueVector::with_type(LogicalType::Int64);
695        data.push_int64(10);
696        data.push_int64(20);
697        data.push_int64(30);
698
699        let offsets = vec![0, 2, 3];
700        let vec = FactorizedVector::unflat(data, offsets, 2);
701
702        // Without multiplicities: 2 + 1 = 3
703        assert_eq!(vec.logical_row_count(None), 3);
704
705        // With multiplicities [2, 3]: 2*2 + 1*3 = 7
706        let mults = [2, 3];
707        assert_eq!(vec.logical_row_count(Some(&mults)), 7);
708    }
709
710    #[test]
711    fn test_logical_row_count_missing_mult() {
712        let mut data = ValueVector::with_type(LogicalType::Int64);
713        data.push_int64(10);
714        data.push_int64(20);
715        data.push_int64(30);
716
717        let offsets = vec![0, 2, 3];
718        let vec = FactorizedVector::unflat(data, offsets, 2);
719
720        // Only provide one multiplicity - second should default to 1
721        let mults = [2];
722        assert_eq!(vec.logical_row_count(Some(&mults)), 5); // 2*2 + 1*1 = 5
723    }
724
725    #[test]
726    fn test_iter_with_parent_flat() {
727        let mut data = ValueVector::with_type(LogicalType::Int64);
728        data.push_int64(1);
729        data.push_int64(2);
730
731        let vec = FactorizedVector::flat(data);
732        let items: Vec<_> = vec.iter_with_parent().collect();
733
734        assert_eq!(items.len(), 2);
735        assert_eq!(items[0], (0, 0, Value::Int64(1)));
736        assert_eq!(items[1], (1, 1, Value::Int64(2)));
737    }
738
739    #[test]
740    fn test_iter_with_parent_unflat() {
741        let mut data = ValueVector::with_type(LogicalType::Int64);
742        data.push_int64(10);
743        data.push_int64(20);
744        data.push_int64(30);
745
746        let offsets = vec![0, 2, 3];
747        let vec = FactorizedVector::unflat(data, offsets, 2);
748        let items: Vec<_> = vec.iter_with_parent().collect();
749
750        assert_eq!(items.len(), 3);
751        assert_eq!(items[0], (0, 0, Value::Int64(10)));
752        assert_eq!(items[1], (0, 1, Value::Int64(20)));
753        assert_eq!(items[2], (1, 2, Value::Int64(30)));
754    }
755
756    #[test]
757    fn test_iter_with_parent_empty_parents() {
758        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
759        let mut data = ValueVector::with_type(LogicalType::Int64);
760        data.push_int64(10);
761        data.push_int64(20);
762
763        let offsets = vec![0, 0, 2, 2];
764        let vec = FactorizedVector::unflat(data, offsets, 3);
765        let items: Vec<_> = vec.iter_with_parent().collect();
766
767        // Should skip empty parents
768        assert_eq!(items.len(), 2);
769        assert_eq!(items[0], (1, 0, Value::Int64(10)));
770        assert_eq!(items[1], (1, 1, Value::Int64(20)));
771    }
772
773    #[test]
774    fn test_iter_with_parent_empty() {
775        let data = ValueVector::with_type(LogicalType::Int64);
776        let vec = FactorizedVector::flat(data);
777        let items: Vec<_> = vec.iter_with_parent().collect();
778
779        assert!(items.is_empty());
780    }
781
782    #[test]
783    fn test_empty_parents() {
784        // Parent 0 has [], parent 1 has [10, 20], parent 2 has []
785        let mut data = ValueVector::with_type(LogicalType::Int64);
786        data.push_int64(10);
787        data.push_int64(20);
788
789        let offsets = vec![0, 0, 2, 2]; // parent 0: empty, parent 1: 0..2, parent 2: empty
790        let vec = FactorizedVector::unflat(data, offsets, 3);
791
792        assert_eq!(vec.count_for_parent(0), 0);
793        assert_eq!(vec.count_for_parent(1), 2);
794        assert_eq!(vec.count_for_parent(2), 0);
795        assert_eq!(vec.logical_row_count(None), 2);
796    }
797
798    #[test]
799    fn test_node_id_vector() {
800        let mut data = ValueVector::with_type(LogicalType::Node);
801        data.push_node_id(NodeId::new(100));
802        data.push_node_id(NodeId::new(200));
803        data.push_node_id(NodeId::new(300));
804
805        let offsets = vec![0, 2, 3];
806        let vec = FactorizedVector::unflat(data, offsets, 2);
807
808        assert_eq!(vec.count_for_parent(0), 2);
809        assert_eq!(vec.count_for_parent(1), 1);
810
811        // Check physical access
812        let flat = vec.flatten(None);
813        assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
814        assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
815        assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
816    }
817
818    #[test]
819    fn test_factorized_state_debug() {
820        let state_flat = FactorizedState::Flat;
821        assert!(format!("{state_flat:?}").contains("Flat"));
822
823        let state_unflat = FactorizedState::Unflat(UnflatMetadata {
824            offsets: vec![0, 1],
825            parent_count: 1,
826        });
827        assert!(format!("{state_unflat:?}").contains("Unflat"));
828    }
829
830    #[test]
831    fn test_unflat_metadata_debug() {
832        let meta = UnflatMetadata {
833            offsets: vec![0, 2, 5],
834            parent_count: 2,
835        };
836        let debug = format!("{meta:?}");
837        assert!(debug.contains("offsets"));
838        assert!(debug.contains("parent_count"));
839    }
840
841    #[test]
842    fn test_factorized_vector_clone() {
843        let mut data = ValueVector::with_type(LogicalType::Int64);
844        data.push_int64(1);
845        data.push_int64(2);
846
847        let vec = FactorizedVector::flat(data);
848        let cloned = vec.clone();
849
850        assert_eq!(vec.physical_len(), cloned.physical_len());
851        assert_eq!(vec.is_flat(), cloned.is_flat());
852    }
853}