1use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37 data: ValueVector,
39 state: FactorizedState,
41}
42
43#[derive(Debug, Clone)]
45#[non_exhaustive]
46pub enum FactorizedState {
47 Flat,
49 Unflat(UnflatMetadata),
51}
52
53#[derive(Debug, Clone)]
57pub struct UnflatMetadata {
58 offsets: Vec<u32>,
61 parent_count: usize,
63}
64
65impl FactorizedVector {
66 #[must_use]
70 pub fn flat(data: ValueVector) -> Self {
71 Self {
72 data,
73 state: FactorizedState::Flat,
74 }
75 }
76
77 #[must_use]
90 pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
91 debug_assert_eq!(
92 offsets.len(),
93 parent_count + 1,
94 "offsets must have length parent_count + 1"
95 );
96 debug_assert!(
97 offsets.last().copied() == Some(data.len() as u32),
98 "last offset must equal data length"
99 );
100
101 Self {
102 data,
103 state: FactorizedState::Unflat(UnflatMetadata {
104 offsets,
105 parent_count,
106 }),
107 }
108 }
109
110 #[must_use]
112 pub fn empty(data_type: LogicalType) -> Self {
113 Self::flat(ValueVector::with_type(data_type))
114 }
115
116 #[must_use]
118 pub fn is_flat(&self) -> bool {
119 matches!(self.state, FactorizedState::Flat)
120 }
121
122 #[must_use]
124 pub fn is_unflat(&self) -> bool {
125 matches!(self.state, FactorizedState::Unflat(_))
126 }
127
128 #[must_use]
132 pub fn physical_len(&self) -> usize {
133 self.data.len()
134 }
135
136 #[must_use]
138 pub fn data_type(&self) -> LogicalType {
139 self.data.logical_type()
140 }
141
142 #[must_use]
144 pub fn data(&self) -> &ValueVector {
145 &self.data
146 }
147
148 pub fn data_mut(&mut self) -> &mut ValueVector {
152 &mut self.data
153 }
154
155 #[must_use]
157 pub fn offsets(&self) -> Option<&[u32]> {
158 match &self.state {
159 FactorizedState::Flat => None,
160 FactorizedState::Unflat(meta) => Some(&meta.offsets),
161 }
162 }
163
164 #[must_use]
168 pub fn parent_count(&self) -> usize {
169 match &self.state {
170 FactorizedState::Flat => self.data.len(),
171 FactorizedState::Unflat(meta) => meta.parent_count,
172 }
173 }
174
175 #[must_use]
179 pub fn count_for_parent(&self, parent_idx: usize) -> usize {
180 match &self.state {
181 FactorizedState::Flat => 1,
182 FactorizedState::Unflat(meta) => {
183 if parent_idx >= meta.parent_count {
184 return 0;
185 }
186 let start = meta.offsets[parent_idx] as usize;
187 let end = meta.offsets[parent_idx + 1] as usize;
188 end - start
189 }
190 }
191 }
192
193 #[must_use]
197 pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
198 match &self.state {
199 FactorizedState::Flat => (parent_idx, parent_idx + 1),
200 FactorizedState::Unflat(meta) => {
201 if parent_idx >= meta.parent_count {
202 return (0, 0);
203 }
204 let start = meta.offsets[parent_idx] as usize;
205 let end = meta.offsets[parent_idx + 1] as usize;
206 (start, end)
207 }
208 }
209 }
210
211 #[must_use]
213 pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
214 self.data.get_value(physical_idx)
215 }
216
217 #[must_use]
219 pub fn get_node_id_physical(
220 &self,
221 physical_idx: usize,
222 ) -> Option<grafeo_common::types::NodeId> {
223 self.data.get_node_id(physical_idx)
224 }
225
226 #[must_use]
228 pub fn get_edge_id_physical(
229 &self,
230 physical_idx: usize,
231 ) -> Option<grafeo_common::types::EdgeId> {
232 self.data.get_edge_id(physical_idx)
233 }
234
235 #[must_use]
239 pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
240 let (start, end) = self.range_for_parent(parent_idx);
241 let physical_idx = start + relative_idx;
242 if physical_idx >= end {
243 return None;
244 }
245 self.data.get_value(physical_idx)
246 }
247
248 #[must_use]
259 pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
260 match &self.state {
261 FactorizedState::Flat => self.data.len(),
262 FactorizedState::Unflat(meta) => {
263 let mut total = 0;
264 for i in 0..meta.parent_count {
265 let count = self.count_for_parent(i);
266 let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
267 total += count * mult;
268 }
269 total
270 }
271 }
272 }
273
274 #[must_use]
284 pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
285 match &self.state {
286 FactorizedState::Flat => {
287 if let Some(mults) = parent_multiplicities {
289 let capacity = mults.iter().sum();
290 let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
291 for (i, &mult) in mults.iter().enumerate() {
292 if let Some(value) = self.data.get_value(i) {
293 for _ in 0..mult {
294 result.push_value(value.clone());
295 }
296 }
297 }
298 result
299 } else {
300 self.data.clone()
301 }
302 }
303 FactorizedState::Unflat(meta) => {
304 let capacity = self.logical_row_count(parent_multiplicities);
305 let mut result =
306 ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
307
308 for parent_idx in 0..meta.parent_count {
309 let mult = parent_multiplicities
310 .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
311 let (start, end) = self.range_for_parent(parent_idx);
312
313 for _ in 0..mult {
314 for phys_idx in start..end {
315 if let Some(value) = self.data.get_value(phys_idx) {
316 result.push_value(value);
317 }
318 }
319 }
320 }
321 result
322 }
323 }
324 }
325
326 pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
328 FactorizedVectorIter {
329 vector: self,
330 parent_idx: 0,
331 physical_idx: 0,
332 }
333 }
334}
335
336struct FactorizedVectorIter<'a> {
338 vector: &'a FactorizedVector,
339 parent_idx: usize,
340 physical_idx: usize,
341}
342
343impl Iterator for FactorizedVectorIter<'_> {
344 type Item = (usize, usize, Value);
345
346 fn next(&mut self) -> Option<Self::Item> {
347 match &self.vector.state {
348 FactorizedState::Flat => {
349 if self.physical_idx >= self.vector.data.len() {
350 return None;
351 }
352 let value = self.vector.data.get_value(self.physical_idx)?;
353 let result = (self.physical_idx, self.physical_idx, value);
354 self.physical_idx += 1;
355 self.parent_idx += 1;
356 Some(result)
357 }
358 FactorizedState::Unflat(meta) => {
359 while self.parent_idx < meta.parent_count {
361 let (_start, end) = self.vector.range_for_parent(self.parent_idx);
362 if self.physical_idx < end {
363 let value = self.vector.data.get_value(self.physical_idx)?;
364 let result = (self.parent_idx, self.physical_idx, value);
365 self.physical_idx += 1;
366 return Some(result);
367 }
368 self.parent_idx += 1;
369 if self.parent_idx < meta.parent_count {
370 let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
371 self.physical_idx = new_start;
372 }
373 }
374 None
375 }
376 }
377 }
378}
379
380#[cfg(test)]
381mod tests {
382 use grafeo_common::types::{EdgeId, NodeId};
383
384 use super::*;
385
386 #[test]
387 fn test_flat_vector() {
388 let mut data = ValueVector::with_type(LogicalType::Int64);
389 data.push_int64(1);
390 data.push_int64(2);
391 data.push_int64(3);
392
393 let vec = FactorizedVector::flat(data);
394
395 assert!(vec.is_flat());
396 assert!(!vec.is_unflat());
397 assert_eq!(vec.physical_len(), 3);
398 assert_eq!(vec.parent_count(), 3);
399 assert_eq!(vec.count_for_parent(0), 1);
400 assert_eq!(vec.count_for_parent(1), 1);
401 assert_eq!(vec.count_for_parent(2), 1);
402 }
403
404 #[test]
405 fn test_unflat_vector() {
406 let mut data = ValueVector::with_type(LogicalType::Int64);
408 data.push_int64(10);
409 data.push_int64(20);
410 data.push_int64(30);
411
412 let offsets = vec![0, 2, 3]; let vec = FactorizedVector::unflat(data, offsets, 2);
414
415 assert!(!vec.is_flat());
416 assert!(vec.is_unflat());
417 assert_eq!(vec.physical_len(), 3);
418 assert_eq!(vec.parent_count(), 2);
419 assert_eq!(vec.count_for_parent(0), 2);
420 assert_eq!(vec.count_for_parent(1), 1);
421 assert_eq!(vec.range_for_parent(0), (0, 2));
422 assert_eq!(vec.range_for_parent(1), (2, 3));
423 }
424
425 #[test]
426 fn test_empty_vector() {
427 let vec = FactorizedVector::empty(LogicalType::Int64);
428
429 assert!(vec.is_flat());
430 assert_eq!(vec.physical_len(), 0);
431 assert_eq!(vec.data_type(), LogicalType::Int64);
432 }
433
434 #[test]
435 fn test_data_type() {
436 let mut data = ValueVector::with_type(LogicalType::String);
437 data.push_string("hello");
438
439 let vec = FactorizedVector::flat(data);
440 assert_eq!(vec.data_type(), LogicalType::String);
441 }
442
443 #[test]
444 fn test_data_access() {
445 let mut data = ValueVector::with_type(LogicalType::Int64);
446 data.push_int64(42);
447
448 let vec = FactorizedVector::flat(data);
449
450 assert_eq!(vec.data().len(), 1);
452 assert_eq!(vec.data().get_int64(0), Some(42));
453 }
454
455 #[test]
456 fn test_data_mut() {
457 let mut data = ValueVector::with_type(LogicalType::Int64);
458 data.push_int64(42);
459
460 let mut vec = FactorizedVector::flat(data);
461
462 vec.data_mut().push_int64(100);
464 assert_eq!(vec.physical_len(), 2);
465 }
466
467 #[test]
468 fn test_offsets_flat() {
469 let mut data = ValueVector::with_type(LogicalType::Int64);
470 data.push_int64(1);
471
472 let vec = FactorizedVector::flat(data);
473 assert!(vec.offsets().is_none());
474 }
475
476 #[test]
477 fn test_offsets_unflat() {
478 let mut data = ValueVector::with_type(LogicalType::Int64);
479 data.push_int64(10);
480 data.push_int64(20);
481 data.push_int64(30);
482
483 let offsets = vec![0, 2, 3];
484 let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
485
486 assert_eq!(vec.offsets(), Some(offsets.as_slice()));
487 }
488
489 #[test]
490 fn test_get_physical() {
491 let mut data = ValueVector::with_type(LogicalType::Int64);
492 data.push_int64(10);
493 data.push_int64(20);
494
495 let vec = FactorizedVector::flat(data);
496
497 assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
498 assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
499 assert_eq!(vec.get_physical(2), None);
500 }
501
502 #[test]
503 fn test_get_node_id_physical() {
504 let mut data = ValueVector::with_type(LogicalType::Node);
505 data.push_node_id(NodeId::new(100));
506 data.push_node_id(NodeId::new(200));
507
508 let vec = FactorizedVector::flat(data);
509
510 assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
511 assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
512 assert_eq!(vec.get_node_id_physical(2), None);
513 }
514
515 #[test]
516 fn test_get_edge_id_physical() {
517 let mut data = ValueVector::with_type(LogicalType::Edge);
518 data.push_edge_id(EdgeId::new(100));
519 data.push_edge_id(EdgeId::new(200));
520
521 let vec = FactorizedVector::flat(data);
522
523 assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
524 assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
525 assert_eq!(vec.get_edge_id_physical(2), None);
526 }
527
528 #[test]
529 fn test_range_for_parent_flat() {
530 let mut data = ValueVector::with_type(LogicalType::Int64);
531 data.push_int64(1);
532 data.push_int64(2);
533 data.push_int64(3);
534
535 let vec = FactorizedVector::flat(data);
536
537 assert_eq!(vec.range_for_parent(0), (0, 1));
538 assert_eq!(vec.range_for_parent(1), (1, 2));
539 assert_eq!(vec.range_for_parent(2), (2, 3));
540 }
541
542 #[test]
543 fn test_range_for_parent_out_of_bounds() {
544 let mut data = ValueVector::with_type(LogicalType::Int64);
545 data.push_int64(10);
546 data.push_int64(20);
547
548 let offsets = vec![0, 2];
549 let vec = FactorizedVector::unflat(data, offsets, 1);
550
551 assert_eq!(vec.range_for_parent(10), (0, 0));
553 }
554
555 #[test]
556 fn test_count_for_parent_out_of_bounds() {
557 let mut data = ValueVector::with_type(LogicalType::Int64);
558 data.push_int64(10);
559 data.push_int64(20);
560
561 let offsets = vec![0, 2];
562 let vec = FactorizedVector::unflat(data, offsets, 1);
563
564 assert_eq!(vec.count_for_parent(10), 0);
566 }
567
568 #[test]
569 fn test_get_for_parent() {
570 let mut data = ValueVector::with_type(LogicalType::Int64);
571 data.push_int64(10);
572 data.push_int64(20);
573 data.push_int64(30);
574
575 let offsets = vec![0, 2, 3];
576 let vec = FactorizedVector::unflat(data, offsets, 2);
577
578 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
580 assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
581 assert_eq!(vec.get_for_parent(0, 2), None); assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
585 assert_eq!(vec.get_for_parent(1, 1), None); }
587
588 #[test]
589 fn test_get_for_parent_flat() {
590 let mut data = ValueVector::with_type(LogicalType::Int64);
591 data.push_int64(10);
592 data.push_int64(20);
593
594 let vec = FactorizedVector::flat(data);
595
596 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
597 assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
598 assert_eq!(vec.get_for_parent(0, 1), None); }
600
601 #[test]
602 fn test_flatten_flat_no_mults() {
603 let mut data = ValueVector::with_type(LogicalType::Int64);
604 data.push_int64(1);
605 data.push_int64(2);
606 data.push_int64(3);
607
608 let vec = FactorizedVector::flat(data);
609 let flat = vec.flatten(None);
610
611 assert_eq!(flat.len(), 3);
612 assert_eq!(flat.get_int64(0), Some(1));
613 assert_eq!(flat.get_int64(2), Some(3));
614 }
615
616 #[test]
617 fn test_flatten_flat_with_mults() {
618 let mut data = ValueVector::with_type(LogicalType::Int64);
619 data.push_int64(10);
620 data.push_int64(20);
621
622 let vec = FactorizedVector::flat(data);
623
624 let mults = [3, 2];
626 let flat = vec.flatten(Some(&mults));
627
628 assert_eq!(flat.len(), 5);
629 assert_eq!(flat.get_int64(0), Some(10));
630 assert_eq!(flat.get_int64(1), Some(10));
631 assert_eq!(flat.get_int64(2), Some(10));
632 assert_eq!(flat.get_int64(3), Some(20));
633 assert_eq!(flat.get_int64(4), Some(20));
634 }
635
636 #[test]
637 fn test_flatten_unflat() {
638 let mut data = ValueVector::with_type(LogicalType::Int64);
639 data.push_int64(10);
640 data.push_int64(20);
641 data.push_int64(30);
642
643 let offsets = vec![0, 2, 3];
644 let vec = FactorizedVector::unflat(data, offsets, 2);
645
646 let flat = vec.flatten(None);
648 assert_eq!(flat.len(), 3);
649 assert_eq!(flat.get_int64(0), Some(10));
650 assert_eq!(flat.get_int64(1), Some(20));
651 assert_eq!(flat.get_int64(2), Some(30));
652 }
653
654 #[test]
655 fn test_flatten_with_multiplicities() {
656 let mut data = ValueVector::with_type(LogicalType::Int64);
657 data.push_int64(10);
658 data.push_int64(20);
659 data.push_int64(30);
660
661 let offsets = vec![0, 2, 3];
662 let vec = FactorizedVector::unflat(data, offsets, 2);
663
664 let mults = [2, 1];
667 let flat = vec.flatten(Some(&mults));
668
669 assert_eq!(flat.len(), 5);
670 assert_eq!(flat.get_int64(0), Some(10));
671 assert_eq!(flat.get_int64(1), Some(20));
672 assert_eq!(flat.get_int64(2), Some(10));
673 assert_eq!(flat.get_int64(3), Some(20));
674 assert_eq!(flat.get_int64(4), Some(30));
675 }
676
677 #[test]
678 fn test_logical_row_count_flat() {
679 let mut data = ValueVector::with_type(LogicalType::Int64);
680 data.push_int64(1);
681 data.push_int64(2);
682 data.push_int64(3);
683
684 let vec = FactorizedVector::flat(data);
685
686 assert_eq!(vec.logical_row_count(None), 3);
687 }
688
689 #[test]
690 fn test_logical_row_count() {
691 let mut data = ValueVector::with_type(LogicalType::Int64);
692 data.push_int64(10);
693 data.push_int64(20);
694 data.push_int64(30);
695
696 let offsets = vec![0, 2, 3];
697 let vec = FactorizedVector::unflat(data, offsets, 2);
698
699 assert_eq!(vec.logical_row_count(None), 3);
701
702 let mults = [2, 3];
704 assert_eq!(vec.logical_row_count(Some(&mults)), 7);
705 }
706
707 #[test]
708 fn test_logical_row_count_missing_mult() {
709 let mut data = ValueVector::with_type(LogicalType::Int64);
710 data.push_int64(10);
711 data.push_int64(20);
712 data.push_int64(30);
713
714 let offsets = vec![0, 2, 3];
715 let vec = FactorizedVector::unflat(data, offsets, 2);
716
717 let mults = [2];
719 assert_eq!(vec.logical_row_count(Some(&mults)), 5); }
721
722 #[test]
723 fn test_iter_with_parent_flat() {
724 let mut data = ValueVector::with_type(LogicalType::Int64);
725 data.push_int64(1);
726 data.push_int64(2);
727
728 let vec = FactorizedVector::flat(data);
729 let items: Vec<_> = vec.iter_with_parent().collect();
730
731 assert_eq!(items.len(), 2);
732 assert_eq!(items[0], (0, 0, Value::Int64(1)));
733 assert_eq!(items[1], (1, 1, Value::Int64(2)));
734 }
735
736 #[test]
737 fn test_iter_with_parent_unflat() {
738 let mut data = ValueVector::with_type(LogicalType::Int64);
739 data.push_int64(10);
740 data.push_int64(20);
741 data.push_int64(30);
742
743 let offsets = vec![0, 2, 3];
744 let vec = FactorizedVector::unflat(data, offsets, 2);
745 let items: Vec<_> = vec.iter_with_parent().collect();
746
747 assert_eq!(items.len(), 3);
748 assert_eq!(items[0], (0, 0, Value::Int64(10)));
749 assert_eq!(items[1], (0, 1, Value::Int64(20)));
750 assert_eq!(items[2], (1, 2, Value::Int64(30)));
751 }
752
753 #[test]
754 fn test_iter_with_parent_empty_parents() {
755 let mut data = ValueVector::with_type(LogicalType::Int64);
757 data.push_int64(10);
758 data.push_int64(20);
759
760 let offsets = vec![0, 0, 2, 2];
761 let vec = FactorizedVector::unflat(data, offsets, 3);
762 let items: Vec<_> = vec.iter_with_parent().collect();
763
764 assert_eq!(items.len(), 2);
766 assert_eq!(items[0], (1, 0, Value::Int64(10)));
767 assert_eq!(items[1], (1, 1, Value::Int64(20)));
768 }
769
770 #[test]
771 fn test_iter_with_parent_empty() {
772 let data = ValueVector::with_type(LogicalType::Int64);
773 let vec = FactorizedVector::flat(data);
774 let items: Vec<_> = vec.iter_with_parent().collect();
775
776 assert!(items.is_empty());
777 }
778
779 #[test]
780 fn test_empty_parents() {
781 let mut data = ValueVector::with_type(LogicalType::Int64);
783 data.push_int64(10);
784 data.push_int64(20);
785
786 let offsets = vec![0, 0, 2, 2]; let vec = FactorizedVector::unflat(data, offsets, 3);
788
789 assert_eq!(vec.count_for_parent(0), 0);
790 assert_eq!(vec.count_for_parent(1), 2);
791 assert_eq!(vec.count_for_parent(2), 0);
792 assert_eq!(vec.logical_row_count(None), 2);
793 }
794
795 #[test]
796 fn test_node_id_vector() {
797 let mut data = ValueVector::with_type(LogicalType::Node);
798 data.push_node_id(NodeId::new(100));
799 data.push_node_id(NodeId::new(200));
800 data.push_node_id(NodeId::new(300));
801
802 let offsets = vec![0, 2, 3];
803 let vec = FactorizedVector::unflat(data, offsets, 2);
804
805 assert_eq!(vec.count_for_parent(0), 2);
806 assert_eq!(vec.count_for_parent(1), 1);
807
808 let flat = vec.flatten(None);
810 assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
811 assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
812 assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
813 }
814
815 #[test]
816 fn test_factorized_state_debug() {
817 let state_flat = FactorizedState::Flat;
818 assert!(format!("{state_flat:?}").contains("Flat"));
819
820 let state_unflat = FactorizedState::Unflat(UnflatMetadata {
821 offsets: vec![0, 1],
822 parent_count: 1,
823 });
824 assert!(format!("{state_unflat:?}").contains("Unflat"));
825 }
826
827 #[test]
828 fn test_unflat_metadata_debug() {
829 let meta = UnflatMetadata {
830 offsets: vec![0, 2, 5],
831 parent_count: 2,
832 };
833 let debug = format!("{meta:?}");
834 assert!(debug.contains("offsets"));
835 assert!(debug.contains("parent_count"));
836 }
837
838 #[test]
839 fn test_factorized_vector_clone() {
840 let mut data = ValueVector::with_type(LogicalType::Int64);
841 data.push_int64(1);
842 data.push_int64(2);
843
844 let vec = FactorizedVector::flat(data);
845 let cloned = vec.clone();
846
847 assert_eq!(vec.physical_len(), cloned.physical_len());
848 assert_eq!(vec.is_flat(), cloned.is_flat());
849 }
850}