1use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37 data: ValueVector,
39 state: FactorizedState,
41}
42
43#[derive(Debug, Clone)]
45pub enum FactorizedState {
46 Flat,
48 Unflat(UnflatMetadata),
50}
51
52#[derive(Debug, Clone)]
56pub struct UnflatMetadata {
57 offsets: Vec<u32>,
60 parent_count: usize,
62}
63
64impl FactorizedVector {
65 #[must_use]
69 pub fn flat(data: ValueVector) -> Self {
70 Self {
71 data,
72 state: FactorizedState::Flat,
73 }
74 }
75
76 #[must_use]
89 pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
90 debug_assert_eq!(
91 offsets.len(),
92 parent_count + 1,
93 "offsets must have length parent_count + 1"
94 );
95 debug_assert!(
96 offsets.last().copied() == Some(data.len() as u32),
97 "last offset must equal data length"
98 );
99
100 Self {
101 data,
102 state: FactorizedState::Unflat(UnflatMetadata {
103 offsets,
104 parent_count,
105 }),
106 }
107 }
108
109 #[must_use]
111 pub fn empty(data_type: LogicalType) -> Self {
112 Self::flat(ValueVector::with_type(data_type))
113 }
114
115 #[must_use]
117 pub fn is_flat(&self) -> bool {
118 matches!(self.state, FactorizedState::Flat)
119 }
120
121 #[must_use]
123 pub fn is_unflat(&self) -> bool {
124 matches!(self.state, FactorizedState::Unflat(_))
125 }
126
127 #[must_use]
131 pub fn physical_len(&self) -> usize {
132 self.data.len()
133 }
134
135 #[must_use]
137 pub fn data_type(&self) -> LogicalType {
138 self.data.logical_type()
139 }
140
141 #[must_use]
143 pub fn data(&self) -> &ValueVector {
144 &self.data
145 }
146
147 pub fn data_mut(&mut self) -> &mut ValueVector {
151 &mut self.data
152 }
153
154 #[must_use]
156 pub fn offsets(&self) -> Option<&[u32]> {
157 match &self.state {
158 FactorizedState::Flat => None,
159 FactorizedState::Unflat(meta) => Some(&meta.offsets),
160 }
161 }
162
163 #[must_use]
167 pub fn parent_count(&self) -> usize {
168 match &self.state {
169 FactorizedState::Flat => self.data.len(),
170 FactorizedState::Unflat(meta) => meta.parent_count,
171 }
172 }
173
174 #[must_use]
178 pub fn count_for_parent(&self, parent_idx: usize) -> usize {
179 match &self.state {
180 FactorizedState::Flat => 1,
181 FactorizedState::Unflat(meta) => {
182 if parent_idx >= meta.parent_count {
183 return 0;
184 }
185 let start = meta.offsets[parent_idx] as usize;
186 let end = meta.offsets[parent_idx + 1] as usize;
187 end - start
188 }
189 }
190 }
191
192 #[must_use]
196 pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
197 match &self.state {
198 FactorizedState::Flat => (parent_idx, parent_idx + 1),
199 FactorizedState::Unflat(meta) => {
200 if parent_idx >= meta.parent_count {
201 return (0, 0);
202 }
203 let start = meta.offsets[parent_idx] as usize;
204 let end = meta.offsets[parent_idx + 1] as usize;
205 (start, end)
206 }
207 }
208 }
209
210 #[must_use]
212 pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
213 self.data.get_value(physical_idx)
214 }
215
216 #[must_use]
218 pub fn get_node_id_physical(
219 &self,
220 physical_idx: usize,
221 ) -> Option<grafeo_common::types::NodeId> {
222 self.data.get_node_id(physical_idx)
223 }
224
225 #[must_use]
227 pub fn get_edge_id_physical(
228 &self,
229 physical_idx: usize,
230 ) -> Option<grafeo_common::types::EdgeId> {
231 self.data.get_edge_id(physical_idx)
232 }
233
234 #[must_use]
238 pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
239 let (start, end) = self.range_for_parent(parent_idx);
240 let physical_idx = start + relative_idx;
241 if physical_idx >= end {
242 return None;
243 }
244 self.data.get_value(physical_idx)
245 }
246
247 #[must_use]
258 pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
259 match &self.state {
260 FactorizedState::Flat => self.data.len(),
261 FactorizedState::Unflat(meta) => {
262 let mut total = 0;
263 for i in 0..meta.parent_count {
264 let count = self.count_for_parent(i);
265 let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
266 total += count * mult;
267 }
268 total
269 }
270 }
271 }
272
273 #[must_use]
283 pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
284 match &self.state {
285 FactorizedState::Flat => {
286 if let Some(mults) = parent_multiplicities {
288 let capacity = mults.iter().sum();
289 let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
290 for (i, &mult) in mults.iter().enumerate() {
291 if let Some(value) = self.data.get_value(i) {
292 for _ in 0..mult {
293 result.push_value(value.clone());
294 }
295 }
296 }
297 result
298 } else {
299 self.data.clone()
300 }
301 }
302 FactorizedState::Unflat(meta) => {
303 let capacity = self.logical_row_count(parent_multiplicities);
304 let mut result =
305 ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
306
307 for parent_idx in 0..meta.parent_count {
308 let mult = parent_multiplicities
309 .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
310 let (start, end) = self.range_for_parent(parent_idx);
311
312 for _ in 0..mult {
313 for phys_idx in start..end {
314 if let Some(value) = self.data.get_value(phys_idx) {
315 result.push_value(value);
316 }
317 }
318 }
319 }
320 result
321 }
322 }
323 }
324
325 pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
327 FactorizedVectorIter {
328 vector: self,
329 parent_idx: 0,
330 physical_idx: 0,
331 }
332 }
333}
334
335struct FactorizedVectorIter<'a> {
337 vector: &'a FactorizedVector,
338 parent_idx: usize,
339 physical_idx: usize,
340}
341
342impl Iterator for FactorizedVectorIter<'_> {
343 type Item = (usize, usize, Value);
344
345 fn next(&mut self) -> Option<Self::Item> {
346 match &self.vector.state {
347 FactorizedState::Flat => {
348 if self.physical_idx >= self.vector.data.len() {
349 return None;
350 }
351 let value = self.vector.data.get_value(self.physical_idx)?;
352 let result = (self.physical_idx, self.physical_idx, value);
353 self.physical_idx += 1;
354 self.parent_idx += 1;
355 Some(result)
356 }
357 FactorizedState::Unflat(meta) => {
358 while self.parent_idx < meta.parent_count {
360 let (_start, end) = self.vector.range_for_parent(self.parent_idx);
361 if self.physical_idx < end {
362 let value = self.vector.data.get_value(self.physical_idx)?;
363 let result = (self.parent_idx, self.physical_idx, value);
364 self.physical_idx += 1;
365 return Some(result);
366 }
367 self.parent_idx += 1;
368 if self.parent_idx < meta.parent_count {
369 let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
370 self.physical_idx = new_start;
371 }
372 }
373 None
374 }
375 }
376 }
377}
378
379#[cfg(test)]
380mod tests {
381 use grafeo_common::types::{EdgeId, NodeId};
382
383 use super::*;
384
385 #[test]
386 fn test_flat_vector() {
387 let mut data = ValueVector::with_type(LogicalType::Int64);
388 data.push_int64(1);
389 data.push_int64(2);
390 data.push_int64(3);
391
392 let vec = FactorizedVector::flat(data);
393
394 assert!(vec.is_flat());
395 assert!(!vec.is_unflat());
396 assert_eq!(vec.physical_len(), 3);
397 assert_eq!(vec.parent_count(), 3);
398 assert_eq!(vec.count_for_parent(0), 1);
399 assert_eq!(vec.count_for_parent(1), 1);
400 assert_eq!(vec.count_for_parent(2), 1);
401 }
402
403 #[test]
404 fn test_unflat_vector() {
405 let mut data = ValueVector::with_type(LogicalType::Int64);
407 data.push_int64(10);
408 data.push_int64(20);
409 data.push_int64(30);
410
411 let offsets = vec![0, 2, 3]; let vec = FactorizedVector::unflat(data, offsets, 2);
413
414 assert!(!vec.is_flat());
415 assert!(vec.is_unflat());
416 assert_eq!(vec.physical_len(), 3);
417 assert_eq!(vec.parent_count(), 2);
418 assert_eq!(vec.count_for_parent(0), 2);
419 assert_eq!(vec.count_for_parent(1), 1);
420 assert_eq!(vec.range_for_parent(0), (0, 2));
421 assert_eq!(vec.range_for_parent(1), (2, 3));
422 }
423
424 #[test]
425 fn test_empty_vector() {
426 let vec = FactorizedVector::empty(LogicalType::Int64);
427
428 assert!(vec.is_flat());
429 assert_eq!(vec.physical_len(), 0);
430 assert_eq!(vec.data_type(), LogicalType::Int64);
431 }
432
433 #[test]
434 fn test_data_type() {
435 let mut data = ValueVector::with_type(LogicalType::String);
436 data.push_string("hello");
437
438 let vec = FactorizedVector::flat(data);
439 assert_eq!(vec.data_type(), LogicalType::String);
440 }
441
442 #[test]
443 fn test_data_access() {
444 let mut data = ValueVector::with_type(LogicalType::Int64);
445 data.push_int64(42);
446
447 let vec = FactorizedVector::flat(data);
448
449 assert_eq!(vec.data().len(), 1);
451 assert_eq!(vec.data().get_int64(0), Some(42));
452 }
453
454 #[test]
455 fn test_data_mut() {
456 let mut data = ValueVector::with_type(LogicalType::Int64);
457 data.push_int64(42);
458
459 let mut vec = FactorizedVector::flat(data);
460
461 vec.data_mut().push_int64(100);
463 assert_eq!(vec.physical_len(), 2);
464 }
465
466 #[test]
467 fn test_offsets_flat() {
468 let mut data = ValueVector::with_type(LogicalType::Int64);
469 data.push_int64(1);
470
471 let vec = FactorizedVector::flat(data);
472 assert!(vec.offsets().is_none());
473 }
474
475 #[test]
476 fn test_offsets_unflat() {
477 let mut data = ValueVector::with_type(LogicalType::Int64);
478 data.push_int64(10);
479 data.push_int64(20);
480 data.push_int64(30);
481
482 let offsets = vec![0, 2, 3];
483 let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
484
485 assert_eq!(vec.offsets(), Some(offsets.as_slice()));
486 }
487
488 #[test]
489 fn test_get_physical() {
490 let mut data = ValueVector::with_type(LogicalType::Int64);
491 data.push_int64(10);
492 data.push_int64(20);
493
494 let vec = FactorizedVector::flat(data);
495
496 assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
497 assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
498 assert_eq!(vec.get_physical(2), None);
499 }
500
501 #[test]
502 fn test_get_node_id_physical() {
503 let mut data = ValueVector::with_type(LogicalType::Node);
504 data.push_node_id(NodeId::new(100));
505 data.push_node_id(NodeId::new(200));
506
507 let vec = FactorizedVector::flat(data);
508
509 assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
510 assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
511 assert_eq!(vec.get_node_id_physical(2), None);
512 }
513
514 #[test]
515 fn test_get_edge_id_physical() {
516 let mut data = ValueVector::with_type(LogicalType::Edge);
517 data.push_edge_id(EdgeId::new(100));
518 data.push_edge_id(EdgeId::new(200));
519
520 let vec = FactorizedVector::flat(data);
521
522 assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
523 assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
524 assert_eq!(vec.get_edge_id_physical(2), None);
525 }
526
527 #[test]
528 fn test_range_for_parent_flat() {
529 let mut data = ValueVector::with_type(LogicalType::Int64);
530 data.push_int64(1);
531 data.push_int64(2);
532 data.push_int64(3);
533
534 let vec = FactorizedVector::flat(data);
535
536 assert_eq!(vec.range_for_parent(0), (0, 1));
537 assert_eq!(vec.range_for_parent(1), (1, 2));
538 assert_eq!(vec.range_for_parent(2), (2, 3));
539 }
540
541 #[test]
542 fn test_range_for_parent_out_of_bounds() {
543 let mut data = ValueVector::with_type(LogicalType::Int64);
544 data.push_int64(10);
545 data.push_int64(20);
546
547 let offsets = vec![0, 2];
548 let vec = FactorizedVector::unflat(data, offsets, 1);
549
550 assert_eq!(vec.range_for_parent(10), (0, 0));
552 }
553
554 #[test]
555 fn test_count_for_parent_out_of_bounds() {
556 let mut data = ValueVector::with_type(LogicalType::Int64);
557 data.push_int64(10);
558 data.push_int64(20);
559
560 let offsets = vec![0, 2];
561 let vec = FactorizedVector::unflat(data, offsets, 1);
562
563 assert_eq!(vec.count_for_parent(10), 0);
565 }
566
567 #[test]
568 fn test_get_for_parent() {
569 let mut data = ValueVector::with_type(LogicalType::Int64);
570 data.push_int64(10);
571 data.push_int64(20);
572 data.push_int64(30);
573
574 let offsets = vec![0, 2, 3];
575 let vec = FactorizedVector::unflat(data, offsets, 2);
576
577 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
579 assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
580 assert_eq!(vec.get_for_parent(0, 2), None); assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
584 assert_eq!(vec.get_for_parent(1, 1), None); }
586
587 #[test]
588 fn test_get_for_parent_flat() {
589 let mut data = ValueVector::with_type(LogicalType::Int64);
590 data.push_int64(10);
591 data.push_int64(20);
592
593 let vec = FactorizedVector::flat(data);
594
595 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
596 assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
597 assert_eq!(vec.get_for_parent(0, 1), None); }
599
600 #[test]
601 fn test_flatten_flat_no_mults() {
602 let mut data = ValueVector::with_type(LogicalType::Int64);
603 data.push_int64(1);
604 data.push_int64(2);
605 data.push_int64(3);
606
607 let vec = FactorizedVector::flat(data);
608 let flat = vec.flatten(None);
609
610 assert_eq!(flat.len(), 3);
611 assert_eq!(flat.get_int64(0), Some(1));
612 assert_eq!(flat.get_int64(2), Some(3));
613 }
614
615 #[test]
616 fn test_flatten_flat_with_mults() {
617 let mut data = ValueVector::with_type(LogicalType::Int64);
618 data.push_int64(10);
619 data.push_int64(20);
620
621 let vec = FactorizedVector::flat(data);
622
623 let mults = [3, 2];
625 let flat = vec.flatten(Some(&mults));
626
627 assert_eq!(flat.len(), 5);
628 assert_eq!(flat.get_int64(0), Some(10));
629 assert_eq!(flat.get_int64(1), Some(10));
630 assert_eq!(flat.get_int64(2), Some(10));
631 assert_eq!(flat.get_int64(3), Some(20));
632 assert_eq!(flat.get_int64(4), Some(20));
633 }
634
635 #[test]
636 fn test_flatten_unflat() {
637 let mut data = ValueVector::with_type(LogicalType::Int64);
638 data.push_int64(10);
639 data.push_int64(20);
640 data.push_int64(30);
641
642 let offsets = vec![0, 2, 3];
643 let vec = FactorizedVector::unflat(data, offsets, 2);
644
645 let flat = vec.flatten(None);
647 assert_eq!(flat.len(), 3);
648 assert_eq!(flat.get_int64(0), Some(10));
649 assert_eq!(flat.get_int64(1), Some(20));
650 assert_eq!(flat.get_int64(2), Some(30));
651 }
652
653 #[test]
654 fn test_flatten_with_multiplicities() {
655 let mut data = ValueVector::with_type(LogicalType::Int64);
656 data.push_int64(10);
657 data.push_int64(20);
658 data.push_int64(30);
659
660 let offsets = vec![0, 2, 3];
661 let vec = FactorizedVector::unflat(data, offsets, 2);
662
663 let mults = [2, 1];
666 let flat = vec.flatten(Some(&mults));
667
668 assert_eq!(flat.len(), 5);
669 assert_eq!(flat.get_int64(0), Some(10));
670 assert_eq!(flat.get_int64(1), Some(20));
671 assert_eq!(flat.get_int64(2), Some(10));
672 assert_eq!(flat.get_int64(3), Some(20));
673 assert_eq!(flat.get_int64(4), Some(30));
674 }
675
676 #[test]
677 fn test_logical_row_count_flat() {
678 let mut data = ValueVector::with_type(LogicalType::Int64);
679 data.push_int64(1);
680 data.push_int64(2);
681 data.push_int64(3);
682
683 let vec = FactorizedVector::flat(data);
684
685 assert_eq!(vec.logical_row_count(None), 3);
686 }
687
688 #[test]
689 fn test_logical_row_count() {
690 let mut data = ValueVector::with_type(LogicalType::Int64);
691 data.push_int64(10);
692 data.push_int64(20);
693 data.push_int64(30);
694
695 let offsets = vec![0, 2, 3];
696 let vec = FactorizedVector::unflat(data, offsets, 2);
697
698 assert_eq!(vec.logical_row_count(None), 3);
700
701 let mults = [2, 3];
703 assert_eq!(vec.logical_row_count(Some(&mults)), 7);
704 }
705
706 #[test]
707 fn test_logical_row_count_missing_mult() {
708 let mut data = ValueVector::with_type(LogicalType::Int64);
709 data.push_int64(10);
710 data.push_int64(20);
711 data.push_int64(30);
712
713 let offsets = vec![0, 2, 3];
714 let vec = FactorizedVector::unflat(data, offsets, 2);
715
716 let mults = [2];
718 assert_eq!(vec.logical_row_count(Some(&mults)), 5); }
720
721 #[test]
722 fn test_iter_with_parent_flat() {
723 let mut data = ValueVector::with_type(LogicalType::Int64);
724 data.push_int64(1);
725 data.push_int64(2);
726
727 let vec = FactorizedVector::flat(data);
728 let items: Vec<_> = vec.iter_with_parent().collect();
729
730 assert_eq!(items.len(), 2);
731 assert_eq!(items[0], (0, 0, Value::Int64(1)));
732 assert_eq!(items[1], (1, 1, Value::Int64(2)));
733 }
734
735 #[test]
736 fn test_iter_with_parent_unflat() {
737 let mut data = ValueVector::with_type(LogicalType::Int64);
738 data.push_int64(10);
739 data.push_int64(20);
740 data.push_int64(30);
741
742 let offsets = vec![0, 2, 3];
743 let vec = FactorizedVector::unflat(data, offsets, 2);
744 let items: Vec<_> = vec.iter_with_parent().collect();
745
746 assert_eq!(items.len(), 3);
747 assert_eq!(items[0], (0, 0, Value::Int64(10)));
748 assert_eq!(items[1], (0, 1, Value::Int64(20)));
749 assert_eq!(items[2], (1, 2, Value::Int64(30)));
750 }
751
752 #[test]
753 fn test_iter_with_parent_empty_parents() {
754 let mut data = ValueVector::with_type(LogicalType::Int64);
756 data.push_int64(10);
757 data.push_int64(20);
758
759 let offsets = vec![0, 0, 2, 2];
760 let vec = FactorizedVector::unflat(data, offsets, 3);
761 let items: Vec<_> = vec.iter_with_parent().collect();
762
763 assert_eq!(items.len(), 2);
765 assert_eq!(items[0], (1, 0, Value::Int64(10)));
766 assert_eq!(items[1], (1, 1, Value::Int64(20)));
767 }
768
769 #[test]
770 fn test_iter_with_parent_empty() {
771 let data = ValueVector::with_type(LogicalType::Int64);
772 let vec = FactorizedVector::flat(data);
773 let items: Vec<_> = vec.iter_with_parent().collect();
774
775 assert!(items.is_empty());
776 }
777
778 #[test]
779 fn test_empty_parents() {
780 let mut data = ValueVector::with_type(LogicalType::Int64);
782 data.push_int64(10);
783 data.push_int64(20);
784
785 let offsets = vec![0, 0, 2, 2]; let vec = FactorizedVector::unflat(data, offsets, 3);
787
788 assert_eq!(vec.count_for_parent(0), 0);
789 assert_eq!(vec.count_for_parent(1), 2);
790 assert_eq!(vec.count_for_parent(2), 0);
791 assert_eq!(vec.logical_row_count(None), 2);
792 }
793
794 #[test]
795 fn test_node_id_vector() {
796 let mut data = ValueVector::with_type(LogicalType::Node);
797 data.push_node_id(NodeId::new(100));
798 data.push_node_id(NodeId::new(200));
799 data.push_node_id(NodeId::new(300));
800
801 let offsets = vec![0, 2, 3];
802 let vec = FactorizedVector::unflat(data, offsets, 2);
803
804 assert_eq!(vec.count_for_parent(0), 2);
805 assert_eq!(vec.count_for_parent(1), 1);
806
807 let flat = vec.flatten(None);
809 assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
810 assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
811 assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
812 }
813
814 #[test]
815 fn test_factorized_state_debug() {
816 let state_flat = FactorizedState::Flat;
817 assert!(format!("{state_flat:?}").contains("Flat"));
818
819 let state_unflat = FactorizedState::Unflat(UnflatMetadata {
820 offsets: vec![0, 1],
821 parent_count: 1,
822 });
823 assert!(format!("{state_unflat:?}").contains("Unflat"));
824 }
825
826 #[test]
827 fn test_unflat_metadata_debug() {
828 let meta = UnflatMetadata {
829 offsets: vec![0, 2, 5],
830 parent_count: 2,
831 };
832 let debug = format!("{meta:?}");
833 assert!(debug.contains("offsets"));
834 assert!(debug.contains("parent_count"));
835 }
836
837 #[test]
838 fn test_factorized_vector_clone() {
839 let mut data = ValueVector::with_type(LogicalType::Int64);
840 data.push_int64(1);
841 data.push_int64(2);
842
843 let vec = FactorizedVector::flat(data);
844 let cloned = vec.clone();
845
846 assert_eq!(vec.physical_len(), cloned.physical_len());
847 assert_eq!(vec.is_flat(), cloned.is_flat());
848 }
849}