1use grafeo_common::types::{LogicalType, Value};
28
29use super::vector::ValueVector;
30
31#[derive(Debug, Clone)]
36pub struct FactorizedVector {
37 data: ValueVector,
39 state: FactorizedState,
41}
42
43#[derive(Debug, Clone)]
45#[non_exhaustive]
46pub enum FactorizedState {
47 Flat,
49 Unflat(UnflatMetadata),
51}
52
53#[derive(Debug, Clone)]
57pub struct UnflatMetadata {
58 offsets: Vec<u32>,
61 parent_count: usize,
63}
64
65impl FactorizedVector {
66 #[must_use]
70 pub fn flat(data: ValueVector) -> Self {
71 Self {
72 data,
73 state: FactorizedState::Flat,
74 }
75 }
76
77 #[must_use]
90 pub fn unflat(data: ValueVector, offsets: Vec<u32>, parent_count: usize) -> Self {
91 debug_assert_eq!(
92 offsets.len(),
93 parent_count + 1,
94 "offsets must have length parent_count + 1"
95 );
96 #[allow(clippy::cast_possible_truncation)]
98 let data_len_u32 = data.len() as u32;
99 debug_assert!(
100 offsets.last().copied() == Some(data_len_u32),
101 "last offset must equal data length"
102 );
103
104 Self {
105 data,
106 state: FactorizedState::Unflat(UnflatMetadata {
107 offsets,
108 parent_count,
109 }),
110 }
111 }
112
113 #[must_use]
115 pub fn empty(data_type: LogicalType) -> Self {
116 Self::flat(ValueVector::with_type(data_type))
117 }
118
119 #[must_use]
121 pub fn is_flat(&self) -> bool {
122 matches!(self.state, FactorizedState::Flat)
123 }
124
125 #[must_use]
127 pub fn is_unflat(&self) -> bool {
128 matches!(self.state, FactorizedState::Unflat(_))
129 }
130
131 #[must_use]
135 pub fn physical_len(&self) -> usize {
136 self.data.len()
137 }
138
139 #[must_use]
141 pub fn data_type(&self) -> LogicalType {
142 self.data.logical_type()
143 }
144
145 #[must_use]
147 pub fn data(&self) -> &ValueVector {
148 &self.data
149 }
150
151 pub fn data_mut(&mut self) -> &mut ValueVector {
155 &mut self.data
156 }
157
158 #[must_use]
160 pub fn offsets(&self) -> Option<&[u32]> {
161 match &self.state {
162 FactorizedState::Flat => None,
163 FactorizedState::Unflat(meta) => Some(&meta.offsets),
164 }
165 }
166
167 #[must_use]
171 pub fn parent_count(&self) -> usize {
172 match &self.state {
173 FactorizedState::Flat => self.data.len(),
174 FactorizedState::Unflat(meta) => meta.parent_count,
175 }
176 }
177
178 #[must_use]
182 pub fn count_for_parent(&self, parent_idx: usize) -> usize {
183 match &self.state {
184 FactorizedState::Flat => 1,
185 FactorizedState::Unflat(meta) => {
186 if parent_idx >= meta.parent_count {
187 return 0;
188 }
189 let start = meta.offsets[parent_idx] as usize;
190 let end = meta.offsets[parent_idx + 1] as usize;
191 end - start
192 }
193 }
194 }
195
196 #[must_use]
200 pub fn range_for_parent(&self, parent_idx: usize) -> (usize, usize) {
201 match &self.state {
202 FactorizedState::Flat => (parent_idx, parent_idx + 1),
203 FactorizedState::Unflat(meta) => {
204 if parent_idx >= meta.parent_count {
205 return (0, 0);
206 }
207 let start = meta.offsets[parent_idx] as usize;
208 let end = meta.offsets[parent_idx + 1] as usize;
209 (start, end)
210 }
211 }
212 }
213
214 #[must_use]
216 pub fn get_physical(&self, physical_idx: usize) -> Option<Value> {
217 self.data.get_value(physical_idx)
218 }
219
220 #[must_use]
222 pub fn get_node_id_physical(
223 &self,
224 physical_idx: usize,
225 ) -> Option<grafeo_common::types::NodeId> {
226 self.data.get_node_id(physical_idx)
227 }
228
229 #[must_use]
231 pub fn get_edge_id_physical(
232 &self,
233 physical_idx: usize,
234 ) -> Option<grafeo_common::types::EdgeId> {
235 self.data.get_edge_id(physical_idx)
236 }
237
238 #[must_use]
242 pub fn get_for_parent(&self, parent_idx: usize, relative_idx: usize) -> Option<Value> {
243 let (start, end) = self.range_for_parent(parent_idx);
244 let physical_idx = start + relative_idx;
245 if physical_idx >= end {
246 return None;
247 }
248 self.data.get_value(physical_idx)
249 }
250
251 #[must_use]
262 pub fn logical_row_count(&self, parent_multiplicities: Option<&[usize]>) -> usize {
263 match &self.state {
264 FactorizedState::Flat => self.data.len(),
265 FactorizedState::Unflat(meta) => {
266 let mut total = 0;
267 for i in 0..meta.parent_count {
268 let count = self.count_for_parent(i);
269 let mult = parent_multiplicities.map_or(1, |m| m.get(i).copied().unwrap_or(1));
270 total += count * mult;
271 }
272 total
273 }
274 }
275 }
276
277 #[must_use]
287 pub fn flatten(&self, parent_multiplicities: Option<&[usize]>) -> ValueVector {
288 match &self.state {
289 FactorizedState::Flat => {
290 if let Some(mults) = parent_multiplicities {
292 let capacity = mults.iter().sum();
293 let mut result = ValueVector::with_capacity(self.data.logical_type(), capacity);
294 for (i, &mult) in mults.iter().enumerate() {
295 if let Some(value) = self.data.get_value(i) {
296 for _ in 0..mult {
297 result.push_value(value.clone());
298 }
299 }
300 }
301 result
302 } else {
303 self.data.clone()
304 }
305 }
306 FactorizedState::Unflat(meta) => {
307 let capacity = self.logical_row_count(parent_multiplicities);
308 let mut result =
309 ValueVector::with_capacity(self.data.logical_type(), capacity.max(1));
310
311 for parent_idx in 0..meta.parent_count {
312 let mult = parent_multiplicities
313 .map_or(1, |m| m.get(parent_idx).copied().unwrap_or(1));
314 let (start, end) = self.range_for_parent(parent_idx);
315
316 for _ in 0..mult {
317 for phys_idx in start..end {
318 if let Some(value) = self.data.get_value(phys_idx) {
319 result.push_value(value);
320 }
321 }
322 }
323 }
324 result
325 }
326 }
327 }
328
329 pub fn iter_with_parent(&self) -> impl Iterator<Item = (usize, usize, Value)> + '_ {
331 FactorizedVectorIter {
332 vector: self,
333 parent_idx: 0,
334 physical_idx: 0,
335 }
336 }
337}
338
339struct FactorizedVectorIter<'a> {
341 vector: &'a FactorizedVector,
342 parent_idx: usize,
343 physical_idx: usize,
344}
345
346impl Iterator for FactorizedVectorIter<'_> {
347 type Item = (usize, usize, Value);
348
349 fn next(&mut self) -> Option<Self::Item> {
350 match &self.vector.state {
351 FactorizedState::Flat => {
352 if self.physical_idx >= self.vector.data.len() {
353 return None;
354 }
355 let value = self.vector.data.get_value(self.physical_idx)?;
356 let result = (self.physical_idx, self.physical_idx, value);
357 self.physical_idx += 1;
358 self.parent_idx += 1;
359 Some(result)
360 }
361 FactorizedState::Unflat(meta) => {
362 while self.parent_idx < meta.parent_count {
364 let (_start, end) = self.vector.range_for_parent(self.parent_idx);
365 if self.physical_idx < end {
366 let value = self.vector.data.get_value(self.physical_idx)?;
367 let result = (self.parent_idx, self.physical_idx, value);
368 self.physical_idx += 1;
369 return Some(result);
370 }
371 self.parent_idx += 1;
372 if self.parent_idx < meta.parent_count {
373 let (new_start, _) = self.vector.range_for_parent(self.parent_idx);
374 self.physical_idx = new_start;
375 }
376 }
377 None
378 }
379 }
380 }
381}
382
383#[cfg(test)]
384mod tests {
385 use grafeo_common::types::{EdgeId, NodeId};
386
387 use super::*;
388
389 #[test]
390 fn test_flat_vector() {
391 let mut data = ValueVector::with_type(LogicalType::Int64);
392 data.push_int64(1);
393 data.push_int64(2);
394 data.push_int64(3);
395
396 let vec = FactorizedVector::flat(data);
397
398 assert!(vec.is_flat());
399 assert!(!vec.is_unflat());
400 assert_eq!(vec.physical_len(), 3);
401 assert_eq!(vec.parent_count(), 3);
402 assert_eq!(vec.count_for_parent(0), 1);
403 assert_eq!(vec.count_for_parent(1), 1);
404 assert_eq!(vec.count_for_parent(2), 1);
405 }
406
407 #[test]
408 fn test_unflat_vector() {
409 let mut data = ValueVector::with_type(LogicalType::Int64);
411 data.push_int64(10);
412 data.push_int64(20);
413 data.push_int64(30);
414
415 let offsets = vec![0, 2, 3]; let vec = FactorizedVector::unflat(data, offsets, 2);
417
418 assert!(!vec.is_flat());
419 assert!(vec.is_unflat());
420 assert_eq!(vec.physical_len(), 3);
421 assert_eq!(vec.parent_count(), 2);
422 assert_eq!(vec.count_for_parent(0), 2);
423 assert_eq!(vec.count_for_parent(1), 1);
424 assert_eq!(vec.range_for_parent(0), (0, 2));
425 assert_eq!(vec.range_for_parent(1), (2, 3));
426 }
427
428 #[test]
429 fn test_empty_vector() {
430 let vec = FactorizedVector::empty(LogicalType::Int64);
431
432 assert!(vec.is_flat());
433 assert_eq!(vec.physical_len(), 0);
434 assert_eq!(vec.data_type(), LogicalType::Int64);
435 }
436
437 #[test]
438 fn test_data_type() {
439 let mut data = ValueVector::with_type(LogicalType::String);
440 data.push_string("hello");
441
442 let vec = FactorizedVector::flat(data);
443 assert_eq!(vec.data_type(), LogicalType::String);
444 }
445
446 #[test]
447 fn test_data_access() {
448 let mut data = ValueVector::with_type(LogicalType::Int64);
449 data.push_int64(42);
450
451 let vec = FactorizedVector::flat(data);
452
453 assert_eq!(vec.data().len(), 1);
455 assert_eq!(vec.data().get_int64(0), Some(42));
456 }
457
458 #[test]
459 fn test_data_mut() {
460 let mut data = ValueVector::with_type(LogicalType::Int64);
461 data.push_int64(42);
462
463 let mut vec = FactorizedVector::flat(data);
464
465 vec.data_mut().push_int64(100);
467 assert_eq!(vec.physical_len(), 2);
468 }
469
470 #[test]
471 fn test_offsets_flat() {
472 let mut data = ValueVector::with_type(LogicalType::Int64);
473 data.push_int64(1);
474
475 let vec = FactorizedVector::flat(data);
476 assert!(vec.offsets().is_none());
477 }
478
479 #[test]
480 fn test_offsets_unflat() {
481 let mut data = ValueVector::with_type(LogicalType::Int64);
482 data.push_int64(10);
483 data.push_int64(20);
484 data.push_int64(30);
485
486 let offsets = vec![0, 2, 3];
487 let vec = FactorizedVector::unflat(data, offsets.clone(), 2);
488
489 assert_eq!(vec.offsets(), Some(offsets.as_slice()));
490 }
491
492 #[test]
493 fn test_get_physical() {
494 let mut data = ValueVector::with_type(LogicalType::Int64);
495 data.push_int64(10);
496 data.push_int64(20);
497
498 let vec = FactorizedVector::flat(data);
499
500 assert_eq!(vec.get_physical(0), Some(Value::Int64(10)));
501 assert_eq!(vec.get_physical(1), Some(Value::Int64(20)));
502 assert_eq!(vec.get_physical(2), None);
503 }
504
505 #[test]
506 fn test_get_node_id_physical() {
507 let mut data = ValueVector::with_type(LogicalType::Node);
508 data.push_node_id(NodeId::new(100));
509 data.push_node_id(NodeId::new(200));
510
511 let vec = FactorizedVector::flat(data);
512
513 assert_eq!(vec.get_node_id_physical(0), Some(NodeId::new(100)));
514 assert_eq!(vec.get_node_id_physical(1), Some(NodeId::new(200)));
515 assert_eq!(vec.get_node_id_physical(2), None);
516 }
517
518 #[test]
519 fn test_get_edge_id_physical() {
520 let mut data = ValueVector::with_type(LogicalType::Edge);
521 data.push_edge_id(EdgeId::new(100));
522 data.push_edge_id(EdgeId::new(200));
523
524 let vec = FactorizedVector::flat(data);
525
526 assert_eq!(vec.get_edge_id_physical(0), Some(EdgeId::new(100)));
527 assert_eq!(vec.get_edge_id_physical(1), Some(EdgeId::new(200)));
528 assert_eq!(vec.get_edge_id_physical(2), None);
529 }
530
531 #[test]
532 fn test_range_for_parent_flat() {
533 let mut data = ValueVector::with_type(LogicalType::Int64);
534 data.push_int64(1);
535 data.push_int64(2);
536 data.push_int64(3);
537
538 let vec = FactorizedVector::flat(data);
539
540 assert_eq!(vec.range_for_parent(0), (0, 1));
541 assert_eq!(vec.range_for_parent(1), (1, 2));
542 assert_eq!(vec.range_for_parent(2), (2, 3));
543 }
544
545 #[test]
546 fn test_range_for_parent_out_of_bounds() {
547 let mut data = ValueVector::with_type(LogicalType::Int64);
548 data.push_int64(10);
549 data.push_int64(20);
550
551 let offsets = vec![0, 2];
552 let vec = FactorizedVector::unflat(data, offsets, 1);
553
554 assert_eq!(vec.range_for_parent(10), (0, 0));
556 }
557
558 #[test]
559 fn test_count_for_parent_out_of_bounds() {
560 let mut data = ValueVector::with_type(LogicalType::Int64);
561 data.push_int64(10);
562 data.push_int64(20);
563
564 let offsets = vec![0, 2];
565 let vec = FactorizedVector::unflat(data, offsets, 1);
566
567 assert_eq!(vec.count_for_parent(10), 0);
569 }
570
571 #[test]
572 fn test_get_for_parent() {
573 let mut data = ValueVector::with_type(LogicalType::Int64);
574 data.push_int64(10);
575 data.push_int64(20);
576 data.push_int64(30);
577
578 let offsets = vec![0, 2, 3];
579 let vec = FactorizedVector::unflat(data, offsets, 2);
580
581 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
583 assert_eq!(vec.get_for_parent(0, 1), Some(Value::Int64(20)));
584 assert_eq!(vec.get_for_parent(0, 2), None); assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(30)));
588 assert_eq!(vec.get_for_parent(1, 1), None); }
590
591 #[test]
592 fn test_get_for_parent_flat() {
593 let mut data = ValueVector::with_type(LogicalType::Int64);
594 data.push_int64(10);
595 data.push_int64(20);
596
597 let vec = FactorizedVector::flat(data);
598
599 assert_eq!(vec.get_for_parent(0, 0), Some(Value::Int64(10)));
600 assert_eq!(vec.get_for_parent(1, 0), Some(Value::Int64(20)));
601 assert_eq!(vec.get_for_parent(0, 1), None); }
603
604 #[test]
605 fn test_flatten_flat_no_mults() {
606 let mut data = ValueVector::with_type(LogicalType::Int64);
607 data.push_int64(1);
608 data.push_int64(2);
609 data.push_int64(3);
610
611 let vec = FactorizedVector::flat(data);
612 let flat = vec.flatten(None);
613
614 assert_eq!(flat.len(), 3);
615 assert_eq!(flat.get_int64(0), Some(1));
616 assert_eq!(flat.get_int64(2), Some(3));
617 }
618
619 #[test]
620 fn test_flatten_flat_with_mults() {
621 let mut data = ValueVector::with_type(LogicalType::Int64);
622 data.push_int64(10);
623 data.push_int64(20);
624
625 let vec = FactorizedVector::flat(data);
626
627 let mults = [3, 2];
629 let flat = vec.flatten(Some(&mults));
630
631 assert_eq!(flat.len(), 5);
632 assert_eq!(flat.get_int64(0), Some(10));
633 assert_eq!(flat.get_int64(1), Some(10));
634 assert_eq!(flat.get_int64(2), Some(10));
635 assert_eq!(flat.get_int64(3), Some(20));
636 assert_eq!(flat.get_int64(4), Some(20));
637 }
638
639 #[test]
640 fn test_flatten_unflat() {
641 let mut data = ValueVector::with_type(LogicalType::Int64);
642 data.push_int64(10);
643 data.push_int64(20);
644 data.push_int64(30);
645
646 let offsets = vec![0, 2, 3];
647 let vec = FactorizedVector::unflat(data, offsets, 2);
648
649 let flat = vec.flatten(None);
651 assert_eq!(flat.len(), 3);
652 assert_eq!(flat.get_int64(0), Some(10));
653 assert_eq!(flat.get_int64(1), Some(20));
654 assert_eq!(flat.get_int64(2), Some(30));
655 }
656
657 #[test]
658 fn test_flatten_with_multiplicities() {
659 let mut data = ValueVector::with_type(LogicalType::Int64);
660 data.push_int64(10);
661 data.push_int64(20);
662 data.push_int64(30);
663
664 let offsets = vec![0, 2, 3];
665 let vec = FactorizedVector::unflat(data, offsets, 2);
666
667 let mults = [2, 1];
670 let flat = vec.flatten(Some(&mults));
671
672 assert_eq!(flat.len(), 5);
673 assert_eq!(flat.get_int64(0), Some(10));
674 assert_eq!(flat.get_int64(1), Some(20));
675 assert_eq!(flat.get_int64(2), Some(10));
676 assert_eq!(flat.get_int64(3), Some(20));
677 assert_eq!(flat.get_int64(4), Some(30));
678 }
679
680 #[test]
681 fn test_logical_row_count_flat() {
682 let mut data = ValueVector::with_type(LogicalType::Int64);
683 data.push_int64(1);
684 data.push_int64(2);
685 data.push_int64(3);
686
687 let vec = FactorizedVector::flat(data);
688
689 assert_eq!(vec.logical_row_count(None), 3);
690 }
691
692 #[test]
693 fn test_logical_row_count() {
694 let mut data = ValueVector::with_type(LogicalType::Int64);
695 data.push_int64(10);
696 data.push_int64(20);
697 data.push_int64(30);
698
699 let offsets = vec![0, 2, 3];
700 let vec = FactorizedVector::unflat(data, offsets, 2);
701
702 assert_eq!(vec.logical_row_count(None), 3);
704
705 let mults = [2, 3];
707 assert_eq!(vec.logical_row_count(Some(&mults)), 7);
708 }
709
710 #[test]
711 fn test_logical_row_count_missing_mult() {
712 let mut data = ValueVector::with_type(LogicalType::Int64);
713 data.push_int64(10);
714 data.push_int64(20);
715 data.push_int64(30);
716
717 let offsets = vec![0, 2, 3];
718 let vec = FactorizedVector::unflat(data, offsets, 2);
719
720 let mults = [2];
722 assert_eq!(vec.logical_row_count(Some(&mults)), 5); }
724
725 #[test]
726 fn test_iter_with_parent_flat() {
727 let mut data = ValueVector::with_type(LogicalType::Int64);
728 data.push_int64(1);
729 data.push_int64(2);
730
731 let vec = FactorizedVector::flat(data);
732 let items: Vec<_> = vec.iter_with_parent().collect();
733
734 assert_eq!(items.len(), 2);
735 assert_eq!(items[0], (0, 0, Value::Int64(1)));
736 assert_eq!(items[1], (1, 1, Value::Int64(2)));
737 }
738
739 #[test]
740 fn test_iter_with_parent_unflat() {
741 let mut data = ValueVector::with_type(LogicalType::Int64);
742 data.push_int64(10);
743 data.push_int64(20);
744 data.push_int64(30);
745
746 let offsets = vec![0, 2, 3];
747 let vec = FactorizedVector::unflat(data, offsets, 2);
748 let items: Vec<_> = vec.iter_with_parent().collect();
749
750 assert_eq!(items.len(), 3);
751 assert_eq!(items[0], (0, 0, Value::Int64(10)));
752 assert_eq!(items[1], (0, 1, Value::Int64(20)));
753 assert_eq!(items[2], (1, 2, Value::Int64(30)));
754 }
755
756 #[test]
757 fn test_iter_with_parent_empty_parents() {
758 let mut data = ValueVector::with_type(LogicalType::Int64);
760 data.push_int64(10);
761 data.push_int64(20);
762
763 let offsets = vec![0, 0, 2, 2];
764 let vec = FactorizedVector::unflat(data, offsets, 3);
765 let items: Vec<_> = vec.iter_with_parent().collect();
766
767 assert_eq!(items.len(), 2);
769 assert_eq!(items[0], (1, 0, Value::Int64(10)));
770 assert_eq!(items[1], (1, 1, Value::Int64(20)));
771 }
772
773 #[test]
774 fn test_iter_with_parent_empty() {
775 let data = ValueVector::with_type(LogicalType::Int64);
776 let vec = FactorizedVector::flat(data);
777 let items: Vec<_> = vec.iter_with_parent().collect();
778
779 assert!(items.is_empty());
780 }
781
782 #[test]
783 fn test_empty_parents() {
784 let mut data = ValueVector::with_type(LogicalType::Int64);
786 data.push_int64(10);
787 data.push_int64(20);
788
789 let offsets = vec![0, 0, 2, 2]; let vec = FactorizedVector::unflat(data, offsets, 3);
791
792 assert_eq!(vec.count_for_parent(0), 0);
793 assert_eq!(vec.count_for_parent(1), 2);
794 assert_eq!(vec.count_for_parent(2), 0);
795 assert_eq!(vec.logical_row_count(None), 2);
796 }
797
798 #[test]
799 fn test_node_id_vector() {
800 let mut data = ValueVector::with_type(LogicalType::Node);
801 data.push_node_id(NodeId::new(100));
802 data.push_node_id(NodeId::new(200));
803 data.push_node_id(NodeId::new(300));
804
805 let offsets = vec![0, 2, 3];
806 let vec = FactorizedVector::unflat(data, offsets, 2);
807
808 assert_eq!(vec.count_for_parent(0), 2);
809 assert_eq!(vec.count_for_parent(1), 1);
810
811 let flat = vec.flatten(None);
813 assert_eq!(flat.get_node_id(0), Some(NodeId::new(100)));
814 assert_eq!(flat.get_node_id(1), Some(NodeId::new(200)));
815 assert_eq!(flat.get_node_id(2), Some(NodeId::new(300)));
816 }
817
818 #[test]
819 fn test_factorized_state_debug() {
820 let state_flat = FactorizedState::Flat;
821 assert!(format!("{state_flat:?}").contains("Flat"));
822
823 let state_unflat = FactorizedState::Unflat(UnflatMetadata {
824 offsets: vec![0, 1],
825 parent_count: 1,
826 });
827 assert!(format!("{state_unflat:?}").contains("Unflat"));
828 }
829
830 #[test]
831 fn test_unflat_metadata_debug() {
832 let meta = UnflatMetadata {
833 offsets: vec![0, 2, 5],
834 parent_count: 2,
835 };
836 let debug = format!("{meta:?}");
837 assert!(debug.contains("offsets"));
838 assert!(debug.contains("parent_count"));
839 }
840
841 #[test]
842 fn test_factorized_vector_clone() {
843 let mut data = ValueVector::with_type(LogicalType::Int64);
844 data.push_int64(1);
845 data.push_int64(2);
846
847 let vec = FactorizedVector::flat(data);
848 let cloned = vec.clone();
849
850 assert_eq!(vec.physical_len(), cloned.physical_len());
851 assert_eq!(vec.is_flat(), cloned.is_flat());
852 }
853}