// typeline_core/record_data/iter/ref_iter.rs

1use std::{cell::Ref, ops::Range};
2
3use crate::record_data::{
4    field::{
5        CowFieldDataRef, FieldId, FieldManager, FieldRefOffset,
6        FIELD_REF_LOOKUP_ITER_ID,
7    },
8    field_data::{FieldValueHeader, FieldValueType, RunLength},
9    field_value::{FieldReference, SlicedFieldReference},
10    field_value_ref::{
11        FieldValueRef, FieldValueSlice, TypedField, TypedRange,
12        ValidTypedRange,
13    },
14    match_set::MatchSetManager,
15};
16
17use super::{
18    super::field_data_ref::{DestructuredFieldDataRef, FieldDataRef},
19    field_iter::FieldIter,
20    field_iterator::{FieldIterOpts, FieldIterator},
21    field_value_slice_iter::{FieldValueRangeIter, InlineBytesIter},
22};
23
/// Common interface over the two field-reference value types
/// ([`FieldReference`] and [`SlicedFieldReference`]) so that [`DerefIter`]
/// can resolve either kind generically.
pub trait ReferenceFieldValueType: FieldValueType + Clone + 'static {
    /// Offset into the owning field's `field_refs` table identifying the
    /// referenced field.
    fn field_id_offset(&self) -> FieldRefOffset;
    /// Sub-range of the referenced value, or `None` when the whole value
    /// is referenced.
    fn range(&self) -> Option<Range<usize>>;
}
28impl ReferenceFieldValueType for FieldReference {
29    fn field_id_offset(&self) -> FieldRefOffset {
30        self.field_ref_offset
31    }
32
33    fn range(&self) -> Option<Range<usize>> {
34        None
35    }
36}
37impl ReferenceFieldValueType for SlicedFieldReference {
38    fn field_id_offset(&self) -> FieldRefOffset {
39        self.field_ref_offset
40    }
41
42    fn range(&self) -> Option<Range<usize>> {
43        Some(self.begin..self.end)
44    }
45}
46
/// A field reference together with the header and the (not yet sliced)
/// data of the value it resolves to. Use `apply_ref` to obtain the final,
/// possibly sub-sliced value.
pub struct FieldRefUnpacked<'a, R> {
    pub reference: R,
    pub data: FieldValueRef<'a>,
    pub header: FieldValueHeader,
}
52
/// Walks a run of field references (`refs_iter`) while keeping a data
/// iterator positioned inside the currently referenced field, so each
/// reference can be resolved to its underlying value.
pub struct DerefIter<'a, R> {
    // the references being walked
    refs_iter: FieldValueRangeIter<'a, R>,
    // iterator into the data of the field currently being targeted
    data_iter: FieldIter<DestructuredFieldDataRef<'a>>,
    // lookup table translating `FieldRefOffset`s into `FieldId`s
    field_refs: Ref<'a, [FieldId]>,
    // offset (into `field_refs`) of the field `data_iter` points at
    last_field_id_offset: FieldRefOffset,
    // keeps the (possibly cow'ed) data that `data_iter` borrows from alive
    data_cow_ref: CowFieldDataRef<'a>,
    field_mgr: &'a FieldManager,
}
61
/// A [`DerefIter`] specialized for either of the two reference types.
#[derive(Clone)]
pub enum AnyDerefIter<'a> {
    FieldRef(DerefIter<'a, FieldReference>),
    SlicedFieldRef(DerefIter<'a, SlicedFieldReference>),
}
67
/// A reference-range iterator specialized for either reference type.
#[derive(Clone)]
pub enum AnyRefSliceIter<'a> {
    FieldRef(FieldValueRangeIter<'a, FieldReference>),
    SlicedFieldRef(FieldValueRangeIter<'a, SlicedFieldReference>),
}
73
/// A typed range plus, when the range was produced by dereferencing field
/// references, the iterator over those references and the offset of the
/// referenced field.
pub struct RefAwareTypedRange<'a> {
    pub base: ValidTypedRange<'a>,
    // `Some` iff the range was reached through field references
    pub refs: Option<AnyRefSliceIter<'a>>,
    // offset of the referenced field, `Some` iff `refs` is `Some`
    pub field_ref_offset: Option<FieldRefOffset>,
}
79
/// Wraps a base field iterator and transparently dereferences any
/// `FieldReference` / `SlicedFieldReference` values it encounters.
// TODO: probably burn this f***er to the ground and rebuild from the ground up
// based on pointers, not this overcomposed nonsense containing the same data
// redundantly three times and still being wildly unsafe at the same time.
pub struct AutoDerefIter<'a, I> {
    iter: I,
    // lookup table translating `FieldRefOffset`s into `FieldId`s
    field_refs: Ref<'a, [FieldId]>,
    // SAFETY: the 'static here is obviously a lie, but we make
    // sure to never leak any slice of that lifetime from this type.
    // The actual lifetime is bound by the range we receive from `iter`,
    // which conceptually lives until iter dies or gets called again,
    // but we cannot express that to the type system.
    deref_iter: Option<AnyDerefIter<'static>>,
    field_mgr: &'a FieldManager,
}
94
95impl<'a, R: ReferenceFieldValueType> FieldRefUnpacked<'a, R> {
96    pub fn apply_ref(&self) -> (FieldValueRef<'a>, RunLength) {
97        (
98            self.reference
99                .range()
100                .map(|r| self.data.subslice(r))
101                .unwrap_or(self.data),
102            self.header.run_length,
103        )
104    }
105}
106
// manual impl because `Ref<'a, [FieldId]>` does not implement the `Clone`
// trait (only the associated fn `Ref::clone` exists), so `#[derive(Clone)]`
// would not compile
impl<'a, R: ReferenceFieldValueType> Clone for DerefIter<'a, R> {
    fn clone(&self) -> Self {
        Self {
            refs_iter: self.refs_iter.clone(),
            field_refs: Ref::clone(&self.field_refs),
            last_field_id_offset: self.last_field_id_offset,
            data_iter: self.data_iter.clone(),
            data_cow_ref: self.data_cow_ref.clone(),
            field_mgr: self.field_mgr,
        }
    }
}
119
120impl<'a, R: ReferenceFieldValueType> DerefIter<'a, R> {
121    pub fn new(
122        field_refs: Ref<'a, [FieldId]>,
123        refs_iter: FieldValueRangeIter<'a, R>,
124        field_mgr: &'a FieldManager,
125        match_set_mgr: &'_ MatchSetManager,
126        last_field_id_offset: FieldRefOffset,
127        field_pos: usize,
128    ) -> Self {
129        let last_field_id = field_refs[usize::from(last_field_id_offset)];
130        let (data_cow_ref, mut data_iter) = unsafe {
131            Self::get_data_ref_and_iter(
132                field_mgr,
133                match_set_mgr,
134                last_field_id,
135            )
136        };
137        data_iter.move_to_field_pos(field_pos);
138        Self {
139            refs_iter,
140            data_iter,
141            field_refs,
142            last_field_id_offset,
143            data_cow_ref,
144            field_mgr,
145        }
146    }
147    pub fn reset(
148        &mut self,
149        match_set_mgr: &'_ MatchSetManager,
150        refs_iter: FieldValueRangeIter<'a, R>,
151        field_id_offset: FieldRefOffset,
152        field_pos: usize,
153    ) {
154        self.refs_iter = refs_iter;
155        self.move_to_field_pos(match_set_mgr, field_id_offset, field_pos);
156    }
157
158    fn move_to_field(
159        &mut self,
160        msm: &'_ MatchSetManager,
161        field_id_offset: FieldRefOffset,
162    ) {
163        if self.last_field_id_offset == field_id_offset {
164            return;
165        }
166        let prev_field_id =
167            self.field_refs[usize::from(self.last_field_id_offset)];
168        let field_id = self.field_refs[usize::from(field_id_offset)];
169        let (cow_ref_new, data_iter_new) = unsafe {
170            Self::get_data_ref_and_iter(self.field_mgr, msm, field_id)
171        };
172        self.field_mgr.store_iter(
173            prev_field_id,
174            FIELD_REF_LOOKUP_ITER_ID,
175            std::mem::replace(&mut self.data_iter, data_iter_new),
176        );
177        let _ = std::mem::replace(&mut self.data_cow_ref, cow_ref_new);
178        self.last_field_id_offset = field_id_offset;
179    }
180    // SAFETY: caller has to ensure that the cow ref outlives the iter
181    unsafe fn get_data_ref_and_iter(
182        fm: &FieldManager,
183        msm: &MatchSetManager,
184        field_id: FieldId,
185    ) -> (
186        CowFieldDataRef<'static>,
187        FieldIter<DestructuredFieldDataRef<'static>>,
188    ) {
189        let fr = fm.get_cow_field_ref(msm, field_id);
190        let iter =
191            fm.lookup_iter(
192                field_id,
193                unsafe {
194                    std::mem::transmute::<
195                        &CowFieldDataRef<'_>,
196                        &CowFieldDataRef<'a>,
197                    >(&fr)
198                },
199                FIELD_REF_LOOKUP_ITER_ID,
200            );
201        unsafe { std::mem::transmute((fr, iter)) }
202    }
203    pub fn move_to_field_keep_pos(
204        &mut self,
205        match_set_mgr: &'_ MatchSetManager,
206        field_id_offset: FieldRefOffset,
207    ) {
208        if self.last_field_id_offset == field_id_offset {
209            return;
210        }
211        let pos = self.data_iter.get_next_field_pos();
212        self.move_to_field(match_set_mgr, field_id_offset);
213        self.data_iter.move_to_field_pos(pos);
214    }
215    pub fn move_to_field_pos(
216        &mut self,
217        match_set_mgr: &'_ MatchSetManager,
218        field_id_offset: FieldRefOffset,
219        field_pos: usize,
220    ) {
221        if self.last_field_id_offset == field_id_offset {
222            self.move_to_field(match_set_mgr, field_id_offset);
223        }
224        self.data_iter.move_to_field_pos(field_pos);
225    }
226    pub fn set_refs_iter(&mut self, refs_iter: FieldValueRangeIter<'a, R>) {
227        self.refs_iter = refs_iter;
228    }
229    pub fn typed_field_fwd(
230        &mut self,
231        match_set_mgr: &'_ MatchSetManager,
232        limit: usize,
233    ) -> Option<FieldRefUnpacked<R>> {
234        let (field_ref, rl) = self.refs_iter.peek()?;
235        self.move_to_field_keep_pos(
236            match_set_mgr,
237            field_ref.field_id_offset(),
238        );
239        let tf = self
240            .data_iter
241            .typed_field_fwd((rl as usize).min(limit))
242            .unwrap();
243        self.refs_iter.next_n_fields(tf.header.run_length as usize);
244        Some(FieldRefUnpacked {
245            reference: field_ref.clone(),
246            data: tf.value,
247            header: tf.header,
248        })
249    }
250    pub fn typed_range_fwd(
251        &mut self,
252        match_set_mgr: &'_ MatchSetManager,
253        mut limit: usize,
254        opts: FieldIterOpts,
255    ) -> Option<(ValidTypedRange, FieldValueRangeIter<R>)> {
256        let (mut field_ref, mut field_rl) = self.refs_iter.peek()?;
257        let refs_headers_start = self.refs_iter.header_ptr();
258        let refs_data_start = self.refs_iter.data_ptr();
259        let refs_oversize_start = self.refs_iter.field_run_length_bwd();
260        let ref_header_idx = self.refs_iter.headers_remaining();
261        let field_id_offset = field_ref.field_id_offset();
262        self.move_to_field_keep_pos(match_set_mgr, field_id_offset);
263        let fmt = self.data_iter.get_next_field_format();
264
265        let data_start = self.data_iter.get_next_field_data();
266        let oversize_start = self.data_iter.field_run_length_bwd();
267        let header_idx = self.data_iter.get_next_header_index();
268
269        let mut refs_oversize_end = 0;
270        let mut field_count = 0;
271        loop {
272            let data_stride = self.data_iter.next_n_fields_with_fmt(
273                (field_rl as usize).min(limit),
274                [fmt.repr],
275                opts.with_invert_kinds_check(false),
276            );
277            field_count += data_stride;
278            limit -= data_stride;
279            if data_stride != field_rl as usize {
280                self.refs_iter.next_n_fields(data_stride);
281                refs_oversize_end = field_rl - data_stride as RunLength;
282                break;
283            }
284            self.refs_iter.next();
285
286            if let Some(v) = self.refs_iter.peek() {
287                (field_ref, field_rl) = v;
288                if field_ref.field_id_offset() != field_id_offset {
289                    break;
290                }
291            } else {
292                break;
293            }
294        }
295        let mut header_count =
296            self.data_iter.get_next_header_index() - header_idx;
297        if self.data_iter.field_run_length_bwd() != 0 {
298            header_count += 1;
299        }
300        let mut refs_header_count =
301            ref_header_idx - self.refs_iter.headers_remaining();
302        let mut refs_data_len = unsafe {
303            self.refs_iter.data_ptr().offset_from(refs_data_start) as usize
304        };
305        if self.refs_iter.field_run_length_bwd() != 0 {
306            refs_header_count += 1;
307            refs_data_len += 1;
308        }
309        unsafe {
310            let (h_s1, h_s2) =
311                self.data_iter.field_data_ref().headers().as_slices();
312            let headers = if h_s1.len() > header_idx {
313                &h_s1[header_idx..header_idx + header_count]
314            } else {
315                &h_s2[header_idx - h_s1.len()
316                    ..header_idx - h_s1.len() + header_count]
317            };
318            Some((
319                ValidTypedRange::new_unchecked(TypedRange {
320                    headers,
321                    data: FieldValueSlice::new(
322                        self.data_iter.field_data_ref(),
323                        fmt,
324                        data_start,
325                        // HACK // BUG // SAFETY:
326                        // this is unsound. we might have skipped over
327                        // dead fields with another type
328                        self.data_iter.get_prev_field_data_end(),
329                        field_count,
330                    ),
331                    field_count,
332                    first_header_run_length_oversize: oversize_start,
333                    last_header_run_length_oversize: self
334                        .data_iter
335                        .field_run_length_fwd_oversize(),
336                }),
337                FieldValueRangeIter::new(
338                    std::slice::from_raw_parts(refs_data_start, refs_data_len),
339                    std::slice::from_raw_parts(
340                        refs_headers_start,
341                        refs_header_count,
342                    ),
343                    refs_oversize_start,
344                    refs_oversize_end,
345                ),
346            ))
347        }
348    }
349    pub fn next_n_fields(
350        &mut self,
351        limit: usize,
352        allow_ring_wrap: bool,
353    ) -> usize {
354        let ref_skip = self.refs_iter.next_n_fields(limit);
355        if self.refs_iter.peek().map(|(v, _rl)| v.field_id_offset())
356            == Some(self.last_field_id_offset)
357        {
358            let data_skip =
359                self.data_iter.next_n_fields(ref_skip, allow_ring_wrap);
360            assert!(data_skip == ref_skip);
361        }
362        ref_skip
363    }
364    pub fn get_next_field_pos(&self) -> usize {
365        self.data_iter.get_next_field_pos()
366    }
367}
368
// manual because `Ref<'a, [FieldId]>` isn't `Clone` (only the associated
// fn `Ref::clone` exists), so `#[derive(Clone)]` would not compile
impl<'a, I: Clone> Clone for AutoDerefIter<'a, I> {
    fn clone(&self) -> Self {
        Self {
            iter: self.iter.clone(),
            field_refs: Ref::clone(&self.field_refs),
            deref_iter: self.deref_iter.clone(),
            field_mgr: self.field_mgr,
        }
    }
}
380
381impl<'a> RefAwareTypedRange<'a> {
382    pub fn without_refs(range: ValidTypedRange<'a>) -> Self {
383        Self {
384            base: range,
385            refs: None,
386            field_ref_offset: None,
387        }
388    }
389}
390
impl<'a, I: FieldIterator> AutoDerefIter<'a, I> {
    /// Wraps `iter` (which iterates over `iter_field_id`) so that field
    /// references encountered in the data are transparently dereferenced.
    pub fn new(
        field_mgr: &'a FieldManager,
        iter_field_id: FieldId,
        iter: I,
    ) -> Self {
        let iter_field_id = field_mgr.dealias_field_id(iter_field_id);
        Self::from_field_refs(
            field_mgr,
            Ref::map(field_mgr.fields[iter_field_id].borrow(), |f| {
                &*f.field_refs
            }),
            iter,
        )
    }
    /// Like `new`, but with the field refs table already borrowed.
    pub fn from_field_refs(
        field_mgr: &'a FieldManager,
        field_refs: Ref<'a, [FieldId]>,
        iter: I,
    ) -> Self {
        Self {
            iter,
            field_refs,
            deref_iter: None,
            field_mgr,
        }
    }
    /// Position of whichever iterator is currently active (the deref
    /// iterator while a reference run is being resolved, the base iterator
    /// otherwise).
    pub fn get_next_field_pos(&mut self) -> usize {
        match &self.deref_iter {
            Some(AnyDerefIter::SlicedFieldRef(iter)) => {
                iter.get_next_field_pos()
            }
            Some(AnyDerefIter::FieldRef(iter)) => iter.get_next_field_pos(),
            None => self.iter.get_next_field_pos(),
        }
    }
    /// Repositions the base iterator, dropping any active deref iterator.
    pub fn move_to_field_pos(&mut self, field_pos: usize) {
        self.deref_iter = None;
        self.iter.move_to_field_pos(field_pos);
    }
    /// If `range` holds references, (re)initializes the matching deref
    /// iterator over it and returns `true`; returns `false` for any other
    /// value slice. Reuses an existing deref iterator of the same variant
    /// to avoid re-acquiring field data.
    fn setup_for_field_refs_range(
        &mut self,
        match_set_mgr: &MatchSetManager,
        field_pos_before: usize,
        range: &ValidTypedRange<'static>,
    ) -> bool {
        if let FieldValueSlice::FieldReference(refs) = range.data {
            let refs_iter = FieldValueRangeIter::from_valid_range(range, refs);
            let field_id_offset = refs_iter.peek().unwrap().0.field_ref_offset;
            if let Some(AnyDerefIter::FieldRef(ri)) = &mut self.deref_iter {
                ri.reset(
                    match_set_mgr,
                    refs_iter,
                    field_id_offset,
                    field_pos_before,
                );
            } else {
                // SAFETY: lifetime laundered to 'static; see the comment
                // on `AutoDerefIter::deref_iter`
                self.deref_iter = Some(AnyDerefIter::FieldRef(unsafe {
                    std::mem::transmute::<
                        DerefIter<'a, FieldReference>,
                        DerefIter<'static, FieldReference>,
                    >(DerefIter::new(
                        Ref::clone(&self.field_refs),
                        refs_iter,
                        self.field_mgr,
                        match_set_mgr,
                        field_id_offset,
                        field_pos_before,
                    ))
                }));
            }
            return true;
        }
        if let FieldValueSlice::SlicedFieldReference(refs) = range.data {
            let refs_iter = FieldValueRangeIter::from_valid_range(range, refs);
            let field_id_offset = refs_iter.peek().unwrap().0.field_ref_offset;
            if let Some(AnyDerefIter::SlicedFieldRef(ri)) =
                &mut self.deref_iter
            {
                ri.reset(
                    match_set_mgr,
                    refs_iter,
                    field_id_offset,
                    field_pos_before,
                );
            } else {
                // SAFETY: same lifetime laundering as the FieldRef branch
                self.deref_iter = Some(AnyDerefIter::SlicedFieldRef(unsafe {
                    std::mem::transmute::<
                        DerefIter<'a, SlicedFieldReference>,
                        DerefIter<'static, SlicedFieldReference>,
                    >(DerefIter::new(
                        Ref::clone(&self.field_refs),
                        refs_iter,
                        self.field_mgr,
                        match_set_mgr,
                        field_id_offset,
                        field_pos_before,
                    ))
                }));
            }
            return true;
        }
        false
    }
    /// Yields the next typed range, dereferencing reference runs: while a
    /// deref iterator is active it is drained first; once exhausted, the
    /// base iterator is advanced and any reference range it produces
    /// (re)arms the deref iterator before looping.
    pub fn typed_range_fwd(
        &mut self,
        match_set_mgr: &'_ MatchSetManager,
        limit: usize,
        opts: FieldIterOpts,
    ) -> Option<RefAwareTypedRange> {
        loop {
            if let Some(ref_iter) = &mut self.deref_iter {
                {
                    // HACK
                    // workaround borrow checker limitation, thank you polonius
                    // https://rust-lang.github.io/rfcs/2094-nll.html#problem-case-3-conditional-control-flow-across-functions
                    // https://github.com/rust-lang/rust/issues/54663
                    let ref_iter = unsafe {
                        std::mem::transmute::<
                            &'_ mut AnyDerefIter,
                            &'static mut AnyDerefIter,
                        >(ref_iter)
                    };
                    // SAFETY: must be very careful with `ref_iter` here, as we
                    // messed with it's lifetime
                    match ref_iter {
                        AnyDerefIter::FieldRef(iter) => {
                            if let Some((range, refs)) = iter.typed_range_fwd(
                                match_set_mgr,
                                limit,
                                opts,
                            ) {
                                let (fr, _) = refs.peek().unwrap();
                                // SAFETY:
                                // these returns are why the borrow checker is
                                // unhappy with us. they force the borrow
                                // of ri to be for the scope of the entire
                                // function, despite the borrow effectively
                                // ending with this return
                                return Some(RefAwareTypedRange {
                                    base: range,
                                    refs: Some(AnyRefSliceIter::FieldRef(
                                        refs,
                                    )),
                                    field_ref_offset: Some(
                                        fr.field_ref_offset,
                                    ),
                                });
                            }
                        }
                        AnyDerefIter::SlicedFieldRef(iter) => {
                            if let Some((range, refs)) = iter.typed_range_fwd(
                                match_set_mgr,
                                limit,
                                opts,
                            ) {
                                let (fr, _) = refs.peek().unwrap();
                                // SAFETY: see FieldRef branch, same thing
                                return Some(RefAwareTypedRange {
                                    base: range,
                                    refs: Some(
                                        AnyRefSliceIter::SlicedFieldRef(refs),
                                    ),
                                    field_ref_offset: Some(
                                        fr.field_ref_offset,
                                    ),
                                });
                            }
                        }
                    }
                }
                // deref iterator exhausted; fall back to the base iterator
                self.deref_iter = None;
            }

            let field_pos = self.iter.get_next_field_pos();
            if let Some(range) = self.iter.typed_range_fwd(limit, opts) {
                // May god forgive me for I have sinned.
                // SAFETY: we use this range to init our
                // deref iter, who has to lie about his lifetime
                // as explained in the comment on this struct.
                // So we have to lie again here.
                let range = unsafe {
                    std::mem::transmute::<
                        ValidTypedRange<'_>,
                        ValidTypedRange<'static>,
                    >(range)
                };
                if self.setup_for_field_refs_range(
                    match_set_mgr,
                    field_pos,
                    &range,
                ) {
                    // the range was references: resolve them on the next
                    // loop iteration
                    continue;
                }
                return Some(RefAwareTypedRange {
                    base: range,
                    refs: None,
                    field_ref_offset: None,
                });
            }
            return None;
        }
    }
    /// Yields the next single value (up to `limit` run length),
    /// dereferencing references; returns the resolved value, its run
    /// length, and the ref offset when a reference was followed.
    pub fn typed_field_fwd(
        &mut self,
        match_set_mgr: &'_ MatchSetManager,
        limit: usize,
    ) -> Option<(FieldValueRef, RunLength, Option<FieldRefOffset>)> {
        loop {
            if let Some(ref_iter) = &mut self.deref_iter {
                // SAFETY: see `typed_range_fwd` for why we need this nonsense
                let ref_iter = unsafe {
                    std::mem::transmute::<
                        &'_ mut AnyDerefIter,
                        &'static mut AnyDerefIter,
                    >(ref_iter)
                };
                match ref_iter {
                    AnyDerefIter::FieldRef(iter) => {
                        if let Some(fru) =
                            iter.typed_field_fwd(match_set_mgr, limit)
                        {
                            let (v, rl) = fru.apply_ref();
                            return Some((
                                v,
                                rl,
                                Some(fru.reference.field_ref_offset),
                            ));
                        }
                    }
                    AnyDerefIter::SlicedFieldRef(iter) => {
                        if let Some(fru) =
                            iter.typed_field_fwd(match_set_mgr, limit)
                        {
                            let (v, rl) = fru.apply_ref();
                            return Some((
                                v,
                                rl,
                                Some(fru.reference.field_ref_offset),
                            ));
                        }
                    }
                };
                // deref iterator exhausted; fall back to the base iterator
                self.deref_iter = None;
            }
            let field_pos = self.iter.get_next_field_pos();
            if let Some(field) = self.iter.typed_field_fwd(limit) {
                if !matches!(
                    field.value,
                    FieldValueRef::FieldReference(_)
                        | FieldValueRef::SlicedFieldReference(_)
                ) {
                    // HACK //SAFETY: polonius issue. see `typed_range_fwd`
                    let field = unsafe {
                        std::mem::transmute::<TypedField<'_>, TypedField<'_>>(
                            field,
                        )
                    };
                    return Some((field.value, field.header.run_length, None));
                }
                // hit a reference: step back and re-read it as a range so
                // the deref iterator can be armed over the whole run
                self.iter.typed_field_bwd(limit);
                let range = self
                    .iter
                    .typed_range_fwd(limit, FieldIterOpts::default())
                    .unwrap();

                // SAFETY: see `typed_range_fwd` for why we need this
                // nonsense
                let range = unsafe {
                    std::mem::transmute::<
                        ValidTypedRange<'_>,
                        ValidTypedRange<'static>,
                    >(range)
                };
                // must succeed: we just checked the value is a reference
                self.setup_for_field_refs_range(
                    match_set_mgr,
                    field_pos,
                    &range,
                )
                .then_some(())
                .unwrap();
                continue;
            }
            return None;
        }
    }
    /// Convenience wrapper: next typed range with no limit, default opts.
    pub fn next_range(
        &mut self,
        msm: &'_ MatchSetManager,
    ) -> Option<RefAwareTypedRange> {
        self.typed_range_fwd(msm, usize::MAX, FieldIterOpts::default())
    }
    // using `next_range` and nesting RefAwareTypedSliceIters is significantly
    // faster. for example, `tl seqn=1G sum p` gets a 5x speedup
    pub fn next_value(
        &mut self,
        msm: &'_ MatchSetManager,
        limit: usize,
    ) -> Option<(FieldValueRef, RunLength, Option<FieldRefOffset>)> {
        self.typed_field_fwd(msm, limit)
    }
    /// Skips up to `limit` fields, draining the active deref iterator
    /// first, then the base iterator; returns the total skipped.
    pub fn next_n_fields(&mut self, mut limit: usize) -> usize {
        let mut ri_count = 0;
        if let Some(ri) = &mut self.deref_iter {
            ri_count = match ri {
                AnyDerefIter::FieldRef(iter) => {
                    iter.next_n_fields(limit, true)
                }
                AnyDerefIter::SlicedFieldRef(iter) => {
                    iter.next_n_fields(limit, true)
                }
            };
            if ri_count == limit {
                return limit;
            }
            limit -= ri_count;
        }
        let base_count = self.iter.next_n_fields(limit, true);
        if base_count > 0 {
            // the base iterator moved past the reference run the deref
            // iterator was built for, so it is now stale
            self.deref_iter = None;
        }
        ri_count + base_count
    }

    /// Unwraps the base iterator, discarding any active deref iterator.
    pub fn into_base_iter(self) -> I {
        self.iter
    }
    // NOTE(review): despite the name this consumes `self`, identical to
    // `into_base_iter` — confirm whether a borrowing clone was intended
    pub fn clone_base(self) -> I {
        self.iter
    }
    /// Whether the base iterator has a next valid field.
    pub fn is_next_valid(&self) -> bool {
        self.iter.is_next_valid()
    }
}
725
/// Offsets of a returned sub-slice within the original value it was cut
/// from: distance from the value's start (`from_begin`) and from its end
/// (`from_end`). Needed when constructing a field reference back into the
/// original data.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct RangeOffsets {
    pub from_begin: usize,
    pub from_end: usize,
}
731
impl<'a, R: FieldDataRef, I: FieldIterator<FieldDataRefType = R>>
    From<AutoDerefIter<'a, I>> for FieldIter<R>
{
    /// Discards any active deref iterator and unwraps down to the
    /// underlying `FieldIter`.
    fn from(value: AutoDerefIter<'a, I>) -> Self {
        value.iter.into_base_iter()
    }
}
739
/// [`InlineBytesIter`] variant that additionally walks an optional ref
/// iterator in lockstep, gating run lengths and (for sliced refs)
/// narrowing each yielded value to the referenced sub-range.
pub struct RefAwareInlineBytesIter<'a> {
    iter: InlineBytesIter<'a>,
    refs: Option<AnyRefSliceIter<'a>>,
}
744
745impl<'a> RefAwareInlineBytesIter<'a> {
746    pub fn new(
747        data: &'a [u8],
748        headers: &'a [FieldValueHeader],
749        first_oversize: RunLength,
750        last_oversize: RunLength,
751        refs: Option<AnyRefSliceIter<'a>>,
752    ) -> Self {
753        Self {
754            iter: InlineBytesIter::new(
755                data,
756                headers,
757                first_oversize,
758                last_oversize,
759            ),
760            refs,
761        }
762    }
763    pub fn from_range(range: &'a RefAwareTypedRange, data: &'a [u8]) -> Self {
764        Self {
765            iter: InlineBytesIter::from_range(&range.base, data),
766            refs: range.refs.clone(),
767        }
768    }
769}
770
771impl<'a> Iterator for RefAwareInlineBytesIter<'a> {
772    type Item = (&'a [u8], RunLength, RangeOffsets);
773
774    // returns a triple of (data, run length, offset)
775    // the offset is the position of data in the original data slice
776    // this is needed if we want to create a field reference into the
777    // original data, because it has to include that offset
778    #[inline(always)]
779    fn next(&mut self) -> Option<Self::Item> {
780        match &mut self.refs {
781            Some(AnyRefSliceIter::FieldRef(refs_iter)) => {
782                let (_fr, rl_ref) = refs_iter.peek()?;
783                let (data, rl_data) = self.iter.peek()?;
784                let run_len = rl_ref.min(rl_data);
785                self.iter.next_n_fields(run_len as usize);
786                refs_iter.next_n_fields(run_len as usize);
787                Some((data, run_len, RangeOffsets::default()))
788            }
789            Some(AnyRefSliceIter::SlicedFieldRef(refs_iter)) => {
790                let (fr, rl_ref) = refs_iter.peek()?;
791                let (data, rl_data) = self.iter.peek()?;
792                let run_len = rl_ref.min(rl_data);
793                self.iter.next_n_fields(run_len as usize);
794                refs_iter.next_n_fields(run_len as usize);
795                Some((
796                    &data[fr.begin..fr.end],
797                    run_len,
798                    RangeOffsets {
799                        from_begin: fr.begin,
800                        from_end: data.len() - fr.end,
801                    },
802                ))
803            }
804            None => {
805                let (data, rl) = self.iter.next()?;
806                Some((data, rl, RangeOffsets::default()))
807            }
808        }
809    }
810}
811
/// Thin `&str` wrapper over [`RefAwareInlineBytesIter`]; delegates all
/// iteration and converts the yielded bytes back to `&str`.
pub struct RefAwareInlineTextIter<'a> {
    iter: RefAwareInlineBytesIter<'a>,
}
815
816impl<'a> RefAwareInlineTextIter<'a> {
817    pub fn new(
818        data: &'a str,
819        headers: &'a [FieldValueHeader],
820        first_oversize: RunLength,
821        last_oversize: RunLength,
822        refs: Option<AnyRefSliceIter<'a>>,
823    ) -> Self {
824        Self {
825            iter: RefAwareInlineBytesIter::new(
826                data.as_bytes(),
827                headers,
828                first_oversize,
829                last_oversize,
830                refs,
831            ),
832        }
833    }
834    pub fn from_range(range: &'a RefAwareTypedRange, data: &'a str) -> Self {
835        Self {
836            iter: RefAwareInlineBytesIter::from_range(range, data.as_bytes()),
837        }
838    }
839}
840
841impl<'a> Iterator for RefAwareInlineTextIter<'a> {
842    type Item = (&'a str, RunLength, RangeOffsets);
843    #[inline(always)]
844    fn next(&mut self) -> Option<Self::Item> {
845        let (data, rl, offsets) = self.iter.next()?;
846        Some((unsafe { std::str::from_utf8_unchecked(data) }, rl, offsets))
847    }
848}
849
/// Iterator over buffered byte values (`Vec<u8>`) that resolves plain and
/// sliced field references, yielding byte slices together with their run
/// length and [`RangeOffsets`].
pub struct RefAwareBytesBufferIter<'a> {
    iter: FieldValueRangeIter<'a, Vec<u8>>,
    // `None` when the range was not reached through field references.
    refs: Option<AnyRefSliceIter<'a>>,
}
854
855impl<'a> RefAwareBytesBufferIter<'a> {
856    pub unsafe fn new(
857        values: &'a [Vec<u8>],
858        headers: &'a [FieldValueHeader],
859        first_oversize: RunLength,
860        last_oversize: RunLength,
861        refs: Option<AnyRefSliceIter<'a>>,
862    ) -> Self {
863        Self {
864            iter: unsafe {
865                FieldValueRangeIter::new(
866                    values,
867                    headers,
868                    first_oversize,
869                    last_oversize,
870                )
871            },
872            refs,
873        }
874    }
875    pub fn from_range(
876        range: &'a RefAwareTypedRange,
877        values: &'a [Vec<u8>],
878    ) -> Self {
879        Self {
880            iter: FieldValueRangeIter::from_valid_range(&range.base, values),
881            refs: range.refs.clone(),
882        }
883    }
884}
885
886impl<'a> Iterator for RefAwareBytesBufferIter<'a> {
887    type Item = (&'a [u8], RunLength, RangeOffsets);
888    #[inline(always)]
889    fn next(&mut self) -> Option<Self::Item> {
890        match &mut self.refs {
891            Some(AnyRefSliceIter::FieldRef(refs_iter)) => {
892                let (_fr, rl_ref) = refs_iter.peek()?;
893                let (data, rl_data) = self.iter.peek()?;
894                let run_len = rl_ref.min(rl_data);
895                self.iter.next_n_fields(run_len as usize);
896                refs_iter.next_n_fields(run_len as usize);
897                Some((data, run_len, RangeOffsets::default()))
898            }
899            Some(AnyRefSliceIter::SlicedFieldRef(refs_iter)) => {
900                let (fr, rl_ref) = refs_iter.peek()?;
901                let (data, rl_data) = self.iter.peek()?;
902                let run_len = rl_ref.min(rl_data);
903                self.iter.next_n_fields(run_len as usize);
904                refs_iter.next_n_fields(run_len as usize);
905                Some((
906                    &data[fr.begin..fr.end],
907                    run_len,
908                    RangeOffsets {
909                        from_begin: fr.begin,
910                        from_end: data.len() - fr.end,
911                    },
912                ))
913            }
914            None => {
915                let (data, rl) = self.iter.next()?;
916                Some((data, rl, RangeOffsets::default()))
917            }
918        }
919    }
920}
921
/// Iterator over buffered string values (`String`) that resolves plain and
/// sliced field references, yielding `&str` slices together with their run
/// length and [`RangeOffsets`].
pub struct RefAwareTextBufferIter<'a> {
    iter: FieldValueRangeIter<'a, String>,
    // `None` when the range was not reached through field references.
    refs: Option<AnyRefSliceIter<'a>>,
}
926
927impl<'a> RefAwareTextBufferIter<'a> {
928    pub unsafe fn new(
929        values: &'a [String],
930        headers: &'a [FieldValueHeader],
931        first_oversize: RunLength,
932        last_oversize: RunLength,
933        refs: Option<AnyRefSliceIter<'a>>,
934    ) -> Self {
935        Self {
936            iter: unsafe {
937                FieldValueRangeIter::new(
938                    values,
939                    headers,
940                    first_oversize,
941                    last_oversize,
942                )
943            },
944            refs,
945        }
946    }
947    pub fn from_range(
948        range: &'a RefAwareTypedRange,
949        values: &'a [String],
950    ) -> Self {
951        Self {
952            iter: FieldValueRangeIter::from_valid_range(&range.base, values),
953            refs: range.refs.clone(),
954        }
955    }
956}
957
958impl<'a> Iterator for RefAwareTextBufferIter<'a> {
959    type Item = (&'a str, RunLength, RangeOffsets);
960    #[inline(always)]
961    fn next(&mut self) -> Option<Self::Item> {
962        match &mut self.refs {
963            Some(AnyRefSliceIter::FieldRef(refs_iter)) => {
964                let (_fr, rl_ref) = refs_iter.peek()?;
965                let (data, rl_data) = self.iter.peek()?;
966                let run_len = rl_ref.min(rl_data);
967                self.iter.next_n_fields(run_len as usize);
968                refs_iter.next_n_fields(run_len as usize);
969                Some((data, run_len, RangeOffsets::default()))
970            }
971            Some(AnyRefSliceIter::SlicedFieldRef(refs_iter)) => {
972                let (fr, rl_ref) = refs_iter.peek()?;
973                let (data, rl_data) = self.iter.peek()?;
974                let run_len = rl_ref.min(rl_data);
975                self.iter.next_n_fields(run_len as usize);
976                refs_iter.next_n_fields(run_len as usize);
977                Some((&data[fr.begin..fr.end], run_len, {
978                    RangeOffsets {
979                        from_begin: fr.begin,
980                        from_end: data.len() - fr.end,
981                    }
982                }))
983            }
984            None => {
985                let (data, rl) = self.iter.next()?;
986                Some((data, rl, RangeOffsets::default()))
987            }
988        }
989    }
990}
991
// TODO: //PERF: I'm pretty sure this is completely pointless now.
// what is the ref iter even doing ?
// NOTE(review): for plain (non-slicable) types the refs iterator below is
// only ever used in `next` to cap the run length; the value itself is
// never changed — which supports the suspicion above.
/// Iterator over plain values that tolerates whole-value field references;
/// sliced references are rejected with a panic (see `unpack_refs_iter`).
pub struct RefAwareFieldValueRangeIter<'a, T> {
    iter: FieldValueRangeIter<'a, T>,
    // `None` when the range was not reached through field references.
    refs: Option<FieldValueRangeIter<'a, FieldReference>>,
}
998
999impl<'a, T: FieldValueType + 'static> RefAwareFieldValueRangeIter<'a, T> {
1000    fn unpack_refs_iter(
1001        refs: Option<AnyRefSliceIter<'a>>,
1002    ) -> Option<FieldValueRangeIter<'a, FieldReference>> {
1003        match refs {
1004            Some(AnyRefSliceIter::FieldRef(iter)) => Some(iter),
1005            Some(AnyRefSliceIter::SlicedFieldRef(_)) => {
1006                panic!(
1007                    "sliced field references to `{}` are not allowed",
1008                    T::REPR.to_str()
1009                )
1010            }
1011            None => None,
1012        }
1013    }
1014    pub unsafe fn new(
1015        values: &'a [T],
1016        headers: &'a [FieldValueHeader],
1017        first_oversize: RunLength,
1018        last_oversize: RunLength,
1019        refs: Option<AnyRefSliceIter<'a>>,
1020    ) -> Self {
1021        Self {
1022            iter: unsafe {
1023                FieldValueRangeIter::new(
1024                    values,
1025                    headers,
1026                    first_oversize,
1027                    last_oversize,
1028                )
1029            },
1030            refs: Self::unpack_refs_iter(refs),
1031        }
1032    }
1033    pub fn from_range(range: &'a RefAwareTypedRange, values: &'a [T]) -> Self {
1034        Self {
1035            iter: FieldValueRangeIter::from_valid_range(&range.base, values),
1036            refs: Self::unpack_refs_iter(range.refs.clone()),
1037        }
1038    }
1039}
1040
1041impl<'a, T: FieldValueType + 'static> Iterator
1042    for RefAwareFieldValueRangeIter<'a, T>
1043{
1044    type Item = (&'a T, RunLength);
1045    #[inline(always)]
1046    fn next(&mut self) -> Option<Self::Item> {
1047        if let Some(ref mut refs_iter) = self.refs {
1048            let (_fr, rl_ref) = refs_iter.peek()?;
1049            let (data, rl_data) = self.iter.peek()?;
1050            let run_len = rl_ref.min(rl_data);
1051            self.iter.next_n_fields(run_len as usize);
1052            refs_iter.next_n_fields(run_len as usize);
1053            Some((data, run_len))
1054        } else {
1055            let (data, rl) = self.iter.next()?;
1056            Some((data, rl))
1057        }
1058    }
1059}
1060
/// Adapter that expands `(value, run_length, offsets)` triples into
/// `run_length` individual clones of `value`, discarding the offsets.
pub struct RefAwareUnfoldRunLength<I, T> {
    iter: I,
    // Most recently produced value, repeated while a run is active.
    last: Option<T>,
    // How many more copies of `last` are still owed to the consumer.
    remaining_run_len: RunLength,
}
1066
1067impl<I: Iterator<Item = (T, RunLength, RangeOffsets)>, T: Clone>
1068    RefAwareUnfoldRunLength<I, T>
1069{
1070    pub fn new(iter: I) -> Self {
1071        Self {
1072            iter,
1073            last: None,
1074            remaining_run_len: 0,
1075        }
1076    }
1077}
1078
/// Extension trait providing [`unfold_rl`](Self::unfold_rl) on any
/// iterator of `(value, run_length, offsets)` triples.
pub trait RefAwareUnfoldIterRunLength<T>: Sized {
    /// Wraps `self` in a [`RefAwareUnfoldRunLength`], expanding run
    /// lengths into repeated values.
    fn unfold_rl(self) -> RefAwareUnfoldRunLength<Self, T>;
}
1082
1083impl<T: Clone, I: Iterator<Item = (T, RunLength, RangeOffsets)>>
1084    RefAwareUnfoldIterRunLength<T> for I
1085{
1086    fn unfold_rl(self) -> RefAwareUnfoldRunLength<Self, T> {
1087        RefAwareUnfoldRunLength::new(self)
1088    }
1089}
1090
1091impl<I: Iterator<Item = (T, RunLength, RangeOffsets)>, T: Clone> Iterator
1092    for RefAwareUnfoldRunLength<I, T>
1093{
1094    type Item = T;
1095    #[inline(always)]
1096    fn next(&mut self) -> Option<Self::Item> {
1097        if self.remaining_run_len > 0 {
1098            self.remaining_run_len -= 1;
1099            return self.last.clone();
1100        } else if let Some((v, rl, _offset)) = self.iter.next() {
1101            self.remaining_run_len = rl - 1;
1102            self.last = Some(v);
1103        } else {
1104            self.last = None;
1105        }
1106        self.last.clone()
1107    }
1108}
1109
// Tests for the ref-aware iterators: a data field plus a field of
// (sliced) references to it are built by hand, then iterated through
// `AutoDerefIter` + `RefAwareInlineTextIter` and compared to an expected
// `(text, run_length, offsets)` sequence.
#[cfg(test)]
mod ref_iter_tests {
    use super::{
        super::ref_iter::{AutoDerefIter, RefAwareInlineTextIter},
        RangeOffsets,
    };
    use crate::record_data::{
        action_buffer::ActorRef,
        field::{FieldManager, FieldRefOffset},
        field_data::{
            field_value_flags, FieldData, FieldValueFormat, FieldValueHeader,
            FieldValueRepr, RunLength,
        },
        field_value::SlicedFieldReference,
        field_value_ref::FieldValueSlice,
        iter::{field_iter::FieldIter, field_iterator::FieldIterOpts},
        match_set::MatchSetManager,
        push_interface::PushInterface,
        scope_manager::ScopeManager,
    };

    // Registers `fd` as a data field and `fd_refs` as a field referencing
    // it, then asserts that iterating the refs field (auto-dereferenced,
    // as inline text) yields exactly `expected`.
    #[track_caller]
    fn compare_iter_output(
        fd: FieldData,
        fd_refs: FieldData,
        expected: &[(&'static str, RunLength, RangeOffsets)],
    ) {
        // Minimal manager setup: one scope, one match set.
        let mut match_set_mgr = MatchSetManager::default();
        let mut field_mgr = FieldManager::default();
        let mut scope_mgr = ScopeManager::default();
        let scope_id = scope_mgr.add_scope(None);
        let ms_id = match_set_mgr.add_match_set(
            &mut field_mgr,
            &mut scope_mgr,
            scope_id,
        );

        let field_id = field_mgr.add_field_with_data(
            &match_set_mgr,
            ms_id,
            ActorRef::default(),
            fd,
        );
        let refs_field_id = field_mgr.add_field_with_data(
            &match_set_mgr,
            ms_id,
            ActorRef::default(),
            fd_refs,
        );
        // Make ref offset 0 in `fd_refs` resolve to `field_id`.
        field_mgr.register_field_reference(refs_field_id, field_id);

        // Scope the borrows so the fields can be dropped afterwards.
        {
            let fr = field_mgr.get_cow_field_ref_raw(refs_field_id);
            let iter = FieldIter::from_start(fr.destructured_field_ref());
            let mut ref_iter =
                AutoDerefIter::new(&field_mgr, refs_field_id, iter);
            let range = ref_iter
                .typed_range_fwd(
                    &match_set_mgr,
                    usize::MAX,
                    FieldIterOpts::default(),
                )
                .unwrap();
            // The dereferenced data must come out as inline text.
            let iter = match range.base.data {
                FieldValueSlice::TextInline(v) => {
                    RefAwareInlineTextIter::from_range(&range, v)
                }
                _ => panic!("wrong data type"),
            };
            assert_eq!(iter.collect::<Vec<_>>(), expected);
        }

        field_mgr.drop_field_refcount(field_id, &mut match_set_mgr);
        field_mgr.drop_field_refcount(refs_field_id, &mut match_set_mgr);
    }
    // Builds a refs field that mirrors `fd` header-for-header (full-width
    // slices, matching DELETED / SHARED_VALUE / SAME_VALUE_AS_PREVIOUS
    // flags) and checks the output equals `expected` with default offsets.
    fn compare_iter_output_parallel_ref(
        mut fd: FieldData,
        expected: &[(&'static str, RunLength)],
    ) {
        let mut fd_refs = FieldData::default();
        for h in &mut fd.headers {
            if h.same_value_as_previous() {
                // No data payload for same-as-previous headers; copy the
                // header shape directly.
                fd_refs.headers.push_back(FieldValueHeader {
                    fmt: FieldValueFormat {
                        repr: FieldValueRepr::SlicedFieldReference,
                        flags: h.flags
                            & (field_value_flags::DELETED
                                | field_value_flags::SHARED_VALUE
                                | field_value_flags::SAME_VALUE_AS_PREVIOUS),
                        size: std::mem::size_of::<SlicedFieldReference>()
                            as u16,
                    },
                    run_length: h.run_length,
                });
            } else {
                // Reference the full value range [0, h.size).
                push_ref(
                    &mut fd_refs,
                    0,
                    h.size as usize,
                    h.run_length as usize,
                );
                let h_ref = fd_refs.headers.back_mut().unwrap();
                h_ref.flags |= h.flags
                    & (field_value_flags::DELETED
                        | field_value_flags::SHARED_VALUE);
            }
        }

        compare_iter_output(
            fd,
            fd_refs,
            &expected
                .iter()
                .map(|(v, rl)| (*v, *rl, RangeOffsets::default()))
                .collect::<Vec<_>>(),
        );
    }
    // Pushes a sliced reference [begin, end) (ref offset 0) with run
    // length `rl` onto `fd`.
    fn push_ref(fd: &mut FieldData, begin: usize, end: usize, rl: usize) {
        fd.push_sliced_field_reference(
            SlicedFieldReference {
                field_ref_offset: FieldRefOffset::from(0u8),
                begin,
                end,
            },
            rl,
            false,
            false,
        );
    }

    // Plain values, full-width references, varying run lengths.
    #[test]
    fn simple() {
        let mut fd = FieldData::default();
        fd.push_str("a", 1, false, false);
        fd.push_str("bb", 2, false, false);
        fd.push_str("ccc", 3, false, false);
        compare_iter_output_parallel_ref(
            fd,
            &[("a", 1), ("bb", 2), ("ccc", 3)],
        );
    }

    // One shared sliced reference [1, 3) spanning all three values; the
    // offsets report how much each value was trimmed at both ends.
    #[test]
    fn shared_ref() {
        let mut fd = FieldData::default();
        fd.push_str("aaa", 1, false, false);
        fd.push_str("bbbb", 2, false, false);
        fd.push_str("ccccc", 3, false, false);
        let mut fdr = FieldData::default();
        push_ref(&mut fdr, 1, 3, 6);
        compare_iter_output(
            fd,
            fdr,
            &[
                (
                    "aa",
                    1,
                    RangeOffsets {
                        from_begin: 1,
                        from_end: 0,
                    },
                ),
                (
                    "bb",
                    2,
                    RangeOffsets {
                        from_begin: 1,
                        from_end: 1,
                    },
                ),
                (
                    "cc",
                    3,
                    RangeOffsets {
                        from_begin: 1,
                        from_end: 2,
                    },
                ),
            ],
        );
    }

    // Deleting the middle header (in both fields) must skip "bb" entirely.
    #[test]
    fn with_deletion() {
        let mut fd = FieldData::default();
        fd.push_str("a", 1, false, false);
        fd.push_str("bb", 2, false, false);
        fd.push_str("ccc", 3, false, false);

        let mut fdr = FieldData::default();
        push_ref(&mut fdr, 0, 1, 1);
        push_ref(&mut fdr, 0, 2, 2);
        push_ref(&mut fdr, 0, 3, 3);

        fd.headers[1].set_deleted(true);
        fd.field_count -= 2;
        fdr.headers[1].set_deleted(true);
        fdr.field_count -= 2;

        compare_iter_output(
            fd,
            fdr,
            &[
                ("a", 1, RangeOffsets::default()),
                ("ccc", 3, RangeOffsets::default()),
            ],
        );
    }

    // A same-as-previous header repeats the prior value without storing
    // new payload data.
    #[test]
    fn with_same_as_previous() {
        let mut fd = FieldData::default();
        fd.push_str("aaa", 1, false, false);

        fd.headers.push_back(fd.headers[0]);
        fd.headers[1].set_same_value_as_previous(true);
        fd.headers[1].run_length = 5;
        fd.headers[1].set_shared_value(true);
        fd.field_count += 5;

        fd.push_str("c", 3, false, false);
        compare_iter_output_parallel_ref(
            fd,
            &[("aaa", 1), ("aaa", 5), ("c", 3)],
        );
    }

    // Same-as-previous must still resolve to the deleted predecessor's
    // value ("1") even though that predecessor itself is skipped.
    #[test]
    fn with_same_as_previous_after_deleted() {
        let mut fd = FieldData::default();
        fd.push_str("00", 1, false, false);
        fd.push_str("1", 1, false, false);
        fd.headers.push_back(fd.headers[1]);
        fd.headers[2].set_same_value_as_previous(true);
        fd.headers[2].run_length = 5;
        fd.headers[2].set_shared_value(true);
        fd.field_count += 5;
        fd.headers[1].set_deleted(true);
        fd.field_count -= 1;
        fd.push_str("333", 3, false, false);
        compare_iter_output_parallel_ref(
            fd,
            &[("00", 1), ("1", 5), ("333", 3)],
        );
    }
}