parallel_processor/buckets/concurrent.rs
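//! Thread-local buffering for concurrent bucket writes: `BucketsThreadBuffer`
//! holds one in-memory byte buffer per bucket, and `BucketsThreadDispatcher`
//! flushes those buffers into a shared `MultiThreadBuckets` when they fill up
//! or when the dispatcher is finalized.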

use crate::buckets::bucket_writer::BucketItemSerializer;
use crate::buckets::{BucketsCount, LockFreeBucket, MultiThreadBuckets};
use crate::memory_data_size::MemoryDataSize;
use crate::utils::panic_on_drop::PanicOnDrop;
use std::sync::Arc;

use super::ChunkingStatus;

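/// Per-thread staging buffers: one `Vec<u8>` per bucket, each pre-allocated
/// with a fixed maximum capacity.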
pub struct BucketsThreadBuffer {
    buffers: Vec<Vec<u8>>,
}

impl BucketsThreadBuffer {
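    /// A buffer set with no per-bucket buffers allocated.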
    pub const EMPTY: Self = Self { buffers: vec![] };

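    /// Allocates one buffer per bucket, each with `max_buffer_size` bytes of capacity.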
    pub fn new(max_buffer_size: MemoryDataSize, buckets_count: &BucketsCount) -> Self {
        let mut buffers = Vec::with_capacity(buckets_count.total_buckets_count);
        let capacity = max_buffer_size.as_bytes();
        for _ in 0..buckets_count.total_buckets_count {
            buffers.push(Vec::with_capacity(capacity));
        }

        Self { buffers }
    }
}

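/// Buffers serialized elements per bucket on the calling thread and flushes
/// them to the shared `MultiThreadBuckets` when a buffer fills up. Must be
/// consumed with `finalize`, otherwise dropping it panics.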
pub struct BucketsThreadDispatcher<B: LockFreeBucket, S: BucketItemSerializer> {
    mtb: Arc<MultiThreadBuckets<B>>,
    thread_data: BucketsThreadBuffer,
    drop_panic: PanicOnDrop,
    serializers: Vec<S>,
}

impl<B: LockFreeBucket, S: BucketItemSerializer> BucketsThreadDispatcher<B, S> {
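    /// Creates a dispatcher over the shared buckets, taking ownership of the
    /// thread buffers and building one serializer per bucket.
    /// Panics if `thread_data` does not hold one buffer per active bucket.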
    pub fn new(
        mtb: &Arc<MultiThreadBuckets<B>>,
        thread_data: BucketsThreadBuffer,
        deserializer_init_data: S::InitData,
    ) -> Self
    where
        S::InitData: Copy,
    {
        assert_eq!(mtb.active_buckets.len(), thread_data.buffers.len());
        Self {
            mtb: mtb.clone(),
            thread_data,
            drop_panic: PanicOnDrop::new("buckets thread dispatcher not finalized"),
            serializers: (0..mtb.active_buckets.len())
                .map(|_| S::new(deserializer_init_data))
                .collect(),
        }
    }

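    /// Returns the bucket counts of the underlying shared buckets.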
    #[inline(always)]
    pub fn get_buckets_count(&self) -> &BucketsCount {
        self.mtb.get_buckets_count()
    }

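    /// Serializes `element` into the thread-local buffer of `bucket`, flushing
    /// the buffer to the shared buckets first when the element would not fit.
    /// Returns the chunking status reported by the flush, or
    /// `ChunkingStatus::SameChunk` if no flush happened.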
    #[inline]
    pub fn add_element_extended(
        &mut self,
        bucket: u16,
        extra_data: &S::ExtraData,
        extra_data_buffer: &S::ExtraDataBuffer,
        element: &S::InputElementType<'_>,
    ) -> ChunkingStatus {
        let bucket_buf = &mut self.thread_data.buffers[bucket as usize];
        let mut chunking_status = ChunkingStatus::SameChunk;
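        // Flush the buffer to the shared buckets if the serialized element would
        // overflow its fixed capacity and the buffer already holds some data.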
        if self.serializers[bucket as usize].get_size(element, extra_data) + bucket_buf.len()
            > bucket_buf.capacity()
            && !bucket_buf.is_empty()
        {
            chunking_status = self.mtb.add_data(bucket, bucket_buf.as_slice());
            bucket_buf.clear();
            self.serializers[bucket as usize].reset();
        }
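        // Serialize the element into the thread-local buffer.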
        self.serializers[bucket as usize].write_to(
            element,
            bucket_buf,
            extra_data,
            extra_data_buffer,
        );
        chunking_status
    }

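    /// Convenience wrapper around `add_element_extended` for serializers whose
    /// `ExtraDataBuffer` is `()`.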
    #[inline]
    pub fn add_element(
        &mut self,
        bucket: u16,
        extra_data: &S::ExtraData,
        element: &S::InputElementType<'_>,
    ) where
        S: BucketItemSerializer<ExtraDataBuffer = ()>,
    {
        self.add_element_extended(bucket, extra_data, &(), element);
    }

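    /// Flushes every non-empty thread-local buffer to the shared buckets,
    /// disarms the drop-panic guard and returns the reusable thread buffers
    /// together with the shared buckets handle.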
    pub fn finalize(mut self) -> (BucketsThreadBuffer, Arc<MultiThreadBuckets<B>>) {
        for (index, vec) in self.thread_data.buffers.iter_mut().enumerate() {
            if vec.is_empty() {
                continue;
            }
            self.mtb.add_data(index as u16, vec.as_slice());
            vec.clear();
        }
        self.drop_panic.disengage();
        (self.thread_data, self.mtb)
    }
}