Skip to main content

arcis_compiler/utils/
packing.rs

1use crate::{ArcisField, STATISTICAL_SECURITY_FACTOR};
2use ff::PrimeField;
3
4#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
5pub enum DataSize {
6    /// Size is a power of two.
7    PowerOfTwo(u8),
8    /// Size is the same as a container. Container size does not have to be a power of two.
9    Full, // Needs to be after `PowerOfTwo` for `PartialOrd` derivation.
10}
11
12impl DataSize {
13    pub fn size(&self, full_size: Size) -> Size {
14        match self {
15            DataSize::PowerOfTwo(pow) => (1 as Size) << pow,
16            DataSize::Full => full_size,
17        }
18    }
19}
20
21type Location = usize;
22type Size = u16;
23
24#[derive(Debug, Clone, Copy)]
25#[non_exhaustive]
26pub struct PackLocation {
27    /// The index of the container.
28    pub index: Location,
29    /// The offset in bits of the location of the data in the container.
30    pub bit_offset: Size,
31}
32
33impl PackLocation {
34    fn new(index: Location, bit_offset: Size) -> Self {
35        PackLocation { index, bit_offset }
36    }
37}
38
39#[derive(Debug, Default)]
40struct CurrentState {
41    /// The size of a container, in bits. Only relevant for `DataSize::PowerOfTwo` items.
42    container_size: Size,
43    /// Number of containers containing `DataSize::Full` items.
44    n_full_containers: Location,
45    /// Loads of the containers. MUST be decreasing.
46    loads: Vec<Size>,
47    /// Last place where something was inserted.
48    last_insert: Option<Location>,
49}
50impl CurrentState {
51    pub fn new(container_size: Size) -> CurrentState {
52        CurrentState {
53            container_size,
54            ..Default::default()
55        }
56    }
57    /// Load of the context to the left of index.
58    fn left_load(&self, index: Location) -> Size {
59        if index == 0 {
60            self.container_size
61        } else {
62            self.loads[index - 1]
63        }
64    }
65    /// Load of the container at index.
66    fn location_load(&self, index: Location) -> Size {
67        self.loads.get(index).copied().unwrap_or(0)
68    }
69    /// Inserts an item of size `item_size` at location `index`,
70    /// creating a new container if necessary.
71    fn insert_at(&mut self, index: Location, item_size: Size) -> Size {
72        let res = self.location_load(index);
73        let new_load = res + item_size;
74        assert!(new_load <= self.left_load(index));
75        assert!(new_load <= self.container_size);
76        if index >= self.loads.len() {
77            assert_eq!(index, self.loads.len());
78            self.loads.resize(index + 1, 0);
79        }
80        self.loads[index] = new_load;
81        self.last_insert = Some(index);
82        res
83    }
84    /// Is there enough room to insert an item of size `item_size` at `index`.
85    fn can_insert_at(&mut self, index: Location, item_size: Size) -> bool {
86        let at_size = self.location_load(index);
87        let new_size = at_size + item_size;
88        new_size <= self.container_size
89    }
90    /// Inserts an item of size `data_size`, returning its location.
91    /// Note: `data_size` must be given in decreasing order for the function to work.
92    pub fn insert(&mut self, data_size: DataSize) -> PackLocation {
93        match data_size {
94            DataSize::PowerOfTwo(pow) => {
95                let item_size = (1 as Size) << pow;
96                let last = self.last_insert.unwrap_or(0);
97                // Because everything is a power of two, everything has the same load in the
98                // interval [self.n_full_containers, last[. So we only need to check these 2 places.
99                let insert_loc = [self.n_full_containers, last]
100                    .into_iter()
101                    .find(|index| self.can_insert_at(*index, item_size))
102                    .unwrap_or(last + 1);
103                let old_size = self.location_load(insert_loc);
104                self.insert_at(insert_loc, item_size);
105                PackLocation::new(insert_loc, old_size)
106            }
107            DataSize::Full => {
108                let res = PackLocation::new(self.n_full_containers, 0);
109                assert_eq!(self.n_full_containers, self.loads.len());
110                self.n_full_containers += 1;
111                self.loads.push(self.container_size);
112                res
113            }
114        }
115    }
116    pub fn state(self) -> Vec<Option<Size>> {
117        let len = self.loads.len();
118        (0..len)
119            .map(|index| {
120                if index < self.n_full_containers {
121                    None
122                } else {
123                    Some(self.loads[index])
124                }
125            })
126            .collect()
127    }
128}
129impl DataSize {
130    /// Returns in the same order as data, for each item,
131    /// its container_id and where in the container it starts.
132    /// The algorithm is First-fit-decreasing (FFD).
133    /// See https://en.wikipedia.org/wiki/First-fit-decreasing_bin_packing
134    /// "A special case of divisible item sizes occurs in memory allocation in computer systems,
135    /// where the item sizes are all powers of 2. In this case it always finds the optimal packing."
136    /// Returns:
137    /// * Where each data is put.
138    /// * The final state of each container,
139    ///   * `None` if filled by `DataSize::Full`
140    ///   * `Some(size)` if loaded up to `size`.
141    fn pack(data: Vec<DataSize>, container_size: Size) -> (Vec<PackLocation>, Vec<Option<Size>>) {
142        let mut to_sort: Vec<_> = data
143            .into_iter()
144            .enumerate()
145            .map(|(i, size)| (size, i))
146            .collect();
147        // sort according to reverse order of DataSize
148        to_sort.sort_by(|a, b| b.0.cmp(&a.0));
149        let mut current_state = CurrentState::new(container_size);
150        let mut res = vec![PackLocation::new(0, 0); to_sort.len()];
151        for (size, index) in to_sort {
152            res[index] = current_state.insert(size);
153        }
154        (res, current_state.state())
155    }
156    pub fn pack_arcis(data: Vec<DataSize>) -> (Vec<PackLocation>, Vec<Option<Size>>) {
157        Self::pack(data, Self::PACKING_SIZE)
158    }
159    pub const PACKING_SIZE: Size =
160        ArcisField::CAPACITY as Size - (STATISTICAL_SECURITY_FACTOR as Size);
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166    use rand::Rng;
167    fn test_pack(sizes: Vec<DataSize>, container_size: Size, expected_len: Option<usize>) {
168        let computed_expected = {
169            let max_exp = sizes.iter().fold(0, |acc, x| {
170                if let DataSize::PowerOfTwo(pow) = x {
171                    acc.max(*pow)
172                } else {
173                    acc
174                }
175            });
176            let mut pow_count = vec![0usize; 1 + max_exp as usize];
177            let mut n_full = 0usize;
178            for size in &sizes {
179                match size {
180                    DataSize::PowerOfTwo(pow) => {
181                        pow_count[*pow as usize] += 1;
182                    }
183                    DataSize::Full => {
184                        n_full += 1;
185                    }
186                }
187            }
188            let mut free_spaces = 0usize;
189            let mut n_used_by_pows = 0usize;
190            for (exp, count) in pow_count.iter().enumerate().rev() {
191                let current_pow = 1 << exp;
192                if container_size & current_pow != 0 {
193                    free_spaces += n_used_by_pows * (current_pow as usize);
194                }
195                let count_spaces = count * current_pow as usize;
196                if count_spaces <= free_spaces {
197                    // We pack in all the free spaces.
198                    free_spaces -= count_spaces;
199                    continue;
200                }
201                let remaining_to_pack = (count_spaces - free_spaces) / current_pow as usize;
202                let n_packable_per_pack = container_size >> exp;
203                let ceil = remaining_to_pack.div_ceil(n_packable_per_pack as usize);
204                n_used_by_pows += ceil;
205                free_spaces = (ceil * n_packable_per_pack as usize - remaining_to_pack)
206                    * current_pow as usize;
207            }
208            n_full + n_used_by_pows
209        };
210        if let Some(expected_len) = expected_len {
211            assert_eq!(computed_expected, expected_len);
212        }
213        let (chosen_pack, v) = DataSize::pack(sizes.clone(), container_size);
214        assert_eq!(v.len(), computed_expected);
215        let mut packs = vec![false; computed_expected * container_size as usize];
216        for (size, location) in std::iter::zip(sizes, chosen_pack) {
217            let size_begin = location.bit_offset;
218            let location = location.index;
219            assert!(location < computed_expected);
220            let data_size = size.size(container_size);
221            let size_end = size_begin + data_size;
222            assert!(size_end <= container_size);
223            for index in size_begin..size_end {
224                let location = location * container_size as usize + index as usize;
225                assert!(!packs[location]);
226                packs[location] = true;
227            }
228        }
229    }
230    #[test]
231    fn powers_of_two() {
232        let rng = &mut crate::utils::test_rng::get();
233        for exp in [1u8, 2, 3, 4, 5, 6, 7, 8] {
234            let pow_2 = (1 as Size) << exp;
235            for is_size_exact_pow_2 in [true, false, false, false] {
236                let offset = if is_size_exact_pow_2 {
237                    0
238                } else {
239                    rng.gen_range(0..pow_2)
240                };
241                let container_size = pow_2 + offset;
242                for n_items in [1, 4, 16, 64, 256, 1024, 4096] {
243                    let mut sizes = Vec::new();
244                    for _ in 0..n_items {
245                        if rng.gen_bool(0.5) {
246                            sizes.push(DataSize::PowerOfTwo(rng.gen_range(0..=exp)));
247                        } else {
248                            sizes.push(DataSize::Full);
249                        }
250                    }
251                    let optimal = if is_size_exact_pow_2 {
252                        Some(
253                            sizes
254                                .iter()
255                                .map(|size| size.size(container_size) as usize)
256                                .sum::<usize>()
257                                .div_ceil(container_size as usize),
258                        )
259                    } else {
260                        None
261                    };
262                    test_pack(sizes, container_size, optimal);
263                }
264            }
265        }
266    }
267    #[test]
268    fn all_full() {
269        let container_size = 7;
270        let n_data = 63;
271        let sizes = vec![DataSize::Full; n_data];
272        test_pack(sizes, container_size, Some(n_data));
273    }
274}