#![deny(missing_docs)]
#![doc = include_str!("../README.md")]

use std::{
    collections::{HashSet, VecDeque},
    hash::Hash,
    ops::Index,
};

/// Packs `Tile`s (composed of [comparable](Eq) `Symbol`s) into pages (essentially `[Symbol; page_size]`).
/// Returns a vector of “assignments”:
///
/// ```rust
/// # use std::collections::HashSet;
/// # use pagination_packing::overload_and_remove;
/// #
/// // Each lesson focuses on a single sentence, and refers to one page of the vocabulary booklet.
/// let vocabulary: [&[_]; 5] = [
///     &[], // The introductory lesson actually does not contain a sentence.
///     &["little", "lamb"], // "Mary had a little lamb."
///     &["tailor", "rich"], // "My tailor is rich."
///     &["rich", "little", "people"], // "Those rich little people!"
///     &["send", "dress", "tailor"], // "Send your dress to my tailor."
/// ];
/// // Each page of the booklet can contain 4 words of vocabulary.
/// let assignments = overload_and_remove::<_, _, [_]>(&vocabulary, 4);
///
/// let booklet: Vec<HashSet<_>> = assignments
///     .into_iter()
///     .map(|page_assignments| {
///        page_assignments.referenced_tiles(&vocabulary).copied().flatten().collect()
///     })
///     .collect();
/// for (i, page) in booklet.iter().enumerate() {
///     println!("Page {}: {page:?}", i + 1);
/// }
///
/// // Each element of `vocabulary` is fully contained within one of the pages.
/// assert_eq!( // As you can see, we can't find a lesson...
///     vocabulary.iter().find(|&&lesson| {
///         let set = HashSet::from_iter(lesson);
///         // ...such that there isn't a page that the lesson is a subset of.
///         !booklet.iter().any(|page| set.is_subset(page))
///     }),
///     None,
/// );
/// ```
///
/// This algorithm has quadratic complexity (compared to the linear complexity of greedy heuristics like “first-fit”), but produces better packings on many inputs.
pub fn overload_and_remove<
    Symbol: Eq + Hash,
    Tile: AsRef<[Symbol]> + ?Sized,
    C: Index<usize, Output = Tile> + ?Sized,
>(
    tiles: &C,
    page_size: usize,
) -> Vec<AssignedTiles>
where
    for<'a> &'a C: IntoIterator,
    for<'a> <&'a C as IntoIterator>::IntoIter: ExactSizeIterator, // This gives the C(ollection)'s length.
{
    // Sort the tiles by size, which improves the packing algorithm's efficiency.
    let mut queue = VecDeque::with_capacity(tiles.into_iter().len());
    for i in 0..tiles.into_iter().len() {
        queue.insert(
            // Note that we want them sorted *largest first*!
            // (Ties are broken so that the new element is inserted as late as possible, to avoid moving data too much.)
            queue.partition_point(|tile_ref: &TileRef| {
                tiles[tile_ref.tile_idx].as_ref().len() >= tiles[i].as_ref().len()
            }),
            TileRef::new(i),
        );
    }

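    // Main loop: repeatedly pop the largest remaining tile and place it on the page where its
    // "relative size" (see `AssignedTiles::relative_size_of`) is smallest, overloading that page
    // if need be; overloaded pages then shed their least "efficient" tiles back into the queue.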
    let mut assignments: Vec<AssignedTiles> = Vec::new();
    while let Some(tile_ref) = queue.pop_front() {
        let tile = &tiles[tile_ref.tile_idx];

        let best_page_index = assignments
            .iter()
            .enumerate()
            .filter(|(i, _assignment)| !tile_ref.is_banned_from(*i))
            .fold(
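                // The fold is seeded with a sentinel "new page" index and the tile's full size;
                // an existing page is only picked if the tile shares at least one symbol with it
                // (otherwise its relative size equals its full size, which is not strictly smaller).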
                (assignments.len(), tile.as_ref().len() as f64),
                |(best_idx, best_rel_size), (i, assignment)| {
                    let relative_size = assignment.relative_size_of(tile, tiles);
                    if relative_size < best_rel_size {
                        (i, relative_size)
                    } else {
                        (best_idx, best_rel_size)
                    }
                },
            )
            .0;

        if let Some(best_page) = assignments.get_mut(best_page_index) {
            // Add the tile to that page.
            best_page.assigned.push(tile_ref);

            // If this overloads the page, get it back to normal (if possible).
            while best_page.volume(tiles) > page_size {
                // Look for a tile minimising "efficiency", i.e. size divided by relative size.
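                // (A tile with low efficiency has symbols mostly unshared with the page's other
                //  tiles, so evicting it shrinks the page's volume the most per unit of size.)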
                let efficiency = |tile: &Tile| {
                    (tile.as_ref().len()) as f64 / best_page.relative_size_of(tile, tiles)
                };
                let [(min_efficiency, (min_idx, _min_tile)), (max_efficiency, (_max_idx, _max_tile))] =
                    minmax_by_key(
                        best_page.referenced_tiles(tiles).enumerate(),
                        // NB: the iterator yields `(index, &Tile)` pairs, not `TileRef`s.
                        |(_i, tile)| efficiency(tile),
                    )
                    .expect("Pages should not be empty!");

                // Give up if all tiles have the same efficiency.
                // All efficiencies are identical if and only if the min is equal to the max.
                // FIXME: yikes for float comparison! This threshold is quite arbitrary, too... :/
                if max_efficiency - min_efficiency < 0.001 {
                    break;
                }

                // Remove the tile with minimal efficiency.
                let mut min_tile = best_page.assigned.swap_remove(min_idx);
                min_tile.ban_from(best_page_index); // Prevent it from going back into this page, to ensure that the algorithm terminates.
                queue.push_back(min_tile); // Push it back into the queue.
            }
        } else {
            // Found nowhere to put this tile, create a new page for it.
            assignments.push(AssignedTiles {
                assigned: vec![tile_ref],
            });
        }
    }

    // Deal with pages still overloaded, by emptying them and re-distributing their tiles.
    for page in &mut assignments {
        if page.volume(tiles) > page_size {
            for tile_ref in page.assigned.drain(..) {
                // We are about to re-insert these via first-fit, so keeping the queue sorted by size (largest first) improves the packing.
                queue.insert(
                    queue.partition_point(|other_ref| {
                        tiles[other_ref.tile_idx].as_ref().len()
                            >= tiles[tile_ref.tile_idx].as_ref().len()
                    }),
                    tile_ref,
                );
            }
        }
    }
    // Place back any tiles now in the queue via first-fit.
    for tile_ref in queue {
        let tile = &tiles[tile_ref.tile_idx];
        if let Some(page) = assignments
            .iter_mut()
            .find(|page| page.can_fit(tile, page_size, tiles))
        {
            page.assigned.push(tile_ref);
        } else {
            // No page can accommodate this tile? Create a new page.
            assignments.push(AssignedTiles {
                assigned: vec![tile_ref],
            });
        }
    }

    // "Decant" the result.
    decant(&mut assignments, tiles, page_size);
    // Note that the result does not contain any empty pages.

    assignments
}

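/// Compacts `assignments` by moving tiles into lower-indexed pages wherever they fit, in three
/// passes of decreasing granularity: whole pages first, then "components" (groups of tiles
/// transitively sharing symbols), and finally individual tiles. Pages emptied by this process
/// are removed.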
fn decant<
    Symbol: Eq + Hash,
    Tile: AsRef<[Symbol]> + ?Sized,
    C: Index<usize, Output = Tile> + ?Sized,
>(
    assignments: &mut Vec<AssignedTiles>,
    tiles: &C,
    page_size: usize,
) {
    // "Decanting" is the process of moving all *things* that can fit in a lower index there.
    fn decant_on<F: FnMut(&mut AssignedTiles, &mut AssignedTiles)>(
        mut try_decanting: F,
        assignments: &mut Vec<AssignedTiles>,
    ) {
        // No need to attempt decanting from page #0, as there are no earlier pages to decant into.
        for from_idx in (1..assignments.len()).rev() {
            let (write_half, read_half) = assignments.split_at_mut(from_idx);
            let from = &mut read_half[0];
            // Scan all pages before this one.
            for to in write_half {
                try_decanting(to, from);

                // If the page is now empty, remove it.
                // Doing this now reduces the number of iterations performed by later steps.
                // NB: order is intentionally preserved so as not to alter the "decantation"'s properties.
                // NB: this does mean that the first step might get empty pages in its input!
                // NB: this is safe to do because we go towards the beginning of the vector, thereby not
                //     invalidating our iteration (thus, iterators should not be used to drive the outer loop).
                if from.assigned.is_empty() {
                    assignments.remove(from_idx);
                    break;
                }
            }
        }
    }

    // Decant on pages.
    decant_on(
        |to, from| {
            // If the two pages can be merged wholesale, move all of `from`'s tiles.
            if to.combined_volume(
                from.assigned
                    .iter()
                    .flat_map(|tile_ref| tiles[tile_ref.tile_idx].as_ref()),
                tiles,
            ) <= page_size
            {
                to.assigned.append(&mut from.assigned);
            }
        },
        assignments,
    );

    // Decant on "components" (= tiles sharing symbols).
    decant_on(
        |to, from| {
            // We need to iterate on all the "components", which are groups of tiles sharing at least
            // one symbol with another tile in the group.
            // (But they need not all share one common symbol!)
            // We do this by adding the first available tile, and then looking for tiles with common
            // symbols. (As an optimisation, we know we can skip tiles already scanned.)
            // Note that, after adding a tile, we need to re-iterate on all of them!
            // Some tiles that have been iterated past may be "connected" to the one that has been
            // newly added (but none other in the set).
            // Note that we have to round the number of bytes up, not down.
            let mut processed = vec![0u8; (from.assigned.len() + 7) / 8];
            let mut symbols = HashSet::new();
            let mut members = Vec::new();
            // Find the first byte that reports an un-processed tile.
            while let Some(byte_idx) = processed.iter().position(|&byte| byte != 0xFF) {
                let bit_idx = processed[byte_idx].trailing_ones() as usize;
                let idx = byte_idx * 8 + bit_idx;
                // Due to the rounding-up, it's possible that the resulting index is actually higher
                // than the number of tile refs.
                let Some(slice) = from.assigned.get(idx..) else {
                    break;
                };
                let Some(first) = slice.first() else {
                    // It is possible for the slice to be empty.
                    break;
                };

                // Seed the "component" with the
                members.clear();
                members.push(idx);
                symbols.extend(tiles[first.tile_idx].as_ref());
                let mut rescan = true;
                while rescan {
                    rescan = false;
                    for (i, tile_ref) in std::iter::zip(idx.., slice) {
                        let processed_byte = &mut processed[i / 8];
                        let processed_mask = 1 << (i % 8);
                        if *processed_byte & processed_mask != 0 {
                            continue;
                        }
                        let tile = &tiles[tile_ref.tile_idx];

                        // If at least one of this tile's symbols matches the component's, add it.
                        let tile_symbols: HashSet<_> = tile.as_ref().iter().collect();
                        if !symbols.is_disjoint(&tile_symbols) {
                            symbols.extend(tile_symbols);
                            // We need to keep the array sorted for the later removal to work.
                            members.insert(members.partition_point(|&j| j <= i), i);
                            *processed_byte |= processed_mask;
                            rescan = true;
                        }
                    }
                }

                // So, we have the list of refs in `members`, and the set of symbols they contain in
                // `symbols`.
                // The former has to be sorted, because removing an element alters the indices of at
                // least one of the subsequent ones, which could mess up subsequent removals.
                debug_assert_eq!(
                    members.windows(2).find(|pair| pair[0] > pair[1]),
                    None,
                    "Members array {members:?} is not sorted"
                );
                if to.combined_volume(symbols.drain(), tiles) <= page_size {
                    // Extract the "component" into the target page.
                    // NB: `swap_remove` shuffles tiles from the tail into slots already marked
                    // as processed, so those tiles will not be considered again for this `to`
                    // page; this only costs us a potential optimisation, not correctness.
                    for &member_idx in members.iter().rev() {
                        let tile_ref = from.assigned.swap_remove(member_idx);
                        to.assigned.push(tile_ref);
                    }
                }
            }
        },
        assignments,
    );

    // And finally, decant on individual tiles.
    decant_on(
        |to, from| {
            for i in (0..from.assigned.len()).rev() {
                if to.combined_volume(tiles[from.assigned[i].tile_idx].as_ref(), tiles) <= page_size
                {
                    let tile_ref = from.assigned.swap_remove(i);
                    to.assigned.push(tile_ref);
                }
            }
        },
        assignments,
    );
}

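/// A reference to a tile (by its index in the input collection), plus a bitset of pages that
/// the tile may no longer be assigned to (which guarantees that overload-and-remove terminates).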
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct TileRef {
    tile_idx: usize,
    banned_from: Vec<u8>,
}

/// A collection of references to tiles.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AssignedTiles {
    assigned: Vec<TileRef>,
}

impl TileRef {
    fn new(tile_idx: usize) -> Self {
        Self {
            tile_idx,
            banned_from: vec![],
        }
    }
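
    /// Returns whether this tile has been banned from page `page_idx`.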
    fn is_banned_from(&self, page_idx: usize) -> bool {
        let byte_idx = page_idx / 8;
        let bit_idx = page_idx % 8;
        self.banned_from
            .get(byte_idx)
            .map_or(false, |byte| byte & (1 << bit_idx) != 0)
    }

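    /// Bans this tile from page `page_idx`, growing the bitset as needed.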
    fn ban_from(&mut self, page_idx: usize) {
        let byte_idx = page_idx / 8;
        let bit_idx = page_idx % 8;
        if self.banned_from.len() <= byte_idx {
            self.banned_from.resize(byte_idx + 1, 0);
        }
        self.banned_from[byte_idx] |= 1 << bit_idx;
    }
}

impl AssignedTiles {
    /// Returns an iterator through the IDs of the tiles referenced by this page.
    pub fn referenced_tile_ids(&self) -> impl Iterator<Item = usize> + '_ {
        self.assigned.iter().map(|tile_ref| tile_ref.tile_idx)
    }

    /// Returns an iterator through the tiles referenced by this page.
    pub fn referenced_tiles<
        'this,
        'tiles: 'this,
        Tile: 'tiles + ?Sized,
        C: Index<usize, Output = Tile> + ?Sized,
    >(
        &'this self,
        tiles: &'tiles C,
    ) -> impl Iterator<Item = &'tiles Tile> + '_ {
        self.assigned
            .iter()
            .map(|tile_ref| &tiles[tile_ref.tile_idx])
    }

    /// Gather a set of all the symbols contained in this page's tiles.
    ///
    /// Due to [the nature of a set][HashSet], each symbol is only referenced a single time.
    pub fn unique_symbols<
        'tiles,
        Symbol: Eq + Hash,
        Tile: AsRef<[Symbol]> + ?Sized + 'tiles,
        C: Index<usize, Output = Tile> + ?Sized,
    >(
        &self,
        tiles: &'tiles C,
    ) -> HashSet<&'tiles Symbol> {
        self.referenced_tiles(tiles)
            .flat_map(|tile| tile.as_ref().iter())
            .collect()
    }

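    /// The page's "volume": the number of unique symbols across all of its tiles.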
    fn volume<
        Symbol: Eq + Hash,
        Tile: AsRef<[Symbol]> + ?Sized,
        C: Index<usize, Output = Tile> + ?Sized,
    >(
        &self,
        tiles: &C,
    ) -> usize {
        self.unique_symbols(tiles).len()
    }

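    /// Returns whether adding `tile` to this page would keep its volume within `page_size`.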
    fn can_fit<
        Symbol: Eq + Hash,
        Tile: AsRef<[Symbol]> + ?Sized,
        C: Index<usize, Output = Tile> + ?Sized,
    >(
        &self,
        tile: &Tile,
        page_size: usize,
        tiles: &C,
    ) -> bool {
        let mut unique_symbols = self.unique_symbols(tiles);
        unique_symbols.extend(tile.as_ref().iter());
        unique_symbols.len() <= page_size
    }

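    /// A tile's "relative size" with respect to this page: for each of the tile's symbols,
    /// count how many of the page's tiles contain it (`n`), and add `1 / (1 + n)`.
    /// For example, a 3-symbol tile not yet on the page, exactly one of whose symbols appears
    /// in exactly one of the page's tiles, has a relative size of 1/2 + 1 + 1 = 2.5; a tile
    /// sharing nothing with the page has a relative size equal to its full size.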
    fn relative_size_of<
        Symbol: Eq + Hash,
        Tile: AsRef<[Symbol]> + ?Sized,
        C: Index<usize, Output = Tile> + ?Sized,
    >(
        &self,
        tile: &Tile,
        tiles: &C,
    ) -> f64 {
        tile.as_ref().iter().fold(0., |acc, symbol| {
            let n = self
                .referenced_tiles(tiles)
                .filter(|referenced_tile| referenced_tile.as_ref().contains(symbol))
                .count();
            acc + 1. / (1 + n) as f64
        })
    }

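    /// The volume that this page would have if all the symbols yielded by `iter` were added.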
    fn combined_volume<
        'tiles,
        Symbol: Eq + Hash + 'tiles,
        Tile: AsRef<[Symbol]> + ?Sized + 'tiles,
        C: Index<usize, Output = Tile> + ?Sized,
        It: IntoIterator<Item = &'tiles Symbol>,
    >(
        &self,
        iter: It,
        tiles: &'tiles C,
    ) -> usize {
        let mut symbols = self.unique_symbols(tiles);
        symbols.extend(iter);
        symbols.len()
    }
}

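/// Returns `[(min_key, min_item), (max_key, max_item)]` for `it`, computed in a single pass,
/// or `None` if the iterator is empty.
/// Since keys are only [`PartialOrd`], items whose keys do not compare (e.g. NaN) are never
/// selected past the first element.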
fn minmax_by_key<K: PartialOrd + Copy, T: Copy, It: IntoIterator<Item = T>, F: FnMut(T) -> K>(
    it: It,
    mut key_func: F,
) -> Option<[(K, T); 2]> {
    let mut iter = it.into_iter().map(|item| (key_func(item), item));
    let first = iter.next()?;

    let mut min = first;
    let mut max = first;
    for item in iter {
        if item.0 < min.0 {
            min = item;
        } else if item.0 > max.0 {
            max = item;
        }
    }

    Some([min, max])
}
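
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal smoke test (illustrative): it only checks the invariants the algorithm
    // guarantees (every tile is assigned to exactly one page, and, assuming no tile is
    // larger than a page, no page's volume exceeds `page_size`), not any particular layout.
    #[test]
    fn packs_within_page_size() {
        let tiles: [&[u8]; 4] = [&[1, 2], &[2, 3, 4], &[5], &[1, 4]];
        let page_size = 4;
        let assignments = overload_and_remove::<_, _, [_]>(&tiles, page_size);

        // No page may hold more unique symbols than `page_size`.
        for page in &assignments {
            assert!(page.unique_symbols(&tiles).len() <= page_size);
        }
        // Every tile must be assigned to exactly one page.
        let mut assigned: Vec<usize> = assignments
            .iter()
            .flat_map(|page| page.referenced_tile_ids())
            .collect();
        assigned.sort_unstable();
        assert_eq!(assigned, [0, 1, 2, 3]);
    }
}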