matrix_sdk_common/linked_chunk/
builder.rs

1// Copyright 2024 The Matrix.org Foundation C.I.C.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::{
16    collections::{BTreeMap, HashSet},
17    marker::PhantomData,
18};
19
20use tracing::error;
21
22use super::{
23    Chunk, ChunkContent, ChunkIdentifier, ChunkIdentifierGenerator, Ends, LinkedChunk,
24    ObservableUpdates, RawChunk,
25};
26
27/// A temporary chunk representation in the [`LinkedChunkBuilder`].
28///
29/// Instead of using linking the chunks with pointers, this uses
30/// [`ChunkIdentifier`] as the temporary links to the previous and next chunks,
31/// which will get resolved later when re-building the full data structure. This
32/// allows using chunks that references other chunks that aren't known yet.
33struct TemporaryChunk<Item, Gap> {
34    previous: Option<ChunkIdentifier>,
35    next: Option<ChunkIdentifier>,
36    content: ChunkContent<Item, Gap>,
37}
38
39/// A data structure to rebuild a linked chunk from its raw representation.
40///
41/// A linked chunk can be rebuilt incrementally from its internal
42/// representation, with the chunks being added *in any order*, as long as they
43/// form a single connected component eventually (viz., there's no
44/// subgraphs/sublists isolated from the one final linked list). If they don't,
45/// then the final call to [`LinkedChunkBuilder::build()`] will result in an
46/// error).
47#[allow(missing_debug_implementations)]
48pub struct LinkedChunkBuilder<const CAP: usize, Item, Gap> {
49    /// Work-in-progress chunks.
50    chunks: BTreeMap<ChunkIdentifier, TemporaryChunk<Item, Gap>>,
51
52    /// Is the final `LinkedChunk` expected to include an update history, as if
53    /// it were created with [`LinkedChunk::new_with_update_history`]?
54    build_with_update_history: bool,
55}
56
57impl<const CAP: usize, Item, Gap> Default for LinkedChunkBuilder<CAP, Item, Gap> {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63impl<const CAP: usize, Item, Gap> LinkedChunkBuilder<CAP, Item, Gap> {
64    /// Create an empty [`LinkedChunkBuilder`] with no update history.
65    pub fn new() -> Self {
66        Self { chunks: Default::default(), build_with_update_history: false }
67    }
68
69    /// Stash a gap chunk with its content.
70    ///
71    /// This can be called even if the previous and next chunks have not been
72    /// added yet. Resolving these chunks will happen at the time of calling
73    /// [`LinkedChunkBuilder::build()`].
74    pub fn push_gap(
75        &mut self,
76        previous: Option<ChunkIdentifier>,
77        id: ChunkIdentifier,
78        next: Option<ChunkIdentifier>,
79        content: Gap,
80    ) {
81        let chunk = TemporaryChunk { previous, next, content: ChunkContent::Gap(content) };
82        self.chunks.insert(id, chunk);
83    }
84
85    /// Stash an item chunk with its contents.
86    ///
87    /// This can be called even if the previous and next chunks have not been
88    /// added yet. Resolving these chunks will happen at the time of calling
89    /// [`LinkedChunkBuilder::build()`].
90    pub fn push_items(
91        &mut self,
92        previous: Option<ChunkIdentifier>,
93        id: ChunkIdentifier,
94        next: Option<ChunkIdentifier>,
95        items: impl IntoIterator<Item = Item>,
96    ) {
97        let chunk = TemporaryChunk {
98            previous,
99            next,
100            content: ChunkContent::Items(items.into_iter().collect()),
101        };
102        self.chunks.insert(id, chunk);
103    }
104
105    /// Request that the resulting linked chunk will have an update history, as
106    /// if it were created with [`LinkedChunk::new_with_update_history`].
107    pub fn with_update_history(&mut self) {
108        self.build_with_update_history = true;
109    }
110
111    /// Run all error checks before reconstructing the full linked chunk.
112    ///
113    /// Must be called after checking `self.chunks` isn't empty in
114    /// [`Self::build`].
115    ///
116    /// Returns the identifier of the first chunk.
117    fn check_consistency(&mut self) -> Result<ChunkIdentifier, LinkedChunkBuilderError> {
118        // Look for the first id.
119        let first_id =
120            self.chunks.iter().find_map(|(id, chunk)| chunk.previous.is_none().then_some(*id));
121
122        // There's no first chunk, but we've checked that `self.chunks` isn't empty:
123        // it's a malformed list.
124        let Some(first_id) = first_id else {
125            return Err(LinkedChunkBuilderError::MissingFirstChunk);
126        };
127
128        // We're going to iterate from the first to the last chunk.
129        // Keep track of chunks we've already visited.
130        let mut visited = HashSet::new();
131
132        // Start from the first chunk.
133        let mut maybe_cur = Some(first_id);
134
135        while let Some(cur) = maybe_cur {
136            // The chunk must be referenced in `self.chunks`.
137            let Some(chunk) = self.chunks.get(&cur) else {
138                return Err(LinkedChunkBuilderError::MissingChunk { id: cur });
139            };
140
141            if let ChunkContent::Items(items) = &chunk.content {
142                if items.len() > CAP {
143                    return Err(LinkedChunkBuilderError::ChunkTooLarge { id: cur });
144                }
145            }
146
147            // If it's not the first chunk,
148            if cur != first_id {
149                // It must have a previous link.
150                let Some(prev) = chunk.previous else {
151                    return Err(LinkedChunkBuilderError::MultipleFirstChunks {
152                        first_candidate: first_id,
153                        second_candidate: cur,
154                    });
155                };
156
157                // And we must have visited its predecessor at this point, since we've
158                // iterated from the first chunk.
159                if !visited.contains(&prev) {
160                    return Err(LinkedChunkBuilderError::MissingChunk { id: prev });
161                }
162            }
163
164            // Add the current chunk to the list of seen chunks.
165            if !visited.insert(cur) {
166                // If we didn't insert, then it was already visited: there's a cycle!
167                return Err(LinkedChunkBuilderError::Cycle { repeated: cur });
168            }
169
170            // Move on to the next chunk. If it's none, we'll quit the loop.
171            maybe_cur = chunk.next;
172        }
173
174        // If there are more chunks than those we've visited: some of them were not
175        // linked to the "main" branch of the linked list, so we had multiple connected
176        // components.
177        if visited.len() != self.chunks.len() {
178            return Err(LinkedChunkBuilderError::MultipleConnectedComponents);
179        }
180
181        Ok(first_id)
182    }
183
184    pub fn build(mut self) -> Result<Option<LinkedChunk<CAP, Item, Gap>>, LinkedChunkBuilderError> {
185        if self.chunks.is_empty() {
186            return Ok(None);
187        }
188
189        // Run checks.
190        let first_id = self.check_consistency()?;
191
192        // We're now going to iterate from the first to the last chunk. As we're doing
193        // this, we're also doing a few other things:
194        //
195        // - rebuilding the final `Chunk`s one by one, that will be linked using
196        //   pointers,
197        // - counting items from the item chunks we'll encounter,
198        // - finding the max `ChunkIdentifier` (`max_chunk_id`).
199
200        let mut max_chunk_id = first_id.index();
201
202        // Small helper to graduate a temporary chunk into a final one. As we're doing
203        // this, we're also updating the maximum chunk id (that will be used to
204        // set up the id generator), and the number of items in this chunk.
205
206        let mut graduate_chunk = |id: ChunkIdentifier| {
207            let temp = self.chunks.remove(&id)?;
208
209            // Update the maximum chunk identifier, while we're around.
210            max_chunk_id = max_chunk_id.max(id.index());
211
212            // Graduate the current temporary chunk into a final chunk.
213            let chunk_ptr = Chunk::new_leaked(id, temp.content);
214
215            Some((temp.next, chunk_ptr))
216        };
217
218        let Some((mut next_chunk_id, first_chunk_ptr)) = graduate_chunk(first_id) else {
219            // Can't really happen, but oh well.
220            return Err(LinkedChunkBuilderError::MissingFirstChunk);
221        };
222
223        let mut prev_chunk_ptr = first_chunk_ptr;
224
225        while let Some(id) = next_chunk_id {
226            let Some((new_next, mut chunk_ptr)) = graduate_chunk(id) else {
227                // Can't really happen, but oh well.
228                return Err(LinkedChunkBuilderError::MissingChunk { id });
229            };
230
231            let chunk = unsafe { chunk_ptr.as_mut() };
232
233            // Link the current chunk to its previous one.
234            let prev_chunk = unsafe { prev_chunk_ptr.as_mut() };
235            prev_chunk.next = Some(chunk_ptr);
236            chunk.previous = Some(prev_chunk_ptr);
237
238            // Prepare for the next iteration.
239            prev_chunk_ptr = chunk_ptr;
240            next_chunk_id = new_next;
241        }
242
243        debug_assert!(self.chunks.is_empty());
244
245        // Maintain the convention that `Ends::last` may be unset.
246        let last_chunk_ptr = prev_chunk_ptr;
247        let last_chunk_ptr =
248            if first_chunk_ptr == last_chunk_ptr { None } else { Some(last_chunk_ptr) };
249        let links = Ends { first: first_chunk_ptr, last: last_chunk_ptr };
250
251        let chunk_identifier_generator =
252            ChunkIdentifierGenerator::new_from_previous_chunk_identifier(ChunkIdentifier::new(
253                max_chunk_id,
254            ));
255
256        let updates =
257            if self.build_with_update_history { Some(ObservableUpdates::new()) } else { None };
258
259        Ok(Some(LinkedChunk { links, chunk_identifier_generator, updates, marker: PhantomData }))
260    }
261
262    /// Fills a linked chunk builder from all the given raw parts.
263    pub fn from_raw_parts(raws: Vec<RawChunk<Item, Gap>>) -> Self {
264        let mut this = Self::new();
265        for raw in raws {
266            match raw.content {
267                ChunkContent::Gap(gap) => {
268                    this.push_gap(raw.previous, raw.identifier, raw.next, gap);
269                }
270                ChunkContent::Items(vec) => {
271                    this.push_items(raw.previous, raw.identifier, raw.next, vec);
272                }
273            }
274        }
275        this
276    }
277}
278
279#[derive(thiserror::Error, Debug)]
280pub enum LinkedChunkBuilderError {
281    #[error("chunk with id {} is too large", id.index())]
282    ChunkTooLarge { id: ChunkIdentifier },
283
284    #[error("there's no first chunk")]
285    MissingFirstChunk,
286
287    #[error("there are multiple first chunks")]
288    MultipleFirstChunks { first_candidate: ChunkIdentifier, second_candidate: ChunkIdentifier },
289
290    #[error("unable to resolve chunk with id {}", id.index())]
291    MissingChunk { id: ChunkIdentifier },
292
293    #[error("rebuilt chunks form a cycle: repeated identifier: {}", repeated.index())]
294    Cycle { repeated: ChunkIdentifier },
295
296    #[error("multiple connected components")]
297    MultipleConnectedComponents,
298}
299
300#[cfg(test)]
301mod tests {
302    use assert_matches::assert_matches;
303
304    use super::LinkedChunkBuilder;
305    use crate::linked_chunk::{ChunkIdentifier, LinkedChunkBuilderError};
306
307    #[test]
308    fn test_empty() {
309        let lcb = LinkedChunkBuilder::<3, char, char>::new();
310
311        // Building an empty linked chunk works, and returns `None`.
312        let lc = lcb.build().unwrap();
313        assert!(lc.is_none());
314    }
315
316    #[test]
317    fn test_success() {
318        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
319
320        let cid0 = ChunkIdentifier::new(0);
321        let cid1 = ChunkIdentifier::new(1);
322        // Note: cid2 is missing on purpose, to confirm that it's fine to have holes in
323        // the chunk id space.
324        let cid3 = ChunkIdentifier::new(3);
325
326        // Check that we can successfully create a linked chunk, independently of the
327        // order in which chunks are added.
328        //
329        // The final chunk will contain [cid0 <-> cid1 <-> cid3], in this order.
330
331        // Adding chunk cid0.
332        lcb.push_items(None, cid0, Some(cid1), vec!['a', 'b', 'c']);
333        // Adding chunk cid3.
334        lcb.push_items(Some(cid1), cid3, None, vec!['d', 'e']);
335        // Adding chunk cid1.
336        lcb.push_gap(Some(cid0), cid1, Some(cid3), 'g');
337
338        let mut lc =
339            lcb.build().expect("building works").expect("returns a non-empty linked chunk");
340
341        // Check the entire content first.
342        assert_items_eq!(lc, ['a', 'b', 'c'] [-] ['d', 'e']);
343
344        // Run checks on the first chunk.
345        let mut chunks = lc.chunks();
346        let first_chunk = chunks.next().unwrap();
347        {
348            assert!(first_chunk.previous().is_none());
349            assert_eq!(first_chunk.identifier(), cid0);
350        }
351
352        // Run checks on the second chunk.
353        let second_chunk = chunks.next().unwrap();
354        {
355            assert_eq!(second_chunk.identifier(), first_chunk.next().unwrap().identifier());
356            assert_eq!(second_chunk.previous().unwrap().identifier(), first_chunk.identifier());
357            assert_eq!(second_chunk.identifier(), cid1);
358        }
359
360        // Run checks on the third chunk.
361        let third_chunk = chunks.next().unwrap();
362        {
363            assert_eq!(third_chunk.identifier(), second_chunk.next().unwrap().identifier());
364            assert_eq!(third_chunk.previous().unwrap().identifier(), second_chunk.identifier());
365            assert!(third_chunk.next().is_none());
366            assert_eq!(third_chunk.identifier(), cid3);
367        }
368
369        // There's no more chunk.
370        assert!(chunks.next().is_none());
371
372        // The linked chunk had 5 items.
373        assert_eq!(lc.num_items(), 5);
374
375        // Now, if we add a new chunk, its identifier should be the previous one we used
376        // + 1.
377        lc.push_gap_back('h');
378
379        let last_chunk = lc.chunks().last().unwrap();
380        assert_eq!(last_chunk.identifier(), ChunkIdentifier::new(cid3.index() + 1));
381    }
382
383    #[test]
384    fn test_chunk_too_large() {
385        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
386
387        let cid0 = ChunkIdentifier::new(0);
388
389        // Adding a chunk with 4 items will fail, because the max capacity specified in
390        // the builder generics is 3.
391        lcb.push_items(None, cid0, None, vec!['a', 'b', 'c', 'd']);
392
393        let res = lcb.build();
394        assert_matches!(res, Err(LinkedChunkBuilderError::ChunkTooLarge { id }) => {
395            assert_eq!(id, cid0);
396        });
397    }
398
399    #[test]
400    fn test_missing_first_chunk() {
401        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
402
403        let cid0 = ChunkIdentifier::new(0);
404        let cid1 = ChunkIdentifier::new(1);
405        let cid2 = ChunkIdentifier::new(2);
406
407        lcb.push_gap(Some(cid2), cid0, Some(cid1), 'g');
408        lcb.push_items(Some(cid0), cid1, Some(cid2), ['a', 'b', 'c']);
409        lcb.push_items(Some(cid1), cid2, Some(cid0), ['d', 'e', 'f']);
410
411        let res = lcb.build();
412        assert_matches!(res, Err(LinkedChunkBuilderError::MissingFirstChunk));
413    }
414
415    #[test]
416    fn test_multiple_first_chunks() {
417        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
418
419        let cid0 = ChunkIdentifier::new(0);
420        let cid1 = ChunkIdentifier::new(1);
421
422        lcb.push_gap(None, cid0, Some(cid1), 'g');
423        // Second chunk lies and pretends to be the first too.
424        lcb.push_items(None, cid1, Some(cid0), ['a', 'b', 'c']);
425
426        let res = lcb.build();
427        assert_matches!(res, Err(LinkedChunkBuilderError::MultipleFirstChunks { first_candidate, second_candidate }) => {
428            assert_eq!(first_candidate, cid0);
429            assert_eq!(second_candidate, cid1);
430        });
431    }
432
433    #[test]
434    fn test_missing_chunk() {
435        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
436
437        let cid0 = ChunkIdentifier::new(0);
438        let cid1 = ChunkIdentifier::new(1);
439        lcb.push_gap(None, cid0, Some(cid1), 'g');
440
441        let res = lcb.build();
442        assert_matches!(res, Err(LinkedChunkBuilderError::MissingChunk { id }) => {
443            assert_eq!(id, cid1);
444        });
445    }
446
447    #[test]
448    fn test_cycle() {
449        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
450
451        let cid0 = ChunkIdentifier::new(0);
452        let cid1 = ChunkIdentifier::new(1);
453        lcb.push_gap(None, cid0, Some(cid1), 'g');
454        lcb.push_gap(Some(cid0), cid1, Some(cid0), 'g');
455
456        let res = lcb.build();
457        assert_matches!(res, Err(LinkedChunkBuilderError::Cycle { repeated }) => {
458            assert_eq!(repeated, cid0);
459        });
460    }
461
462    #[test]
463    fn test_multiple_connected_components() {
464        let mut lcb = LinkedChunkBuilder::<3, char, char>::new();
465
466        let cid0 = ChunkIdentifier::new(0);
467        let cid1 = ChunkIdentifier::new(1);
468        let cid2 = ChunkIdentifier::new(2);
469
470        // cid0 and cid1 are linked to each other.
471        lcb.push_gap(None, cid0, Some(cid1), 'g');
472        lcb.push_items(Some(cid0), cid1, None, ['a', 'b', 'c']);
473        // cid2 stands on its own.
474        lcb.push_items(None, cid2, None, ['d', 'e', 'f']);
475
476        let res = lcb.build();
477        assert_matches!(res, Err(LinkedChunkBuilderError::MultipleConnectedComponents));
478    }
479}