// screeps_map_processing/run_length_encoding/generic_rle.rs
1//! Provides a generic implementation of Run Length Encoding for use with arbitrary types.
2
3use rle::{AppendRle, MergableSpan};
4
/// Represents an individual run of a specific token.
///
/// Individual runs are of indefinite length by themselves. The end of the run (and thus the length) is
/// dictated by the start value of the next run in your sequence.
///
/// Type `T` is the token value that you're encoding.
/// Type `S` is the sequence index type. The default of `usize` should work for most cases, but you
/// can save space if you know that your token sequences have a length that can be specified with a smaller
/// sized type.
// `Debug`, `PartialEq`, and `Eq` are derived eagerly for a public type; the derives
// add conditional bounds, so they cost callers nothing unless used.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct IndexedRLE<T: Clone + Eq, S = usize> {
    /// The token value that this run repeats.
    pub token: T,
    /// The sequence index at which this run begins.
    pub start: S,
}
19
20impl<T: Clone + Eq, S: Copy + PartialEq + PartialOrd> MergableSpan for IndexedRLE<T, S> {
21    fn can_append(&self, other: &Self) -> bool {
22        // Since this is an indefinite-length run, we only need to check for start value orderings
23        (self.token == other.token) & (self.start <= other.start)
24    }
25
26    fn append(&mut self, other: Self) {
27        // Appending the same token does nothing, since this just measures the start of the run
28        ()
29    }
30
31    fn prepend(&mut self, other: Self) {
32        // Unlike when appending, when prepending we do need to keep track of which run starts
33        // sooner, since if the other run starts sooner, we need to extend this run back to that
34        // one.
35        if other.start < self.start {
36            self.start = other.start;
37        }
38    }
39}
40
41impl<T: Clone + Eq, S> IndexedRLE<T, S> {
42    pub fn new(token: T, start: S) -> Self {
43        Self { token, start }
44    }
45
46    /// The amount of memory it takes to store this data.
47    pub fn memory_size(&self) -> usize {
48        size_of::<T>() + size_of::<S>()
49    }
50}
51
52/// An ordered sequence of runs, searchable in O(lg(n)) time via binary search.
53pub struct BinarySearchRLE<T: Clone + Eq, S = usize> {
54    vec: Vec<IndexedRLE<T, S>>,
55}
56
57impl<T: Clone + Eq, S: Copy + PartialEq + PartialOrd> BinarySearchRLE<T, S> {
58    /// Create a new, empty BinarySearchRLE.
59    pub fn new() -> Self {
60        Self {
61            vec: Vec::new(),
62        }
63    }
64
65    /// Appends the provided run to the sequence.
66    ///
67    /// Returns true if the run was added to the end of the internal list, and returns false if it
68    /// was instead merged into the last run.
69    ///
70    /// Runs are expected to be inserted in ascending sorted order; this does no sorting.
71    pub fn append_run(&mut self, run: IndexedRLE<T, S>) -> bool {
72        self.vec.push_rle(run)
73    }
74
75    /// Appends an individual token to the sequence as its own run.
76    ///
77    /// Returns true if the token was added to the end of the internal list as its own run, and
78    /// returns false if it was instead merged into the last run.
79    ///
80    /// Tokens are expected to be inserted in ascending sorted order; this does no sorting.
81    pub fn append_token(&mut self, token: T, start: S) -> bool {
82        let run = IndexedRLE::new(token, start);
83        self.append_run(run)
84    }
85
86    /// Search for the token value at a particular index in the sequence.
87    ///
88    /// Returns None if:
89    /// - The sequence is empty (there are no runs)
90    /// - The sequence index requested is before the start of the first run
91    ///
92    /// Otherwise, this returns the token value for the run that contains the requested index.
93    pub fn find_token_at_index(&self, index: S) -> Option<T> {
94        if self.vec.len() == 0 {
95            None
96        } else {
97            // Edge case: If the token index requested is before the first run, return None and
98            // avoid a fruitless search
99            if index < self.vec[0].start {
100                None
101            } else {
102                // Slices already implement binary search, so we can avoid all the manual implementation
103                let idx = (&self.vec).partition_point(|item| item.start < index);
104
105                let run_idx = if idx == self.vec.len() {
106                    // If the token index requested is after the start of the last run, the partition point can
107                    // return self.vec.len() as the run index
108                    idx - 1
109                } else {
110                    // Two cases:
111                    // - The token index is at the start of a run; this means we want the current
112                    // run that partition point gave us
113                    // - The token index is in the middle of a run; this means we want the previous
114                    // run from what `partition_point` gave us
115                    let current_run = &self.vec[idx];
116                    if current_run.start == index {
117                        idx
118                    } else {
119                        idx - 1
120                    }
121                };
122
123                Some(self.vec[run_idx].token.clone())
124            }
125        }
126    }
127
128    /// Returns the total number of runs contained in the sequence.
129    pub fn num_runs(&self) -> usize {
130        self.vec.len()
131    }
132
133    /// The amount of memory it takes to store this data.
134    pub fn memory_size(&self) -> usize {
135        self.vec.len() * size_of::<IndexedRLE<T, S>>() + size_of::<Vec<IndexedRLE<T, S>>>()
136    }
137}
138
139
140#[cfg(test)]
141mod test {
142    use super::*;
143    use itertools::Itertools;
144
145    #[test]
146    pub fn indexed_rle_can_append_accepts_valid_runs() {
147        let max_start: usize = 1000;
148        for start in 0..=max_start {
149            for after in (start + 1)..=(max_start + 1) {
150                let lower_rle: IndexedRLE<bool> = IndexedRLE::new(true, start);
151                let higher_matching_rle: IndexedRLE<bool> = IndexedRLE::new(true, after);
152                let higher_nonmatching_rle: IndexedRLE<bool> = IndexedRLE::new(false, after);
153
154                assert!(lower_rle.can_append(&lower_rle), "Valid self append failed: {start} = {after}");
155                assert!(lower_rle.can_append(&higher_matching_rle), "Valid append failed: {start} < {after}");
156                assert!(!lower_rle.can_append(&higher_nonmatching_rle), "Non-matching token append succeeded");
157                assert!(!higher_matching_rle.can_append(&lower_rle), "Out-of-order matching token append succeeded: {start} < {after}");
158                assert!(!higher_nonmatching_rle.can_append(&lower_rle), "Out-of-order non-matching token append succeeded");
159            }
160        }
161    }
162
163    #[test]
164    pub fn binary_search_rle_append_run_merges_properly() {
165        let mut rle_data = BinarySearchRLE::<bool>::new();
166
167        let start = 10;
168        let after = 20;
169
170        let lower_rle: IndexedRLE<bool> = IndexedRLE::new(true, start);
171        let higher_matching_rle: IndexedRLE<bool> = IndexedRLE::new(true, after);
172        let higher_nonmatching_rle: IndexedRLE<bool> = IndexedRLE::new(false, after);
173
174        assert_eq!(rle_data.num_runs(), 0); // There should be no runs before we've started anything
175
176        // Add the initial run
177        rle_data.append_run(lower_rle);
178        assert_eq!(rle_data.num_runs(), 1);
179
180        // Test that appending a matching token run doesn't increase the length of the internal
181        // data vector
182        rle_data.append_run(higher_matching_rle);
183        assert_eq!(rle_data.num_runs(), 1); // We appended a matching run, so there should not be an increase in the total number of runs
184
185        // Test that appending a non-matching token run increases the length of the internal data
186        // vector
187        rle_data.append_run(higher_nonmatching_rle);
188        assert_eq!(rle_data.num_runs(), 2); // We appended a non-matching run, so there should be an increase in the total number of runs
189    }
190
191    #[test]
192    pub fn binary_search_rle_append_token_merges_properly() {
193        let mut rle_data = BinarySearchRLE::<bool>::new();
194
195        let start = 10;
196        let after = 20;
197
198        let matching_token = true;
199        let nonmatching_token = false;
200
201        let lower_rle: IndexedRLE<bool> = IndexedRLE::new(true, start);
202
203        assert_eq!(rle_data.num_runs(), 0); // There should be no runs before we've started anything
204
205        // Add the initial run
206        rle_data.append_run(lower_rle);
207        assert_eq!(rle_data.num_runs(), 1);
208
209        // Test that appending a matching token doesn't increase the length of the internal
210        // data vector
211        rle_data.append_token(matching_token, after);
212        assert_eq!(rle_data.num_runs(), 1); // We appended a matching token, so there should not be an increase in the total number of runs
213
214        // Test that appending a non-matching token increases the length of the internal data
215        // vector
216        rle_data.append_token(nonmatching_token, after);
217        assert_eq!(rle_data.num_runs(), 2); // We appended a non-matching token, so there should be an increase in the total number of runs
218    }
219
220    #[test]
221    pub fn binary_search_rle_find_token_at_index_returns_none_when_empty() {
222        let mut rle_data = BinarySearchRLE::<bool>::new();
223        assert_eq!(rle_data.find_token_at_index(0), None);
224    }
225
226    #[test]
227    pub fn binary_search_rle_find_token_at_index_returns_none_for_index_before_first_run() {
228        let mut rle_data = BinarySearchRLE::<bool>::new();
229        let rle: IndexedRLE<bool> = IndexedRLE::new(true, 10);
230        rle_data.append_run(rle);
231        assert_eq!(rle_data.num_runs(), 1);
232        assert_eq!(rle_data.find_token_at_index(5), None);
233    }
234
235    #[test]
236    pub fn binary_search_rle_find_token_at_index_works() {
237        let mut rle_data = BinarySearchRLE::<bool>::new();
238
239        let (first_start, first_len) = (10, 4);
240        let (second_start, second_len) = ((first_start + first_len), 13);
241        let (third_start, third_len) = ((second_start + second_len), 2);
242        let (fourth_start, fourth_len) = ((third_start + third_len), 1);
243        let (fifth_start, fifth_len) = ((fourth_start + fourth_len), 21);
244
245        let first_end_inclusive = second_start - 1;
246        let second_end_inclusive = third_start - 1;
247        let third_end_inclusive = fourth_start - 1;
248        let fourth_end_inclusive = fifth_start - 1;
249        let fifth_end_inclusive = fifth_start + fifth_len;
250
251        let first_rle: IndexedRLE<bool> = IndexedRLE::new(true, first_start);
252        let second_rle: IndexedRLE<bool> = IndexedRLE::new(false, second_start);
253        let third_rle: IndexedRLE<bool> = IndexedRLE::new(true, third_start);
254        let fourth_rle: IndexedRLE<bool> = IndexedRLE::new(false, fourth_start);
255        let fifth_rle: IndexedRLE<bool> = IndexedRLE::new(true, fifth_start);
256
257        let true_token_ranges: Vec<(usize, usize)> = vec!((first_start, second_start), (third_start, fourth_start), (fifth_start, fifth_end_inclusive+1));
258        let false_token_ranges: Vec<(usize, usize)> = vec!((second_start, third_start), (fourth_start, fifth_start));
259
260        println!("First range inclusive: {first_start} - {first_end_inclusive}");
261        println!("Second range inclusive: {second_start} - {second_end_inclusive}");
262        println!("Third range inclusive: {third_start} - {third_end_inclusive}");
263        println!("Fourth range inclusive: {fourth_start} - {fourth_end_inclusive}");
264        println!("Fifth range inclusive: {fifth_start} - {fifth_end_inclusive}");
265
266        assert_eq!(rle_data.num_runs(), 0); // There should be no runs before we've started anything
267
268        // Add the runs
269        rle_data.append_run(first_rle);
270        rle_data.append_run(second_rle);
271        rle_data.append_run(third_rle);
272        rle_data.append_run(fourth_rle);
273        rle_data.append_run(fifth_rle);
274        assert_eq!(rle_data.num_runs(), 5);
275
276        // Validate that the known token ranges match
277        for (range_start, range_end) in true_token_ranges {
278            for token_index in range_start..range_end {
279                assert_eq!(rle_data.find_token_at_index(token_index), Some(true), "Token index {token_index} expected to be true");
280            }
281        }
282
283        for (range_start, range_end) in false_token_ranges {
284            for token_index in range_start..range_end {
285                assert_eq!(rle_data.find_token_at_index(token_index), Some(false), "Token index {token_index} expected to be false");
286            }
287        }
288    }
289}