1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
//! An indirection-collapsing container that generalizes [nested](https://crates.io/crates/nested).
//!
//! A `FlatVec` can be used like a `Vec<String>` or `Vec<Vec<u8>>`, but with a maximum of 2 heap
//! allocations instead of n + 1.
//!
//! Insertion into and retrieval from a `FlatVec` is mediated by two traits, `IntoFlat` and
//! `FromFlat`, which are both parameterized on two types. The simplest way to use this crate is to
//! `impl IntoFlat<T> for T` and `impl FromFlat<'_, T> for T` for some type `T` in your crate.
//!
//! But in general, the interface is `impl IntoFlat<Flattened> for Source` and `impl FromFlat<'a,
//! Dest> for Flattened`. An owned or borrowed type `Source` is effectively serialized into a
//! `FlatVec`, then an instance of a type that defines its ability to be constructed from a
//! `FlatVec<Flattened>` can be produced, possibly borrowing the data in the `FlatVec` to return a
//! specialized type that does not copy the data stored in the `FlatVec` to present a view of it.
//!
//! This interface is extremely powerful and essentially amounts to in-memory serialization and
//! conversion all in one. For example, a user can construct a `FlatVec` that compreses all of its
//! elements with gzip. I'm not saying that's a good idea. But you can.

#![forbid(unsafe_code)]

use std::marker::PhantomData;

/// An indirection-collapsing container with minimal allocation
#[derive(Clone)]
pub struct FlatVec<T> {
    data: Vec<u8>,
    ends: Vec<usize>,
    marker: PhantomData<T>,
}

impl<T> std::fmt::Debug for FlatVec<T> {
    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
        fmt.debug_struct("FlatVec")
            .field("data", &self.data)
            .field("ends", &self.ends)
            .finish()
    }
}

impl<T> Default for FlatVec<T> {
    #[inline]
    fn default() -> Self {
        Self {
            data: Vec::new(),
            ends: Vec::new(),
            marker: PhantomData::default(),
        }
    }
}

impl<'a, T: 'a> FlatVec<T> {
    /// Create a new `FlatVec`
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the number of `T` in a `FlatVec<T>`
    #[inline]
    pub fn len(&self) -> usize {
        self.ends.len()
    }

    /// Returns the number heap bytes used to store the elements of a `FlatVec`, but not the bytes
    /// used to store the indices
    #[inline]
    pub fn data_len(&self) -> usize {
        self.data.len()
    }

    /// Returns true if the len is 0
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.ends.len() == 0
    }

    /// Removes the `index`th element of a `FlatVec`.
    /// This function is `O(self.len() + self.data_len())`
    #[inline]
    pub fn remove(&mut self, index: usize) {
        let end = self.ends[index];
        let start = if index == 0 { 0 } else { self.ends[index - 1] };
        self.data.drain(start..end);
        self.ends.remove(index);
        let len = end - start;
        self.ends.iter_mut().skip(index).for_each(|end| *end -= len);
    }

    /// Appends an element to the back of the collection
    #[inline]
    pub fn push<Source>(&mut self, input: Source)
    where
        Source: IntoFlat<T>,
    {
        input.into_flat(&mut Storage(&mut self.data));
        self.ends.push(self.data.len());
    }

    /// Construct a `Dest` from the `index`th element's stored representation
    #[inline]
    pub fn get<Dest: 'a>(&'a self, index: usize) -> Option<Dest>
    where
        Dest: FromFlat<'a, T>,
    {
        let end = *self.ends.get(index)?;
        let start = if index == 0 { 0 } else { self.ends[index - 1] };
        Some(Dest::from_flat(&self.data[start..end]))
    }

    /// Returns an iterator that constructs a `Dest` from each element's stored representation
    #[inline]
    pub fn iter<Dest: 'a>(&'a self) -> impl Iterator<Item = Dest> + 'a
    where
        Dest: FromFlat<'a, T>,
    {
        std::iter::once(&0usize)
            .chain(self.ends.iter())
            .zip(self.ends.iter())
            .map(move |(&start, &end)| Dest::from_flat(&self.data[start..end]))
    }
}

/// A wrapper over the innards of a `FlatVec` which exposes mutating operations which cannot
/// corrupt other elements during a push
pub struct Storage<'a>(&'a mut Vec<u8>);

impl Storage<'_> {
    /// Reserves capacity for at least `len` additional bytes
    #[inline]
    pub fn reserve(&mut self, len: usize) {
        self.0.reserve(len);
    }

    /// Inserts the bytes described by `iter`
    #[inline]
    pub fn extend<Iter, T>(&mut self, iter: Iter)
    where
        Iter: IntoIterator<Item = T>,
        T: std::borrow::Borrow<u8>,
    {
        self.0.extend(iter.into_iter().map(|b| *b.borrow()));
    }
}

/// Implement `IntoFlat<Flattened> for Source` to insert a `Source` into a `FlatVec<Flattened>`
pub trait IntoFlat<Flattened> {
    fn into_flat(self, storage: &mut Storage);
}

/// Implement `FromFlat<'a, Flattened> for Dest` to get a `Dest` from a `FlatVec<Flattened>`
pub trait FromFlat<'a, Flattened> {
    fn from_flat(data: &'a [u8]) -> Self;
}

impl IntoFlat<String> for String {
    #[inline]
    fn into_flat(self, store: &mut Storage) {
        store.extend(self.bytes());
    }
}

impl FromFlat<'_, String> for String {
    #[inline]
    fn from_flat(data: &[u8]) -> Self {
        String::from_utf8(data.to_vec()).unwrap()
    }
}

impl IntoFlat<String> for &str {
    #[inline]
    fn into_flat(self, store: &mut Storage) {
        store.extend(self.bytes());
    }
}

impl<'a> FromFlat<'a, String> for &'a str {
    #[inline]
    fn from_flat(data: &'a [u8]) -> &'a str {
        std::str::from_utf8(&data).unwrap()
    }
}

impl<Iter, T> IntoFlat<Vec<u8>> for Iter
where
    Iter: IntoIterator<Item = T>,
    T: std::borrow::Borrow<u8>,
{
    #[inline]
    fn into_flat(self, store: &mut Storage) {
        store.extend(self.into_iter());
    }
}

impl FromFlat<'_, Vec<u8>> for Vec<u8> {
    #[inline]
    fn from_flat(data: &[u8]) -> Vec<u8> {
        data.to_vec()
    }
}

impl<'a> FromFlat<'a, Vec<u8>> for &'a [u8] {
    #[inline]
    fn from_flat(data: &'a [u8]) -> &'a [u8] {
        data
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn push_get() {
        let mut names = FlatVec::new();
        names.push("Cerryl");
        names.push("Jeslek".to_string());
        assert_eq!(names.get(0), Some("Cerryl"));
        assert_eq!(names.get(1), Some("Jeslek"));
        assert_eq!(names.get::<String>(2), None);
    }

    #[test]
    fn iter() {
        let mut names = FlatVec::new();
        names.push("Cerryl".to_string());
        names.push("Jeslek".to_string());
        let as_vec = names.iter::<String>().collect::<Vec<_>>();
        assert_eq!(as_vec, vec!["Cerryl".to_string(), "Jeslek".to_string()]);
    }
}