assembly_data/fdb/store/
mod.rs

1//! # Arena-Store & Writer
2//!
3//! This module contains a (currently write-only) structure that represents a complete
4//! FDB file. This structure can be used to create new FDB files.
5//!
6//! ## Usage
7//!
8//! ```
9//! use assembly_data::fdb::{
10//!     common::{ValueType, Latin1String},
11//!     core::{Field},
12//!     store::{Database, Table},
13//! };
14//!
15//! // Create a new database
16//! let mut db = Database::new();
17//!
18//! // Create a table
19//! let mut table = Table::new(16);
20//!
21//! // Add columns to the table
22//! table.push_column(Latin1String::encode("ID"), ValueType::Integer);
23//!
24//! // Add data to the table
25//! table.push_row(1, &[Field::Integer(1)]);
26//! table.push_row(2, &[Field::Integer(2)]);
27//! table.push_row(5, &[Field::Integer(5)]);
28//! table.push_row(6, &[Field::Integer(6)]);
29//!
30//! // Add table to the database
31//! db.push_table(Latin1String::encode("Table"), table);
32//!
33//! // Write the database to a type that implements [`std::io::Write`]
34//! let mut out: Vec<u8> = Vec::new();
35//! db.write(&mut out).expect("success");
36//! ```
37
38use std::{
39    collections::BTreeMap,
40    convert::{TryFrom, TryInto},
41    io,
42    mem::size_of,
43};
44
45use self::writer::WriteLE;
46
47use super::{
48    common::{Context, Latin1Str, Latin1String, Value, ValueMapperMut, ValueType},
49    core::OwnedContext,
50    file::{
51        ArrayHeader, FDBBucketHeader, FDBColumnHeader, FDBFieldData, FDBHeader, FDBRowHeader,
52        FDBRowHeaderListEntry, FDBTableDataHeader, FDBTableDefHeader, FDBTableHeader,
53    },
54};
55
56mod writer;
57
58#[cfg(test)]
59mod tests;
60
61/// The whole database
62pub struct Database {
63    tables: BTreeMap<Latin1String, Table>,
64}
65
66impl Default for Database {
67    fn default() -> Self {
68        Self::new()
69    }
70}
71
72impl Database {
73    /// Create a new database
74    pub fn new() -> Self {
75        Self {
76            tables: BTreeMap::new(),
77        }
78    }
79
80    /// Push a table to the database
81    pub fn push_table<S>(&mut self, name: S, table: Table)
82    where
83        S: Into<Latin1String>,
84    {
85        self.tables.insert(name.into(), table);
86    }
87
88    /// Computes the size of the serialized database
89    pub fn compute_size(&self) -> usize {
90        let table_size: usize = self
91            .tables
92            .iter()
93            .map(|(n, t)| t.compute_size(n))
94            .map(|x| x.def + x.data)
95            .sum();
96        8 // FDBHeader
97        + table_size
98    }
99
100    /// Write the database to an output stream
101    pub fn write<O: io::Write>(&self, out: &mut O) -> io::Result<()> {
102        let base_offset = 8;
103        let count = self
104            .tables
105            .len()
106            .try_into()
107            .expect("tables.len() does not fit in u32");
108        let header = FDBHeader {
109            tables: ArrayHeader { base_offset, count },
110        };
111        header.tables.write_le(out)?;
112        let len_vec: Vec<_> = self.tables.iter().map(|(n, t)| t.compute_size(n)).collect();
113        let mut start_vec = Vec::with_capacity(self.tables.len());
114        let table_list_base = base_offset + count * size_of::<FDBTableHeader>() as u32;
115        let mut start = table_list_base;
116        for len in len_vec.iter().copied() {
117            start_vec.push(start);
118            Table::write_header(&mut start, &len, out)?;
119        }
120
121        let mut start = table_list_base;
122        for (table_name, table) in &self.tables {
123            start = table.write(table_name, start, out)?;
124        }
125
126        Ok(())
127    }
128}
129
/// Serialized size of one table, split into its two sections
#[derive(Clone, Copy, Debug)]
struct TableSize {
    /// Number of bytes in the table definition section
    def: usize,
    /// Number of bytes in the table data section
    data: usize,
}
135
136/// A single table
137pub struct Table {
138    columns: Vec<Column>,
139    strings: StringArena,
140    i64s: Vec<i64>,
141    buckets: Vec<Bucket>,
142    rows: Vec<Row>,
143    fields: Vec<Field>,
144}
145
146type StringArena = BTreeMap<usize, Vec<Latin1String>>;
147
148struct StoreMapper<'t> {
149    strings: &'t mut StringArena,
150    i64s: &'t mut Vec<i64>,
151}
152
153impl<'t> ValueMapperMut<OwnedContext, StoreContext> for StoreMapper<'t> {
154    fn map_string(&mut self, from: &String) -> TextRef {
155        let s = Latin1String::encode(from).into_owned();
156        let lkey = s.req_buf_len();
157        let lstrings = self.strings.entry(lkey).or_default();
158        let inner = /*if let Some(index) = lstrings.iter().position(|p| s == *p) {
159            index
160        } else */{
161            let len = lstrings.len();
162            lstrings.push(s);
163            len
164        };
165        TextRef { outer: lkey, inner }
166    }
167
168    fn map_i64(&mut self, from: &i64) -> I64Ref {
169        let index = self.i64s.len();
170        self.i64s.push(*from);
171        I64Ref { index }
172    }
173
174    fn map_xml(&mut self, from: &String) -> TextRef {
175        self.map_string(from)
176    }
177}
178
179impl Table {
180    /// Creates a new table
181    pub fn new(bucket_count: usize) -> Self {
182        Table {
183            buckets: vec![
184                Bucket {
185                    first_row_last: None
186                };
187                bucket_count
188            ],
189            columns: vec![],
190            fields: vec![],
191            strings: BTreeMap::new(),
192            rows: vec![],
193            i64s: vec![],
194        }
195    }
196
197    /// Get all columns
198    pub fn columns(&self) -> &[Column] {
199        &self.columns
200    }
201
202    /// Add a column to this table
203    pub fn push_column<S>(&mut self, name: S, data_type: ValueType)
204    where
205        S: Into<Latin1String>,
206    {
207        self.columns.push(Column {
208            data_type,
209            name: name.into(),
210        })
211    }
212
213    /// Push a row into this table
214    pub fn push_row(&mut self, pk: usize, fields: &[super::core::Field]) {
215        let first_field_index = self.fields.len();
216        let row = self.rows.len();
217
218        // find out where to place it
219        let bucket_index = pk % self.buckets.len();
220        let bucket = &mut self.buckets[bucket_index];
221
222        // Add to linked list
223        if let Some((_, last)) = &mut bucket.first_row_last {
224            self.rows[*last].next_row = Some(row);
225            *last = row;
226        } else {
227            bucket.first_row_last = Some((row, row))
228        }
229
230        self.rows.push(Row {
231            first_field_index,
232            count: fields.len().try_into().unwrap(),
233            next_row: None,
234        });
235
236        let mut mapper = StoreMapper {
237            strings: &mut self.strings,
238            i64s: &mut self.i64s,
239        };
240        for field in fields {
241            self.fields.push(field.map(&mut mapper));
242        }
243    }
244
245    fn write_header<IO: io::Write>(
246        start: &mut u32,
247        len: &TableSize,
248        out: &mut IO,
249    ) -> io::Result<()> {
250        let table_def_header_addr = *start;
251        let table_data_header_addr = *start + u32::try_from(len.def).unwrap();
252
253        FDBTableHeader {
254            table_def_header_addr,
255            table_data_header_addr,
256        }
257        .write_le(out)?;
258
259        *start = table_data_header_addr + u32::try_from(len.data).unwrap();
260        Ok(())
261    }
262
263    fn write<IO: io::Write>(
264        &self,
265        table_name: &Latin1Str,
266        start: u32,
267        out: &mut IO,
268    ) -> io::Result<u32> {
269        // Serialize table definition
270        let column_count = self.columns.len().try_into().unwrap();
271        let column_header_list_addr = start + size_of::<FDBTableDefHeader>() as u32;
272        let table_name_addr =
273            column_header_list_addr + size_of::<FDBColumnHeader>() as u32 * column_count;
274
275        FDBTableDefHeader {
276            column_count,
277            table_name_addr,
278            column_header_list_addr,
279        }
280        .write_le(out)?;
281
282        let mut column_name_addr = table_name_addr + (table_name.req_buf_len() as u32 * 4);
283        for column in &self.columns {
284            FDBColumnHeader {
285                column_data_type: column.data_type.into(),
286                column_name_addr,
287            }
288            .write_le(out)?;
289            column_name_addr += column.name.req_buf_len() as u32 * 4;
290        }
291
292        table_name.write_le(out)?;
293        for column in &self.columns {
294            column.name.write_le(out)?;
295        }
296
297        // Serialize table data
298        let bucket_base_offset = column_name_addr + size_of::<FDBTableDataHeader>() as u32;
299        let bucket_count = self.buckets.len().try_into().unwrap();
300
301        FDBTableDataHeader {
302            buckets: ArrayHeader {
303                count: bucket_count,
304                base_offset: bucket_base_offset,
305            },
306        }
307        .write_le(out)?;
308
309        let row_header_list_base =
310            bucket_base_offset + bucket_count * size_of::<FDBBucketHeader>() as u32;
311
312        let map_row_entry =
313            &|index| row_header_list_base + (index * size_of::<FDBRowHeaderListEntry>()) as u32;
314
315        for bucket in &self.buckets {
316            let row_header_list_head_addr = bucket
317                .first_row_last
318                .map(|(first, _)| first)
319                .map(map_row_entry)
320                .unwrap_or(0xffffffff);
321
322            FDBBucketHeader {
323                row_header_list_head_addr,
324            }
325            .write_le(out)?;
326        }
327
328        let row_count: u32 = self.rows.len().try_into().unwrap();
329        let row_header_base =
330            row_header_list_base + row_count * size_of::<FDBRowHeaderListEntry>() as u32;
331
332        for (index, row) in self.rows.iter().enumerate() {
333            let row_header_addr = row_header_base + (index * size_of::<FDBRowHeader>()) as u32;
334            let row_header_list_next_addr = row.next_row.map(map_row_entry).unwrap_or(0xffffffff);
335            FDBRowHeaderListEntry {
336                row_header_addr,
337                row_header_list_next_addr,
338            }
339            .write_le(out)?;
340        }
341
342        let field_base_offset = row_header_base + row_count * size_of::<FDBRowHeader>() as u32;
343
344        for row in &self.rows {
345            let fields = ArrayHeader {
346                base_offset: field_base_offset
347                    + (row.first_field_index * size_of::<FDBFieldData>()) as u32,
348                count: row.count,
349            };
350            FDBRowHeader { fields }.write_le(out)?;
351        }
352
353        let i64s_base_offset =
354            field_base_offset + (self.fields.len() * size_of::<FDBFieldData>()) as u32;
355        let strings_base_offset = i64s_base_offset + (self.i64s.len() * size_of::<u64>()) as u32;
356
357        let mut string_len_base = strings_base_offset;
358        let mut string_len_offsets = BTreeMap::new();
359        for (&key, value) in &self.strings {
360            let string_len = key * 4;
361            string_len_offsets.insert(key, string_len_base);
362            string_len_base += (string_len * value.len()) as u32;
363        }
364
365        const TRUE_LE32: [u8; 4] = [1, 0, 0, 0];
366        const FALSE_LE32: [u8; 4] = [0, 0, 0, 0];
367
368        for field in &self.fields {
369            let (data_type, value) = match field {
370                Field::Nothing => (0, [0; 4]),
371                Field::Integer(i) => (1, i.to_le_bytes()),
372                Field::Float(f) => (3, f.to_le_bytes()),
373                Field::Text(TextRef { outer, inner }) => (4, {
374                    let v = string_len_offsets.get(outer).unwrap() + (inner * outer * 4) as u32;
375                    v.to_le_bytes()
376                }),
377                Field::Boolean(b) => (5, if *b { TRUE_LE32 } else { FALSE_LE32 }),
378                Field::BigInt(i64_ref) => (6, {
379                    let v = i64s_base_offset + (i64_ref.index * size_of::<u64>()) as u32;
380                    v.to_le_bytes()
381                }),
382                Field::VarChar(text_ref) => (8, {
383                    let v = string_len_offsets.get(&text_ref.outer).unwrap()
384                        + (text_ref.inner * text_ref.outer * 4) as u32;
385                    v.to_le_bytes()
386                }),
387            };
388            FDBFieldData { data_type, value }.write_le(out)?;
389        }
390
391        // Write out all i64s
392        for &num in &self.i64s {
393            out.write_all(&num.to_le_bytes())?;
394        }
395
396        // Write out all strings
397        for value in self.strings.values() {
398            for string in value {
399                string.write_le(out)?;
400            }
401        }
402
403        // Increment final offset
404        Ok(string_len_base)
405    }
406
407    fn compute_def_size(&self, name: &Latin1Str) -> usize {
408        size_of::<FDBTableDefHeader>()
409            + name.req_buf_len() * 4
410            + size_of::<FDBColumnHeader>() * self.columns.len()
411            + self
412                .columns
413                .iter()
414                .map(|c| c.name.req_buf_len())
415                .sum::<usize>()
416                * 4
417    }
418
419    fn compute_data_size(&self) -> usize {
420        let string_size: usize = self.strings.iter().map(|(k, v)| k * v.len()).sum(); // Strings
421        size_of::<FDBTableDataHeader>()
422            + size_of::<FDBBucketHeader>() * self.buckets.len()
423            + size_of::<FDBRowHeaderListEntry>() * self.rows.len()
424            + size_of::<FDBRowHeader>() * self.rows.len()
425            + size_of::<FDBFieldData>() * self.fields.len()
426            + 4 * string_size
427            + size_of::<u64>() * self.i64s.len()
428    }
429
430    fn compute_size(&self, name: &Latin1Str) -> TableSize {
431        TableSize {
432            def: self.compute_def_size(name),
433            data: self.compute_data_size(),
434        }
435    }
436}
437
438/// A single column
439pub struct Column {
440    name: Latin1String,
441    data_type: ValueType,
442}
443
444impl Column {
445    /// Get the data type of this column
446    pub fn value_type(&self) -> ValueType {
447        self.data_type
448    }
449}
450
/// A single bucket, i.e. the head of one chain of rows
#[derive(Clone, Copy, Debug)]
struct Bucket {
    /// Indices of the first and the last row of the chain, in that order,
    /// or `None` while the bucket is empty
    first_row_last: Option<(usize, usize)>,
}
456
/// A single row of a table
struct Row {
    /// Index of the first field of this row in the field list of the table
    first_field_index: usize,
    /// Number of fields that belong to this row
    count: u32,
    /// Index of the next row in the same bucket, if there is one
    next_row: Option<usize>,
}
463
/// The [`Context`] for this module's [`Field`]
///
/// It replaces owned strings and 64-bit integers by references into the
/// arenas of a table.
struct StoreContext;
466
/// Reference to a string stored in the string arena of a table
struct TextRef {
    /// The length-key under which the string is stored in the arena
    outer: usize,
    /// The position of the string in the list for that length-key
    inner: usize,
}
474
/// Reference to a 64-bit integer stored in the integer arena of a table
struct I64Ref {
    /// The position of the value in the integer arena
    index: usize,
}
480
481impl Context for StoreContext {
482    type String = TextRef;
483    type I64 = I64Ref;
484    type XML = TextRef;
485}
486
487type Field = Value<StoreContext>;