assembly_fdb/store/
mod.rs

1//! # Arena-Store & Writer
2//!
3//! This module contains a (currently write-only) structure that represents a complete
4//! FDB file. This structure can be used to create new FDB files.
5//!
6//! ## Usage
7//!
8//! ```
9//! use latin1str::Latin1String;
10//! use assembly_fdb::{
11//!     value::{ValueType, owned::{Field}},
12//!     store::{Database, Table},
13//! };
14//!
15//! // Create a new database
16//! let mut db = Database::new();
17//!
18//! // Create a table
19//! let mut table = Table::new(16);
20//!
21//! // Add columns to the table
22//! table.push_column(Latin1String::encode("ID"), ValueType::Integer);
23//!
24//! // Add data to the table
25//! table.push_row(1, &[Field::Integer(1)]);
26//! table.push_row(2, &[Field::Integer(2)]);
27//! table.push_row(5, &[Field::Integer(5)]);
28//! table.push_row(6, &[Field::Integer(6)]);
29//!
30//! // Add table to the database
31//! db.push_table(Latin1String::encode("Table"), table);
32//!
33//! // Write the database to a type that implements [`std::io::Write`]
34//! let mut out: Vec<u8> = Vec::new();
35//! db.write(&mut out).expect("success");
36//! ```
37
38use crate::io::write::WriteLE;
39use assembly_fdb_core::{
40    file::{
41        ArrayHeader, FDBBucketHeader, FDBColumnHeader, FDBFieldData, FDBHeader, FDBRowHeader,
42        FDBRowHeaderListEntry, FDBTableDataHeader, FDBTableDefHeader, FDBTableHeader,
43    },
44    value::{owned::OwnedContext, Context, Value, ValueMapperMut, ValueType},
45};
46use latin1str::{Latin1Str, Latin1String};
47use std::{
48    collections::BTreeMap,
49    convert::{TryFrom, TryInto},
50    io,
51    mem::size_of,
52};
53
54#[cfg(test)]
55mod tests;
56
57/// Calculates the number of 4-byte units that are needed to store
58/// this string with at least one null terminator.
59fn req_buf_len(s: &Latin1Str) -> usize {
60    s.len() / 4 + 1
61}
62
/// The whole database
///
/// Tables are kept in a [`BTreeMap`] keyed by their name, so iterating the
/// tables (for example while serializing) visits them in key order.
pub struct Database {
    /// All tables of this database, keyed by table name
    tables: BTreeMap<Latin1String, Table>,
}
67
68impl Default for Database {
69    fn default() -> Self {
70        Self::new()
71    }
72}
73
74impl Database {
75    /// Create a new database
76    pub fn new() -> Self {
77        Self {
78            tables: BTreeMap::new(),
79        }
80    }
81
82    /// Push a table to the database
83    pub fn push_table<S>(&mut self, name: S, table: Table)
84    where
85        S: Into<Latin1String>,
86    {
87        self.tables.insert(name.into(), table);
88    }
89
90    /// Computes the size of the serialized database
91    pub fn compute_size(&self) -> usize {
92        let table_size: usize = self
93            .tables
94            .iter()
95            .map(|(n, t)| t.compute_size(n))
96            .map(|x| x.def + x.data)
97            .sum();
98        8 // FDBHeader
99        + table_size
100    }
101
102    /// Write the database to an output stream
103    pub fn write<O: io::Write>(&self, out: &mut O) -> io::Result<()> {
104        let base_offset = 8;
105        let count = self
106            .tables
107            .len()
108            .try_into()
109            .expect("tables.len() does not fit in u32");
110        let header = FDBHeader {
111            tables: ArrayHeader { base_offset, count },
112        };
113        header.tables.write_le(out)?;
114        let len_vec: Vec<_> = self.tables.iter().map(|(n, t)| t.compute_size(n)).collect();
115        let mut start_vec = Vec::with_capacity(self.tables.len());
116        let table_list_base = base_offset + count * size_of::<FDBTableHeader>() as u32;
117        let mut start = table_list_base;
118        for len in len_vec.iter() {
119            start_vec.push(start);
120            Table::write_header(&mut start, len, out)?;
121        }
122
123        let mut start = table_list_base;
124        for (table_name, table) in &self.tables {
125            start = table.write(table_name, start, out)?;
126        }
127
128        Ok(())
129    }
130}
131
/// Byte sizes of the two serialized parts of a table
#[derive(Debug, Copy, Clone)]
struct TableSize {
    /// Size of the table definition (as computed by `Table::compute_def_size`)
    def: usize,
    /// Size of the table data (as computed by `Table::compute_data_size`)
    data: usize,
}
137
/// A single table
///
/// Rows, fields, strings and 64-bit integers are all stored in flat
/// collections and refer to each other by index.
pub struct Table {
    /// The columns, in the order in which they were added
    columns: Vec<Column>,
    /// All strings referenced by fields, grouped by their length key
    strings: StringArena,
    /// All 64-bit integers referenced by fields
    i64s: Vec<i64>,
    /// The buckets; each one tracks a linked list of rows
    buckets: Vec<Bucket>,
    /// All rows, in the order in which they were pushed
    rows: Vec<Row>,
    /// The fields of all rows; every row owns one contiguous range
    fields: Vec<Field>,
}
147
/// Arena for the strings of a table
///
/// The key is the number of 4-byte units a string occupies (see
/// `req_buf_len`); the value holds all strings that need exactly that many.
type StringArena = BTreeMap<usize, Vec<Latin1String>>;
149
/// Maps owned field values to values that reference the arenas of a table
///
/// Strings and 64-bit integers are moved into the borrowed arenas; the
/// mapped value only keeps a reference (`TextRef` / `I64Ref`) to them.
struct StoreMapper<'t> {
    /// The string arena of the table that is being filled
    strings: &'t mut StringArena,
    /// The i64 arena of the table that is being filled
    i64s: &'t mut Vec<i64>,
}
154
155impl<'t> ValueMapperMut<OwnedContext, StoreContext> for StoreMapper<'t> {
156    fn map_string(&mut self, from: &String) -> TextRef {
157        let s = Latin1String::encode(from).into_owned();
158        let lkey = req_buf_len(&s);
159        let lstrings = self.strings.entry(lkey).or_default();
160        let inner = /*if let Some(index) = lstrings.iter().position(|p| s == *p) {
161            index
162        } else */{
163            let len = lstrings.len();
164            lstrings.push(s);
165            len
166        };
167        TextRef { outer: lkey, inner }
168    }
169
170    fn map_i64(&mut self, from: &i64) -> I64Ref {
171        let index = self.i64s.len();
172        self.i64s.push(*from);
173        I64Ref { index }
174    }
175
176    fn map_xml(&mut self, from: &String) -> TextRef {
177        self.map_string(from)
178    }
179}
180
181impl Table {
182    /// Creates a new table
183    pub fn new(bucket_count: usize) -> Self {
184        Table {
185            buckets: vec![
186                Bucket {
187                    first_row_last: None
188                };
189                bucket_count
190            ],
191            columns: vec![],
192            fields: vec![],
193            strings: BTreeMap::new(),
194            rows: vec![],
195            i64s: vec![],
196        }
197    }
198
199    /// Get all columns
200    pub fn columns(&self) -> &[Column] {
201        &self.columns
202    }
203
204    /// Add a column to this table
205    pub fn push_column<S>(&mut self, name: S, data_type: ValueType)
206    where
207        S: Into<Latin1String>,
208    {
209        self.columns.push(Column {
210            data_type,
211            name: name.into(),
212        })
213    }
214
215    /// Push a row into this table
216    pub fn push_row(&mut self, pk: usize, fields: &[crate::value::owned::Field]) {
217        let first_field_index = self.fields.len();
218        let row = self.rows.len();
219
220        // find out where to place it
221        let bucket_index = pk % self.buckets.len();
222        let bucket = &mut self.buckets[bucket_index];
223
224        // Add to linked list
225        if let Some((_, last)) = &mut bucket.first_row_last {
226            self.rows[*last].next_row = Some(row);
227            *last = row;
228        } else {
229            bucket.first_row_last = Some((row, row))
230        }
231
232        self.rows.push(Row {
233            first_field_index,
234            count: fields.len().try_into().unwrap(),
235            next_row: None,
236        });
237
238        let mut mapper = StoreMapper {
239            strings: &mut self.strings,
240            i64s: &mut self.i64s,
241        };
242        for field in fields {
243            self.fields.push(field.map(&mut mapper));
244        }
245    }
246
247    fn write_header<IO: io::Write>(
248        start: &mut u32,
249        len: &TableSize,
250        out: &mut IO,
251    ) -> io::Result<()> {
252        let table_def_header_addr = *start;
253        let table_data_header_addr = *start + u32::try_from(len.def).unwrap();
254
255        FDBTableHeader {
256            table_def_header_addr,
257            table_data_header_addr,
258        }
259        .write_le(out)?;
260
261        *start = table_data_header_addr + u32::try_from(len.data).unwrap();
262        Ok(())
263    }
264
265    fn write<IO: io::Write>(
266        &self,
267        table_name: &Latin1Str,
268        start: u32,
269        out: &mut IO,
270    ) -> io::Result<u32> {
271        // Serialize table definition
272        let column_count = self.columns.len().try_into().unwrap();
273        let column_header_list_addr = start + size_of::<FDBTableDefHeader>() as u32;
274        let table_name_addr =
275            column_header_list_addr + size_of::<FDBColumnHeader>() as u32 * column_count;
276
277        FDBTableDefHeader {
278            column_count,
279            table_name_addr,
280            column_header_list_addr,
281        }
282        .write_le(out)?;
283
284        let mut column_name_addr = table_name_addr + (req_buf_len(table_name) as u32 * 4);
285        for column in &self.columns {
286            FDBColumnHeader {
287                column_data_type: column.data_type.into(),
288                column_name_addr,
289            }
290            .write_le(out)?;
291            column_name_addr += req_buf_len(&column.name) as u32 * 4;
292        }
293
294        table_name.write_le(out)?;
295        for column in &self.columns {
296            column.name.write_le(out)?;
297        }
298
299        // Serialize table data
300        let bucket_base_offset = column_name_addr + size_of::<FDBTableDataHeader>() as u32;
301        let bucket_count = self.buckets.len().try_into().unwrap();
302
303        FDBTableDataHeader {
304            buckets: ArrayHeader {
305                count: bucket_count,
306                base_offset: bucket_base_offset,
307            },
308        }
309        .write_le(out)?;
310
311        let row_header_list_base =
312            bucket_base_offset + bucket_count * size_of::<FDBBucketHeader>() as u32;
313
314        let map_row_entry =
315            &|index| row_header_list_base + (index * size_of::<FDBRowHeaderListEntry>()) as u32;
316
317        for bucket in &self.buckets {
318            let row_header_list_head_addr = bucket
319                .first_row_last
320                .map(|(first, _)| first)
321                .map(map_row_entry)
322                .unwrap_or(0xffffffff);
323
324            FDBBucketHeader {
325                row_header_list_head_addr,
326            }
327            .write_le(out)?;
328        }
329
330        let row_count: u32 = self.rows.len().try_into().unwrap();
331        let row_header_base =
332            row_header_list_base + row_count * size_of::<FDBRowHeaderListEntry>() as u32;
333
334        for (index, row) in self.rows.iter().enumerate() {
335            let row_header_addr = row_header_base + (index * size_of::<FDBRowHeader>()) as u32;
336            let row_header_list_next_addr = row.next_row.map(map_row_entry).unwrap_or(0xffffffff);
337            FDBRowHeaderListEntry {
338                row_header_addr,
339                row_header_list_next_addr,
340            }
341            .write_le(out)?;
342        }
343
344        let field_base_offset = row_header_base + row_count * size_of::<FDBRowHeader>() as u32;
345
346        for row in &self.rows {
347            let fields = ArrayHeader {
348                base_offset: field_base_offset
349                    + (row.first_field_index * size_of::<FDBFieldData>()) as u32,
350                count: row.count,
351            };
352            FDBRowHeader { fields }.write_le(out)?;
353        }
354
355        let i64s_base_offset =
356            field_base_offset + (self.fields.len() * size_of::<FDBFieldData>()) as u32;
357        let strings_base_offset = i64s_base_offset + (self.i64s.len() * size_of::<u64>()) as u32;
358
359        let mut string_len_base = strings_base_offset;
360        let mut string_len_offsets = BTreeMap::new();
361        for (&key, value) in &self.strings {
362            let string_len = key * 4;
363            string_len_offsets.insert(key, string_len_base);
364            string_len_base += (string_len * value.len()) as u32;
365        }
366
367        const TRUE_LE32: [u8; 4] = [1, 0, 0, 0];
368        const FALSE_LE32: [u8; 4] = [0, 0, 0, 0];
369
370        for field in &self.fields {
371            let (data_type, value) = match field {
372                Field::Nothing => (0, [0; 4]),
373                Field::Integer(i) => (1, i.to_le_bytes()),
374                Field::Float(f) => (3, f.to_le_bytes()),
375                Field::Text(TextRef { outer, inner }) => (4, {
376                    let v = string_len_offsets.get(outer).unwrap() + (inner * outer * 4) as u32;
377                    v.to_le_bytes()
378                }),
379                Field::Boolean(b) => (5, if *b { TRUE_LE32 } else { FALSE_LE32 }),
380                Field::BigInt(i64_ref) => (6, {
381                    let v = i64s_base_offset + (i64_ref.index * size_of::<u64>()) as u32;
382                    v.to_le_bytes()
383                }),
384                Field::VarChar(text_ref) => (8, {
385                    let v = string_len_offsets.get(&text_ref.outer).unwrap()
386                        + (text_ref.inner * text_ref.outer * 4) as u32;
387                    v.to_le_bytes()
388                }),
389            };
390            FDBFieldData { data_type, value }.write_le(out)?;
391        }
392
393        // Write out all i64s
394        for &num in &self.i64s {
395            out.write_all(&num.to_le_bytes())?;
396        }
397
398        // Write out all strings
399        for value in self.strings.values() {
400            for string in value {
401                string.write_le(out)?;
402            }
403        }
404
405        // Increment final offset
406        Ok(string_len_base)
407    }
408
409    fn compute_def_size(&self, name: &Latin1Str) -> usize {
410        size_of::<FDBTableDefHeader>()
411            + req_buf_len(name) * 4
412            + size_of::<FDBColumnHeader>() * self.columns.len()
413            + self
414                .columns
415                .iter()
416                .map(|c| req_buf_len(&c.name))
417                .sum::<usize>()
418                * 4
419    }
420
421    fn compute_data_size(&self) -> usize {
422        let string_size: usize = self.strings.iter().map(|(k, v)| k * v.len()).sum(); // Strings
423        size_of::<FDBTableDataHeader>()
424            + size_of::<FDBBucketHeader>() * self.buckets.len()
425            + size_of::<FDBRowHeaderListEntry>() * self.rows.len()
426            + size_of::<FDBRowHeader>() * self.rows.len()
427            + size_of::<FDBFieldData>() * self.fields.len()
428            + 4 * string_size
429            + size_of::<u64>() * self.i64s.len()
430    }
431
432    fn compute_size(&self, name: &Latin1Str) -> TableSize {
433        TableSize {
434            def: self.compute_def_size(name),
435            data: self.compute_data_size(),
436        }
437    }
438}
439
/// A single column
pub struct Column {
    /// The name of the column
    name: Latin1String,
    /// The type of the values in this column
    data_type: ValueType,
}
445
446impl Column {
447    /// Get the data type of this column
448    pub fn value_type(&self) -> ValueType {
449        self.data_type
450    }
451}
452
/// A single bucket
///
/// A bucket is the head of a linked list of rows; the links themselves are
/// stored in `Row::next_row`.
#[derive(Debug, Copy, Clone)]
struct Bucket {
    /// `None` while the bucket is empty, otherwise the indices (into the
    /// rows of the table) of the first and the last row in this bucket
    first_row_last: Option<(usize, usize)>,
}
458
/// A single row
struct Row {
    /// Index of the first field of this row in the fields of the table
    first_field_index: usize,
    /// Number of fields that belong to this row
    count: u32,
    /// Index of the next row in the same bucket, if there is one
    next_row: Option<usize>,
}
465
/// The [`Context`] for this module's [`Field`]
struct StoreContext;
468
/// Reference to an arena allocated string
struct TextRef {
    /// The length-key of the string, i.e. the number of 4-byte units it
    /// occupies; this is the key into the string arena
    outer: usize,
    /// The index of the string within the group for that length-key
    inner: usize,
}
476
/// Reference to an arena allocated i64
struct I64Ref {
    /// The index of the value in the i64 arena of the table (an element
    /// index, not a byte offset)
    index: usize,
}
482
// In this context, strings, XML and 64-bit integers are not stored inline
// but replaced by references into the arenas of the table.
impl Context for StoreContext {
    type String = TextRef;
    type I64 = I64Ref;
    type XML = TextRef;
}
488
/// A field value as stored in a `Table`, with strings and 64-bit integers
/// replaced by arena references
type Field = Value<StoreContext>;