assembly_fdb/mem/
mod.rs

1//! Low-Level API that is suitable for non-little-endian machines
2//!
//! This is the default in-memory API for the FDB file format. It is useful
4//! for batch processing because it is fast and only loads the values that
5//! are accessed.
6//!
7//! The reference structures in this module all implement [`Copy`].
8//!
//! The only limitation is that all references are bounded by the lifetime
10//! of the original database buffer.
11use assembly_core::buffer::{self, Buffer, Repr, LEI64};
12pub use assembly_fdb_core::value::mem::{Field, MemContext};
13use assembly_fdb_core::value::{
14    file::{FDBFieldValue, FileContext, IndirectValue},
15    owned::OwnedContext,
16    ValueMapperMut, ValueType,
17};
18use buffer::CastError;
19use latin1str::Latin1Str;
20
21mod c;
22
23//use super::ro::{Handle, RefHandle, TryFromHandle};
24use crate::{
25    handle::{self, Handle, RefHandle, TryFromHandle},
26    util::compare_bytes,
27};
28use c::{
29    FDBBucketHeaderC, FDBColumnHeaderC, FDBFieldDataC, FDBHeaderC, FDBRowHeaderListEntryC,
30    FDBTableDataHeaderC, FDBTableDefHeaderC, FDBTableHeaderC,
31};
32use std::{
33    borrow::Cow,
34    convert::{Infallible, TryFrom},
35};
36
37pub mod iter;
38
39use iter::{BucketIter, TableRowIter};
40pub use iter::{FieldIter, RowHeaderIter, TableIter}; // < FIXME> remove with next major update
41
42fn get_latin1_str(buf: &[u8], offset: u32) -> &Latin1Str {
43    let (_, haystack) = buf.split_at(offset as usize);
44    // FIXME: this silently ignores end of file problems
45    Latin1Str::from_bytes_until_nul(haystack)
46}
47
48/// A complete in-memory read-only database
49///
50/// This struct contains a reference to the complete byte buffer of an FDB file.
51#[derive(Copy, Clone)]
52pub struct Database<'a> {
53    inner: Handle<'a, ()>,
54}
55
56impl<'a> Database<'a> {
57    /// Create a new database reference
58    pub fn new(buf: &'a [u8]) -> Self {
59        let inner = Handle::new(buf);
60        Self { inner }
61    }
62
63    /// Get a reference to the header
64    pub fn header(self) -> Result<Header<'a>, CastError> {
65        let inner = self.inner.try_map_cast(0)?;
66        Ok(Header { inner })
67    }
68
69    /// Returns a reference to the tables array
70    pub fn tables(self) -> Result<Tables<'a>, CastError> {
71        let header = self.header()?;
72        let tables = header.tables()?;
73        Ok(tables)
74    }
75}
76
77#[derive(Copy, Clone)]
78/// Reference to the tables array
79pub struct Header<'a> {
80    inner: RefHandle<'a, FDBHeaderC>,
81}
82
83impl<'a> Header<'a> {
84    fn tables(self) -> Result<Tables<'a>, CastError> {
85        let header = self.inner.map_extract();
86        let inner = self.inner.try_map_cast_array(header.into_raw().tables)?;
87        Ok(Tables { inner })
88    }
89}
90
91fn map_table_header<'a>(handle: RefHandle<'a, FDBTableHeaderC>) -> Result<Table<'a>, CastError> {
92    let table_header = handle.into_raw().extract();
93
94    let def_header: &'a FDBTableDefHeaderC =
95        handle.buf().try_cast(table_header.table_def_header_addr)?;
96    let def_header = def_header.extract();
97
98    let data_header: &'a FDBTableDataHeaderC =
99        handle.buf().try_cast(table_header.table_data_header_addr)?;
100    let data_header = data_header.extract();
101
102    let name = get_latin1_str(handle.buf(), def_header.table_name_addr);
103
104    let columns: RefHandle<'a, [FDBColumnHeaderC]> =
105        handle.try_map_cast_slice(def_header.column_header_list_addr, def_header.column_count)?;
106
107    let buckets: RefHandle<'a, [FDBBucketHeaderC]> =
108        handle.try_map_cast_array(data_header.buckets)?;
109
110    Ok(Table::new(handle.wrap(InnerTable {
111        name,
112        columns: columns.raw(),
113        buckets: buckets.raw(),
114    })))
115}
116
117#[derive(Copy, Clone)]
118/// Reference to the tables array
119pub struct Tables<'a> {
120    inner: RefHandle<'a, [FDBTableHeaderC]>,
121}
122
123impl<'a> Tables<'a> {
124    /// Returns the length of the tables array
125    pub fn len(self) -> usize {
126        self.inner.into_raw().len()
127    }
128
129    /// Checks whether the tables array is empty
130    pub fn is_empty(self) -> bool {
131        self.inner.into_raw().len() == 0
132    }
133
134    /// Get the table reference at the specified index
135    pub fn get(self, index: usize) -> Option<Result<Table<'a>, CastError>> {
136        self.inner.get(index).map(map_table_header)
137    }
138
139    /// Get an interator over all tables
140    pub fn iter(&self) -> TableIter<'a> {
141        TableIter::new(&self.inner)
142    }
143
144    /// Get a table by its name
145    pub fn by_name(&self, name: &str) -> Option<Result<Table<'a>, CastError>> {
146        let bytes = name.as_bytes();
147        self.inner
148            .into_raw()
149            .binary_search_by(|table_header| {
150                let def_header_addr = table_header.table_def_header_addr.extract();
151                let def_header =
152                    buffer::cast::<FDBTableDefHeaderC>(self.inner.buf(), def_header_addr);
153
154                let name_addr = def_header.table_name_addr.extract() as usize;
155                let name_bytes = &self.inner.buf()[name_addr..];
156
157                compare_bytes(bytes, name_bytes)
158            })
159            .ok()
160            .and_then(|index| self.get(index))
161    }
162}
163
164#[allow(clippy::needless_lifetimes)] // <- clippy gets this wrong, presumably because of impl trait?
165fn map_column_header<'a>(
166    buf: &'a [u8],
167) -> impl Fn(&'a FDBColumnHeaderC) -> Column<'a> + Copy + Clone {
168    move |header: &FDBColumnHeaderC| {
169        let column_header = header.extract();
170        let name = get_latin1_str(buf, column_header.column_name_addr);
171        // FIXME: remove unwrap
172        let domain = ValueType::try_from(column_header.column_data_type).unwrap();
173
174        Column { name, domain }
175    }
176}
177
178fn get_row_header_list_entry(buf: &[u8], addr: u32) -> Option<&FDBRowHeaderListEntryC> {
179    if addr == u32::MAX {
180        None
181    } else {
182        Some(buf.cast::<FDBRowHeaderListEntryC>(addr))
183    }
184}
185
186/*#[allow(clippy::needless_lifetimes)] // <- clippy gets this wrong
187fn map_bucket_header<'a>(buf: &'a [u8]) -> impl Fn(&'a FDBBucketHeaderC) -> Bucket<'a> {
188    move |header: &FDBBucketHeaderC| {
189        let bucket_header = header.extract();
190        let addr = bucket_header.row_header_list_head_addr;
191        let first = get_row_header_list_entry(buf, addr);
192        Bucket { buf, first }
193    }
194}*/
195
196#[derive(Copy, Clone)]
197struct InnerTable<'a> {
198    name: &'a Latin1Str,
199    columns: &'a [FDBColumnHeaderC],
200    buckets: &'a [FDBBucketHeaderC],
201}
202
203#[derive(Copy, Clone)]
204/// Reference to a single table
205pub struct Table<'a> {
206    inner: Handle<'a, InnerTable<'a>>,
207}
208
209impl<'a> Table<'a> {
210    fn new(inner: Handle<'a, InnerTable<'a>>) -> Self {
211        Self { inner }
212    }
213
214    /// Get the undecoded name of the table
215    pub fn name_raw(&self) -> &'a Latin1Str {
216        self.inner.raw.name
217    }
218
219    /// Get the name of the table
220    pub fn name(&self) -> Cow<'a, str> {
221        self.inner.raw.name.decode()
222    }
223
224    /// Get a list of rows by index
225    pub fn index_iter(&self, id: u32) -> impl Iterator<Item = Row<'a>> {
226        let bucket: usize = id as usize % self.bucket_count();
227        self.bucket_at(bucket).into_iter().flat_map(move |b| {
228            b.row_iter()
229                .filter(move |r| r.field_at(0) == Some(Field::Integer(id as i32)))
230        })
231    }
232
233    /// Get the column at the index
234    ///
235    /// **Note**: This does some computation, call only once per colum if possible
236    pub fn column_at(&self, index: usize) -> Option<Column<'a>> {
237        self.inner
238            .raw
239            .columns
240            .get(index)
241            .map(map_column_header(self.inner.mem))
242    }
243
244    /// Get the column iterator
245    ///
246    /// **Note**: This does some computation, call only once if possible
247    pub fn column_iter(&self) -> impl Iterator<Item = Column<'a>> + Clone {
248        self.inner
249            .raw
250            .columns
251            .iter()
252            .map(map_column_header(self.inner.mem))
253    }
254
255    /// The amount of columns in this table
256    pub fn column_count(&self) -> usize {
257        self.inner.raw.columns.len()
258    }
259
260    /// Get the bucket at the index
261    ///
262    /// **Note**: This does some computation, call only once per bucket if possible
263    pub fn bucket_at(&self, index: usize) -> Option<Bucket<'a>> {
264        self.inner
265            .map_val(|raw| raw.buckets)
266            .get(index)
267            .map(|e| {
268                e.map_extract()
269                    .map_val(|r| r.row_header_list_head_addr)
270                    .map(get_row_header_list_entry)
271                    .transpose()
272            })
273            .map(Bucket::new)
274    }
275
276    /// Get the bucket for the given hash
277    ///
278    /// **Note**: This always calls [Table::bucket_at] exactly once
279    pub fn bucket_for_hash(&self, hash: u32) -> Bucket<'a> {
280        let index = hash as usize % self.inner.raw.buckets.len();
281        self.bucket_at(index).unwrap()
282    }
283
284    /// Get the bucket iterator
285    ///
286    /// **Note**: This does some computation, call only once if possible
287    pub fn bucket_iter(&self) -> BucketIter<'a> {
288        BucketIter::new(&self.inner.map_val(|r| r.buckets))
289    }
290
291    /// Get the amount of buckets
292    pub fn bucket_count(&self) -> usize {
293        self.inner.raw.buckets.len()
294    }
295
296    /// Get an iterator over all rows
297    pub fn row_iter(&self) -> TableRowIter<'a> {
298        TableRowIter::new(self.bucket_iter())
299    }
300}
301
302/// Reference to a column definition
303pub struct Column<'a> {
304    name: &'a Latin1Str,
305    domain: ValueType,
306}
307
308impl<'a> Column<'a> {
309    /// Returns the name of a column
310    pub fn name(&self) -> Cow<'a, str> {
311        self.name.decode()
312    }
313
314    /// Returns the name of a column
315    pub fn name_raw(&self) -> &'a Latin1Str {
316        self.name
317    }
318
319    /// Returns the default value type of the column
320    pub fn value_type(&self) -> ValueType {
321        self.domain
322    }
323}
324
325/// Reference to a single bucket
326#[derive(Debug, Copy, Clone)]
327pub struct Bucket<'a> {
328    inner: Option<RefHandle<'a, FDBRowHeaderListEntryC>>,
329}
330
331impl<'a> Bucket<'a> {
332    /// Returns an iterator over all rows in this bucket
333    pub fn row_iter(&self) -> RowHeaderIter<'a> {
334        RowHeaderIter::new(self.inner)
335    }
336
337    /// Check whether the bucket is empty
338    pub fn is_empty(&self) -> bool {
339        self.inner.is_none()
340    }
341
342    fn new(inner: Option<RefHandle<'a, FDBRowHeaderListEntryC>>) -> Self {
343        Self { inner }
344    }
345}
346
347#[derive(Copy, Clone)]
348/// Reference to a single row
349pub struct Row<'a> {
350    inner: RefHandle<'a, [FDBFieldDataC]>,
351}
352
353fn get_field<'a>(buf: &'a [u8], data: &'a FDBFieldDataC) -> Field<'a> {
354    let data_type = ValueType::try_from(data.data_type.extract()).unwrap();
355    let bytes = data.value.0;
356    get_field_raw(buf, data_type, bytes)
357}
358
359fn get_field_raw(buf: &[u8], data_type: ValueType, bytes: [u8; 4]) -> Field {
360    match data_type {
361        ValueType::Nothing => Field::Nothing,
362        ValueType::Integer => Field::Integer(i32::from_le_bytes(bytes)),
363        ValueType::Float => Field::Float(f32::from_le_bytes(bytes)),
364        ValueType::Text => {
365            let addr = u32::from_le_bytes(bytes);
366            let text = get_latin1_str(buf, addr);
367            Field::Text(text)
368        }
369        ValueType::Boolean => Field::Boolean(bytes != [0, 0, 0, 0]),
370        ValueType::BigInt => {
371            let addr = u32::from_le_bytes(bytes);
372            let val = buf.cast::<LEI64>(addr).extract();
373            Field::BigInt(val)
374        }
375        ValueType::VarChar => {
376            let addr = u32::from_le_bytes(bytes);
377            let text = get_latin1_str(buf, addr);
378            Field::VarChar(text)
379        }
380    }
381}
382
383impl<'a> Row<'a> {
384    fn new(inner: RefHandle<'a, [FDBFieldDataC]>) -> Self {
385        Self { inner }
386    }
387
388    /// Get the field at the index
389    pub fn field_at(&self, index: usize) -> Option<Field<'a>> {
390        self.inner.get(index).map(|f| f.map(get_field).into_raw())
391    }
392
393    /// Get the iterator over all fields
394    pub fn field_iter(&self) -> FieldIter<'a> {
395        FieldIter::new(self.inner)
396    }
397
398    /// Get the count of fields
399    pub fn field_count(&self) -> usize {
400        self.inner.raw().len()
401    }
402}
403
404impl<'a> IntoIterator for Row<'a> {
405    type Item = Field<'a>;
406    type IntoIter = FieldIter<'a>;
407
408    fn into_iter(self) -> Self::IntoIter {
409        self.field_iter()
410    }
411}
412
413/// Map [MemContext] values to [OwnedContext] values
414pub struct MemToOwned;
415
416impl<'a> ValueMapperMut<MemContext<'a>, OwnedContext> for MemToOwned {
417    fn map_string(&mut self, from: &&'a Latin1Str) -> String {
418        from.decode().into_owned()
419    }
420
421    fn map_i64(&mut self, from: &i64) -> i64 {
422        *from
423    }
424
425    fn map_xml(&mut self, from: &&'a Latin1Str) -> String {
426        from.decode().into_owned()
427    }
428}
429
430struct MemFromFile<'a>(&'a [u8]);
431
432impl<'a> ValueMapperMut<FileContext, MemContext<'a>> for MemFromFile<'a> {
433    fn map_string(&mut self, from: &IndirectValue) -> &'a Latin1Str {
434        handle::get_string(self.0, from.addr).unwrap()
435    }
436
437    fn map_i64(&mut self, from: &IndirectValue) -> i64 {
438        handle::get_i64(self.0, from.addr).unwrap()
439    }
440
441    fn map_xml(&mut self, from: &IndirectValue) -> &'a Latin1Str {
442        handle::get_string(self.0, from.addr).unwrap()
443    }
444}
445
446impl<'a> TryFromHandle<'a, FDBFieldValue> for Field<'a> {
447    type Error = Infallible;
448
449    fn try_from(value: Handle<'a, FDBFieldValue>) -> Result<Self, Self::Error> {
450        let mut mem = MemFromFile(value.buf());
451        Ok(value.raw().map(&mut mem))
452    }
453}