assembly_data/fdb/mem/
mod.rs

1//! Low-Level API that is suitable for non-little-endian machines
2//!
3//! This is the default in-memory API for the FDB file format. It is useful
4//! for batch processing because it is fast and only loads the values that
5//! are accessed.
6//!
7//! The reference structures in this module all implement [`Copy`].
8//!
9//! The only limitation is that all references are bounded by the lifetime
10//! of the original database buffer.
11use assembly_core::buffer::{self, Repr, LEI64};
12use buffer::CastError;
13use memchr::memchr;
14
15mod c;
16use super::{
17    common::{Context, Latin1Str, Value, ValueMapperMut, ValueType},
18    file::{FDBFieldValue, FileContext, IndirectValue},
19    ro::{
20        buffer::{compare_bytes, Buffer},
21        Handle, RefHandle,
22    },
23};
24use c::{
25    FDBBucketHeaderC, FDBColumnHeaderC, FDBFieldDataC, FDBHeaderC, FDBRowHeaderC,
26    FDBRowHeaderListEntryC, FDBTableDataHeaderC, FDBTableDefHeaderC, FDBTableHeaderC,
27};
28use std::{
29    borrow::Cow,
30    convert::{Infallible, TryFrom},
31};
32
33fn get_latin1_str(buf: &[u8], offset: u32) -> &Latin1Str {
34    let (_, haystack) = buf.split_at(offset as usize);
35    if let Some(end) = memchr(0, haystack) {
36        let (content, _) = haystack.split_at(end);
37        unsafe { Latin1Str::from_bytes_unchecked(content) }
38    } else {
39        panic!(
40            "Offset {} is supposed to be a string but does not have a null-terminator",
41            offset
42        );
43    }
44}
45
46/// A complete in-memory read-only database
47///
48/// This struct contains a reference to the complete byte buffer of an FDB file.
49#[derive(Copy, Clone)]
50pub struct Database<'a> {
51    inner: Handle<'a, ()>,
52}
53
54impl<'a> Database<'a> {
55    /// Create a new database reference
56    pub fn new(buf: &'a [u8]) -> Self {
57        let inner = Handle::new_ref(buf);
58        Self { inner }
59    }
60
61    /// Get a reference to the header
62    pub fn header(self) -> Result<Header<'a>, CastError> {
63        let inner = self.inner.try_map_cast(0)?;
64        Ok(Header { inner })
65    }
66
67    /// Returns a reference to the tables array
68    pub fn tables(self) -> Result<Tables<'a>, CastError> {
69        let header = self.header()?;
70        let tables = header.tables()?;
71        Ok(tables)
72    }
73}
74
75#[derive(Copy, Clone)]
76/// Reference to the tables array
77pub struct Header<'a> {
78    inner: RefHandle<'a, FDBHeaderC>,
79}
80
81impl<'a> Header<'a> {
82    fn tables(self) -> Result<Tables<'a>, CastError> {
83        let header = self.inner.map_extract();
84        let inner = self.inner.try_map_cast_array(header.into_raw().tables)?;
85        Ok(Tables { inner })
86    }
87}
88
89fn map_table_header<'a>(handle: RefHandle<'a, FDBTableHeaderC>) -> Result<Table<'a>, CastError> {
90    let table_header = handle.into_raw().extract();
91
92    let def_header: &'a FDBTableDefHeaderC =
93        handle.buf().try_cast(table_header.table_def_header_addr)?;
94    let def_header = def_header.extract();
95
96    let data_header: &'a FDBTableDataHeaderC =
97        handle.buf().try_cast(table_header.table_data_header_addr)?;
98    let data_header = data_header.extract();
99
100    let name = get_latin1_str(handle.buf().as_bytes(), def_header.table_name_addr);
101
102    let columns: RefHandle<'a, [FDBColumnHeaderC]> =
103        handle.try_map_cast_slice(def_header.column_header_list_addr, def_header.column_count)?;
104
105    let buckets: RefHandle<'a, [FDBBucketHeaderC]> =
106        handle.try_map_cast_array(data_header.buckets)?;
107
108    Ok(Table::new(handle.wrap(InnerTable {
109        name,
110        columns: columns.raw(),
111        buckets: buckets.raw(),
112    })))
113}
114
115#[derive(Copy, Clone)]
116/// Reference to the tables array
117pub struct Tables<'a> {
118    inner: RefHandle<'a, [FDBTableHeaderC]>,
119}
120
121impl<'a> Tables<'a> {
122    /// Returns the length of the tables array
123    pub fn len(self) -> usize {
124        self.inner.into_raw().len()
125    }
126
127    /// Checks whether the tables array is empty
128    pub fn is_empty(self) -> bool {
129        self.inner.into_raw().len() == 0
130    }
131
132    /// Get the table reference at the specified index
133    pub fn get(self, index: usize) -> Option<Result<Table<'a>, CastError>> {
134        self.inner.get(index).map(map_table_header)
135    }
136
137    /// Get an interator over all tables
138    pub fn iter(&self) -> impl Iterator<Item = Result<Table<'a>, CastError>> {
139        TableIter {
140            inner: self.inner.map_val(<[FDBTableHeaderC]>::iter),
141        }
142    }
143
144    /// Get a table by its name
145    pub fn by_name(&self, name: &str) -> Option<Result<Table<'a>, CastError>> {
146        let bytes = name.as_bytes();
147        self.inner
148            .into_raw()
149            .binary_search_by(|table_header| {
150                let def_header_addr = table_header.table_def_header_addr.extract();
151                let def_header = buffer::cast::<FDBTableDefHeaderC>(
152                    self.inner.buf().as_bytes(),
153                    def_header_addr,
154                );
155
156                let name_addr = def_header.table_name_addr.extract() as usize;
157                let name_bytes = &self.inner.buf().as_bytes()[name_addr..];
158
159                compare_bytes(bytes, name_bytes)
160            })
161            .ok()
162            .and_then(|index| self.get(index))
163    }
164}
165
166#[allow(clippy::needless_lifetimes)] // <- clippy gets this wrong, presumably because of impl trait?
167fn map_column_header<'a>(buf: &'a [u8]) -> impl Fn(&'a FDBColumnHeaderC) -> Column<'a> + Copy + Clone {
168    move |header: &FDBColumnHeaderC| {
169        let column_header = header.extract();
170        let name = get_latin1_str(buf, column_header.column_name_addr);
171        // FIXME: remove unwrap
172        let domain = ValueType::try_from(column_header.column_data_type).unwrap();
173
174        Column { name, domain }
175    }
176}
177
178fn get_row_header_list_entry(buf: &[u8], addr: u32) -> Option<&FDBRowHeaderListEntryC> {
179    if addr == u32::MAX {
180        None
181    } else {
182        Some(buffer::cast::<FDBRowHeaderListEntryC>(buf, addr))
183    }
184}
185
186#[allow(clippy::needless_lifetimes)] // <- clippy gets this wrong
187fn map_bucket_header<'a>(buf: &'a [u8]) -> impl Fn(&'a FDBBucketHeaderC) -> Bucket<'a> {
188    move |header: &FDBBucketHeaderC| {
189        let bucket_header = header.extract();
190        let addr = bucket_header.row_header_list_head_addr;
191        let first = get_row_header_list_entry(buf, addr);
192        Bucket { buf, first }
193    }
194}
195
196#[derive(Clone)]
197/// An iterator over tables
198pub struct TableIter<'a> {
199    inner: Handle<'a, std::slice::Iter<'a, FDBTableHeaderC>>,
200}
201
202impl<'a> Iterator for TableIter<'a> {
203    type Item = Result<Table<'a>, CastError>;
204
205    fn next(&mut self) -> Option<Self::Item> {
206        self.inner
207            .raw_mut()
208            .next()
209            .map(|raw| self.inner.wrap(raw))
210            .map(map_table_header)
211    }
212}
213
214#[derive(Copy, Clone)]
215struct InnerTable<'a> {
216    name: &'a Latin1Str,
217    columns: &'a [FDBColumnHeaderC],
218    buckets: &'a [FDBBucketHeaderC],
219}
220
221#[derive(Copy, Clone)]
222/// Reference to a single table
223pub struct Table<'a> {
224    inner: Handle<'a, InnerTable<'a>>,
225}
226
227impl<'a> Table<'a> {
228    fn new(inner: Handle<'a, InnerTable<'a>>) -> Self {
229        Self { inner }
230    }
231
232    /// Get the undecoded name of the table
233    pub fn name_raw(&self) -> &'a Latin1Str {
234        self.inner.raw.name
235    }
236
237    /// Get the name of the table
238    pub fn name(&self) -> Cow<'a, str> {
239        self.inner.raw.name.decode()
240    }
241
242    /// Get a list of rows by index
243    pub fn index_iter(&self, id: u32) -> impl Iterator<Item = Row<'a>> {
244        let bucket: usize = id as usize % self.bucket_count();
245        self.bucket_at(bucket).into_iter().flat_map(move |b| {
246            b.row_iter()
247                .filter(move |r| r.field_at(0) == Some(Field::Integer(id as i32)))
248        })
249    }
250
251    /// Get the column at the index
252    ///
253    /// **Note**: This does some computation, call only once per colum if possible
254    pub fn column_at(&self, index: usize) -> Option<Column<'a>> {
255        self.inner
256            .raw
257            .columns
258            .get(index)
259            .map(map_column_header(self.inner.mem.as_bytes()))
260    }
261
262    /// Get the column iterator
263    ///
264    /// **Note**: This does some computation, call only once if possible
265    pub fn column_iter(&self) -> impl Iterator<Item = Column<'a>> + Clone {
266        self.inner
267            .raw
268            .columns
269            .iter()
270            .map(map_column_header(self.inner.mem.as_bytes()))
271    }
272
273    /// The amount of columns in this table
274    pub fn column_count(&self) -> usize {
275        self.inner.raw.columns.len()
276    }
277
278    /// Get the bucket at the index
279    ///
280    /// **Note**: This does some computation, call only once per bucket if possible
281    pub fn bucket_at(&self, index: usize) -> Option<Bucket<'a>> {
282        self.inner
283            .raw
284            .buckets
285            .get(index)
286            .map(map_bucket_header(self.inner.mem.as_bytes()))
287    }
288
289    /// Get the bucket for the given hash
290    ///
291    /// **Note**: This always calls [Table::bucket_at] exactly once
292    pub fn bucket_for_hash(&self, hash: u32) -> Bucket<'a> {
293        let index = hash as usize % self.inner.raw.buckets.len();
294        self.bucket_at(index).unwrap()
295    }
296
297    /// Get the bucket iterator
298    ///
299    /// **Note**: This does some computation, call only once if possible
300    pub fn bucket_iter(&self) -> impl Iterator<Item = Bucket<'a>> {
301        self.inner
302            .raw
303            .buckets
304            .iter()
305            .map(map_bucket_header(self.inner.mem.as_bytes()))
306    }
307
308    /// Get the amount of buckets
309    pub fn bucket_count(&self) -> usize {
310        self.inner.raw.buckets.len()
311    }
312
313    /// Get an iterator over all rows
314    pub fn row_iter(&self) -> impl Iterator<Item = Row<'a>> {
315        self.bucket_iter().map(|b| b.row_iter()).flatten()
316    }
317}
318
319/// Reference to a column definition
320pub struct Column<'a> {
321    name: &'a Latin1Str,
322    domain: ValueType,
323}
324
325impl<'a> Column<'a> {
326    /// Returns the name of a column
327    pub fn name(&self) -> Cow<'a, str> {
328        self.name.decode()
329    }
330
331    /// Returns the name of a column
332    pub fn name_raw(&self) -> &'a Latin1Str {
333        self.name
334    }
335
336    /// Returns the default value type of the column
337    pub fn value_type(&self) -> ValueType {
338        self.domain
339    }
340}
341
342/// Reference to a single bucket
343#[derive(Debug)]
344pub struct Bucket<'a> {
345    buf: &'a [u8],
346    first: Option<&'a FDBRowHeaderListEntryC>,
347}
348
349impl<'a> Bucket<'a> {
350    /// Returns an iterator over all rows in this bucket
351    pub fn row_iter(&self) -> RowHeaderIter<'a> {
352        RowHeaderIter {
353            buf: self.buf,
354            next: self.first,
355        }
356    }
357
358    /// Check whether the bucket is empty
359    pub fn is_empty(&self) -> bool {
360        self.first.is_none()
361    }
362}
363
364/// Struct that implements [`Bucket::row_iter`].
365pub struct RowHeaderIter<'a> {
366    buf: &'a [u8],
367    next: Option<&'a FDBRowHeaderListEntryC>,
368}
369
370impl<'a> Iterator for RowHeaderIter<'a> {
371    type Item = Row<'a>;
372
373    fn next(&mut self) -> Option<Self::Item> {
374        if let Some(next) = self.next {
375            let entry = next.extract();
376            self.next = get_row_header_list_entry(self.buf, entry.row_header_list_next_addr);
377            let row_header =
378                buffer::cast::<FDBRowHeaderC>(self.buf, entry.row_header_addr).extract();
379
380            let fields = buffer::cast_slice::<FDBFieldDataC>(
381                self.buf,
382                row_header.fields.base_offset,
383                row_header.fields.count,
384            );
385
386            Some(Row {
387                buf: self.buf,
388                fields,
389            })
390        } else {
391            None
392        }
393    }
394}
395
396#[derive(Copy, Clone)]
397/// Reference to a single row
398pub struct Row<'a> {
399    buf: &'a [u8],
400    fields: &'a [FDBFieldDataC],
401}
402
403fn get_field<'a>(data: &'a FDBFieldDataC, buf: &'a [u8]) -> Field<'a> {
404    let data_type = ValueType::try_from(data.data_type.extract()).unwrap();
405    let bytes = data.value.0;
406    get_field_raw(data_type, bytes, buf)
407}
408
409fn get_field_raw(data_type: ValueType, bytes: [u8; 4], buf: &[u8]) -> Field {
410    match data_type {
411        ValueType::Nothing => Field::Nothing,
412        ValueType::Integer => Field::Integer(i32::from_le_bytes(bytes)),
413        ValueType::Float => Field::Float(f32::from_le_bytes(bytes)),
414        ValueType::Text => {
415            let addr = u32::from_le_bytes(bytes);
416            let text = get_latin1_str(buf, addr);
417            Field::Text(text)
418        }
419        ValueType::Boolean => Field::Boolean(bytes != [0, 0, 0, 0]),
420        ValueType::BigInt => {
421            let addr = u32::from_le_bytes(bytes);
422            let val = buffer::cast::<LEI64>(buf, addr).extract();
423            Field::BigInt(val)
424        }
425        ValueType::VarChar => {
426            let addr = u32::from_le_bytes(bytes);
427            let text = get_latin1_str(buf, addr);
428            Field::VarChar(text)
429        }
430    }
431}
432
433/// An iterator over fields in a row
434pub struct FieldIter<'a> {
435    buf: &'a [u8],
436    iter: std::slice::Iter<'a, FDBFieldDataC>,
437}
438
439impl<'a> Iterator for FieldIter<'a> {
440    type Item = Field<'a>;
441
442    fn next(&mut self) -> Option<Self::Item> {
443        self.iter.next().map(|data| get_field(data, self.buf))
444    }
445}
446
447impl<'a> Row<'a> {
448    /// Get the field at the index
449    pub fn field_at(&self, index: usize) -> Option<Field<'a>> {
450        self.fields.get(index).map(|data| get_field(data, self.buf))
451    }
452
453    /// Get the iterator over all fields
454    pub fn field_iter(&self) -> FieldIter<'a> {
455        FieldIter {
456            iter: self.fields.iter(),
457            buf: self.buf,
458        }
459    }
460
461    /// Get the count of fields
462    pub fn field_count(&self) -> usize {
463        self.fields.len()
464    }
465}
466
467impl<'a> IntoIterator for Row<'a> {
468    type Item = Field<'a>;
469    type IntoIter = FieldIter<'a>;
470
471    fn into_iter(self) -> Self::IntoIter {
472        self.field_iter()
473    }
474}
475
476#[derive(Debug, PartialEq)]
477/// The context for `mem::Field`
478pub struct MemContext<'a> {
479    _m: std::marker::PhantomData<fn() -> &'a ()>,
480}
481
482impl<'a> Context for MemContext<'a> {
483    type String = &'a Latin1Str;
484    type I64 = i64;
485    type XML = &'a Latin1Str;
486}
487
488/// Value of or reference to a field value
489pub type Field<'a> = Value<MemContext<'a>>;
490
491struct MemFromFile<'a>(Buffer<'a>);
492
493impl<'a> ValueMapperMut<FileContext, MemContext<'a>> for MemFromFile<'a> {
494    fn map_string(&mut self, from: &IndirectValue) -> &'a Latin1Str {
495        self.0.string(from.addr).unwrap()
496    }
497
498    fn map_i64(&mut self, from: &IndirectValue) -> i64 {
499        self.0.i64(from.addr).unwrap()
500    }
501
502    fn map_xml(&mut self, from: &IndirectValue) -> &'a Latin1Str {
503        self.0.string(from.addr).unwrap()
504    }
505}
506
507impl<'a> TryFrom<Handle<'a, FDBFieldValue>> for Field<'a> {
508    type Error = Infallible;
509
510    fn try_from(value: Handle<'a, FDBFieldValue>) -> Result<Self, Self::Error> {
511        let mut mem = MemFromFile(value.buf());
512        Ok(value.raw().map(&mut mem))
513    }
514}