use std::path::Path;
use crate::common::error::{BioFormatsError, Result};
/// One table extracted from an MDB database, with every cell rendered as text.
#[derive(Debug, Clone)]
pub struct MdbTable {
/// Table name, copied from the catalog entry the table was read from.
pub name: String,
/// Column names, in the order of the parsed table definition.
pub columns: Vec<String>,
/// Row data; each inner vector holds one rendered string per decoded cell.
pub rows: Vec<Vec<String>>,
}
/// Reads the MDB file at `path` and returns every readable non-system table.
///
/// Catalog entries whose type is not `MDB_TABLE`, or whose name starts with
/// `"MSys"`, are skipped. Tables that fail to decode are dropped rather than
/// aborting the whole read.
///
/// # Errors
/// Fails when the file cannot be read or when opening the database (header
/// check and catalog read) fails.
pub fn parse_database(path: &Path) -> Result<Vec<MdbTable>> {
    let raw = std::fs::read(path).map_err(BioFormatsError::Io)?;
    let database = Mdb::open(raw)?;
    let tables = database
        .catalog
        .iter()
        .filter(|item| item.object_type == MDB_TABLE && !item.name.starts_with("MSys"))
        // Best effort: a table that cannot be decoded is left out.
        .filter_map(|item| database.read_table(item).ok())
        .collect();
    Ok(tables)
}
/// Reads the MDB file at `path` and returns the table called `name`, if any.
///
/// The first catalog entry of type `MDB_TABLE` with exactly that name is used;
/// `Ok(None)` means no such entry exists.
///
/// # Errors
/// Fails when the file cannot be read, the database cannot be opened, or the
/// matching table cannot be decoded.
pub fn parse_table(path: &Path, name: &str) -> Result<Option<MdbTable>> {
    let raw = std::fs::read(path).map_err(BioFormatsError::Io)?;
    let database = Mdb::open(raw)?;
    database
        .catalog
        .iter()
        .find(|item| item.object_type == MDB_TABLE && item.name == name)
        .map(|item| database.read_table(item))
        .transpose()
}
/// Catalog object-type value that `parse_database` and `parse_table` treat as a table.
const MDB_TABLE: i32 = 1;
/// Column type codes, as compared against the type byte of a column definition.
mod coltype {
    /// Boolean column.
    pub const BOOL: u8 = 0x01;
    /// Single-byte integer column.
    pub const BYTE: u8 = 0x02;
    /// Integer column decoded from two bytes.
    pub const INT: u8 = 0x03;
    /// Integer column decoded from four bytes.
    pub const LONGINT: u8 = 0x04;
    /// Currency column decoded from eight bytes.
    pub const MONEY: u8 = 0x05;
    /// Floating-point column decoded from four bytes.
    pub const FLOAT: u8 = 0x06;
    /// Floating-point column decoded from eight bytes.
    pub const DOUBLE: u8 = 0x07;
    /// Date/time column decoded from an eight-byte serial value.
    pub const DATETIME: u8 = 0x08;
    /// Raw binary column.
    pub const BINARY: u8 = 0x09;
    /// Text column.
    pub const TEXT: u8 = 0x0a;
    /// OLE object column (long value).
    pub const OLE: u8 = 0x0b;
    /// Memo column (long text value).
    pub const MEMO: u8 = 0x0c;
    /// Replication ID column.
    pub const REPID: u8 = 0x0f;
    /// Scaled numeric column.
    pub const NUMERIC: u8 = 0x10;
}
/// File-format generation of a database; it selects the page size and the
/// field offsets used throughout the parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JetVersion {
    Jet3,
    Jet4,
}

impl JetVersion {
    /// Page size, in bytes, of a Jet3 file.
    const JET3_PAGE_BYTES: usize = 2048;
    /// Page size, in bytes, of a Jet4 file.
    const JET4_PAGE_BYTES: usize = 4096;

    /// Returns the size in bytes of one page for this format generation.
    fn page_size(self) -> usize {
        match self {
            Self::Jet3 => Self::JET3_PAGE_BYTES,
            Self::Jet4 => Self::JET4_PAGE_BYTES,
        }
    }
}
/// Checks the file signature and determines the format generation.
///
/// The signature is looked for at byte 4; the version byte at `0x14` selects
/// Jet3 when it is zero and Jet4 for any other value.
///
/// # Errors
/// `InvalidData` when fewer than `0x16` bytes are available, and
/// `UnsupportedFormat` when neither known signature is present.
fn detect_version(bytes: &[u8]) -> Result<JetVersion> {
    const MIN_HEADER_LEN: usize = 0x16;
    const VERSION_OFFSET: usize = 0x14;
    const SIGNATURES: [&[u8]; 2] = [b"Standard Jet DB", b"Standard ACE DB"];

    if bytes.len() < MIN_HEADER_LEN {
        return Err(BioFormatsError::InvalidData(
            "MDB file too short for header".into(),
        ));
    }

    // The length check above guarantees that bytes 4..19 exist.
    let magic = &bytes[4..19];
    let recognised = SIGNATURES.iter().any(|sig| magic.starts_with(sig));
    if !recognised {
        return Err(BioFormatsError::UnsupportedFormat(
            "not a recognised Jet/ACE database (missing magic)".into(),
        ));
    }

    let version = if bytes[VERSION_OFFSET] == 0x00 {
        JetVersion::Jet3
    } else {
        JetVersion::Jet4
    };
    Ok(version)
}
/// One row of the object catalog, reduced to the fields the parser uses.
#[derive(Debug, Clone)]
struct CatalogEntry {
/// Value of the catalog's "Type" column with the top bit masked off.
object_type: i32,
/// Value of the catalog's "Name" column.
name: String,
/// Low 24 bits of the catalog's "Id" column, used as the table-definition page number.
table_page: u32,
}
/// Layout and type information for one column of a table definition.
#[derive(Debug, Clone)]
struct Column {
/// Column name, filled in from the name list that follows the column definitions.
name: String,
/// Type code byte of the column definition; compared against the `coltype` constants.
col_type: u8,
/// Offset of a fixed-width value, counted from the end of the row's column-count field.
fixed_offset: usize,
/// Position of a variable-width column in the row's variable-offset table.
var_index: usize,
/// True when bit `0x01` of the column flags is set.
is_fixed: bool,
/// Size field of the column definition; the byte length sliced for fixed-width values.
size: usize,
/// Scale byte of the column definition, passed on to the numeric decoder.
scale: u8,
}
/// An MDB file held fully in memory, together with its parsed object catalog.
struct Mdb {
/// Raw contents of the whole file.
bytes: Vec<u8>,
/// Format generation detected from the header; selects page size and field offsets.
version: JetVersion,
/// Catalog entries collected by `read_catalog` while the database is opened.
catalog: Vec<CatalogEntry>,
}
impl Mdb {
fn open(bytes: Vec<u8>) -> Result<Self> {
let version = detect_version(&bytes)?;
let page_size = version.page_size();
if bytes.len() < page_size {
return Err(BioFormatsError::InvalidData(
"MDB file shorter than one page".into(),
));
}
let mut db = Mdb {
bytes,
version,
catalog: Vec::new(),
};
db.read_catalog()?;
Ok(db)
}
/// Returns the page size in bytes for this database's format version.
fn page_size(&self) -> usize {
self.version.page_size()
}
/// Returns the bytes of page `page_no`.
///
/// # Errors
/// `InvalidData` when the byte range of the page cannot be represented in a
/// `usize`, or when the page lies (partly) beyond the end of the file.
fn page(&self, page_no: u32) -> Result<&[u8]> {
    let ps = self.page_size();
    // Both the start and the end of the range are computed with overflow
    // checks, so a hostile page number cannot wrap on narrow targets.
    let range = (page_no as usize)
        .checked_mul(ps)
        .and_then(|start| Some(start..start.checked_add(ps)?))
        .ok_or_else(|| BioFormatsError::InvalidData("page offset overflow".into()))?;
    self.bytes
        .get(range)
        .ok_or_else(|| BioFormatsError::InvalidData(format!("page {page_no} out of range")))
}
fn read_catalog(&mut self) -> Result<()> {
let table = self.read_table_def(2)?;
let rows = self.read_all_rows(&table)?;
let name_idx = table.columns.iter().position(|c| c.name == "Name");
let type_idx = table.columns.iter().position(|c| c.name == "Type");
let id_idx = table.columns.iter().position(|c| c.name == "Id");
let (name_idx, type_idx, id_idx) = match (name_idx, type_idx, id_idx) {
(Some(n), Some(t), Some(i)) => (n, t, i),
_ => {
return Err(BioFormatsError::InvalidData(
"MSysObjects is missing expected columns (Name/Type/Id)".into(),
));
}
};
for row in &rows {
let name = row.get(name_idx).cloned().unwrap_or_default().value_string();
let object_type = row
.get(type_idx)
.and_then(|c| c.as_i64())
.unwrap_or(0) as i32;
let id = row.get(id_idx).and_then(|c| c.as_i64()).unwrap_or(0);
let table_page = (id as u32) & 0x00ff_ffff;
let object_type = object_type & 0x7fff_ffff;
self.catalog.push(CatalogEntry {
object_type,
name,
table_page,
});
}
Ok(())
}
fn read_table(&self, entry: &CatalogEntry) -> Result<MdbTable> {
let def = self.read_table_def(entry.table_page)?;
let raw_rows = self.read_all_rows(&def)?;
let columns: Vec<String> = def.columns.iter().map(|c| c.name.clone()).collect();
let rows: Vec<Vec<String>> = raw_rows
.iter()
.map(|cells| cells.iter().map(|c| c.value_string()).collect())
.collect();
Ok(MdbTable {
name: entry.name.clone(),
columns,
rows,
})
}
/// Reads the table definition that starts on `first_page`.
///
/// Definition pages (type byte `0x02`) are followed through the next-page
/// pointer at offset 4 and concatenated into one buffer, which is then
/// parsed. Continuation pages contribute only the bytes after their 8-byte
/// page header, so the definition stream stays contiguous. The chain is
/// capped to guard against cyclic page pointers.
///
/// When the parsed definition lists no data pages, they are discovered by
/// scanning the file for data pages owned by `first_page`.
///
/// # Errors
/// Fails when a page in the chain is out of range, when `first_page` is not
/// a table-definition page, or when parsing the definition fails.
fn read_table_def(&self, first_page: u32) -> Result<TableDef> {
    // Page header: type (2 bytes), free-space word (2), next-page pointer (4).
    const PAGE_HEADER_LEN: usize = 8;
    const MAX_EXTRA_PAGES: usize = 64;

    let mut buf: Vec<u8> = Vec::new();
    let mut page_no = first_page;
    let mut pages_read = 0usize;
    loop {
        let page = self.page(page_no)?;
        if page.first() != Some(&0x02) {
            if pages_read == 0 {
                return Err(BioFormatsError::InvalidData(format!(
                    "page {page_no} is not a table-definition page"
                )));
            }
            break;
        }
        // The pointer sits at the same offset in both format generations.
        let next = u32_le(page, 4);
        if pages_read == 0 {
            buf.extend_from_slice(page);
        } else {
            buf.extend_from_slice(&page[PAGE_HEADER_LEN..]);
        }
        pages_read += 1;
        if next == 0 || pages_read > MAX_EXTRA_PAGES {
            break;
        }
        page_no = next;
    }

    let mut def = parse_table_def(self.version, &buf)?;
    if def.data_pages.is_empty() {
        def.data_pages = self.find_data_pages(first_page);
    }
    Ok(def)
}

/// Lists, in file order, every data page (type byte `0x01`) whose owner
/// field at offset 4 names `tdef_page`.
fn find_data_pages(&self, tdef_page: u32) -> Vec<u32> {
    self.bytes
        .chunks_exact(self.page_size())
        .enumerate()
        .filter(|(_, page)| page[0] == 0x01 && u32_le(page, 4) == tdef_page)
        .filter_map(|(index, _)| u32::try_from(index).ok())
        .collect()
}
/// Decodes the rows of every data page listed in `def.data_pages`.
///
/// Pages that are out of range or whose type byte is not `0x01` are skipped.
///
/// # Errors
/// Propagates any error returned while reading a data page.
fn read_all_rows(&self, def: &TableDef) -> Result<Vec<Vec<Cell>>> {
    let mut collected = Vec::new();
    let data_pages = def
        .data_pages
        .iter()
        .filter_map(|&number| self.page(number).ok())
        .filter(|page| page.first() == Some(&0x01));
    for page in data_pages {
        self.read_data_page(def, page, &mut collected)?;
    }
    Ok(collected)
}
/// Decodes the rows stored on one data page and appends them to `out`.
///
/// The page holds a row count followed by a table of 16-bit row slots. The
/// low 13 bits of a slot give the row's start offset; slots with bit
/// `0x8000` or `0x4000` set are skipped. A row ends where the previous
/// slot's row starts, and the first row ends at the end of the page. Rows
/// that fail to parse are dropped silently.
fn read_data_page(
    &self,
    def: &TableDef,
    page: &[u8],
    out: &mut Vec<Vec<Cell>>,
) -> Result<()> {
    const SKIP_FLAGS: u16 = 0x8000 | 0x4000;
    const OFFSET_MASK: u16 = 0x1fff;

    let (count_off, loc_off) = match self.version {
        JetVersion::Jet3 => (8usize, 10usize),
        JetVersion::Jet4 => (12usize, 14usize),
    };
    if page.len() < loc_off + 2 {
        return Ok(());
    }

    let page_size = self.page_size();
    let declared_rows = usize::from(u16_le(page, count_off));
    // End offset for the next slot: the page end at first, then the start
    // offset of the slot just visited (whether or not that slot was skipped).
    let mut next_end = page_size;
    for slot in 0..declared_rows {
        let entry_off = loc_off + slot * 2;
        if entry_off + 2 > page.len() {
            break;
        }
        let raw = u16_le(page, entry_off);
        let row_start = usize::from(raw & OFFSET_MASK);
        let row_end = std::mem::replace(&mut next_end, row_start);

        if raw & SKIP_FLAGS != 0 {
            continue;
        }
        if row_start >= row_end || row_end > page_size {
            continue;
        }
        if let Ok(cells) = self.parse_row(def, &page[row_start..row_end]) {
            out.push(cells);
        }
    }
    Ok(())
}
/// Decodes one row into a cell per column of `def`.
///
/// A row begins with a column count (two bytes for Jet4, one for Jet3) and
/// ends with a null mask of one bit per counted column, where a set bit
/// means the column has a value. Fixed-width values are located from the end
/// of the count field; variable-width values through the row's offset table.
/// A value that cannot be located becomes `Cell::Null`.
///
/// # Errors
/// `InvalidData` when the row is too short for its count field or null mask.
fn parse_row(&self, def: &TableDef, row: &[u8]) -> Result<Vec<Cell>> {
    // Width of the leading count field; fixed-width data starts right after.
    let count_width = match self.version {
        JetVersion::Jet4 => 2usize,
        JetVersion::Jet3 => 1usize,
    };
    if row.len() < count_width {
        return Err(BioFormatsError::InvalidData("row too short".into()));
    }
    let col_count = match self.version {
        JetVersion::Jet4 => usize::from(u16_le(row, 0)),
        JetVersion::Jet3 => usize::from(row[0]),
    };

    let null_mask_len = (col_count + 7) / 8;
    if row.len() < null_mask_len {
        return Err(BioFormatsError::InvalidData("row missing null mask".into()));
    }
    let null_mask = &row[row.len() - null_mask_len..];
    // Columns beyond the mask are treated as null.
    let has_value = |col: usize| -> bool {
        null_mask
            .get(col / 8)
            .map_or(false, |bits| (bits >> (col % 8)) & 1 != 0)
    };

    let var_total = def.columns.iter().filter(|c| !c.is_fixed).count();
    let var_offsets: Vec<usize> = if var_total == 0 {
        Vec::new()
    } else {
        self.read_var_offsets(row, var_total, null_mask_len)
    };

    let cells = def
        .columns
        .iter()
        .enumerate()
        .map(|(index, col)| {
            if !has_value(index) {
                return Cell::Null;
            }
            if col.is_fixed {
                let start = count_width + col.fixed_offset;
                return match row.get(start..start + col.size) {
                    Some(raw) => decode_value(col, raw),
                    None => Cell::Null,
                };
            }
            let slot = col.var_index;
            match (var_offsets.get(slot), var_offsets.get(slot + 1)) {
                (Some(&start), Some(&end)) => match row.get(start..end) {
                    Some(raw) => decode_var_value(col, &self.resolve_lval(col, raw)),
                    None => Cell::Null,
                },
                _ => Cell::Null,
            }
        })
        .collect();
    Ok(cells)
}
/// Reads the variable-column offset table stored at the end of a row.
///
/// Just before the null mask sits the variable-column count, preceded by the
/// offset entries; the count and each entry are two bytes wide for Jet4 and
/// one byte for Jet3. The larger of the stored count and `num_var` decides
/// how many entries (plus one terminating entry) are read. Entries are
/// returned in the reverse of their stored order. An empty vector means the
/// row is too short to hold the table.
fn read_var_offsets(&self, row: &[u8], num_var: usize, null_mask_len: usize) -> Vec<usize> {
    let width = match self.version {
        JetVersion::Jet4 => 2usize,
        JetVersion::Jet3 => 1usize,
    };
    let read_entry = |pos: usize| -> usize {
        if width == 2 {
            usize::from(u16_le(row, pos))
        } else {
            usize::from(row[pos])
        }
    };

    let trailer_len = null_mask_len + width;
    if row.len() < trailer_len {
        return Vec::new();
    }
    let count_pos = row.len() - trailer_len;
    let entries = read_entry(count_pos).max(num_var);

    let table_len = (entries + 1) * width;
    if count_pos < table_len {
        return Vec::new();
    }
    let table_start = count_pos - table_len;

    (0..=entries)
        .rev()
        .map(|slot| read_entry(table_start + slot * width))
        .collect()
}
/// Resolves the stored bytes of a MEMO/OLE column to the value's payload.
///
/// Other column types, and fields shorter than the 12-byte long-value
/// header, are returned untouched. The header carries a 24-bit length and a
/// type byte: `0x80` means the payload follows the header inline, `0x40`
/// means bytes 4..8 point at a row on another page (row number in the low
/// byte, page number in the upper bits). Any other type, or a pointer that
/// cannot be followed, yields the raw field bytes unchanged.
fn resolve_lval<'a>(&self, col: &Column, data: &'a [u8]) -> std::borrow::Cow<'a, [u8]> {
    use std::borrow::Cow;
    const HEADER_LEN: usize = 12;

    let is_long_value = matches!(col.col_type, coltype::MEMO | coltype::OLE);
    if !is_long_value || data.len() < HEADER_LEN {
        return Cow::Borrowed(data);
    }

    let length = u32::from_le_bytes([data[0], data[1], data[2], 0]) as usize;
    match data[3] {
        0x80 => {
            let end = (HEADER_LEN + length).min(data.len());
            Cow::Owned(data[HEADER_LEN..end].to_vec())
        }
        0x40 => {
            let pointer = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
            let page_no = pointer >> 8;
            let row_num = (pointer & 0xff) as usize;
            match self.read_lval_row(page_no, row_num) {
                Some(payload) => Cow::Owned(payload),
                None => Cow::Borrowed(data),
            }
        }
        _ => Cow::Borrowed(data),
    }
}
/// Returns a copy of row `row_num` on page `page_no`, for following a
/// long-value pointer.
///
/// Yields `None` when the page is out of range, is not a data page (type
/// byte `0x01`), holds no such row, or the row's offsets are inconsistent.
fn read_lval_row(&self, page_no: u32, row_num: usize) -> Option<Vec<u8>> {
    const OFFSET_MASK: u16 = 0x1fff;

    let page = self.page(page_no).ok()?;
    // Anything other than a data page has no row table to consult.
    if page.first() != Some(&0x01) {
        return None;
    }
    let (count_off, loc_off) = match self.version {
        JetVersion::Jet3 => (8usize, 10usize),
        JetVersion::Jet4 => (12usize, 14usize),
    };
    if page.len() < loc_off + 2 {
        return None;
    }
    let num_rows = u16_le(page, count_off) as usize;
    if row_num >= num_rows {
        return None;
    }

    let start = (u16_le(page, loc_off + row_num * 2) & OFFSET_MASK) as usize;
    // A row ends where the previous slot's row starts; the first row ends at
    // the end of the page.
    let end = if row_num == 0 {
        page.len()
    } else {
        (u16_le(page, loc_off + (row_num - 1) * 2) & OFFSET_MASK) as usize
    };
    if start >= end {
        return None;
    }
    page.get(start..end).map(<[u8]>::to_vec)
}
}
/// Parsed table definition: the column layout plus the pages that hold its rows.
#[derive(Debug, Clone)]
struct TableDef {
/// Columns in definition order.
columns: Vec<Column>,
/// Page numbers that `read_all_rows` visits when collecting rows.
data_pages: Vec<u32>,
}
/// Parses the column layout out of a concatenated table-definition buffer.
///
/// The header supplies the column count and the number of real indexes; the
/// column definitions start after one fixed-size entry per real index and
/// are followed by the length-prefixed column names (UTF-16LE with a 16-bit
/// length for Jet4, Latin-1 with an 8-bit length for Jet3). Parsing stops
/// early, keeping what was decoded so far, when the buffer runs out.
///
/// `data_pages` is always returned empty: this function only sees the
/// definition bytes and cannot resolve which pages hold the rows.
fn parse_table_def(version: JetVersion, buf: &[u8]) -> Result<TableDef> {
    // Header layout: (column-count offset, real-index-count offset,
    // column-definition base offset, column-definition size, size of one
    // real-index entry). In both versions the real-index count is the 4-byte
    // field immediately after the 4-byte logical index count.
    let (num_cols_off, num_real_idx_off, col_defs_off, col_def_size, idx_entry_size) =
        match version {
            JetVersion::Jet4 => (0x2D_usize, 0x33_usize, 0x3F_usize, 25_usize, 12_usize),
            JetVersion::Jet3 => (0x19_usize, 0x1F_usize, 0x2B_usize, 18_usize, 8_usize),
        };
    // Offsets inside one column definition: (scale, flags, fixed offset, size).
    let (scale_off, flags_off, fixed_off, size_off) = match version {
        JetVersion::Jet4 => (11_usize, 15_usize, 21_usize, 23_usize),
        JetVersion::Jet3 => (9_usize, 13_usize, 14_usize, 16_usize),
    };

    let num_cols = u16_le(buf, num_cols_off) as usize;
    let num_real_idx = u32_le(buf, num_real_idx_off) as usize;
    // Saturating arithmetic keeps corrupt counts from overflowing; an
    // out-of-range start simply yields no columns.
    let col_defs_start =
        col_defs_off.saturating_add(num_real_idx.saturating_mul(idx_entry_size));

    let mut columns: Vec<Column> = Vec::with_capacity(num_cols);
    let mut var_counter = 0usize;
    for i in 0..num_cols {
        let base = col_defs_start.saturating_add(i * col_def_size);
        let def = match buf.get(base..base.saturating_add(col_def_size)) {
            Some(def) => def,
            None => break,
        };

        let col_type = def[0];
        // NOTE(review): offset 5 is used for the variable-column number in
        // both versions; confirm against the format notes.
        let var_off = u16_le(def, 5) as usize;
        let scale = def[scale_off];
        let flags = def[flags_off];
        let fixed_offset = u16_le(def, fixed_off) as usize;
        let size = u16_le(def, size_off) as usize;

        let is_fixed = flags & 0x01 != 0;
        let var_index = if is_fixed {
            0
        } else {
            var_counter += 1;
            // Fall back to the running count when the stored number is zero
            // for any variable column after the first.
            if var_off != 0 || var_counter == 1 {
                var_off
            } else {
                var_counter - 1
            }
        };
        columns.push(Column {
            name: String::new(),
            col_type,
            fixed_offset,
            var_index,
            is_fixed,
            size,
            scale,
        });
    }

    // Names follow the space reserved for all declared column definitions.
    let names_start = col_defs_start.saturating_add(num_cols * col_def_size);
    let mut rest: &[u8] = buf.get(names_start..).unwrap_or(&[]);
    for col in columns.iter_mut() {
        let (prefix, len) = match version {
            JetVersion::Jet4 => {
                if rest.len() < 2 {
                    break;
                }
                (2usize, u16_le(rest, 0) as usize)
            }
            JetVersion::Jet3 => match rest.first() {
                Some(&len) => (1usize, len as usize),
                None => break,
            },
        };
        let raw = match rest.get(prefix..prefix + len) {
            Some(raw) => raw,
            None => break,
        };
        col.name = match version {
            JetVersion::Jet4 => decode_utf16le(raw),
            JetVersion::Jet3 => decode_latin1(raw),
        };
        rest = &rest[prefix + len..];
    }

    Ok(TableDef {
        columns,
        data_pages: Vec::new(),
    })
}
/// A decoded cell value.
#[derive(Debug, Clone)]
enum Cell {
/// The column is null for this row, or its bytes could not be located or decoded.
Null,
/// Boolean value.
Bool(bool),
/// Integer value, widened to 64 bits.
Int(i64),
/// Floating-point value, widened to 64 bits.
Float(f64),
/// Text value (also used for rendered dates and numerics).
Text(String),
/// Raw bytes for types that have no richer decoding.
Bytes(Vec<u8>),
}
impl Cell {
    /// Renders the cell as text: empty for null, `"1"`/`"0"` for booleans,
    /// decimal for numbers and upper-case hexadecimal for raw bytes.
    fn value_string(&self) -> String {
        match self {
            Cell::Null => String::new(),
            Cell::Bool(flag) => String::from(if *flag { "1" } else { "0" }),
            Cell::Int(number) => number.to_string(),
            Cell::Float(number) => format_float(*number),
            Cell::Text(text) => text.clone(),
            Cell::Bytes(raw) => {
                let mut hex = String::with_capacity(raw.len() * 2);
                for byte in raw {
                    hex.push_str(&format!("{:02X}", byte));
                }
                hex
            }
        }
    }

    /// Returns the cell as an integer when it holds a number or boolean;
    /// floats are converted with an `as` cast.
    fn as_i64(&self) -> Option<i64> {
        match *self {
            Cell::Int(number) => Some(number),
            Cell::Bool(flag) => Some(i64::from(flag)),
            Cell::Float(number) => Some(number as i64),
            Cell::Null | Cell::Text(_) | Cell::Bytes(_) => None,
        }
    }
}
/// Renders a float for table output.
///
/// Whole values with magnitude below 1e15 print as plain integers (so `3.0`
/// becomes `"3"`); everything else uses the standard `Display` form, which
/// never contains an exponent.
fn format_float(f: f64) -> String {
    if f == f.trunc() && f.abs() < 1e15 {
        return (f as i64).to_string();
    }
    f.to_string()
}
/// Decodes the bytes of a fixed-width column according to its type code.
///
/// Numeric types are read little-endian; a value shorter than its type
/// requires becomes `Cell::Null`. Types without a dedicated decoding
/// (including replication IDs) are returned as raw bytes.
fn decode_value(col: &Column, data: &[u8]) -> Cell {
    /// Copies the first `N` bytes of `data`, or `None` when it is shorter.
    fn head<const N: usize>(data: &[u8]) -> Option<[u8; N]> {
        data.get(..N).and_then(|bytes| bytes.try_into().ok())
    }

    match col.col_type {
        coltype::BOOL => Cell::Bool(data.first().map_or(false, |byte| *byte != 0)),
        coltype::BYTE => Cell::Int(i64::from(data.first().copied().unwrap_or(0))),
        coltype::INT => head::<2>(data).map_or(Cell::Null, |raw| {
            Cell::Int(i64::from(i16::from_le_bytes(raw)))
        }),
        coltype::LONGINT => head::<4>(data).map_or(Cell::Null, |raw| {
            Cell::Int(i64::from(i32::from_le_bytes(raw)))
        }),
        // Stored as an integer count of ten-thousandths.
        coltype::MONEY => head::<8>(data).map_or(Cell::Null, |raw| {
            Cell::Float(i64::from_le_bytes(raw) as f64 / 10000.0)
        }),
        coltype::FLOAT => head::<4>(data).map_or(Cell::Null, |raw| {
            Cell::Float(f64::from(f32::from_le_bytes(raw)))
        }),
        coltype::DOUBLE => head::<8>(data).map_or(Cell::Null, |raw| {
            Cell::Float(f64::from_le_bytes(raw))
        }),
        coltype::DATETIME => head::<8>(data).map_or(Cell::Null, |raw| {
            Cell::Text(format_ole_date(f64::from_le_bytes(raw)))
        }),
        coltype::NUMERIC => Cell::Text(decode_numeric(data, col.scale)),
        _ => Cell::Bytes(data.to_vec()),
    }
}
/// Decodes the bytes of a variable-width column according to its type code:
/// text and memo columns become text, numerics are rendered as decimal
/// text, and every other type is kept as raw bytes.
fn decode_var_value(col: &Column, data: &[u8]) -> Cell {
    let kind = col.col_type;
    if kind == coltype::TEXT || kind == coltype::MEMO {
        return Cell::Text(decode_jet_text(data));
    }
    if kind == coltype::NUMERIC {
        return Cell::Text(decode_numeric(data, col.scale));
    }
    Cell::Bytes(data.to_vec())
}
/// Decodes a stored text value, guessing its encoding from the bytes.
///
/// A leading `FF FE` marker is stripped and the remainder read as one byte
/// per character. Otherwise, data of even length whose first (up to eight)
/// byte pairs all have a zero high byte is read as UTF-16LE; anything else
/// is read as one byte per character.
fn decode_jet_text(data: &[u8]) -> String {
    if let [0xFF, 0xFE, rest @ ..] = data {
        return decode_latin1(rest);
    }
    let even_len = data.len() >= 2 && data.len() % 2 == 0;
    if even_len && data.chunks(2).take(8).all(|pair| pair[1] == 0) {
        return decode_utf16le(data);
    }
    decode_latin1(data)
}
/// Renders a scaled numeric value as decimal text.
///
/// The first byte carries the sign in bit `0x80`; the next 16 bytes are read
/// as an unsigned little-endian magnitude. `scale` is the number of digits
/// placed after the decimal point. Fewer than 17 bytes yields an empty
/// string.
fn decode_numeric(data: &[u8], scale: u8) -> String {
    if data.len() < 17 {
        return String::new();
    }
    let negative = data[0] & 0x80 != 0;
    let mut magnitude = [0u8; 16];
    magnitude.copy_from_slice(&data[1..17]);
    // Unsigned, so a set top bit cannot flip the value negative.
    let value = u128::from_le_bytes(magnitude);

    let sign = if negative { "-" } else { "" };
    if scale == 0 {
        return format!("{}{}", sign, value);
    }

    let width = usize::from(scale);
    match 10u128.checked_pow(u32::from(scale)) {
        Some(divisor) => format!(
            "{}{}.{:0width$}",
            sign,
            value / divisor,
            value % divisor,
            width = width
        ),
        // 10^scale exceeds every 128-bit value, so the integer part is zero
        // and the whole magnitude is the fraction.
        None => format!("{}0.{:0width$}", sign, value, width = width),
    }
}
/// Formats a date serial as `YYYY-MM-DD HH:MM:SS`.
///
/// The integer part of `serial` counts days from 1899-12-30 and the
/// magnitude of the fractional part is the time of day. A time that rounds
/// up to a full day carries into the date rather than printing hour 24.
fn format_ole_date(serial: f64) -> String {
    const SECONDS_PER_DAY: i64 = 86_400;
    // Day number of 1899-12-30 when 0001-01-01 is day 1.
    const SERIAL_ZERO_DAY: i64 = 693_594;

    let days = serial.trunc() as i64;
    let frac = serial - serial.trunc();
    let mut total_seconds = (frac.abs() * 86400.0).round() as i64;
    // Saturating: an absurd serial from corrupt data must not overflow.
    let mut day_number = days.saturating_add(SERIAL_ZERO_DAY);
    if total_seconds >= SECONDS_PER_DAY {
        total_seconds -= SECONDS_PER_DAY;
        day_number = day_number.saturating_add(1);
    }

    let (y, m, d) = civil_from_epoch(day_number);
    let hh = total_seconds / 3600;
    let mm = (total_seconds % 3600) / 60;
    let ss = total_seconds % 60;
    format!(
        "{:04}-{:02}-{:02} {:02}:{:02}:{:02}",
        y, m, d, hh, mm, ss
    )
}

/// Converts a day number (day 1 is 0001-01-01 in the proleptic Gregorian
/// calendar) into `(year, month, day)`.
fn civil_from_epoch(z_in: i64) -> (i64, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097; // one 400-year Gregorian cycle

    // Re-base onto 0000-03-01 so that leap days fall at the end of a year.
    let z = z_in.saturating_add(305);
    let era = z.div_euclid(DAYS_PER_ERA);
    let day_of_era = z.rem_euclid(DAYS_PER_ERA); // 0..=146_096
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // 0..=399
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // 0..=365
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March .. 11 = February

    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the following calendar year.
    let year = year_of_era + era * 400 + i64::from(month <= 2);
    (year, month as u32, day as u32)
}
/// Reads a little-endian `u16` at `off`, or returns 0 when fewer than two
/// bytes are available there (including when `off + 2` would overflow).
#[inline]
fn u16_le(b: &[u8], off: usize) -> u16 {
    off.checked_add(2)
        .and_then(|end| b.get(off..end))
        .map_or(0, |bytes| u16::from_le_bytes([bytes[0], bytes[1]]))
}
/// Reads a little-endian `u32` at `off`, or returns 0 when fewer than four
/// bytes are available there (including when `off + 4` would overflow).
#[inline]
fn u32_le(b: &[u8], off: usize) -> u32 {
    off.checked_add(4)
        .and_then(|end| b.get(off..end))
        .map_or(0, |bytes| {
            u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]])
        })
}
/// Decodes bytes as one character per byte (each byte becomes the code point
/// of the same value), dropping trailing NUL bytes.
fn decode_latin1(data: &[u8]) -> String {
    // Length of the input once trailing NULs are removed.
    let kept = data
        .iter()
        .rposition(|&byte| byte != 0)
        .map_or(0, |last| last + 1);
    data[..kept].iter().copied().map(char::from).collect()
}
/// Decodes bytes as UTF-16LE, replacing invalid sequences, ignoring a
/// dangling final byte and dropping trailing NUL characters.
fn decode_utf16le(data: &[u8]) -> String {
    let units: Vec<u16> = data
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
        .collect();
    let mut text = String::from_utf16_lossy(&units);
    let kept = text.trim_end_matches('\u{0}').len();
    text.truncate(kept);
    text
}