use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use crate::{Result, Triple};
const MAGIC_HEADER: &[u8] = b"SYNAPSEDB";
const FILE_VERSION: u32 = 2;
const FILE_HEADER_LENGTH: u64 = 64;
#[derive(Debug, Clone)]
struct SectionPointer {
offset: u32,
length: u32,
}
#[derive(Debug)]
struct FileLayout {
dictionary: SectionPointer,
triples: SectionPointer,
#[allow(dead_code)] indexes: SectionPointer,
properties: SectionPointer,
}
pub struct LegacyDatabase {
file: File,
layout: FileLayout,
dictionary: Vec<String>,
}
impl LegacyDatabase {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
let mut file = File::open(path)?;
let layout = Self::read_header(&mut file)?;
let dictionary = Self::read_dictionary(&mut file, &layout.dictionary)?;
Ok(Self {
file,
layout,
dictionary,
})
}
fn read_header(file: &mut File) -> Result<FileLayout> {
let mut header = vec![0u8; FILE_HEADER_LENGTH as usize];
file.read_exact(&mut header)?;
if &header[..MAGIC_HEADER.len()] != MAGIC_HEADER {
return Err(crate::Error::Other(
"Invalid .synapsedb file: magic header mismatch".into(),
));
}
let version = u32::from_le_bytes([
header[MAGIC_HEADER.len()],
header[MAGIC_HEADER.len() + 1],
header[MAGIC_HEADER.len() + 2],
header[MAGIC_HEADER.len() + 3],
]);
if version != FILE_VERSION {
return Err(crate::Error::Other(format!(
"Unsupported file version: {} (expected {})",
version, FILE_VERSION
)));
}
let read_section = |offset: usize| -> SectionPointer {
let base = 16 + offset * 8;
SectionPointer {
offset: u32::from_le_bytes([
header[base],
header[base + 1],
header[base + 2],
header[base + 3],
]),
length: u32::from_le_bytes([
header[base + 4],
header[base + 5],
header[base + 6],
header[base + 7],
]),
}
};
Ok(FileLayout {
dictionary: read_section(0),
triples: read_section(1),
indexes: read_section(2),
properties: read_section(3),
})
}
fn read_dictionary(file: &mut File, section: &SectionPointer) -> Result<Vec<String>> {
if section.length == 0 {
return Ok(Vec::new());
}
file.seek(SeekFrom::Start(section.offset as u64))?;
let mut data = vec![0u8; section.length as usize];
file.read_exact(&mut data)?;
if data.len() < 4 {
return Err(crate::Error::Other("Invalid dictionary section".into()));
}
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
let mut dictionary = Vec::with_capacity(count);
let mut pos = 4;
for _ in 0..count {
if pos + 4 > data.len() {
return Err(crate::Error::Other("Truncated dictionary entry".into()));
}
let str_len =
u32::from_le_bytes([data[pos], data[pos + 1], data[pos + 2], data[pos + 3]])
as usize;
pos += 4;
if pos + str_len > data.len() {
return Err(crate::Error::Other("Truncated dictionary string".into()));
}
let s = String::from_utf8(data[pos..pos + str_len].to_vec())
.map_err(|e| crate::Error::Other(format!("Invalid UTF-8 in dictionary: {}", e)))?;
dictionary.push(s);
pos += str_len;
}
Ok(dictionary)
}
pub fn dictionary(&self) -> &[String] {
&self.dictionary
}
pub fn read_triples(&mut self) -> Result<Vec<Triple>> {
if self.layout.triples.length == 0 {
return Ok(Vec::new());
}
self.file
.seek(SeekFrom::Start(self.layout.triples.offset as u64))?;
let mut data = vec![0u8; self.layout.triples.length as usize];
self.file.read_exact(&mut data)?;
if data.len() < 4 {
return Err(crate::Error::Other("Invalid triples section".into()));
}
let count = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
let mut triples = Vec::with_capacity(count);
let mut pos = 4;
for _ in 0..count {
if pos + 24 > data.len() {
return Err(crate::Error::Other("Truncated triple entry".into()));
}
let subject_id = u64::from_le_bytes([
data[pos],
data[pos + 1],
data[pos + 2],
data[pos + 3],
data[pos + 4],
data[pos + 5],
data[pos + 6],
data[pos + 7],
]);
let predicate_id = u64::from_le_bytes([
data[pos + 8],
data[pos + 9],
data[pos + 10],
data[pos + 11],
data[pos + 12],
data[pos + 13],
data[pos + 14],
data[pos + 15],
]);
let object_id = u64::from_le_bytes([
data[pos + 16],
data[pos + 17],
data[pos + 18],
data[pos + 19],
data[pos + 20],
data[pos + 21],
data[pos + 22],
data[pos + 23],
]);
triples.push(Triple {
subject_id,
predicate_id,
object_id,
});
pos += 24;
}
Ok(triples)
}
pub fn read_properties(&mut self) -> Result<Vec<u8>> {
if self.layout.properties.length == 0 {
return Ok(Vec::new());
}
self.file
.seek(SeekFrom::Start(self.layout.properties.offset as u64))?;
let mut data = vec![0u8; self.layout.properties.length as usize];
self.file.read_exact(&mut data)?;
Ok(data)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
fn create_test_synapsedb() -> NamedTempFile {
let mut file = NamedTempFile::new().unwrap();
let mut header = vec![0u8; FILE_HEADER_LENGTH as usize];
header[..MAGIC_HEADER.len()].copy_from_slice(MAGIC_HEADER);
header[MAGIC_HEADER.len()..MAGIC_HEADER.len() + 4]
.copy_from_slice(&FILE_VERSION.to_le_bytes());
file.write_all(&header).unwrap();
file.flush().unwrap();
file
}
#[test]
fn test_open_legacy_database() {
let file = create_test_synapsedb();
let result = LegacyDatabase::open(file.path());
assert!(result.is_ok());
}
}