#![allow(missing_docs)]
use crate::dict_sect_pfc;
use crate::triples::Id;
use crate::{ControlInfo, DictSectPFC};
use std::io::BufRead;
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
use std::thread::JoinHandle;
use thiserror::Error;
pub type Result<T> = core::result::Result<T, Error>;
#[derive(Debug, Clone, Copy)]
pub enum IdKind {
Subject,
Predicate,
Object,
}
impl IdKind {
pub const KINDS: [IdKind; 3] = [IdKind::Subject, IdKind::Predicate, IdKind::Object];
}
#[cfg_attr(test, derive(PartialEq))]
#[derive(Debug)]
pub struct FourSectDict {
pub shared: DictSectPFC,
pub subjects: DictSectPFC,
pub predicates: DictSectPFC,
pub objects: DictSectPFC,
}
#[derive(Debug)]
pub enum SectKind {
Shared,
Subject,
Predicate,
Object,
}
#[derive(Error, Debug)]
#[error("four sect dict error id_to_string({id},IdKind::{id_kind:?}) in the {sect_kind:?} section, caused by {e}")]
pub struct ExtractError {
#[source]
e: dict_sect_pfc::ExtractError,
id: Id,
id_kind: IdKind,
sect_kind: SectKind,
}
#[derive(Error, Debug)]
#[error("four sect dict section error in the {sect_kind:?} section")]
pub struct DictSectError {
#[source]
e: dict_sect_pfc::Error,
sect_kind: SectKind,
}
#[derive(Error, Debug)]
pub enum Error {
#[error("failed to read FourSectDict control info")]
ControlInfo(#[from] crate::containers::control_info::Error),
#[error("failed to read FourSectDict section")]
DictSect(#[from] DictSectError),
#[error("failed to read FourSectDict: {0}")]
Other(String),
}
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct EncodedTripleId {
pub subject: Id,
pub predicate: Id,
pub object: Id,
}
impl FourSectDict {
pub fn id_to_string(&self, id: Id, id_kind: IdKind) -> core::result::Result<String, ExtractError> {
use SectKind::*;
let shared_size = self.shared.num_strings() as Id;
let d = id.saturating_sub(shared_size);
match id_kind {
IdKind::Subject => {
if id <= shared_size {
self.shared.extract(id).map_err(|e| ExtractError { e, id, id_kind, sect_kind: Shared })
} else {
self.subjects.extract(d).map_err(|e| ExtractError { e, id, id_kind, sect_kind: Subject })
}
}
IdKind::Predicate => {
self.predicates.extract(id).map_err(|e| ExtractError { e, id, id_kind, sect_kind: Predicate })
}
IdKind::Object => {
if id <= shared_size {
self.shared.extract(id).map_err(|e| ExtractError { e, id, id_kind, sect_kind: Shared })
} else {
self.objects.extract(d).map_err(|e| ExtractError { e, id, id_kind, sect_kind: Object })
}
}
}
}
pub fn string_to_id(&self, s: &str, id_kind: IdKind) -> Id {
let shared_size = self.shared.num_strings();
match id_kind {
IdKind::Subject => {
let mut id = self.shared.string_to_id(s);
if id == 0 {
id = self.subjects.string_to_id(s);
if id > 0 {
id += shared_size as Id;
}
}
id
}
IdKind::Predicate => self.predicates.string_to_id(s),
IdKind::Object => {
let mut id = self.shared.string_to_id(s);
if id == 0 {
id = self.objects.string_to_id(s);
if id > 0 {
id += shared_size as Id;
}
}
id
}
}
}
pub fn read<R: BufRead>(reader: &mut R) -> Result<UnvalidatedFourSectDict> {
use SectKind::*;
let dict_ci = ControlInfo::read(reader)?;
if dict_ci.format != "<http://purl.org/HDT/hdt#dictionaryFour>" {
return Err(Error::Other("Implementation only supports four section dictionaries".to_owned()));
}
let mut f = |sect_kind| DictSectPFC::read(reader).map_err(|e| DictSectError { e, sect_kind });
Ok(UnvalidatedFourSectDict([f(Shared)?, f(Subject)?, f(Predicate)?, f(Object)?]))
}
pub fn write(&self, write: &mut impl std::io::Write) -> Result<()> {
use SectKind::*;
ControlInfo::four_sect_dict().write(write)?;
self.shared.write(write).map_err(|e| DictSectError { e, sect_kind: Shared })?;
self.subjects.write(write).map_err(|e| DictSectError { e, sect_kind: Subject })?;
self.predicates.write(write).map_err(|e| DictSectError { e, sect_kind: Predicate })?;
self.objects.write(write).map_err(|e| DictSectError { e, sect_kind: Object })?;
Ok(())
}
pub fn size_in_bytes(&self) -> usize {
self.shared.size_in_bytes()
+ self.subjects.size_in_bytes()
+ self.predicates.size_in_bytes()
+ self.objects.size_in_bytes()
}
}
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
pub struct UnvalidatedFourSectDict([JoinHandle<dict_sect_pfc::Result<DictSectPFC>>; 4]);
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
pub struct UnvalidatedFourSectDict([DictSectPFC; 4]);
impl UnvalidatedFourSectDict {
#[cfg(not(any(target_arch = "wasm32", target_arch = "wasm64")))]
pub fn validate(self) -> Result<FourSectDict> {
use SectKind::*;
let r: Vec<_> = [Shared, Subject, Predicate, Object]
.into_iter()
.zip(self.0)
.map(|(sect_kind, handle)| handle.join().unwrap().map_err(|e| DictSectError { e, sect_kind }))
.collect::<std::result::Result<Vec<DictSectPFC>, DictSectError>>()?;
let [shared, subjects, predicates, objects]: [DictSectPFC; 4] = r.try_into().unwrap();
Ok(FourSectDict { shared, subjects, predicates, objects })
}
#[cfg(any(target_arch = "wasm32", target_arch = "wasm64"))]
pub fn validate(self) -> Result<FourSectDict> {
let [shared, subjects, predicates, objects] = self.0;
Ok(FourSectDict { shared, subjects, predicates, objects })
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::header::Header;
use crate::tests::init;
use fs_err::File;
use pretty_assertions::assert_eq;
use std::io::BufReader;
#[test]
fn read_write_dict() -> color_eyre::Result<()> {
init();
let file = File::open("tests/resources/snikmeta.hdt")?;
let mut reader = BufReader::new(file);
ControlInfo::read(&mut reader)?;
Header::read(&mut reader)?;
let dict = FourSectDict::read(&mut reader)?.validate()?;
assert_eq!(dict.shared.num_strings(), 43, "wrong number of strings in the shared section");
assert_eq!(dict.subjects.num_strings(), 6, "wrong number of strings in the subject section");
assert_eq!(dict.predicates.num_strings(), 23, "wrong number of strings in the predicates section");
assert_eq!(dict.objects.num_strings(), 133, "wrong number of strings in the objects section");
assert_eq!(dict.string_to_id("_:b1", IdKind::Subject), 1);
assert_eq!("http://www.snik.eu/ontology/meta/uses", dict.id_to_string(43, IdKind::Subject)?);
assert_eq!("http://www.snik.eu/ontology/meta/Chapter", dict.id_to_string(3, IdKind::Subject)?);
assert_eq!("http://www.snik.eu/ontology/meta/DataSetType", dict.id_to_string(5, IdKind::Subject)?);
for id in 1..dict.shared.num_strings() {
let s = dict.id_to_string(id, IdKind::Subject)?;
let back = dict.string_to_id(&s, IdKind::Subject);
assert_eq!(id, back, "shared id {} -> subject {} -> id {}", id, s, back);
let s = dict.id_to_string(id, IdKind::Object)?;
let back = dict.string_to_id(&s, IdKind::Object);
assert_eq!(id, back, "shared id {} -> object {} -> id {}", id, s, back);
}
for (sect, kind, name, offset) in [
(&dict.subjects, IdKind::Subject, "subject", dict.shared.num_strings()),
(&dict.objects, IdKind::Object, "object", dict.shared.num_strings()),
(&dict.predicates, IdKind::Predicate, "predicate", 0),
] {
for id in offset + 1..offset + sect.num_strings() {
let s = dict.id_to_string(id, kind)?;
let back = dict.string_to_id(&s, kind);
assert_eq!(id, back, "{} id {} -> {} {} -> id {}", name, id, name, s, back);
}
}
let mut buf = Vec::new();
dict.write(&mut buf)?;
let dict2 = FourSectDict::read(&mut std::io::Cursor::new(buf))?.validate()?;
assert_eq!(dict, dict2);
Ok(())
}
}