use io::ErrorKind::UnexpectedEof;
use std::collections::HashMap;
use std::io::{self, BufRead, Write};
use std::str;
/// Result alias used by the public control info API; the error type is this module's [`Error`].
pub type Result<T> = core::result::Result<T, Error>;
/// Single zero byte that is written after the format string and after the property list.
pub const TERMINATOR: [u8; 1] = [0];
/// Magic cookie `$HDT` written as the first four bytes of every control info block.
const HDT_HEADER: &[u8] = b"$HDT";
/// Kind of a control info block.
///
/// The discriminant is the single byte that follows the `$HDT` cookie in the serialized form:
/// it is what `ControlInfo::write` emits and what `ControlInfo::read` converts back via `TryFrom<u8>`.
#[allow(missing_docs)]
#[repr(u8)]
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
pub enum ControlType {
/// Byte value 0; also the `Default` value of this enum.
#[default]
Unknown = 0,
/// Byte value 1; set by `ControlInfo::global`.
Global = 1,
/// Byte value 2; set by `ControlInfo::header`.
Header = 2,
/// Byte value 3; set by `ControlInfo::four_sect_dict`.
Dictionary = 3,
/// Byte value 4; set by `ControlInfo::bitmap_triples`.
Triples = 4,
/// Byte value 5; no constructor in this module produces it.
Index = 5,
}
impl TryFrom<u8> for ControlType {
    type Error = ControlInfoReadErrorKind;

    /// Maps the raw control type byte onto its enum variant.
    ///
    /// Bytes `0` through `5` are accepted; any other value is rejected with
    /// `ControlInfoReadErrorKind::InvalidControlType` carrying the offending byte.
    fn try_from(original: u8) -> core::result::Result<Self, Self::Error> {
        let control_type = match original {
            0 => Self::Unknown,
            1 => Self::Global,
            2 => Self::Header,
            3 => Self::Dictionary,
            4 => Self::Triples,
            5 => Self::Index,
            unsupported => return Err(ControlInfoReadErrorKind::InvalidControlType(unsupported)),
        };
        Ok(control_type)
    }
}
/// Metadata block that is serialized as `$HDT`, a type byte, a zero-terminated format string,
/// a zero-terminated `key=value;` property list and a two-byte little-endian CRC16 checksum.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ControlInfo {
/// Kind of this control info block, stored as one byte.
pub control_type: ControlType,
/// Format identifier; the constructors in this module use a URI or the literal `ntriples`.
pub format: String,
/// Key-value properties; serialized as `key=value;` entries.
pub properties: HashMap<String, String>,
}
/// Opaque error returned by `ControlInfo::read` and `ControlInfo::write`.
///
/// Wraps a [`ControlInfoReadErrorKind`] that carries the detailed cause.
#[derive(thiserror::Error, Debug)]
#[error("failed to read HDT control info")]
pub struct Error(#[from] ControlInfoReadErrorKind);
/// Detailed reasons why reading (or writing) a control info block failed.
#[derive(thiserror::Error, Debug)]
pub enum ControlInfoReadErrorKind {
/// The underlying reader or writer failed; also used with `UnexpectedEof` when the
/// property list is not terminated by a zero byte.
#[error("IO error")]
Io(#[from] std::io::Error),
/// The first four bytes were not `$HDT`; holds the raw bytes and their lossy UTF-8 rendering.
#[error("chunk {0:?} '{1}' does not equal the HDT cookie '$HDT'")]
HdtCookie([u8; 4], String),
/// The format string was not terminated by a zero byte.
#[error("invalid separator while reading format")]
InvalidSeparator,
/// The stored checksum does not match the one computed over the bytes that were read.
#[error("invalid CRC16-ANSI checksum")]
InvalidChecksum,
/// The format string or the property list is not valid UTF-8.
#[error("invalid UTF8")]
Utf8(#[from] std::string::FromUtf8Error),
/// The control type byte does not correspond to any [`ControlType`] variant.
#[error("invalid control type '{0}'")]
InvalidControlType(u8),
}
/// Format URI used by `ControlInfo::global`.
const HDT_CONTAINER: &str = "<http://purl.org/HDT/hdt#HDTv1>";
/// Format URI used by `ControlInfo::four_sect_dict`.
const DICTIONARY_FOUR: &str = "<http://purl.org/HDT/hdt#dictionaryFour>";
/// Format URI used by `ControlInfo::bitmap_triples`.
const TRIPLES_BITMAP: &str = "<http://purl.org/HDT/hdt#triplesBitmap>";
impl ControlInfo {
pub fn global() -> ControlInfo {
let mut properties = HashMap::<String, String>::new();
properties.insert("Software".to_owned(), "hdt_rs".to_owned());
ControlInfo { control_type: ControlType::Global, format: HDT_CONTAINER.to_owned(), properties }
}
pub fn header(length: usize) -> ControlInfo {
let mut properties = HashMap::<String, String>::new();
properties.insert("length".to_owned(), length.to_string());
ControlInfo { control_type: ControlType::Header, format: "ntriples".to_owned(), properties }
}
pub fn four_sect_dict() -> ControlInfo {
ControlInfo {
control_type: ControlType::Dictionary,
format: DICTIONARY_FOUR.to_owned(),
..Default::default()
}
}
pub fn bitmap_triples(order: u32, num_triples: u32) -> ControlInfo {
let mut properties = HashMap::<String, String>::new();
properties.insert("order".to_owned(), order.to_string());
properties.insert("numTriples".to_owned(), num_triples.to_string());
ControlInfo { control_type: ControlType::Triples, format: TRIPLES_BITMAP.to_owned(), properties }
}
pub fn read<R: BufRead>(reader: &mut R) -> Result<Self> {
Ok(Self::read_kind(reader)?)
}
fn read_kind<R: BufRead>(reader: &mut R) -> core::result::Result<Self, ControlInfoReadErrorKind> {
use ControlInfoReadErrorKind::*;
let crc = crc::Crc::<u16>::new(&crc::CRC_16_ARC);
let mut digest = crc.digest();
let mut hdt_cookie: [u8; 4] = [0; 4];
reader.read_exact(&mut hdt_cookie)?;
if &hdt_cookie != b"$HDT" {
return Err(HdtCookie(hdt_cookie, String::from_utf8_lossy(&hdt_cookie).to_string()));
}
digest.update(&hdt_cookie);
let mut control_type: [u8; 1] = [0; 1];
reader.read_exact(&mut control_type)?;
digest.update(&control_type);
let control_type = ControlType::try_from(control_type[0])?;
let mut format = Vec::new();
reader.read_until(0x00, &mut format)?;
digest.update(&format);
if format.pop() != Some(0x00) {
return Err(InvalidSeparator);
}
let format = String::from_utf8(format)?;
let mut prop_str = Vec::new();
reader.read_until(0x00, &mut prop_str)?;
digest.update(&prop_str);
if prop_str.pop() != Some(0x00) {
return Err(std::io::Error::new(UnexpectedEof, "reading the properties").into());
}
let prop_str = String::from_utf8(prop_str)?;
let mut properties = HashMap::new();
for item in prop_str.split(';') {
if let Some(index) = item.find('=') {
let (key, val) = item.split_at(index);
properties.insert(String::from(key), String::from(&val[1..]));
}
}
let mut crc_code = [0_u8; 2];
reader.read_exact(&mut crc_code)?;
let crc_code: u16 = u16::from_le_bytes(crc_code);
if digest.finalize() != crc_code {
return Err(InvalidChecksum);
}
Ok(ControlInfo { control_type, format, properties })
}
pub fn write(&self, write: &mut impl Write) -> Result<()> {
Ok(self.write_kind(write)?)
}
fn write_kind(&self, dest_writer: &mut impl Write) -> core::result::Result<(), ControlInfoReadErrorKind> {
let crc = crc::Crc::<u16>::new(&crc::CRC_16_ARC);
let mut hasher = crc.digest();
dest_writer.write_all(HDT_HEADER)?;
hasher.update(HDT_HEADER);
let type_: [u8; 1] = [self.control_type as u8];
dest_writer.write_all(&type_)?;
hasher.update(&type_);
let format = self.format.as_bytes();
dest_writer.write_all(format)?;
hasher.update(format);
dest_writer.write_all(&TERMINATOR)?;
hasher.update(&TERMINATOR);
let mut properties_string = String::new();
for (key, value) in &self.properties {
properties_string.push_str(key);
properties_string.push('=');
properties_string.push_str(value);
properties_string.push(';');
}
dest_writer.write_all(properties_string.as_bytes())?;
hasher.update(properties_string.as_bytes());
dest_writer.write_all(&TERMINATOR)?;
hasher.update(&TERMINATOR);
let checksum = hasher.finalize();
dest_writer.write_all(&checksum.to_le_bytes())?;
Ok(())
}
pub fn get(&self, key: &str) -> Option<String> {
self.properties.get(key).cloned()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::init;
    use std::io::BufReader;

    /// Parses a hand-written global control info without properties.
    #[test]
    fn read_info() -> color_eyre::Result<()> {
        init();
        let raw = b"$HDT\x01<http://purl.org/HDT/hdt#HDTv1>\x00\x00\x76\x35";
        let parsed = ControlInfo::read(&mut BufReader::new(&raw[..]))?;
        assert_eq!(parsed.control_type, ControlType::Global);
        assert_eq!(parsed.format, "<http://purl.org/HDT/hdt#HDTv1>");
        assert!(parsed.properties.is_empty());
        Ok(())
    }

    /// Serializes the global control info, compares it byte by byte with the expected
    /// encoding and reads that encoding back.
    #[test]
    fn write_info() -> color_eyre::Result<()> {
        init();
        let original = ControlInfo::global();
        let mut serialized = Vec::new();
        assert!(original.write(&mut serialized).is_ok());

        let expected = b"$HDT\x01<http://purl.org/HDT/hdt#HDTv1>\x00Software=hdt_rs;\x00\x52\x22";
        assert_eq!(
            serialized,
            expected,
            "buffer {} expected {}",
            String::from_utf8_lossy(&serialized),
            String::from_utf8_lossy(expected)
        );

        let roundtrip = ControlInfo::read(&mut BufReader::new(&expected[..]))?;
        assert_eq!(roundtrip.control_type, ControlType::Global);
        assert_eq!(roundtrip.format, "<http://purl.org/HDT/hdt#HDTv1>");
        assert_eq!(roundtrip.properties.get("Software").map(String::as_str), Some("hdt_rs"));
        assert_eq!(original, roundtrip);
        Ok(())
    }
}