use std::io;
use serde::{Deserialize, Serialize};
use super::core::Dafsa;
const MAX_RAT_LEN: usize = i8::MAX as usize;
const FORMAT_TAG: &str = "tilezz-rat-dafsa";
const FORMAT_VERSION: u32 = 1;
pub const JSON_SCHEMA_DOC: &str = include_str!("schemas/rat_schema.txt");
#[derive(Debug, Clone)]
pub struct RatDafsa {
inner: Dafsa,
}
impl RatDafsa {
pub fn from_rats<I, S>(rats: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<[i8]>,
{
let mut prefixed: Vec<Vec<i8>> = rats
.into_iter()
.map(|r| {
let r = r.as_ref();
assert!(
r.len() <= MAX_RAT_LEN,
"RatDafsa: rat length {} exceeds the single-byte length-prefix limit ({})",
r.len(),
MAX_RAT_LEN
);
let mut v = Vec::with_capacity(r.len() + 1);
v.push(r.len() as i8);
v.extend_from_slice(r);
v
})
.collect();
prefixed.sort_unstable();
prefixed.dedup();
let inner = Dafsa::from_seqs(prefixed.iter().map(|v| v.as_slice()));
Self { inner }
}
pub fn from_sorted_unique_rats<I, S>(rats: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<[i8]>,
{
let prefixed = rats.into_iter().map(|r| {
let r = r.as_ref();
assert!(
r.len() <= MAX_RAT_LEN,
"RatDafsa: rat length {} exceeds the single-byte length-prefix limit ({})",
r.len(),
MAX_RAT_LEN
);
let mut v = Vec::with_capacity(r.len() + 1);
v.push(r.len() as i8);
v.extend_from_slice(r);
v
});
let inner = Dafsa::from_seqs(prefixed);
Self { inner }
}
pub(crate) fn inner(&self) -> &Dafsa {
&self.inner
}
pub fn len(&self) -> usize {
self.inner.len()
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
pub fn get(&self, i: usize) -> Option<Vec<i8>> {
self.inner.get(i).map(strip_prefix)
}
pub fn index_of<S: AsRef<[i8]>>(&self, rat: S) -> Option<u64> {
let prefixed = prefix(rat.as_ref());
self.inner.lex_rank_of(&prefixed)
}
pub fn contains<S: AsRef<[i8]>>(&self, rat: S) -> bool {
let prefixed = prefix(rat.as_ref());
self.inner.contains(&prefixed)
}
pub fn iter(&self) -> impl Iterator<Item = Vec<i8>> + '_ {
self.inner.iter().map(strip_prefix)
}
pub fn to_json_string(&self) -> String {
let inner = self.inner.to_json_string();
let form = RatDafsaSerForm {
format: FORMAT_TAG.to_string(),
version: FORMAT_VERSION,
inner_format: "tilezz-dafsa".to_string(),
note: NOTE.to_string(),
dafsa: serde_json::from_str(&inner).expect("inner Dafsa JSON is always valid"),
};
serde_json::to_string(&form).expect("RatDafsaSerForm always serializes")
}
pub fn from_json_str(s: &str) -> io::Result<Self> {
let form: RatDafsaSerForm =
serde_json::from_str(s).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Self::from_form(form)
}
pub fn write_json<W: io::Write>(&self, w: W) -> io::Result<()> {
let s = self.to_json_string();
let mut w = w;
w.write_all(s.as_bytes())
}
pub fn read_json<R: io::Read>(r: R) -> io::Result<Self> {
let form: RatDafsaSerForm = serde_json::from_reader(r)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Self::from_form(form)
}
pub fn write_json_gz<W: io::Write>(&self, w: W) -> io::Result<()> {
let mut enc = flate2::write::GzEncoder::new(w, flate2::Compression::default());
self.write_json(&mut enc)?;
enc.finish()?;
Ok(())
}
pub fn read_json_gz<R: io::Read>(r: R) -> io::Result<Self> {
let dec = flate2::read::GzDecoder::new(r);
Self::read_json(dec)
}
fn from_form(form: RatDafsaSerForm) -> io::Result<Self> {
if form.format != FORMAT_TAG {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"RatDafsa: bad format tag (expected '{}', got '{}')",
FORMAT_TAG, form.format
),
));
}
if form.version != FORMAT_VERSION {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"RatDafsa: unsupported version (expected {}, got {})",
FORMAT_VERSION, form.version
),
));
}
let dafsa_json = serde_json::to_string(&form.dafsa)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
let inner = Dafsa::from_json_str(&dafsa_json)?;
Ok(Self { inner })
}
}
const NOTE: &str = "Each accepted sequence in the inner DAFSA is the angle sequence of a rat \
prefixed with its length: stored = [len(rat), rat[0], rat[1], ...]. \
Strip the first byte to recover the rat.";
#[derive(Serialize, Deserialize)]
struct RatDafsaSerForm {
format: String,
version: u32,
inner_format: String,
note: String,
dafsa: serde_json::Value,
}
fn prefix(rat: &[i8]) -> Vec<i8> {
assert!(
rat.len() <= MAX_RAT_LEN,
"RatDafsa: rat length {} exceeds the single-byte length-prefix limit ({})",
rat.len(),
MAX_RAT_LEN
);
let mut v = Vec::with_capacity(rat.len() + 1);
v.push(rat.len() as i8);
v.extend_from_slice(rat);
v
}
fn strip_prefix(mut prefixed: Vec<i8>) -> Vec<i8> {
if !prefixed.is_empty() {
prefixed.remove(0);
}
prefixed
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rats_order_and_roundtrip() {
let input: Vec<Vec<i8>> = vec![
vec![1, 2, 3],
vec![-1],
vec![-2, 1, 1],
vec![2],
vec![-2],
vec![1, 2],
vec![-1, 0, 1],
vec![0],
];
let mut expected = input.clone();
expected.sort_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
let rd = RatDafsa::from_rats(input.iter().map(|v| v.as_slice()));
assert_eq!(rd.len(), expected.len());
let iter_out: Vec<Vec<i8>> = rd.iter().collect();
assert_eq!(iter_out, expected);
for (i, e) in expected.iter().enumerate() {
assert_eq!(rd.get(i).as_ref(), Some(e));
}
assert_eq!(rd.get(expected.len()), None);
for (i, e) in expected.iter().enumerate() {
assert_eq!(rd.index_of(e.as_slice()), Some(i as u64));
}
assert_eq!(rd.index_of([42i8, 42].as_slice()), None);
for e in &expected {
assert!(rd.contains(e.as_slice()));
}
assert!(!rd.contains([42i8].as_slice()));
let mut buf: Vec<u8> = Vec::new();
rd.write_json_gz(&mut buf).expect("write_json_gz");
assert!(buf.starts_with(&[0x1f, 0x8b]), "missing gzip magic");
let restored = RatDafsa::read_json_gz(&buf[..]).expect("read_json_gz");
let restored_iter: Vec<Vec<i8>> = restored.iter().collect();
assert_eq!(restored_iter, expected);
for (i, e) in expected.iter().enumerate() {
assert_eq!(restored.get(i).as_ref(), Some(e));
assert_eq!(restored.index_of(e.as_slice()), Some(i as u64));
}
}
#[test]
fn from_sorted_unique_matches_from_rats() {
let input: Vec<Vec<i8>> = vec![
vec![1, 2, 3],
vec![-1],
vec![-2, 1, 1],
vec![2],
vec![-2],
vec![1, 2],
vec![-1, 0, 1],
vec![0],
];
let buffered = RatDafsa::from_rats(input.iter().map(|v| v.as_slice()));
let mut sorted = input.clone();
sorted.sort_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)));
sorted.dedup();
let streamed = RatDafsa::from_sorted_unique_rats(sorted.iter().map(|v| v.as_slice()));
assert_eq!(streamed.len(), buffered.len());
assert_eq!(
streamed.iter().collect::<Vec<_>>(),
buffered.iter().collect::<Vec<_>>()
);
for i in 0..buffered.len() {
assert_eq!(streamed.get(i), buffered.get(i));
}
for r in &sorted {
assert_eq!(
streamed.index_of(r.as_slice()),
buffered.index_of(r.as_slice())
);
}
}
#[test]
#[should_panic]
#[cfg(debug_assertions)]
fn from_sorted_unique_rejects_out_of_order_debug() {
let bad: Vec<&[i8]> = vec![&[1, 2][..], &[1, 1][..]]; let _ = RatDafsa::from_sorted_unique_rats(bad.iter().copied());
}
#[test]
fn dedups_duplicates() {
let input: Vec<Vec<i8>> = vec![vec![1, 2], vec![1, 2], vec![3], vec![1, 2]];
let rd = RatDafsa::from_rats(input.iter().map(|v| v.as_slice()));
assert_eq!(rd.len(), 2);
assert_eq!(rd.get(0).as_deref(), Some(&[3i8][..]));
assert_eq!(rd.get(1).as_deref(), Some(&[1i8, 2][..]));
}
#[test]
fn format_tag_is_distinct() {
let rd = RatDafsa::from_rats([[1i8, 2].as_slice(), [3i8].as_slice()].iter().copied());
let json = rd.to_json_string();
assert!(json.contains("\"tilezz-rat-dafsa\""));
assert!(
json.contains("length-prefixed") || json.contains("length"),
"missing length-prefix note in serialized form"
);
assert!(
Dafsa::from_json_str(&json).is_err(),
"Dafsa reader silently accepted a RatDafsa document"
);
}
}