use std::collections::HashMap;
use std::io;
#[derive(Debug, Clone)]
pub struct Dafsa {
states: Vec<StateInfo>,
edges: Vec<Edge>,
pub(crate) counts: Vec<usize>,
}
#[derive(Debug, Clone, Copy)]
struct StateInfo {
is_accept: bool,
edges_start: u32,
}
#[derive(Debug, Clone, Copy)]
struct Edge {
label: i8,
target: u32,
}
impl Dafsa {
pub fn builder() -> DafsaBuilder {
DafsaBuilder::new()
}
pub fn from_seqs<I, S>(seqs: I) -> Self
where
I: IntoIterator<Item = S>,
S: AsRef<[i8]>,
{
let mut builder = DafsaBuilder::new();
for s in seqs {
builder.insert(s.as_ref());
}
builder.finish()
}
pub fn iter(&self) -> DafsaIter<'_> {
DafsaIter::new(self)
}
pub fn len(&self) -> usize {
self.counts.first().copied().unwrap_or(0)
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn contains<S: AsRef<[i8]>>(&self, seq: S) -> bool {
let seq = seq.as_ref();
let mut s = 0u32;
for &c in seq {
match self.find_edge(s, c) {
Some(t) => s = t,
None => return false,
}
}
self.states[s as usize].is_accept
}
pub fn get(&self, i: usize) -> Option<Vec<i8>> {
if i >= self.len() {
return None;
}
let mut out = Vec::new();
let mut s = 0u32;
let mut remaining = i;
loop {
if self.states[s as usize].is_accept {
if remaining == 0 {
return Some(out);
}
remaining -= 1;
}
let edge_range = self.edge_range(s);
for &Edge { label, target } in &self.edges[edge_range] {
let n = self.counts[target as usize];
if remaining < n {
out.push(label);
s = target;
break;
}
remaining -= n;
}
}
}
pub(crate) fn raw_n_states(&self) -> usize {
self.states.len()
}
pub(crate) fn raw_n_edges(&self) -> usize {
self.edges.len()
}
pub(crate) fn raw_edges_start(&self) -> Vec<u32> {
self.states.iter().map(|s| s.edges_start).collect()
}
pub(crate) fn raw_is_accept(&self) -> Vec<bool> {
self.states.iter().map(|s| s.is_accept).collect()
}
pub(crate) fn raw_labels(&self) -> Vec<i8> {
self.edges.iter().map(|e| e.label).collect()
}
pub(crate) fn raw_targets(&self) -> Vec<u32> {
self.edges.iter().map(|e| e.target).collect()
}
pub(crate) fn raw_counts(&self) -> &[usize] {
&self.counts
}
pub fn lex_rank_of<S: AsRef<[i8]>>(&self, seq: S) -> Option<u64> {
let seq = seq.as_ref();
let mut s = 0u32;
let mut rank: u64 = 0;
for &c in seq {
if self.states[s as usize].is_accept {
rank += 1;
}
let edges = &self.edges[self.edge_range(s)];
let mut next: Option<u32> = None;
for e in edges {
match e.label.cmp(&c) {
std::cmp::Ordering::Less => rank += self.counts[e.target as usize] as u64,
std::cmp::Ordering::Equal => {
next = Some(e.target);
break;
}
std::cmp::Ordering::Greater => return None,
}
}
s = next?;
}
if !self.states[s as usize].is_accept {
return None;
}
Some(rank)
}
pub fn walk_prefix<S: AsRef<[i8]>>(&self, prefix: S) -> Option<DafsaCursor<'_>> {
let prefix = prefix.as_ref();
let mut s = 0u32;
for &c in prefix {
s = self.find_edge(s, c)?;
}
Some(DafsaCursor {
dafsa: self,
state: s,
})
}
fn find_edge(&self, state: u32, label: i8) -> Option<u32> {
let edges = &self.edges[self.edge_range(state)];
edges
.binary_search_by(|e| e.label.cmp(&label))
.ok()
.map(|idx| edges[idx].target)
}
fn edge_range(&self, state: u32) -> std::ops::Range<usize> {
let start = self.states[state as usize].edges_start as usize;
let end = if (state as usize + 1) < self.states.len() {
self.states[state as usize + 1].edges_start as usize
} else {
self.edges.len()
};
start..end
}
}
#[derive(Debug, Clone, Copy)]
pub struct DafsaCursor<'a> {
dafsa: &'a Dafsa,
state: u32,
}
impl DafsaCursor<'_> {
pub fn is_accept(&self) -> bool {
self.dafsa.states[self.state as usize].is_accept
}
pub fn count(&self) -> usize {
self.dafsa.counts[self.state as usize]
}
}
pub struct DafsaIter<'a> {
dafsa: &'a Dafsa,
stack: Vec<DfsFrame>,
current: Vec<i8>,
pending_accept: bool,
}
#[derive(Debug, Clone, Copy)]
struct DfsFrame {
next_edge: u32,
edges_end: u32,
}
impl<'a> DafsaIter<'a> {
fn new(dafsa: &'a Dafsa) -> Self {
let edges_end = if dafsa.states.len() > 1 {
dafsa.states[1].edges_start
} else {
dafsa.edges.len() as u32
};
let stack = vec![DfsFrame {
next_edge: dafsa.states[0].edges_start,
edges_end,
}];
DafsaIter {
dafsa,
stack,
current: Vec::new(),
pending_accept: dafsa.states[0].is_accept,
}
}
}
impl Iterator for DafsaIter<'_> {
type Item = Vec<i8>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if self.pending_accept {
self.pending_accept = false;
return Some(self.current.clone());
}
let frame = self.stack.last_mut()?;
if frame.next_edge < frame.edges_end {
let edge = self.dafsa.edges[frame.next_edge as usize];
frame.next_edge += 1;
self.current.push(edge.label);
let target = edge.target;
let edges_start = self.dafsa.states[target as usize].edges_start;
let edges_end = if (target as usize + 1) < self.dafsa.states.len() {
self.dafsa.states[target as usize + 1].edges_start
} else {
self.dafsa.edges.len() as u32
};
self.stack.push(DfsFrame {
next_edge: edges_start,
edges_end,
});
self.pending_accept = self.dafsa.states[target as usize].is_accept;
continue;
}
self.stack.pop();
self.current.pop();
}
}
}
#[derive(Debug, Clone)]
struct BuildState {
is_accept: bool,
edges: Vec<(i8, u32)>,
}
pub struct DafsaBuilder {
path: Vec<BuildState>,
path_labels: Vec<i8>,
frozen: Vec<BuildState>,
register: HashMap<(Vec<(i8, u32)>, bool), u32>,
last_seq: Vec<i8>,
seen_any: bool,
}
impl DafsaBuilder {
fn new() -> Self {
DafsaBuilder {
path: vec![BuildState {
is_accept: false,
edges: Vec::new(),
}],
path_labels: Vec::new(),
frozen: Vec::new(),
register: HashMap::new(),
last_seq: Vec::new(),
seen_any: false,
}
}
fn freeze(&mut self, state: BuildState) -> u32 {
let key = (state.edges.clone(), state.is_accept);
if let Some(&id) = self.register.get(&key) {
return id;
}
let id = self.frozen.len() as u32;
self.register.insert(key, id);
self.frozen.push(state);
id
}
pub fn insert<S: AsRef<[i8]>>(&mut self, seq: S) {
self.insert_slice(seq.as_ref());
}
fn insert_slice(&mut self, seq: &[i8]) {
if self.seen_any {
debug_assert!(
self.last_seq.as_slice() < seq,
"Dafsa::from_seqs: input must be strictly lex-sorted"
);
}
self.seen_any = true;
let common = self
.last_seq
.iter()
.zip(seq.iter())
.take_while(|(a, b)| a == b)
.count();
while self.path.len() > common + 1 {
let label = self.path_labels.pop().unwrap();
let state = self.path.pop().unwrap();
let frozen_id = self.freeze(state);
self.path.last_mut().unwrap().edges.push((label, frozen_id));
}
for &c in &seq[common..] {
self.path_labels.push(c);
self.path.push(BuildState {
is_accept: false,
edges: Vec::new(),
});
}
self.path.last_mut().unwrap().is_accept = true;
self.last_seq.clear();
self.last_seq.extend_from_slice(seq);
}
pub fn finish(mut self) -> Dafsa {
while self.path.len() > 1 {
let label = self.path_labels.pop().unwrap();
let state = self.path.pop().unwrap();
let frozen_id = self.freeze(state);
self.path.last_mut().unwrap().edges.push((label, frozen_id));
}
let root_state = self.path.pop().unwrap();
let root_id = self.freeze(root_state);
let n = self.frozen.len();
let mut new_id = vec![u32::MAX; n];
let mut order: Vec<u32> = Vec::with_capacity(n);
let mut stack: Vec<u32> = vec![root_id];
while let Some(orig) = stack.pop() {
if new_id[orig as usize] != u32::MAX {
continue;
}
new_id[orig as usize] = order.len() as u32;
order.push(orig);
let s = &self.frozen[orig as usize];
for &(_, child) in s.edges.iter().rev() {
if new_id[child as usize] == u32::MAX {
stack.push(child);
}
}
}
debug_assert_eq!(
order.len(),
n,
"DFS pre-order must visit every reachable state"
);
let mut states = Vec::with_capacity(n);
let mut edges: Vec<Edge> = Vec::new();
for &orig_id in &order {
let s = &self.frozen[orig_id as usize];
states.push(StateInfo {
is_accept: s.is_accept,
edges_start: edges.len() as u32,
});
for &(label, child) in &s.edges {
edges.push(Edge {
label,
target: new_id[child as usize],
});
}
}
let mut counts = vec![0usize; n];
Self::compute_counts(&states, &edges, &mut counts);
Dafsa {
states,
edges,
counts,
}
}
fn compute_counts(states: &[StateInfo], edges: &[Edge], counts: &mut [usize]) {
let n = states.len();
let mut order: Vec<u32> = Vec::with_capacity(n);
let mut visited = vec![false; n];
let mut stack: Vec<(u32, bool)> = vec![(0, false)];
while let Some((s, expanded)) = stack.pop() {
if expanded {
order.push(s);
continue;
}
if visited[s as usize] {
continue;
}
visited[s as usize] = true;
stack.push((s, true));
let start = states[s as usize].edges_start as usize;
let end = if (s as usize + 1) < states.len() {
states[s as usize + 1].edges_start as usize
} else {
edges.len()
};
for edge in &edges[start..end] {
if !visited[edge.target as usize] {
stack.push((edge.target, false));
}
}
}
for s in &order {
let s = *s as usize;
let mut total = if states[s].is_accept { 1 } else { 0 };
let start = states[s].edges_start as usize;
let end = if (s + 1) < states.len() {
states[s + 1].edges_start as usize
} else {
edges.len()
};
for edge in &edges[start..end] {
total += counts[edge.target as usize];
}
counts[s] = total;
}
}
}
const FORMAT_TAG: &str = "tilezz-dafsa";
const FORMAT_VERSION: u32 = 1;
pub const JSON_SCHEMA_DOC: &str = include_str!("schemas/core_schema.txt");
const SCALAR_TAG: &str = "i8";
#[derive(serde::Serialize, serde::Deserialize)]
struct DafsaSerForm {
format: String,
version: u32,
scalar: String,
n_states: usize,
n_edges: usize,
edges_start: Vec<u32>,
labels: Vec<i8>,
targets: Vec<u32>,
counts: Vec<u64>,
}
impl Dafsa {
fn to_ser_form(&self) -> DafsaSerForm {
let n_states = self.states.len();
let n_edges = self.edges.len();
let edges_start: Vec<u32> = self.states.iter().map(|s| s.edges_start).collect();
let mut labels = Vec::with_capacity(n_edges);
let mut targets = Vec::with_capacity(n_edges);
for e in &self.edges {
labels.push(e.label);
targets.push(e.target);
}
DafsaSerForm {
format: FORMAT_TAG.to_string(),
version: FORMAT_VERSION,
scalar: SCALAR_TAG.to_string(),
n_states,
n_edges,
edges_start,
labels,
targets,
counts: self.counts.iter().map(|&c| c as u64).collect(),
}
}
fn from_ser_form(form: DafsaSerForm) -> io::Result<Self> {
if form.format != FORMAT_TAG {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dafsa: bad format tag (expected '{}', got '{}')",
FORMAT_TAG, form.format
),
));
}
if form.version != FORMAT_VERSION {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dafsa: unsupported version (expected {}, got {})",
FORMAT_VERSION, form.version
),
));
}
if form.scalar != SCALAR_TAG {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dafsa: scalar mismatch (file is '{}', expected '{}')",
form.scalar, SCALAR_TAG
),
));
}
let n_states = form.n_states;
let n_edges = form.n_edges;
let len_ok = form.edges_start.len() == n_states
&& form.counts.len() == n_states
&& form.labels.len() == n_edges
&& form.targets.len() == n_edges;
if !len_ok {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Dafsa: array length disagrees with n_states/n_edges",
));
}
let counts: Vec<usize> = form.counts.iter().map(|&c| c as usize).collect();
let mut is_accept = vec![false; n_states];
for s in 0..n_states {
let start = form.edges_start[s] as usize;
let end = if s + 1 < n_states {
form.edges_start[s + 1] as usize
} else {
n_edges
};
let children_sum: usize = form.targets[start..end]
.iter()
.map(|&t| counts[t as usize])
.sum();
let diff = counts[s].checked_sub(children_sum).ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("Dafsa: counts[{s}] < sum of children counts (corrupt file)"),
)
})?;
match diff {
0 => is_accept[s] = false,
1 => is_accept[s] = true,
_ => {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!(
"Dafsa: counts[{s}] disagrees with children-sum by {diff}, expected 0 or 1"
),
));
}
}
}
let states: Vec<StateInfo> = form
.edges_start
.into_iter()
.zip(is_accept)
.map(|(edges_start, is_accept)| StateInfo {
is_accept,
edges_start,
})
.collect();
let edges: Vec<Edge> = form
.labels
.into_iter()
.zip(form.targets)
.map(|(label, target)| Edge { label, target })
.collect();
Ok(Dafsa {
states,
edges,
counts,
})
}
pub fn to_json_string(&self) -> String {
serde_json::to_string(&self.to_ser_form()).expect("DafsaSerForm is always serializable")
}
pub fn to_json_string_pretty(&self) -> String {
serde_json::to_string_pretty(&self.to_ser_form())
.expect("DafsaSerForm is always serializable")
}
pub fn from_json_str(s: &str) -> io::Result<Self> {
let form: DafsaSerForm =
serde_json::from_str(s).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Self::from_ser_form(form)
}
pub fn write_json<W: io::Write>(&self, w: W) -> io::Result<()> {
serde_json::to_writer(w, &self.to_ser_form()).map_err(io::Error::other)
}
pub fn read_json<R: io::Read>(r: R) -> io::Result<Self> {
let form: DafsaSerForm = serde_json::from_reader(r)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Self::from_ser_form(form)
}
pub fn write_json_gz<W: io::Write>(&self, w: W) -> io::Result<()> {
let mut enc = flate2::write::GzEncoder::new(w, flate2::Compression::default());
self.write_json(&mut enc)?;
enc.finish()?;
Ok(())
}
pub fn read_json_gz<R: io::Read>(r: R) -> io::Result<Self> {
let dec = flate2::read::GzDecoder::new(r);
Self::read_json(dec)
}
}
#[cfg(test)]
mod tests {
use super::*;
fn build(seqs: &[&[i8]]) -> Dafsa {
Dafsa::from_seqs(seqs.iter().copied())
}
#[test]
fn empty_set() {
let dafsa = build(&[]);
assert_eq!(dafsa.len(), 0);
assert!(dafsa.is_empty());
assert!(!dafsa.contains([1i8, 2, 3].as_slice()));
assert!(dafsa.get(0).is_none());
assert_eq!(dafsa.iter().count(), 0);
}
#[test]
fn empty_sequence_only() {
let empty: &[i8] = &[];
let dafsa = build(&[empty]);
assert_eq!(dafsa.len(), 1);
assert!(dafsa.contains(empty));
assert_eq!(dafsa.get(0), Some(Vec::<i8>::new()));
let out: Vec<Vec<i8>> = dafsa.iter().collect();
assert_eq!(out, vec![Vec::<i8>::new()]);
}
#[test]
fn singleton() {
let seqs: &[&[i8]] = &[&[1, 2, 3]];
let dafsa = build(seqs);
assert_eq!(dafsa.len(), 1);
assert!(dafsa.contains([1i8, 2, 3].as_slice()));
assert!(!dafsa.contains([1i8, 2].as_slice()));
assert!(!dafsa.contains([1i8, 2, 3, 4].as_slice()));
assert_eq!(dafsa.get(0), Some(vec![1, 2, 3]));
assert_eq!(dafsa.iter().collect::<Vec<_>>(), vec![vec![1, 2, 3]]);
}
#[test]
fn roundtrip_small_sorted_set() {
let seqs: Vec<Vec<i8>> = vec![vec![1, 2], vec![1, 2, 3], vec![1, 3], vec![2], vec![2, 1]];
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
assert_eq!(dafsa.len(), seqs.len());
let out: Vec<Vec<i8>> = dafsa.iter().collect();
assert_eq!(out, seqs);
}
#[test]
fn suffix_sharing_compresses() {
let seqs: Vec<Vec<i8>> = vec![
vec![b'b' as i8, b'a' as i8, b't' as i8],
vec![b'c' as i8, b'a' as i8, b't' as i8],
];
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
assert!(dafsa.states.len() <= 4, "expected suffix sharing");
assert_eq!(dafsa.iter().collect::<Vec<_>>(), seqs);
assert!(dafsa.contains([b'b' as i8, b'a' as i8, b't' as i8].as_slice()));
assert!(dafsa.contains([b'c' as i8, b'a' as i8, b't' as i8].as_slice()));
assert!(!dafsa.contains([b'a' as i8, b't' as i8].as_slice()));
}
#[test]
fn indexed_get_matches_iter() {
let seqs: Vec<Vec<i8>> = vec![
vec![-3, 1, 2],
vec![-3, 1, 2, 4],
vec![-3, 2],
vec![-2],
vec![-2, 1],
vec![-1, 1, 2],
vec![0],
vec![0, 1],
];
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
for (i, seq) in seqs.iter().enumerate() {
assert_eq!(dafsa.get(i).as_ref(), Some(seq));
}
assert!(dafsa.get(seqs.len()).is_none());
}
#[test]
fn walk_prefix_classifies_correctly() {
let seqs: Vec<Vec<i8>> = vec![vec![1, 2, 3], vec![1, 2, 3, 4], vec![1, 2, 5], vec![2]];
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let c = dafsa.walk_prefix([1i8].as_slice()).unwrap();
assert!(!c.is_accept());
assert_eq!(c.count(), 3);
let c = dafsa.walk_prefix([1i8, 2, 3].as_slice()).unwrap();
assert!(c.is_accept());
assert_eq!(c.count(), 2);
assert!(dafsa.walk_prefix([1i8, 2, 9].as_slice()).is_none());
let c = dafsa.walk_prefix([2i8].as_slice()).unwrap();
assert!(c.is_accept());
assert_eq!(c.count(), 1);
}
#[test]
fn builder_matches_from_seqs() {
let seqs: Vec<Vec<i8>> = vec![
vec![1, 2],
vec![1, 2, 3],
vec![1, 3],
vec![2],
vec![2, 1],
vec![2, 1, 4],
];
let one_shot = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let mut b = Dafsa::builder();
for s in &seqs {
b.insert(s.as_slice());
}
let streamed = b.finish();
assert_eq!(one_shot.states.len(), streamed.states.len());
assert_eq!(one_shot.edges.len(), streamed.edges.len());
assert_eq!(streamed.iter().collect::<Vec<_>>(), seqs);
assert_eq!(one_shot.iter().collect::<Vec<_>>(), seqs);
}
#[test]
fn builder_finish_empty() {
let dafsa: Dafsa = Dafsa::builder().finish();
assert!(dafsa.is_empty());
assert_eq!(dafsa.len(), 0);
assert_eq!(dafsa.iter().count(), 0);
}
#[test]
#[cfg(debug_assertions)]
#[should_panic(expected = "strictly lex-sorted")]
fn builder_panics_on_out_of_order() {
let mut b = Dafsa::builder();
b.insert([1i8, 2].as_slice());
b.insert([1i8, 1].as_slice());
}
#[test]
#[cfg(debug_assertions)]
#[should_panic(expected = "strictly lex-sorted")]
fn builder_panics_on_duplicate() {
let mut b = Dafsa::builder();
b.insert([1i8, 2, 3].as_slice());
b.insert([1i8, 2, 3].as_slice());
}
#[test]
fn builder_stream_from_generator() {
let generator =
(0..4i8).flat_map(|a| (a..4).flat_map(move |b| (b..4).map(move |c| vec![a, b, c])));
let materialized: Vec<Vec<i8>> = generator.clone().collect();
let mut b = Dafsa::builder();
for s in generator {
b.insert(&s);
}
let dafsa = b.finish();
assert_eq!(dafsa.len(), materialized.len());
assert_eq!(dafsa.iter().collect::<Vec<_>>(), materialized);
for (i, s) in materialized.iter().enumerate() {
assert!(dafsa.contains(s.as_slice()));
assert_eq!(dafsa.get(i).as_ref(), Some(s));
}
}
#[test]
fn roundtrip_filtered_dense_set() {
let mut seqs: Vec<Vec<i8>> = Vec::new();
for a in 0..4 {
for b in a..4 {
for c in b..4 {
for d in c..4 {
seqs.push(vec![a, b, c, d]);
}
}
}
}
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
assert_eq!(dafsa.len(), seqs.len());
let out: Vec<Vec<i8>> = dafsa.iter().collect();
assert_eq!(out, seqs);
for (i, seq) in seqs.iter().enumerate() {
assert_eq!(dafsa.get(i).as_ref(), Some(seq));
}
}
fn json_roundtrip_corpus() -> Vec<Vec<i8>> {
vec![
vec![-3, 1, 2],
vec![-3, 1, 2, 4],
vec![-3, 2],
vec![-2],
vec![-2, 1],
vec![-1, 1, 2],
vec![0],
vec![0, 1],
]
}
#[test]
fn json_roundtrip_preserves_behavior() {
let seqs = json_roundtrip_corpus();
let original = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let json = original.to_json_string();
let restored: Dafsa = Dafsa::from_json_str(&json).expect("parse succeeds");
assert_eq!(restored.len(), original.len());
assert_eq!(restored.iter().collect::<Vec<_>>(), seqs);
for (i, seq) in seqs.iter().enumerate() {
assert!(restored.contains(seq.as_slice()));
assert_eq!(restored.get(i).as_ref(), Some(seq));
}
}
#[test]
fn json_pretty_roundtrip() {
let seqs = json_roundtrip_corpus();
let original = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let json_pretty = original.to_json_string_pretty();
assert!(
json_pretty.contains('\n'),
"pretty form should be multi-line"
);
let restored: Dafsa = Dafsa::from_json_str(&json_pretty).expect("parse succeeds");
assert_eq!(restored.iter().collect::<Vec<_>>(), seqs);
}
#[test]
fn json_roundtrip_empty() {
let original: Dafsa = Dafsa::from_seqs(std::iter::empty::<&[i8]>());
let json = original.to_json_string();
let restored: Dafsa = Dafsa::from_json_str(&json).expect("parse succeeds");
assert!(restored.is_empty());
assert_eq!(restored.iter().count(), 0);
}
#[test]
fn json_roundtrip_via_writer_reader() {
let seqs = json_roundtrip_corpus();
let original = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let mut buf: Vec<u8> = Vec::new();
original.write_json(&mut buf).expect("write succeeds");
let restored: Dafsa = Dafsa::read_json(buf.as_slice()).expect("read succeeds");
assert_eq!(restored.iter().collect::<Vec<_>>(), seqs);
}
#[test]
fn json_rejects_bad_format_tag() {
let bad = r#"{
"format": "not-a-dafsa",
"version": 1,
"scalar": "i8",
"n_states": 0,
"n_edges": 0,
"edges_start": [],
"labels": [],
"targets": [],
"counts": []
}"#;
let err = Dafsa::from_json_str(bad).unwrap_err();
assert!(err.to_string().contains("format tag"));
}
#[test]
fn json_rejects_bad_version() {
let bad = r#"{
"format": "tilezz-dafsa",
"version": 999,
"scalar": "i8",
"n_states": 0,
"n_edges": 0,
"edges_start": [],
"labels": [],
"targets": [],
"counts": []
}"#;
let err = Dafsa::from_json_str(bad).unwrap_err();
assert!(err.to_string().contains("version"));
}
#[test]
fn json_rejects_scalar_mismatch() {
let bad = r#"{
"format": "tilezz-dafsa",
"version": 1,
"scalar": "i32",
"n_states": 0,
"n_edges": 0,
"edges_start": [],
"labels": [],
"targets": [],
"counts": []
}"#;
let err = Dafsa::from_json_str(bad).unwrap_err();
assert!(err.to_string().contains("scalar"));
}
#[test]
fn json_rejects_length_disagreement() {
let bad = r#"{
"format": "tilezz-dafsa",
"version": 1,
"scalar": "i8",
"n_states": 3,
"n_edges": 0,
"edges_start": [0],
"labels": [],
"targets": [],
"counts": [0]
}"#;
let err = Dafsa::from_json_str(bad).unwrap_err();
assert!(err.to_string().contains("length"));
}
#[test]
fn json_rejects_inconsistent_counts() {
let bad = r#"{
"format": "tilezz-dafsa",
"version": 1,
"scalar": "i8",
"n_states": 1,
"n_edges": 0,
"edges_start": [0],
"labels": [],
"targets": [],
"counts": [5]
}"#;
let err = Dafsa::from_json_str(bad).unwrap_err();
let msg = err.to_string();
assert!(
msg.contains("counts") && msg.contains("children"),
"unexpected error: {msg}"
);
}
#[test]
fn json_omits_redundant_fields() {
let dafsa = Dafsa::from_seqs([[1i8, 2].as_slice(), [1, 3].as_slice()].iter().copied());
let json = dafsa.to_json_string();
assert!(
!json.contains("is_accept"),
"is_accept must not appear on disk; it is derived from counts"
);
assert!(
!json.contains("accept_count"),
"accept_count must not appear on disk; it equals counts[0]"
);
}
#[test]
fn json_schema_keys_present() {
let dafsa = Dafsa::from_seqs([[1i8, 2].as_slice(), [1, 3].as_slice()].iter().copied());
let json = dafsa.to_json_string();
for key in [
"\"format\"",
"\"version\"",
"\"scalar\"",
"\"n_states\"",
"\"n_edges\"",
"\"edges_start\"",
"\"labels\"",
"\"targets\"",
"\"counts\"",
] {
assert!(json.contains(key), "missing schema key {key}");
}
assert!(json.contains("\"tilezz-dafsa\""));
assert!(json.contains("\"version\":1"));
assert!(json.contains("\"scalar\":\"i8\""));
}
#[test]
fn gz_roundtrip() {
let seqs: Vec<Vec<i8>> = vec![vec![1, 2], vec![1, 2, 3], vec![1, 3], vec![2], vec![2, 1]];
let dafsa = Dafsa::from_seqs(seqs.iter().map(|v| v.as_slice()));
let mut buf: Vec<u8> = Vec::new();
dafsa.write_json_gz(&mut buf).expect("write_json_gz");
assert!(buf.starts_with(&[0x1f, 0x8b]), "missing gzip magic");
let decoded = Dafsa::read_json_gz(&buf[..]).expect("read_json_gz");
let out: Vec<Vec<i8>> = decoded.iter().collect();
assert_eq!(out, seqs);
}
}