use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::Arc;
use std::time::Instant;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::{Deserialize, Serialize};
use crate::analysis::matchfinder::{BpSeed, MatchFinder};
use crate::cyclotomic::IsRing;
use crate::geom::rat::Rat;
use crate::geom::snake::Snake;
use crate::geom::tileset::TileSet;
use crate::stringmatch::cyclic_contains;
pub fn replay_glue<T: IsRing>(
source: &Rat<T>,
tile: &Rat<T>,
start_a: usize,
start_b: usize,
len: usize,
) -> Result<Rat<T>, String> {
source
.try_glue_precomputed((start_a as i64, len, start_b as i64), tile, true)
.map_err(|e| e.to_string())
}
struct TrieNode {
children: HashMap<i8, usize>,
contributor: Option<usize>,
}
impl TrieNode {
fn new() -> Self {
TrieNode {
children: HashMap::new(),
contributor: None,
}
}
}
struct SeqTrie {
nodes: Vec<TrieNode>,
}
impl SeqTrie {
fn new() -> Self {
SeqTrie {
nodes: vec![TrieNode::new()],
}
}
fn insert_cyclic_subseqs(&mut self, seq: &[i8], k: usize, rat_id: usize) -> (usize, Vec<bool>) {
let n = seq.len();
if n == 0 {
return (0, Vec::new());
}
let max_len = k.min(n);
let mut new_count = 0;
let mut active = vec![false; n];
for start in 0..n {
let mut node = 0;
let mut any_new = false;
for l in 0..max_len {
let angle = seq[(start + l) % n];
let next = self.nodes[node].children.get(&angle).copied();
node = match next {
Some(child) => child,
None => {
let new_node = self.nodes.len();
self.nodes.push(TrieNode::new());
self.nodes[node].children.insert(angle, new_node);
self.nodes[new_node].contributor = Some(rat_id);
new_count += 1;
any_new = true;
new_node
}
};
}
if any_new {
active[start] = true;
}
}
(new_count, active)
}
fn len(&self) -> usize {
self.nodes.len() - 1
}
fn sequences_by_rat_id(&self) -> BTreeMap<usize, Vec<Vec<i8>>> {
let mut result: BTreeMap<usize, Vec<Vec<i8>>> = BTreeMap::new();
self.dfs_collect(0, &mut Vec::new(), &mut result);
result
}
fn dfs_collect(
&self,
node: usize,
path: &mut Vec<i8>,
result: &mut BTreeMap<usize, Vec<Vec<i8>>>,
) {
if let Some(rat_id) = self.nodes[node].contributor {
result.entry(rat_id).or_default().push(path.clone());
}
let mut sorted_children: Vec<(i8, usize)> = self.nodes[node]
.children
.iter()
.map(|(&a, &c)| (a, c))
.collect();
sorted_children.sort_by_key(|(a, _)| *a);
for (angle, child) in sorted_children {
path.push(angle);
self.dfs_collect(child, path, result);
path.pop();
}
}
}
fn glue_active_mask(
raw_active: &[bool],
k: usize,
match_len: usize,
source_len: usize,
) -> Vec<bool> {
let n = raw_active.len();
let cw_j = source_len - match_len;
let ccw_j = 0usize;
let mut active = vec![false; n];
let check_radius = k.min(n);
let dilate = k.saturating_sub(1).min(n);
for &j in &[ccw_j, cw_j] {
let mut induced = false;
for d in 0..check_radius {
if raw_active[(j + n - d) % n] {
induced = true;
break;
}
}
if induced {
for d in 0..=dilate {
active[(j + n - d) % n] = true;
active[(j + d) % n] = true;
}
}
}
active
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Provenance {
Seed {
tile_idx: usize,
},
Glue {
source_rat_id: usize,
tile_idx: usize,
start_a: usize,
start_b: usize,
len: usize,
},
}
impl Provenance {
pub fn apply<T: IsRing>(&self, explorer: &SeqExplorer<T>) -> Result<Rat<T>, String> {
match self {
Self::Seed { tile_idx } => Ok(explorer.tileset().rat(*tile_idx).clone()),
Self::Glue {
source_rat_id,
tile_idx,
start_a,
start_b,
len,
} => {
let source = explorer.rat(*source_rat_id);
let tile = explorer.tileset().rat(*tile_idx);
replay_glue(source, tile, *start_a, *start_b, *len)
}
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
pub ring: String,
pub tile_angles: Vec<Vec<i8>>,
pub provenances: Vec<Provenance>,
pub subseqs: Vec<(usize, Vec<i8>)>,
}
impl Collection {
pub fn from_explorer<T>(explorer: &SeqExplorer<T>, ring: impl Into<String>) -> Self
where
T: IsRing,
{
let tile_angles = explorer
.tileset()
.rats()
.iter()
.map(|r| r.seq().to_vec())
.collect();
let provenances = explorer.provenances().to_vec();
let mut subseqs: Vec<(usize, Vec<i8>)> = Vec::new();
for (rat_id, seqs) in explorer.sequences_by_rat_id() {
for seq in seqs {
subseqs.push((rat_id, seq));
}
}
Collection {
ring: ring.into(),
tile_angles,
provenances,
subseqs,
}
}
pub fn replay_rats<T>(&self, tile_ts: &TileSet<T>) -> Result<Vec<Rat<T>>, String>
where
T: IsRing,
{
let mut rats: Vec<Rat<T>> = Vec::with_capacity(self.provenances.len());
for (id, prov) in self.provenances.iter().enumerate() {
let rat = match prov {
Provenance::Seed { tile_idx } => tile_ts.rat(*tile_idx).clone(),
Provenance::Glue {
source_rat_id,
tile_idx,
start_a,
start_b,
len,
} => {
let source = &rats[*source_rat_id];
let tile = tile_ts.rat(*tile_idx);
replay_glue(source, tile, *start_a, *start_b, *len)
.map_err(|e| format!("rat {id}: glue replay failed: {e}"))?
}
};
rats.push(rat);
}
Ok(rats)
}
pub fn presence_errors<T>(&self, rats: &[Rat<T>]) -> Vec<(usize, Vec<i8>)>
where
T: IsRing,
{
self.subseqs
.iter()
.filter(|(id, seq)| !cyclic_contains(rats[*id].seq(), seq))
.cloned()
.collect()
}
}
#[derive(Debug, Default, Clone)]
pub struct FixedPointReport {
pub matches_checked: usize,
pub missing: Vec<Vec<i8>>,
}
impl FixedPointReport {
pub fn is_complete(&self) -> bool {
self.missing.is_empty()
}
}
const FIXED_POINT_BATCH: usize = 500;
pub fn check_fixed_point<T>(
tileset: &Arc<TileSet<T>>,
rats: &[Rat<T>],
witness_ids: &[usize],
known: &BTreeSet<Vec<i8>>,
max_subseq_len: usize,
) -> FixedPointReport
where
T: IsRing,
{
let mut report = FixedPointReport::default();
let mut seen_missing: BTreeSet<Vec<i8>> = BTreeSet::new();
if witness_ids.is_empty() {
return report;
}
let bp_seed = BpSeed::new(Arc::clone(tileset));
for chunk in witness_ids.chunks(FIXED_POINT_BATCH) {
let witness_rats: Vec<Rat<T>> = chunk.iter().map(|&id| rats[id].clone()).collect();
let witness_ts = Arc::new(TileSet::new(witness_rats));
let mf = MatchFinder::crossing_with_seed(Arc::clone(&witness_ts), bp_seed.clone());
for wi in 0..witness_ts.num_tiles() {
for ti in 0..tileset.num_tiles() {
for mt in mf.valid_matches(wi, ti) {
report.matches_checked += 1;
let glued = crate::analysis::matchtypes::apply_match(
&mt,
witness_ts.rats(),
tileset.rats(),
);
if Snake::<T>::try_from(glued.seq()).is_err() {
continue;
}
let seq = glued.seq();
let n = seq.len();
let max_len = max_subseq_len.min(n);
for start in 0..n {
for l in 1..=max_len {
let sub: Vec<i8> = (0..l).map(|j| seq[(start + j) % n]).collect();
if !known.contains(&sub) && seen_missing.insert(sub.clone()) {
report.missing.push(sub);
}
}
}
}
}
}
}
report
}
pub struct SeqExplorer<T: IsRing> {
tileset: Arc<TileSet<T>>,
trie: SeqTrie,
rats: Vec<Rat<T>>,
provenances: Vec<Provenance>,
rat_to_id: FxHashMap<Rat<T>, usize>,
max_subseq_len: usize,
}
impl<T: IsRing> SeqExplorer<T> {
pub fn new(tileset: Arc<TileSet<T>>) -> Self {
Self::build(tileset, false)
}
pub fn new_verbose(tileset: Arc<TileSet<T>>) -> Self {
Self::build(tileset, true)
}
fn build(tileset: Arc<TileSet<T>>, verbose: bool) -> Self {
let mut b = Builder::new(tileset, verbose);
b.seed_phase();
while !b.pending.is_empty() {
b.run_layer();
}
b.into_explorer()
}
pub fn num_subseqs(&self) -> usize {
self.trie.len()
}
pub fn num_rats(&self) -> usize {
self.rats.len()
}
pub fn max_subseq_len(&self) -> usize {
self.max_subseq_len
}
pub fn rats(&self) -> &[Rat<T>] {
&self.rats
}
pub fn provenances(&self) -> &[Provenance] {
&self.provenances
}
pub fn provenance(&self, rat_id: usize) -> &Provenance {
&self.provenances[rat_id]
}
pub fn rat(&self, id: usize) -> &Rat<T> {
&self.rats[id]
}
pub fn rat_id_of(&self, rat: &Rat<T>) -> Option<usize> {
self.rat_to_id.get(rat).copied()
}
pub fn sequences_by_rat_id(&self) -> BTreeMap<usize, Vec<Vec<i8>>> {
self.trie.sequences_by_rat_id()
}
pub fn tileset(&self) -> &TileSet<T> {
&self.tileset
}
pub fn tileset_arc(&self) -> Arc<TileSet<T>> {
Arc::clone(&self.tileset)
}
}
struct LayerStats {
total: usize,
contributing: usize,
len_min: usize,
len_max: usize,
}
struct Builder<T: IsRing> {
tileset: Arc<TileSet<T>>,
trie: SeqTrie,
rats: Vec<Rat<T>>,
provenances: Vec<Provenance>,
rat_to_id: FxHashMap<Rat<T>, usize>,
max_subseq_len: usize,
pending: BTreeSet<usize>,
active_map: HashMap<usize, Vec<bool>>,
seen_dead: FxHashSet<Rat<T>>,
dead_count: usize,
bp_seed: BpSeed<T>,
verbose: bool,
layer: usize,
started: Instant,
}
impl<T: IsRing> Builder<T> {
fn new(tileset: Arc<TileSet<T>>, verbose: bool) -> Self {
let max_subseq_len = tileset.rats().iter().map(|r| r.len()).max().unwrap_or(0);
let bp_seed = BpSeed::new(Arc::clone(&tileset));
Builder {
tileset,
trie: SeqTrie::new(),
rats: Vec::new(),
provenances: Vec::new(),
rat_to_id: FxHashMap::default(),
max_subseq_len,
pending: BTreeSet::new(),
active_map: HashMap::new(),
seen_dead: FxHashSet::default(),
dead_count: 0,
bp_seed,
verbose,
layer: 0,
started: Instant::now(),
}
}
fn seed_phase(&mut self) {
for tile_idx in 0..self.tileset.num_tiles() {
let rat = self.tileset.rat(tile_idx).clone();
if self.rat_to_id.contains_key(&rat) || self.seen_dead.contains(&rat) {
continue;
}
let tentative_rat_id = self.rats.len();
let (new_count, _raw) =
self.trie
.insert_cyclic_subseqs(rat.seq(), self.max_subseq_len, tentative_rat_id);
if new_count == 0 {
self.seen_dead.insert(rat);
self.dead_count += 1;
continue;
}
let rat_id = tentative_rat_id;
self.rat_to_id.insert(rat.clone(), rat_id);
self.active_map.insert(rat_id, vec![true; rat.seq().len()]);
self.pending.insert(rat_id);
self.rats.push(rat);
self.provenances.push(Provenance::Seed { tile_idx });
}
if self.verbose {
eprintln!(
" Seeds: {} witnesses, {} dead, {} subseqs, {} pending",
self.rats.len(),
self.dead_count,
self.trie.len(),
self.pending.len(),
);
}
}
fn run_layer(&mut self) {
self.layer += 1;
let batch_ids: Vec<usize> = std::mem::take(&mut self.pending).into_iter().collect();
let (batch_ts, global_from_batch, batch_active) = self.build_batch(&batch_ids);
let new_entries = self.enumerate_layer_glues(&batch_ts, &global_from_batch, &batch_active);
let stats = self.integrate_new_rats(new_entries);
if self.verbose {
eprintln!(
" Layer {}: batch={} new_rats={} new_contrib={} trie={} pending={} lens=[{},{}]",
self.layer,
batch_ts.num_tiles(),
stats.total,
stats.contributing,
self.trie.len(),
self.pending.len(),
stats.len_min,
stats.len_max,
);
}
}
fn build_batch(
&mut self,
batch_ids: &[usize],
) -> (Arc<TileSet<T>>, Vec<usize>, Vec<Vec<bool>>) {
let batch_rats: Vec<Rat<T>> = batch_ids.iter().map(|&id| self.rats[id].clone()).collect();
let batch_ts = Arc::new(TileSet::new(batch_rats));
assert_eq!(batch_ts.num_tiles(), batch_ids.len());
let global_from_batch: Vec<usize> = (0..batch_ts.num_tiles())
.map(|i| {
*self
.rat_to_id
.get(batch_ts.rat(i))
.expect("batch rat must be in registry")
})
.collect();
let batch_active: Vec<Vec<bool>> = global_from_batch
.iter()
.map(|&id| self.active_map.remove(&id).unwrap_or_default())
.collect();
(batch_ts, global_from_batch, batch_active)
}
fn enumerate_layer_glues(
&self,
batch_ts: &Arc<TileSet<T>>,
global_from_batch: &[usize],
batch_active: &[Vec<bool>],
) -> Vec<(Rat<T>, Provenance)> {
let mf = MatchFinder::crossing_with_seed(Arc::clone(batch_ts), self.bp_seed.clone());
let mut new_entries: Vec<(Rat<T>, Provenance)> = Vec::new();
let mut seen_new: FxHashSet<Rat<T>> = FxHashSet::default();
let mut seen_invalid: FxHashSet<Rat<T>> = FxHashSet::default();
for batch_idx in 0..batch_ts.num_tiles() {
let active = &batch_active[batch_idx];
let source_rat_id = global_from_batch[batch_idx];
for tile_idx in 0..self.tileset.num_tiles() {
for (glued, mt) in mf.valid_matches_filtered(batch_idx, tile_idx, active) {
if self.rat_to_id.contains_key(&glued)
|| self.seen_dead.contains(&glued)
|| seen_new.contains(&glued)
|| seen_invalid.contains(&glued)
{
continue;
}
if Snake::<T>::try_from(glued.seq()).is_err() {
seen_invalid.insert(glued);
continue;
}
seen_new.insert(glued.clone());
new_entries.push((
glued,
Provenance::Glue {
source_rat_id,
tile_idx,
start_a: mt.a.range.start_offset,
start_b: mt.b.range.start_offset,
len: mt.len(),
},
));
}
}
}
new_entries
}
fn integrate_new_rats(&mut self, new_entries: Vec<(Rat<T>, Provenance)>) -> LayerStats {
let mut stats = LayerStats {
total: new_entries.len(),
contributing: 0,
len_min: usize::MAX,
len_max: 0,
};
for (rat, _) in &new_entries {
let l = rat.len();
stats.len_min = stats.len_min.min(l);
stats.len_max = stats.len_max.max(l);
}
for (rat, prov) in new_entries {
let (source_rat_id, match_len) = match &prov {
Provenance::Glue {
source_rat_id, len, ..
} => (*source_rat_id, *len),
Provenance::Seed { .. } => unreachable!("BFS layer only produces glues"),
};
let source_len = self.rats[source_rat_id].len();
let tentative_rat_id = self.rats.len();
let (new_count, raw_active) =
self.trie
.insert_cyclic_subseqs(rat.seq(), self.max_subseq_len, tentative_rat_id);
if new_count == 0 {
self.seen_dead.insert(rat);
self.dead_count += 1;
continue;
}
let rat_id = tentative_rat_id;
let mask = glue_active_mask(&raw_active, self.max_subseq_len, match_len, source_len);
self.active_map.insert(rat_id, mask);
self.pending.insert(rat_id);
self.rat_to_id.insert(rat.clone(), rat_id);
self.rats.push(rat);
self.provenances.push(prov);
stats.contributing += 1;
}
stats
}
fn into_explorer(self) -> SeqExplorer<T> {
if self.verbose {
eprintln!(
" Done: {} layers, {} witnesses, {} dead, {} subseqs, time={:.2?}",
self.layer,
self.rats.len(),
self.dead_count,
self.trie.len(),
self.started.elapsed(),
);
}
SeqExplorer {
tileset: self.tileset,
trie: self.trie,
rats: self.rats,
provenances: self.provenances,
rat_to_id: self.rat_to_id,
max_subseq_len: self.max_subseq_len,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cyclotomic::ZZ12;
use crate::geom::tileset;
fn hex_ts() -> Arc<TileSet<ZZ12>> {
tileset::hex::<ZZ12>()
}
fn sq_ts() -> Arc<TileSet<ZZ12>> {
tileset::square::<ZZ12>()
}
fn assert_invariants(label: &str, explorer: &SeqExplorer<ZZ12>) {
let n_rats = explorer.num_rats();
let n_subseqs = explorer.num_subseqs();
let k = explorer.max_subseq_len();
assert_eq!(explorer.rats().len(), n_rats);
assert_eq!(explorer.provenances().len(), n_rats);
for rat_id in 0..n_rats {
assert_eq!(
explorer.rat_id_of(explorer.rat(rat_id)),
Some(rat_id),
"{label}: rat_id_of round-trip failed for rat {rat_id}"
);
}
let n_tiles = explorer.tileset().num_tiles();
for (rat_id, prov) in explorer.provenances().iter().enumerate() {
match prov {
Provenance::Seed { tile_idx } => {
assert!(*tile_idx < n_tiles, "{label}: seed tile_idx out of range");
}
Provenance::Glue {
source_rat_id,
tile_idx,
..
} => {
assert!(
*source_rat_id < rat_id,
"{label}: glue source ({source_rat_id}) must precede dest ({rat_id})"
);
assert!(*tile_idx < n_tiles, "{label}: glue tile_idx out of range");
}
}
}
for rat_id in 0..n_rats {
let reconstructed = explorer
.provenance(rat_id)
.apply(explorer)
.unwrap_or_else(|e| panic!("{label}: rat {rat_id} apply failed: {e}"));
assert_eq!(
&reconstructed,
explorer.rat(rat_id),
"{label}: provenance.apply doesn't reproduce rat {rat_id}"
);
}
let by_rat = explorer.sequences_by_rat_id();
assert_eq!(
by_rat.len(),
n_rats,
"{label}: every catalogued rat must contribute"
);
let mut total_seqs = 0usize;
for (&rat_id, seqs) in &by_rat {
assert!(rat_id < n_rats);
assert!(
!seqs.is_empty(),
"{label}: contributor entry must be non-empty"
);
let host = explorer.rat(rat_id).seq();
for seq in seqs {
assert!(!seq.is_empty(), "{label}: empty subseq emitted");
assert!(
seq.len() <= k,
"{label}: subseq of length {} exceeds max_subseq_len {}",
seq.len(),
k
);
assert!(
cyclic_contains(host, seq),
"{label}: subseq {:?} is not a cyclic substring of rat {} ({:?})",
seq,
rat_id,
host
);
total_seqs += 1;
}
}
assert_eq!(
total_seqs, n_subseqs,
"{label}: subseqs across map ({total_seqs}) must equal trie size ({n_subseqs})"
);
}
#[test]
fn hex_explorer_known_values_and_invariants() {
let explorer = SeqExplorer::new(hex_ts());
assert_invariants("hex", &explorer);
assert_eq!(explorer.max_subseq_len(), 6);
assert_eq!(explorer.num_subseqs(), 120);
assert_eq!(explorer.num_rats(), 27);
}
#[test]
fn square_explorer_known_values_and_invariants() {
let explorer = SeqExplorer::new(sq_ts());
assert_invariants("square", &explorer);
assert_eq!(explorer.max_subseq_len(), 4);
assert_eq!(explorer.num_subseqs(), 110);
assert_eq!(explorer.num_rats(), 43);
}
#[test]
fn single_tile_seed_emits_exactly_its_cyclic_substrings() {
let explorer = SeqExplorer::new(hex_ts());
assert_eq!(explorer.tileset().num_tiles(), 1);
assert!(matches!(
explorer.provenance(0),
Provenance::Seed { tile_idx: 0 }
));
let by_rat = explorer.sequences_by_rat_id();
let hex_seqs = by_rat.get(&0).expect("seed must be a contributor");
assert!(!hex_seqs.is_empty());
}
#[test]
fn tileset_accessors_compile_and_agree() {
let explorer = SeqExplorer::new(hex_ts());
let n_via_ref: usize = explorer.tileset().num_tiles();
let arc = explorer.tileset_arc();
assert_eq!(n_via_ref, arc.num_tiles());
}
fn cyclic_substrings_at_max_len(seq: &[i8], k: usize) -> Vec<Vec<i8>> {
let n = seq.len();
let len = k.min(n);
if len == 0 || n == 0 {
return Vec::new();
}
(0..n)
.map(|start| (0..len).map(|i| seq[(start + i) % n]).collect())
.collect()
}
fn assert_bfs_fixed_point(label: &str, tileset: Arc<TileSet<ZZ12>>) {
let explorer = SeqExplorer::new(Arc::clone(&tileset));
let k = explorer.max_subseq_len();
let known: BTreeSet<Vec<i8>> = explorer
.sequences_by_rat_id()
.into_values()
.flatten()
.collect();
assert_eq!(
known.len(),
explorer.num_subseqs(),
"{label}: |known set| must match trie size",
);
let by_rat = explorer.sequences_by_rat_id();
for &rat_id in by_rat.keys() {
let reconstructed = explorer
.provenance(rat_id)
.apply(&explorer)
.unwrap_or_else(|e| panic!("{label}: witness {rat_id} apply failed: {e}"));
assert_eq!(
&reconstructed,
explorer.rat(rat_id),
"{label}: provenance.apply doesn't reproduce witness {rat_id}",
);
for s in cyclic_substrings_at_max_len(reconstructed.seq(), k) {
assert!(
known.contains(&s),
"{label}: witness {rat_id} has cyclic substring {s:?} \
not in trie",
);
}
}
let witness_ids: Vec<usize> = by_rat.keys().copied().collect();
let report = check_fixed_point(
&explorer.tileset_arc(),
explorer.rats(),
&witness_ids,
&known,
k,
);
assert!(
report.is_complete(),
"{label}: completeness scan found {} missing substrings: {:?}",
report.missing.len(),
report.missing.iter().take(5).collect::<Vec<_>>(),
);
}
#[test]
fn hex_bfs_is_at_fixed_point() {
assert_bfs_fixed_point("hex", hex_ts());
}
#[test]
fn square_bfs_is_at_fixed_point() {
assert_bfs_fixed_point("square", sq_ts());
}
}