1const ROOT_TBL: usize = 128;
21
22
23pub struct Sam {
31 node: Vec<[u32; 4]>,
37 edges: Vec<u64>,
41 firstpos: Vec<u32>, lastpos: Vec<u32>, root_next: Vec<i32>,
47 dfs_in: Vec<u32>, dfs_cnt: Vec<u32>, seg_data: Vec<u32>,
55 seg_off: Vec<u32>,
56 seg_n: usize, epos: Vec<u32>,
60 epmeta: Vec<[u32; 4]>,
63}
64
65const LINEAR_MAX: usize = 256;
68
69impl Sam {
70 fn empty() -> Self {
71 Sam {
72 node: Vec::new(),
73 edges: Vec::new(),
74 firstpos: Vec::new(),
75 lastpos: Vec::new(),
76 root_next: Vec::new(),
77 dfs_in: Vec::new(),
78 dfs_cnt: Vec::new(),
79 seg_data: Vec::new(),
80 seg_off: Vec::new(),
81 seg_n: 0,
82 epos: Vec::new(),
83 epmeta: Vec::new(),
84 }
85 }
86
87 fn seg_node(&self, i: usize) -> &[u32] {
89 &self.seg_data[self.seg_off[i] as usize..self.seg_off[i + 1] as usize]
90 }
91
92 fn endpos_range_m(m: &[u32; 4], seg_n: usize) -> (usize, usize) {
94 let s = m[2] as usize; (s + seg_n, s + m[3] as usize + seg_n) }
97
98 fn max_le(&self, state: u32, x: u32) -> Option<u32> {
100 #[allow(clippy::undocumented_unsafe_blocks)]
103 let m = unsafe { self.epmeta.get_unchecked(state as usize) }; if m[1] <= x {
106 return Some(m[1]); }
108 if m[0] > x {
109 return None; }
111 let cnt = m[3] as usize;
112 if cnt <= LINEAR_MAX {
113 let lo = m[2] as usize;
116 let mut best = 0u32;
117 #[allow(clippy::undocumented_unsafe_blocks)]
118 for &v in unsafe { self.epos.get_unchecked(lo..lo + cnt) } {
119 best = best.max(if v <= x { v } else { 0 });
120 }
121 return Some(best);
122 }
123 let (mut l, mut r) = Self::endpos_range_m(m, self.seg_n);
125 let mut best: Option<u32> = None;
126 while l < r {
127 if l & 1 == 1 {
128 best = best.max(node_max_le(self.seg_node(l), x));
129 l += 1;
130 }
131 if r & 1 == 1 {
132 r -= 1;
133 best = best.max(node_max_le(self.seg_node(r), x));
134 }
135 l >>= 1;
136 r >>= 1;
137 }
138 best
139 }
140
141 fn min_in(&self, state: u32, lo: u32, hi: u32) -> Option<u32> {
143 #[allow(clippy::undocumented_unsafe_blocks)]
145 let m = unsafe { self.epmeta.get_unchecked(state as usize) };
146 if m[0] >= lo {
148 return (m[0] <= hi).then_some(m[0]); }
150 if m[1] < lo {
151 return None; }
153 let cnt = m[3] as usize;
154 if cnt <= LINEAR_MAX {
155 let off = m[2] as usize;
157 let mut best = u32::MAX;
158 #[allow(clippy::undocumented_unsafe_blocks)]
159 for &v in unsafe { self.epos.get_unchecked(off..off + cnt) } {
160 best = best.min(if v >= lo && v <= hi { v } else { u32::MAX });
161 }
162 return (best != u32::MAX).then_some(best);
163 }
164 let (mut l, mut r) = Self::endpos_range_m(m, self.seg_n);
166 let mut best: Option<u32> = None;
167 while l < r {
168 if l & 1 == 1 {
169 best = merge_min(best, node_min_in(self.seg_node(l), lo, hi));
170 l += 1;
171 }
172 if r & 1 == 1 {
173 r -= 1;
174 best = merge_min(best, node_min_in(self.seg_node(r), lo, hi));
175 }
176 l >>= 1;
177 r >>= 1;
178 }
179 best
180 }
181}
182
183fn node_max_le(sorted: &[u32], x: u32) -> Option<u32> {
185 let k = sorted.partition_point(|&v| v <= x);
186 (k > 0).then(|| sorted[k - 1])
187}
188
189fn node_min_in(sorted: &[u32], lo: u32, hi: u32) -> Option<u32> {
191 let k = sorted.partition_point(|&v| v < lo);
192 sorted.get(k).copied().filter(|&v| v <= hi)
193}
194
195fn merge_min(a: Option<u32>, b: Option<u32>) -> Option<u32> {
196 match (a, b) {
197 (Some(x), Some(y)) => Some(x.min(y)),
198 (x, None) => x,
199 (None, y) => y,
200 }
201}
202
203struct Builder {
205 edge_char: Vec<char>,
206 edge_to: Vec<u32>,
207 edge_next: Vec<i32>,
208 head: Vec<i32>,
209 link: Vec<i32>,
210 len: Vec<u32>,
211 firstpos: Vec<u32>,
212 primary: Vec<bool>, last: u32,
214}
215
216impl Builder {
217 fn empty() -> Self {
218 Builder {
219 edge_char: Vec::new(),
220 edge_to: Vec::new(),
221 edge_next: Vec::new(),
222 head: Vec::new(),
223 link: Vec::new(),
224 len: Vec::new(),
225 firstpos: Vec::new(),
226 primary: Vec::new(),
227 last: 0,
228 }
229 }
230
231 fn new(cap: usize) -> Self {
232 let mut b = Builder::empty();
233 b.reset(cap);
234 b
235 }
236
237 fn reset(&mut self, cap: usize) {
239 self.edge_char.clear();
240 self.edge_to.clear();
241 self.edge_next.clear();
242 self.head.clear();
243 self.link.clear();
244 self.len.clear();
245 self.firstpos.clear();
246 self.primary.clear();
247 self.edge_char.reserve(3 * cap);
248 self.edge_to.reserve(3 * cap);
249 self.edge_next.reserve(3 * cap);
250 self.primary.push(false); self.head.push(-1);
252 self.link.push(-1);
253 self.len.push(0);
254 self.firstpos.push(0);
255 self.last = 0;
256 }
257
258 #[allow(clippy::cast_sign_loss)]
259 fn find(&self, state: u32, c: char) -> Option<u32> {
260 let mut e = self.head[state as usize];
261 while e != -1 {
262 let idx = e as usize;
263 if self.edge_char[idx] == c {
264 return Some(self.edge_to[idx]);
265 }
266 e = self.edge_next[idx];
267 }
268 None
269 }
270
271 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
272 fn add_edge(&mut self, state: u32, c: char, to: u32) {
273 self.edge_char.push(c);
274 self.edge_to.push(to);
275 self.edge_next.push(self.head[state as usize]);
276 self.head[state as usize] = (self.edge_char.len() - 1) as i32;
277 }
278
279 #[allow(clippy::cast_sign_loss)]
280 fn set_edge(&mut self, state: u32, c: char, to: u32) {
281 let mut e = self.head[state as usize];
282 while e != -1 {
283 let idx = e as usize;
284 if self.edge_char[idx] == c {
285 self.edge_to[idx] = to;
286 return;
287 }
288 e = self.edge_next[idx];
289 }
290 self.add_edge(state, c, to);
291 }
292
293 #[allow(clippy::cast_possible_truncation)]
294 fn new_state(&mut self, len: u32, link: i32, firstpos: u32, primary: bool) -> u32 {
295 self.head.push(-1);
296 self.len.push(len);
297 self.link.push(link);
298 self.firstpos.push(firstpos);
299 self.primary.push(primary);
300 (self.head.len() - 1) as u32
301 }
302
303 #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap, clippy::cast_sign_loss)]
304 fn extend(&mut self, c: char, pos: usize) {
305 let cur = self.new_state(self.len[self.last as usize] + 1, -1, pos as u32, true);
306 let mut p = self.last as i32;
307 while p != -1 && self.find(p as u32, c).is_none() {
308 self.add_edge(p as u32, c, cur);
309 p = self.link[p as usize];
310 }
311 if p == -1 {
312 self.link[cur as usize] = 0;
313 } else {
314 let q = self.find(p as u32, c).unwrap();
315 if self.len[p as usize] + 1 == self.len[q as usize] {
316 self.link[cur as usize] = q as i32;
317 } else {
318 let clone = self.new_state(self.len[p as usize] + 1, self.link[q as usize], self.firstpos[q as usize], false);
319 let mut e = self.head[q as usize];
320 while e != -1 {
321 let idx = e as usize;
322 self.add_edge(clone, self.edge_char[idx], self.edge_to[idx]);
323 e = self.edge_next[idx];
324 }
325 while p != -1 && self.find(p as u32, c) == Some(q) {
326 self.set_edge(p as u32, c, clone);
327 p = self.link[p as usize];
328 }
329 self.link[q as usize] = clone as i32;
330 self.link[cur as usize] = clone as i32;
331 }
332 }
333 self.last = cur;
334 }
335
336 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::needless_range_loop)]
339 fn finalize_into(&self, out: &mut Sam) {
340 let nstates = self.head.len();
341 let nedges = self.edge_char.len();
342 out.firstpos.clear();
343 out.firstpos.extend_from_slice(&self.firstpos);
344 let mut off = vec![0u32; nstates + 1];
346 for state in 0..nstates {
347 let mut e = self.head[state];
348 while e != -1 {
349 off[state + 1] += 1;
350 e = self.edge_next[e as usize];
351 }
352 }
353 for state in 0..nstates {
354 off[state + 1] += off[state];
355 }
356 out.edges.clear();
359 out.edges.resize(nedges, 0);
360 let mut scratch: Vec<u64> = Vec::new(); for state in 0..nstates {
362 scratch.clear();
363 let mut e = self.head[state];
364 while e != -1 {
365 let idx = e as usize;
366 scratch.push((u64::from(self.edge_char[idx] as u32) << 32) | u64::from(self.edge_to[idx]));
367 e = self.edge_next[idx];
368 }
369 scratch.sort_unstable();
370 let base = off[state] as usize;
371 for (k, &packed) in scratch.iter().enumerate() {
372 out.edges[base + k] = packed;
373 }
374 }
375 out.node.clear();
377 out.node.reserve(nstates);
378 for s in 0..nstates {
379 let link = if self.link[s] < 0 { 0 } else { self.link[s] as u32 };
380 out.node.push([self.len[s], link, off[s], off[s + 1]]);
381 }
382 out.root_next.clear();
384 out.root_next.resize(ROOT_TBL, -1);
385 for k in off[0] as usize..off[1] as usize {
386 let e = out.edges[k];
387 let ci = (e >> 32) as usize;
388 if ci < ROOT_TBL {
389 out.root_next[ci] = i32::try_from((e & 0xFFFF_FFFF) as u32).expect("state fits i32");
390 }
391 }
392 self.build_endpos(out, nstates);
393 }
394
395 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
398 fn build_endpos(&self, out: &mut Sam, nstates: usize) {
399 let mut child_head = vec![0u32; nstates + 1];
401 for s in 1..nstates {
402 child_head[self.link[s] as usize + 1] += 1;
403 }
404 for s in 0..nstates {
405 child_head[s + 1] += child_head[s];
406 }
407 let mut child_arr = vec![0u32; nstates.saturating_sub(1)];
408 let mut fill = child_head.clone();
409 for s in 1..nstates {
410 let p = self.link[s] as usize;
411 child_arr[fill[p] as usize] = s as u32;
412 fill[p] += 1;
413 }
414 out.dfs_in.clear();
417 out.dfs_in.resize(nstates, 0);
418 out.dfs_cnt.clear();
419 out.dfs_cnt.resize(nstates, 0);
420 out.lastpos.clear();
421 out.lastpos.resize(nstates, 0);
422 out.epos.clear();
423 let mut stack: Vec<(u32, bool)> = vec![(0, false)];
424 while let Some((s, exit)) = stack.pop() {
425 let su = s as usize;
426 if exit {
427 out.dfs_cnt[su] = out.epos.len() as u32 - out.dfs_in[su];
428 let mut lp = if self.primary[su] { self.firstpos[su] } else { 0 };
429 for k in child_head[su]..child_head[su + 1] {
430 lp = lp.max(out.lastpos[child_arr[k as usize] as usize]);
431 }
432 out.lastpos[su] = lp;
433 continue;
434 }
435 out.dfs_in[su] = out.epos.len() as u32;
436 if self.primary[su] {
437 out.epos.push(self.firstpos[su]);
438 }
439 stack.push((s, true));
440 for k in child_head[su]..child_head[su + 1] {
441 stack.push((child_arr[k as usize], false));
442 }
443 }
444 let n = out.epos.len();
447 let mut seg_n = 1usize;
448 while seg_n < n.max(1) {
449 seg_n <<= 1;
450 }
451 out.seg_n = seg_n;
452 let mut tree: Vec<Vec<u32>> = vec![Vec::new(); 2 * seg_n];
453 for i in 0..n {
454 tree[seg_n + i] = vec![out.epos[i]];
455 }
456 for i in (1..seg_n).rev() {
457 let (lhs, rhs) = (&tree[2 * i], &tree[2 * i + 1]);
458 let mut acc = Vec::with_capacity(lhs.len() + rhs.len());
459 let (mut x, mut y) = (0usize, 0usize);
460 while x < lhs.len() && y < rhs.len() {
461 if lhs[x] <= rhs[y] {
462 acc.push(lhs[x]);
463 x += 1;
464 } else {
465 acc.push(rhs[y]);
466 y += 1;
467 }
468 }
469 acc.extend_from_slice(&lhs[x..]);
470 acc.extend_from_slice(&rhs[y..]);
471 tree[i] = acc;
472 }
473 out.seg_off.clear();
474 out.seg_off.reserve(2 * seg_n + 1);
475 out.seg_data.clear();
476 let mut off = 0u32;
477 out.seg_off.push(0);
478 for node in &tree {
479 out.seg_data.extend_from_slice(node);
480 off += u32::try_from(node.len()).expect("epos fits u32");
481 out.seg_off.push(off);
482 }
483 out.epmeta.clear();
485 out.epmeta.reserve(nstates);
486 for s in 0..nstates {
487 out.epmeta.push([out.firstpos[s], out.lastpos[s], out.dfs_in[s], out.dfs_cnt[s]]);
488 }
489 }
490
491 fn finalize(self) -> Sam {
492 let mut out = Sam::empty();
493 self.finalize_into(&mut out);
494 out
495 }
496}
497
498#[must_use]
500pub fn build_sam(b: &[char]) -> Sam {
501 let mut bld = Builder::new(b.len());
502 for (i, &c) in b.iter().enumerate() {
503 bld.extend(c, i);
504 }
505 bld.finalize()
506}
507
508#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
514fn longest_in(sam: &Sam, fstate: &[u32], fmatch: &[u32], al: usize, ar: usize, bl: usize, br: usize)
515 -> (usize, usize, usize) {
516 let blo = bl as u32;
517 let hi = br as u32 - 1; let (mut best_len, mut best_a, mut best_b) = (0usize, 0usize, 0usize);
519 let node = sam.node.as_slice();
520 #[allow(clippy::undocumented_unsafe_blocks)]
523 unsafe {
524 for i in al..ar {
525 let cap = i - al + 1; let eff = (*fmatch.get_unchecked(i) as usize).min(cap);
527 if eff <= best_len {
528 continue; }
530 let mut cur = *fstate.get_unchecked(i);
533 loop {
534 if cur == 0 {
535 break;
536 }
537 let nd = *node.get_unchecked(cur as usize); let curlen = eff.min(nd[0] as usize);
539 if curlen <= best_len {
540 break;
541 }
542 let band_min = *node.get_unchecked(nd[1] as usize); let band_min = band_min[0] as usize + 1; if curlen >= band_min {
545 if let Some(pmax) = sam.max_le(cur, hi) {
546 if pmax >= blo {
547 let l = curlen.min((pmax - blo) as usize + 1); if l >= band_min {
549 if l > best_len {
551 if let Some(pmin) = sam.min_in(cur, blo + l as u32 - 1, hi) {
552 best_len = l;
553 best_a = i + 1 - l;
554 best_b = pmin as usize + 1 - l;
555 }
556 }
557 break; }
559 }
560 }
561 }
562 let link = nd[1];
563 if link == 0 {
564 break; }
566 cur = link;
567 }
568 }
569 }
570 (best_a, best_b, best_len)
571}
572
573#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
578fn matching_stats_into(a: &[char], sam_b: &Sam, fstate: &mut Vec<u32>, fmatch: &mut Vec<u32>) {
579 let n = a.len();
580 fstate.clear();
585 fstate.reserve(n);
586 fmatch.clear();
587 fmatch.reserve(n);
588 #[allow(clippy::undocumented_unsafe_blocks)]
589 unsafe {
590 fstate.set_len(n);
591 fmatch.set_len(n);
592 }
593 let node = sam_b.node.as_slice();
596 let edges = sam_b.edges.as_slice();
597 let root = sam_b.root_next.as_slice();
598 let mut state = 0u32;
599 let mut matched = 0u32;
600 #[allow(clippy::undocumented_unsafe_blocks)]
605 unsafe {
606 for i in 0..n {
607 let c = *a.get_unchecked(i);
608 loop {
609 let nx: i64 = if state == 0 {
612 let ci = c as usize;
613 if ci < ROOT_TBL {
614 i64::from(*root.get_unchecked(ci))
615 } else {
616 let nd = node.get_unchecked(0);
617 csr_lookup(edges, nd[2] as usize, nd[3] as usize, c)
618 }
619 } else {
620 let nd = node.get_unchecked(state as usize);
621 csr_lookup(edges, nd[2] as usize, nd[3] as usize, c)
622 };
623 if nx >= 0 {
624 state = nx as u32;
625 matched += 1;
626 break;
627 }
628 if state == 0 {
629 matched = 0;
630 break;
631 }
632 let link = node.get_unchecked(state as usize)[1]; state = link;
634 matched = node.get_unchecked(link as usize)[0]; }
636 *fstate.get_unchecked_mut(i) = state;
637 *fmatch.get_unchecked_mut(i) = matched;
638 }
639 }
640}
641
642#[inline]
645#[allow(clippy::cast_possible_truncation)]
646fn csr_lookup(edges: &[u64], mut lo: usize, hi: usize, c: char) -> i64 {
647 let mut hi = hi;
648 let key = c as u32;
649 while lo < hi {
650 let mid = lo + (hi - lo) / 2;
651 let e = unsafe { *edges.get_unchecked(mid) };
653 let mc = (e >> 32) as u32; if mc == key {
655 return i64::from((e & 0xFFFF_FFFF) as u32); }
657 if mc < key {
658 lo = mid + 1;
659 } else {
660 hi = mid;
661 }
662 }
663 -1
664}
665
666thread_local! {
667 static MS_BUF: std::cell::RefCell<(Vec<u32>, Vec<u32>)> =
669 const { std::cell::RefCell::new((Vec::new(), Vec::new())) };
670 static STACK_BUF: std::cell::RefCell<Vec<(usize, usize, usize, usize)>> =
673 const { std::cell::RefCell::new(Vec::new()) };
674}
675
676#[must_use]
680pub fn matching_stats_cost(a: &[char], sam_b: &Sam) -> u64 {
681 MS_BUF.with_borrow_mut(|(fstate, fmatch)| {
682 matching_stats_into(a, sam_b, fstate, fmatch);
683 fmatch.iter().map(|&x| u64::from(x)).sum()
684 })
685}
686
687fn gestalt_m_with(a: &[char], b: &[char], sam_b: &Sam) -> usize {
688 let n = a.len();
689 if n == 0 {
690 return 0;
691 }
692 MS_BUF.with_borrow_mut(|(fstate, fmatch)| {
693 matching_stats_into(a, sam_b, fstate, fmatch);
694 gestalt_m_recur(a, b, sam_b, fstate, fmatch)
695 })
696}
697
698#[allow(clippy::cast_sign_loss, clippy::many_single_char_names)]
699fn gestalt_m_recur(a: &[char], b: &[char], sam_b: &Sam, fstate: &[u32], fmatch: &[u32]) -> usize {
700 let n = a.len();
701 let mut total = 0usize;
702 STACK_BUF.with_borrow_mut(|stack| {
703 stack.clear();
704 stack.push((0, n, 0, b.len()));
705 while let Some((al, ar, bl, br)) = stack.pop() {
706 if al >= ar || bl >= br {
707 continue;
708 }
709 let (i, j, l) = longest_in(sam_b, fstate, fmatch, al, ar, bl, br);
710 if l == 0 {
711 continue;
712 }
713 total += l;
714 stack.push((al, i, bl, j));
715 stack.push((i + l, ar, j + l, br));
716 }
717 });
718 total
719}
720
721#[allow(
726 clippy::cast_precision_loss,
727 clippy::cast_sign_loss,
728 clippy::cast_possible_truncation,
729 clippy::many_single_char_names
730)]
731#[must_use]
732pub fn gestalt_qualifies(a: &[char], b: &[char], sam_b: &Sam, threshold: f64) -> bool {
733 let nb = b.len();
734 let total = a.len() + nb;
735 if total == 0 {
736 return true;
737 }
738 let need = (threshold * total as f64 / 2.0).ceil() as usize;
739 if need == 0 {
740 return true;
741 }
742 let n = a.len();
743 if n == 0 || nb == 0 {
744 return false; }
746 MS_BUF.with_borrow_mut(|(fstate, fmatch)| {
747 matching_stats_into(a, sam_b, fstate, fmatch);
748 gestalt_qualifies_ms(n, nb, sam_b, threshold, fstate, fmatch)
749 })
750}
751
752#[allow(
755 clippy::cast_precision_loss,
756 clippy::cast_sign_loss,
757 clippy::cast_possible_truncation,
758 clippy::many_single_char_names
759)]
760#[must_use]
761pub fn gestalt_qualifies_ms(na: usize, nb: usize, sam_b: &Sam, threshold: f64, fstate: &[u32], fmatch: &[u32]) -> bool {
762 let total = na + nb;
763 if total == 0 {
764 return true;
765 }
766 let need = (threshold * total as f64 / 2.0).ceil() as usize;
767 if need == 0 {
768 return true;
769 }
770 if na == 0 || nb == 0 {
771 return false;
772 }
773 STACK_BUF.with_borrow_mut(|stack| {
774 let mut m = 0usize;
775 let mut pending = na.min(nb);
776 stack.clear();
777 stack.push((0, na, 0, nb));
778 while let Some((al, ar, bl, br)) = stack.pop() {
779 pending -= (ar - al).min(br - bl);
780 if al >= ar || bl >= br {
781 continue;
782 }
783 let (i, j, l) = longest_in(sam_b, fstate, fmatch, al, ar, bl, br);
784 if l == 0 {
785 continue;
786 }
787 m += l;
788 if m >= need {
789 return true;
790 }
791 pending += (i - al).min(j - bl) + (ar - i - l).min(br - j - l);
792 if m + pending < need {
793 return false;
794 }
795 stack.push((al, i, bl, j));
796 stack.push((i + l, ar, j + l, br));
797 }
798 m >= need
799 })
800}
801
802
803#[allow(
809 clippy::cast_precision_loss,
810 clippy::cast_sign_loss,
811 clippy::cast_possible_truncation,
812 clippy::many_single_char_names
813)]
814#[must_use]
815pub fn gestalt_edge(a: &[char], b: &[char], sam_b: &Sam, threshold: f64) -> Option<f64> {
816 let na = a.len();
817 let nb = b.len();
818 let total = na + nb;
819 if total == 0 {
820 return Some(1.0);
821 }
822 let need = (threshold * total as f64 / 2.0).ceil() as usize;
823 if na == 0 || nb == 0 {
824 return (need == 0).then_some(0.0);
825 }
826 MS_BUF.with_borrow_mut(|(fstate, fmatch)| {
827 matching_stats_into(a, sam_b, fstate, fmatch);
828 STACK_BUF.with_borrow_mut(|stack| {
829 let mut m = 0usize;
830 let mut pending = na.min(nb);
831 stack.clear();
832 stack.push((0, na, 0, nb));
833 while let Some((al, ar, bl, br)) = stack.pop() {
834 pending -= (ar - al).min(br - bl);
835 if al >= ar || bl >= br {
836 continue;
837 }
838 let (i, j, l) = longest_in(sam_b, fstate, fmatch, al, ar, bl, br);
839 if l == 0 {
840 continue;
841 }
842 m += l;
843 pending += (i - al).min(j - bl) + (ar - i - l).min(br - j - l);
844 if m + pending < need {
845 return None; }
847 stack.push((al, i, bl, j));
848 stack.push((i + l, ar, j + l, br));
849 }
850 (m >= need).then(|| 2.0 * m as f64 / total as f64)
851 })
852 })
853}
854
855#[allow(
861 clippy::cast_precision_loss,
862 clippy::cast_sign_loss,
863 clippy::cast_possible_truncation,
864 clippy::many_single_char_names
865)]
866#[must_use]
867pub fn gestalt_ratio_capped(a: &[char], b: &[char], sam_b: &Sam, cap: f64) -> f64 {
868 let na = a.len();
869 let nb = b.len();
870 let total = na + nb;
871 if total == 0 {
872 return 1.0; }
874 if na == 0 || nb == 0 {
875 return 0.0; }
877 let exceed = (cap * total as f64 / 2.0).floor() as usize + 1;
879 MS_BUF.with_borrow_mut(|(fstate, fmatch)| {
880 matching_stats_into(a, sam_b, fstate, fmatch);
881 STACK_BUF.with_borrow_mut(|stack| {
882 let mut m = 0usize;
883 stack.clear();
884 stack.push((0, na, 0, nb));
885 while let Some((al, ar, bl, br)) = stack.pop() {
886 if al >= ar || bl >= br {
887 continue;
888 }
889 let (i, j, l) = longest_in(sam_b, fstate, fmatch, al, ar, bl, br);
890 if l == 0 {
891 continue;
892 }
893 m += l;
894 if m >= exceed {
895 return 2.0; }
897 stack.push((al, i, bl, j));
898 stack.push((i + l, ar, j + l, br));
899 }
900 2.0 * m as f64 / total as f64 })
902 })
903}
904
905#[allow(clippy::cast_precision_loss)]
907#[must_use]
908pub fn gestalt_ratio_prebuilt(a: &[char], b: &[char], sam_b: &Sam) -> f64 {
909 let total = a.len() + b.len();
910 if total == 0 {
911 return 1.0;
912 }
913 2.0 * gestalt_m_with(a, b, sam_b) as f64 / total as f64
914}
915
916#[allow(clippy::cast_precision_loss)]
917#[must_use]
918pub fn gestalt_ratio_chars(a: &[char], b: &[char]) -> f64 {
919 if a.is_empty() && b.is_empty() {
920 return 1.0;
921 }
922 let sam_b = build_sam(b);
923 gestalt_ratio_prebuilt(a, b, &sam_b)
924}
925
926#[must_use]
928pub fn gestalt_ratio(a: &str, b: &str) -> f64 {
929 let av: Vec<char> = a.chars().collect();
930 let bv: Vec<char> = b.chars().collect();
931 gestalt_ratio_chars(&av, &bv)
932}
933
934#[cfg(test)]
935mod tests {
936 #![allow(clippy::float_cmp, clippy::unreadable_literal)]
937 use super::{build_sam, gestalt_edge, gestalt_qualifies, gestalt_ratio_capped, gestalt_ratio_chars};
938
939 fn r(a: &str, b: &str) -> f64 {
940 gestalt_ratio_chars(&a.chars().collect::<Vec<_>>(), &b.chars().collect::<Vec<_>>())
941 }
942
943 #[test]
944 fn matches_difflib_reference_values() {
945 assert_eq!(r("", ""), 1.0);
946 assert_eq!(r("", "x"), 0.0);
947 assert_eq!(r("abc", "abc"), 1.0);
948 assert_eq!(r("abc", "abd"), 0.6666666666666666);
949 assert_eq!(r("the quick brown fox", "the quick brown dog"), 0.8947368421052632);
950 assert_eq!(r("tide", "diet"), 0.25);
951 assert_eq!(r("ПриветМир", "ПриветМирЪ"), 0.9473684210526315);
952 assert_eq!(r("aaaaabbbbbccccc", "aaaaaxbbbbbxccccc"), 0.9375);
953 }
954
955 #[test]
957 fn qualifies_matches_ratio_threshold() {
958 let mut s: u64 = 0x1234_5678_9abc_def1;
960 let mut next = || {
961 s ^= s << 13;
962 s ^= s >> 7;
963 s ^= s << 17;
964 s
965 };
966 for _ in 0..3000 {
967 let la = (next() % 40) as usize;
968 let lb = (next() % 40) as usize;
969 let mk = |n: usize, rng: &mut dyn FnMut() -> u64| -> Vec<char> {
970 (0..n).map(|_| char::from(b'a' + (rng() % 4) as u8)).collect()
971 };
972 let a = mk(la, &mut next);
973 let b = mk(lb, &mut next);
974 let sam_b = build_sam(&b);
975 let ratio = gestalt_ratio_chars(&a, &b);
976 for &t in &[0.0_f64, 0.25, 0.5, 0.75, 0.9, 1.0] {
977 assert_eq!(
978 gestalt_qualifies(&a, &b, &sam_b, t),
979 ratio >= t,
980 "a={a:?} b={b:?} t={t} ratio={ratio}"
981 );
982 let edge = gestalt_edge(&a, &b, &sam_b, t);
984 assert_eq!(edge.is_some(), ratio >= t, "edge.is_some a={a:?} b={b:?} t={t} ratio={ratio}");
985 if let Some(r) = edge {
986 assert_eq!(r, ratio, "edge ratio a={a:?} b={b:?} t={t}");
987 }
988 let capped = gestalt_ratio_capped(&a, &b, &sam_b, t);
990 if ratio <= t {
991 assert_eq!(capped, ratio, "capped exact a={a:?} b={b:?} cap={t} ratio={ratio}");
992 } else {
993 assert!(capped > t, "capped prune a={a:?} b={b:?} cap={t} ratio={ratio} got={capped}");
994 }
995 }
996 }
997 }
998}