#![allow(
clippy::cast_possible_truncation,
clippy::cast_possible_wrap,
clippy::cast_sign_loss,
clippy::doc_markdown,
clippy::many_single_char_names,
clippy::items_after_statements,
clippy::iter_cloned_collect,
clippy::redundant_closure_for_method_calls,
clippy::type_complexity
)]
use crate::cluster_canonicals_chars;
#[cfg(all(feature = "gpu", target_os = "macos"))]
use crate::gpu::{BoostGuard, CorpusGpu, Gpu};
/// Backend selection requested for a `Rationer`.
///
/// Within this file `Gpu` and `GpuPlusCpu` are handled the same way: both ask
/// for a GPU context at construction time, and the methods in this file fall
/// back to their CPU implementation whenever no context is available.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Concurrency {
    /// Never create or use a GPU context.
    Cpu,
    /// Request a GPU context.
    Gpu,
    /// Request a GPU context; this is the default variant.
    GpuPlusCpu,
}

impl Default for Concurrency {
    /// Returns [`Concurrency::GpuPlusCpu`].
    #[allow(clippy::derivable_impls)]
    fn default() -> Self {
        Concurrency::GpuPlusCpu
    }
}
/// Builder for [`Rationer`]; collects the options consumed by
/// `Rationer::new_with` when `build` is called.
pub struct RationerBuilder {
// Requested backend; defaults to `Concurrency::default()`.
concurrency: Concurrency,
// Size of a dedicated rayon pool, or `None` to not create one.
threads: Option<usize>,
// Delta value stored on the built `Rationer`; the setter clamps it to [0.0, 1.0].
delta: f64,
}
impl Default for RationerBuilder {
    /// Default options: the default [`Concurrency`], no dedicated thread
    /// pool, and a `delta` of `0.0`.
    fn default() -> Self {
        let concurrency = Concurrency::default();
        Self {
            concurrency,
            threads: None,
            delta: 0.0,
        }
    }
}
impl RationerBuilder {
    /// Sets the requested backend.
    #[must_use]
    pub fn concurrency(mut self, c: Concurrency) -> Self {
        self.concurrency = c;
        self
    }

    /// Requests a dedicated rayon thread pool with `n` threads.
    #[must_use]
    pub fn threads(mut self, n: usize) -> Self {
        self.threads = Some(n);
        self
    }

    /// Sets `delta`, clamped to the range `[0.0, 1.0]`.
    ///
    /// A NaN argument is replaced by the default `0.0`: `f64::clamp` would
    /// otherwise hand the NaN straight through to the built `Rationer`.
    #[must_use]
    pub fn delta(mut self, d: f64) -> Self {
        self.delta = if d.is_nan() { 0.0 } else { d.clamp(0.0, 1.0) };
        self
    }

    /// Consumes the builder and constructs the [`Rationer`].
    #[must_use]
    pub fn build(self) -> Rationer {
        Rationer::new_with(self.concurrency, self.threads, self.delta)
    }
}
/// Front end for ratio and clustering computations, holding the selected
/// backend, an optional dedicated thread pool and (on GPU builds) the GPU
/// context.
pub struct Rationer {
// Backend recorded at construction; always `Cpu` when GPU support is compiled out.
concurrency: Concurrency,
// Dedicated rayon pool; `None` when no thread count was given or the pool failed to build.
pool: Option<rayon::ThreadPool>,
// Value forwarded as the `delta` argument of the GPU scoring helpers.
delta: f64,
// GPU context; `None` when the CPU backend was requested or `Gpu::new()` returned `None`.
#[cfg(all(feature = "gpu", target_os = "macos"))]
gpu: Option<Gpu>,
// Guard from `BoostGuard::acquire()`, kept only so it lives as long as this value.
#[cfg(all(feature = "gpu", target_os = "macos"))]
_boost: Option<BoostGuard>,
}
impl Rationer {
/// Returns a [`RationerBuilder`] holding its default options.
#[must_use]
pub fn builder() -> RationerBuilder {
RationerBuilder::default()
}
/// Creates a `Rationer` with the default backend, no dedicated thread pool
/// and a `delta` of `0.0`.
#[must_use]
pub fn new() -> Self {
    let (concurrency, threads, delta) = (Concurrency::default(), None, 0.0);
    Self::new_with(concurrency, threads, delta)
}
/// Returns the `delta` value this instance was constructed with.
#[must_use]
pub fn delta(&self) -> f64 {
self.delta
}
/// Shared constructor used by `new` and by `RationerBuilder::build`.
///
/// A dedicated rayon pool is built only when `threads` is `Some`; if building
/// it fails the error is discarded and no pool is stored. On GPU builds a
/// boost guard is acquired and then a GPU context is requested when the
/// backend is `Gpu` or `GpuPlusCpu`. On all other builds the requested backend
/// is ignored and `Concurrency::Cpu` is recorded.
fn new_with(concurrency: Concurrency, threads: Option<usize>, delta: f64) -> Self {
    let pool = threads.and_then(|n| {
        let builder = rayon::ThreadPoolBuilder::new().num_threads(n);
        builder.build().ok()
    });
    #[cfg(all(feature = "gpu", target_os = "macos"))]
    {
        let (gpu, boost) = match concurrency {
            Concurrency::Gpu | Concurrency::GpuPlusCpu => {
                // The guard is acquired before the context is created.
                let guard = BoostGuard::acquire();
                let context = Gpu::new();
                (context, Some(guard))
            }
            Concurrency::Cpu => (None, None),
        };
        Self {
            concurrency,
            pool,
            delta,
            gpu,
            _boost: boost,
        }
    }
    #[cfg(not(all(feature = "gpu", target_os = "macos")))]
    {
        let _ = concurrency;
        Self {
            concurrency: Concurrency::Cpu,
            pool,
            delta,
        }
    }
}
/// Returns the backend recorded at construction time.
///
/// On builds without GPU support this is always `Concurrency::Cpu`.
#[must_use]
pub fn concurrency(&self) -> Concurrency {
self.concurrency
}
/// Runs `f` inside the dedicated thread pool when one is configured;
/// otherwise calls `f` directly on the calling thread.
fn with_pool<F, R>(&self, f: F) -> R
where
    F: FnOnce() -> R + Send,
    R: Send,
{
    match self.pool.as_ref() {
        Some(pool) => pool.install(f),
        None => f(),
    }
}
/// Computes the ratio of `a` and `b` by delegating to `crate::gestalt_ratio`.
///
/// The configured backend, thread pool and `delta` are not consulted.
#[must_use]
pub fn ratio(&self, a: &str, b: &str) -> f64 {
crate::gestalt_ratio(a, b)
}
/// Computes one ratio per input pair, in input order.
///
/// On GPU builds the GPU path is taken only when a GPU backend was requested,
/// a GPU context exists, and `pairs.len()` reaches the value of the
/// `DFGPU_RATIO_MANY_THRESHOLD` environment variable. When that variable is
/// unset or unparsable the threshold is `usize::MAX`. Every other case uses
/// the CPU implementation. The work runs inside the dedicated pool if any.
#[must_use]
pub fn ratio_many<S1, S2>(&self, pairs: &[(S1, S2)]) -> Vec<f64>
where
    S1: AsRef<str> + Sync,
    S2: AsRef<str> + Sync,
{
    #[cfg(all(feature = "gpu", target_os = "macos"))]
    {
        let gpu_requested =
            matches!(self.concurrency, Concurrency::Gpu | Concurrency::GpuPlusCpu);
        let min_pairs = std::env::var("DFGPU_RATIO_MANY_THRESHOLD")
            .ok()
            .and_then(|raw| raw.parse::<usize>().ok())
            .unwrap_or(usize::MAX);
        if gpu_requested && pairs.len() >= min_pairs {
            if let Some(gpu) = self.gpu.as_ref() {
                let delta = self.delta;
                return self.with_pool(|| ratio_many_via_gpu(gpu, pairs, delta));
            }
        }
    }
    self.with_pool(|| ratio_many_cpu(pairs))
}
/// Pre-processes `strings` so that many index-based ratio queries can reuse
/// the per-string work.
///
/// Each string is copied, split into `char`s and given a SAM via
/// `crate::gestalt::build_sam`. On GPU builds with a live GPU context, the
/// ASCII-only strings are additionally uploaded as a `CorpusGpu`;
/// `gpu_idx_for[i]` then holds the corpus index of string `i`, or `-1` when
/// the string was not uploaded.
///
/// The ASCII partition (and the SAM clones it needs) is skipped entirely when
/// no GPU context exists, because the result is only read together with a
/// corpus.
#[must_use]
pub fn prepare<S: AsRef<str>>(&self, strings: &[S]) -> PreparedRationer<'_> {
    use rayon::prelude::*;
    let owned: Vec<String> = strings.iter().map(|s| s.as_ref().to_owned()).collect();
    let chars_pool: Vec<Vec<char>> =
        self.with_pool(|| owned.par_iter().map(|s| s.chars().collect()).collect());
    let sams: Vec<crate::gestalt::Sam> = self.with_pool(|| {
        chars_pool
            .par_iter()
            .map(|c| crate::gestalt::build_sam(c))
            .collect()
    });
    #[cfg(all(feature = "gpu", target_os = "macos"))]
    {
        let mut gpu_idx_for: Vec<i32> = vec![-1; owned.len()];
        let mut corpus = None;
        if let Some(gpu) = self.gpu.as_ref() {
            let mut ascii_bytes: Vec<&[u8]> = Vec::new();
            let mut ascii_sams: Vec<crate::gestalt::Sam> = Vec::new();
            for (i, s) in owned.iter().enumerate() {
                if s.is_ascii() {
                    gpu_idx_for[i] = ascii_bytes.len() as i32;
                    ascii_bytes.push(s.as_bytes());
                    ascii_sams.push(sams[i].clone());
                }
            }
            // An empty corpus is never built.
            if !ascii_bytes.is_empty() {
                corpus = Some(CorpusGpu::build(gpu, &ascii_bytes, &ascii_sams));
            }
        }
        PreparedRationer {
            rationer: self,
            strings: owned,
            chars_pool,
            sams,
            corpus,
            gpu_idx_for,
        }
    }
    #[cfg(not(all(feature = "gpu", target_os = "macos")))]
    {
        PreparedRationer {
            rationer: self,
            strings: owned,
            chars_pool,
            sams,
        }
    }
}
/// Clusters pre-split strings at the given similarity `threshold`.
///
/// On GPU builds the GPU implementation is used when a GPU backend was
/// requested, a GPU context exists, the input has at least
/// `DFGPU_CLUSTER_THRESHOLD` entries (default 300) and every character is
/// ASCII. Otherwise the free function `cluster_canonicals_chars` runs on the
/// CPU. Either way the work runs inside the dedicated pool if any.
#[must_use]
pub fn cluster_canonicals_chars(
    &self,
    chars: &[Vec<char>],
    threshold: f64,
) -> Vec<(Vec<usize>, f64)> {
    #[cfg(all(feature = "gpu", target_os = "macos"))]
    {
        let gpu_requested =
            matches!(self.concurrency, Concurrency::Gpu | Concurrency::GpuPlusCpu);
        if let (true, Some(gpu)) = (gpu_requested, self.gpu.as_ref()) {
            let min_inputs = std::env::var("DFGPU_CLUSTER_THRESHOLD")
                .ok()
                .and_then(|raw| raw.parse::<usize>().ok())
                .unwrap_or(300);
            // The ASCII scan is only evaluated once the size check has passed.
            if chars.len() >= min_inputs && chars.iter().flatten().all(char::is_ascii) {
                return self.with_pool(|| {
                    cluster_canonicals_chars_via_gpu(gpu, chars, threshold, self.delta)
                });
            }
        }
    }
    self.with_pool(|| cluster_canonicals_chars(chars, threshold))
}
/// Splits every string into `char`s and forwards to
/// `cluster_canonicals_chars` with the same `threshold`.
#[must_use]
pub fn cluster_canonicals(
    &self,
    canonicals: &[String],
    threshold: f64,
) -> Vec<(Vec<usize>, f64)> {
    let mut chars: Vec<Vec<char>> = Vec::with_capacity(canonicals.len());
    for text in canonicals {
        chars.push(text.chars().collect());
    }
    self.cluster_canonicals_chars(&chars, threshold)
}
/// Clusters several independent groups of strings, returning one clustering
/// per group in input order.
///
/// On GPU builds the batched GPU implementation is used when a GPU backend
/// was requested, a GPU context exists and the total number of strings
/// reaches `DFGPU_MULTI_THRESHOLD` (default `usize::MAX`). Otherwise every
/// group is clustered on the CPU, with the groups processed in parallel.
#[must_use]
#[allow(clippy::too_many_lines)]
pub fn cluster_canonicals_multi(
    &self,
    groups: &[Vec<String>],
    threshold: f64,
) -> Vec<Vec<(Vec<usize>, f64)>> {
    use rayon::prelude::*;
    #[cfg(all(feature = "gpu", target_os = "macos"))]
    {
        let gpu_requested =
            matches!(self.concurrency, Concurrency::Gpu | Concurrency::GpuPlusCpu);
        let min_total = std::env::var("DFGPU_MULTI_THRESHOLD")
            .ok()
            .and_then(|raw| raw.parse::<usize>().ok())
            .unwrap_or(usize::MAX);
        let total: usize = groups.iter().map(Vec::len).sum();
        if gpu_requested && total >= min_total {
            if let Some(gpu) = self.gpu.as_ref() {
                return self.with_pool(|| {
                    cluster_canonicals_multi_via_gpu(gpu, groups, threshold, self.delta)
                });
            }
        }
    }
    self.with_pool(|| {
        groups
            .par_iter()
            .map(|group| {
                let mut chars: Vec<Vec<char>> = Vec::with_capacity(group.len());
                for text in group {
                    chars.push(text.chars().collect());
                }
                cluster_canonicals_chars(&chars, threshold)
            })
            .collect()
    })
}
}
impl Default for Rationer {
fn default() -> Self {
Self::new()
}
}
/// A fixed set of strings pre-processed by `Rationer::prepare`, queried by
/// index through `ratio_many_idx`.
pub struct PreparedRationer<'r> {
// The `Rationer` that created this value; supplies the pool, delta and GPU context.
rationer: &'r Rationer,
// Owned copies of the prepared strings.
strings: Vec<String>,
// `chars_pool[i]` is `strings[i]` split into `char`s.
chars_pool: Vec<Vec<char>>,
// `sams[i]` is the result of `build_sam` for `chars_pool[i]`.
sams: Vec<crate::gestalt::Sam>,
// GPU corpus of the ASCII-only strings; `None` without a GPU context or without any ASCII string.
#[cfg(all(feature = "gpu", target_os = "macos"))]
corpus: Option<CorpusGpu>,
// For each string, its index in `corpus`, or -1 when it was not uploaded.
#[cfg(all(feature = "gpu", target_os = "macos"))]
gpu_idx_for: Vec<i32>,
}
impl PreparedRationer<'_> {
    /// Number of prepared strings.
    #[must_use]
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when no strings were prepared.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Computes one ratio per `(i, j)` index pair, in input order.
    ///
    /// Uses the GPU path when both a corpus and a GPU context exist, and the
    /// CPU path otherwise.
    ///
    /// # Panics
    ///
    /// Panics with `ratio_many_idx: index out of bounds` if any index is not
    /// smaller than `self.len()`.
    #[must_use]
    pub fn ratio_many_idx(&self, pairs: &[(u32, u32)]) -> Vec<f64> {
        if pairs.is_empty() {
            return Vec::new();
        }
        // Compare in `usize`: narrowing the length to `u32` could wrap and
        // reject valid indices.
        let n_strings = self.strings.len();
        assert!(
            pairs
                .iter()
                .all(|&(i, j)| (i as usize) < n_strings && (j as usize) < n_strings),
            "ratio_many_idx: index out of bounds"
        );
        #[cfg(all(feature = "gpu", target_os = "macos"))]
        {
            if let (Some(corpus), Some(gpu)) = (self.corpus.as_ref(), self.rationer.gpu.as_ref()) {
                let delta = self.rationer.delta;
                return self.rationer.with_pool(|| {
                    ratio_many_via_prepared_gpu(
                        gpu,
                        corpus,
                        &self.sams,
                        &self.chars_pool,
                        &self.gpu_idx_for,
                        pairs,
                        delta,
                    )
                });
            }
        }
        self.rationer
            .with_pool(|| ratio_many_via_prepared_cpu(&self.chars_pool, &self.sams, pairs))
    }
}
/// GPU-backed scoring of index pairs against a prepared corpus.
///
/// Pairs whose two strings both have a corpus index (`gpu_idx_for[..] >= 0`)
/// are sent to the GPU matching-statistics kernel and finished with
/// `gestalt_edge_with_ms_delta`; a `None` from that helper is stored as `0.0`.
/// All remaining pairs are scored on the CPU with `gestalt_ratio_prebuilt`.
/// Results are returned in the order of `pairs`.
#[cfg(all(feature = "gpu", target_os = "macos"))]
fn ratio_many_via_prepared_gpu(
    gpu: &Gpu,
    corpus: &CorpusGpu,
    sams: &[crate::gestalt::Sam],
    chars_pool: &[Vec<char>],
    gpu_idx_for: &[i32],
    pairs: &[(u32, u32)],
    delta: f64,
) -> Vec<f64> {
    use rayon::prelude::*;

    let total = pairs.len();
    let mut out = vec![0.0f64; total];

    // Partition output slots into GPU-eligible and CPU-only.
    let mut gpu_pairs: Vec<(u32, u32)> = Vec::with_capacity(total);
    let mut gpu_slot_for: Vec<usize> = Vec::with_capacity(total);
    let mut cpu_slot_for: Vec<usize> = Vec::new();
    for (slot, &(a, b)) in pairs.iter().enumerate() {
        match (gpu_idx_for[a as usize], gpu_idx_for[b as usize]) {
            (ga, gb) if ga >= 0 && gb >= 0 => {
                gpu_pairs.push((ga as u32, gb as u32));
                gpu_slot_for.push(slot);
            }
            _ => cpu_slot_for.push(slot),
        }
    }

    if !gpu_pairs.is_empty() {
        // Inverse of `gpu_idx_for`: corpus index -> original string index.
        let mut orig_for: Vec<u32> = vec![u32::MAX; corpus.n_sams()];
        for (orig, corpus_idx) in gpu_idx_for.iter().copied().enumerate() {
            if corpus_idx >= 0 {
                orig_for[corpus_idx as usize] = orig as u32;
            }
        }
        let flat = gpu
            .matching_stats_by_b_partial_flat_with_timings(corpus, &gpu_pairs)
            .0;
        let fstate_all = flat.fstate_all();
        let fmatch_all = flat.fmatch_all();
        let scored: Vec<(usize, f64)> = (0..gpu_pairs.len())
            .into_par_iter()
            .map(|slot| {
                // `pair_orig_idx` maps an output slot back to its position in `gpu_pairs`.
                let submitted = flat.pair_orig_idx[slot] as usize;
                let (ga, gb) = gpu_pairs[submitted];
                let start = flat.out_offsets[slot] as usize;
                let end = flat.out_offsets[slot + 1] as usize;
                let a_orig = orig_for[ga as usize] as usize;
                let b_orig = orig_for[gb as usize] as usize;
                let score = crate::gestalt::gestalt_edge_with_ms_delta(
                    &chars_pool[a_orig],
                    &chars_pool[b_orig],
                    &sams[b_orig],
                    &fstate_all[start..end],
                    &fmatch_all[start..end],
                    0.0,
                    delta,
                )
                .unwrap_or(0.0);
                (gpu_slot_for[submitted], score)
            })
            .collect();
        for (slot, score) in scored {
            out[slot] = score;
        }
    }

    if !cpu_slot_for.is_empty() {
        let scored: Vec<(usize, f64)> = cpu_slot_for
            .par_iter()
            .map(|&slot| {
                let (i, j) = pairs[slot];
                let j = j as usize;
                let score = crate::gestalt::gestalt_ratio_prebuilt(
                    &chars_pool[i as usize],
                    &chars_pool[j],
                    &sams[j],
                );
                (slot, score)
            })
            .collect();
        for (slot, score) in scored {
            out[slot] = score;
        }
    }
    out
}
/// CPU scoring of index pairs against prepared `char` vectors and SAMs.
///
/// Pairs are visited in order of `(second index, first index)` — presumably so
/// neighbouring work items share the same SAM — but each result is written
/// back to the slot of its original pair, so the output matches `pairs`.
fn ratio_many_via_prepared_cpu(
    chars_pool: &[Vec<char>],
    sams: &[crate::gestalt::Sam],
    pairs: &[(u32, u32)],
) -> Vec<f64> {
    use rayon::prelude::*;

    let total = pairs.len();
    let mut visit_order: Vec<u32> = (0..total as u32).collect();
    visit_order.sort_unstable_by_key(|&slot| {
        let (first, second) = pairs[slot as usize];
        (second, first)
    });

    let scored: Vec<(usize, f64)> = visit_order
        .par_iter()
        .map(|&slot| {
            let slot = slot as usize;
            let (first, second) = pairs[slot];
            let second = second as usize;
            let score = crate::gestalt::gestalt_ratio_prebuilt(
                &chars_pool[first as usize],
                &chars_pool[second],
                &sams[second],
            );
            (slot, score)
        })
        .collect();

    let mut out = vec![0.0f64; total];
    for (slot, score) in scored {
        out[slot] = score;
    }
    out
}
fn ratio_many_cpu<S1, S2>(pairs: &[(S1, S2)]) -> Vec<f64>
where
S1: AsRef<str> + Sync,
S2: AsRef<str> + Sync,
{
use std::collections::HashMap;
use rayon::prelude::*;
if pairs.is_empty() {
return Vec::new();
}
let mut pool: Vec<String> = Vec::new();
let mut by_str: HashMap<String, u32> = HashMap::new();
let mut pair_idx: Vec<(u32, u32)> = Vec::with_capacity(pairs.len());
for p in pairs {
let a: &str = p.0.as_ref();
let b: &str = p.1.as_ref();
let mut intern = |s: &str| -> u32 {
if let Some(&id) = by_str.get(s) {
return id;
}
let idx = pool.len() as u32;
pool.push(s.to_owned());
by_str.insert(s.to_owned(), idx);
idx
};
let ai = intern(a);
let bi = intern(b);
pair_idx.push((ai, bi));
}
drop(by_str);
let chars_pool: Vec<Vec<char>> =
pool.par_iter().map(|s| s.chars().collect()).collect();
let sams: Vec<crate::gestalt::Sam> =
chars_pool.par_iter().map(|c| crate::gestalt::build_sam(c)).collect();
let n = pair_idx.len();
let mut perm: Vec<u32> = (0..n as u32).collect();
perm.sort_unstable_by_key(|&i| {
let (a, b) = pair_idx[i as usize];
(b, a)
});
let mut out = vec![0.0f64; n];
let results: Vec<(usize, f64)> = perm
.par_iter()
.map(|&pi| {
let pi_us = pi as usize;
let (ai, bi) = pair_idx[pi_us];
let a = &chars_pool[ai as usize];
let b = &chars_pool[bi as usize];
let sam_b = &sams[bi as usize];
let r = crate::gestalt::gestalt_ratio_prebuilt(a, b, sam_b);
(pi_us, r)
})
.collect();
for (pi, r) in results {
out[pi] = r;
}
out
}
/// GPU implementation of `Rationer::ratio_many`.
///
/// ASCII strings are interned and uploaded as a `CorpusGpu`. Pairs whose two
/// strings are both ASCII are scored from the GPU matching statistics via
/// `gestalt_edge_with_ms_delta`, with a `None` result stored as `0.0`. Any
/// pair containing a non-ASCII string is scored on the CPU with
/// `crate::gestalt_ratio`. Results are returned in the order of `pairs`.
///
/// SAMs and the GPU corpus are built only when at least one pair is
/// GPU-eligible, so an empty corpus is never built (the same guard
/// `Rationer::prepare` applies).
#[cfg(all(feature = "gpu", target_os = "macos"))]
#[allow(clippy::too_many_lines)]
fn ratio_many_via_gpu<S1, S2>(gpu: &Gpu, pairs: &[(S1, S2)], delta: f64) -> Vec<f64>
where
    S1: AsRef<str> + Sync,
    S2: AsRef<str> + Sync,
{
    use crate::gpu::CorpusGpu;
    use rayon::prelude::*;
    use std::collections::HashMap;

    /// Returns the pool id of an ASCII string (assigning one on first sight),
    /// or `None` for a string containing non-ASCII characters.
    fn intern<'a>(
        s: &'a str,
        pool: &mut Vec<&'a str>,
        ids: &mut std::collections::HashMap<&'a str, u32>,
    ) -> Option<u32> {
        if !s.is_ascii() {
            return None;
        }
        Some(*ids.entry(s).or_insert_with(|| {
            let id = pool.len() as u32;
            pool.push(s);
            id
        }))
    }

    let n_pairs = pairs.len();
    if n_pairs == 0 {
        return Vec::new();
    }

    let mut pool: Vec<&str> = Vec::new();
    let mut ids: HashMap<&str, u32> = HashMap::new();
    let mut gpu_pairs: Vec<(u32, u32)> = Vec::with_capacity(n_pairs);
    let mut gpu_slot_for: Vec<usize> = Vec::with_capacity(n_pairs);
    let mut cpu_slot_for: Vec<usize> = Vec::new();
    for (slot, (a, b)) in pairs.iter().enumerate() {
        let a: &str = a.as_ref();
        let b: &str = b.as_ref();
        // Both sides are interned before the pair is classified, so pool ids
        // do not depend on the partner string.
        let ai = intern(a, &mut pool, &mut ids);
        let bi = intern(b, &mut pool, &mut ids);
        match (ai, bi) {
            (Some(ai), Some(bi)) => {
                gpu_pairs.push((ai, bi));
                gpu_slot_for.push(slot);
            }
            _ => cpu_slot_for.push(slot),
        }
    }
    drop(ids);

    let mut out = vec![0.0f64; n_pairs];

    if !gpu_pairs.is_empty() {
        let chars_pool: Vec<Vec<char>> = pool.par_iter().map(|s| s.chars().collect()).collect();
        let sams: Vec<crate::gestalt::Sam> = chars_pool
            .par_iter()
            .map(|c| crate::gestalt::build_sam(c))
            .collect();
        let byte_refs: Vec<&[u8]> = pool.iter().map(|s| s.as_bytes()).collect();
        let corpus = CorpusGpu::build(gpu, &byte_refs, &sams);

        let flat = gpu
            .matching_stats_by_b_partial_flat_with_timings(&corpus, &gpu_pairs)
            .0;
        let fstate_all = flat.fstate_all();
        let fmatch_all = flat.fmatch_all();
        let results: Vec<(usize, f64)> = (0..gpu_pairs.len())
            .into_par_iter()
            .map(|slot| {
                // `pair_orig_idx` maps an output slot back to its position in `gpu_pairs`.
                let orig = flat.pair_orig_idx[slot] as usize;
                let (a_idx, b_idx) = gpu_pairs[orig];
                let lo = flat.out_offsets[slot] as usize;
                let hi = flat.out_offsets[slot + 1] as usize;
                let fstate = &fstate_all[lo..hi];
                let fmatch = &fmatch_all[lo..hi];
                let r = crate::gestalt::gestalt_edge_with_ms_delta(
                    &chars_pool[a_idx as usize],
                    &chars_pool[b_idx as usize],
                    &sams[b_idx as usize],
                    fstate,
                    fmatch,
                    0.0,
                    delta,
                )
                .unwrap_or(0.0);
                (gpu_slot_for[orig], r)
            })
            .collect();
        for (slot, r) in results {
            out[slot] = r;
        }
    }

    let cpu_results: Vec<(usize, f64)> = cpu_slot_for
        .par_iter()
        .map(|&i| {
            let (a, b) = &pairs[i];
            (i, crate::gestalt_ratio(a.as_ref(), b.as_ref()))
        })
        .collect();
    for (slot, r) in cpu_results {
        out[slot] = r;
    }
    out
}
/// GPU implementation of `Rationer::cluster_canonicals_multi`.
///
/// All groups share one deduplicated string table and one `CorpusGpu`. For
/// every all-ASCII group, candidate pairs that survive the `real_quick_ratio`
/// and `quick_ratio_counts` filters are scored from GPU matching statistics,
/// and the surviving edges are passed to `assemble`. Any group that contains
/// a non-ASCII string is clustered on the CPU with `cluster_canonicals_chars`.
/// One clustering per group is returned, in input order.
///
/// `DFGPU_MAX_PAIRS` (default 250 000) bounds the number of pairs per GPU
/// dispatch; unset, unparsable or zero values fall back to the default.
#[cfg(all(feature = "gpu", target_os = "macos"))]
#[allow(clippy::too_many_lines)]
fn cluster_canonicals_multi_via_gpu(
    gpu: &Gpu,
    groups: &[Vec<String>],
    threshold: f64,
    delta: f64,
) -> Vec<Vec<(Vec<usize>, f64)>> {
    use crate::gpu::CorpusGpu;
    use crate::{assemble, char_counts, quick_ratio_counts, real_quick_ratio};
    use rayon::prelude::*;

    if groups.is_empty() {
        return Vec::new();
    }

    // Prefix sums: group `g` owns flat indices `group_offsets[g]..group_offsets[g + 1]`.
    let mut group_offsets: Vec<usize> = Vec::with_capacity(groups.len() + 1);
    group_offsets.push(0);
    for g in groups {
        let end = group_offsets[group_offsets.len() - 1] + g.len();
        group_offsets.push(end);
    }
    let total = group_offsets[groups.len()];

    // Deduplicate strings across all groups.
    let mut unique_idx: std::collections::HashMap<&str, u32> = std::collections::HashMap::new();
    let mut unique_strings: Vec<&str> = Vec::new();
    let mut flat_to_unique: Vec<u32> = Vec::with_capacity(total);
    for s in groups.iter().flatten() {
        let s_ref = s.as_str();
        let u = if let Some(&u) = unique_idx.get(s_ref) {
            u
        } else {
            let u = unique_strings.len() as u32;
            unique_strings.push(s_ref);
            unique_idx.insert(s_ref, u);
            u
        };
        flat_to_unique.push(u);
    }

    let unique_chars: Vec<Vec<char>> = unique_strings
        .par_iter()
        .map(|s| s.chars().collect())
        .collect();
    let unique_sams: Vec<crate::gestalt::Sam> = unique_chars
        .par_iter()
        .map(|c| crate::gestalt::build_sam(c))
        .collect();
    let unique_ascii: Vec<bool> = unique_chars
        .iter()
        .map(|c| c.iter().all(char::is_ascii))
        .collect();
    let group_ascii: Vec<bool> = (0..groups.len())
        .map(|gi| {
            (group_offsets[gi]..group_offsets[gi + 1])
                .all(|i| unique_ascii[flat_to_unique[i] as usize])
        })
        .collect();

    // Non-ASCII strings get an empty byte slice so corpus indices stay aligned
    // with `unique_chars`.
    let unique_bytes: Vec<Vec<u8>> = unique_chars
        .iter()
        .zip(&unique_ascii)
        .map(|(c, &ascii)| {
            if ascii {
                c.iter().map(|&ch| ch as u8).collect()
            } else {
                Vec::new()
            }
        })
        .collect();
    let byte_refs: Vec<&[u8]> = unique_bytes.iter().map(Vec::as_slice).collect();
    let corpus = CorpusGpu::build(gpu, &byte_refs, &unique_sams);

    let unique_counts: Vec<Vec<(char, u32)>> =
        unique_chars.par_iter().map(|c| char_counts(c)).collect();

    // Candidate tuples: (unique lo, unique hi, flat lo, flat hi, group).
    let per_group_candidates: Vec<Vec<(u32, u32, u32, u32, u32)>> = (0..groups.len())
        .into_par_iter()
        .map(|gi| {
            if !group_ascii[gi] {
                return Vec::new();
            }
            let lo = group_offsets[gi];
            let hi = group_offsets[gi + 1];
            let mut order: Vec<usize> = (lo..hi).collect();
            order.sort_by_key(|&i| unique_chars[flat_to_unique[i] as usize].len());
            let mut out: Vec<(u32, u32, u32, u32, u32)> = Vec::new();
            for (p, &i) in order.iter().enumerate() {
                let i_u = flat_to_unique[i] as usize;
                let ci = &unique_chars[i_u];
                for &j in &order[p + 1..] {
                    let j_u = flat_to_unique[j] as usize;
                    let cj = &unique_chars[j_u];
                    // `order` is sorted by length, so once this bound fails the
                    // scan for `i` stops.
                    if real_quick_ratio(ci, cj) < threshold {
                        break;
                    }
                    if quick_ratio_counts(
                        &unique_counts[i_u],
                        &unique_counts[j_u],
                        ci.len() + cj.len(),
                    ) < threshold
                    {
                        continue;
                    }
                    let (lo_flat, hi_flat) = if i < j { (i, j) } else { (j, i) };
                    out.push((
                        flat_to_unique[lo_flat],
                        flat_to_unique[hi_flat],
                        lo_flat as u32,
                        hi_flat as u32,
                        gi as u32,
                    ));
                }
            }
            out
        })
        .collect();

    let mut pairs_for_gpu: Vec<(u32, u32)> = Vec::new();
    let mut pair_flat: Vec<(u32, u32)> = Vec::new();
    let mut pair_group: Vec<u32> = Vec::new();
    for &(au, bu, af, bf, gi) in per_group_candidates.iter().flatten() {
        pairs_for_gpu.push((au, bu));
        pair_flat.push((af, bf));
        pair_group.push(gi);
    }

    let mut per_group_edges: Vec<Vec<(usize, usize, f64)>> = vec![Vec::new(); groups.len()];
    if !pairs_for_gpu.is_empty() {
        // A zero chunk size would make `step_by` panic, so treat it as unset.
        let max_pairs_per_dispatch: usize = std::env::var("DFGPU_MAX_PAIRS")
            .ok()
            .and_then(|s| s.parse().ok())
            .filter(|&n: &usize| n > 0)
            .unwrap_or(250_000);
        for chunk_start in (0..pairs_for_gpu.len()).step_by(max_pairs_per_dispatch) {
            let chunk_end = (chunk_start + max_pairs_per_dispatch).min(pairs_for_gpu.len());
            let chunk_pairs = &pairs_for_gpu[chunk_start..chunk_end];
            let chunk_pair_flat = &pair_flat[chunk_start..chunk_end];
            let chunk_pair_group = &pair_group[chunk_start..chunk_end];
            let flat = gpu.matching_stats_batched_flat(&corpus, chunk_pairs);
            let fstate_all = flat.fstate_all();
            let fmatch_all = flat.fmatch_all();
            let chunk_edges: Vec<(u32, u32, u32, f64)> = (0..chunk_pairs.len())
                .into_par_iter()
                .filter_map(|slot| {
                    // `pair_orig_idx` maps an output slot back to its position in the chunk.
                    let orig = flat.pair_orig_idx[slot] as usize;
                    let (au, bu) = chunk_pairs[orig];
                    let (af, bf) = chunk_pair_flat[orig];
                    let gi = chunk_pair_group[orig];
                    let lo_st = flat.out_offsets[slot] as usize;
                    let hi_st = flat.out_offsets[slot + 1] as usize;
                    let ratio = crate::gestalt::gestalt_edge_with_ms_delta(
                        &unique_chars[au as usize],
                        &unique_chars[bu as usize],
                        &unique_sams[bu as usize],
                        &fstate_all[lo_st..hi_st],
                        &fmatch_all[lo_st..hi_st],
                        threshold,
                        delta,
                    )?;
                    Some((af, bf, gi, ratio))
                })
                .collect();
            // Convert flat indices to group-local indices.
            for (af, bf, gi, ratio) in chunk_edges {
                let g = gi as usize;
                let base = group_offsets[g];
                per_group_edges[g].push((af as usize - base, bf as usize - base, ratio));
            }
        }
    }

    // Every non-ASCII group takes the CPU fallback, not just the first one
    // found; ASCII groups are assembled from their GPU edges.
    per_group_edges
        .into_par_iter()
        .enumerate()
        .map(|(gi, edges)| {
            let lo = group_offsets[gi];
            let hi = group_offsets[gi + 1];
            let chars_k: Vec<Vec<char>> = (lo..hi)
                .map(|i| unique_chars[flat_to_unique[i] as usize].clone())
                .collect();
            if !group_ascii[gi] {
                return cluster_canonicals_chars(&chars_k, threshold);
            }
            let sams_k: Vec<crate::gestalt::Sam> = (lo..hi)
                .map(|i| unique_sams[flat_to_unique[i] as usize].clone())
                .collect();
            assemble(hi - lo, edges, &chars_k, &sams_k)
        })
        .collect()
}
/// GPU implementation of `Rationer::cluster_canonicals_chars`.
///
/// The caller in this file only takes this path for all-ASCII input, which is
/// what makes the `char as u8` narrowing below lossless. Candidate pairs that
/// pass the `real_quick_ratio` and `quick_ratio_counts` filters are scored
/// from GPU matching statistics, and pairs for which
/// `gestalt_edge_with_ms_delta` yields a ratio become edges for `assemble`.
/// Inputs with fewer than two strings produce an empty result.
#[cfg(all(feature = "gpu", target_os = "macos"))]
#[allow(clippy::too_many_lines)]
fn cluster_canonicals_chars_via_gpu(
    gpu: &Gpu,
    chars: &[Vec<char>],
    threshold: f64,
    delta: f64,
) -> Vec<(Vec<usize>, f64)> {
    use crate::gpu::CorpusGpu;
    use crate::{assemble, char_counts, quick_ratio_counts, real_quick_ratio};
    use rayon::prelude::*;

    let n = chars.len();
    if n < 2 {
        return Vec::new();
    }

    let sams: Vec<crate::gestalt::Sam> = chars
        .par_iter()
        .map(|c| crate::gestalt::build_sam(c))
        .collect();
    let bytes: Vec<Vec<u8>> = chars
        .iter()
        .map(|c| c.iter().map(|&ch| ch as u8).collect())
        .collect();
    let byte_refs: Vec<&[u8]> = bytes.iter().map(Vec::as_slice).collect();
    let corpus = CorpusGpu::build(gpu, &byte_refs, &sams);

    // Indices ordered by string length (stable, so ties keep input order).
    let mut by_len: Vec<usize> = (0..n).collect();
    by_len.sort_by_key(|&i| chars[i].len());
    let counts: Vec<Vec<(char, u32)>> = chars.par_iter().map(|c| char_counts(c)).collect();

    let candidates: Vec<(u32, u32)> = (0..n)
        .into_par_iter()
        .flat_map_iter(|p| {
            let i = by_len[p];
            let mut found: Vec<(u32, u32)> = Vec::new();
            for &j in &by_len[p + 1..] {
                // Once this bound fails the scan for `i` stops.
                if real_quick_ratio(&chars[i], &chars[j]) < threshold {
                    break;
                }
                let combined_len = chars[i].len() + chars[j].len();
                if quick_ratio_counts(&counts[i], &counts[j], combined_len) < threshold {
                    continue;
                }
                let (lo, hi) = if i < j { (i, j) } else { (j, i) };
                found.push((lo as u32, hi as u32));
            }
            found
        })
        .collect();

    if candidates.is_empty() {
        return assemble(n, Vec::new(), chars, &sams);
    }

    let flat = gpu.matching_stats_batched_flat(&corpus, &candidates);
    let fstate_all = flat.fstate_all();
    let fmatch_all = flat.fmatch_all();
    let edges: Vec<(usize, usize, f64)> = (0..candidates.len())
        .into_par_iter()
        .filter_map(|slot| {
            // `pair_orig_idx` maps an output slot back to its position in `candidates`.
            let submitted = flat.pair_orig_idx[slot] as usize;
            let (a_idx, b_idx) = candidates[submitted];
            let (a_idx, b_idx) = (a_idx as usize, b_idx as usize);
            let start = flat.out_offsets[slot] as usize;
            let end = flat.out_offsets[slot + 1] as usize;
            let ratio = crate::gestalt::gestalt_edge_with_ms_delta(
                &chars[a_idx],
                &chars[b_idx],
                &sams[b_idx],
                &fstate_all[start..end],
                &fmatch_all[start..end],
                threshold,
                delta,
            )?;
            Some((a_idx, b_idx, ratio))
        })
        .collect();
    assemble(n, edges, chars, &sams)
}
// On GPU builds `Rationer` is manually marked `Send` and `Sync`.
// NOTE(review): the invariant that makes these impls sound is not visible in
// this file. It presumably depends on `Gpu` and `BoostGuard` being safe to
// move and share across threads — TODO confirm against `crate::gpu` and
// record the argument here as a `SAFETY:` comment.
#[cfg(all(feature = "gpu", target_os = "macos"))]
unsafe impl Send for Rationer {}
#[cfg(all(feature = "gpu", target_os = "macos"))]
unsafe impl Sync for Rationer {}