use crate::kmer::{Kmer, KmerMode};
use ragc_common::hash::MurMur64Hash;
use std::slice;
#[repr(C)]
pub struct FindSplittersResult {
pub splitters_data: *mut u64,
pub splitters_len: usize,
pub fallbacks_data: *mut u64,
pub fallbacks_len: usize,
}
#[no_mangle]
pub extern "C" fn ragc_find_splitters_in_contig(
contig_data: *const u8,
contig_len: usize,
k: u32,
segment_size: u64,
candidate_kmers_ptr: *const u64,
candidate_kmers_len: usize,
fallback_threshold: u64,
) -> FindSplittersResult {
unsafe {
let contig = slice::from_raw_parts(contig_data, contig_len);
let candidate_kmers = slice::from_raw_parts(candidate_kmers_ptr, candidate_kmers_len);
eprintln!("[RUST FFI] find_splitters_in_contig called:");
eprintln!("[RUST FFI] contig_len: {}", contig_len);
eprintln!("[RUST FFI] k: {}", k);
eprintln!("[RUST FFI] segment_size: {}", segment_size);
eprintln!("[RUST FFI] candidate_kmers_len: {}", candidate_kmers_len);
if candidate_kmers_len > 0 {
eprintln!(
"[RUST FFI] first 5 candidates: {:?}",
&candidate_kmers[..candidate_kmers_len.min(5)]
);
}
let mut splitters = Vec::new();
let mut fallbacks: Vec<[u64; 4]> = Vec::new();
let mut current_len = segment_size;
let mut v_recent_kmers = Vec::new();
let mut kmer = Kmer::new(k, KmerMode::Canonical);
let mut prev_splitter = u64::MAX; let mut fallback_kmers_in_segment: Vec<(u64, bool)> = Vec::new();
const RND: u64 = 0xD73F8BF11046C40E;
kmer.reset();
let mut checked_count = 0;
let mut found_count = 0;
for &base in contig {
if base > 3 {
kmer.reset();
} else {
kmer.insert(base as u64);
if kmer.is_full() {
let kmer_value = kmer.data();
v_recent_kmers.push(kmer_value);
let passes_fallback_filter = if fallback_threshold == 0 {
false
} else {
(MurMur64Hash::hash(kmer_value) ^ RND) < fallback_threshold
};
if passes_fallback_filter && kmer.data_dir() != kmer.data_rc() {
fallback_kmers_in_segment.push((kmer_value, kmer.is_dir_oriented()));
}
if current_len >= segment_size {
checked_count += 1;
if is_splitter(kmer_value, candidate_kmers) {
found_count += 1;
if found_count <= 3 {
eprintln!("[RUST FFI] Found splitter #{}: kmer_value=0x{:016x} at current_len={}",
found_count, kmer_value, current_len);
eprintln!(
"[RUST FFI] kmer_dir=0x{:016x}, kmer_rc=0x{:016x}",
kmer.data_dir(),
kmer.data_rc()
);
}
splitters.push(kmer_value);
for &(fallback_kmer, is_dir) in &fallback_kmers_in_segment {
fallbacks.push([
prev_splitter,
kmer_value,
fallback_kmer,
is_dir as u64,
]);
}
fallback_kmers_in_segment.clear();
prev_splitter = kmer_value;
current_len = 0;
kmer.reset();
v_recent_kmers.clear();
}
}
}
}
current_len += 1;
}
for &kmer_value in v_recent_kmers.iter().rev() {
if is_splitter(kmer_value, candidate_kmers) {
found_count += 1;
eprintln!(
"[RUST FFI] Found rightmost splitter: kmer_value=0x{:016x}",
kmer_value
);
splitters.push(kmer_value);
for &(fallback_kmer, is_dir) in &fallback_kmers_in_segment {
fallbacks.push([prev_splitter, kmer_value, fallback_kmer, is_dir as u64]);
}
break;
}
}
eprintln!(
"[RUST FFI] Total: checked {} boundary positions, found {} splitters",
checked_count, found_count
);
let mut fallbacks_flat: Vec<u64> = Vec::with_capacity(fallbacks.len() * 4);
for entry in fallbacks {
fallbacks_flat.extend_from_slice(&entry);
}
let splitters_ptr = splitters.as_mut_ptr();
let splitters_len = splitters.len();
std::mem::forget(splitters);
let fallbacks_ptr = fallbacks_flat.as_mut_ptr();
let fallbacks_len = fallbacks_flat.len() / 4; std::mem::forget(fallbacks_flat);
FindSplittersResult {
splitters_data: splitters_ptr,
splitters_len,
fallbacks_data: fallbacks_ptr,
fallbacks_len,
}
}
}
#[no_mangle]
pub extern "C" fn ragc_free_find_splitters_result(result: FindSplittersResult) {
unsafe {
if !result.splitters_data.is_null() && result.splitters_len > 0 {
let _ = Vec::from_raw_parts(
result.splitters_data,
result.splitters_len,
result.splitters_len,
);
}
if !result.fallbacks_data.is_null() && result.fallbacks_len > 0 {
let flat_len = result.fallbacks_len * 4;
let _ = Vec::from_raw_parts(result.fallbacks_data, flat_len, flat_len);
}
}
}
#[inline]
fn is_splitter(kmer_value: u64, candidate_kmers: &[u64]) -> bool {
candidate_kmers.binary_search(&kmer_value).is_ok()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_find_splitters_simple() {
let contig = vec![0u8, 1, 2, 3, 0, 1, 2, 3];
let k = 3;
let segment_size = 5;
let candidate_kmers: Vec<u64> = vec![];
let result = ragc_find_splitters_in_contig(
contig.as_ptr(),
contig.len(),
k,
segment_size,
candidate_kmers.as_ptr(),
candidate_kmers.len(),
0, );
unsafe {
let splitters = slice::from_raw_parts(result.splitters_data, result.splitters_len);
assert_eq!(splitters.len(), 0);
}
ragc_free_find_splitters_result(result);
}
#[test]
fn test_find_splitters_with_candidates() {
let contig = vec![0u8, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
let k = 3;
let segment_size = 3;
let mut kmer = Kmer::new(k, KmerMode::Canonical);
let mut candidate_kmers = Vec::new();
for &base in &contig {
if base <= 3 {
kmer.insert(base as u64);
if kmer.is_full() {
candidate_kmers.push(kmer.data());
}
}
}
candidate_kmers.sort_unstable();
candidate_kmers.dedup();
let result = ragc_find_splitters_in_contig(
contig.as_ptr(),
contig.len(),
k,
segment_size,
candidate_kmers.as_ptr(),
candidate_kmers.len(),
0, );
unsafe {
let splitters = slice::from_raw_parts(result.splitters_data, result.splitters_len);
assert!(splitters.len() > 0);
}
ragc_free_find_splitters_result(result);
}
#[test]
fn test_fallback_filtering() {
let contig = vec![0u8, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3];
let k = 3;
let segment_size = 3;
let mut kmer = Kmer::new(k, KmerMode::Canonical);
let mut candidate_kmers = Vec::new();
for &base in &contig {
if base <= 3 {
kmer.insert(base as u64);
if kmer.is_full() {
candidate_kmers.push(kmer.data());
}
}
}
candidate_kmers.sort_unstable();
candidate_kmers.dedup();
let fallback_threshold = u64::MAX / 2;
let result = ragc_find_splitters_in_contig(
contig.as_ptr(),
contig.len(),
k,
segment_size,
candidate_kmers.as_ptr(),
candidate_kmers.len(),
fallback_threshold,
);
let _fallbacks_len = result.fallbacks_len;
ragc_free_find_splitters_result(result);
}
}