ragc_core/ffi/
splitters.rs

1// Rust FFI for splitter detection (called from C++ AGC)
2
3use crate::{determine_splitters, GenomeIO};
4use std::ffi::CStr;
5use std::fs::File;
6use std::os::raw::c_char;
7
/// C-compatible result structure for splitter detection.
///
/// Holds three independent `(pointer, length)` pairs. A value of this type is
/// produced by `ragc_determine_splitters` and must be released with
/// `ragc_free_splitters`, which frees both the arrays and the struct itself.
///
/// Field order is part of the C ABI (`#[repr(C)]`) and must not be changed.
#[repr(C)]
#[derive(Debug)]
pub struct SplitterResult {
    /// Start of the splitter array (`n_splitters` elements).
    pub splitters: *mut u64,
    /// Number of `u64` values reachable through `splitters`.
    pub n_splitters: usize,
    /// Start of the singleton array (`n_singletons` elements).
    pub singletons: *mut u64,
    /// Number of `u64` values reachable through `singletons`.
    pub n_singletons: usize,
    /// Start of the duplicate array (`n_duplicates` elements).
    pub duplicates: *mut u64,
    /// Number of `u64` values reachable through `duplicates`.
    pub n_duplicates: usize,
}
18
19/// Free memory allocated by splitter detection
20#[no_mangle]
21pub extern "C" fn ragc_free_splitters(result: *mut SplitterResult) {
22    if result.is_null() {
23        return;
24    }
25
26    unsafe {
27        let r = Box::from_raw(result);
28
29        if !r.splitters.is_null() {
30            drop(Vec::from_raw_parts(
31                r.splitters,
32                r.n_splitters,
33                r.n_splitters,
34            ));
35        }
36        if !r.singletons.is_null() {
37            drop(Vec::from_raw_parts(
38                r.singletons,
39                r.n_singletons,
40                r.n_singletons,
41            ));
42        }
43        if !r.duplicates.is_null() {
44            drop(Vec::from_raw_parts(
45                r.duplicates,
46                r.n_duplicates,
47                r.n_duplicates,
48            ));
49        }
50    }
51}
52
53/// Determine splitters from a FASTA file using RAGC's algorithm
54///
55/// This function is called from C++ AGC to replace its native splitter detection.
56/// It produces identical results but uses Rust implementation.
57///
58/// # Safety
59/// - `file_path` must be a valid null-terminated C string
60/// - Caller must call `ragc_free_splitters()` to free the result
61///
62/// # Returns
63/// Pointer to SplitterResult (NULL on error)
64#[no_mangle]
65pub extern "C" fn ragc_determine_splitters(
66    file_path: *const c_char,
67    k: u32,
68    segment_size: u32,
69    verbosity: u32,
70) -> *mut SplitterResult {
71    // Convert C string to Rust
72    let path_str = unsafe {
73        if file_path.is_null() {
74            eprintln!("ragc_determine_splitters: NULL file path");
75            return std::ptr::null_mut();
76        }
77        match CStr::from_ptr(file_path).to_str() {
78            Ok(s) => s,
79            Err(e) => {
80                eprintln!("ragc_determine_splitters: Invalid UTF-8 in path: {}", e);
81                return std::ptr::null_mut();
82            }
83        }
84    };
85
86    if verbosity > 0 {
87        eprintln!("[RAGC] Gathering reference k-mers from: {}", path_str);
88    }
89
90    // Read contigs from file
91    let file = match File::open(path_str) {
92        Ok(f) => f,
93        Err(e) => {
94            eprintln!("ragc_determine_splitters: Cannot open file: {}", e);
95            return std::ptr::null_mut();
96        }
97    };
98
99    let mut gio = GenomeIO::new(file);
100    let mut contigs = Vec::new();
101
102    while let Ok(Some((_name, contig))) = gio.read_contig_converted() {
103        contigs.push(contig);
104    }
105
106    if verbosity > 0 {
107        eprintln!("[RAGC] Loaded {} contigs", contigs.len());
108    }
109
110    // Run splitter detection
111    if verbosity > 0 {
112        eprintln!("[RAGC] Determination of splitters");
113    }
114
115    let (splitters_set, singletons_set, duplicates_set) =
116        determine_splitters(&contigs, k as usize, segment_size as usize);
117
118    if verbosity > 1 {
119        eprintln!("[RAGC] No. of singletons: {}", singletons_set.len());
120        eprintln!("[RAGC] No. of duplicates: {}", duplicates_set.len());
121        eprintln!("[RAGC] No. of splitters: {}", splitters_set.len());
122    }
123
124    // Convert to C-compatible arrays
125    let mut splitters: Vec<u64> = splitters_set.into_iter().collect();
126    let mut singletons: Vec<u64> = singletons_set.into_iter().collect();
127    let mut duplicates: Vec<u64> = duplicates_set.into_iter().collect();
128
129    // Sort for deterministic output (C++ AGC expects sorted)
130    splitters.sort_unstable();
131    singletons.sort_unstable();
132    duplicates.sort_unstable();
133
134    let n_splitters = splitters.len();
135    let n_singletons = singletons.len();
136    let n_duplicates = duplicates.len();
137
138    // Transfer ownership to C
139    let splitters_ptr = splitters.as_mut_ptr();
140    let singletons_ptr = singletons.as_mut_ptr();
141    let duplicates_ptr = duplicates.as_mut_ptr();
142
143    std::mem::forget(splitters);
144    std::mem::forget(singletons);
145    std::mem::forget(duplicates);
146
147    let result = Box::new(SplitterResult {
148        splitters: splitters_ptr,
149        n_splitters,
150        singletons: singletons_ptr,
151        n_singletons,
152        duplicates: duplicates_ptr,
153        n_duplicates,
154    });
155
156    Box::into_raw(result)
157}