ragc_core/ffi/
reverse_complement.rs

1// FFI helper for reverse complement operations
2// Core DNA sequence transformation used in segmentation
3
/// Returns the complementary base for a numerically encoded nucleotide.
///
/// Encoding and mapping:
/// - A (0) <-> T (3)
/// - C (1) <-> G (2)
/// - N or any other code (4 and above) is returned unchanged
#[inline]
fn complement_base(base: u8) -> u8 {
    match base {
        // The four canonical codes mirror around the middle of 0..=3.
        0..=3 => 3 - base,
        // N and anything unrecognised has no complement; pass it through.
        other => other,
    }
}
17
18/// Reverse complement a sequence in-place
19///
20/// Matches C++ AGC's CAGCBasic::reverse_complement() (agc_basic.cpp:257)
21///
22/// # Safety
23/// - sequence must point to valid mutable memory of length bytes
24#[no_mangle]
25pub extern "C" fn ragc_reverse_complement_inplace(sequence: *mut u8, length: usize) {
26    if length == 0 {
27        return;
28    }
29
30    unsafe {
31        let seq = std::slice::from_raw_parts_mut(sequence, length);
32
33        let mut i = 0;
34        let mut j = length - 1;
35
36        while i < j {
37            let x = complement_base(seq[j]);
38            let y = complement_base(seq[i]);
39
40            seq[i] = x;
41            seq[j] = y;
42
43            i += 1;
44            j -= 1;
45        }
46
47        // Handle middle element if odd length
48        if i == j {
49            seq[i] = complement_base(seq[i]);
50        }
51    }
52}
53
/// Byte buffer handed across the FFI boundary as a pointer/length pair.
///
/// Produced by `ragc_reverse_complement_copy()`, which matches C++ AGC's
/// CAGCBasic::reverse_complement_copy() (agc_basic.cpp:282).
///
/// An empty result is represented by a null `data` pointer together with `len == 0`.
///
/// # Safety
/// - The buffer is allocated on the Rust side
/// - Returned buffer must be freed with ragc_free_sequence()
#[repr(C)]
pub struct Sequence {
    /// Pointer to the first byte of the buffer, or null when the sequence is empty.
    pub data: *mut u8,
    /// Number of bytes addressed by `data`.
    pub len: usize,
}
66
67#[no_mangle]
68pub extern "C" fn ragc_reverse_complement_copy(src: *const u8, src_len: usize) -> Sequence {
69    if src_len == 0 {
70        return Sequence {
71            data: std::ptr::null_mut(),
72            len: 0,
73        };
74    }
75
76    unsafe {
77        let src_slice = std::slice::from_raw_parts(src, src_len);
78        let mut dest: Vec<u8> = src_slice
79            .iter()
80            .rev()
81            .map(|&base| complement_base(base))
82            .collect();
83
84        let ptr = dest.as_mut_ptr();
85        let len = dest.len();
86
87        std::mem::forget(dest);
88
89        Sequence { data: ptr, len }
90    }
91}
92
93#[no_mangle]
94pub extern "C" fn ragc_free_sequence(seq: Sequence) {
95    unsafe {
96        if !seq.data.is_null() && seq.len > 0 {
97            let _ = Vec::from_raw_parts(seq.data, seq.len, seq.len);
98        }
99    }
100}
101
/// Complement a single base (public wrapper)
///
/// Exports `complement_base` under an unmangled C symbol. Codes 0..=3 are mapped
/// to `3 - base`; any other value is returned unchanged.
#[no_mangle]
pub extern "C" fn ragc_complement_base(base: u8) -> u8 {
    // Delegate so the exported symbol and the internal helper cannot drift apart.
    complement_base(base)
}
107
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the copy entry point on `input`, snapshots the result into a `Vec`,
    /// and releases the returned buffer.
    fn reverse_complement_copy_of(input: &[u8]) -> Vec<u8> {
        let result = ragc_reverse_complement_copy(input.as_ptr(), input.len());
        if result.data.is_null() {
            assert_eq!(result.len, 0);
            return Vec::new();
        }

        // SAFETY: `result` was just returned by the copy routine and has not been freed.
        let snapshot = unsafe { std::slice::from_raw_parts(result.data, result.len) }.to_vec();
        ragc_free_sequence(result);
        snapshot
    }

    /// Runs the in-place entry point on a private copy of `input` and returns it.
    fn reverse_complement_inplace_of(input: &[u8]) -> Vec<u8> {
        let mut seq = input.to_vec();
        ragc_reverse_complement_inplace(seq.as_mut_ptr(), seq.len());
        seq
    }

    #[test]
    fn test_complement_base() {
        assert_eq!(complement_base(0), 3); // A -> T
        assert_eq!(complement_base(1), 2); // C -> G
        assert_eq!(complement_base(2), 1); // G -> C
        assert_eq!(complement_base(3), 0); // T -> A
        assert_eq!(complement_base(4), 4); // N -> N
        assert_eq!(complement_base(5), 5); // Invalid -> Invalid
        assert_eq!(complement_base(255), 255); // Largest code stays unchanged
    }

    #[test]
    fn test_reverse_complement_inplace() {
        // Non-palindrome, odd length: ACG -> CGT
        assert_eq!(reverse_complement_inplace_of(&[0, 1, 2]), vec![1u8, 2, 3]);

        // AAA -> TTT
        assert_eq!(reverse_complement_inplace_of(&[0, 0, 0]), vec![3u8, 3, 3]);
    }

    #[test]
    fn test_reverse_complement_inplace_even_length() {
        // AACC -> GGTT; even length has no middle element
        assert_eq!(reverse_complement_inplace_of(&[0, 0, 1, 1]), vec![2u8, 2, 3, 3]);

        // ACGT is its own reverse complement
        assert_eq!(reverse_complement_inplace_of(&[0, 1, 2, 3]), vec![0u8, 1, 2, 3]);
    }

    #[test]
    fn test_reverse_complement_inplace_short_inputs() {
        // Single base: C -> G
        assert_eq!(reverse_complement_inplace_of(&[1]), vec![2u8]);

        // Zero length leaves the buffer untouched
        let mut seq: Vec<u8> = vec![0, 1];
        ragc_reverse_complement_inplace(seq.as_mut_ptr(), 0);
        assert_eq!(seq, vec![0u8, 1]);
    }

    #[test]
    fn test_reverse_complement_inplace_is_involution() {
        let original: Vec<u8> = vec![0, 1, 4, 2, 3, 3, 5];
        let once = reverse_complement_inplace_of(&original);
        let twice = reverse_complement_inplace_of(&once);
        assert_eq!(twice, original);
    }

    #[test]
    fn test_reverse_complement_copy() {
        // ACG = 0,1,2
        // reverse = 2,1,0 = GCA
        // complement of GCA = 1,2,3 = CGT
        assert_eq!(reverse_complement_copy_of(&[0, 1, 2]), vec![1u8, 2, 3]);
    }

    #[test]
    fn test_reverse_complement_with_n() {
        // ACNG = 0,1,4,2
        // reverse = 2,4,1,0 = GNCA
        // complement of GNCA = 1,4,2,3 = CNGT
        assert_eq!(reverse_complement_copy_of(&[0, 1, 4, 2]), vec![1u8, 4, 2, 3]);
    }

    #[test]
    fn test_inplace_and_copy_agree() {
        let inputs: [&[u8]; 4] = [&[2], &[0, 3], &[0, 1, 4, 2], &[3, 3, 1, 0, 4]];
        for input in inputs.iter() {
            assert_eq!(
                reverse_complement_inplace_of(input),
                reverse_complement_copy_of(input)
            );
        }
    }

    #[test]
    fn test_empty_sequence() {
        let empty: Vec<u8> = vec![];
        let result = ragc_reverse_complement_copy(empty.as_ptr(), 0);

        assert_eq!(result.len, 0);
        assert!(result.data.is_null());

        // Freeing the empty result must be a harmless no-op.
        ragc_free_sequence(result);
    }
}