ragc_core/ffi/
segment_split.rs

1// FFI helper for segment splitting
2// Used when a segment needs to be split into two parts with overlap
3
/// Compute the offset at which the second segment begins after a split.
///
/// Mirrors the split arithmetic in C++ AGC (agc_compressor.cpp:1434-1438):
/// ```cpp
/// uint32_t seg2_start_pos = left_size - kmer_length / 2;
/// segment2.assign(segment.begin() + seg2_start_pos, segment.end());
/// segment.resize((size_t)seg2_start_pos + kmer_length);
/// ```
///
/// # Arguments
/// * `left_size` - Size of the left part (where the split occurs)
/// * `kmer_length` - Length of k-mers; half of it (integer division) is subtracted
///
/// # Returns
/// `left_size - kmer_length / 2`, clamped to `0` when `left_size` is smaller than
/// half the k-mer length. The subtraction saturates instead of wrapping.
#[no_mangle]
pub extern "C" fn ragc_calculate_seg2_start_pos(left_size: u32, kmer_length: u32) -> u32 {
    // The second segment starts half a k-mer before the split point.
    let half_kmer = kmer_length / 2;
    left_size.saturating_sub(half_kmer)
}
28
/// Compute the length the first segment is resized to after a split.
///
/// Corresponds to C++ AGC's `segment.resize((size_t)seg2_start_pos + kmer_length);`.
///
/// # Arguments
/// * `seg2_start_pos` - Starting position of the second segment
/// * `kmer_length` - Length of k-mers (the overlap kept at the end of segment 1)
///
/// # Returns
/// `seg2_start_pos + kmer_length`, saturating at `u32::MAX` instead of overflowing.
#[no_mangle]
pub extern "C" fn ragc_calculate_segment1_size(seg2_start_pos: u32, kmer_length: u32) -> u32 {
    // Segment 1 keeps everything up to the start of segment 2 plus one full k-mer.
    u32::saturating_add(seg2_start_pos, kmer_length)
}
43
/// Result of a complete segment split calculation.
///
/// Laid out with `#[repr(C)]` so it can be returned by value from
/// [`ragc_calculate_segment_split`] across the FFI boundary.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SegmentSplitInfo {
    /// Position where the second segment starts.
    pub seg2_start_pos: u32,
    /// New size of the first segment after the split.
    pub segment1_new_size: u32,
}

/// Perform the complete segment split calculation.
///
/// Returns all values needed to split a segment into two overlapping parts.
///
/// # Arguments
/// * `left_size` - Size of the left part (where the split occurs)
/// * `kmer_length` - Length of k-mers
///
/// # Returns
/// A [`SegmentSplitInfo`] where
/// * `seg2_start_pos` is `left_size - kmer_length / 2`, clamped at `0`, and
/// * `segment1_new_size` is `seg2_start_pos + kmer_length`, clamped at `u32::MAX`.
///
/// Both operations saturate, so this function never panics or wraps on extreme
/// inputs.
#[no_mangle]
pub extern "C" fn ragc_calculate_segment_split(
    left_size: u32,
    kmer_length: u32,
) -> SegmentSplitInfo {
    let seg2_start_pos = left_size.saturating_sub(kmer_length / 2);
    // Saturating add: a plain `+` would panic in debug builds (and wrap in release)
    // when the sum exceeds `u32::MAX`, and a panic must not escape an `extern "C"` fn.
    let segment1_new_size = seg2_start_pos.saturating_add(kmer_length);

    SegmentSplitInfo {
        seg2_start_pos,
        segment1_new_size,
    }
}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// The second segment starts at `left_size - k/2`, clamped at zero.
    #[test]
    fn test_seg2_start_pos() {
        // (left_size, kmer_length, expected seg2_start_pos)
        let cases = [
            (100, 21, 90),  // standard case, k=21: 100 - 21/2 = 100 - 10
            (200, 25, 188), // k=25: 200 - 25/2 = 200 - 12
            (5, 21, 0),     // small left_size: 5 - 10 saturates to 0
            (0, 21, 0),     // zero left_size
        ];

        for &(left_size, kmer_length, expected) in &cases {
            assert_eq!(ragc_calculate_seg2_start_pos(left_size, kmer_length), expected);
        }
    }

    /// The first segment is resized to `seg2_start_pos + k`.
    #[test]
    fn test_segment1_size() {
        // (seg2_start_pos, kmer_length, expected segment1 size)
        let cases = [
            (90, 21, 111),  // standard case, k=21: 90 + 21
            (188, 25, 213), // k=25: 188 + 25
            (0, 21, 21),    // zero start
        ];

        for &(seg2_start_pos, kmer_length, expected) in &cases {
            assert_eq!(ragc_calculate_segment1_size(seg2_start_pos, kmer_length), expected);
        }
    }

    /// The combined call reports both values at once.
    #[test]
    fn test_complete_split() {
        // (left_size, kmer_length, expected seg2_start_pos, expected segment1_new_size)
        let cases = [
            (100, 21, 90, 111),  // standard case: 100 - 10, then 90 + 21
            (200, 25, 188, 213), // k=25: 200 - 12, then 188 + 25
            (5, 21, 0, 21),      // edge case: small left_size
        ];

        for &(left_size, kmer_length, expected_start, expected_size) in &cases {
            let split = ragc_calculate_segment_split(left_size, kmer_length);
            assert_eq!(split.seg2_start_pos, expected_start);
            assert_eq!(split.segment1_new_size, expected_size);
        }
    }

    /// Segment 1 ends at `seg2_start_pos + kmer_length` and segment 2 starts at
    /// `seg2_start_pos`, so the two segments overlap by exactly `kmer_length`.
    #[test]
    fn test_overlap_size() {
        // (left_size, kmer_length); the expected overlap is kmer_length itself.
        let cases = [(100, 21), (200, 25)];

        for &(left_size, kmer_length) in &cases {
            let split = ragc_calculate_segment_split(left_size, kmer_length);
            let overlap = split.segment1_new_size - split.seg2_start_pos;
            assert_eq!(overlap, kmer_length);
        }
    }

    /// Realistic scenario: segment of 500bp, split at position 250, k=21.
    #[test]
    fn test_realistic_values() {
        let split = ragc_calculate_segment_split(250, 21);
        assert_eq!(split.seg2_start_pos, 240); // 250 - 10
        assert_eq!(split.segment1_new_size, 261); // 240 + 21

        // segment1: [0..261)   = 261 bases
        // segment2: [240..500) = 260 bases
        // overlap:  [240..261) = 21 bases
    }
}