ragc_core/ffi/segment_split.rs
1// FFI helper for segment splitting
2// Used when a segment needs to be split into two parts with overlap
3
/// Compute the offset at which the second half of a split segment begins.
///
/// Mirrors the split arithmetic quoted from C++ AGC (agc_compressor.cpp:1434-1438):
/// ```cpp
/// uint32_t seg2_start_pos = left_size - kmer_length / 2;
/// segment2.assign(segment.begin() + seg2_start_pos, segment.end());
/// segment.resize((size_t)seg2_start_pos + kmer_length);
/// ```
///
/// # Arguments
/// * `left_size` - Size of the left part, i.e. the position where the split occurs
/// * `kmer_length` - k-mer length; half of it (rounded down) is backed off from `left_size`
///
/// # Returns
/// `left_size - kmer_length / 2`, clamped to 0 when `left_size` is smaller than
/// `kmer_length / 2`. The subtraction saturates, so the result never wraps around.
#[no_mangle]
pub extern "C" fn ragc_calculate_seg2_start_pos(left_size: u32, kmer_length: u32) -> u32 {
    // Integer division: an odd k rounds the back-off down (k = 21 -> 10).
    let half_kmer = kmer_length / 2;

    // Clamp at zero rather than underflowing when the split point is near the start.
    left_size.saturating_sub(half_kmer)
}
28
/// Compute the length the first segment is resized to after a split.
///
/// Corresponds to C++ AGC: segment.resize((size_t)seg2_start_pos + kmer_length);
///
/// # Arguments
/// * `seg2_start_pos` - Offset at which the second segment begins
/// * `kmer_length` - k-mer length, i.e. how far the first segment extends past that offset
///
/// # Returns
/// `seg2_start_pos + kmer_length`, capped at `u32::MAX` if the sum does not fit in a `u32`.
#[no_mangle]
pub extern "C" fn ragc_calculate_segment1_size(seg2_start_pos: u32, kmer_length: u32) -> u32 {
    // Saturating addition keeps the call panic-free for every pair of inputs.
    u32::saturating_add(seg2_start_pos, kmer_length)
}
43
/// Result of a complete segment split calculation.
///
/// Returned by value from [`ragc_calculate_segment_split`]. `#[repr(C)]` gives the
/// struct a C-compatible layout with the fields in declaration order.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SegmentSplitInfo {
    /// Position where the second segment starts.
    pub seg2_start_pos: u32,
    /// New size of the first segment (`seg2_start_pos + kmer_length`, saturating).
    pub segment1_new_size: u32,
}

/// Perform the complete segment split calculation.
///
/// Returns all values needed to split a segment into two overlapping parts.
///
/// # Arguments
/// * `left_size` - Size of the left part (where split occurs)
/// * `kmer_length` - Length of k-mers
///
/// # Returns
/// A [`SegmentSplitInfo`] where
/// * `seg2_start_pos` is `left_size - kmer_length / 2`, clamped to 0, and
/// * `segment1_new_size` is `seg2_start_pos + kmer_length`, capped at `u32::MAX`.
///
/// Both steps use saturating arithmetic, so the function never panics and never
/// wraps around, whatever values the caller passes.
#[no_mangle]
pub extern "C" fn ragc_calculate_segment_split(
    left_size: u32,
    kmer_length: u32,
) -> SegmentSplitInfo {
    let seg2_start_pos = left_size.saturating_sub(kmer_length / 2);

    // Saturate rather than use `+`: a plain add panics on overflow when overflow
    // checks are enabled and silently wraps otherwise. Neither is acceptable at an
    // FFI boundary, and wrapping would yield a size smaller than `seg2_start_pos`.
    let segment1_new_size = seg2_start_pos.saturating_add(kmer_length);

    SegmentSplitInfo {
        seg2_start_pos,
        segment1_new_size,
    }
}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// The second segment starts `k / 2` before `left_size`, clamped at zero.
    #[test]
    fn test_seg2_start_pos() {
        // (left_size, kmer_length, expected seg2_start_pos)
        let cases: [(u32, u32, u32); 4] = [
            (100, 21, 90),  // standard case, k=21: 100 - 21/2 = 100 - 10
            (200, 25, 188), // k=25: 200 - 25/2 = 200 - 12
            (5, 21, 0),     // small left_size: 5 - 10 saturates to 0
            (0, 21, 0),     // zero left_size
        ];

        for &(left_size, kmer_length, expected) in &cases {
            assert_eq!(ragc_calculate_seg2_start_pos(left_size, kmer_length), expected);
        }
    }

    /// The first segment is resized to `seg2_start_pos + k`.
    #[test]
    fn test_segment1_size() {
        // (seg2_start_pos, kmer_length, expected segment1 size)
        let cases: [(u32, u32, u32); 3] = [
            (90, 21, 111),  // standard case, k=21: 90 + 21
            (188, 25, 213), // k=25: 188 + 25
            (0, 21, 21),    // zero start
        ];

        for &(seg2_start_pos, kmer_length, expected) in &cases {
            assert_eq!(ragc_calculate_segment1_size(seg2_start_pos, kmer_length), expected);
        }
    }

    /// The combined call reports both the start position and the new size.
    #[test]
    fn test_complete_split() {
        // (left_size, kmer_length, expected seg2_start_pos, expected segment1_new_size)
        let cases: [(u32, u32, u32, u32); 3] = [
            (100, 21, 90, 111),  // standard case: 100 - 10, then 90 + 21
            (200, 25, 188, 213), // k=25: 200 - 12, then 188 + 25
            (5, 21, 0, 21),      // edge case: small left_size
        ];

        for &(left_size, kmer_length, expected_start, expected_size) in &cases {
            let info = ragc_calculate_segment_split(left_size, kmer_length);
            assert_eq!(info.seg2_start_pos, expected_start);
            assert_eq!(info.segment1_new_size, expected_size);
        }
    }

    /// segment1 ends at `seg2_start_pos + kmer_length` and segment2 starts at
    /// `seg2_start_pos`, so the two parts overlap by exactly `kmer_length`.
    #[test]
    fn test_overlap_size() {
        // (left_size, kmer_length)
        let cases: [(u32, u32); 2] = [(100, 21), (200, 25)];

        for &(left_size, kmer_length) in &cases {
            let info = ragc_calculate_segment_split(left_size, kmer_length);
            let overlap = info.segment1_new_size - info.seg2_start_pos;
            assert_eq!(overlap, kmer_length);
        }
    }

    /// Realistic scenario: segment of 500bp, split at position 250, k=21.
    #[test]
    fn test_realistic_values() {
        let SegmentSplitInfo {
            seg2_start_pos,
            segment1_new_size,
        } = ragc_calculate_segment_split(250, 21);

        assert_eq!(seg2_start_pos, 240); // 250 - 10
        assert_eq!(segment1_new_size, 261); // 240 + 21

        // segment1: [0..261)   = 261 bases
        // segment2: [240..500) = 260 bases
        // overlap:  [240..261) = 21 bases
    }
}
150}