// ragc_core/agc_compressor.rs

1// Queue-based streaming compressor API
2// Provides simple push() interface with automatic backpressure and constant memory usage
3
4use crate::kmer_extract::{enumerate_kmers, remove_non_singletons};
5use crate::lz_diff::LZDiff;
6use crate::memory_bounded_queue::MemoryBoundedQueue;
7use crate::segment::{split_at_splitters_with_size, MISSING_KMER};
8use crate::splitters::{determine_splitters, find_new_splitters_for_contig};
9use ahash::AHashSet;
10use anyhow::{Context, Result};
11use ragc_common::{Archive, CollectionV3, Contig, CONTIG_SEPARATOR};
12use std::collections::{BTreeMap, HashMap, HashSet};
13use std::path::Path;
14use std::sync::atomic::{AtomicI32, AtomicU32, AtomicUsize, Ordering};
15use std::sync::{Arc, Mutex, RwLock};
16use std::thread::{self, JoinHandle};
17
18/// MurmurHash64A implementation matching C++ AGC's MurMur64Hash
19/// This is the same hash function used by C++ AGC for fallback filtering
/// MurmurHash64A (Austin Appleby) applied to a single u64 key, matching the
/// MurMur64Hash function C++ AGC uses for fallback k-mer filtering.
fn murmur_hash_64a(key: u64) -> u64 {
    // Multiplicative constant and shift amount from reference MurmurHash64A.
    const M: u64 = 0xc6a4a7935bd1e995;
    const R: u32 = 47;

    // Seed term: 0xc70f6907 * M, as in the C++ AGC implementation.
    let mut h = 0xc70f6907u64.wrapping_mul(M);

    // Mix the single 8-byte block into the hash.
    let mut k = key.wrapping_mul(M);
    k ^= k >> R;
    k = k.wrapping_mul(M);
    h = (h ^ k).wrapping_mul(M);

    // Finalization: xor-shift, multiply, xor-shift.
    h ^= h >> R;
    h = h.wrapping_mul(M);
    h ^ (h >> R)
}
39
/// K-mer fallback filter, a port of C++ AGC's `kmer_filter_t`.
/// Deterministically selects a fraction of k-mers for fallback grouping.
#[derive(Debug, Clone)]
struct FallbackFilter {
    /// Mixed hash values strictly below this pass; 0 disables the filter,
    /// u64::MAX lets everything through.
    threshold: u64,
    /// Fixed xor-mixing seed (same `rnd` constant as C++ AGC).
    rnd: u64,
}
49
50impl FallbackFilter {
51    /// Create a new fallback filter with the given fraction
52    /// Matches C++ AGC's kmer_filter_t constructor
53    fn new(fraction: f64) -> Self {
54        let threshold = if fraction == 0.0 {
55            0
56        } else {
57            (u64::MAX as f64 * fraction) as u64
58        };
59        Self {
60            threshold,
61            rnd: 0xD73F8BF11046C40E, // Matches C++ AGC constant
62        }
63    }
64
65    /// Check if the filter is enabled (fraction > 0)
66    fn is_enabled(&self) -> bool {
67        self.threshold != 0
68    }
69
70    /// Check if a k-mer passes the filter
71    /// Matches C++ AGC's kmer_filter_t::operator()
72    fn passes(&self, kmer: u64) -> bool {
73        (murmur_hash_64a(kmer) ^ self.rnd) < self.threshold
74    }
75}
76
/// Configuration knobs for the streaming queue-based compressor.
#[derive(Debug, Clone)]
pub struct StreamingQueueConfig {
    /// Splitter k-mer length.
    pub k: usize,

    /// Target segment size when splitting contigs.
    pub segment_size: usize,

    /// Minimum match length for the LZ encoder.
    pub min_match_len: usize,

    /// ZSTD compression level (1-22).
    pub compression_level: i32,

    /// Worker thread count.
    pub num_threads: usize,

    /// Queue capacity in bytes (default 2 GB, mirroring C++ AGC).
    pub queue_capacity: usize,

    /// Verbosity level.
    pub verbosity: usize,

    /// Adaptive mode: discover new splitters for samples that segment poorly
    /// (matches C++ AGC's -a flag).
    pub adaptive_mode: bool,

    /// Fraction of minimizers used for fallback grouping
    /// (matches C++ AGC's --fallback-frac parameter; default 0.0 = disabled).
    pub fallback_frac: f64,

    /// Number of samples accumulated before sorting and distributing
    /// (matches C++ AGC's pack_cardinality, default 50). Segments from this
    /// many samples are sorted by (sample, contig, seg_part_no) before group
    /// distribution so pack boundaries stay consistent with C++ AGC.
    pub batch_size: usize,

    /// Segments per pack (matches C++ AGC's contigs_in_pack / PACK_CARDINALITY,
    /// default 50); a pack is written as soon as a group reaches this count.
    pub pack_size: usize,

    /// Concatenated-genomes mode: when true, sync tokens are sent every
    /// pack_size contigs; when false (multiple input files), only at sample
    /// boundaries. Matches C++ AGC's concatenated_genomes behavior.
    pub concatenated_genomes: bool,
}
125
126impl Default for StreamingQueueConfig {
127    fn default() -> Self {
128        Self {
129            k: 31,
130            segment_size: 60_000,
131            min_match_len: 20,
132            compression_level: 17,
133            num_threads: rayon::current_num_threads().max(4),
134            queue_capacity: 2 * 1024 * 1024 * 1024, // 2 GB like C++ AGC
135            verbosity: 1,
136            adaptive_mode: false, // Default matches C++ AGC (adaptive mode off)
137            fallback_frac: 0.0,   // Default matches C++ AGC (fallback disabled)
138            batch_size: 50,       // Default matches C++ AGC pack_cardinality
139            pack_size: 50,        // Default matches C++ AGC contigs_in_pack / PACK_CARDINALITY
140            concatenated_genomes: false, // Default: multiple input files (non-concatenated)
141        }
142    }
143}
144
145/// Task to be processed by workers
146/// Note: Contig is type alias for Vec<u8>, so we store the name separately
147///
148/// Priority ordering matches C++ AGC:
149/// - Higher sample_priority first (sample1 > sample2 > sample3...)
150/// - Within same sample, lexicographic order on contig_name (ascending)
151///
152/// NOTE: C++ AGC uses a multimap<pair<priority, cost>, T> where cost=contig.size().
153/// Since multimap iterates in ascending key order, smaller names come first.
154/// This results in lexicographic ordering: chrI, chrII, chrIII, chrIV, chrIX, chrMT, chrV...
155/// RAGC must match this ordering for byte-identical archives.
156#[derive(Clone)]
157struct ContigTask {
158    sample_name: String,
159    contig_name: String,
160    data: Contig,         // Vec<u8>
161    sample_priority: i32, // Higher = process first (decreases for each sample)
162    cost: usize,          // Contig size in bytes (matches C++ AGC cost calculation)
163    sequence: u64,        // Insertion order within sample - lower = processed first (FASTA order)
164    is_sync_token: bool, // True if this is a synchronization token (matches C++ AGC registration tokens)
165}
166
167// Implement priority ordering for BinaryHeap (max-heap)
168// BinaryHeap pops the "greatest" element, so we want:
169// - Higher sample_priority = greater (first sample processed first)
170// - Lexicographically SMALLER contig_name = greater (to be popped first)
171//
172// C++ AGC uses multimap which iterates in ascending order, so "chrI" < "chrIX" < "chrMT" < "chrV"
173// To match this with a max-heap, we reverse the contig_name comparison.
174impl PartialEq for ContigTask {
175    fn eq(&self, other: &Self) -> bool {
176        self.sample_priority == other.sample_priority
177            && self.cost == other.cost
178            && self.contig_name == other.contig_name
179    }
180}
181
182impl Eq for ContigTask {}
183
184impl PartialOrd for ContigTask {
185    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
186        Some(self.cmp(other))
187    }
188}
189
190impl Ord for ContigTask {
191    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
192        // C++ AGC uses (priority, cost) as the multimap key with PopLarge (rbegin).
193        // multimap is sorted by (priority, cost) in ASCENDING order.
194        // rbegin() returns LARGEST element, so within same priority, LARGEST cost is popped first.
195        //
196        // Example: AAA#0 contigs are processed by SIZE (largest first):
197        //   chrIV (1.5MB) → chrXV (1.1MB) → chrVII (1.1MB) → ... → chrMT (86KB)
198        //
199        // This is NOT file order! Instrumentation shows C++ AGC pops by (priority, cost).
200
201        // First compare by sample_priority (higher priority first)
202        match self.sample_priority.cmp(&other.sample_priority) {
203            std::cmp::Ordering::Equal => {
204                // Then by cost (LARGER cost = higher priority, processed first)
205                // Match C++ AGC's PopLarge behavior
206                match self.cost.cmp(&other.cost) {
207                    std::cmp::Ordering::Equal => {
208                        // CRITICAL TIE-BREAKER: When sizes are equal, use FASTA order (sequence field)
209                        // to ensure deterministic ordering. Without this, the BinaryHeap order is
210                        // non-deterministic, causing different segment splitting and 19% size difference.
211                        // LOWER sequence = earlier in FASTA = processed first (reverse comparison for max-heap)
212                        other.sequence.cmp(&self.sequence)
213                    }
214                    cost_ord => cost_ord,
215                }
216            }
217            priority_ord => priority_ord,
218        }
219    }
220}
221
/// Key identifying a segment group by its two flanking k-mers
/// (same grouping scheme as batch mode).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)]
struct SegmentGroupKey {
    kmer_front: u64,
    kmer_back: u64,
}
228
229/// Pending segment for batch-local processing (before group assignment)
230/// Segments are sorted by (sample_name, contig_name, place) to match C++ AGC order
231#[derive(Debug, Clone, PartialEq, Eq)]
232struct PendingSegment {
233    key: SegmentGroupKey,
234    segment_data: Vec<u8>,
235    should_reverse: bool,
236    sample_name: String,
237    contig_name: String,
238    place: usize,
239    sample_priority: i32, // Sample processing order (higher = earlier)
240}
241
242// Match C++ AGC sorting order (agc_compressor.h lines 112-119)
243// Sort by: sample_name, then contig_name, then place (seg_part_no)
244impl PartialOrd for PendingSegment {
245    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
246        Some(self.cmp(other))
247    }
248}
249
250impl Ord for PendingSegment {
251    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
252        // Match C++ AGC: pure lexicographic ordering (no sample_priority)
253        // Sort by: sample_name, then contig_name, then place (seg_part_no)
254        match self.sample_name.cmp(&other.sample_name) {
255            std::cmp::Ordering::Equal => {
256                // Then by contig_name
257                match self.contig_name.cmp(&other.contig_name) {
258                    std::cmp::Ordering::Equal => {
259                        // Finally by place (seg_part_no)
260                        self.place.cmp(&other.place)
261                    }
262                    other => other,
263                }
264            }
265            other => other,
266        }
267    }
268}
269
270/// Buffered segment waiting to be packed
271#[derive(Debug, Clone, PartialEq, Eq)]
272struct BufferedSegment {
273    sample_name: String,
274    contig_name: String,
275    seg_part_no: usize,
276    data: Contig,
277    is_rev_comp: bool,
278    sample_priority: i32, // Sample processing order (higher = earlier)
279}
280
281// Match C++ AGC sorting order: pure lexicographic (no sample_priority)
282// Sort by: sample_name, contig_name, seg_part_no
283impl PartialOrd for BufferedSegment {
284    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
285        Some(self.cmp(other))
286    }
287}
288
289impl Ord for BufferedSegment {
290    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
291        // Match C++ AGC: pure lexicographic ordering (no sample_priority)
292        // Sort by: sample_name, then contig_name, then seg_part_no
293        match self.sample_name.cmp(&other.sample_name) {
294            std::cmp::Ordering::Equal => {
295                // Then by contig_name
296                match self.contig_name.cmp(&other.contig_name) {
297                    std::cmp::Ordering::Equal => {
298                        // Finally by seg_part_no
299                        self.seg_part_no.cmp(&other.seg_part_no)
300                    }
301                    other => other,
302                }
303            }
304            other => other,
305        }
306    }
307}
308
309// =============================================================================
310// RAW segment buffering for parallel Phase 1 (BEFORE classification)
311// =============================================================================
312
/// Segment buffered BEFORE k-mer classification.
/// Deferring classification to Thread 0 at the barrier lets buffering run in
/// parallel without lock contention from find_group_with_one_kmer.
#[derive(Clone)]
struct RawBufferedSegment {
    /// Raw segment in numeric encoding (0=A, 1=C, 2=G, 3=T).
    data: Vec<u8>,
    /// Precomputed reverse complement of `data`.
    data_rc: Vec<u8>,
    /// Front k-mer found during segment detection.
    front_kmer: u64,
    /// Back k-mer found during segment detection.
    back_kmer: u64,
    /// Whether the front k-mer is in canonical direction.
    front_kmer_is_dir: bool,
    /// Whether the back k-mer is in canonical direction.
    back_kmer_is_dir: bool,
    /// Sample name, used for sorting and registration.
    sample_name: String,
    /// Contig name, used for sorting and registration.
    contig_name: String,
    /// Segment index within the contig (before split adjustment).
    original_place: usize,
    /// Sample processing priority; higher = earlier.
    sample_priority: i32,
}
339
340// Implement Ord for deterministic sorting at barrier
341impl PartialEq for RawBufferedSegment {
342    fn eq(&self, other: &Self) -> bool {
343        self.sample_name == other.sample_name
344            && self.contig_name == other.contig_name
345            && self.original_place == other.original_place
346    }
347}
348impl Eq for RawBufferedSegment {}
349
350impl PartialOrd for RawBufferedSegment {
351    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
352        Some(self.cmp(other))
353    }
354}
355
356impl Ord for RawBufferedSegment {
357    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
358        // Sort by: sample_name, contig_name, original_place (matches C++ AGC order)
359        match self.sample_name.cmp(&other.sample_name) {
360            std::cmp::Ordering::Equal => match self.contig_name.cmp(&other.contig_name) {
361                std::cmp::Ordering::Equal => self.original_place.cmp(&other.original_place),
362                other => other,
363            },
364            other => other,
365        }
366    }
367}
368
369// =============================================================================
370// C++ AGC-style segment buffering for parallel compression (4-phase pattern)
371// =============================================================================
372
373/// Per-group segment buffer with its own mutex (C++ AGC: list_seg_part_t)
374/// Each group has independent locking to allow parallel writes during Phase 1
375struct PerGroupSegments {
376    segments: Vec<BufferedSegment>,
377}
378
379/// Segment waiting to be assigned a group ID (C++ AGC: kk_seg_part_t)
380/// Used during Phase 1 when segment's k-mer pair doesn't exist in map_segments yet
381#[derive(Clone)]
382struct NewSegment {
383    /// K-mer pair (normalized: front <= back)
384    kmer_front: u64,
385    kmer_back: u64,
386    /// Sort key for deterministic processing: (sample_priority, sample_name, contig_name, seg_part_no)
387    sample_priority: i32,
388    sample_name: String,
389    contig_name: String,
390    seg_part_no: usize,
391    /// Segment data
392    data: Contig,
393    should_reverse: bool,
394}
395
396// Implement Ord for NewSegment to match C++ AGC BTreeSet ordering
397impl PartialEq for NewSegment {
398    fn eq(&self, other: &Self) -> bool {
399        self.sample_priority == other.sample_priority
400            && self.sample_name == other.sample_name
401            && self.contig_name == other.contig_name
402            && self.seg_part_no == other.seg_part_no
403    }
404}
405impl Eq for NewSegment {}
406
407impl PartialOrd for NewSegment {
408    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
409        Some(self.cmp(other))
410    }
411}
412
413impl Ord for NewSegment {
414    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
415        // Higher sample_priority processes first (descending)
416        match other.sample_priority.cmp(&self.sample_priority) {
417            std::cmp::Ordering::Equal => {
418                // Then by sample_name, contig_name, seg_part_no (ascending)
419                match self.sample_name.cmp(&other.sample_name) {
420                    std::cmp::Ordering::Equal => match self.contig_name.cmp(&other.contig_name) {
421                        std::cmp::Ordering::Equal => self.seg_part_no.cmp(&other.seg_part_no),
422                        other => other,
423                    },
424                    other => other,
425                }
426            }
427            other => other,
428        }
429    }
430}
431
432/// Two-tier segment buffering for C++ AGC 4-phase pattern (C++ AGC: CBufferedSegPart)
433///
434/// Phase 1 (PARALLEL): Workers add segments using add_known() or add_new()
435/// Phase 2 (SINGLE): Thread 0 calls process_new() to assign group IDs
436/// Phase 3 (PARALLEL): Workers call get_vec_id() + get_part() for atomic work-stealing
437/// Phase 4: Thread 0 calls clear() for cleanup
438struct BufferedSegPart {
439    /// KNOWN segments: indexed by group_id, each has own mutex
440    /// RwLock on Vec allows process_new() to resize while add_known() reads
441    /// C++ AGC: vector<list_seg_part_t> vl_seg_part
442    vl_seg_part: RwLock<Vec<Mutex<PerGroupSegments>>>,
443
444    /// NEW segments: BTreeSet for deterministic iteration
445    /// C++ AGC: set<kk_seg_part_t> s_seg_part
446    s_seg_part: Mutex<std::collections::BTreeSet<NewSegment>>,
447
448    /// Atomic counter for work distribution (descending from num_groups-1 to -1)
449    /// C++ AGC: atomic<int32_t> a_v_part_id
450    a_v_part_id: AtomicI32,
451}
452
impl BufferedSegPart {
    /// Build buffers for `initial_groups` pre-existing groups.
    /// The work-stealing counter starts exhausted (-1) until restart_read_vec().
    fn new(initial_groups: usize) -> Self {
        Self {
            vl_seg_part: RwLock::new(
                (0..initial_groups)
                    .map(|_| {
                        Mutex::new(PerGroupSegments {
                            segments: Vec::new(),
                        })
                    })
                    .collect(),
            ),
            s_seg_part: Mutex::new(std::collections::BTreeSet::new()),
            a_v_part_id: AtomicI32::new(-1),
        }
    }

    /// Add segment to KNOWN group (has group_id)
    /// C++ AGC: add_known() - read lock on Vec, per-group lock on Mutex
    ///
    /// An out-of-range group_id DROPS the segment (with a warning); callers
    /// must grow the vector first via ensure_capacity() or process_new().
    fn add_known(&self, group_id: u32, segment: BufferedSegment) {
        let groups = self.vl_seg_part.read().unwrap();
        if (group_id as usize) < groups.len() {
            groups[group_id as usize]
                .lock()
                .unwrap()
                .segments
                .push(segment);
        } else {
            // This should NOT happen - indicates a bug in group management
            eprintln!("WARNING: add_known dropping segment! group_id={} >= groups.len()={} sample={} contig={}",
                group_id, groups.len(), segment.sample_name, segment.contig_name);
        }
    }

    /// Ensure capacity for group_id (grow vl_seg_part if needed)
    /// Called when immediately registering groups during barrier classification
    ///
    /// The length check under the read lock is racy but benign: the while
    /// loop re-checks the length after taking the write lock.
    fn ensure_capacity(&self, min_group_id: u32) {
        let current_len = self.vl_seg_part.read().unwrap().len();
        if (min_group_id as usize) >= current_len {
            let mut groups = self.vl_seg_part.write().unwrap();
            while groups.len() <= min_group_id as usize {
                groups.push(Mutex::new(PerGroupSegments {
                    segments: Vec::new(),
                }));
            }
        }
    }

    /// Add segment with UNKNOWN group (new k-mer pair)
    /// C++ AGC: add_new() - global s_seg_part lock (but brief)
    /// Segments comparing equal under NewSegment's Ord (same priority,
    /// sample, contig, seg_part_no) are deduplicated by the set.
    fn add_new(&self, segment: NewSegment) {
        self.s_seg_part.lock().unwrap().insert(segment);
    }

    /// Process NEW segments, assign group IDs deterministically
    /// C++ AGC: process_new() - ONLY called by thread 0 after barrier
    /// DETERMINISM FIX: This is the ONLY place where group IDs are assigned.
    /// The parallel classification phase only determines k-mer pairs.
    ///
    /// Returns the number of groups created. Side effects:
    /// - map_segments gains an entry per new k-mer pair,
    /// - reference_segments gains the first (BTreeSet-ordered) segment's data
    ///   for each new group,
    /// - terminators gains front<->back k-mer adjacency for new groups,
    /// - all buffered NEW segments move into vl_seg_part via add_known().
    fn process_new(
        &self,
        map_segments: &mut BTreeMap<SegmentGroupKey, u32>,
        next_group_id: &mut u32,
        reference_segments: &mut BTreeMap<u32, Vec<u8>>,
        terminators: &mut BTreeMap<u64, Vec<u64>>,
    ) -> u32 {
        use crate::segment::MISSING_KMER;

        let mut s = self.s_seg_part.lock().unwrap();
        let mut m_kmers: BTreeMap<(u64, u64), u32> = BTreeMap::new();
        let mut new_count = 0u32;

        // First pass: assign group IDs (deterministic - BTreeSet order)
        for seg in s.iter() {
            let key = (seg.kmer_front, seg.kmer_back);
            if !m_kmers.contains_key(&key)
                && !map_segments.contains_key(&SegmentGroupKey {
                    kmer_front: seg.kmer_front,
                    kmer_back: seg.kmer_back,
                })
            {
                m_kmers.insert(key, *next_group_id);
                *next_group_id += 1;
                new_count += 1;
            }
        }

        // Resize vl_seg_part for new groups (requires write lock)
        {
            let mut groups = self.vl_seg_part.write().unwrap();
            while groups.len() < *next_group_id as usize {
                groups.push(Mutex::new(PerGroupSegments {
                    segments: Vec::new(),
                }));
            }
        }

        // Second pass: move segments to vl_seg_part and update map_segments
        // Also update reference_segments and terminators for new groups
        // (the set is copied out and its lock released before the loop below)
        let segments: Vec<NewSegment> = s.iter().cloned().collect();
        s.clear();
        drop(s);

        // Track which groups have had their reference set (first segment wins)
        let mut refs_set: std::collections::HashSet<u32> = std::collections::HashSet::new();

        for seg in segments {
            let key = SegmentGroupKey {
                kmer_front: seg.kmer_front,
                kmer_back: seg.kmer_back,
            };

            // Get group_id from either existing map or newly assigned
            let (group_id, is_new_group) = if let Some(&id) = map_segments.get(&key) {
                (id, false)
            } else if let Some(&id) = m_kmers.get(&(seg.kmer_front, seg.kmer_back)) {
                // Insert into map_segments
                map_segments.insert(key.clone(), id);
                (id, true)
            } else {
                continue; // Should not happen
            };

            // Store reference data for new groups (first segment in sorted order wins)
            if is_new_group && !refs_set.contains(&group_id) {
                reference_segments.insert(group_id, seg.data.clone());
                refs_set.insert(group_id);

                // Update terminators for new groups (C++ AGC lines 1015-1025)
                if key.kmer_front != MISSING_KMER && key.kmer_back != MISSING_KMER {
                    // Add kmer_front -> kmer_back (adjacency kept sorted, no duplicates)
                    let front_vec = terminators.entry(key.kmer_front).or_insert_with(Vec::new);
                    if !front_vec.contains(&key.kmer_back) {
                        front_vec.push(key.kmer_back);
                        front_vec.sort();
                    }
                    // Add kmer_back -> kmer_front (if different)
                    if key.kmer_front != key.kmer_back {
                        let back_vec = terminators.entry(key.kmer_back).or_insert_with(Vec::new);
                        if !back_vec.contains(&key.kmer_front) {
                            back_vec.push(key.kmer_front);
                            back_vec.sort();
                        }
                    }
                }
            }

            // Add to per-group buffer (uses read lock internally)
            let buffered = BufferedSegment {
                sample_name: seg.sample_name,
                contig_name: seg.contig_name,
                seg_part_no: seg.seg_part_no,
                data: seg.data,
                is_rev_comp: seg.should_reverse,
                sample_priority: seg.sample_priority,
            };
            self.add_known(group_id, buffered);
        }

        new_count
    }

    /// Sort known segments within each group for deterministic output
    /// (uses BufferedSegment's Ord: sample_name, contig_name, seg_part_no).
    fn sort_known(&self) {
        let groups = self.vl_seg_part.read().unwrap();
        for group in groups.iter() {
            group.lock().unwrap().segments.sort();
        }
    }

    /// Reset atomic counter for work distribution
    /// C++ AGC: restart_read_vec()
    fn restart_read_vec(&self) {
        let groups = self.vl_seg_part.read().unwrap();
        self.a_v_part_id
            .store(groups.len() as i32 - 1, Ordering::SeqCst);
    }

    /// Get next group_id to process (atomic decrement for work-stealing)
    /// C++ AGC: get_vec_id() - returns -1 when all groups claimed
    /// (fetch_sub returns the PREVIOUS value; callers must treat any
    /// negative result as "no work left").
    fn get_vec_id(&self) -> i32 {
        self.a_v_part_id.fetch_sub(1, Ordering::Relaxed)
    }

    /// Get and remove one segment from group (for store phase)
    /// C++ AGC: get_part()
    /// Pops from the END of the group's Vec; returns None when the group is
    /// empty or group_id is out of range.
    fn get_part(&self, group_id: u32) -> Option<BufferedSegment> {
        let groups = self.vl_seg_part.read().unwrap();
        if (group_id as usize) < groups.len() {
            groups[group_id as usize].lock().unwrap().segments.pop()
        } else {
            None
        }
    }

    /// Get all segments from a group (for batch processing)
    /// Leaves an empty Vec behind; out-of-range ids yield an empty Vec.
    fn drain_group(&self, group_id: u32) -> Vec<BufferedSegment> {
        let groups = self.vl_seg_part.read().unwrap();
        if (group_id as usize) < groups.len() {
            std::mem::take(&mut groups[group_id as usize].lock().unwrap().segments)
        } else {
            Vec::new()
        }
    }

    /// Clear all buffers after batch
    /// C++ AGC: clear()
    /// Empties every per-group list and the NEW set; the group slots
    /// themselves are kept, so num_groups() is unchanged.
    fn clear(&self) {
        let groups = self.vl_seg_part.read().unwrap();
        for group in groups.iter() {
            group.lock().unwrap().segments.clear();
        }
        self.s_seg_part.lock().unwrap().clear();
    }

    /// Check if any segments are buffered (KNOWN groups first, then NEW set)
    fn has_segments(&self) -> bool {
        let groups = self.vl_seg_part.read().unwrap();
        for group in groups.iter() {
            if !group.lock().unwrap().segments.is_empty() {
                return true;
            }
        }
        !self.s_seg_part.lock().unwrap().is_empty()
    }

    /// Total number of groups
    fn num_groups(&self) -> usize {
        self.vl_seg_part.read().unwrap().len()
    }
}
683
684// =============================================================================
685// Parallel flush coordination for Phase 3 (atomic work-stealing)
686// =============================================================================
687
688/// State for coordinating parallel Phase 3 segment storage
689/// Workers atomically claim buffers via next_idx, then process independently
690struct ParallelFlushState {
691    /// Extracted buffers to flush. Each slot has its own Mutex for independent access.
692    /// RwLock allows parallel read access to the Vec during Phase 3, avoiding serialization.
693    /// Workers only need read access to the Vec to reach their claimed slot's inner Mutex.
694    buffers: RwLock<Vec<Mutex<Option<(SegmentGroupKey, SegmentGroupBuffer)>>>>,
695    /// Compression results from each buffer (stored by workers, written by Thread 0)
696    results: RwLock<Vec<Mutex<Option<FlushPackResult>>>>,
697    /// Atomic index for work-stealing (starts at len-1, decrements to -1)
698    next_idx: AtomicI32,
699}
700
impl ParallelFlushState {
    /// Empty state; next_idx starts at -1 so claim_next_idx() returns None
    /// until prepare() is called.
    fn new() -> Self {
        Self {
            buffers: RwLock::new(Vec::new()),
            results: RwLock::new(Vec::new()),
            next_idx: AtomicI32::new(-1),
        }
    }

    /// Set up buffers to flush and reset atomic counter (called by Thread 0 in Phase 2)
    /// Replaces any previous buffers and results wholesale.
    fn prepare(&self, extracted: Vec<(SegmentGroupKey, SegmentGroupBuffer)>) {
        let len = extracted.len();
        let mut buffers = self.buffers.write().unwrap();
        *buffers = extracted
            .into_iter()
            .map(|(k, b)| Mutex::new(Some((k, b))))
            .collect();
        // Initialize results slots (one per buffer)
        let mut results = self.results.write().unwrap();
        *results = (0..len).map(|_| Mutex::new(None)).collect();
        self.next_idx.store(len as i32 - 1, Ordering::SeqCst);
    }

    /// Claim next buffer index (returns None when all claimed)
    /// Called by ALL workers in Phase 3
    /// fetch_sub returns the PREVIOUS value, so indices are handed out as
    /// len-1, len-2, ..., 0, and None forever after.
    fn claim_next_idx(&self) -> Option<usize> {
        let idx = self.next_idx.fetch_sub(1, Ordering::Relaxed);
        if idx < 0 {
            None
        } else {
            Some(idx as usize)
        }
    }

    /// Get buffer at claimed index (READ lock on Vec, exclusive on slot)
    /// Called by workers after claiming an index in Phase 3
    /// take() empties the slot; pair with return_buffer() to restore it.
    fn get_buffer_at(&self, idx: usize) -> Option<(SegmentGroupKey, SegmentGroupBuffer)> {
        let buffers = self.buffers.read().unwrap();
        if idx < buffers.len() {
            buffers[idx].lock().unwrap().take()
        } else {
            None
        }
    }

    /// Put buffer back after processing (READ lock on Vec, exclusive on slot)
    /// Out-of-range indices are ignored silently.
    fn return_buffer(&self, idx: usize, key: SegmentGroupKey, buffer: SegmentGroupBuffer) {
        let buffers = self.buffers.read().unwrap();
        if idx < buffers.len() {
            *buffers[idx].lock().unwrap() = Some((key, buffer));
        }
    }

    /// Drain all buffers back (called by Thread 0 in Phase 4 - needs WRITE lock)
    /// Also resets next_idx to -1 so any stale claim yields None.
    fn drain_buffers(&self) -> Vec<(SegmentGroupKey, SegmentGroupBuffer)> {
        let mut buffers = self.buffers.write().unwrap();
        let result: Vec<_> = buffers
            .iter_mut()
            .filter_map(|slot| slot.lock().unwrap().take())
            .collect();
        buffers.clear();
        self.next_idx.store(-1, Ordering::SeqCst);
        result
    }

    /// Store compression result at given index (READ lock on Vec, exclusive on slot)
    /// Out-of-range indices are ignored silently.
    fn store_result(&self, idx: usize, result: FlushPackResult) {
        let results = self.results.read().unwrap();
        if idx < results.len() {
            *results[idx].lock().unwrap() = Some(result);
        }
    }

    /// Drain all results sorted by group_id (called by Thread 0 for deterministic writes)
    /// Empty slots (no result stored) are skipped.
    fn drain_results_sorted(&self) -> Vec<FlushPackResult> {
        let mut results_lock = self.results.write().unwrap();
        let mut all_results: Vec<FlushPackResult> = results_lock
            .iter_mut()
            .filter_map(|slot| slot.lock().unwrap().take())
            .collect();
        results_lock.clear();
        // Sort by group_id for deterministic write order
        all_results.sort_by_key(|r| r.group_id);
        all_results
    }
}
787
/// Parallel write buffer with per-stream mutexes (C++ AGC pattern: per-segment mutex)
/// Workers operating on different streams don't contend at all.
/// BTreeMap ensures flush writes in sorted stream_id order for determinism.
struct ParallelWriteBuffer {
    /// Per-stream buffers: BTreeMap for sorted iteration, each stream has its own Mutex
    /// RwLock allows concurrent reader access to find the right stream's Mutex
    /// Each buffered entry is a (part data, metadata) pair later handed to
    /// `Archive::add_part_buffered` by `flush_to_archive`.
    streams: RwLock<BTreeMap<usize, Mutex<Vec<(Vec<u8>, u64)>>>>,
}
796
797impl ParallelWriteBuffer {
798    fn new() -> Self {
799        Self {
800            streams: RwLock::new(BTreeMap::new()),
801        }
802    }
803
804    /// Buffer a write for a specific stream (only locks that stream's mutex)
805    /// Workers on different streams can call this concurrently without contention
806    fn buffer_write(&self, stream_id: usize, data: Vec<u8>, metadata: u64) {
807        // First try with read lock - most common case (stream already exists)
808        {
809            let streams = self.streams.read().unwrap();
810            if let Some(stream_mutex) = streams.get(&stream_id) {
811                stream_mutex.lock().unwrap().push((data, metadata));
812                return;
813            }
814        }
815        // Stream doesn't exist - need write lock to create it
816        {
817            let mut streams = self.streams.write().unwrap();
818            // Double-check (another thread may have created it)
819            streams
820                .entry(stream_id)
821                .or_insert_with(|| Mutex::new(Vec::new()))
822                .lock()
823                .unwrap()
824                .push((data, metadata));
825        }
826    }
827
828    /// Flush all buffered writes to archive in sorted stream_id order
829    /// Called by Thread 0 after barrier - ensures deterministic output
830    fn flush_to_archive(&self, archive: &mut Archive) -> Result<()> {
831        let streams = self.streams.read().unwrap();
832        // BTreeMap iterates in sorted key order (stream_id)
833        for (stream_id, stream_mutex) in streams.iter() {
834            let parts = stream_mutex.lock().unwrap();
835            for (data, metadata) in parts.iter() {
836                // Use buffered writes to reduce syscalls
837                archive.add_part_buffered(*stream_id, data.clone(), *metadata);
838            }
839        }
840        Ok(())
841    }
842
843    /// Clear all buffers (called after flush)
844    fn clear(&self) {
845        let mut streams = self.streams.write().unwrap();
846        for (_, stream_mutex) in streams.iter_mut() {
847            stream_mutex.lock().unwrap().clear();
848        }
849    }
850}
851
/// Buffer for a segment group (packs 50 segments together)
/// The first segment of a group is the reference; later segments are
/// LZ-encoded against it and emitted in packs of `PACK_CARDINALITY`.
struct SegmentGroupBuffer {
    group_id: u32,
    stream_id: usize,                           // Delta stream for packed segments
    ref_stream_id: usize,                       // Reference stream for first segment
    reference_segment: Option<BufferedSegment>, // First segment (reference for LZ encoding)
    segments: Vec<BufferedSegment>, // Up to PACK_CARDINALITY segments (EXCLUDING reference)
    ref_written: bool,              // Whether reference has been written
    segments_written: u32,          // Counter for delta segments written (NOT including reference)
    lz_diff: Option<LZDiff>, // LZ encoder prepared once with reference, reused for all segments (matches C++ AGC CSegment::lz_diff)
    // CRITICAL: Partial pack persistence to ensure pack alignment with decompression expectations
    // Pack N must contain entries for in_group_ids (N*50)+1 to (N+1)*50
    // These fields persist unique deltas until we have exactly 50 for a complete pack
    pending_deltas: Vec<Vec<u8>>, // Unique deltas waiting to be written (< 50)
    pending_delta_ids: Vec<u32>,  // in_group_ids for pending deltas (for deduplication)
    raw_placeholder_written: bool, // Whether raw group placeholder has been written
}
869
870impl SegmentGroupBuffer {
871    fn new(group_id: u32, stream_id: usize, ref_stream_id: usize) -> Self {
872        Self {
873            group_id,
874            stream_id,
875            ref_stream_id,
876            reference_segment: None,
877            segments: Vec::new(),
878            ref_written: false,
879            segments_written: 0,
880            lz_diff: None, // Prepared when reference is written (matches C++ AGC segment.cpp line 43)
881            pending_deltas: Vec::new(),
882            pending_delta_ids: Vec::new(),
883            raw_placeholder_written: false,
884        }
885    }
886
887    /// Check if this group should write a pack (has >= pack_size segments)
888    /// Matches C++ AGC's logic for writing packs when full
889    fn should_flush_pack(&self, pack_size: usize) -> bool {
890        // Count buffered segments (excluding reference which is handled separately)
891        self.segments.len() >= pack_size
892    }
893
894    /// Get current segment count (for pack-full detection)
895    fn segment_count(&self) -> usize {
896        self.segments.len()
897    }
898}
899
/// Batch-local state for processing new segments
/// Equivalent to C++ AGC's `m_kmers` local variable in process_new()
/// This is RESET at each sample boundary to match C++ AGC behavior
struct BatchState {
    /// New segments discovered in THIS batch (not found in global registry)
    /// Key: (front_kmer, back_kmer)
    /// Value: Vec of segments with that k-mer pair
    /// BTreeMap gives deterministic iteration order over the keys.
    new_segments: BTreeMap<(u64, u64), Vec<PendingSegment>>,

    /// Starting group ID for this batch (continues from global count)
    /// Never reset by `clear()` — monotonically increasing across batches.
    next_group_id: u32,
}
912
913impl BatchState {
914    fn new(starting_group_id: u32) -> Self {
915        BatchState {
916            new_segments: BTreeMap::new(),
917            next_group_id: starting_group_id,
918        }
919    }
920
921    /// Clear batch state for next sample (resets new_segments map)
922    /// next_group_id continues incrementing
923    fn clear(&mut self) {
924        self.new_segments.clear();
925        // next_group_id NOT reset - it continues from where it left off
926    }
927
928    /// Add a new segment to this batch
929    fn add_segment(&mut self, key: (u64, u64), segment: PendingSegment) {
930        self.new_segments
931            .entry(key)
932            .or_insert_with(Vec::new)
933            .push(segment);
934    }
935}
936
/// Number of segments packed together per delta pack (C++ AGC default)
const PACK_CARDINALITY: usize = 50;
/// First 16 groups are raw-only (no LZ encoding); LZ group IDs start here
const NO_RAW_GROUPS: u32 = 16;
941
/// Streaming compressor with queue-based API
///
/// # Example
/// ```no_run
/// use ragc_core::{StreamingQueueCompressor, StreamingQueueConfig};
/// use ahash::AHashSet;
///
/// # fn main() -> anyhow::Result<()> {
/// let config = StreamingQueueConfig::default();
/// let splitters = AHashSet::new(); // Normally from reference
/// let mut compressor = StreamingQueueCompressor::with_splitters(
///     "output.agc",
///     config,
///     splitters
/// )?;
///
/// // Push sequences (blocks when queue is full - automatic backpressure!)
/// # let sequences = vec![("sample1".to_string(), "chr1".to_string(), vec![0u8; 1000])];
/// for (sample, contig_name, data) in sequences {
///     compressor.push(sample, contig_name, data)?;
/// }
///
/// // Finalize - waits for all compression to complete
/// compressor.finalize()?;
/// # Ok(())
/// # }
/// ```
pub struct StreamingQueueCompressor {
    queue: Arc<MemoryBoundedQueue<ContigTask>>, // Memory-bounded task queue shared with workers (backpressure)
    workers: Vec<JoinHandle<Result<()>>>,       // Worker thread handles, joined at finalize
    barrier: Arc<std::sync::Barrier>, // Synchronization barrier for batch boundaries (matches C++ AGC bar.arrive_and_wait())
    collection: Arc<Mutex<CollectionV3>>, // Sample/contig metadata collection
    splitters: Arc<AHashSet<u64>>,    // Splitter k-mers used for segmentation
    config: StreamingQueueConfig,
    archive: Arc<Mutex<Archive>>, // Output archive (shared with workers)
    segment_groups: Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>, // Active per-group buffers
    group_counter: Arc<AtomicU32>,     // Starts at 16 for LZ groups
    raw_group_counter: Arc<AtomicU32>, // Round-robin counter for raw groups (0-15)
    reference_sample_name: Arc<Mutex<Option<String>>>, // First sample becomes reference
    // Segment splitting support (Phase 1)
    map_segments: Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>, // (front, back) -> group_id (BTreeMap for deterministic iteration)
    map_segments_terminators: Arc<RwLock<BTreeMap<u64, Vec<u64>>>>, // kmer -> [connected kmers] (BTreeMap for determinism)

    // FFI Grouping Engine - C++ AGC-compatible group assignment
    #[cfg(feature = "cpp_agc")]
    grouping_engine: Arc<Mutex<crate::ragc_ffi::GroupingEngine>>,

    // Persistent reference segment storage (matches C++ AGC v_segments)
    // Stores reference segment data even after groups are flushed, enabling LZ cost estimation
    // for subsequent samples (fixes multi-sample group fragmentation bug)
    reference_segments: Arc<RwLock<BTreeMap<u32, Vec<u8>>>>, // group_id -> reference segment data (BTreeMap for determinism)

    // Reference orientation tracking - stores is_rev_comp for each group's reference segment
    // When a delta segment joins an existing group, it MUST use the same orientation as the reference
    // to ensure LZ encoding works correctly (fixes ZERO_MATCH bug in Case 3 terminator segments)
    reference_orientations: Arc<RwLock<BTreeMap<u32, bool>>>, // group_id -> reference is_rev_comp (BTreeMap for determinism)

    // Track segment splits for renumbering subsequent segments
    // Maps (sample_name, contig_name, original_place) -> number of splits inserted before this position
    split_offsets: Arc<Mutex<BTreeMap<(String, String, usize), usize>>>, // BTreeMap for determinism

    // Priority assignment for interleaved processing (matches C++ AGC)
    // Higher priority = processed first (sample1 > sample2 > sample3...)
    sample_priorities: Arc<RwLock<BTreeMap<String, i32>>>, // sample_name -> priority (BTreeMap for determinism)

    // Track last sample to detect sample boundaries for sync token insertion
    last_sample_name: Arc<Mutex<Option<String>>>, // Last sample that was pushed

    // Batch-local group assignment (matches C++ AGC m_kmers per-batch behavior)
    // When batch_samples reaches batch_size, we flush pending segments and clear batch-local state
    batch_samples: Arc<Mutex<HashSet<String>>>, // Samples in current batch (matches C++ AGC pack_cardinality batch)
    batch_local_groups: Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>, // Batch-local m_kmers equivalent (BTreeMap for deterministic iteration)
    batch_local_terminators: Arc<Mutex<BTreeMap<u64, Vec<u64>>>>, // Batch-local terminators (BTreeMap for determinism)
    pending_batch_segments: Arc<Mutex<Vec<PendingSegment>>>, // Buffer segments until batch boundary
    // Two-tier segment buffering for C++ AGC 4-phase parallel pattern
    buffered_seg_part: Arc<BufferedSegPart>, // Per-group buffers for parallel Phase 1
    // Fallback minimizers map for segments with no terminator match (matches C++ AGC map_fallback_minimizers)
    map_fallback_minimizers: Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>>, // kmer -> [(front, back)] candidate group keys (BTreeMap for determinism)
    next_priority: Arc<Mutex<i32>>, // Decreases for each new sample (starts at i32::MAX)
    next_sequence: Arc<std::sync::atomic::AtomicU64>, // Increases for each contig (FASTA order)
    global_contig_count: Arc<AtomicUsize>, // GLOBAL contig counter for synchronization (C++ AGC: cnt_contigs_in_sample)

    // Deferred metadata streams - written AFTER segment data (C++ AGC compatibility)
    // C++ AGC writes segment data first, then metadata streams at the end
    deferred_file_type_info: (usize, Vec<u8>), // (stream_id, data)
    deferred_params: (usize, Vec<u8>),         // (stream_id, data)
    deferred_splitters: (usize, Vec<u8>),      // (stream_id, data)
    deferred_segment_splitters: (usize, Vec<u8>), // (stream_id, data)

    // Dynamic splitter discovery for adaptive mode (matches C++ AGC find_new_splitters)
    // Stores reference k-mers to exclude when finding new splitters for non-reference contigs
    ref_singletons: Arc<Vec<u64>>, // Sorted for binary search - reference singleton k-mers (v_candidate_kmers)
    ref_duplicates: Arc<AHashSet<u64>>, // Reference duplicate k-mers (v_duplicated_kmers)

    // Parallel Phase 3 state for atomic work-stealing (matches C++ AGC architecture)
    parallel_state: Arc<ParallelFlushState>,

    // Per-stream write buffer for parallel Phase 3 (C++ AGC pattern: per-segment mutex)
    // Workers on different streams can buffer writes concurrently without contention
    write_buffer: Arc<ParallelWriteBuffer>,

    // RAW segment buffers for deferred classification (parallel Phase 1 optimization)
    // PER-WORKER buffers eliminate contention: each worker pushes to its own buffer
    // Thread 0 drains all buffers at barrier for classification
    raw_segment_buffers: Arc<Vec<Mutex<Vec<RawBufferedSegment>>>>,
}
1048
1049impl StreamingQueueCompressor {
1050    /// Create a new streaming compressor with pre-computed splitters
1051    ///
1052    /// Use this when you already have splitters (e.g., from a reference genome)
1053    ///
1054    /// # Arguments
1055    /// * `output_path` - Path to output AGC archive
1056    /// * `config` - Compression configuration
1057    /// * `splitters` - Pre-computed splitter k-mers
1058    pub fn with_splitters(
1059        output_path: impl AsRef<Path>,
1060        config: StreamingQueueConfig,
1061        splitters: AHashSet<u64>,
1062    ) -> Result<Self> {
1063        // Call internal with empty ref data (no dynamic splitter discovery)
1064        Self::with_splitters_internal(
1065            output_path,
1066            config,
1067            splitters,
1068            Arc::new(Vec::new()),
1069            Arc::new(AHashSet::new()),
1070        )
1071    }
1072
1073    /// Internal constructor that accepts all splitter data
1074    fn with_splitters_internal(
1075        output_path: impl AsRef<Path>,
1076        config: StreamingQueueConfig,
1077        splitters: AHashSet<u64>,
1078        ref_singletons: Arc<Vec<u64>>,
1079        ref_duplicates: Arc<AHashSet<u64>>,
1080    ) -> Result<Self> {
1081        let output_path = output_path.as_ref();
1082        let archive_path = output_path.to_string_lossy().to_string();
1083
1084        if config.verbosity > 0 {
1085            eprintln!("Initializing streaming compressor...");
1086            eprintln!(
1087                "  Queue capacity: {} GB",
1088                config.queue_capacity / (1024 * 1024 * 1024)
1089            );
1090            eprintln!("  Worker threads: {}", config.num_threads);
1091            eprintln!("  Splitters: {}", splitters.len());
1092        }
1093
1094        // Create archive
1095        let mut archive = Archive::new_writer();
1096        archive.open(output_path)?;
1097
1098        // Create collection
1099        let mut collection = CollectionV3::new();
1100        collection.set_config(config.segment_size as u32, config.k as u32, None);
1101
1102        // CRITICAL: Register collection streams FIRST (C++ AGC compatibility)
1103        // C++ AGC expects collection-samples at stream 0, collection-contigs at 1, collection-details at 2
1104        collection.prepare_for_compression(&mut archive)?;
1105
1106        // DEFERRED METADATA STREAMS (C++ AGC compatibility)
1107        // C++ AGC writes segment data FIRST, then metadata streams at the END.
1108        // We register streams now but defer writing data until finalize().
1109
1110        // Prepare file_type_info data (defer write)
1111        let deferred_file_type_info = {
1112            let mut data = Vec::new();
1113            let append_str = |data: &mut Vec<u8>, s: &str| {
1114                data.extend_from_slice(s.as_bytes());
1115                data.push(0);
1116            };
1117
1118            append_str(&mut data, "producer");
1119            append_str(&mut data, "ragc");
1120            append_str(&mut data, "producer_version_major");
1121            append_str(&mut data, &ragc_common::AGC_FILE_MAJOR.to_string());
1122            append_str(&mut data, "producer_version_minor");
1123            append_str(&mut data, &ragc_common::AGC_FILE_MINOR.to_string());
1124            append_str(&mut data, "producer_version_build");
1125            append_str(&mut data, "0");
1126            append_str(&mut data, "file_version_major");
1127            append_str(&mut data, &ragc_common::AGC_FILE_MAJOR.to_string());
1128            append_str(&mut data, "file_version_minor");
1129            append_str(&mut data, &ragc_common::AGC_FILE_MINOR.to_string());
1130            append_str(&mut data, "comment");
1131            append_str(
1132                &mut data,
1133                &format!(
1134                    "RAGC v.{}.{}",
1135                    ragc_common::AGC_FILE_MAJOR,
1136                    ragc_common::AGC_FILE_MINOR
1137                ),
1138            );
1139
1140            let stream_id = archive.register_stream("file_type_info");
1141            // DEFERRED: archive.add_part(stream_id, &data, 7) will be called in finalize()
1142            (stream_id, data)
1143        };
1144
1145        // Prepare params data (defer write)
1146        let deferred_params = {
1147            let stream_id = archive.register_stream("params");
1148            let mut data = Vec::new();
1149            data.extend_from_slice(&(config.k as u32).to_le_bytes());
1150            data.extend_from_slice(&(config.min_match_len as u32).to_le_bytes());
1151            data.extend_from_slice(&50u32.to_le_bytes()); // pack_cardinality (default)
1152            data.extend_from_slice(&(config.segment_size as u32).to_le_bytes());
1153            // DEFERRED: archive.add_part(stream_id, &data, 0) will be called in finalize()
1154            (stream_id, data)
1155        };
1156
1157        // Prepare empty splitters stream (defer write)
1158        let deferred_splitters = {
1159            let stream_id = archive.register_stream("splitters");
1160            let data = Vec::new();
1161            // DEFERRED: archive.add_part(stream_id, &data, 0) will be called in finalize()
1162            (stream_id, data)
1163        };
1164
1165        // Prepare empty segment-splitters stream (defer write)
1166        let deferred_segment_splitters = {
1167            let stream_id = archive.register_stream("segment-splitters");
1168            let data = Vec::new();
1169            // DEFERRED: archive.add_part(stream_id, &data, 0) will be called in finalize()
1170            (stream_id, data)
1171        };
1172
1173        let collection = Arc::new(Mutex::new(collection));
1174        let archive = Arc::new(Mutex::new(archive));
1175
1176        // Create memory-bounded queue
1177        let queue = Arc::new(MemoryBoundedQueue::new(config.queue_capacity));
1178
1179        let splitters = Arc::new(splitters);
1180        // ref_singletons and ref_duplicates are passed as parameters to ensure workers
1181        // get the same Arc as stored in self (critical for dynamic splitter discovery)
1182
1183        // Segment grouping for LZ packing (using BTreeMap for better memory efficiency)
1184        let segment_groups = Arc::new(Mutex::new(BTreeMap::new()));
1185        let group_counter = Arc::new(AtomicU32::new(NO_RAW_GROUPS)); // Start at 16 (LZ groups), group 0 reserved for orphan segments
1186        let raw_group_counter = Arc::new(AtomicU32::new(0)); // Round-robin counter for raw groups (0-15)
1187        let reference_sample_name = Arc::new(Mutex::new(None)); // Shared across all workers
1188
1189        // Segment splitting support (Phase 1)
1190        // Initialize map_segments with (MISSING_KMER, MISSING_KMER) → 0
1191        // This matches C++ AGC line 2396: map_segments[make_pair(~0ull, ~0ull)] = 0
1192        // All raw segments (both k-mers missing) will map to group 0
1193        let mut initial_map_segments = BTreeMap::new();
1194        initial_map_segments.insert(
1195            SegmentGroupKey {
1196                kmer_front: MISSING_KMER,
1197                kmer_back: MISSING_KMER,
1198            },
1199            0,
1200        );
1201        let map_segments: Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>> =
1202            Arc::new(RwLock::new(initial_map_segments));
1203        let map_segments_terminators: Arc<RwLock<BTreeMap<u64, Vec<u64>>>> =
1204            Arc::new(RwLock::new(BTreeMap::new()));
1205        let split_offsets: Arc<Mutex<BTreeMap<(String, String, usize), usize>>> =
1206            Arc::new(Mutex::new(BTreeMap::new()));
1207
1208        // Persistent reference segment storage (matches C++ AGC v_segments)
1209        let reference_segments: Arc<RwLock<BTreeMap<u32, Vec<u8>>>> =
1210            Arc::new(RwLock::new(BTreeMap::new()));
1211
1212        // Reference orientation tracking (fixes ZERO_MATCH bug in Case 3 terminator segments)
1213        let reference_orientations: Arc<RwLock<BTreeMap<u32, bool>>> =
1214            Arc::new(RwLock::new(BTreeMap::new()));
1215
1216        // FFI Grouping Engine - C++ AGC-compatible group assignment
1217        #[cfg(feature = "cpp_agc")]
1218        let grouping_engine = Arc::new(Mutex::new(crate::ragc_ffi::GroupingEngine::new(
1219            config.k as u32,
1220            NO_RAW_GROUPS, // Start group IDs at 16 (group 0 reserved for orphan segments)
1221        )));
1222
1223        // Priority tracking for interleaved processing (matches C++ AGC)
1224        let sample_priorities: Arc<RwLock<BTreeMap<String, i32>>> =
1225            Arc::new(RwLock::new(BTreeMap::new()));
1226        let last_sample_name: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None)); // Track last sample for boundary detection
1227        let next_priority = Arc::new(Mutex::new(i32::MAX)); // Start high, decrease for each sample
1228        let next_sequence = Arc::new(std::sync::atomic::AtomicU64::new(0)); // Increases for each contig (FASTA order)
1229        let global_contig_count = Arc::new(AtomicUsize::new(0)); // GLOBAL counter across all samples (C++ AGC: cnt_contigs_in_sample)
1230
1231        // Batch-local group assignment (matches C++ AGC m_kmers per-batch behavior)
1232        let batch_samples: Arc<Mutex<HashSet<String>>> = Arc::new(Mutex::new(HashSet::new()));
1233        let batch_local_groups: Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>> =
1234            Arc::new(Mutex::new(BTreeMap::new()));
1235        let batch_local_terminators: Arc<Mutex<BTreeMap<u64, Vec<u64>>>> =
1236            Arc::new(Mutex::new(BTreeMap::new()));
1237        let pending_batch_segments: Arc<Mutex<Vec<PendingSegment>>> =
1238            Arc::new(Mutex::new(Vec::new()));
1239        // Two-tier segment buffering for C++ AGC 4-phase parallel pattern
1240        let buffered_seg_part: Arc<BufferedSegPart> =
1241            Arc::new(BufferedSegPart::new(NO_RAW_GROUPS as usize));
1242        let map_fallback_minimizers: Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>> =
1243            Arc::new(Mutex::new(BTreeMap::new()));
1244
1245        // Initialize barrier for sample boundary synchronization (matches C++ AGC barrier)
1246        // All workers must synchronize at sample boundaries to ensure batch flush completes before processing new samples
1247        let barrier = Arc::new(std::sync::Barrier::new(config.num_threads));
1248
1249        // Parallel Phase 3 state for atomic work-stealing (matches C++ AGC architecture)
1250        let parallel_state = Arc::new(ParallelFlushState::new());
1251
1252        // Per-stream write buffer for parallel Phase 3 (C++ AGC pattern: per-segment mutex)
1253        // Workers on different streams can buffer writes concurrently without contention
1254        let write_buffer = Arc::new(ParallelWriteBuffer::new());
1255
1256        // RAW segment buffers for deferred classification (parallel Phase 1 optimization)
1257        // PER-WORKER buffers eliminate contention: each worker pushes to its own buffer
1258        let raw_segment_buffers: Arc<Vec<Mutex<Vec<RawBufferedSegment>>>> = Arc::new(
1259            (0..config.num_threads)
1260                .map(|_| Mutex::new(Vec::new()))
1261                .collect(),
1262        );
1263
1264        // Spawn worker threads
1265        let mut workers = Vec::new();
1266        for worker_id in 0..config.num_threads {
1267            let queue = Arc::clone(&queue);
1268            let collection = Arc::clone(&collection);
1269            let splitters = Arc::clone(&splitters);
1270            let ref_singletons = Arc::clone(&ref_singletons);
1271            let ref_duplicates = Arc::clone(&ref_duplicates);
1272            let archive = Arc::clone(&archive);
1273            let segment_groups = Arc::clone(&segment_groups);
1274            let group_counter = Arc::clone(&group_counter);
1275            let raw_group_counter = Arc::clone(&raw_group_counter);
1276            let reference_sample_name = Arc::clone(&reference_sample_name);
1277            let map_segments = Arc::clone(&map_segments);
1278            let map_segments_terminators = Arc::clone(&map_segments_terminators);
1279            let reference_segments = Arc::clone(&reference_segments);
1280            let reference_orientations = Arc::clone(&reference_orientations);
1281            let split_offsets = Arc::clone(&split_offsets);
1282            #[cfg(feature = "cpp_agc")]
1283            let grouping_engine = Arc::clone(&grouping_engine);
1284            let batch_samples = Arc::clone(&batch_samples);
1285            let batch_local_groups = Arc::clone(&batch_local_groups);
1286            let batch_local_terminators = Arc::clone(&batch_local_terminators);
1287            let pending_batch_segments = Arc::clone(&pending_batch_segments);
1288            let buffered_seg_part = Arc::clone(&buffered_seg_part);
1289            let map_fallback_minimizers = Arc::clone(&map_fallback_minimizers);
1290            let barrier = Arc::clone(&barrier);
1291            let parallel_state = Arc::clone(&parallel_state);
1292            let write_buffer = Arc::clone(&write_buffer);
1293            let raw_segment_buffers = Arc::clone(&raw_segment_buffers);
1294            let config = config.clone();
1295
1296            let handle = thread::spawn(move || {
1297                worker_thread(
1298                    worker_id,
1299                    queue,
1300                    collection,
1301                    splitters,
1302                    ref_singletons,
1303                    ref_duplicates,
1304                    archive,
1305                    segment_groups,
1306                    group_counter,
1307                    raw_group_counter,
1308                    reference_sample_name,
1309                    map_segments,
1310                    map_segments_terminators,
1311                    reference_segments,
1312                    reference_orientations,
1313                    split_offsets,
1314                    #[cfg(feature = "cpp_agc")]
1315                    grouping_engine,
1316                    batch_samples,
1317                    batch_local_groups,
1318                    batch_local_terminators,
1319                    pending_batch_segments,
1320                    buffered_seg_part,
1321                    map_fallback_minimizers,
1322                    raw_segment_buffers,
1323                    barrier,
1324                    parallel_state,
1325                    write_buffer,
1326                    config,
1327                )
1328            });
1329
1330            workers.push(handle);
1331        }
1332
1333        if config.verbosity > 0 {
1334            eprintln!("Ready to receive sequences!");
1335        }
1336
1337        Ok(Self {
1338            queue,
1339            workers,
1340            barrier,
1341            collection,
1342            splitters,
1343            config,
1344            archive,
1345            segment_groups,
1346            group_counter,
1347            raw_group_counter,
1348            reference_sample_name,
1349            map_segments,
1350            map_segments_terminators,
1351            #[cfg(feature = "cpp_agc")]
1352            grouping_engine,
1353            reference_segments,
1354            reference_orientations,
1355            split_offsets,
1356            sample_priorities,
1357            last_sample_name,
1358            next_priority,
1359            batch_samples,
1360            batch_local_groups,
1361            batch_local_terminators,
1362            pending_batch_segments,
1363            buffered_seg_part,
1364            map_fallback_minimizers,
1365            next_sequence,
1366            global_contig_count,
1367            // Deferred metadata streams (written at end for C++ AGC compatibility)
1368            deferred_file_type_info,
1369            deferred_params,
1370            deferred_splitters,
1371            deferred_segment_splitters,
1372            // Dynamic splitter discovery - MUST use the SAME Arcs passed to workers!
1373            // (empty by default - populated with_full_splitter_data)
1374            ref_singletons,
1375            ref_duplicates,
1376            // Parallel Phase 3 state
1377            parallel_state,
1378            // Per-stream write buffer
1379            write_buffer,
1380            // Raw segment buffers for deferred classification (per-worker)
1381            raw_segment_buffers,
1382        })
1383    }
1384
1385    /// Create a new streaming compressor with full splitter data for dynamic discovery
1386    ///
1387    /// This is the preferred constructor when using adaptive mode. It accepts:
1388    /// - `splitters`: Pre-computed splitter k-mers from reference (for initial segmentation)
1389    /// - `singletons`: All singleton k-mers from reference (for exclusion in find_new_splitters)
1390    /// - `duplicates`: All duplicate k-mers from reference (for exclusion in find_new_splitters)
1391    ///
1392    /// # Arguments
1393    /// * `output_path` - Path to output AGC archive
1394    /// * `config` - Compression configuration
1395    /// * `splitters` - Pre-computed splitter k-mers
1396    /// * `singletons` - Reference singleton k-mers (sorted Vec for binary search)
1397    /// * `duplicates` - Reference duplicate k-mers
1398    pub fn with_full_splitter_data(
1399        output_path: impl AsRef<Path>,
1400        config: StreamingQueueConfig,
1401        splitters: AHashSet<u64>,
1402        singletons: Vec<u64>,
1403        duplicates: AHashSet<u64>,
1404    ) -> Result<Self> {
1405        // Sort singletons for binary search before creating compressor
1406        let mut sorted_singletons = singletons;
1407        sorted_singletons.sort_unstable();
1408
1409        let verbosity = config.verbosity;
1410        let ref_singletons = Arc::new(sorted_singletons);
1411        let ref_duplicates = Arc::new(duplicates);
1412
1413        if verbosity > 0 {
1414            eprintln!(
1415                "  Dynamic splitter discovery enabled: {} ref singletons, {} ref duplicates",
1416                ref_singletons.len(),
1417                ref_duplicates.len()
1418            );
1419        }
1420
1421        // Call internal constructor with ref data so workers get the correct Arcs
1422        Self::with_splitters_internal(
1423            output_path,
1424            config,
1425            splitters,
1426            ref_singletons,
1427            ref_duplicates,
1428        )
1429    }
1430
1431    /// Create compressor and determine splitters from first contig
1432    ///
1433    /// **Note**: This requires at least one contig to be pushed before workers start.
1434    /// Consider using `with_splitters()` instead if you have a reference genome.
1435    pub fn new(output_path: impl AsRef<Path>, config: StreamingQueueConfig) -> Result<Self> {
1436        // Start with empty splitters - will be determined from first push
1437        Self::with_splitters(output_path, config, AHashSet::new())
1438    }
1439
    /// Push a contig to the compression queue
    ///
    /// **BLOCKS** if the queue is full (automatic backpressure!)
    ///
    /// Side effects beyond queueing:
    /// - On the very first push after `new()` (no splitters, no workers yet),
    ///   splitters are determined from this contig and workers are spawned.
    /// - The contig is registered in the collection metadata.
    /// - The first sample seen becomes the reference sample.
    /// - Sync tokens may be injected at pack boundaries (concatenated mode)
    ///   or sample boundaries (only when `RAGC_SYNC_PER_SAMPLE=1`).
    ///
    /// # Arguments
    /// * `sample_name` - Name of the sample
    /// * `contig_name` - Name of the contig
    /// * `data` - Contig sequence data (Vec<u8>)
    ///
    /// # Example
    /// ```no_run
    /// # use ragc_core::{StreamingQueueCompressor, StreamingQueueConfig};
    /// # use ahash::AHashSet;
    /// # let mut compressor = StreamingQueueCompressor::with_splitters("out.agc", StreamingQueueConfig::default(), AHashSet::new())?;
    /// compressor.push("sample1".to_string(), "chr1".to_string(), vec![b'A', b'T', b'G', b'C'])?;
    /// # Ok::<(), anyhow::Error>(())
    /// ```
    pub fn push(&mut self, sample_name: String, contig_name: String, data: Contig) -> Result<()> {
        // If no splitters yet, determine from this contig
        // (only reachable on the first push after `new()`; the `with_splitters*`
        // constructors spawn workers up-front, making `workers` non-empty).
        if self.splitters.is_empty() && self.workers.is_empty() {
            if self.config.verbosity > 0 {
                eprintln!("Determining splitters from first contig...");
            }

            let (splitters, _, _) =
                determine_splitters(&[data.clone()], self.config.k, self.config.segment_size);

            if self.config.verbosity > 0 {
                eprintln!("Found {} splitters", splitters.len());
            }

            // Update splitters and spawn workers
            self.splitters = Arc::new(splitters);

            // Spawn workers now that we have splitters.
            // Each worker receives its own clone of every shared handle below.
            for worker_id in 0..self.config.num_threads {
                let queue = Arc::clone(&self.queue);
                let collection = Arc::clone(&self.collection);
                let splitters = Arc::clone(&self.splitters);
                let ref_singletons = Arc::clone(&self.ref_singletons);
                let ref_duplicates = Arc::clone(&self.ref_duplicates);
                let archive = Arc::clone(&self.archive);
                let segment_groups = Arc::clone(&self.segment_groups);
                let group_counter = Arc::clone(&self.group_counter);
                let raw_group_counter = Arc::clone(&self.raw_group_counter);
                let reference_sample_name = Arc::clone(&self.reference_sample_name);
                let map_segments = Arc::clone(&self.map_segments);
                let map_segments_terminators = Arc::clone(&self.map_segments_terminators);
                let reference_segments = Arc::clone(&self.reference_segments);
                let reference_orientations = Arc::clone(&self.reference_orientations);
                let split_offsets = Arc::clone(&self.split_offsets);
                #[cfg(feature = "cpp_agc")]
                let grouping_engine = Arc::clone(&self.grouping_engine);
                let batch_samples = Arc::clone(&self.batch_samples);
                let batch_local_groups = Arc::clone(&self.batch_local_groups);
                let batch_local_terminators = Arc::clone(&self.batch_local_terminators);
                let pending_batch_segments = Arc::clone(&self.pending_batch_segments);
                let buffered_seg_part = Arc::clone(&self.buffered_seg_part);
                let map_fallback_minimizers = Arc::clone(&self.map_fallback_minimizers);
                let raw_segment_buffers = Arc::clone(&self.raw_segment_buffers);
                let barrier = Arc::clone(&self.barrier);
                let parallel_state = Arc::clone(&self.parallel_state);
                let write_buffer = Arc::clone(&self.write_buffer);
                let config = self.config.clone();

                let handle = thread::spawn(move || {
                    worker_thread(
                        worker_id,
                        queue,
                        collection,
                        splitters,
                        ref_singletons,
                        ref_duplicates,
                        archive,
                        segment_groups,
                        group_counter,
                        raw_group_counter,
                        reference_sample_name,
                        map_segments,
                        map_segments_terminators,
                        reference_segments,
                        reference_orientations,
                        split_offsets,
                        #[cfg(feature = "cpp_agc")]
                        grouping_engine,
                        batch_samples,
                        batch_local_groups,
                        batch_local_terminators,
                        pending_batch_segments,
                        buffered_seg_part,
                        map_fallback_minimizers,
                        raw_segment_buffers,
                        barrier,
                        parallel_state,
                        write_buffer,
                        config,
                    )
                });

                self.workers.push(handle);
            }

            if self.config.verbosity > 0 {
                eprintln!("Workers spawned and ready!");
            }
        }

        // Register contig in collection
        // (scoped so the collection lock is released before touching other locks)
        {
            let mut collection = self.collection.lock().unwrap();
            collection
                .register_sample_contig(&sample_name, &contig_name)
                .context("Failed to register contig")?;
        }

        // Set first sample as reference (multi-file mode)
        {
            let mut ref_sample = self.reference_sample_name.lock().unwrap();
            if ref_sample.is_none() {
                if self.config.verbosity > 0 {
                    eprintln!("Using first sample ({}) as reference", sample_name);
                }
                *ref_sample = Some(sample_name.clone());
            }
        }

        // Calculate task size
        let task_size = data.len();

        // Get sequence number for FASTA ordering (lower = earlier = higher priority)
        let sequence = self
            .next_sequence
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);

        // Get or assign priority for this sample (matches C++ AGC priority queue)
        // Higher priority = processed first (decreases for each new sample)
        // C++ AGC also decrements priority every 50 contigs WITHIN a sample (max_no_contigs_before_synchronization)
        let sample_priority = {
            let mut priorities = self.sample_priorities.write().unwrap();
            let current_priority = *priorities.entry(sample_name.clone()).or_insert_with(|| {
                // First time seeing this sample - assign new priority
                let mut next_p = self.next_priority.lock().unwrap();
                let priority = *next_p;
                *next_p -= 1; // Decrement for next sample (C++ AGC uses --sample_priority)
                priority
            });

            // Track GLOBAL contig count and insert sync tokens every 50 contigs (pack_cardinality)
            // C++ AGC: if (++cnt_contigs_in_sample >= max_no_contigs_before_synchronization)
            // NOTE: Despite the name, C++ AGC's cnt_contigs_in_sample is GLOBAL, not per-sample!
            // FIX 5: Only send PACK_BOUNDARY sync tokens in concatenated mode (single file)
            // In non-concatenated mode (multiple files), only SAMPLE_BOUNDARY sync tokens are sent
            let count = self
                .global_contig_count
                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
            // `fetch_add` returns the PREVIOUS value, so `count + 1` is this contig's ordinal.
            let need_sync =
                self.config.concatenated_genomes && (count + 1) % self.config.pack_size == 0;

            if need_sync {
                // Reached synchronization point (every 50 contigs GLOBALLY)
                // C++ AGC does: cnt_contigs_in_sample = 0; --sample_priority;
                if let Some(priority) = priorities.get_mut(&sample_name) {
                    *priority -= 1;
                }

                // Get the NEW priority (after decrement) for sync tokens
                let new_priority = *priorities.get(&sample_name).unwrap();

                // Drop locks before inserting sync tokens to avoid deadlock
                drop(priorities);

                // Insert sync tokens (matches C++ AGC EmplaceManyNoCost)
                // CRITICAL: Sync tokens must have HIGHER priority than subsequent contigs
                // to ensure they're processed before any contigs with the new_priority.
                if self.config.verbosity > 0 {
                    eprintln!(
                        "PACK_BOUNDARY: Inserting {} sync tokens after {} contigs (global count)",
                        self.config.num_threads,
                        count + 1
                    );
                }

                // One token per worker: every worker must pop one to reach the barrier.
                for _ in 0..self.config.num_threads {
                    let sync_token = ContigTask {
                        sample_name: sample_name.clone(),
                        contig_name: String::from("<SYNC>"),
                        data: Vec::new(),
                        // Use large priority boost to ensure sync tokens are processed BEFORE any contigs
                        // With +1, contigs with same priority but higher cost were being popped first
                        // This caused barrier deadlock when some workers exited before others got sync tokens
                        sample_priority: new_priority + 1_000_000,
                        cost: 0,
                        sequence,
                        is_sync_token: true,
                    };
                    self.queue.push(sync_token, 0)?;
                }

                // Return NEW priority for subsequent contigs
                new_priority
            } else {
                current_priority // Use priority BEFORE potential decrement (this contig uses current priority)
            }
        };

        // Insert sync tokens at sample boundaries (matches C++ AGC registration tokens)
        // OPTIMIZATION: In multi-file mode, SKIP per-sample sync tokens for better parallelism
        // This batches all samples together - sync only happens at finalization
        // Set RAGC_SYNC_PER_SAMPLE=1 to force per-sample sync (matches old behavior)
        {
            let mut last_sample = self.last_sample_name.lock().unwrap();
            if let Some(ref last) = *last_sample {
                if last != &sample_name {
                    // Sample boundary detected
                    // Only insert sync tokens if forced by env var (for debugging/compatibility)
                    // NOTE(review): the env var is re-read on every sample boundary;
                    // cheap, but could be cached at construction time.
                    let force_sync = std::env::var("RAGC_SYNC_PER_SAMPLE")
                        .map(|v| v == "1")
                        .unwrap_or(false);

                    if force_sync {
                        if self.config.verbosity > 0 {
                            eprintln!(
                                "SAMPLE_BOUNDARY: Inserting {} sync tokens (transitioning from {} to {})",
                                self.config.num_threads, last, sample_name
                            );
                        }

                        // Insert num_threads sync tokens (matches C++ AGC EmplaceManyNoCost)
                        // All workers must pop a token and synchronize before processing new sample
                        // CRITICAL: Sync tokens must have MUCH HIGHER priority than any contigs
                        // to ensure they're pulled and processed BEFORE any contigs.
                        // Use large priority boost (+1_000_000) to overcome cost-based tie-breaking
                        // which was causing contigs to be popped before sync tokens at same priority.
                        for _ in 0..self.config.num_threads {
                            let sync_token = ContigTask {
                                sample_name: sample_name.clone(),
                                contig_name: String::from("<SYNC>"),
                                data: Vec::new(), // Empty data for sync token
                                sample_priority: sample_priority + 1_000_000, // Much higher priority than any contigs
                                cost: 0, // No cost for sync tokens
                                sequence,
                                is_sync_token: true,
                            };
                            self.queue.push(sync_token, 0)?; // 0 size for sync tokens
                        }
                    } else if self.config.verbosity > 1 {
                        eprintln!(
                            "SAMPLE_BOUNDARY: SKIPPING sync tokens (multi-file batching: {} -> {})",
                            last, sample_name
                        );
                    }
                }
            }
            // Update last sample name
            *last_sample = Some(sample_name.clone());
        }

        // Create task with priority information
        // NOTE: sequence is used for FASTA ordering (lower = processed first)
        let cost = data.len(); // C++ AGC: auto cost = contig.size()
        let task = ContigTask {
            sample_name: sample_name.clone(),
            contig_name,
            data,
            sample_priority,
            cost,
            sequence,
            is_sync_token: false, // Normal contig task, not a sync token
        };

        // Push to queue (BLOCKS if queue is full!)
        // Queue is now a priority queue - highest priority processed first
        // eprintln!("[RAGC PUSH] sample={} contig={} priority={} cost={} sequence={}",
        //           &task.sample_name, &task.contig_name, task.sample_priority, task.cost, task.sequence);
        self.queue
            .push(task, task_size)
            .context("Failed to push to queue")?;

        Ok(())
    }
1720
1721    /// Finalize compression
1722    ///
1723    /// This will:
1724    /// 1. Close the queue (no more pushes allowed)
1725    /// 2. Wait for all worker threads to finish processing
1726    /// 3. Write metadata to the archive
1727    /// 4. Close the archive file
1728    ///
1729    /// # Example
1730    /// ```no_run
1731    /// # use ragc_core::{StreamingQueueCompressor, StreamingQueueConfig};
1732    /// # use ahash::AHashSet;
1733    /// # let mut compressor = StreamingQueueCompressor::with_splitters("out.agc", StreamingQueueConfig::default(), AHashSet::new())?;
1734    /// // ... push sequences ...
1735    /// compressor.finalize()?;
1736    /// # Ok::<(), anyhow::Error>(())
1737    /// ```
1738    pub fn drain(&self) -> Result<()> {
1739        if self.config.verbosity > 0 {
1740            eprintln!(
1741                "Draining queue (waiting for {} items to be processed)...",
1742                self.queue.len()
1743            );
1744        }
1745
1746        // Wait for queue to empty
1747        // Poll every 100ms until queue is empty
1748        while self.queue.len() > 0 {
1749            std::thread::sleep(std::time::Duration::from_millis(100));
1750        }
1751
1752        if self.config.verbosity > 0 {
1753            eprintln!("Queue drained - all queued contigs processed");
1754        }
1755
1756        Ok(())
1757    }
1758
1759    /// Insert sync tokens to trigger incremental compression of buffered segments.
1760    /// Call this after pushing a batch of samples to process them incrementally
1761    /// instead of waiting for finalize().
1762    pub fn sync_and_flush(&self, sample_name: &str) -> Result<()> {
1763        // Insert sync tokens for each worker
1764        let sequence = self
1765            .next_sequence
1766            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
1767
1768        for _ in 0..self.config.num_threads {
1769            let sync_token = ContigTask {
1770                sample_name: format!("<SYNC:{}>", sample_name),
1771                contig_name: String::from("<SYNC>"),
1772                data: Vec::new(),
1773                sample_priority: 1_000_000_i32, // High priority = processed after pending contigs
1774                cost: 0,
1775                sequence,
1776                is_sync_token: true,
1777            };
1778            self.queue.push(sync_token, 0)?;
1779        }
1780
1781        // Wait for sync tokens to be processed (queue empty)
1782        while self.queue.len() > 0 {
1783            std::thread::sleep(std::time::Duration::from_millis(10));
1784        }
1785
1786        Ok(())
1787    }
1788
1789    pub fn finalize(self) -> Result<()> {
1790        if self.config.verbosity > 0 {
1791            eprintln!("Finalizing compression...");
1792        }
1793
1794        // CRITICAL: Insert FINAL sync tokens before closing queue
1795        // This ensures buffered_seg_part data is processed and flushed
1796        // (matches C++ AGC line 2236-2244: final sync at end of input)
1797        if self.config.verbosity > 0 {
1798            eprintln!(
1799                "  Inserting {} final sync tokens...",
1800                self.config.num_threads
1801            );
1802        }
1803
1804        // Use sequence 0 and high priority to ensure sync tokens are processed last
1805        let sequence = 0;
1806
1807        for _ in 0..self.config.num_threads {
1808            let sync_token = ContigTask {
1809                sample_name: String::from("<FINAL>"),
1810                contig_name: String::from("<SYNC>"),
1811                data: Vec::new(),
1812                sample_priority: 1_000_000_i32, // Very high priority = processed after all real contigs
1813                cost: 0,
1814                sequence,
1815                is_sync_token: true,
1816            };
1817            self.queue.push(sync_token, 0)?;
1818        }
1819
1820        if self.config.verbosity > 0 {
1821            eprintln!("  Closing queue...");
1822        }
1823
1824        // Close queue - no more pushes allowed
1825        self.queue.close();
1826
1827        if self.config.verbosity > 0 {
1828            eprintln!("  Waiting for {} workers to finish...", self.workers.len());
1829        }
1830
1831        let wait_start = std::time::Instant::now();
1832        // Wait for all workers to finish
1833        for (i, handle) in self.workers.into_iter().enumerate() {
1834            handle
1835                .join()
1836                .expect("Worker thread panicked")
1837                .with_context(|| format!("Worker {} failed", i))?;
1838        }
1839
1840        if self.config.verbosity > 0 {
1841            eprintln!(
1842                "FINALIZE_TIMING: Wait for workers took {:?}",
1843                wait_start.elapsed()
1844            );
1845            eprintln!("All workers finished!");
1846            eprintln!("Flushing remaining segment packs...");
1847        }
1848
1849        // Flush all remaining partial packs using PARALLEL compression
1850        let flush_start = std::time::Instant::now();
1851        {
1852            use crate::segment_compression::compress_segment_configured;
1853            use rayon::prelude::*;
1854
1855            let mut groups = self.segment_groups.lock().unwrap();
1856            let num_groups = groups.len();
1857
1858            // Phase 1: Flush any groups with pending segments (rare, usually 0-1)
1859            let phase1_start = std::time::Instant::now();
1860            let mut phase1_count = 0;
1861            for (key, buffer) in groups.iter_mut() {
1862                if !buffer.segments.is_empty() || !buffer.ref_written {
1863                    phase1_count += 1;
1864                    if self.config.verbosity > 1 {
1865                        eprintln!(
1866                            "Flushing group {} with {} segments (k-mers: {:#x}, {:#x})",
1867                            buffer.group_id,
1868                            buffer.segments.len(),
1869                            key.kmer_front,
1870                            key.kmer_back
1871                        );
1872                    }
1873                    flush_pack(
1874                        buffer,
1875                        &self.collection,
1876                        &self.archive,
1877                        &self.config,
1878                        &self.reference_segments,
1879                    )
1880                    .context("Failed to flush remaining pack")?;
1881                }
1882            }
1883            if self.config.verbosity > 0 {
1884                eprintln!(
1885                    "FLUSH_PHASE1: {} groups with pending segments, took {:?}",
1886                    phase1_count,
1887                    phase1_start.elapsed()
1888                );
1889            }
1890
1891            // Phase 2: Collect and PARALLEL compress pending_deltas
1892            // Each entry: (stream_id, raw_data, compressed_data, raw_size)
1893            struct PartialPackData {
1894                stream_id: usize,
1895                raw_data: Vec<u8>,
1896                compressed: Vec<u8>,
1897                raw_size: usize,
1898                use_compressed: bool,
1899            }
1900
1901            let compression_level = self.config.compression_level;
1902            let verbosity = self.config.verbosity;
1903
1904            // Extract work items from groups
1905            let phase2_start = std::time::Instant::now();
1906            let work_items: Vec<_> = groups
1907                .iter_mut()
1908                .filter(|(_, buffer)| !buffer.pending_deltas.is_empty())
1909                .map(|(_, buffer)| {
1910                    // FIX: Raw groups are 0-15 (group_id < 16), not just group 0
1911                    let use_lz_encoding = buffer.group_id >= NO_RAW_GROUPS;
1912                    let mut packed_data = Vec::new();
1913
1914                    if !use_lz_encoding && !buffer.raw_placeholder_written {
1915                        packed_data.push(0x7f);
1916                        packed_data.push(CONTIG_SEPARATOR);
1917                    }
1918
1919                    for delta in buffer.pending_deltas.iter() {
1920                        packed_data.extend_from_slice(delta);
1921                        packed_data.push(CONTIG_SEPARATOR);
1922                    }
1923
1924                    let stream_id = buffer.stream_id as usize;
1925                    let group_id = buffer.group_id;
1926                    let delta_count = buffer.pending_deltas.len();
1927
1928                    buffer.pending_deltas.clear();
1929                    buffer.pending_delta_ids.clear();
1930
1931                    (stream_id, packed_data, group_id, delta_count)
1932                })
1933                .collect();
1934
1935            let work_items_count = work_items.len();
1936            if self.config.verbosity > 0 {
1937                eprintln!(
1938                    "FLUSH_PHASE2a: Collected {} work items, took {:?}",
1939                    work_items_count,
1940                    phase2_start.elapsed()
1941                );
1942            }
1943
1944            // Parallel compression using rayon
1945            // Use full compression level for final partial packs to match C++ AGC output.
1946            // Previously capped at level 9 for speed, but this caused 15% larger archives
1947            // when most compression happens in finalize (e.g., with per-sample sync).
1948            let partial_compression_level = compression_level;
1949            let compress_start = std::time::Instant::now();
1950            let compressed_packs: Vec<PartialPackData> = work_items
1951                .into_par_iter()
1952                .filter_map(|(stream_id, packed_data, group_id, delta_count)| {
1953                    if packed_data.is_empty() {
1954                        return None;
1955                    }
1956
1957                    let raw_size = packed_data.len();
1958                    let mut compressed = match compress_segment_configured(
1959                        &packed_data,
1960                        partial_compression_level,
1961                    ) {
1962                        Ok(c) => c,
1963                        Err(e) => {
1964                            eprintln!(
1965                                "Error compressing final partial pack for group {}: {}",
1966                                group_id, e
1967                            );
1968                            return None;
1969                        }
1970                    };
1971                    compressed.push(0); // Marker 0 = plain ZSTD
1972
1973                    let use_compressed = compressed.len() < raw_size;
1974
1975                    if verbosity > 1 {
1976                        eprintln!(
1977                            "  Compressed final partial pack for group {} with {} deltas",
1978                            group_id, delta_count
1979                        );
1980                    }
1981
1982                    Some(PartialPackData {
1983                        stream_id,
1984                        raw_data: packed_data,
1985                        compressed,
1986                        raw_size,
1987                        use_compressed,
1988                    })
1989                })
1990                .collect();
1991
1992            if self.config.verbosity > 0 {
1993                eprintln!(
1994                    "FLUSH_PHASE2b: Parallel compression of {} packs, took {:?}",
1995                    compressed_packs.len(),
1996                    compress_start.elapsed()
1997                );
1998            }
1999
2000            // Phase 3: Sequential writes to archive (sorted by stream_id for determinism)
2001            let phase3_start = std::time::Instant::now();
2002            let mut sorted_packs = compressed_packs;
2003            sorted_packs.sort_by_key(|p| p.stream_id);
2004
2005            let mut arch = self.archive.lock().unwrap();
2006            for pack in sorted_packs {
2007                if pack.use_compressed {
2008                    // Use buffered writes to reduce syscalls
2009                    arch.add_part_buffered(
2010                        pack.stream_id,
2011                        pack.compressed.clone(),
2012                        pack.raw_size as u64,
2013                    );
2014                } else {
2015                    arch.add_part_buffered(pack.stream_id, pack.raw_data.clone(), 0);
2016                }
2017            }
2018            drop(arch);
2019
2020            if self.config.verbosity > 0 {
2021                eprintln!(
2022                    "FLUSH_PHASE3: Sequential writes, took {:?}",
2023                    phase3_start.elapsed()
2024                );
2025                eprintln!("Flushed {} segment groups", num_groups);
2026                eprintln!("FINALIZE_TIMING: Flush took {:?}", flush_start.elapsed());
2027            }
2028        }
2029
2030        if self.config.verbosity > 0 {
2031            eprintln!("Writing metadata...");
2032        }
2033
2034        // Get total sample count for metadata writing
2035        let num_samples = {
2036            let coll = self.collection.lock().unwrap();
2037            coll.get_no_samples()
2038        };
2039
2040        // Write collection metadata to archive
2041        {
2042            let mut archive = self.archive.lock().unwrap();
2043            let mut collection = self.collection.lock().unwrap();
2044
2045            // DEFERRED METADATA WRITES (C++ AGC compatibility)
2046            // C++ AGC writes metadata streams AFTER segment data, in this order:
2047            // 1. params
2048            // 2. splitters
2049            // 3. segment-splitters
2050            // 4. collection metadata (samples, contigs, details)
2051            // 5. file_type_info
2052            let (params_stream_id, params_data) = &self.deferred_params;
2053            archive.add_part_buffered(*params_stream_id, params_data.clone(), 0);
2054
2055            let (splitters_stream_id, splitters_data) = &self.deferred_splitters;
2056            archive.add_part_buffered(*splitters_stream_id, splitters_data.clone(), 0);
2057
2058            let (seg_splitters_stream_id, seg_splitters_data) = &self.deferred_segment_splitters;
2059            archive.add_part_buffered(*seg_splitters_stream_id, seg_splitters_data.clone(), 0);
2060
2061            // Write sample names
2062            collection
2063                .store_batch_sample_names(&mut archive)
2064                .context("Failed to write sample names")?;
2065
2066            // Write contig names and segment details in batches of 50
2067            // (matches C++ AGC pack_cardinality default)
2068            const PACK_CARDINALITY: usize = 50;
2069            let mut i = 0;
2070            while i < num_samples {
2071                let batch_end = (i + PACK_CARDINALITY).min(num_samples);
2072                collection
2073                    .store_contig_batch(&mut archive, i, batch_end)
2074                    .context("Failed to write contig batch")?;
2075                i = batch_end;
2076            }
2077
2078            // Write file_type_info LAST (matches C++ AGC store_file_type_info order)
2079            let (file_type_info_stream_id, file_type_info_data) = &self.deferred_file_type_info;
2080            archive.add_part_buffered(*file_type_info_stream_id, file_type_info_data.clone(), 7);
2081
2082            // Flush all buffered writes to disk in one batch (reduces syscalls from ~200 to ~1)
2083            archive
2084                .flush_buffers()
2085                .context("Failed to flush archive buffers")?;
2086
2087            if self.config.verbosity > 0 {
2088                eprintln!("Collection metadata written successfully");
2089            }
2090
2091            // Close archive (writes footer)
2092            archive.close().context("Failed to close archive")?;
2093        }
2094
2095        if self.config.verbosity > 0 {
2096            eprintln!("Compression complete!");
2097        }
2098
2099        Ok(())
2100    }
2101
2102    /// Get current queue statistics
2103    pub fn queue_stats(&self) -> QueueStats {
2104        QueueStats {
2105            current_size_bytes: self.queue.current_size(),
2106            current_items: self.queue.len(),
2107            capacity_bytes: self.queue.capacity(),
2108            is_closed: self.queue.is_closed(),
2109        }
2110    }
2111}
2112
/// Queue statistics
///
/// Point-in-time snapshot of the compressor's input queue, as returned by
/// `queue_stats()`.
#[derive(Debug, Clone)]
pub struct QueueStats {
    /// Total size of the items currently queued, in bytes.
    pub current_size_bytes: usize,
    /// Number of items currently in the queue.
    pub current_items: usize,
    /// Capacity limit of the queue, in bytes.
    pub capacity_bytes: usize,
    /// True once the queue has been closed (no further pushes accepted).
    pub is_closed: bool,
}
2121
/// Pre-compressed data ready for archive write (no locks needed during compression)
///
/// Produced during the lock-free compression phase of `flush_pack`, then written
/// to the archive later in a single batched, locked pass.
struct PreCompressedPart {
    /// Destination archive stream id.
    stream_id: usize,
    /// Payload bytes: compressed data (with trailing marker byte), or the raw
    /// input when compression did not shrink it.
    data: Vec<u8>,
    /// Uncompressed size when `data` is compressed; 0 means `data` is stored raw.
    metadata: u64,
}
2129
/// Segment registration data for collection (batched for single lock acquisition)
///
/// One record per stored segment; applied to the shared `CollectionV3` via
/// `add_segment_placed` in a single locked loop.
struct SegmentRegistration {
    /// Sample the segment belongs to.
    sample_name: String,
    /// Contig (within the sample) the segment belongs to.
    contig_name: String,
    /// Index of this segment within its contig.
    seg_part_no: usize,
    /// Group the segment was assigned to.
    group_id: u32,
    /// Position within the group; 0 is reserved for the group's reference segment.
    in_group_id: u32,
    /// Reverse-complement flag, passed through to the collection.
    is_rev_comp: bool,
    /// Uncompressed segment length in bytes.
    raw_length: u32,
}
2140
/// Result of parallel compression phase (for deterministic sequential writes)
/// Workers produce these in parallel, then Thread 0 writes them in sorted order
struct FlushPackResult {
    /// Group these results belong to (the sort key for deterministic write order).
    group_id: u32,
    /// Pre-compressed parts to append to the archive.
    archive_writes: Vec<PreCompressedPart>,
    /// Segment registrations to apply to the collection.
    registrations: Vec<SegmentRegistration>,
    /// Reference segment `(group_id, raw bytes)` to insert into the global
    /// reference map, if a new reference was chosen during this flush.
    ref_to_store: Option<(u32, Vec<u8>)>,
}
2149
/// Flush the buffered segments of one group to the archive and collection.
///
/// Phase 1 (lock-free): sorts the buffered segments, picks and compresses the
/// group's reference (LZ groups only, first flush only), LZ-encodes or copies
/// each remaining segment, deduplicates deltas within the pending pack, and
/// compresses each pack once it reaches `PACK_CARDINALITY` entries.
/// Phase 2 (short lock scopes): batches the archive writes, stores the new
/// reference in the shared map, and registers all segments in the collection.
///
/// Mutates `buffer` (sorted/cleared segments, pending pack state, LZ encoder,
/// counters) so pack boundaries persist across calls; partial packs are only
/// written at finalization. Errors from compression, archive buffering, or
/// collection registration are propagated with context.
fn flush_pack(
    buffer: &mut SegmentGroupBuffer,
    collection: &Arc<Mutex<CollectionV3>>,
    archive: &Arc<Mutex<Archive>>,
    config: &StreamingQueueConfig,
    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
) -> Result<()> {
    use crate::segment_compression::{compress_reference_segment, compress_segment_configured};

    // Skip if no segments to write (but still write reference if present)
    if buffer.segments.is_empty() && buffer.ref_written {
        return Ok(());
    }

    // Groups below NO_RAW_GROUPS are stored raw (no reference, no LZ deltas).
    let use_lz_encoding = buffer.group_id >= NO_RAW_GROUPS;

    // CRITICAL FIX: Sort ALL segments FIRST by (sample_name, contig_name, seg_part_no)
    // BEFORE picking the reference. This ensures the lexicographically first segment
    // becomes the reference, matching C++ AGC's behavior.
    // (Previous code sorted AFTER picking reference, causing wrong reference selection)
    buffer.segments.sort();

    // ============================================================
    // PHASE 1: Compress everything WITHOUT holding any locks
    // ============================================================

    // Collect all pre-compressed writes for batched archive write
    let mut archive_writes: Vec<PreCompressedPart> = Vec::new();
    // Collect all segment registrations for batched collection update
    let mut registrations: Vec<SegmentRegistration> = Vec::new();
    // Reference data to store in global map (if any)
    let mut ref_to_store: Option<(u32, Vec<u8>)> = None;

    // Write reference segment if not already written (first pack for this group)
    // Extract reference from sorted segments (matching C++ AGC: first segment after sort becomes reference)
    // NOTE: Raw groups (0-15) do NOT have a reference - all segments stored raw
    if use_lz_encoding && !buffer.ref_written && !buffer.segments.is_empty() {
        // Remove first segment (alphabetically first after sorting) to use as reference
        let ref_seg = buffer.segments.remove(0);

        if crate::env_cache::debug_ref_write() {
            eprintln!(
                "DEBUG_REF_WRITE: group={} sample={} contig={} seg={} data_len={} segments_remaining={}",
                buffer.group_id, ref_seg.sample_name, ref_seg.contig_name,
                ref_seg.seg_part_no, ref_seg.data.len(), buffer.segments.len()
            );
        }

        if config.verbosity > 1 {
            eprintln!(
                "  Flushing group {}: reference from {} (chosen from {} sorted segments)",
                buffer.group_id,
                ref_seg.sample_name,
                buffer.segments.len() + 1
            );
        }

        // Compress reference using adaptive compression (NO LOCK)
        let (mut compressed, marker) =
            compress_reference_segment(&ref_seg.data).context("Failed to compress reference")?;
        compressed.push(marker);

        // Metadata stores the uncompressed size
        let ref_size = ref_seg.data.len() as u64;

        // CRITICAL: Check if compression helped (matching C++ AGC segment.h lines 179, 204)
        // C++ AGC: if(packed_size + 1u < (uint32_t) data.size())
        // `compressed` already includes the marker byte pushed above, so this
        // comparison is equivalent to C++'s packed_size + 1 < data.size().
        // If compression didn't help, write UNCOMPRESSED raw data with metadata=0
        if compressed.len() < ref_seg.data.len() {
            // Compression helped - write compressed data with metadata=original_size
            archive_writes.push(PreCompressedPart {
                stream_id: buffer.ref_stream_id,
                data: compressed,
                metadata: ref_size,
            });
        } else {
            // Compression didn't help - write UNCOMPRESSED data with metadata=0
            archive_writes.push(PreCompressedPart {
                stream_id: buffer.ref_stream_id,
                data: ref_seg.data.clone(),
                metadata: 0,
            });
        }

        // Queue reference registration
        registrations.push(SegmentRegistration {
            sample_name: ref_seg.sample_name.clone(),
            contig_name: ref_seg.contig_name.clone(),
            seg_part_no: ref_seg.seg_part_no,
            group_id: buffer.group_id,
            in_group_id: 0, // Reference is always at position 0
            is_rev_comp: ref_seg.is_rev_comp,
            raw_length: ref_seg.data.len() as u32,
        });

        buffer.ref_written = true;

        // Queue reference for global map storage
        ref_to_store = Some((buffer.group_id, ref_seg.data.clone()));

        buffer.reference_segment = Some(ref_seg.clone()); // Store for LZ encoding

        // Prepare LZ encoder with reference (matching C++ AGC segment.cpp line 43: lz_diff->Prepare(s))
        // This is done ONCE when the reference is written, then reused for all subsequent segments
        if use_lz_encoding {
            let mut lz = LZDiff::new(config.min_match_len as u32);
            lz.prepare(&ref_seg.data);
            buffer.lz_diff = Some(lz);
        }
    }

    // NOTE: Segments are already sorted at the start of flush_pack (see the sort above).
    // This sort was moved earlier to ensure correct reference selection.

    // Pack segments together with delta deduplication (matching C++ AGC segment.cpp lines 66-74)
    // Note: segments do NOT include the reference - it's stored separately
    //
    // CRITICAL FIX: Partial packs must persist across flush_pack calls to ensure pack boundaries
    // align with decompression expectations. Pack N must contain entries for in_group_ids
    // (N*50)+1 to (N+1)*50. Only write a pack when it has exactly 50 entries (or at finalization).
    // Use buffer.pending_deltas and buffer.pending_delta_ids to persist partial packs.

    let mut segment_in_group_ids: Vec<(usize, u32)> = Vec::new(); // (segment_index, in_group_id) for each segment

    // Helper function to compress a complete pack (exactly 50 entries) - NO LOCK
    let compress_pack = |deltas: &[Vec<u8>],
                         needs_raw_placeholder: bool,
                         stream_id: usize,
                         compression_level: i32|
     -> Result<PreCompressedPart> {
        let mut packed_data = Vec::new();

        // CRITICAL: Raw groups need a placeholder segment at position 0
        if needs_raw_placeholder {
            packed_data.push(0x7f);
            packed_data.push(CONTIG_SEPARATOR);
        }

        // Concatenate deltas, each terminated by CONTIG_SEPARATOR.
        for delta in deltas.iter() {
            packed_data.extend_from_slice(delta);
            packed_data.push(CONTIG_SEPARATOR);
        }

        let total_raw_size = packed_data.len();
        let mut compressed = compress_segment_configured(&packed_data, compression_level)
            .context("Failed to compress pack")?;
        compressed.push(0); // Marker 0 = plain ZSTD

        // Same store-raw-if-bigger rule as the reference path above (metadata=0 => raw).
        if compressed.len() < total_raw_size {
            Ok(PreCompressedPart {
                stream_id,
                data: compressed,
                metadata: total_raw_size as u64,
            })
        } else {
            Ok(PreCompressedPart {
                stream_id,
                data: packed_data,
                metadata: 0,
            })
        }
    };

    for (seg_idx, seg) in buffer.segments.iter().enumerate() {
        let contig_data = if !use_lz_encoding || buffer.reference_segment.is_none() {
            // Raw segment: groups 0-15 OR groups without reference
            seg.data.clone()
        } else {
            // LZ-encoded segment (groups >= 16 with reference)
            // DEBUG: Log sizes before encoding
            if let Some(ref_seg) = &buffer.reference_segment {
                if config.verbosity > 1 {
                    eprintln!("  LZ encoding: group={} ref_len={} target_len={} sample={} contig={} part={}",
                        buffer.group_id, ref_seg.data.len(), seg.data.len(),
                        seg.sample_name, seg.contig_name, seg.seg_part_no);
                }
            }
            // Reuse prepared lz_diff (matching C++ AGC segment.cpp line 59: lz_diff->Encode(s, delta))
            let ragc_encoded = buffer
                .lz_diff
                .as_mut()
                .expect("lz_diff should be prepared when reference is written")
                .encode(&seg.data);

            // Compare with C++ AGC encode (TEST HARNESS)
            #[cfg(feature = "cpp_agc")]
            if crate::env_cache::test_lz_encoding() {
                if let Some(ref_seg) = &buffer.reference_segment {
                    if let Some(cpp_encoded) = crate::ragc_ffi::lzdiff_v2_encode(
                        &ref_seg.data,
                        &seg.data,
                        config.min_match_len as u32,
                    ) {
                        if ragc_encoded != cpp_encoded {
                            eprintln!("\n========================================");
                            eprintln!("🔥 LZ ENCODING MISMATCH DETECTED!");
                            eprintln!("========================================");
                            eprintln!("Group:          {}", buffer.group_id);
                            eprintln!("Sample:         {}", seg.sample_name);
                            eprintln!("Contig:         {}", seg.contig_name);
                            eprintln!("Segment:        {}", seg.seg_part_no);
                            eprintln!("Reference len:  {}", ref_seg.data.len());
                            eprintln!("Target len:     {}", seg.data.len());
                            eprintln!("RAGC encoded:   {} bytes", ragc_encoded.len());
                            eprintln!("C++ AGC encoded: {} bytes", cpp_encoded.len());
                            eprintln!(
                                "Difference:     {} bytes",
                                (ragc_encoded.len() as i64 - cpp_encoded.len() as i64).abs()
                            );
                            eprintln!();

                            // Find first difference
                            let mut first_diff_byte = None;
                            for (i, (r, c)) in
                                ragc_encoded.iter().zip(cpp_encoded.iter()).enumerate()
                            {
                                if r != c {
                                    first_diff_byte = Some(i);
                                    break;
                                }
                            }

                            if let Some(i) = first_diff_byte {
                                eprintln!("First difference at byte {}", i);
                                let start = if i > 20 { i - 20 } else { 0 };
                                let end = (i + 30).min(ragc_encoded.len()).min(cpp_encoded.len());

                                eprintln!("\nRAGC output around difference:");
                                let ragc_hex: Vec<_> = ragc_encoded[start..end]
                                    .iter()
                                    .map(|b| format!("{:02x}", b))
                                    .collect();
                                let ragc_ascii: String = ragc_encoded[start..end]
                                    .iter()
                                    .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                    .collect();
                                eprintln!("  Hex:   {}", ragc_hex.join(" "));
                                eprintln!("  ASCII: {}", ragc_ascii);

                                eprintln!("\nC++ AGC output around difference:");
                                let cpp_hex: Vec<_> = cpp_encoded[start..end]
                                    .iter()
                                    .map(|b| format!("{:02x}", b))
                                    .collect();
                                let cpp_ascii: String = cpp_encoded[start..end]
                                    .iter()
                                    .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                    .collect();
                                eprintln!("  Hex:   {}", cpp_hex.join(" "));
                                eprintln!("  ASCII: {}", cpp_ascii);

                                eprintln!("\nByte at position {}:", i);
                                eprintln!(
                                    "  RAGC:    0x{:02x} ('{}')",
                                    ragc_encoded[i],
                                    if ragc_encoded[i] >= 32 && ragc_encoded[i] < 127 {
                                        ragc_encoded[i] as char
                                    } else {
                                        '?'
                                    }
                                );
                                eprintln!(
                                    "  C++ AGC: 0x{:02x} ('{}')",
                                    cpp_encoded[i],
                                    if cpp_encoded[i] >= 32 && cpp_encoded[i] < 127 {
                                        cpp_encoded[i] as char
                                    } else {
                                        '?'
                                    }
                                );
                            } else if ragc_encoded.len() != cpp_encoded.len() {
                                eprintln!(
                                    "Encodings match for first {} bytes, but lengths differ",
                                    ragc_encoded.len().min(cpp_encoded.len())
                                );
                                if ragc_encoded.len() > cpp_encoded.len() {
                                    let extra_start = cpp_encoded.len();
                                    let extra_hex: Vec<_> = ragc_encoded[extra_start..]
                                        .iter()
                                        .take(40)
                                        .map(|b| format!("{:02x}", b))
                                        .collect();
                                    let extra_ascii: String = ragc_encoded[extra_start..]
                                        .iter()
                                        .take(40)
                                        .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                        .collect();
                                    eprintln!(
                                        "RAGC has {} extra bytes:",
                                        ragc_encoded.len() - cpp_encoded.len()
                                    );
                                    eprintln!("  Hex:   {}", extra_hex.join(" "));
                                    eprintln!("  ASCII: {}", extra_ascii);
                                } else {
                                    let extra_start = ragc_encoded.len();
                                    let extra_hex: Vec<_> = cpp_encoded[extra_start..]
                                        .iter()
                                        .take(40)
                                        .map(|b| format!("{:02x}", b))
                                        .collect();
                                    let extra_ascii: String = cpp_encoded[extra_start..]
                                        .iter()
                                        .take(40)
                                        .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                        .collect();
                                    eprintln!(
                                        "C++ AGC has {} extra bytes:",
                                        cpp_encoded.len() - ragc_encoded.len()
                                    );
                                    eprintln!("  Hex:   {}", extra_hex.join(" "));
                                    eprintln!("  ASCII: {}", extra_ascii);
                                }
                            }

                            // Show last 10 bytes of each
                            eprintln!("\nLast 10 bytes of each encoding:");
                            let ragc_tail_start = if ragc_encoded.len() > 10 {
                                ragc_encoded.len() - 10
                            } else {
                                0
                            };
                            let ragc_tail_hex: Vec<_> = ragc_encoded[ragc_tail_start..]
                                .iter()
                                .map(|b| format!("{:02x}", b))
                                .collect();
                            let ragc_tail_ascii: String = ragc_encoded[ragc_tail_start..]
                                .iter()
                                .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                .collect();
                            eprintln!(
                                "RAGC    (bytes {}-{}):",
                                ragc_tail_start,
                                ragc_encoded.len() - 1
                            );
                            eprintln!("  Hex:   {}", ragc_tail_hex.join(" "));
                            eprintln!("  ASCII: {}", ragc_tail_ascii);

                            let cpp_tail_start = if cpp_encoded.len() > 10 {
                                cpp_encoded.len() - 10
                            } else {
                                0
                            };
                            let cpp_tail_hex: Vec<_> = cpp_encoded[cpp_tail_start..]
                                .iter()
                                .map(|b| format!("{:02x}", b))
                                .collect();
                            let cpp_tail_ascii: String = cpp_encoded[cpp_tail_start..]
                                .iter()
                                .map(|&b| if b >= 32 && b < 127 { b as char } else { '.' })
                                .collect();
                            eprintln!(
                                "C++ AGC (bytes {}-{}):",
                                cpp_tail_start,
                                cpp_encoded.len() - 1
                            );
                            eprintln!("  Hex:   {}", cpp_tail_hex.join(" "));
                            eprintln!("  ASCII: {}", cpp_tail_ascii);

                            eprintln!("\n========================================");
                            eprintln!("Aborting on first LZ encoding mismatch!");
                            eprintln!("========================================\n");

                            panic!("LZ encoding mismatch detected - see details above");
                        }
                    }
                }
            }

            ragc_encoded
        };

        // Handle LZ groups with IMPROVED_LZ_ENCODING: empty delta means same as reference
        // (matching C++ AGC segment.cpp lines 62-63)
        if use_lz_encoding && contig_data.is_empty() {
            // Same as reference - use in_group_id = 0
            segment_in_group_ids.push((seg_idx, 0));
            continue;
        }

        // Check if this delta already exists in pending pack (matching C++ AGC segment.cpp line 66)
        // Note: deduplication is per-pack, not global
        if let Some(existing_idx) = buffer.pending_deltas.iter().position(|d| d == &contig_data) {
            // Reuse existing delta's in_group_id (matching C++ AGC segment.cpp line 69)
            let reused_id = buffer.pending_delta_ids[existing_idx];
            segment_in_group_ids.push((seg_idx, reused_id));
        } else {
            // New unique delta - assign next in_group_id (matching C++ AGC segment.cpp lines 74, 77)
            // FIX: Apply .max(1) BEFORE using segments_written to ensure unique IDs when no reference
            // Bug was: max(0,1)=1, increment to 1 → max(1,1)=1 (COLLISION!)
            // Fixed: max(0,1)=1, id=1, increment to 2 → id=2 (UNIQUE!)
            buffer.segments_written = buffer.segments_written.max(1);
            let in_group_id = buffer.segments_written;
            buffer.segments_written += 1;
            buffer.pending_delta_ids.push(in_group_id);
            segment_in_group_ids.push((seg_idx, in_group_id));
            buffer.pending_deltas.push(contig_data);

            // CRITICAL: Flush pack when it reaches capacity
            // For raw groups (group_id < 16):
            //   - Pack 0: placeholder (position 0) + 49 segments (positions 1-49) = 50 positions
            //   - Pack 1+: 50 segments (positions 0-49)
            // This ensures extraction formula (pack_id = in_group_id / 50, position = in_group_id % 50) works correctly
            let flush_threshold = if !use_lz_encoding && !buffer.raw_placeholder_written {
                // Raw group pack 0: flush at 49 to leave room for placeholder
                PACK_CARDINALITY - 1
            } else {
                // All other packs (raw pack 1+ or LZ packs): flush at 50
                PACK_CARDINALITY
            };

            if buffer.pending_deltas.len() == flush_threshold {
                // Compress pack WITHOUT holding any lock
                let needs_placeholder = !use_lz_encoding && !buffer.raw_placeholder_written;
                let pack = compress_pack(
                    &buffer.pending_deltas,
                    needs_placeholder,
                    buffer.stream_id,
                    config.compression_level,
                )?;
                archive_writes.push(pack);
                buffer.raw_placeholder_written = true;

                // Clear for next pack - deduplication starts fresh
                buffer.pending_deltas.clear();
                buffer.pending_delta_ids.clear();
            }
        }
    }

    // DO NOT write partial pack here - leave it in buffer.pending_deltas for next flush_pack call
    // Partial packs are only written in finalize() to ensure pack boundaries align with decompression

    // Queue segment registrations (batched for single lock acquisition)
    for &(seg_idx, in_group_id) in segment_in_group_ids.iter() {
        let seg = &buffer.segments[seg_idx];
        registrations.push(SegmentRegistration {
            sample_name: seg.sample_name.clone(),
            contig_name: seg.contig_name.clone(),
            seg_part_no: seg.seg_part_no,
            group_id: buffer.group_id,
            in_group_id,
            is_rev_comp: seg.is_rev_comp,
            raw_length: seg.data.len() as u32,
        });
    }

    // ============================================================
    // PHASE 2: Batched writes with minimal lock duration
    // ============================================================

    // Buffer all pre-compressed data for archive (SINGLE lock acquisition)
    // Actual writes happen via flush_buffers() at end for fewer syscalls
    if !archive_writes.is_empty() {
        let mut arch = archive.lock().unwrap();
        for part in archive_writes {
            arch.add_part_buffered(part.stream_id, part.data, part.metadata);
        }
    }

    // Store reference in global map (if any)
    if let Some((group_id, ref_data)) = ref_to_store {
        let mut ref_segs = reference_segments.write().unwrap();
        ref_segs.insert(group_id, ref_data);
    }

    // Register all segments in collection (SINGLE lock acquisition)
    if !registrations.is_empty() {
        let mut coll = collection.lock().unwrap();
        for reg in registrations {
            coll.add_segment_placed(
                &reg.sample_name,
                &reg.contig_name,
                reg.seg_part_no,
                reg.group_id,
                reg.in_group_id,
                reg.is_rev_comp,
                reg.raw_length,
            )
            .context("Failed to register segment")?;
        }
    }

    // Clear segments for next batch (but keep pending_deltas!)
    buffer.segments.clear();

    Ok(())
}
2637
2638/// Compress-only version of flush_pack for deterministic parallel compression.
2639/// Workers call this in parallel to produce FlushPackResult, then Thread 0
2640/// writes all results in sorted group_id order for deterministic archives.
2641fn flush_pack_compress_only(
2642    buffer: &mut SegmentGroupBuffer,
2643    config: &StreamingQueueConfig,
2644) -> Result<FlushPackResult> {
2645    use crate::segment_compression::{compress_reference_segment, compress_segment_configured};
2646
2647    let mut archive_writes: Vec<PreCompressedPart> = Vec::new();
2648    let mut registrations: Vec<SegmentRegistration> = Vec::new();
2649    let mut ref_to_store: Option<(u32, Vec<u8>)> = None;
2650
2651    // Skip if no segments to write (but still write reference if present)
2652    if buffer.segments.is_empty() && buffer.ref_written {
2653        return Ok(FlushPackResult {
2654            group_id: buffer.group_id,
2655            archive_writes,
2656            registrations,
2657            ref_to_store,
2658        });
2659    }
2660
2661    let use_lz_encoding = buffer.group_id >= NO_RAW_GROUPS;
2662
2663    // Sort segments for deterministic reference selection
2664    buffer.segments.sort();
2665
2666    // Write reference segment if not already written
2667    if use_lz_encoding && !buffer.ref_written && !buffer.segments.is_empty() {
2668        let ref_seg = buffer.segments.remove(0);
2669
2670        // Compress reference
2671        let (mut compressed, marker) =
2672            compress_reference_segment(&ref_seg.data).context("Failed to compress reference")?;
2673        compressed.push(marker);
2674
2675        let ref_size = ref_seg.data.len() as u64;
2676
2677        if compressed.len() < ref_seg.data.len() {
2678            archive_writes.push(PreCompressedPart {
2679                stream_id: buffer.ref_stream_id,
2680                data: compressed,
2681                metadata: ref_size,
2682            });
2683        } else {
2684            archive_writes.push(PreCompressedPart {
2685                stream_id: buffer.ref_stream_id,
2686                data: ref_seg.data.clone(),
2687                metadata: 0,
2688            });
2689        }
2690
2691        registrations.push(SegmentRegistration {
2692            sample_name: ref_seg.sample_name.clone(),
2693            contig_name: ref_seg.contig_name.clone(),
2694            seg_part_no: ref_seg.seg_part_no,
2695            group_id: buffer.group_id,
2696            in_group_id: 0,
2697            is_rev_comp: ref_seg.is_rev_comp,
2698            raw_length: ref_seg.data.len() as u32,
2699        });
2700
2701        buffer.ref_written = true;
2702        ref_to_store = Some((buffer.group_id, ref_seg.data.clone()));
2703        buffer.reference_segment = Some(ref_seg.clone());
2704
2705        if use_lz_encoding {
2706            let mut lz = LZDiff::new(config.min_match_len as u32);
2707            lz.prepare(&ref_seg.data);
2708            buffer.lz_diff = Some(lz);
2709        }
2710    }
2711
2712    // Compress pack helper (same as flush_pack)
2713    let compress_pack = |deltas: &[Vec<u8>],
2714                         needs_raw_placeholder: bool,
2715                         stream_id: usize,
2716                         compression_level: i32|
2717     -> Result<PreCompressedPart> {
2718        let mut packed_data = Vec::new();
2719
2720        if needs_raw_placeholder {
2721            packed_data.push(0x7f);
2722            packed_data.push(CONTIG_SEPARATOR);
2723        }
2724
2725        for delta in deltas.iter() {
2726            packed_data.extend_from_slice(delta);
2727            packed_data.push(CONTIG_SEPARATOR);
2728        }
2729
2730        let total_raw_size = packed_data.len();
2731        let mut compressed = compress_segment_configured(&packed_data, compression_level)
2732            .context("Failed to compress pack")?;
2733        compressed.push(0);
2734
2735        if compressed.len() < total_raw_size {
2736            Ok(PreCompressedPart {
2737                stream_id,
2738                data: compressed,
2739                metadata: total_raw_size as u64,
2740            })
2741        } else {
2742            Ok(PreCompressedPart {
2743                stream_id,
2744                data: packed_data,
2745                metadata: 0,
2746            })
2747        }
2748    };
2749
2750    let mut segment_in_group_ids: Vec<(usize, u32)> = Vec::new();
2751
2752    for (seg_idx, seg) in buffer.segments.iter().enumerate() {
2753        let contig_data = if !use_lz_encoding || buffer.reference_segment.is_none() {
2754            seg.data.clone()
2755        } else {
2756            buffer
2757                .lz_diff
2758                .as_mut()
2759                .expect("lz_diff should be prepared")
2760                .encode(&seg.data)
2761        };
2762
2763        if use_lz_encoding && contig_data.is_empty() {
2764            segment_in_group_ids.push((seg_idx, 0));
2765            continue;
2766        }
2767
2768        if let Some(existing_idx) = buffer.pending_deltas.iter().position(|d| d == &contig_data) {
2769            let reused_id = buffer.pending_delta_ids[existing_idx];
2770            segment_in_group_ids.push((seg_idx, reused_id));
2771        } else {
2772            buffer.segments_written = buffer.segments_written.max(1);
2773            let in_group_id = buffer.segments_written;
2774            buffer.segments_written += 1;
2775            buffer.pending_delta_ids.push(in_group_id);
2776            segment_in_group_ids.push((seg_idx, in_group_id));
2777            buffer.pending_deltas.push(contig_data);
2778
2779            // CRITICAL: Flush pack when it reaches capacity
2780            // For raw groups pack 0: flush at 49 (placeholder takes position 0)
2781            // For all other packs: flush at 50
2782            let flush_threshold = if !use_lz_encoding && !buffer.raw_placeholder_written {
2783                PACK_CARDINALITY - 1
2784            } else {
2785                PACK_CARDINALITY
2786            };
2787
2788            if buffer.pending_deltas.len() == flush_threshold {
2789                let needs_placeholder = !use_lz_encoding && !buffer.raw_placeholder_written;
2790                let pack = compress_pack(
2791                    &buffer.pending_deltas,
2792                    needs_placeholder,
2793                    buffer.stream_id,
2794                    config.compression_level,
2795                )?;
2796                archive_writes.push(pack);
2797                buffer.raw_placeholder_written = true;
2798                buffer.pending_deltas.clear();
2799                buffer.pending_delta_ids.clear();
2800            }
2801        }
2802    }
2803
2804    for &(seg_idx, in_group_id) in segment_in_group_ids.iter() {
2805        let seg = &buffer.segments[seg_idx];
2806        registrations.push(SegmentRegistration {
2807            sample_name: seg.sample_name.clone(),
2808            contig_name: seg.contig_name.clone(),
2809            seg_part_no: seg.seg_part_no,
2810            group_id: buffer.group_id,
2811            in_group_id,
2812            is_rev_comp: seg.is_rev_comp,
2813            raw_length: seg.data.len() as u32,
2814        });
2815    }
2816
2817    buffer.segments.clear();
2818
2819    Ok(FlushPackResult {
2820        group_id: buffer.group_id,
2821        archive_writes,
2822        registrations,
2823        ref_to_store,
2824    })
2825}
2826
2827/// Write reference segment immediately when first segment arrives in group
2828/// (Matches C++ AGC segment.cpp lines 41-48: if (no_seqs == 0) writes reference right away)
2829/// This ensures LZ encoding works correctly for subsequent segments
2830fn write_reference_immediately(
2831    segment: &BufferedSegment,
2832    buffer: &mut SegmentGroupBuffer,
2833    collection: &Arc<Mutex<CollectionV3>>,
2834    archive: &Arc<Mutex<Archive>>,
2835    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
2836    reference_orientations: &Arc<RwLock<BTreeMap<u32, bool>>>,
2837    config: &StreamingQueueConfig,
2838) -> Result<()> {
2839    use crate::segment_compression::compress_reference_segment;
2840
2841    if crate::env_cache::debug_ref_write() {
2842        eprintln!(
2843            "DEBUG_REF_IMMEDIATE: group={} sample={} contig={} seg={} data_len={}",
2844            buffer.group_id,
2845            segment.sample_name,
2846            segment.contig_name,
2847            segment.seg_part_no,
2848            segment.data.len()
2849        );
2850    }
2851
2852    if config.verbosity > 1 {
2853        eprintln!(
2854            "  Writing immediate reference for group {}: {} {}:{} (part {})",
2855            buffer.group_id,
2856            segment.sample_name,
2857            segment.contig_name,
2858            segment.seg_part_no,
2859            segment.seg_part_no
2860        );
2861    }
2862
2863    // 1. Compress reference using adaptive compression (matching flush_pack lines 635-637)
2864    let (mut compressed, marker) =
2865        compress_reference_segment(&segment.data).context("Failed to compress reference")?;
2866    compressed.push(marker);
2867
2868    let ref_size = segment.data.len();
2869
2870    // 2. Write to archive immediately (matching C++ AGC segment.cpp line 43: store_in_archive)
2871    // CRITICAL: Check if compression helped (matching C++ AGC segment.h line 179)
2872    {
2873        let mut arch = archive.lock().unwrap();
2874        if compressed.len() < ref_size {
2875            // Compression helped - buffer compressed data with metadata=original_size
2876            arch.add_part_buffered(buffer.ref_stream_id, compressed, ref_size as u64);
2877        } else {
2878            // Compression didn't help - buffer UNCOMPRESSED data with metadata=0
2879            arch.add_part_buffered(buffer.ref_stream_id, segment.data.clone(), 0);
2880        }
2881    }
2882
2883    // 3. Register reference in collection with in_group_id = 0 (matching flush_pack lines 650-661)
2884    {
2885        let mut coll = collection.lock().unwrap();
2886        coll.add_segment_placed(
2887            &segment.sample_name,
2888            &segment.contig_name,
2889            segment.seg_part_no,
2890            buffer.group_id,
2891            0, // Reference is always at position 0
2892            segment.is_rev_comp,
2893            segment.data.len() as u32,
2894        )
2895        .context("Failed to register immediate reference")?;
2896    }
2897
2898    // 4. Mark reference as written and store for LZ encoding (matching flush_pack lines 663-664)
2899    buffer.ref_written = true;
2900    buffer.reference_segment = Some(segment.clone());
2901    // CRITICAL: Mark that in_group_id=0 is taken, so subsequent segments start from 1
2902    buffer.segments_written = 1;
2903
2904    // 4b. Store reference data persistently (matching C++ AGC v_segments)
2905    // This enables LZ cost estimation for subsequent samples even after flush
2906    {
2907        let mut ref_segs = reference_segments.write().unwrap();
2908        ref_segs.insert(buffer.group_id, segment.data.clone());
2909    }
2910
2911    // 4c. Store reference orientation for ZERO_MATCH bug fix
2912    // When a delta segment joins this group later, it MUST use the same orientation
2913    // as the reference to ensure LZ encoding works correctly
2914    {
2915        let mut ref_orients = reference_orientations.write().unwrap();
2916        ref_orients.insert(buffer.group_id, segment.is_rev_comp);
2917    }
2918
2919    // 5. Prepare LZ encoder with reference (matching C++ AGC segment.cpp line 43: lz_diff->Prepare(s))
2920    // This is done ONCE when the reference is written, then reused for all subsequent segments
2921    let use_lz_encoding = buffer.group_id >= NO_RAW_GROUPS;
2922    if use_lz_encoding {
2923        let mut lz = LZDiff::new(config.min_match_len as u32);
2924        lz.prepare(&segment.data);
2925        buffer.lz_diff = Some(lz);
2926    }
2927
2928    Ok(())
2929}
2930
2931/// Compute reverse complement of a sequence
2932fn reverse_complement_sequence(seq: &[u8]) -> Vec<u8> {
2933    use crate::kmer::reverse_complement;
2934    seq.iter()
2935        .rev()
2936        .map(|&base| reverse_complement(base as u64) as u8)
2937        .collect()
2938}
2939
/// Find best existing group for a segment with only one k-mer present
/// (Implements C++ AGC's find_cand_segment_with_one_splitter logic from lines 1659-1745)
///
/// Given a segment that carries exactly one splitter k-mer, this looks up the
/// k-mer's connections in the terminator graph, builds candidate group keys,
/// estimates the LZ-encoding cost of joining each candidate against that
/// group's reference, and picks the cheapest candidate using C++ AGC's exact
/// tie-breaking rules. If nothing beats the initial threshold
/// (`segment_len - 16`), a new MISSING-keyed group is requested instead.
///
/// # Arguments
/// * `kmer` - The single splitter k-mer found in the segment
/// * `kmer_is_dir` - Whether the k-mer is direct-oriented (C++ AGC `is_dir_oriented`);
///   decides the orientation of the fallback MISSING key
/// * `segment_data` / `segment_data_rc` - Segment bytes in forward / reverse-complement orientation
/// * `map_segments_terminators` - k-mer -> connected terminator k-mers
/// * `map_segments` - global registry: group key -> group id
/// * `segment_groups` - batch-local group buffers (may hold groups not yet in the registry)
/// * `reference_segments` - persistent per-group reference data
/// * `config` - compression settings (min_match_len, segment_size, verbosity, ...)
///
/// # Returns
/// `(key_front, key_back, needs_rc)`: the chosen (or new MISSING-keyed) group
/// key, plus whether the segment must be reverse-complemented to join it.
fn find_group_with_one_kmer(
    kmer: u64,
    kmer_is_dir: bool,
    segment_data: &[u8],    // Segment data in forward orientation
    segment_data_rc: &[u8], // Segment data in reverse complement
    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
    config: &StreamingQueueConfig,
) -> (u64, u64, bool) {
    let segment_len = segment_data.len();
    use crate::segment::MISSING_KMER;

    // Look up kmer in terminators map to find connected k-mers
    let connected_kmers = {
        let terminators = map_segments_terminators.read().unwrap();
        match terminators.get(&kmer) {
            Some(vec) => vec.clone(),
            None => {
                // No connections found - create new group with MISSING
                // Match C++ AGC lines 1671-1679: check is_dir_oriented()
                // Debug: log entry to no-connection path
                if crate::env_cache::debug_is_dir() {
                    eprintln!(
                        "RAGC_FIND_GROUP_NO_CONN: kmer={} kmer_is_dir={}",
                        kmer, kmer_is_dir
                    );
                }
                if kmer_is_dir {
                    // Dir-oriented: (kmer, MISSING) with rc=false
                    if config.verbosity > 1 {
                        #[cfg(feature = "verbose_debug")]
                        eprintln!("RAGC_CASE3_NO_CONNECTION: kmer={} is_dir=true -> ({}, MISSING) rc=false", kmer, kmer);
                    }
                    return (kmer, MISSING_KMER, false);
                } else {
                    // NOT dir-oriented: (MISSING, kmer) with rc=true
                    if config.verbosity > 1 {
                        #[cfg(feature = "verbose_debug")]
                        eprintln!("RAGC_CASE3_NO_CONNECTION: kmer={} is_dir=false -> (MISSING, {}) rc=true", kmer, kmer);
                    }
                    return (MISSING_KMER, kmer, true);
                }
            }
        }
    };

    if config.verbosity > 1 {
        #[cfg(feature = "verbose_debug")]
        eprintln!(
            "RAGC_CASE3_FOUND_CONNECTIONS: kmer={} connections={}",
            kmer,
            connected_kmers.len()
        );
    }
    // Debug: log connections found
    if crate::env_cache::debug_is_dir() {
        eprintln!(
            "RAGC_FIND_GROUP_FOUND_CONN: kmer={} kmer_is_dir={} connections={:?}",
            kmer, kmer_is_dir, connected_kmers
        );
    }

    // Build list of candidate groups
    // Each candidate: (key_front, key_back, needs_rc, ref_segment_size)
    let mut candidates: Vec<(u64, u64, bool, usize)> = Vec::new();

    // OPTIMIZATION: Reduce lock scope - first collect candidate keys, then look up ref sizes
    // This minimizes the time segment_groups.lock() is held

    // Phase 1: Build candidate orderings (no locks needed)
    let mut candidate_keys: Vec<(u64, u64, bool, SegmentGroupKey)> = Vec::new();
    for &cand_kmer in &connected_kmers {
        // Create candidate group key normalized (smaller, larger)
        // C++ AGC lines 1691-1704
        //
        // IMPORTANT: When cand_kmer is MISSING, we need to try BOTH orderings!
        // Groups with MISSING k-mers can be stored as either (MISSING, kmer) or (kmer, MISSING)
        // depending on kmer_is_dir when they were created. We must match the actual stored key.
        let orderings: Vec<(u64, u64, bool)> = if cand_kmer == MISSING_KMER {
            // MISSING is involved - try both orderings to find the group
            vec![
                (MISSING_KMER, kmer, true),  // (MISSING, kmer) with RC
                (kmer, MISSING_KMER, false), // (kmer, MISSING) without RC
            ]
        } else if cand_kmer < kmer {
            // cand_kmer is smaller - it goes first
            // This means we need to RC (C++ AGC line 1696: get<2>(ck) = true)
            vec![(cand_kmer, kmer, true)]
        } else {
            // kmer is smaller - it goes first
            // No RC needed (C++ AGC line 1703: get<2>(ck) = false)
            vec![(kmer, cand_kmer, false)]
        };

        for (key_front, key_back, needs_rc) in orderings {
            let cand_key = SegmentGroupKey {
                kmer_front: key_front,
                kmer_back: key_back,
            };
            candidate_keys.push((key_front, key_back, needs_rc, cand_key));
        }
    }

    // Phase 2: Quick check which candidates exist (brief locks)
    // First pass: check global registry (RwLock - can be concurrent)
    // NOTE(review): the Option<u32> here is always Some (only Some is ever
    // pushed); the Option shape keeps Phase 3 symmetric with the buffer path.
    let mut existing_candidates: Vec<(u64, u64, bool, Option<u32>)> = Vec::new();
    {
        let seg_map = map_segments.read().unwrap();
        for (key_front, key_back, needs_rc, cand_key) in &candidate_keys {
            if let Some(&group_id) = seg_map.get(cand_key) {
                existing_candidates.push((*key_front, *key_back, *needs_rc, Some(group_id)));
            }
        }
    } // seg_map lock released

    // Second pass: check batch-local buffer for remaining candidates (Mutex - exclusive)
    // Only if we didn't find candidates in global registry
    if existing_candidates.is_empty() {
        let groups = segment_groups.lock().unwrap();
        let mut already_found = std::collections::HashSet::new();
        for (key_front, key_back, needs_rc, cand_key) in &candidate_keys {
            if groups.contains_key(cand_key) {
                // Get ref size from buffer; fall back to segment_len when the
                // buffered group has no reference yet (size diff becomes 0)
                let ref_size = if let Some(group_buffer) = groups.get(cand_key) {
                    if let Some(ref_seg) = &group_buffer.reference_segment {
                        ref_seg.data.len()
                    } else {
                        segment_len
                    }
                } else {
                    segment_len
                };

                // Debug trace
                if crate::env_cache::debug_is_dir() {
                    eprintln!("RAGC_FIND_GROUP_CAND_CHECK: cand_key=({},{}) exists_in_groups=true ref_size={}",
                        key_front, key_back, ref_size);
                }

                // Use cand_kmer as key to deduplicate (only one match per connected_kmer)
                let connected = if *key_front == kmer {
                    *key_back
                } else {
                    *key_front
                };
                if !already_found.contains(&connected) {
                    candidates.push((*key_front, *key_back, *needs_rc, ref_size));
                    already_found.insert(connected);
                }
            }
        }
    } // groups lock released

    // Phase 3: Get ref sizes for global candidates (brief RwLock)
    if !existing_candidates.is_empty() {
        let ref_segs = reference_segments.read().unwrap();
        let mut already_found = std::collections::HashSet::new();
        for (key_front, key_back, needs_rc, group_id_opt) in existing_candidates {
            let ref_size = if let Some(group_id) = group_id_opt {
                if let Some(ref_data) = ref_segs.get(&group_id) {
                    ref_data.len()
                } else {
                    segment_len
                }
            } else {
                segment_len
            };

            // Debug trace
            if crate::env_cache::debug_is_dir() {
                eprintln!("RAGC_FIND_GROUP_CAND_CHECK: cand_key=({},{}) exists_in_seg_map=true ref_size={}",
                    key_front, key_back, ref_size);
            }

            // Use cand_kmer as key to deduplicate (only one match per connected_kmer)
            let connected = if key_front == kmer {
                key_back
            } else {
                key_front
            };
            if !already_found.contains(&connected) {
                candidates.push((key_front, key_back, needs_rc, ref_size));
                already_found.insert(connected);
            }
        }
    } // ref_segs lock released

    if candidates.is_empty() {
        // No existing groups found - create new with MISSING
        // Must match C++ AGC is_dir_oriented logic (same as no-connections case above)
        if crate::env_cache::debug_is_dir() {
            if kmer_is_dir {
                eprintln!("RAGC_FIND_GROUP_NO_CAND: kmer={} kmer_is_dir={} -> returning ({},MISSING,false)",
                    kmer, kmer_is_dir, kmer);
            } else {
                eprintln!("RAGC_FIND_GROUP_NO_CAND: kmer={} kmer_is_dir={} -> returning (MISSING,{},true)",
                    kmer, kmer_is_dir, kmer);
            }
        }
        if kmer_is_dir {
            // Dir-oriented: (kmer, MISSING) with rc=false
            if config.verbosity > 1 {
                #[cfg(feature = "verbose_debug")]
                eprintln!(
                    "RAGC_CASE3_NO_CANDIDATES: kmer={} is_dir=true -> ({}, MISSING) rc=false",
                    kmer, kmer
                );
            }
            return (kmer, MISSING_KMER, false);
        } else {
            // NOT dir-oriented: (MISSING, kmer) with rc=true
            if config.verbosity > 1 {
                #[cfg(feature = "verbose_debug")]
                eprintln!(
                    "RAGC_CASE3_NO_CANDIDATES: kmer={} is_dir=false -> (MISSING, {}) rc=true",
                    kmer, kmer
                );
            }
            return (MISSING_KMER, kmer, true);
        }
    }

    // Sort candidates by reference segment size (C++ AGC lines 1710-1719)
    // Prefer candidates with ref size closest to our segment size
    candidates.sort_by(|a, b| {
        let a_diff = (a.3 as i64 - segment_len as i64).abs();
        let b_diff = (b.3 as i64 - segment_len as i64).abs();

        if a_diff != b_diff {
            a_diff.cmp(&b_diff)
        } else {
            a.3.cmp(&b.3) // If equal distance, prefer smaller ref size
        }
    });

    // Debug: Print sorted candidates before evaluation
    if config.verbosity > 2 {
        eprintln!(
            "RAGC_CASE3_SORTED_CANDIDATES: kmer={} segment_len={} n_candidates={}",
            kmer,
            segment_len,
            candidates.len()
        );
        for (i, &(kf, kb, rc, rs)) in candidates.iter().enumerate() {
            let size_diff = (rs as i64 - segment_len as i64).abs();
            eprintln!(
                "  CAND[{}]: ({},{}) rc={} ref_size={} size_diff={}",
                i, kf, kb, rc, rs, size_diff
            );
        }
    }

    // Test compression for each candidate (C++ AGC lines 1726-1788)
    // Match C++ AGC's TWO-PASS approach:
    //   Pass 1: Compute all estimates, track minimum (lines 1726-1732)
    //   Pass 2: Pick candidate with minimum estimate (lines 1775-1787)
    //
    // CRITICAL: Initialize best_pk to (~0ull, ~0ull) like C++ AGC (line 1628)
    let mut best_key_front = u64::MAX; // ~0ull in C++
    let mut best_key_back = u64::MAX; // ~0ull in C++
    let mut best_needs_rc = false;
    // Initial acceptance threshold: a candidate must encode in fewer than
    // segment_len - 16 bytes (or segment_len for tiny segments) to be picked
    let mut best_estim_size = if segment_len < 16 {
        segment_len
    } else {
        segment_len - 16
    };

    // Pass 1: Compute estimates and find minimum
    // Store estimates alongside candidates: Vec<(front, back, needs_rc, ref_size, estim_size)>
    let mut candidate_estimates: Vec<(u64, u64, bool, usize, usize)> = Vec::new();

    {
        let groups = segment_groups.lock().unwrap();
        let seg_map = map_segments.read().unwrap();
        let ref_segs = reference_segments.read().unwrap();

        for &(key_front, key_back, needs_rc, ref_size) in &candidates {
            let cand_key = SegmentGroupKey {
                kmer_front: key_front,
                kmer_back: key_back,
            };

            // Get the reference segment for this candidate from buffer OR persistent storage
            let (ref_data_opt, ref_source): (Option<&[u8]>, &str) = if let Some(group_buffer) =
                groups.get(&cand_key)
            {
                if config.verbosity > 2
                    && key_front == 1244212049458757632
                    && key_back == 1244212049458757632
                {
                    let ref_seg = group_buffer.reference_segment.as_ref();
                    let ref_len = ref_seg.map(|s| s.data.len()).unwrap_or(0);
                    let ref_first5: Vec<u8> = ref_seg
                        .map(|s| s.data.iter().take(5).cloned().collect())
                        .unwrap_or_default();
                    eprintln!("RAGC_REF_LOOKUP_BUFFER: degenerate key ({},{}) buffer ref_len={} ref[0..5]={:?}",
                        key_front, key_back, ref_len, ref_first5);
                }
                (
                    group_buffer
                        .reference_segment
                        .as_ref()
                        .map(|seg| seg.data.as_slice()),
                    "buffer",
                )
            } else if let Some(&group_id) = seg_map.get(&cand_key) {
                if config.verbosity > 2
                    && key_front == 1244212049458757632
                    && key_back == 1244212049458757632
                {
                    let ref_data = ref_segs.get(&group_id);
                    let ref_len = ref_data.map(|d| d.len()).unwrap_or(0);
                    let ref_first5: Vec<u8> = ref_data
                        .map(|d| d.iter().take(5).cloned().collect())
                        .unwrap_or_default();
                    eprintln!("RAGC_REF_LOOKUP_PERSISTENT: degenerate key ({},{}) -> group_id={} ref_len={} ref[0..5]={:?}",
                        key_front, key_back, group_id, ref_len, ref_first5);
                }
                (
                    ref_segs.get(&group_id).map(|data| data.as_slice()),
                    "persistent",
                )
            } else {
                (None, "none")
            };
            // NOTE(review): no-op rebinding, and `ref_source` above is never
            // read — both look like leftovers from a removed debug print.
            let ref_data_opt = ref_data_opt;

            if let Some(ref_data) = ref_data_opt {
                // Test LZ encoding against this reference (C++ AGC line 1728: estimate())
                let target_data = if needs_rc {
                    segment_data_rc
                } else {
                    segment_data
                };

                // Compute estimate - compare both RAGC native and C++ FFI when verbose
                let estim_size = {
                    let mut lz = LZDiff::new(config.min_match_len as u32);
                    lz.prepare(&ref_data.to_vec());
                    // Use estimate() which matches C++ CLZDiff_V2::Estimate exactly
                    lz.estimate(&target_data.to_vec(), best_estim_size as u32) as usize
                };

                // Also compute with C++ FFI and compare
                #[cfg(feature = "cpp_agc")]
                let cpp_estim_size = crate::ragc_ffi::lzdiff_v2_estimate(
                    ref_data,
                    target_data,
                    config.min_match_len as u32,
                    best_estim_size as u32,
                ) as usize;

                #[cfg(feature = "cpp_agc")]
                if estim_size != cpp_estim_size && config.verbosity > 0 {
                    eprintln!(
                        "ESTIMATE_MISMATCH: ragc={} cpp={} ref_len={} tgt_len={} bound={}",
                        estim_size,
                        cpp_estim_size,
                        ref_data.len(),
                        target_data.len(),
                        best_estim_size
                    );
                }

                // DEBUG: Also compute estimate with initial threshold to check if tie would occur
                #[cfg(not(feature = "cpp_agc"))]
                let estim_no_bound = if config.verbosity > 2 {
                    let mut lz2 = LZDiff::new(config.min_match_len as u32);
                    lz2.prepare(&ref_data.to_vec());
                    lz2.estimate(&target_data.to_vec(), (segment_len - 16) as u32) as usize
                } else {
                    0
                };

                if config.verbosity > 2 {
                    // Print detailed debug info including bound and first/last bytes
                    let ref_first: Vec<u8> = ref_data.iter().take(5).cloned().collect();
                    let ref_last: Vec<u8> = ref_data.iter().rev().take(5).cloned().collect();
                    let tgt_first: Vec<u8> = target_data.iter().take(5).cloned().collect();
                    let tgt_last: Vec<u8> = target_data.iter().rev().take(5).cloned().collect();
                    #[cfg(not(feature = "cpp_agc"))]
                    eprintln!(
                        "RAGC_CASE3_ESTIMATE: kmer={} cand=({},{}) rc={} ref_len={} target_len={} bound={} estim={} estim_nobound={} ref[0..5]={:?} ref[-5..]={:?} tgt[0..5]={:?} tgt[-5..]={:?}",
                        kmer, key_front, key_back, needs_rc, ref_data.len(), target_data.len(), best_estim_size, estim_size, estim_no_bound, ref_first, ref_last, tgt_first, tgt_last
                    );
                    #[cfg(feature = "cpp_agc")]
                    eprintln!(
                        "RAGC_CASE3_ESTIMATE: kmer={} cand=({},{}) rc={} ref_len={} target_len={} bound={} estim={} ref[0..5]={:?} ref[-5..]={:?} tgt[0..5]={:?} tgt[-5..]={:?}",
                        kmer, key_front, key_back, needs_rc, ref_data.len(), target_data.len(), best_estim_size, estim_size, ref_first, ref_last, tgt_first, tgt_last
                    );
                }

                // Track minimum estim_size (C++ AGC lines 1730-1732)
                // Tightening the bound here also prunes later estimate() calls
                if estim_size < best_estim_size {
                    best_estim_size = estim_size;
                }

                candidate_estimates.push((key_front, key_back, needs_rc, ref_size, estim_size));
            }
        }
    }

    // Pass 2: Pick candidate with minimum estimate among ALL candidates, using tie-breakers
    // (C++ AGC lines 1775-1788)
    //
    // CRITICAL FIX: C++ AGC only picks candidates that BEAT the initial threshold (segment_size - 16).
    // If no candidate beats the threshold, best_pk stays at (~0ull, ~0ull) and fallback MISSING is used.
    //
    // The previous bug was unconditionally picking the first candidate (first_candidate = true).
    // This caused RAGC to always pick the first candidate even when its estimate was worse than threshold,
    // preventing fallback to existing MISSING groups.
    //
    // C++ AGC's selection logic (lines 1780-1787):
    //   if (v_estim_size[i] < best_estim_size || ...)
    // This only updates best_pk if estimate is BETTER than current best (initially threshold).
    for &(key_front, key_back, needs_rc, _ref_size, estim_size) in &candidate_estimates {
        let cand_pk = (key_front, key_back);
        let best_pk = (best_key_front, best_key_back);

        // Match C++ AGC's selection logic exactly (lines 1780-1787):
        // Only pick candidate if:
        // - Smaller estimate than current best (initially threshold), OR
        // - Same estimate with lexicographically smaller pk, OR
        // - Same estimate+pk with better RC (prefers forward orientation)
        if estim_size < best_estim_size
            || (estim_size == best_estim_size && cand_pk < best_pk)
            || (estim_size == best_estim_size && cand_pk == best_pk && !needs_rc)
        {
            best_estim_size = estim_size;
            best_key_front = key_front;
            best_key_back = key_back;
            best_needs_rc = needs_rc;
        }
    }

    // Debug: Print Pass 2 results
    if config.verbosity > 2 && !candidate_estimates.is_empty() {
        let threshold = if segment_len < 16 {
            segment_len
        } else {
            segment_len - 16
        };
        eprintln!(
            "RAGC_CASE3_PASS2_RESULTS: threshold={} best=({},{}) best_estim={}",
            threshold, best_key_front, best_key_back, best_estim_size
        );
        for (i, &(kf, kb, rc, rs, es)) in candidate_estimates.iter().enumerate() {
            let is_winner = kf == best_key_front && kb == best_key_back;
            let marker = if is_winner { "*WINNER*" } else { "" };
            eprintln!(
                "  RESULT[{}]: ({},{}) rc={} ref_size={} estimate={} {}",
                i, kf, kb, rc, rs, es, marker
            );
        }
    }

    // If no candidate was selected (best_pk is still (~0ull, ~0ull)), create MISSING key
    // This matches C++ AGC lines 1791-1799: fallback to (kmer, MISSING) or (MISSING, kmer)
    if best_key_front == u64::MAX && best_key_back == u64::MAX {
        if kmer_is_dir {
            // Dir-oriented: (kmer, MISSING) with rc=false
            if config.verbosity > 1 {
                #[cfg(feature = "verbose_debug")]
                eprintln!(
                    "RAGC_CASE3_NO_WINNER: kmer={} is_dir=true -> ({}, MISSING) rc=false",
                    kmer, kmer
                );
            }
            return (kmer, MISSING_KMER, false);
        } else {
            // NOT dir-oriented: (MISSING, kmer) with rc=true
            if config.verbosity > 1 {
                #[cfg(feature = "verbose_debug")]
                eprintln!(
                    "RAGC_CASE3_NO_WINNER: kmer={} is_dir=false -> (MISSING, {}) rc=true",
                    kmer, kmer
                );
            }
            return (MISSING_KMER, kmer, true);
        }
    }

    if config.verbosity > 1 {
        #[cfg(feature = "verbose_debug")]
        eprintln!(
            "RAGC_CASE3_PICKED: kmer={} best=({},{}) rc={} estim_size={} segment_size={}",
            kmer, best_key_front, best_key_back, best_needs_rc, best_estim_size, segment_len
        );
    }

    (best_key_front, best_key_back, best_needs_rc)
}
3436
3437/// Find candidate segment using fallback minimizers
3438/// Matches C++ AGC's find_cand_segment_using_fallback_minimizers (lines 1807-1958)
3439///
3440/// This function is called when Case 3 (one k-mer present) fails to find a good match.
3441/// It scans the segment for k-mers that pass the fallback filter, looks them up in
3442/// the fallback minimizers map, and finds candidate groups with shared k-mers.
3443///
3444/// # Arguments
3445/// * `segment_data` - The segment data to search
3446/// * `k` - K-mer length
3447/// * `min_shared_kmers` - Minimum number of shared k-mers to consider a candidate
3448/// * `fallback_filter` - Filter to select which k-mers to check
3449/// * `map_fallback_minimizers` - Map from k-mer to candidate group keys
3450/// * `map_segments` - Map from group key to group ID
3451/// * `segment_groups` - Buffer of segment groups
3452/// * `reference_segments` - Stored reference segments
3453/// * `config` - Compression configuration
3454///
3455/// # Returns
3456/// (key_front, key_back, should_reverse) if a candidate is found, or (MISSING, MISSING, false) if none
3457#[allow(clippy::too_many_arguments)]
3458fn find_cand_segment_using_fallback_minimizers(
3459    segment_data: &[u8],
3460    segment_data_rc: &[u8],
3461    k: usize,
3462    min_shared_kmers: u64,
3463    fallback_filter: &FallbackFilter,
3464    map_fallback_minimizers: &Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>>,
3465    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
3466    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
3467    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
3468    config: &StreamingQueueConfig,
3469) -> (u64, u64, bool) {
3470    use crate::segment::MISSING_KMER;
3471
3472    const MAX_NUM_TO_ESTIMATE: usize = 10;
3473    let short_segments = config.segment_size <= 10000;
3474    let segment_len = segment_data.len();
3475
3476    if !fallback_filter.is_enabled() {
3477        return (MISSING_KMER, MISSING_KMER, false);
3478    }
3479
3480    // Scan segment for k-mers and count candidates
3481    // Map from candidate group key to list of shared k-mers
3482    let mut cand_seg_counts: BTreeMap<(u64, u64), Vec<u64>> = BTreeMap::new(); // BTreeMap for determinism
3483
3484    // K-mer scanning state (matches C++ AGC CKmer behavior)
3485    let mut kmer_data: u64 = 0;
3486    let mut kmer_rc: u64 = 0;
3487    let mut kmer_len: usize = 0;
3488    let mask: u64 = (1u64 << (2 * k)) - 1;
3489
3490    // Scan segment for k-mers
3491    for &base in segment_data {
3492        if base > 3 {
3493            // Non-ACGT character - reset k-mer
3494            kmer_data = 0;
3495            kmer_rc = 0;
3496            kmer_len = 0;
3497            continue;
3498        }
3499
3500        // Add base to forward k-mer (shift left, add at LSB)
3501        kmer_data = ((kmer_data << 2) | (base as u64)) & mask;
3502
3503        // Add complement to reverse k-mer (shift right, add at MSB)
3504        let comp = 3 - base; // A<->T, C<->G
3505        kmer_rc = (kmer_rc >> 2) | ((comp as u64) << (2 * (k - 1)));
3506
3507        kmer_len += 1;
3508
3509        if kmer_len >= k {
3510            // Use canonical k-mer (smaller of forward and reverse)
3511            let canonical = kmer_data.min(kmer_rc);
3512            let is_dir_oriented = kmer_data <= kmer_rc;
3513
3514            // Check if k-mer passes fallback filter and is not symmetric
3515            if fallback_filter.passes(canonical) && kmer_data != kmer_rc {
3516                // Look up in fallback minimizers map
3517                let fb_map = map_fallback_minimizers.lock().unwrap();
3518                if let Some(candidates) = fb_map.get(&canonical) {
3519                    for &(key1, key2) in candidates {
3520                        // Skip MISSING keys
3521                        if key1 == MISSING_KMER || key2 == MISSING_KMER {
3522                            continue;
3523                        }
3524
3525                        // Normalize based on orientation
3526                        let cand_key = if !is_dir_oriented {
3527                            (key2, key1)
3528                        } else {
3529                            (key1, key2)
3530                        };
3531
3532                        cand_seg_counts
3533                            .entry(cand_key)
3534                            .or_insert_with(Vec::new)
3535                            .push(canonical);
3536                    }
3537                }
3538            }
3539        }
3540    }
3541
3542    // Prune candidates to those with >= min_shared_kmers unique k-mers
3543    let mut pruned_candidates: Vec<(u64, (u64, u64))> = Vec::new();
3544    for (key, mut kmers) in cand_seg_counts {
3545        kmers.sort_unstable();
3546        kmers.dedup();
3547        let unique_count = kmers.len() as u64;
3548        if unique_count >= min_shared_kmers {
3549            pruned_candidates.push((unique_count, key));
3550        }
3551    }
3552
3553    if pruned_candidates.is_empty() {
3554        if config.verbosity > 1 {
3555            #[cfg(feature = "verbose_debug")]
3556            eprintln!(
3557                "RAGC_FALLBACK_NO_CANDIDATES: min_shared={}",
3558                min_shared_kmers
3559            );
3560        }
3561        return (MISSING_KMER, MISSING_KMER, false);
3562    }
3563
3564    // Sort by count (descending) and take top MAX_NUM_TO_ESTIMATE
3565    pruned_candidates.sort_by(|a, b| b.0.cmp(&a.0));
3566    if pruned_candidates.len() > MAX_NUM_TO_ESTIMATE {
3567        pruned_candidates.truncate(MAX_NUM_TO_ESTIMATE);
3568    }
3569
3570    // Avoid trying poor candidates (less than half the best count)
3571    let best_count = pruned_candidates[0].0;
3572    pruned_candidates.retain(|c| c.0 * 2 >= best_count);
3573
3574    if config.verbosity > 1 {
3575        #[cfg(feature = "verbose_debug")]
3576        eprintln!(
3577            "RAGC_FALLBACK_CANDIDATES: count={} best_shared={} min_shared={}",
3578            pruned_candidates.len(),
3579            best_count,
3580            min_shared_kmers
3581        );
3582    }
3583
3584    // For short segments, use fast decision based on shared k-mer count
3585    if short_segments {
3586        let (count, (key_front, key_back)) = pruned_candidates[0];
3587        if config.verbosity > 1 {
3588            #[cfg(feature = "verbose_debug")]
3589            eprintln!(
3590                "RAGC_FALLBACK_SHORT_SEGMENT: key=({},{}) shared_kmers={}",
3591                key_front, key_back, count
3592            );
3593        }
3594        // Normalize: ensure front <= back
3595        if key_front <= key_back {
3596            return (key_front, key_back, false);
3597        } else {
3598            return (key_back, key_front, true);
3599        }
3600    }
3601
3602    // For longer segments, estimate compression cost for each candidate
3603    let mut best_key: Option<(u64, u64)> = None;
3604    let mut best_estimate: usize = segment_len;
3605    let mut _best_is_rc = false;
3606
3607    {
3608        let groups = segment_groups.lock().unwrap();
3609        let seg_map = map_segments.read().unwrap();
3610        let ref_segs = reference_segments.read().unwrap();
3611
3612        for &(_count, (key_front, key_back)) in &pruned_candidates {
3613            // Normalize key
3614            let (norm_front, norm_back, is_seg_rc) = if key_front <= key_back {
3615                (key_front, key_back, false)
3616            } else {
3617                (key_back, key_front, true)
3618            };
3619
3620            let cand_key = SegmentGroupKey {
3621                kmer_front: norm_front,
3622                kmer_back: norm_back,
3623            };
3624
3625            // Get reference segment for this candidate
3626            let ref_data_opt: Option<&[u8]> = if let Some(group_buffer) = groups.get(&cand_key) {
3627                group_buffer
3628                    .reference_segment
3629                    .as_ref()
3630                    .map(|seg| seg.data.as_slice())
3631            } else if let Some(&group_id) = seg_map.get(&cand_key) {
3632                ref_segs.get(&group_id).map(|data| data.as_slice())
3633            } else {
3634                None
3635            };
3636
3637            if let Some(ref_data) = ref_data_opt {
3638                let target_data = if is_seg_rc {
3639                    segment_data_rc
3640                } else {
3641                    segment_data
3642                };
3643
3644                // Estimate compression cost
3645                #[cfg(feature = "cpp_agc")]
3646                let estimate = crate::ragc_ffi::lzdiff_v2_estimate(
3647                    ref_data,
3648                    target_data,
3649                    config.min_match_len as u32,
3650                    best_estimate as u32,
3651                ) as usize;
3652
3653                #[cfg(not(feature = "cpp_agc"))]
3654                let estimate = {
3655                    let mut lz = LZDiff::new(config.min_match_len as u32);
3656                    lz.prepare(&ref_data.to_vec());
3657                    // Use estimate() which matches C++ CLZDiff_V2::Estimate exactly
3658                    lz.estimate(&target_data.to_vec(), best_estimate as u32) as usize
3659                };
3660
3661                if config.verbosity > 2 {
3662                    #[cfg(feature = "verbose_debug")]
3663                    eprintln!(
3664                        "RAGC_FALLBACK_ESTIMATE: key=({},{}) rc={} estimate={}",
3665                        norm_front, norm_back, is_seg_rc, estimate
3666                    );
3667                }
3668
3669                // Track best (lowest estimate)
3670                if estimate > 0 && estimate < best_estimate {
3671                    best_estimate = estimate;
3672                    best_key = Some((norm_front, norm_back));
3673                    _best_is_rc = is_seg_rc;
3674                }
3675            }
3676        }
3677    }
3678
3679    // In adaptive mode, check if result is worth using
3680    if config.adaptive_mode {
3681        let threshold = if short_segments {
3682            (segment_len as f64 * 0.9) as usize
3683        } else {
3684            (segment_len as f64 * 0.2) as usize
3685        };
3686
3687        if best_estimate >= threshold {
3688            if config.verbosity > 1 {
3689                #[cfg(feature = "verbose_debug")]
3690                eprintln!(
3691                    "RAGC_FALLBACK_ADAPTIVE_REJECT: estimate={} threshold={}",
3692                    best_estimate, threshold
3693                );
3694            }
3695            return (MISSING_KMER, MISSING_KMER, false);
3696        }
3697    }
3698
3699    match best_key {
3700        Some((front, back)) => {
3701            // Normalize: ensure front <= back
3702            if front <= back {
3703                if config.verbosity > 1 {
3704                    #[cfg(feature = "verbose_debug")]
3705                    eprintln!(
3706                        "RAGC_FALLBACK_PICKED: key=({},{}) rc=false estimate={}",
3707                        front, back, best_estimate
3708                    );
3709                }
3710                (front, back, false)
3711            } else {
3712                if config.verbosity > 1 {
3713                    #[cfg(feature = "verbose_debug")]
3714                    eprintln!(
3715                        "RAGC_FALLBACK_PICKED: key=({},{}) rc=true estimate={}",
3716                        back, front, best_estimate
3717                    );
3718                }
3719                (back, front, true)
3720            }
3721        }
3722        None => {
3723            if config.verbosity > 1 {
3724                #[cfg(feature = "verbose_debug")]
3725                eprintln!("RAGC_FALLBACK_NO_WINNER: no candidate beat threshold");
3726            }
3727            (MISSING_KMER, MISSING_KMER, false)
3728        }
3729    }
3730}
3731
3732/// Add fallback mapping for a segment's k-mers
3733/// Matches C++ AGC's add_fallback_mapping (lines 1961-1989)
3734///
3735/// Called when a segment is assigned to a group to populate the fallback minimizers map.
3736fn add_fallback_mapping(
3737    segment_data: &[u8],
3738    k: usize,
3739    splitter1: u64,
3740    splitter2: u64,
3741    fallback_filter: &FallbackFilter,
3742    map_fallback_minimizers: &Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>>,
3743) {
3744    use crate::segment::MISSING_KMER;
3745
3746    if !fallback_filter.is_enabled() {
3747        return;
3748    }
3749
3750    // Skip if splitters are MISSING
3751    if splitter1 == MISSING_KMER || splitter2 == MISSING_KMER {
3752        return;
3753    }
3754
3755    let splitter_dir = (splitter1, splitter2);
3756    let splitter_rev = (splitter2, splitter1);
3757    let mask: u64 = (1u64 << (2 * k)) - 1;
3758
3759    // K-mer scanning state
3760    let mut kmer_data: u64 = 0;
3761    let mut kmer_rc: u64 = 0;
3762    let mut kmer_len: usize = 0;
3763
3764    let mut fb_map = map_fallback_minimizers.lock().unwrap();
3765
3766    for &base in segment_data {
3767        if base > 3 {
3768            kmer_data = 0;
3769            kmer_rc = 0;
3770            kmer_len = 0;
3771            continue;
3772        }
3773
3774        kmer_data = ((kmer_data << 2) | (base as u64)) & mask;
3775        let comp = 3 - base;
3776        kmer_rc = (kmer_rc >> 2) | ((comp as u64) << (2 * (k - 1)));
3777        kmer_len += 1;
3778
3779        if kmer_len >= k {
3780            let canonical = kmer_data.min(kmer_rc);
3781            let is_dir_oriented = kmer_data <= kmer_rc;
3782
3783            // Check filter and skip symmetric k-mers
3784            if fallback_filter.passes(canonical) && kmer_data != kmer_rc {
3785                let to_add = if is_dir_oriented {
3786                    splitter_dir
3787                } else {
3788                    splitter_rev
3789                };
3790                let entry = fb_map.entry(canonical).or_insert_with(Vec::new);
3791
3792                // Only add if not already present
3793                if !entry.contains(&to_add) {
3794                    entry.push(to_add);
3795                }
3796            }
3797        }
3798    }
3799}
3800
3801// =============================================================================
3802// Parallel batch processing functions (C++ AGC 4-phase pattern)
3803// =============================================================================
3804
/// Phase 2: Prepare batch for parallel processing
/// - Processes NEW segments from buffered_seg_part (assigns group_ids)
/// - Drains segments from buffered_seg_part into SegmentGroupBuffer entries
/// - Extracts buffers that need flushing into ParallelFlushState
/// Returns true if there are buffers to flush, false otherwise
///
/// Lock-ordering note: `segment_groups` is locked in Phase 2c and held for the
/// remainder of the function, while `batch_local_groups`, the FFI
/// `grouping_engine` (cpp_agc builds only), `batch_local_terminators`, and
/// `archive` are each taken and released underneath it. NOTE(review): this
/// assumes every other call site takes these locks in a compatible order —
/// confirm to rule out deadlock.
///
/// NOTE(review): `collection`, `reference_orientations`, and `raw_group_counter`
/// are not read anywhere in this body (including the cfg-gated section);
/// presumably kept for signature parity with the other batch phases — confirm
/// before removing.
#[allow(clippy::too_many_arguments)]
fn prepare_batch_parallel(
    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
    buffered_seg_part: &Arc<BufferedSegPart>,
    batch_local_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>,
    batch_local_terminators: &Arc<Mutex<BTreeMap<u64, Vec<u64>>>>,
    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
    group_counter: &Arc<AtomicU32>,
    raw_group_counter: &Arc<AtomicU32>,
    archive: &Arc<Mutex<Archive>>,
    collection: &Arc<Mutex<CollectionV3>>,
    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
    reference_orientations: &Arc<RwLock<BTreeMap<u32, bool>>>,
    #[cfg(feature = "cpp_agc")] grouping_engine: &Arc<Mutex<crate::ragc_ffi::GroupingEngine>>,
    parallel_state: &ParallelFlushState,
    config: &StreamingQueueConfig,
) -> Result<bool> {
    use crate::segment::MISSING_KMER;

    // Check if there's anything to process.
    // Each probe takes and releases its lock immediately; the counts are only
    // used for the fast-path exit and the log line below.
    let batch_map_len = batch_local_groups.lock().unwrap().len();
    let batch_terms_len = batch_local_terminators.lock().unwrap().len();
    let has_buffered_segments = buffered_seg_part.has_segments();

    if batch_map_len == 0 && batch_terms_len == 0 && !has_buffered_segments {
        return Ok(false);
    }

    if config.verbosity > 0 {
        eprintln!("PREPARE_BATCH_PARALLEL: Processing {} batch-local groups, buffered segments: {}, {} terminator keys",
            batch_map_len, has_buffered_segments, batch_terms_len);
    }

    // Phase 2a: Process NEW segments - assign group_ids deterministically
    // Get current group counter and update after process_new
    let mut next_group_id = group_counter.load(Ordering::SeqCst);
    {
        // All three global structures are locked together while process_new
        // assigns IDs, so new groups, their reference segments, and their
        // terminator entries appear atomically with respect to readers.
        let mut global_map = map_segments.write().unwrap();
        let mut ref_seg = reference_segments.write().unwrap();
        let mut term_map = map_segments_terminators.write().unwrap();
        let new_count = buffered_seg_part.process_new(
            &mut global_map,
            &mut next_group_id,
            &mut ref_seg,
            &mut term_map,
        );
        if config.verbosity > 0 && new_count > 0 {
            eprintln!(
                "PREPARE_BATCH_PARALLEL: Assigned {} new group IDs",
                new_count
            );
        }
    }
    // Update the shared counter
    group_counter.store(next_group_id, Ordering::SeqCst);

    // Phase 2b: Sort segments within each group for determinism
    buffered_seg_part.sort_known();

    // Phase 2c: Drain segments from buffered_seg_part into SegmentGroupBuffer entries
    // (this lock is held until the end of the function — see doc comment)
    let mut groups_map = segment_groups.lock().unwrap();

    // Build reverse lookup map (group_id -> key) ONCE to avoid O(n²) lookups
    let group_id_to_key: std::collections::HashMap<u32, SegmentGroupKey> = {
        let global_map = map_segments.read().unwrap();
        global_map
            .iter()
            .map(|(k, &gid)| (gid, k.clone()))
            .collect()
    };

    // Phase 2c-1: Collect all segments with their keys (no locks needed)
    // FIX 18: For raw groups (0-15), use unique keys (raw_group_id, MISSING) instead of (MISSING, MISSING)
    // This ensures each raw group has its own buffer, matching C++ AGC's distribute_segments() behavior
    let num_groups = buffered_seg_part.num_groups();
    let mut collected_segments: Vec<(u32, SegmentGroupKey, BufferedSegment)> = Vec::new();
    for group_id in 0..num_groups as u32 {
        while let Some(seg) = buffered_seg_part.get_part(group_id) {
            // FIX 18: Raw groups (0-15) need unique keys to have separate buffers
            // The global map has (MISSING, MISSING) -> 0, but we need each raw group
            // to have its own buffer for proper distribution (matching C++ AGC)
            let key = if group_id < NO_RAW_GROUPS {
                // Raw group: use unique key (group_id, MISSING) to distinguish buffers
                SegmentGroupKey {
                    kmer_front: group_id as u64,
                    kmer_back: MISSING_KMER,
                }
            } else {
                // LZ group: use the actual key from the map
                group_id_to_key.get(&group_id).cloned().unwrap_or_else(|| {
                    // Fallback - shouldn't happen for LZ groups
                    SegmentGroupKey {
                        kmer_front: MISSING_KMER,
                        kmer_back: MISSING_KMER,
                    }
                })
            };
            collected_segments.push((group_id, key, seg));
        }
    }

    // Phase 2c-2: Batch update batch_local_groups (ONE lock acquisition)
    {
        let mut batch_map = batch_local_groups.lock().unwrap();
        for (group_id, key, _) in &collected_segments {
            batch_map.insert(key.clone(), *group_id);
        }
    }

    // Phase 2c-3: Register with FFI engine (ONE lock acquisition)
    // Only real LZ keys (both k-mers present) are registered; raw-group
    // sentinel keys are skipped.
    #[cfg(feature = "cpp_agc")]
    {
        let mut eng = grouping_engine.lock().unwrap();
        for (group_id, key, _) in &collected_segments {
            if key.kmer_front != MISSING_KMER && key.kmer_back != MISSING_KMER {
                eng.register_group(key.kmer_front, key.kmer_back, *group_id);
            }
        }
    }

    // Phase 2c-4: Batch update terminators (ONE lock acquisition)
    // Records the bidirectional front<->back adjacency for each LZ key
    // (single entry when front == back).
    {
        let mut term_map = batch_local_terminators.lock().unwrap();
        for (_, key, _) in &collected_segments {
            if key.kmer_front != MISSING_KMER && key.kmer_back != MISSING_KMER {
                term_map
                    .entry(key.kmer_front)
                    .or_insert_with(Vec::new)
                    .push(key.kmer_back);
                if key.kmer_front != key.kmer_back {
                    term_map
                        .entry(key.kmer_back)
                        .or_insert_with(Vec::new)
                        .push(key.kmer_front);
                }
            }
        }
    }

    // Phase 2c-5: Pre-register all streams for new groups (ONE lock acquisition)
    // Build a set of existing group_ids first for O(1) lookup
    let existing_group_ids: std::collections::HashSet<u32> =
        groups_map.values().map(|b| b.group_id).collect();

    // Collect unique new group_ids (O(n) instead of O(n×m))
    // DETERMINISM FIX: Use BTreeSet instead of HashSet to ensure deterministic iteration order
    let new_group_ids: std::collections::BTreeSet<u32> = collected_segments
        .iter()
        .map(|(gid, _, _)| *gid)
        .filter(|gid| !existing_group_ids.contains(gid))
        .collect();

    // Pre-register all streams in one lock acquisition
    // BTreeSet iteration is sorted, so stream registration order is deterministic
    let stream_registrations: std::collections::HashMap<u32, (usize, usize)> = if !new_group_ids
        .is_empty()
    {
        let archive_version = ragc_common::AGC_FILE_MAJOR * 1000 + ragc_common::AGC_FILE_MINOR;
        let mut arch = archive.lock().unwrap();
        new_group_ids
            .iter()
            .map(|&group_id| {
                let delta_stream_name = ragc_common::stream_delta_name(archive_version, group_id);
                let ref_stream_name = ragc_common::stream_ref_name(archive_version, group_id);
                let stream_id = arch.register_stream(&delta_stream_name);
                let ref_stream_id = arch.register_stream(&ref_stream_name);
                (group_id, (stream_id, ref_stream_id))
            })
            .collect()
    } else {
        std::collections::HashMap::new()
    };

    // Phase 2c-6: Add segments to buffers (groups_map already locked)
    for (group_id, key, seg) in collected_segments {
        let buffer = groups_map.entry(key.clone()).or_insert_with(|| {
            let (stream_id, ref_stream_id) = stream_registrations
                .get(&group_id)
                .copied()
                .unwrap_or_else(|| {
                    // Fallback: register now (shouldn't happen if logic is correct)
                    let archive_version =
                        ragc_common::AGC_FILE_MAJOR * 1000 + ragc_common::AGC_FILE_MINOR;
                    let delta_stream_name =
                        ragc_common::stream_delta_name(archive_version, group_id);
                    let ref_stream_name = ragc_common::stream_ref_name(archive_version, group_id);
                    let mut arch = archive.lock().unwrap();
                    let sid = arch.register_stream(&delta_stream_name);
                    let rsid = arch.register_stream(&ref_stream_name);
                    (sid, rsid)
                });
            SegmentGroupBuffer::new(group_id, stream_id, ref_stream_id)
        });
        buffer.segments.push(seg);
    }

    // Clear the buffered_seg_part after draining
    buffered_seg_part.clear();

    // Extract buffers that need flushing: any buffer with pending segments,
    // or whose reference has not yet been written.
    let mut extracted: Vec<(SegmentGroupKey, SegmentGroupBuffer)> = Vec::new();
    let keys_to_remove: Vec<SegmentGroupKey> = groups_map
        .iter()
        .filter(|(_, buffer)| !buffer.segments.is_empty() || !buffer.ref_written)
        .map(|(k, _)| k.clone())
        .collect();

    // Sort keys for deterministic processing order
    let mut sorted_keys = keys_to_remove;
    sorted_keys.sort();

    for key in sorted_keys {
        if let Some(buffer) = groups_map.remove(&key) {
            extracted.push((key, buffer));
        }
    }

    let has_work = !extracted.is_empty();

    if config.verbosity > 0 {
        eprintln!(
            "PREPARE_BATCH_PARALLEL: Extracted {} buffers for parallel flush",
            extracted.len()
        );
    }

    // Populate ParallelFlushState
    parallel_state.prepare(extracted);

    Ok(has_work)
}
4042
4043/// Phase 4: Cleanup after parallel processing
4044/// - Re-inserts processed buffers
4045/// - Updates global maps
4046/// - Clears batch-local state
4047fn cleanup_batch_parallel(
4048    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
4049    batch_local_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>,
4050    batch_local_terminators: &Arc<Mutex<BTreeMap<u64, Vec<u64>>>>,
4051    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
4052    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
4053    parallel_state: &ParallelFlushState,
4054    config: &StreamingQueueConfig,
4055) {
4056    // Re-insert processed buffers
4057    let processed = parallel_state.drain_buffers();
4058    {
4059        let mut groups_map = segment_groups.lock().unwrap();
4060        for (key, buffer) in processed {
4061            groups_map.insert(key, buffer);
4062        }
4063    }
4064
4065    // Update global registry with batch-local groups
4066    {
4067        let batch_map = batch_local_groups.lock().unwrap();
4068        let mut global_map = map_segments.write().unwrap();
4069        for (key, group_id) in batch_map.iter() {
4070            global_map.entry(key.clone()).or_insert(*group_id);
4071        }
4072    }
4073
4074    // Merge batch-local terminators into global terminators
4075    {
4076        let batch_terms = batch_local_terminators.lock().unwrap();
4077        let mut global_terms = map_segments_terminators.write().unwrap();
4078        for (kmer, connections) in batch_terms.iter() {
4079            let entry = global_terms.entry(*kmer).or_insert_with(Vec::new);
4080            entry.extend(connections.iter().cloned());
4081            entry.sort_unstable();
4082            entry.dedup();
4083        }
4084    }
4085
4086    // Clear batch-local state
4087    batch_local_groups.lock().unwrap().clear();
4088    batch_local_terminators.lock().unwrap().clear();
4089
4090    if config.verbosity > 0 {
4091        eprintln!("CLEANUP_BATCH_PARALLEL: Batch cleanup complete");
4092    }
4093}
4094
4095/// Classify raw segments at barrier (Thread 0 only)
4096/// This eliminates lock contention by doing all classification single-threaded.
4097/// Raw segments are sorted for determinism, then classified using the same
4098/// Case 2/3a/3b logic as before, just without contention.
4099/// Includes fallback minimizer support to match C++ AGC grouping quality.
4100fn classify_raw_segments_at_barrier(
4101    raw_segment_buffers: &Arc<Vec<Mutex<Vec<RawBufferedSegment>>>>,
4102    buffered_seg_part: &Arc<BufferedSegPart>,
4103    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
4104    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
4105    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
4106    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
4107    fallback_filter: &FallbackFilter,
4108    map_fallback_minimizers: &Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>>,
4109    group_counter: &Arc<AtomicU32>,
4110    raw_group_counter: &Arc<AtomicU32>, // FIX 18: For round-robin distribution of orphan segments
4111    config: &StreamingQueueConfig,
4112) {
4113    use crate::segment::MISSING_KMER;
4114
4115    // Drain all raw segments from ALL per-worker buffers into one Vec
4116    let mut raw_segs: Vec<RawBufferedSegment> = Vec::new();
4117    for buffer in raw_segment_buffers.iter() {
4118        let mut worker_segs = buffer.lock().unwrap();
4119        raw_segs.append(&mut *worker_segs);
4120    }
4121
4122    if raw_segs.is_empty() {
4123        return;
4124    }
4125
4126    // Sort for determinism: by sample_name, contig_name, original_place
4127    raw_segs.sort();
4128
4129    // Group segments by (sample, contig) for parallel processing
4130    // Each contig's segments will be processed sequentially for determinism,
4131    // but different contigs can be processed in parallel
4132    use std::collections::BTreeMap;
4133    let mut contig_groups: BTreeMap<(String, String), Vec<RawBufferedSegment>> = BTreeMap::new();
4134    for raw_seg in raw_segs.drain(..) {
4135        let key = (raw_seg.sample_name.clone(), raw_seg.contig_name.clone());
4136        contig_groups.entry(key).or_default().push(raw_seg);
4137    }
4138
4139    // Sort segments within each contig by original_place (should already be sorted, but ensure)
4140    for segs in contig_groups.values_mut() {
4141        segs.sort_by_key(|s| s.original_place);
4142    }
4143
4144    let num_contigs = contig_groups.len();
4145    let total_segments: usize = contig_groups.values().map(|v| v.len()).sum();
4146
4147    if config.verbosity > 0 {
4148        eprintln!(
4149            "CLASSIFY_RAW_BARRIER: Processing {} raw segments across {} contigs (parallel)",
4150            total_segments, num_contigs
4151        );
4152    }
4153
4154    // DETERMINISM FIX: Process contigs SEQUENTIALLY to ensure deterministic group creation order.
4155    // Parallelism caused non-deterministic group IDs because different threads created groups
4156    // in unpredictable order, affecting which segments could split into which groups.
4157    // The compression phase is still parallel - only classification needs to be sequential.
4158    let contig_vec: Vec<_> = contig_groups.into_iter().collect();
4159
4160    for ((sample_name, contig_name), contig_segs) in contig_vec.into_iter() {
4161        // Track seg_part_no for this contig (local to this parallel task)
4162        let mut seg_part_no: usize = 0;
4163
4164        for raw_seg in contig_segs {
4165            // Use local seg_part_no for this contig
4166            let output_seg_part_no = seg_part_no;
4167            // Case 2/3a/3b classification (same logic as before)
4168            let (key_front, key_back, should_reverse) =
4169                if raw_seg.front_kmer != MISSING_KMER && raw_seg.back_kmer != MISSING_KMER {
4170                    // Case 2: Both k-mers present
4171                    if raw_seg.front_kmer < raw_seg.back_kmer {
4172                        (raw_seg.front_kmer, raw_seg.back_kmer, false)
4173                    } else {
4174                        (raw_seg.back_kmer, raw_seg.front_kmer, true)
4175                    }
4176                } else if raw_seg.front_kmer != MISSING_KMER {
4177                    // Case 3a: Only front k-mer present
4178                    let (mut kf, mut kb, mut sr) = find_group_with_one_kmer(
4179                        raw_seg.front_kmer,
4180                        raw_seg.front_kmer_is_dir,
4181                        &raw_seg.data,
4182                        &raw_seg.data_rc,
4183                        map_segments_terminators,
4184                        map_segments,
4185                        segment_groups,
4186                        reference_segments,
4187                        config,
4188                    );
4189                    // Fallback: If Case 3a returned MISSING, try fallback minimizers
4190                    if (kf == MISSING_KMER || kb == MISSING_KMER) && fallback_filter.is_enabled() {
4191                        let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
4192                            &raw_seg.data,
4193                            &raw_seg.data_rc,
4194                            config.k,
4195                            5, // min_shared_kmers = 5 for Case 3
4196                            fallback_filter,
4197                            map_fallback_minimizers,
4198                            map_segments,
4199                            segment_groups,
4200                            reference_segments,
4201                            config,
4202                        );
4203                        if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
4204                            kf = fb_kf;
4205                            kb = fb_kb;
4206                            sr = fb_sr;
4207                        }
4208                    }
4209                    (kf, kb, sr)
4210                } else if raw_seg.back_kmer != MISSING_KMER {
4211                    // Case 3b: Only back k-mer present
4212                    let kmer_is_dir_after_swap = !raw_seg.back_kmer_is_dir;
4213                    let (mut kf, mut kb, mut sr) = find_group_with_one_kmer(
4214                        raw_seg.back_kmer,
4215                        kmer_is_dir_after_swap,
4216                        &raw_seg.data_rc,
4217                        &raw_seg.data,
4218                        map_segments_terminators,
4219                        map_segments,
4220                        segment_groups,
4221                        reference_segments,
4222                        config,
4223                    );
4224                    sr = !sr;
4225                    // Fallback: If Case 3b returned MISSING, try fallback minimizers
4226                    // Note: C++ AGC uses segment_rc for fallback in Case 3b
4227                    if (kf == MISSING_KMER || kb == MISSING_KMER) && fallback_filter.is_enabled() {
4228                        let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
4229                            &raw_seg.data_rc, // Use RC for Case 3b (matches C++ AGC)
4230                            &raw_seg.data,
4231                            config.k,
4232                            5, // min_shared_kmers = 5 for Case 3
4233                            fallback_filter,
4234                            map_fallback_minimizers,
4235                            map_segments,
4236                            segment_groups,
4237                            reference_segments,
4238                            config,
4239                        );
4240                        if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
4241                            kf = fb_kf;
4242                            kb = fb_kb;
4243                            sr = !fb_sr; // C++ AGC: store_rc = !store_dir_alt
4244                        }
4245                    }
4246                    (kf, kb, sr)
4247                } else {
4248                    // Case 1: Both MISSING - try fallback minimizers
4249                    let mut kf = MISSING_KMER;
4250                    let mut kb = MISSING_KMER;
4251                    let mut sr = false;
4252
4253                    if fallback_filter.is_enabled() {
4254                        let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
4255                            &raw_seg.data,
4256                            &raw_seg.data_rc,
4257                            config.k,
4258                            1, // min_shared_kmers = 1 for Case 1 (matches C++ AGC)
4259                            fallback_filter,
4260                            map_fallback_minimizers,
4261                            map_segments,
4262                            segment_groups,
4263                            reference_segments,
4264                            config,
4265                        );
4266                        if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
4267                            kf = fb_kf;
4268                            kb = fb_kb;
4269                            sr = fb_sr;
4270                        }
4271                    }
4272                    (kf, kb, sr)
4273                };
4274
4275            let key = SegmentGroupKey {
4276                kmer_front: key_front,
4277                kmer_back: key_back,
4278            };
4279
4280            // Prepare segment data (reverse complement if needed)
4281            let segment_data = if should_reverse {
4282                raw_seg.data_rc.clone()
4283            } else {
4284                raw_seg.data.clone()
4285            };
4286
4287            // Add fallback mapping for this segment (matches C++ AGC add_fallback_mapping)
4288            // This populates the fallback minimizers map for use by later segments
4289            add_fallback_mapping(
4290                &segment_data,
4291                config.k,
4292                key.kmer_front,
4293                key.kmer_back,
4294                fallback_filter,
4295                map_fallback_minimizers,
4296            );
4297
4298            // Check if group exists (NO CONTENTION - we're single-threaded)
4299            let group_id_opt = {
4300                let seg_map = map_segments.read().unwrap();
4301                seg_map.get(&key).copied()
4302            };
4303
4304            if let Some(group_id) = group_id_opt {
4305                // KNOWN: add to per-group buffer
4306                // FIX 18: For orphan segments (key = MISSING, MISSING), use round-robin across groups 0-15
4307                // instead of always using group 0. This matches C++ AGC's distribute_segments() behavior.
4308                let actual_group_id =
4309                    if key.kmer_front == MISSING_KMER && key.kmer_back == MISSING_KMER {
4310                        // Orphan segment - distribute across raw groups 0-15 via round-robin
4311                        raw_group_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst)
4312                            % NO_RAW_GROUPS
4313                    } else {
4314                        group_id
4315                    };
4316                buffered_seg_part.add_known(
4317                    actual_group_id,
4318                    BufferedSegment {
4319                        sample_name: raw_seg.sample_name.clone(),
4320                        contig_name: raw_seg.contig_name.clone(),
4321                        seg_part_no: output_seg_part_no,
4322                        data: segment_data,
4323                        is_rev_comp: should_reverse,
4324                        sample_priority: raw_seg.sample_priority,
4325                    },
4326                );
4327                // Increment counter by 1 for non-split segment
4328                seg_part_no += 1;
4329            } else {
4330                // NEW: Try segment splitting before adding as new group
4331                // C++ AGC only attempts splits when key doesn't exist and both k-mers valid
4332                if config.verbosity > 0 {
4333                    eprintln!(
4334                        "BARRIER_NEW_SEGMENT: key=({},{}) sample={}",
4335                        key_front, key_back, raw_seg.sample_name
4336                    );
4337                }
4338                let mut was_split = false;
4339
4340                // Skip barrier splitting if disabled (for C++ AGC parity testing)
4341                let try_split = !crate::env_cache::disable_barrier_split()
4342                    && key_front != MISSING_KMER
4343                    && key_back != MISSING_KMER
4344                    && key_front != key_back;
4345
4346                if try_split {
4347                    // Try to find a middle splitter
4348                    let middle_kmer_opt = {
4349                        let terminators = map_segments_terminators.read().unwrap();
4350                        let front_conn = terminators.get(&key_front).map(|v| v.len()).unwrap_or(0);
4351                        let back_conn = terminators.get(&key_back).map(|v| v.len()).unwrap_or(0);
4352                        if config.verbosity > 0 {
4353                            // Always print the first few, then only when connections exist
4354                            eprintln!("BARRIER_SPLIT_TRY: key=({},{}) term_size={} front_conn={} back_conn={} sample={}",
4355                            key_front, key_back, terminators.len(), front_conn, back_conn, raw_seg.sample_name);
4356                        }
4357                        find_middle_splitter(key_front, key_back, &terminators)
4358                    };
4359
4360                    if let Some(middle_kmer) = middle_kmer_opt {
4361                        if config.verbosity > 0 {
4362                            eprintln!(
4363                                "BARRIER_SPLIT_FOUND_MIDDLE: key=({},{}) middle={}",
4364                                key_front, key_back, middle_kmer
4365                            );
4366                        }
4367                        // Found potential middle k-mer - check if both target groups exist
4368                        let left_key = if key_front <= middle_kmer {
4369                            SegmentGroupKey {
4370                                kmer_front: key_front,
4371                                kmer_back: middle_kmer,
4372                            }
4373                        } else {
4374                            SegmentGroupKey {
4375                                kmer_front: middle_kmer,
4376                                kmer_back: key_front,
4377                            }
4378                        };
4379                        let right_key = if middle_kmer <= key_back {
4380                            SegmentGroupKey {
4381                                kmer_front: middle_kmer,
4382                                kmer_back: key_back,
4383                            }
4384                        } else {
4385                            SegmentGroupKey {
4386                                kmer_front: key_back,
4387                                kmer_back: middle_kmer,
4388                            }
4389                        };
4390
4391                        let (left_group_id, right_group_id) = {
4392                            let seg_map = map_segments.read().unwrap();
4393                            (
4394                                seg_map.get(&left_key).copied(),
4395                                seg_map.get(&right_key).copied(),
4396                            )
4397                        };
4398
4399                        // Only split if BOTH target groups already exist
4400                        if left_group_id.is_none() || right_group_id.is_none() {
4401                            if config.verbosity > 0 {
4402                                eprintln!("BARRIER_SPLIT_MISSING_GROUP: left={:?} right={:?} left_key=({},{}) right_key=({},{})",
4403                                left_group_id, right_group_id, left_key.kmer_front, left_key.kmer_back, right_key.kmer_front, right_key.kmer_back);
4404                            }
4405                        }
4406                        if let (Some(left_gid), Some(right_gid)) = (left_group_id, right_group_id) {
4407                            // Get reference segment data for BOTH left and right
4408                            let (left_ref_data, right_ref_data) = {
4409                                let refs = reference_segments.read().unwrap();
4410                                let left = refs.get(&left_gid).cloned().unwrap_or_default();
4411                                let right = refs.get(&right_gid).cloned().unwrap_or_default();
4412                                (left, right)
4413                            };
4414
4415                            if config.verbosity > 1 {
4416                                eprintln!("BARRIER_SPLIT_GROUPS_EXIST: left_gid={} right_gid={} left_ref_len={} right_ref_len={} seg_len={}",
4417                                left_gid, right_gid, left_ref_data.len(), right_ref_data.len(), raw_seg.data.len());
4418                            }
4419
4420                            // Find split decision using cost-based optimization
4421                            // This matches C++ AGC's find_cand_segment_with_missing_middle_splitter
4422                            // which computes LZ encoding cost at every position and decides:
4423                            // - AssignToLeft: entire segment goes to (front, middle) group
4424                            // - AssignToRight: entire segment goes to (middle, back) group
4425                            // - SplitAt(pos): actually split the segment
4426                            //
4427                            // C++ AGC line 1393: passes (kmer1, kmer2) after normalization swap,
4428                            // and swaps segment_dir/segment_rc based on use_rc flag.
4429                            // When should_reverse=true (i.e., original front > back), we need to
4430                            // swap the segments to match C++ AGC's behavior.
4431                            let (seg_dir, seg_rc) = if should_reverse {
4432                                (&raw_seg.data_rc, &raw_seg.data) // Swap when use_rc=true
4433                            } else {
4434                                (&raw_seg.data, &raw_seg.data_rc)
4435                            };
4436                            let split_decision = find_split_by_cost(
4437                                seg_dir,
4438                                seg_rc,
4439                                &left_ref_data,
4440                                &right_ref_data,
4441                                key_front, // normalized k-mers (key_front < key_back)
4442                                key_back,
4443                                middle_kmer,
4444                                config.k,
4445                                config.min_match_len as u32,
4446                            );
4447
4448                            match split_decision {
4449                                SplitDecision::SplitAt(split_pos) => {
4450                                    // Actually split the segment into two parts
4451                                    let (left_data, right_data) = split_segment_at_position(
4452                                        &segment_data,
4453                                        split_pos,
4454                                        config.k,
4455                                    );
4456                                    let left_len = left_data.len();
4457                                    let right_len = right_data.len();
4458
4459                                    // FIX 27 v4: Compute orientations using ORIGINAL k-mers and should_reverse
4460                                    let (left_should_reverse, right_should_reverse) =
4461                                        if should_reverse {
4462                                            let left_rc = middle_kmer >= raw_seg.back_kmer;
4463                                            let right_rc = raw_seg.front_kmer >= middle_kmer;
4464                                            (left_rc, right_rc)
4465                                        } else {
4466                                            let left_rc = raw_seg.front_kmer >= middle_kmer;
4467                                            let right_rc = middle_kmer >= raw_seg.back_kmer;
4468                                            (left_rc, right_rc)
4469                                        };
4470
4471                                    let left_final = if left_should_reverse != should_reverse {
4472                                        reverse_complement_sequence(&left_data)
4473                                    } else {
4474                                        left_data
4475                                    };
4476                                    let right_final = if right_should_reverse != should_reverse {
4477                                        reverse_complement_sequence(&right_data)
4478                                    } else {
4479                                        right_data
4480                                    };
4481
4482                                    // FIX: When should_reverse=true, left_data is the RIGHT part of
4483                                    // the original and right_data is the LEFT part. C++ AGC swaps
4484                                    // left_size/right_size when use_rc=true (line 1419), so the
4485                                    // first segment stored is always the LEFT part of the original.
4486                                    // We need to swap seg_part_no assignments to match.
4487                                    let (left_seg_part, right_seg_part) = if should_reverse {
4488                                        (output_seg_part_no + 1, output_seg_part_no)
4489                                    } else {
4490                                        (output_seg_part_no, output_seg_part_no + 1)
4491                                    };
4492
4493                                    buffered_seg_part.add_known(
4494                                        left_gid,
4495                                        BufferedSegment {
4496                                            sample_name: raw_seg.sample_name.clone(),
4497                                            contig_name: raw_seg.contig_name.clone(),
4498                                            seg_part_no: left_seg_part,
4499                                            data: left_final,
4500                                            is_rev_comp: left_should_reverse,
4501                                            sample_priority: raw_seg.sample_priority,
4502                                        },
4503                                    );
4504
4505                                    buffered_seg_part.add_known(
4506                                        right_gid,
4507                                        BufferedSegment {
4508                                            sample_name: raw_seg.sample_name.clone(),
4509                                            contig_name: raw_seg.contig_name.clone(),
4510                                            seg_part_no: right_seg_part,
4511                                            data: right_final,
4512                                            is_rev_comp: right_should_reverse,
4513                                            sample_priority: raw_seg.sample_priority,
4514                                        },
4515                                    );
4516
4517                                    seg_part_no += 2;
4518                                    was_split = true;
4519                                    if config.verbosity > 0 {
4520                                        eprintln!("BARRIER_SPLIT_SUCCESS: sample={} contig={} place={} split_pos={} left_len={} right_len={}",
4521                                        raw_seg.sample_name, raw_seg.contig_name, raw_seg.original_place, split_pos, left_len, right_len);
4522                                    }
4523                                }
4524                                SplitDecision::AssignToLeft => {
4525                                    // Assign entire segment to left group (front -> middle)
4526                                    // C++ AGC lines 1408-1414: right_size == 0 case
4527                                    let assign_rc = if should_reverse {
4528                                        middle_kmer >= raw_seg.back_kmer
4529                                    } else {
4530                                        raw_seg.front_kmer >= middle_kmer
4531                                    };
4532                                    let assign_data = if assign_rc != should_reverse {
4533                                        reverse_complement_sequence(&segment_data)
4534                                    } else {
4535                                        segment_data.clone()
4536                                    };
4537
4538                                    buffered_seg_part.add_known(
4539                                        left_gid,
4540                                        BufferedSegment {
4541                                            sample_name: raw_seg.sample_name.clone(),
4542                                            contig_name: raw_seg.contig_name.clone(),
4543                                            seg_part_no: output_seg_part_no,
4544                                            data: assign_data,
4545                                            is_rev_comp: assign_rc,
4546                                            sample_priority: raw_seg.sample_priority,
4547                                        },
4548                                    );
4549                                    seg_part_no += 1;
4550                                    was_split = true;
4551                                    if config.verbosity > 0 {
4552                                        eprintln!("BARRIER_ASSIGN_LEFT: sample={} contig={} place={} group={}",
4553                                        raw_seg.sample_name, raw_seg.contig_name, raw_seg.original_place, left_gid);
4554                                    }
4555                                }
4556                                SplitDecision::AssignToRight => {
4557                                    // Assign entire segment to right group (middle -> back)
4558                                    // C++ AGC lines 1400-1406: left_size == 0 case
4559                                    let assign_rc = if should_reverse {
4560                                        raw_seg.front_kmer >= middle_kmer
4561                                    } else {
4562                                        middle_kmer >= raw_seg.back_kmer
4563                                    };
4564                                    let assign_data = if assign_rc != should_reverse {
4565                                        reverse_complement_sequence(&segment_data)
4566                                    } else {
4567                                        segment_data.clone()
4568                                    };
4569
4570                                    buffered_seg_part.add_known(
4571                                        right_gid,
4572                                        BufferedSegment {
4573                                            sample_name: raw_seg.sample_name.clone(),
4574                                            contig_name: raw_seg.contig_name.clone(),
4575                                            seg_part_no: output_seg_part_no,
4576                                            data: assign_data,
4577                                            is_rev_comp: assign_rc,
4578                                            sample_priority: raw_seg.sample_priority,
4579                                        },
4580                                    );
4581                                    seg_part_no += 1;
4582                                    was_split = true;
4583                                    if config.verbosity > 0 {
4584                                        eprintln!("BARRIER_ASSIGN_RIGHT: sample={} contig={} place={} group={}",
4585                                        raw_seg.sample_name, raw_seg.contig_name, raw_seg.original_place, right_gid);
4586                                    }
4587                                }
4588                                SplitDecision::NoDecision => {
4589                                    if config.verbosity > 0 {
4590                                        eprintln!("BARRIER_SPLIT_SKIPPED: sample={} contig={} place={} left_ref={} right_ref={}",
4591                                        raw_seg.sample_name, raw_seg.contig_name, raw_seg.original_place, left_ref_data.len(), right_ref_data.len());
4592                                    }
4593                                }
4594                            }
4595                        }
4596                    }
4597                }
4598
4599                if !was_split {
4600                    // Register group IMMEDIATELY so later segments can split into it.
4601                    // With SEQUENTIAL processing (not parallel), this is now DETERMINISTIC.
4602                    // C++ AGC store_segments() updates map_segments at barrier, and segments
4603                    // within the same barrier batch CAN reference groups from earlier segments.
4604                    let new_group_id = {
4605                        let mut seg_map = map_segments.write().unwrap();
4606                        if let Some(&existing_gid) = seg_map.get(&key) {
4607                            existing_gid
4608                        } else {
4609                            // Allocate new group ID (matches C++ AGC no_segments++)
4610                            let gid =
4611                                group_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
4612                            seg_map.insert(key.clone(), gid);
4613                            gid
4614                        }
4615                    };
4616
4617                    // Ensure buffered_seg_part has capacity for this group ID
4618                    buffered_seg_part.ensure_capacity(new_group_id);
4619
4620                    // Store reference data IMMEDIATELY for new groups
4621                    // C++ AGC: first segment becomes reference for LZ encoding
4622                    {
4623                        let mut ref_segs = reference_segments.write().unwrap();
4624                        ref_segs
4625                            .entry(new_group_id)
4626                            .or_insert_with(|| segment_data.clone());
4627                    }
4628
4629                    // Update terminators IMMEDIATELY (C++ AGC lines 1015-1025)
4630                    // This allows find_middle_splitter to find shared k-mers for splitting
4631                    if key.kmer_front != MISSING_KMER && key.kmer_back != MISSING_KMER {
4632                        let mut term_map = map_segments_terminators.write().unwrap();
4633                        let front_vec = term_map.entry(key.kmer_front).or_insert_with(Vec::new);
4634                        if !front_vec.contains(&key.kmer_back) {
4635                            front_vec.push(key.kmer_back);
4636                            front_vec.sort();
4637                        }
4638                        if key.kmer_front != key.kmer_back {
4639                            let back_vec = term_map.entry(key.kmer_back).or_insert_with(Vec::new);
4640                            if !back_vec.contains(&key.kmer_front) {
4641                                back_vec.push(key.kmer_front);
4642                                back_vec.sort();
4643                            }
4644                        }
4645                    }
4646
4647                    // Add segment to buffer
4648                    buffered_seg_part.add_new(NewSegment {
4649                        kmer_front: key.kmer_front,
4650                        kmer_back: key.kmer_back,
4651                        sample_priority: raw_seg.sample_priority,
4652                        sample_name: raw_seg.sample_name,
4653                        contig_name: raw_seg.contig_name,
4654                        seg_part_no: output_seg_part_no,
4655                        data: segment_data,
4656                        should_reverse,
4657                    });
4658                    seg_part_no += 1;
4659                }
4660            }
4661        } // end for raw_seg
4662    } // end for contig (sequential)
4663
4664    // DETERMINISM FIX: Process all new segments sequentially after parallel classification
4665    // This ensures group IDs are assigned in deterministic BTreeSet order (by k-mer pair)
4666    // regardless of how many threads were used during classification.
4667    {
4668        let mut map_seg = map_segments.write().unwrap();
4669        let mut ref_seg = reference_segments.write().unwrap();
4670        let mut term_map = map_segments_terminators.write().unwrap();
4671        let mut next_gid = group_counter.load(std::sync::atomic::Ordering::SeqCst);
4672
4673        let new_groups =
4674            buffered_seg_part.process_new(&mut map_seg, &mut next_gid, &mut ref_seg, &mut term_map);
4675
4676        // Update the atomic counter with the final value
4677        group_counter.store(next_gid, std::sync::atomic::Ordering::SeqCst);
4678
4679        if config.verbosity > 0 && new_groups > 0 {
4680            eprintln!(
4681                "CLASSIFY_RAW_BARRIER: Registered {} new groups deterministically",
4682                new_groups
4683            );
4684        }
4685    }
4686
4687    if config.verbosity > 0 {
4688        eprintln!("CLASSIFY_RAW_BARRIER: Classification complete");
4689    }
4690}
4691
4692/// Flush batch-local groups to global state (matches C++ AGC batch boundary)
4693/// This updates the global map_segments registry with batch-local groups,
4694/// then clears the batch-local state (like C++ AGC destroying m_kmers at batch end)
4695fn flush_batch(
4696    segment_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
4697    pending_batch_segments: &Arc<Mutex<Vec<PendingSegment>>>,
4698    batch_local_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>,
4699    batch_local_terminators: &Arc<Mutex<BTreeMap<u64, Vec<u64>>>>,
4700    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
4701    group_counter: &Arc<AtomicU32>,
4702    raw_group_counter: &Arc<AtomicU32>, // FIX 17: Round-robin counter for raw groups (0-15)
4703    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
4704    archive: &Arc<Mutex<Archive>>,
4705    collection: &Arc<Mutex<CollectionV3>>,
4706    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
4707    reference_orientations: &Arc<RwLock<BTreeMap<u32, bool>>>,
4708    #[cfg(feature = "cpp_agc")] grouping_engine: &Arc<Mutex<crate::ragc_ffi::GroupingEngine>>,
4709    config: &StreamingQueueConfig,
4710) -> Result<()> {
4711    use crate::segment::MISSING_KMER;
4712
4713    // Get pending segments and check if anything needs flushing
4714    let mut pending = pending_batch_segments.lock().unwrap();
4715    let batch_map_len = batch_local_groups.lock().unwrap().len();
4716    let batch_terms_len = batch_local_terminators.lock().unwrap().len();
4717
4718    if batch_map_len == 0 && batch_terms_len == 0 && pending.is_empty() {
4719        #[cfg(feature = "verbose_debug")]
4720        if config.verbosity > 0 {
4721            eprintln!("FLUSH_BATCH: No pending groups to flush");
4722        }
4723        return Ok(());
4724    }
4725
4726    #[cfg(feature = "verbose_debug")]
4727    if config.verbosity > 0 {
4728        eprintln!("FLUSH_BATCH: Processing {} batch-local groups, {} pending segments, {} terminator keys",
4729            batch_map_len, pending.len(), batch_terms_len);
4730    }
4731
4732    // CRITICAL: Sort pending segments by (sample, contig, place) before assigning group_ids
4733    // This matches C++ AGC's BTreeSet iteration order (agc_compressor.cpp process_new())
4734    pending.sort();
4735
4736    if config.verbosity > 1 && !pending.is_empty() {
4737        eprintln!(
4738            "FLUSH_BATCH: Sorted {} pending segments for group_id assignment",
4739            pending.len()
4740        );
4741    }
4742
4743    // Process sorted pending segments - assign group_ids and write to archive
4744    let mut groups_map = segment_groups.lock().unwrap();
4745
4746    for pend in pending.iter() {
4747        // Assign group_id: Orphan segments (both k-mers MISSING) distributed across raw groups 0-15
4748        // For segments with k-mers: lookup existing group, or create new group if not found
4749        // FIX 17: Distribute orphan segments across groups 0-15 (round-robin) to match C++ AGC's
4750        // distribute_segments(0, 0, no_raw_groups) behavior (agc_compressor.cpp line 986)
4751        //
4752        // FIX 18: Create unique buffer keys for each raw group (0-15)
4753        // Previously all orphans used key (MISSING, MISSING), causing them to share one buffer.
4754        // Now orphans use key (raw_group_id, MISSING) so each raw group has its own buffer.
4755        let (group_id, buffer_key) = if pend.key.kmer_back == MISSING_KMER
4756            && pend.key.kmer_front == MISSING_KMER
4757        {
4758            // Round-robin distribution across raw groups 0-15
4759            let raw_group_id = raw_group_counter.fetch_add(1, Ordering::SeqCst) % NO_RAW_GROUPS;
4760            // Create unique buffer key for this raw group
4761            let unique_key = SegmentGroupKey {
4762                kmer_front: raw_group_id as u64, // Use raw_group_id to distinguish buffers
4763                kmer_back: MISSING_KMER,
4764            };
4765            // DEBUG: Trace orphan segment distribution
4766            if crate::env_cache::trace_group() {
4767                eprintln!("ORPHAN_SEGMENT: sample={} contig={} place={} raw_group_id={} buffer_key=({}, MISSING)",
4768                    pend.sample_name, pend.contig_name, pend.place, raw_group_id, raw_group_id);
4769            }
4770            (raw_group_id, unique_key)
4771        } else {
4772            // Check if this k-mer pair already has a group assigned
4773            let mut global_map = map_segments.write().unwrap();
4774            let gid = if let Some(&existing_group_id) = global_map.get(&pend.key) {
4775                // Use existing group
4776                if crate::env_cache::trace_group() {
4777                    eprintln!("GROUPING_LOOKUP_HIT: sample={} contig={} place={} front={} back={} found_group={}",
4778                        pend.sample_name, pend.contig_name, pend.place,
4779                        pend.key.kmer_front, pend.key.kmer_back, existing_group_id);
4780                }
4781                drop(global_map);
4782                existing_group_id
4783            } else {
4784                // Create new group
4785                let new_group_id = group_counter.fetch_add(1, Ordering::SeqCst);
4786                if crate::env_cache::trace_group() {
4787                    eprintln!("GROUPING_LOOKUP_MISS: sample={} contig={} place={} front={} back={} creating_group={} (map has {} entries)",
4788                        pend.sample_name, pend.contig_name, pend.place,
4789                        pend.key.kmer_front, pend.key.kmer_back, new_group_id, global_map.len());
4790                }
4791                global_map.insert(pend.key.clone(), new_group_id);
4792                drop(global_map);
4793                new_group_id
4794            };
4795            // For non-orphan segments, use pend.key as the buffer key
4796            (gid, pend.key.clone())
4797        };
4798
4799        if config.verbosity > 2 {
4800            eprintln!(
4801                "FLUSH_BATCH_ASSIGN: group_id={} front={} back={} sample={} contig={} place={}",
4802                group_id,
4803                pend.key.kmer_front,
4804                pend.key.kmer_back,
4805                pend.sample_name,
4806                pend.contig_name,
4807                pend.place
4808            );
4809        }
4810
4811        // Register orphan segments to global map (non-orphans already registered above)
4812        if pend.key.kmer_back == MISSING_KMER && pend.key.kmer_front == MISSING_KMER {
4813            let mut global_map = map_segments.write().unwrap();
4814            global_map.insert(pend.key.clone(), group_id);
4815        }
4816
4817        // TRACE: Log when segments from AAA#0 are registered
4818        if crate::env_cache::trace_group() && pend.sample_name.contains("AAA#0") {
4819            let global_map = map_segments.read().unwrap();
4820            eprintln!("TRACE_REGISTER: sample={} contig={} place={} front={} back={} group_id={} (map_segments now has {} entries)",
4821                pend.sample_name, pend.contig_name, pend.place,
4822                pend.key.kmer_front, pend.key.kmer_back, group_id, global_map.len());
4823        }
4824
4825        // Register to batch-local map
4826        {
4827            let mut batch_map = batch_local_groups.lock().unwrap();
4828            batch_map.insert(pend.key.clone(), group_id);
4829        }
4830
4831        // Register with FFI engine
4832        #[cfg(feature = "cpp_agc")]
4833        if pend.key.kmer_front != MISSING_KMER && pend.key.kmer_back != MISSING_KMER {
4834            let mut eng = grouping_engine.lock().unwrap();
4835            eng.register_group(pend.key.kmer_front, pend.key.kmer_back, group_id);
4836        }
4837
4838        // Update batch-local terminators (will be merged to global below)
4839        // Only for LZ groups (both k-mers non-MISSING)
4840        if pend.key.kmer_front != MISSING_KMER && pend.key.kmer_back != MISSING_KMER {
4841            let mut term_map = batch_local_terminators.lock().unwrap();
4842
4843            term_map
4844                .entry(pend.key.kmer_front)
4845                .or_insert_with(Vec::new)
4846                .push(pend.key.kmer_back);
4847
4848            if pend.key.kmer_front != pend.key.kmer_back {
4849                term_map
4850                    .entry(pend.key.kmer_back)
4851                    .or_insert_with(Vec::new)
4852                    .push(pend.key.kmer_front);
4853            }
4854        }
4855
4856        // Get or create SegmentGroupBuffer for this group
4857        // FIX 18: Use buffer_key (unique per raw group for orphans) instead of pend.key
4858        let buffer = groups_map.entry(buffer_key.clone()).or_insert_with(|| {
4859            // Register streams
4860            let archive_version = ragc_common::AGC_FILE_MAJOR * 1000 + ragc_common::AGC_FILE_MINOR;
4861            let delta_stream_name = ragc_common::stream_delta_name(archive_version, group_id);
4862            let ref_stream_name = ragc_common::stream_ref_name(archive_version, group_id);
4863
4864            let mut arch = archive.lock().unwrap();
4865            let stream_id = arch.register_stream(&delta_stream_name);
4866            let ref_stream_id = arch.register_stream(&ref_stream_name);
4867            drop(arch);
4868
4869            SegmentGroupBuffer::new(group_id, stream_id, ref_stream_id)
4870        });
4871
4872        // Add to buffer or write as reference
4873        let is_raw_group = group_id < NO_RAW_GROUPS; // Groups 0-15 are raw groups (match C++ AGC)
4874        if !is_raw_group && buffer.reference_segment.is_none() && buffer.segments.is_empty() {
4875            // First segment in LZ group - write as reference immediately
4876            // Create BufferedSegment with original orientation (reference sets the group orientation)
4877            let buffered = BufferedSegment {
4878                sample_name: pend.sample_name.clone(),
4879                contig_name: pend.contig_name.clone(),
4880                seg_part_no: pend.place,
4881                data: pend.segment_data.clone(),
4882                is_rev_comp: pend.should_reverse,
4883                sample_priority: pend.sample_priority,
4884            };
4885            if let Err(e) = write_reference_immediately(
4886                &buffered,
4887                buffer,
4888                collection,
4889                archive,
4890                reference_segments,
4891                reference_orientations,
4892                config,
4893            ) {
4894                eprintln!("ERROR in flush_batch: Failed to write reference: {}", e);
4895                buffer.segments.push(buffered);
4896            }
4897        } else {
4898            // Delta segment (joining existing group)
4899            // FIX 18: Do NOT adjust orientation to match reference - C++ AGC stores each segment
4900            // with its own computed is_rev_comp based on k-mer comparison (front < back -> false,
4901            // front >= back -> true). Segments in the same group can have different is_rev_comp.
4902            let buffered = BufferedSegment {
4903                sample_name: pend.sample_name.clone(),
4904                contig_name: pend.contig_name.clone(),
4905                seg_part_no: pend.place,
4906                data: pend.segment_data.clone(),
4907                is_rev_comp: pend.should_reverse,
4908                sample_priority: pend.sample_priority,
4909            };
4910            buffer.segments.push(buffered);
4911        }
4912
4913        // FIX 4: Removed mid-batch pack flush to match C++ AGC's batch-level sorting
4914        // C++ AGC calls sort_known() on ALL segments in batch BEFORE writing ANY
4915        // Flushing mid-batch would write segments in pack-level sorted order, not batch-level
4916        // All groups will be flushed at end of batch (after loop) instead
4917    }
4918
4919    // Clear pending segments
4920    pending.clear();
4921    drop(pending);
4922
4923    // FIX 4: Flush all group buffers at end of batch (match C++ AGC's sort_known + store_segments)
4924    // This ensures segments within each group are sorted globally across the entire batch,
4925    // not just within individual packs. Matches C++ AGC architecture:
4926    // - C++ AGC: register_segments() calls sort_known() on ALL segments, then store_segments() writes ALL
4927    // - RAGC: Accumulate all segments for batch, then flush_pack() sorts + writes at end
4928    for (_key, buffer) in groups_map.iter_mut() {
4929        if !buffer.segments.is_empty() || !buffer.ref_written {
4930            flush_pack(buffer, collection, archive, config, reference_segments)
4931                .context("Failed to flush pack at end of batch")?;
4932        }
4933    }
4934
4935    drop(groups_map);
4936
4937    // Update global registry with batch-local groups (from existing group processing)
4938    {
4939        let batch_map = batch_local_groups.lock().unwrap();
4940        let mut global_map = map_segments.write().unwrap();
4941        for (key, group_id) in batch_map.iter() {
4942            global_map.entry(key.clone()).or_insert(*group_id);
4943        }
4944    }
4945
4946    // CRITICAL: Merge batch-local terminators into global terminators
4947    // This is where C++ AGC makes terminators visible for find_middle in subsequent samples
4948    {
4949        let batch_terms = batch_local_terminators.lock().unwrap();
4950        let mut global_terms = map_segments_terminators.write().unwrap();
4951        for (kmer, connections) in batch_terms.iter() {
4952            let entry = global_terms.entry(*kmer).or_insert_with(Vec::new);
4953            entry.extend(connections.iter().cloned());
4954            entry.sort_unstable();
4955            entry.dedup();
4956        }
4957    }
4958
4959    // Clear batch-local state (like C++ AGC destroying m_kmers)
4960    batch_local_groups.lock().unwrap().clear();
4961    batch_local_terminators.lock().unwrap().clear();
4962
4963    #[cfg(feature = "verbose_debug")]
4964    if config.verbosity > 0 {
4965        eprintln!("FLUSH_BATCH: Batch flush complete, batch-local state cleared");
4966    }
4967
4968    Ok(())
4969}
4970
4971/// Helper function to fix orientation of segment data to match reference orientation.
4972/// Returns (fixed_data, fixed_is_rev_comp) tuple.
4973/// Used for both normal segments and split segments to ensure consistent orientation within groups.
4974fn fix_orientation_for_group(
4975    data: &[u8],
4976    should_reverse: bool,
4977    _key: &SegmentGroupKey,
4978    _map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
4979    _batch_local_groups: &Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>,
4980    _reference_orientations: &Arc<RwLock<BTreeMap<u32, bool>>>,
4981) -> (Vec<u8>, bool) {
4982    // FIX 18: Do NOT adjust orientation to match reference - C++ AGC stores each segment
4983    // with its own computed is_rev_comp based on k-mer comparison. Segments in the same
4984    // group can have different is_rev_comp values.
4985    (data.to_vec(), should_reverse)
4986}
4987
4988/// Worker thread that pulls from queue and compresses
4989fn worker_thread(
4990    worker_id: usize,
4991    queue: Arc<MemoryBoundedQueue<ContigTask>>,
4992    collection: Arc<Mutex<CollectionV3>>,
4993    splitters: Arc<AHashSet<u64>>,
4994    ref_singletons: Arc<Vec<u64>>, // For dynamic splitter discovery (sorted)
4995    ref_duplicates: Arc<AHashSet<u64>>, // For dynamic splitter discovery
4996    archive: Arc<Mutex<Archive>>,
4997    segment_groups: Arc<Mutex<BTreeMap<SegmentGroupKey, SegmentGroupBuffer>>>,
4998    group_counter: Arc<AtomicU32>,
4999    raw_group_counter: Arc<AtomicU32>,
5000    reference_sample_name: Arc<Mutex<Option<String>>>,
5001    map_segments: Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
5002    map_segments_terminators: Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
5003    reference_segments: Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
5004    reference_orientations: Arc<RwLock<BTreeMap<u32, bool>>>,
5005    split_offsets: Arc<Mutex<BTreeMap<(String, String, usize), usize>>>,
5006    #[cfg(feature = "cpp_agc")] grouping_engine: Arc<Mutex<crate::ragc_ffi::GroupingEngine>>,
5007    batch_samples: Arc<Mutex<HashSet<String>>>,
5008    batch_local_groups: Arc<Mutex<BTreeMap<SegmentGroupKey, u32>>>,
5009    batch_local_terminators: Arc<Mutex<BTreeMap<u64, Vec<u64>>>>,
5010    pending_batch_segments: Arc<Mutex<Vec<PendingSegment>>>,
5011    buffered_seg_part: Arc<BufferedSegPart>, // Per-group buffers for parallel Phase 1
5012    map_fallback_minimizers: Arc<Mutex<BTreeMap<u64, Vec<(u64, u64)>>>>,
5013    raw_segment_buffers: Arc<Vec<Mutex<Vec<RawBufferedSegment>>>>, // Per-worker buffers for deferred classification
5014    barrier: Arc<std::sync::Barrier>, // Synchronization barrier for batch boundaries
5015    parallel_state: Arc<ParallelFlushState>, // Shared state for parallel Phase 3
5016    write_buffer: Arc<ParallelWriteBuffer>, // Per-stream buffers for parallel writes
5017    config: StreamingQueueConfig,
5018) -> Result<()> {
5019    let mut processed_count = 0;
5020
5021    // Create fallback filter from config
5022    let fallback_filter = FallbackFilter::new(config.fallback_frac);
5023
5024    // Timing accumulators for performance analysis
5025    let mut total_queue_wait = std::time::Duration::ZERO;
5026    let mut total_segment_processing = std::time::Duration::ZERO;
5027    let mut total_barrier_wait = std::time::Duration::ZERO;
5028    let mut total_sync_processing = std::time::Duration::ZERO;
5029    let mut contig_count = 0usize;
5030    let mut sync_count = 0usize;
5031
5032    loop {
5033        // Pull from queue (blocks if empty, returns None when closed)
5034        let queue_start = std::time::Instant::now();
5035        let Some(task) = queue.pull() else {
5036            // Print timing summary on exit
5037            if config.verbosity > 0 {
5038                eprintln!("Worker {} TIMING: queue_wait={:?} segment_proc={:?} barrier_wait={:?} sync_proc={:?} contigs={} syncs={}",
5039                    worker_id, total_queue_wait, total_segment_processing, total_barrier_wait, total_sync_processing, contig_count, sync_count);
5040            }
5041            // Queue is closed and empty - flush any pending batch before exiting
5042            if config.verbosity > 0 {
5043                eprintln!("Worker {} flushing final batch before exit", worker_id);
5044            }
5045            flush_batch(
5046                &segment_groups,
5047                &pending_batch_segments,
5048                &batch_local_groups,
5049                &batch_local_terminators,
5050                &map_segments,
5051                &group_counter,
5052                &raw_group_counter, // FIX 17: Pass raw_group_counter for round-robin distribution
5053                &map_segments_terminators,
5054                &archive,
5055                &collection,
5056                &reference_segments,
5057                &reference_orientations,
5058                #[cfg(feature = "cpp_agc")]
5059                &grouping_engine,
5060                &config,
5061            )
5062            .ok(); // Ignore errors on final flush
5063
5064            if config.verbosity > 1 {
5065                eprintln!(
5066                    "Worker {} finished ({} contigs processed)",
5067                    worker_id, processed_count
5068                );
5069            }
5070            break;
5071        };
5072
5073        let queue_wait = queue_start.elapsed();
5074        total_queue_wait += queue_wait;
5075
5076        // Handle sync tokens with barrier synchronization (matches C++ AGC registration stage)
5077        if task.is_sync_token {
5078            let sync_start = std::time::Instant::now();
5079            sync_count += 1;
5080            if config.verbosity > 0 {
5081                eprintln!(
5082                    "Worker {} hit sync token for sample {}",
5083                    worker_id, task.sample_name
5084                );
5085            }
5086
5087            // =================================================================
5088            // C++ AGC 4-Phase Parallel Pattern
5089            // =================================================================
5090
5091            // Barrier 1: All workers arrive at sample boundary
5092            let barrier_start = std::time::Instant::now();
5093            barrier.wait();
5094            total_barrier_wait += barrier_start.elapsed();
5095
5096            // Phase 2 (Thread 0 only): Classify raw segments and prepare batch
5097            if worker_id == 0 {
5098                if config.verbosity > 0 {
5099                    eprintln!(
5100                        "Worker 0 preparing batch at sample boundary for {}",
5101                        task.sample_name
5102                    );
5103                }
5104
5105                let phase2_start = std::time::Instant::now();
5106
5107                // Step 1: Classify all raw segments (deferred from parallel segment loop)
5108                // This eliminates lock contention by doing classification single-threaded
5109                classify_raw_segments_at_barrier(
5110                    &raw_segment_buffers,
5111                    &buffered_seg_part,
5112                    &map_segments,
5113                    &map_segments_terminators,
5114                    &segment_groups,
5115                    &reference_segments,
5116                    &fallback_filter,
5117                    &map_fallback_minimizers,
5118                    &group_counter,
5119                    &raw_group_counter, // FIX 18: Pass raw_group_counter for orphan distribution
5120                    &config,
5121                );
5122
5123                let classify_time = phase2_start.elapsed();
5124                if config.verbosity > 0 {
5125                    eprintln!("TIMING: Classification took {:?}", classify_time);
5126                }
5127
5128                // Step 2: Prepare batch for parallel compression
5129                let prepare_start = std::time::Instant::now();
5130                prepare_batch_parallel(
5131                    &segment_groups,
5132                    &buffered_seg_part,
5133                    &batch_local_groups,
5134                    &batch_local_terminators,
5135                    &map_segments,
5136                    &map_segments_terminators,
5137                    &group_counter,
5138                    &raw_group_counter,
5139                    &archive,
5140                    &collection,
5141                    &reference_segments,
5142                    &reference_orientations,
5143                    #[cfg(feature = "cpp_agc")]
5144                    &grouping_engine,
5145                    &parallel_state,
5146                    &config,
5147                )?;
5148                let prepare_time = prepare_start.elapsed();
5149                if config.verbosity > 0 {
5150                    eprintln!("TIMING: Prepare took {:?}", prepare_time);
5151                }
5152            }
5153
5154            // Barrier 2: All workers see prepared buffers
5155            let barrier_start = std::time::Instant::now();
5156            barrier.wait();
5157            total_barrier_wait += barrier_start.elapsed();
5158
5159            let compress_start = std::time::Instant::now();
5160            // Phase 3a (ALL workers): Atomic work-stealing to COMPRESS and BUFFER writes
5161            // Workers compress segments and buffer archive writes (C++ AGC: AddPartBuffered)
5162            // Buffering is fast (memory only), flush happens after barrier
5163            loop {
5164                let Some(idx) = parallel_state.claim_next_idx() else {
5165                    break;
5166                };
5167
5168                if let Some((key, mut buffer)) = parallel_state.get_buffer_at(idx) {
5169                    // Compress this buffer
5170                    if !buffer.segments.is_empty() || !buffer.ref_written {
5171                        match flush_pack_compress_only(&mut buffer, &config) {
5172                            Ok(mut result) => {
5173                                // Buffer archive writes using per-stream mutexes (NO global lock!)
5174                                // Workers on different streams can buffer concurrently
5175                                for part in result.archive_writes.drain(..) {
5176                                    write_buffer.buffer_write(
5177                                        part.stream_id,
5178                                        part.data,
5179                                        part.metadata,
5180                                    );
5181                                }
5182                                // Store result (now without archive_writes)
5183                                parallel_state.store_result(idx, result);
5184                            }
5185                            Err(e) => {
5186                                eprintln!(
5187                                    "Worker {} error compressing group {}: {}",
5188                                    worker_id, buffer.group_id, e
5189                                );
5190                            }
5191                        }
5192                    }
5193                    // Return buffer
5194                    parallel_state.return_buffer(idx, key, buffer);
5195                }
5196            }
5197
5198            // Barrier 3: All workers done with compression and buffering
5199            let barrier_start = std::time::Instant::now();
5200            barrier.wait();
5201            total_barrier_wait += barrier_start.elapsed();
5202
5203            if worker_id == 0 && config.verbosity > 0 {
5204                eprintln!(
5205                    "TIMING: Compression took {:?} (all workers)",
5206                    compress_start.elapsed()
5207                );
5208            }
5209
5210            // Phase 3b + Phase 4 (Thread 0 only): Flush writes, registrations, and cleanup
5211            // Combined to reduce barrier overhead (was 2 separate barriers)
5212            if worker_id == 0 {
5213                // Phase 3b: Flush buffered writes and process registrations
5214                let sorted_results = parallel_state.drain_results_sorted();
5215
5216                // Take all locks once at the start
5217                let mut arch = archive.lock().unwrap();
5218                let mut ref_segs = reference_segments.write().unwrap();
5219                let mut coll = collection.lock().unwrap();
5220
5221                // Flush all buffered writes from per-stream buffer to archive
5222                // BTreeMap ensures sorted stream_id order for determinism
5223                if let Err(e) = write_buffer.flush_to_archive(&mut *arch) {
5224                    eprintln!("Thread 0 error flushing archive buffers: {}", e);
5225                }
5226                // Clear write buffer for next batch
5227                write_buffer.clear();
5228
5229                // Process ref_to_store and registrations in sorted group_id order
5230                for result in sorted_results {
5231                    // Store reference in global map
5232                    if let Some((group_id, ref_data)) = result.ref_to_store {
5233                        ref_segs.insert(group_id, ref_data);
5234                    }
5235
5236                    // Register segments in collection
5237                    for reg in result.registrations {
5238                        if let Err(e) = coll.add_segment_placed(
5239                            &reg.sample_name,
5240                            &reg.contig_name,
5241                            reg.seg_part_no,
5242                            reg.group_id,
5243                            reg.in_group_id,
5244                            reg.is_rev_comp,
5245                            reg.raw_length,
5246                        ) {
5247                            eprintln!("Thread 0 error registering segment: {}", e);
5248                        }
5249                    }
5250                }
5251                // Locks released here when guards go out of scope
5252                drop(arch);
5253                drop(ref_segs);
5254                drop(coll);
5255
5256                // Phase 4: Cleanup
5257                cleanup_batch_parallel(
5258                    &segment_groups,
5259                    &batch_local_groups,
5260                    &batch_local_terminators,
5261                    &map_segments,
5262                    &map_segments_terminators,
5263                    &parallel_state,
5264                    &config,
5265                );
5266
5267                // Clear batch-local state after flush (start fresh for new sample)
5268                let mut samples = batch_samples.lock().unwrap();
5269                samples.clear();
5270            }
5271
5272            // Barrier 4: All workers ready for next batch (reduced from 2 barriers)
5273            let barrier_start = std::time::Instant::now();
5274            barrier.wait();
5275            total_barrier_wait += barrier_start.elapsed();
5276
5277            // Track total sync token processing time
5278            total_sync_processing += sync_start.elapsed();
5279
5280            // Sync token processed - continue to next task
5281            continue;
5282        }
5283
5284        // Start timing for segment processing
5285        let segment_start = std::time::Instant::now();
5286        contig_count += 1;
5287
5288        // NOTE: Removed per-contig lock on batch_samples - not needed for deferred classification
5289        // The batch tracking is handled at the barrier level, not per-contig
5290
5291        // Split into segments
5292        // Dynamic splitter discovery for non-reference contigs (matches C++ AGC find_new_splitters)
5293        // OPTIMIZATION: Cache reference sample name check at thread level to avoid lock per contig
5294        let is_reference_sample = task.sample_priority >= 1_000_000; // Reference sample has boosted priority
5295
5296        let segments = if !is_reference_sample && !ref_singletons.is_empty() {
5297            // Non-reference contig with dynamic discovery enabled
5298            // Find NEW splitter k-mers unique to this contig (not in reference)
5299            // Position-based selection ensures only optimally-positioned k-mers become splitters
5300            let new_splitters = find_new_splitters_for_contig(
5301                &task.data,
5302                config.k,
5303                config.segment_size,
5304                &ref_singletons,
5305                &ref_duplicates,
5306            );
5307
5308            // Combine base splitters with new splitters
5309            let mut combined_splitters = (*splitters).clone();
5310            combined_splitters.extend(new_splitters.iter());
5311
5312            if config.verbosity > 2 && !new_splitters.is_empty() {
5313                eprintln!(
5314                    "DYNAMIC_SPLITTER: {} found {} new splitters for {} (total: {})",
5315                    task.sample_name,
5316                    new_splitters.len(),
5317                    task.contig_name,
5318                    combined_splitters.len()
5319                );
5320            }
5321
5322            split_at_splitters_with_size(
5323                &task.data,
5324                &combined_splitters,
5325                config.k,
5326                config.segment_size,
5327            )
5328        } else {
5329            // Reference contig or dynamic discovery disabled - use base splitters only
5330            split_at_splitters_with_size(&task.data, &splitters, config.k, config.segment_size)
5331        };
5332
5333        if config.verbosity > 2 {
5334            eprintln!(
5335                "Worker {} processing {} (split into {} segments)",
5336                worker_id,
5337                task.contig_name,
5338                segments.len()
5339            );
5340        }
5341
5342        // NOTE: split_offsets and local_splits are no longer needed in the parallel loop
5343        // since classification (including splits) is deferred to the barrier.
5344        // This eliminates a lock acquisition per contig that was causing contention.
5345
5346        // =================================================================
5347        // DEFERRED CLASSIFICATION: Buffer raw segments for parallel Phase 1
5348        // Classification is deferred to Thread 0 at the barrier to eliminate
5349        // lock contention from find_group_with_one_kmer and split logic.
5350        // =================================================================
5351        // OPTIMIZATION: Collect all segments for this contig locally, then push once
5352        // This reduces lock acquisitions from O(segments_per_contig) to O(1) per contig
        let contig_segments: Vec<RawBufferedSegment> = segments
            .iter()
            .enumerate()
            .map(|(original_place, segment)| {
                // Precompute reverse complement (NO LOCKS - can run in parallel)
                // Segment data uses numeric encoding: 0=A, 1=C, 2=G, 3=T
                let segment_data_rc: Vec<u8> = segment
                    .data
                    .iter()
                    .rev()
                    .map(|&base| {
                        match base {
                            0 => 3,    // A -> T
                            1 => 2,    // C -> G
                            2 => 1,    // G -> C
                            3 => 0,    // T -> A
                            _ => base, // N or other non-ACGT
                        }
                    })
                    .collect();

                // NOTE(review): `data`, `sample_name` and `contig_name` are cloned
                // for every segment of the contig; if profiling shows this hot,
                // Arc<str>/Arc<[u8]> for the shared name fields would make the
                // per-segment copies O(1) — TODO confirm against a profile.
                RawBufferedSegment {
                    data: segment.data.clone(),
                    data_rc: segment_data_rc,
                    front_kmer: segment.front_kmer,
                    back_kmer: segment.back_kmer,
                    front_kmer_is_dir: segment.front_kmer_is_dir,
                    back_kmer_is_dir: segment.back_kmer_is_dir,
                    sample_name: task.sample_name.clone(),
                    contig_name: task.contig_name.clone(),
                    // Index of the segment within this contig, before any
                    // split-based place adjustment (that happens at the barrier).
                    original_place,
                    sample_priority: task.sample_priority,
                }
            })
            .collect();

        // ONE lock acquisition for entire contig (reduces contention significantly)
        // Push to this worker's own buffer (NO CONTENTION - each worker has its own buffer)
        raw_segment_buffers[worker_id]
            .lock()
            .unwrap()
            .extend(contig_segments);

        // End timing for segment processing
        total_segment_processing += segment_start.elapsed();
5398
        // OLD CODE BELOW - REPLACED BY DEFERRED CLASSIFICATION
        // This block is preserved for reference during the transition; the
        // classification logic has been moved to classify_raw_segments_at_barrier().
        // NOTE(review): the block is doubly disabled — it is gated behind the
        // "old_immediate_classification" feature AND iterates std::iter::empty(),
        // so the loop body never executes even when the feature is enabled.
        // Consider deleting it outright once the deferred path is validated.
        #[cfg(feature = "old_immediate_classification")]
        for (original_place, segment) in std::iter::empty::<(usize, &crate::segment::Segment)>() {
            // Calculate adjusted place based on prior splits in this contig
            // (matches C++ AGC lines 2033-2036: increment seg_part_no twice when split occurs)
            // OPTIMIZATION: Count splits before current position from both prior and local sets
            // `place` = original index shifted by splits recorded in earlier
            // batches (prior_contig_splits) plus splits found in this pass (local_splits).
            let prior_count = prior_contig_splits.range(..original_place).count();
            let local_count = local_splits.range(..original_place).count();
            let place = original_place + prior_count + local_count;
5410
5411            // DEBUG: Output every segment for comparison with C++ AGC
5412            #[cfg(feature = "verbose_debug")]
5413            eprintln!(
5414                "RAGC_SEGMENT: sample={} contig={} part={} len={} front={} back={}",
5415                task.sample_name,
5416                task.contig_name,
5417                place,
5418                segment.data.len(),
5419                segment.front_kmer,
5420                segment.back_kmer
5421            );
5422
5423            // Match C++ AGC Case 2: Normalize segment group key by ensuring front <= back
5424            // (agc_compressor.cpp lines 1306-1327)
5425            use crate::segment::MISSING_KMER;
5426
5427            // Precompute reverse complement for all cases that might need it
5428            // Segment data uses numeric encoding: 0=A, 1=C, 2=G, 3=T
5429            let segment_data_rc: Vec<u8> = segment
5430                .data
5431                .iter()
5432                .rev()
5433                .map(|&base| {
5434                    match base {
5435                        0 => 3,    // A -> T
5436                        1 => 2,    // C -> G
5437                        2 => 1,    // G -> C
5438                        3 => 0,    // T -> A
5439                        _ => base, // N or other non-ACGT
5440                    }
5441                })
5442                .collect();
5443
5444            let (key_front, key_back, should_reverse) = if segment.front_kmer != MISSING_KMER
5445                && segment.back_kmer != MISSING_KMER
5446            {
5447                // Both k-mers present
5448                // C++ AGC uses `<` not `<=`, which means degenerate k-mers (front == back)
5449                // go to the else branch and get store_rc=true (lines 1306-1313)
5450                if segment.front_kmer < segment.back_kmer {
5451                    // Already normalized - keep original orientation
5452                    if config.verbosity > 2 {
5453                        #[cfg(feature = "verbose_debug")]
5454                        eprintln!(
5455                            "RAGC_CASE2_KEEP: sample={} front={} back={} len={}",
5456                            task.sample_name,
5457                            segment.front_kmer,
5458                            segment.back_kmer,
5459                            segment.data.len()
5460                        );
5461                    }
5462                    (segment.front_kmer, segment.back_kmer, false)
5463                } else {
5464                    // Swap k-mers and reverse complement data
5465                    if config.verbosity > 2 {
5466                        #[cfg(feature = "verbose_debug")]
5467                        eprintln!(
5468                            "RAGC_CASE2_SWAP: sample={} front={} back={} -> key=({},{}) len={}",
5469                            task.sample_name,
5470                            segment.front_kmer,
5471                            segment.back_kmer,
5472                            segment.back_kmer,
5473                            segment.front_kmer,
5474                            segment.data.len()
5475                        );
5476                    }
5477                    (segment.back_kmer, segment.front_kmer, true)
5478                }
5479            } else if segment.front_kmer != MISSING_KMER {
5480                // Case 3a: Only front k-mer present, back is MISSING (terminator)
5481                // Match C++ AGC lines 1315-1336: reverse complement and find candidate with one splitter
5482                // Use the actual is_dir_oriented value from segment detection
5483                #[cfg(feature = "verbose_debug")]
5484                eprintln!("RAGC_CASE3A_TERMINATOR: sample={} front={} front_is_dir={} back=MISSING -> finding best group",
5485                        task.sample_name, segment.front_kmer, segment.front_kmer_is_dir);
5486                // Debug: trace is_dir value before find_group call
5487                if crate::env_cache::debug_is_dir() {
5488                    eprintln!("RAGC_CASE3A_CALL: contig={} seg_part={} front_kmer={} front_kmer_is_dir={}",
5489                            task.contig_name, place, segment.front_kmer, segment.front_kmer_is_dir);
5490                }
5491                let (mut kf, mut kb, mut sr) = find_group_with_one_kmer(
5492                    segment.front_kmer,
5493                    segment.front_kmer_is_dir, // Use actual orientation from segment detection
5494                    &segment.data,
5495                    &segment_data_rc,
5496                    &map_segments_terminators,
5497                    &map_segments,
5498                    &segment_groups,
5499                    &reference_segments,
5500                    &config,
5501                );
5502
5503                // Fallback: If Case 3a returned MISSING, try fallback minimizers (C++ AGC lines 1322-1334)
5504                if (kf == MISSING_KMER || kb == MISSING_KMER) && fallback_filter.is_enabled() {
5505                    let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
5506                        &segment.data,
5507                        &segment_data_rc,
5508                        config.k,
5509                        5, // min_shared_kmers = 5 for Case 3 (matches C++ AGC)
5510                        &fallback_filter,
5511                        &map_fallback_minimizers,
5512                        &map_segments,
5513                        &segment_groups,
5514                        &reference_segments,
5515                        &config,
5516                    );
5517                    if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
5518                        if config.verbosity > 1 {
5519                            #[cfg(feature = "verbose_debug")]
5520                            eprintln!(
5521                                "RAGC_CASE3A_FALLBACK: found ({},{}) rc={}",
5522                                fb_kf, fb_kb, fb_sr
5523                            );
5524                        }
5525                        kf = fb_kf;
5526                        kb = fb_kb;
5527                        sr = fb_sr;
5528                    }
5529                }
5530                (kf, kb, sr)
5531            } else if segment.back_kmer != MISSING_KMER {
5532                // Case 3b: Only back k-mer present, front is MISSING (terminator)
5533                // Match C++ AGC lines 1337-1360: swap_dir_rc() inverts is_dir_oriented()
5534                //
5535                // C++ AGC calls kmer.swap_dir_rc() which swaps kmer_dir and kmer_rc fields,
5536                // effectively inverting is_dir_oriented() (which checks kmer_dir <= kmer_rc).
5537                // So if back_kmer was originally dir-oriented, after swap it becomes NOT dir-oriented.
5538                let kmer_is_dir_after_swap = !segment.back_kmer_is_dir;
5539                #[cfg(feature = "verbose_debug")]
5540                eprintln!("RAGC_CASE3B_TERMINATOR: sample={} front=MISSING back={} back_is_dir={} -> kmer_is_dir_after_swap={}",
5541                        task.sample_name, segment.back_kmer, segment.back_kmer_is_dir, kmer_is_dir_after_swap);
5542
5543                // C++ AGC line 1344 passes (segment_rc, segment) to find_cand_segment_with_one_splitter
5544                // and then inverts the result: store_rc = !store_dir
5545                // So we swap the segment parameters here AND invert sr below
5546                let (mut kf, mut kb, mut sr) = find_group_with_one_kmer(
5547                    segment.back_kmer,      // Use original k-mer value
5548                    kmer_is_dir_after_swap, // Inverted due to swap_dir_rc()
5549                    &segment_data_rc,       // SWAPPED: RC first (matches C++ AGC segment_rc param)
5550                    &segment.data, // SWAPPED: Original second (matches C++ AGC segment param)
5551                    &map_segments_terminators,
5552                    &map_segments,
5553                    &segment_groups,
5554                    &reference_segments,
5555                    &config,
5556                );
5557                // Invert sr to match C++ AGC's store_rc = !store_dir
5558                sr = !sr;
5559
5560                // Fallback: If Case 3b returned MISSING, try fallback minimizers (C++ AGC lines 1347-1359)
5561                // Note: C++ AGC uses segment_rc for fallback in Case 3b
5562                if (kf == MISSING_KMER || kb == MISSING_KMER) && fallback_filter.is_enabled() {
5563                    let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
5564                        &segment_data_rc, // Use RC for Case 3b (matches C++ AGC)
5565                        &segment.data,
5566                        config.k,
5567                        5, // min_shared_kmers = 5 for Case 3 (matches C++ AGC)
5568                        &fallback_filter,
5569                        &map_fallback_minimizers,
5570                        &map_segments,
5571                        &segment_groups,
5572                        &reference_segments,
5573                        &config,
5574                    );
5575                    if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
5576                        if config.verbosity > 1 {
5577                            #[cfg(feature = "verbose_debug")]
5578                            eprintln!(
5579                                "RAGC_CASE3B_FALLBACK: found ({},{}) rc={}",
5580                                fb_kf, fb_kb, !fb_sr
5581                            );
5582                        }
5583                        kf = fb_kf;
5584                        kb = fb_kb;
5585                        sr = !fb_sr; // C++ AGC: store_rc = !store_dir_alt
5586                    }
5587                }
5588                (kf, kb, sr)
5589            } else {
5590                // Case 1: Both MISSING - try fallback minimizers (C++ AGC lines 1286-1298)
5591                let mut kf = MISSING_KMER;
5592                let mut kb = MISSING_KMER;
5593                let mut sr = false;
5594
5595                if fallback_filter.is_enabled() {
5596                    let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
5597                        &segment.data,
5598                        &segment_data_rc,
5599                        config.k,
5600                        1, // min_shared_kmers = 1 for Case 1 (matches C++ AGC line 1293)
5601                        &fallback_filter,
5602                        &map_fallback_minimizers,
5603                        &map_segments,
5604                        &segment_groups,
5605                        &reference_segments,
5606                        &config,
5607                    );
5608                    if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
5609                        if config.verbosity > 1 {
5610                            #[cfg(feature = "verbose_debug")]
5611                            eprintln!(
5612                                "RAGC_CASE1_FALLBACK: sample={} found ({},{}) rc={} len={}",
5613                                task.sample_name,
5614                                fb_kf,
5615                                fb_kb,
5616                                fb_sr,
5617                                segment.data.len()
5618                            );
5619                        }
5620                        kf = fb_kf;
5621                        kb = fb_kb;
5622                        sr = fb_sr;
5623                    }
5624                }
5625
5626                (kf, kb, sr)
5627            };
5628
5629            // Create grouping key from normalized k-mers
5630            // For raw segments (both k-mers MISSING), use the same key for all
5631            // This matches C++ AGC: map_segments[make_pair(~0ull, ~0ull)] = 0
5632            // All raw segments share the same grouping key and will be assigned to the same group
5633            let key = SegmentGroupKey {
5634                kmer_front: key_front,
5635                kmer_back: key_back,
5636            };
5637
5638            // Reverse complement data if needed (matching C++ AGC lines 1315-1316, 1320-1321)
5639            let segment_data = if should_reverse {
5640                segment
5641                    .data
5642                    .iter()
5643                    .rev()
5644                    .map(|&base| {
5645                        match base {
5646                            0 => 3,    // A -> T
5647                            1 => 2,    // C -> G
5648                            2 => 1,    // G -> C
5649                            3 => 0,    // T -> A
5650                            _ => base, // N or other non-ACGT
5651                        }
5652                    })
5653                    .collect()
5654            } else {
5655                segment.data.clone()
5656            };
5657
5658            // NOTE: Split check must happen BEFORE creating BufferedSegment
5659            // to avoid moving segment_data prematurely
5660            // PERF: Lock deferred - normal path doesn't need segment_groups lock
5661            {
5662                // Phase 1: Check if group already exists
5663                // (matches C++ AGC: seg_map_mtx.lock() then find at line 1020)
5664                let key_exists = {
5665                    let seg_map = map_segments.read().unwrap();
5666                    seg_map.contains_key(&key)
5667                };
5668
5669                // Phase 2: Try to split
5670                // C++ AGC only attempts splits when key doesn't exist (agc_compressor.cpp:1367)
5671                // This is the condition: p == map_segments.end() && both k-mers valid && both in terminators
5672                // Set RAGC_SPLIT_ALL=1 to try splitting even when key exists (experimental)
5673                // CRITICAL: C++ AGC lines 1374-1378 skip segment splitting when front == back!
5674                // When front == back, it just sets store_rc based on orientation, does NOT call
5675                // find_cand_segment_with_missing_middle_splitter. We must do the same.
5676                let split_allowed = if crate::env_cache::split_all() {
5677                    true
5678                } else {
5679                    !key_exists
5680                };
5681
5682                // Debug: trace split decision
5683                if crate::env_cache::debug_split()
5684                    && task.contig_name.contains("chrVII")
5685                    && place >= 2
5686                    && place <= 5
5687                {
5688                    eprintln!("RAGC_SPLIT_CHECK: contig={} seg={} key=({},{}) key_exists={} split_allowed={} front_missing={} back_missing={} front==back={}",
5689                        task.contig_name, place, key_front, key_back, key_exists, split_allowed,
5690                        key_front == MISSING_KMER, key_back == MISSING_KMER, key_front == key_back);
5691                }
5692
5693                if split_allowed
5694                    && key_front != MISSING_KMER
5695                    && key_back != MISSING_KMER
5696                    && key_front != key_back
5697                {
5698                    // CRITICAL: First attempt to find middle splitter
5699                    // Use ONLY global terminators (not batch-local) to match C++ AGC behavior
5700                    // C++ AGC only sees terminators from previous batches, not the current one
5701                    let middle_kmer_opt = {
5702                        let terminators = map_segments_terminators.read().unwrap();
5703                        let result = find_middle_splitter(key_front, key_back, &terminators);
5704                        // Debug: trace middle splitter result
5705                        if crate::env_cache::debug_split()
5706                            && task.contig_name.contains("chrVII")
5707                            && place >= 2
5708                            && place <= 5
5709                        {
5710                            let front_conn =
5711                                terminators.get(&key_front).map(|v| v.len()).unwrap_or(0);
5712                            let back_conn =
5713                                terminators.get(&key_back).map(|v| v.len()).unwrap_or(0);
5714                            eprintln!("RAGC_SPLIT_MIDDLE: contig={} seg={} key=({},{}) middle={:?} front_conn={} back_conn={}",
5715                                task.contig_name, place, key_front, key_back, result, front_conn, back_conn);
5716                        }
5717                        result
5718                    };
5719
5720                    #[cfg(feature = "verbose_debug")]
5721                    if config.verbosity > 0 {
5722                        if middle_kmer_opt.is_some() {
5723                            eprintln!(
5724                                "DEBUG_SPLIT: Found middle k-mer for ({},{}) sample={}",
5725                                key_front, key_back, task.sample_name
5726                            );
5727                        } else if config.verbosity > 1 {
5728                            eprintln!(
5729                                "SPLIT_NO_MIDDLE: ({},{}) sample={} place={} should_reverse={}",
5730                                key_front, key_back, task.sample_name, place, should_reverse
5731                            );
5732                        }
5733                    }
5734
5735                    if let Some(middle_kmer) = middle_kmer_opt {
5736                        // Found potential middle k-mer
5737                        // Now check if BOTH split groups already exist in map_segments
5738                        // (This is the key difference from just checking terminators!)
5739
5740                        // Debug: trace middle found
5741                        if crate::env_cache::debug_split() {
5742                            eprintln!(
5743                                "RAGC_SPLIT_FOUND_MIDDLE: contig={} seg={} middle={}",
5744                                task.contig_name, place, middle_kmer
5745                            );
5746                        }
5747
5748                        let left_key = if key_front <= middle_kmer {
5749                            SegmentGroupKey {
5750                                kmer_front: key_front,
5751                                kmer_back: middle_kmer,
5752                            }
5753                        } else {
5754                            SegmentGroupKey {
5755                                kmer_front: middle_kmer,
5756                                kmer_back: key_front,
5757                            }
5758                        };
5759
5760                        let right_key = if middle_kmer <= key_back {
5761                            SegmentGroupKey {
5762                                kmer_front: middle_kmer,
5763                                kmer_back: key_back,
5764                            }
5765                        } else {
5766                            SegmentGroupKey {
5767                                kmer_front: key_back,
5768                                kmer_back: middle_kmer,
5769                            }
5770                        };
5771
5772                        // CRITICAL: C++ AGC requires BOTH target groups to exist in map_segments
5773                        // at split decision time (agc_compressor.cpp lines 1472, 1486 use .at() which throws)
5774                        // If either group doesn't exist, C++ AGC aborts the split.
5775                        // We must check map_segments (global), not batch_local_groups, to match C++ behavior.
5776                        let (left_exists, right_exists) = {
5777                            let global_map = map_segments.read().unwrap();
5778                            (
5779                                global_map.contains_key(&left_key),
5780                                global_map.contains_key(&right_key),
5781                            )
5782                        };
5783
5784                        // EXPERIMENTAL: Allow split even when groups don't exist
5785                        // Set RAGC_SPLIT_CREATE_GROUPS=1 to enable creating new groups during split
5786                        // This is needed for streaming mode where non-reference samples may create
5787                        // new segment groups that the reference sample didn't have.
5788                        let allow_create_groups = crate::env_cache::split_create_groups();
5789
5790                        if !left_exists || !right_exists {
5791                            // Skip split - one or both target groups don't exist yet
5792                            // This matches C++ AGC behavior where .at() would throw
5793                            // UNLESS we're in experimental mode where we allow creating groups
5794                            if config.verbosity > 1 {
5795                                eprintln!(
5796                                    "SPLIT_SKIP_NO_GROUP: left_key=({},{}) exists={} right_key=({},{}) exists={} allow_create={}",
5797                                    left_key.kmer_front, left_key.kmer_back, left_exists,
5798                                    right_key.kmer_front, right_key.kmer_back, right_exists, allow_create_groups
5799                                );
5800                            }
5801                            if crate::env_cache::debug_split() {
5802                                eprintln!(
5803                                    "RAGC_SPLIT_SKIP_NO_GROUP: left=({},{}) exists={} right=({},{}) exists={} allow_create={}",
5804                                    left_key.kmer_front, left_key.kmer_back, left_exists,
5805                                    right_key.kmer_front, right_key.kmer_back, right_exists, allow_create_groups
5806                                );
5807                            }
5808                            if !allow_create_groups {
5809                                // Don't attempt split - fall through to normal segment processing
5810                            }
5811                        }
5812
5813                        // Proceed with split if groups exist OR if we allow creating groups
5814                        if (left_exists && right_exists) || allow_create_groups {
5815                            // Both groups exist - proceed with split cost calculation
5816                            #[cfg(feature = "verbose_debug")]
5817                            if config.verbosity > 0 {
5818                                eprintln!("DEBUG_SPLIT: Attempting cost-based split for ({},{}) sample={}",
5819                                key_front, key_back, task.sample_name);
5820                            }
5821
5822                            let split_result = try_split_segment_with_cost(
5823                                &segment_data,
5824                                key_front,
5825                                key_back,
5826                                middle_kmer,
5827                                &left_key,
5828                                &right_key,
5829                                &map_segments,
5830                                &map_segments_terminators,
5831                                &reference_segments,
5832                                &config,
5833                                should_reverse,
5834                                allow_create_groups, // Force split at middle k-mer position if refs are empty
5835                            );
5836
5837                            if let Some((left_data, right_data, _mid)) = split_result {
5838                                // PERF: Acquire lock only for split path (rare case)
5839                                // Normal segments bypass this entirely for better parallelism
5840                                let mut groups = segment_groups.lock().unwrap();
5841
5842                                // FIX 27 v4: Compute separate orientations for left and right parts
5843                                // C++ AGC lines 1526-1536 and 1540-1550:
5844                                //   store_rc = (kmer_front.data() >= split_match.first)  -- for left
5845                                //   store2_rc = (split_match.first >= kmer_back.data())  -- for right
5846                                //
5847                                // When should_reverse=true, the segment was RC'd before splitting,
5848                                // so "left" in the split is from original RIGHT, and "right" is from original LEFT.
5849                                // We need to swap the k-mer comparisons accordingly.
5850                                let (left_should_rc, right_should_rc) = if should_reverse {
5851                                    // Segment was RC'd: left is from original right, right is from original left
5852                                    // Swap the k-mer associations
5853                                    let left_should_rc = middle_kmer >= segment.back_kmer; // use back_kmer for "left"
5854                                    let right_should_rc = segment.front_kmer >= middle_kmer; // use front_kmer for "right"
5855                                    (left_should_rc, right_should_rc)
5856                                } else {
5857                                    // Normal: left is from original left, right is from original right
5858                                    let left_should_rc = segment.front_kmer >= middle_kmer;
5859                                    let right_should_rc = middle_kmer >= segment.back_kmer;
5860                                    (left_should_rc, right_should_rc)
5861                                };
5862
5863                                // Transform data if needed: current state is `should_reverse`
5864                                // If target state differs, we RC the data
5865                                let left_data = if left_should_rc != should_reverse {
5866                                    left_data
5867                                        .iter()
5868                                        .rev()
5869                                        .map(|&base| match base {
5870                                            0 => 3,
5871                                            1 => 2,
5872                                            2 => 1,
5873                                            3 => 0,
5874                                            _ => base,
5875                                        })
5876                                        .collect::<Vec<u8>>()
5877                                } else {
5878                                    left_data
5879                                };
5880                                let right_data = if right_should_rc != should_reverse {
5881                                    right_data
5882                                        .iter()
5883                                        .rev()
5884                                        .map(|&base| match base {
5885                                            0 => 3,
5886                                            1 => 2,
5887                                            2 => 1,
5888                                            3 => 0,
5889                                            _ => base,
5890                                        })
5891                                        .collect::<Vec<u8>>()
5892                                } else {
5893                                    right_data
5894                                };
5895
5896                                // Check if this is a degenerate split (one side empty)
5897                                let is_degenerate_left = left_data.is_empty();
5898                                let is_degenerate_right = right_data.is_empty();
5899
5900                                if config.verbosity > 1 {
5901                                    if is_degenerate_right {
5902                                        eprintln!(
5903                                            "SPLIT_DEGENERATE_RIGHT: ({},{}) -> left_only=({},{})",
5904                                            key_front,
5905                                            key_back,
5906                                            left_key.kmer_front,
5907                                            left_key.kmer_back
5908                                        );
5909                                    } else if is_degenerate_left {
5910                                        eprintln!(
5911                                            "SPLIT_DEGENERATE_LEFT: ({},{}) -> right_only=({},{})",
5912                                            key_front,
5913                                            key_back,
5914                                            right_key.kmer_front,
5915                                            right_key.kmer_back
5916                                        );
5917                                    } else {
5918                                        eprintln!(
5919                                            "SPLIT: original=({},{}) -> left=({},{}) right=({},{})",
5920                                            key_front,
5921                                            key_back,
5922                                            left_key.kmer_front,
5923                                            left_key.kmer_back,
5924                                            right_key.kmer_front,
5925                                            right_key.kmer_back
5926                                        );
5927                                    }
5928                                }
5929
                                // Determine emission order. By default match C++ logic:
                                // - Normal orientation (should_reverse=false): emit left then right
                                // - Reversed orientation (should_reverse=true): emit right then left
                                // Allow env override for diagnostics:
                                //   RAGC_EMIT_ORDER=left  -> force left-first
                                //   RAGC_EMIT_ORDER=right -> force right-first
                                //   RAGC_EMIT_ORDER=flip  -> invert default
                                //   RAGC_EMIT_ORDER=auto  -> default behavior (or if unset)
                                // NOTE(review): this reads the environment on every split
                                // event, unlike the sibling flags which go through
                                // crate::env_cache (e.g. env_cache::split_all()); caching
                                // this lookup the same way would be both faster and
                                // consistent with the rest of the file.
                                let emit_left_first = match std::env::var("RAGC_EMIT_ORDER") {
                                    Ok(val) => match val.to_ascii_lowercase().as_str() {
                                        "left" | "left-first" => true,
                                        "right" | "right-first" => false,
                                        "flip" => should_reverse, // invert default (!should_reverse)
                                        _ => !should_reverse,     // auto/default
                                    },
                                    Err(_) => !should_reverse,
                                };
5947                                if config.verbosity > 1 {
5948                                    eprintln!(
5949                                    "EMIT_ORDER: should_reverse={} -> emit_left_first={} (env RAGC_EMIT_ORDER)",
5950                                    should_reverse, emit_left_first
5951                                );
5952                                }
5953
5954                                // Optional targeted split trace for a specific (sample, contig, index)
5955                                if let (Ok(ts), Ok(tc), Ok(ti)) = (
5956                                    std::env::var("RAGC_TRACE_SAMPLE"),
5957                                    std::env::var("RAGC_TRACE_CONTIG"),
5958                                    std::env::var("RAGC_TRACE_INDEX").and_then(|s| {
5959                                        s.parse::<usize>()
5960                                            .map_err(|e| std::env::VarError::NotPresent)
5961                                    }),
5962                                ) {
5963                                    if ts == task.sample_name
5964                                        && tc == task.contig_name
5965                                        && ti == place
5966                                    {
5967                                        // Derive seg2_start from lengths (robust for both FFI and local mapping)
5968                                        let seg_len = segment_data.len();
5969                                        let right_len = right_data.len();
5970                                        let left_len = left_data.len();
5971                                        let seg2_start_derived = seg_len.saturating_sub(right_len);
5972                                        let left_end_derived = seg2_start_derived
5973                                            .saturating_add(config.k)
5974                                            .min(seg_len);
5975                                        eprintln!(
5976                                        "TRACE_SPLIT: {}/{} idx={} rev={} emit_left_first={} degL={} degR={} seg2_start={} left_end={} left_len={} right_len={}",
5977                                        task.sample_name, task.contig_name, place, should_reverse, emit_left_first,
5978                                        is_degenerate_left, is_degenerate_right, seg2_start_derived, left_end_derived, left_len, right_len
5979                                    );
5980                                    }
5981                                }
5982
                                // Emit the two split halves in contig order. The two branches are
                                // mirror images: left-first (normal orientation) vs right-first
                                // (reversed). NOTE(review): the group-creation closures for left
                                // and right are near-duplicates; they stay inline because they
                                // capture many surrounding locals of the enclosing worker loop.
                                if emit_left_first {
                                    // left first
                                    if !is_degenerate_left {
                                        let left_buffer =
                                            groups.entry(left_key.clone()).or_insert_with(|| {
                                                // OPTIMIZATION: Read-check-write pattern to reduce lock contention
                                                // First check with read lock (fast path - most groups already exist)
                                                let group_id = {
                                                    let global_map = map_segments.read().unwrap();
                                                    if let Some(&existing_id) =
                                                        global_map.get(&left_key)
                                                    {
                                                        existing_id
                                                    } else {
                                                        drop(global_map);
                                                        // Group doesn't exist - upgrade to write lock
                                                        let mut global_map =
                                                            map_segments.write().unwrap();
                                                        // Double-check after acquiring write lock (race condition)
                                                        if let Some(&existing_id) =
                                                            global_map.get(&left_key)
                                                        {
                                                            existing_id
                                                        } else {
                                                            // Create new group ID and register IMMEDIATELY to global map
                                                            let new_id = group_counter
                                                                .fetch_add(1, Ordering::SeqCst);
                                                            global_map
                                                                .insert(left_key.clone(), new_id);
                                                            drop(global_map);
                                                            // Also register to batch-local for flush tracking
                                                            let mut batch_map =
                                                                batch_local_groups.lock().unwrap();
                                                            batch_map
                                                                .insert(left_key.clone(), new_id);
                                                            new_id
                                                        }
                                                    }
                                                };
                                                // Register with FFI engine
                                                #[cfg(feature = "cpp_agc")]
                                                if left_key.kmer_front != MISSING_KMER
                                                    && left_key.kmer_back != MISSING_KMER
                                                {
                                                    let mut eng = grouping_engine.lock().unwrap();
                                                    eng.register_group(
                                                        left_key.kmer_front,
                                                        left_key.kmer_back,
                                                        group_id,
                                                    );
                                                }
                                                // Update GLOBAL terminators map IMMEDIATELY (matches C++ AGC)
                                                // Both directions (front->back and back->front) are
                                                // recorded, then each adjacency list is re-sorted and
                                                // deduped so later lookups can binary-search safely.
                                                if left_key.kmer_front != MISSING_KMER
                                                    && left_key.kmer_back != MISSING_KMER
                                                {
                                                    let mut term_map =
                                                        map_segments_terminators.write().unwrap();
                                                    term_map
                                                        .entry(left_key.kmer_front)
                                                        .or_insert_with(Vec::new)
                                                        .push(left_key.kmer_back);
                                                    if left_key.kmer_front != left_key.kmer_back {
                                                        term_map
                                                            .entry(left_key.kmer_back)
                                                            .or_insert_with(Vec::new)
                                                            .push(left_key.kmer_front);
                                                    }
                                                    if let Some(front_vec) =
                                                        term_map.get_mut(&left_key.kmer_front)
                                                    {
                                                        front_vec.sort_unstable();
                                                        front_vec.dedup();
                                                    }
                                                    if left_key.kmer_front != left_key.kmer_back {
                                                        if let Some(back_vec) =
                                                            term_map.get_mut(&left_key.kmer_back)
                                                        {
                                                            back_vec.sort_unstable();
                                                            back_vec.dedup();
                                                        }
                                                    }
                                                }
                                                // Register streams for this group
                                                let archive_version = ragc_common::AGC_FILE_MAJOR
                                                    * 1000
                                                    + ragc_common::AGC_FILE_MINOR;
                                                let delta_stream_name =
                                                    ragc_common::stream_delta_name(
                                                        archive_version,
                                                        group_id,
                                                    );
                                                let ref_stream_name = ragc_common::stream_ref_name(
                                                    archive_version,
                                                    group_id,
                                                );
                                                let mut arch = archive.lock().unwrap();
                                                let stream_id =
                                                    arch.register_stream(&delta_stream_name);
                                                let ref_stream_id =
                                                    arch.register_stream(&ref_stream_name);
                                                drop(arch);
                                                SegmentGroupBuffer::new(
                                                    group_id,
                                                    stream_id,
                                                    ref_stream_id,
                                                )
                                            });
                                        // FIX 27 v4: Use left_should_rc instead of should_reverse
                                        let (fixed_left_data, fixed_left_rc) =
                                            fix_orientation_for_group(
                                                &left_data,
                                                left_should_rc,
                                                &left_key,
                                                &map_segments,
                                                &batch_local_groups,
                                                &reference_orientations,
                                            );
                                        let left_buffered = BufferedSegment {
                                            sample_name: task.sample_name.clone(),
                                            contig_name: task.contig_name.clone(),
                                            seg_part_no: place,
                                            data: fixed_left_data,
                                            is_rev_comp: fixed_left_rc,
                                            sample_priority: task.sample_priority,
                                        };
                                        left_buffer.segments.push(left_buffered);
                                        // Flush pack if full (matches C++ AGC write-as-you-go behavior)
                                        if left_buffer.should_flush_pack(config.pack_size) {
                                            flush_pack(
                                                left_buffer,
                                                &collection,
                                                &archive,
                                                &config,
                                                &reference_segments,
                                            )
                                            .context("Failed to flush left pack")?;
                                        }
                                    }
                                    if !is_degenerate_right {
                                        let right_buffer =
                                            groups.entry(right_key.clone()).or_insert_with(|| {
                                                // OPTIMIZATION: Read-check-write pattern to reduce lock contention
                                                // First check with read lock (fast path - most groups already exist)
                                                let group_id = {
                                                    let global_map = map_segments.read().unwrap();
                                                    if let Some(&existing_id) =
                                                        global_map.get(&right_key)
                                                    {
                                                        existing_id
                                                    } else {
                                                        drop(global_map);
                                                        // Group doesn't exist - upgrade to write lock
                                                        let mut global_map =
                                                            map_segments.write().unwrap();
                                                        // Double-check after acquiring write lock (race condition)
                                                        if let Some(&existing_id) =
                                                            global_map.get(&right_key)
                                                        {
                                                            existing_id
                                                        } else {
                                                            // Create new group ID and register IMMEDIATELY to global map
                                                            let new_id = group_counter
                                                                .fetch_add(1, Ordering::SeqCst);
                                                            global_map
                                                                .insert(right_key.clone(), new_id);
                                                            drop(global_map);
                                                            // Also register to batch-local for flush tracking
                                                            let mut batch_map =
                                                                batch_local_groups.lock().unwrap();
                                                            batch_map
                                                                .insert(right_key.clone(), new_id);
                                                            new_id
                                                        }
                                                    }
                                                };
                                                // Register with FFI engine
                                                #[cfg(feature = "cpp_agc")]
                                                if right_key.kmer_front != MISSING_KMER
                                                    && right_key.kmer_back != MISSING_KMER
                                                {
                                                    let mut eng = grouping_engine.lock().unwrap();
                                                    eng.register_group(
                                                        right_key.kmer_front,
                                                        right_key.kmer_back,
                                                        group_id,
                                                    );
                                                }
                                                // Update GLOBAL terminators map IMMEDIATELY (matches C++ AGC)
                                                if right_key.kmer_front != MISSING_KMER
                                                    && right_key.kmer_back != MISSING_KMER
                                                {
                                                    let mut term_map =
                                                        map_segments_terminators.write().unwrap();
                                                    term_map
                                                        .entry(right_key.kmer_front)
                                                        .or_insert_with(Vec::new)
                                                        .push(right_key.kmer_back);
                                                    if right_key.kmer_front != right_key.kmer_back {
                                                        term_map
                                                            .entry(right_key.kmer_back)
                                                            .or_insert_with(Vec::new)
                                                            .push(right_key.kmer_front);
                                                    }
                                                    if let Some(front_vec) =
                                                        term_map.get_mut(&right_key.kmer_front)
                                                    {
                                                        front_vec.sort_unstable();
                                                        front_vec.dedup();
                                                    }
                                                    if right_key.kmer_front != right_key.kmer_back {
                                                        if let Some(back_vec) =
                                                            term_map.get_mut(&right_key.kmer_back)
                                                        {
                                                            back_vec.sort_unstable();
                                                            back_vec.dedup();
                                                        }
                                                    }
                                                }
                                                // Register streams for this group
                                                let archive_version = ragc_common::AGC_FILE_MAJOR
                                                    * 1000
                                                    + ragc_common::AGC_FILE_MINOR;
                                                let delta_stream_name =
                                                    ragc_common::stream_delta_name(
                                                        archive_version,
                                                        group_id,
                                                    );
                                                let ref_stream_name = ragc_common::stream_ref_name(
                                                    archive_version,
                                                    group_id,
                                                );
                                                let mut arch = archive.lock().unwrap();
                                                let stream_id =
                                                    arch.register_stream(&delta_stream_name);
                                                let ref_stream_id =
                                                    arch.register_stream(&ref_stream_name);
                                                drop(arch);
                                                SegmentGroupBuffer::new(
                                                    group_id,
                                                    stream_id,
                                                    ref_stream_id,
                                                )
                                            });
                                        // If the left half was degenerate (not emitted) the right
                                        // half takes its part number; otherwise it follows the left.
                                        let seg_part =
                                            if is_degenerate_left { place } else { place + 1 };
                                        // FIX 27 v4: Use right_should_rc instead of should_reverse
                                        let (fixed_right_data, fixed_right_rc) =
                                            fix_orientation_for_group(
                                                &right_data,
                                                right_should_rc,
                                                &right_key,
                                                &map_segments,
                                                &batch_local_groups,
                                                &reference_orientations,
                                            );
                                        let right_buffered = BufferedSegment {
                                            sample_name: task.sample_name.clone(),
                                            contig_name: task.contig_name.clone(),
                                            seg_part_no: seg_part,
                                            data: fixed_right_data,
                                            is_rev_comp: fixed_right_rc,
                                            sample_priority: task.sample_priority,
                                        };
                                        right_buffer.segments.push(right_buffered);
                                        // Flush pack if full (matches C++ AGC write-as-you-go behavior)
                                        if right_buffer.should_flush_pack(config.pack_size) {
                                            flush_pack(
                                                right_buffer,
                                                &collection,
                                                &archive,
                                                &config,
                                                &reference_segments,
                                            )
                                            .context("Failed to flush right pack")?;
                                        }
                                    }
                                } else {
                                    // reversed: right first
                                    // NOTE(review): unlike the branch above, the closures in this
                                    // branch only LOOK UP the group id and will panic via expect()
                                    // if it is absent from both maps. This assumes a group for each
                                    // split key was already created elsewhere (e.g. on a prior
                                    // contig or by the forward pass) — confirm that a reversed
                                    // split can never introduce a brand-new key. They also skip
                                    // the FFI/terminator registration done by the creation path,
                                    // which is consistent with lookup-only semantics.
                                    if !is_degenerate_right {
                                        let right_buffer = groups.entry(right_key.clone()).or_insert_with(|| {
                                        // BATCH-LOCAL: Check global first, then batch-local (group must exist from earlier)
                                        let group_id = {
                                            let global_map = map_segments.read().unwrap();
                                            if let Some(&id) = global_map.get(&right_key) {
                                                id
                                            } else {
                                                drop(global_map);
                                                let batch_map = batch_local_groups.lock().unwrap();
                                                *batch_map.get(&right_key).expect("Split right group must exist in batch_local_groups or map_segments")
                                            }
                                        };
                                        let archive_version = ragc_common::AGC_FILE_MAJOR * 1000 + ragc_common::AGC_FILE_MINOR;
                                        let delta_stream_name = ragc_common::stream_delta_name(archive_version, group_id);
                                        let ref_stream_name = ragc_common::stream_ref_name(archive_version, group_id);
                                        let mut arch = archive.lock().unwrap();
                                        let stream_id = arch.register_stream(&delta_stream_name);
                                        let ref_stream_id = arch.register_stream(&ref_stream_name);
                                        drop(arch);
                                        SegmentGroupBuffer::new(group_id, stream_id, ref_stream_id)
                                    });
                                        // FIX 27 v4: Use right_should_rc instead of should_reverse
                                        let (fixed_right_data, fixed_right_rc) =
                                            fix_orientation_for_group(
                                                &right_data,
                                                right_should_rc,
                                                &right_key,
                                                &map_segments,
                                                &batch_local_groups,
                                                &reference_orientations,
                                            );
                                        let right_buffered = BufferedSegment {
                                            sample_name: task.sample_name.clone(),
                                            contig_name: task.contig_name.clone(),
                                            seg_part_no: place,
                                            data: fixed_right_data,
                                            is_rev_comp: fixed_right_rc,
                                            sample_priority: task.sample_priority,
                                        };
                                        right_buffer.segments.push(right_buffered);
                                        // Flush pack if full (matches C++ AGC write-as-you-go behavior)
                                        if right_buffer.should_flush_pack(config.pack_size) {
                                            flush_pack(
                                                right_buffer,
                                                &collection,
                                                &archive,
                                                &config,
                                                &reference_segments,
                                            )
                                            .context("Failed to flush right pack")?;
                                        }
                                    }
                                    if !is_degenerate_left {
                                        let left_buffer = groups.entry(left_key.clone()).or_insert_with(|| {
                                        // BATCH-LOCAL: Check global first, then batch-local (group must exist from earlier)
                                        let group_id = {
                                            let global_map = map_segments.read().unwrap();
                                            if let Some(&id) = global_map.get(&left_key) {
                                                id
                                            } else {
                                                drop(global_map);
                                                let batch_map = batch_local_groups.lock().unwrap();
                                                *batch_map.get(&left_key).expect("Split left group must exist in batch_local_groups or map_segments")
                                            }
                                        };
                                        let archive_version = ragc_common::AGC_FILE_MAJOR * 1000 + ragc_common::AGC_FILE_MINOR;
                                        let delta_stream_name = ragc_common::stream_delta_name(archive_version, group_id);
                                        let ref_stream_name = ragc_common::stream_ref_name(archive_version, group_id);
                                        let mut arch = archive.lock().unwrap();
                                        let stream_id = arch.register_stream(&delta_stream_name);
                                        let ref_stream_id = arch.register_stream(&ref_stream_name);
                                        drop(arch);
                                        SegmentGroupBuffer::new(group_id, stream_id, ref_stream_id)
                                    });
                                        // Left half emitted second here, so it takes place+1 unless
                                        // the right half was degenerate (then it keeps `place`).
                                        let seg_part = if is_degenerate_right {
                                            place
                                        } else {
                                            place + 1
                                        };
                                        // FIX 27 v4: Use left_should_rc instead of should_reverse
                                        let (fixed_left_data, fixed_left_rc) =
                                            fix_orientation_for_group(
                                                &left_data,
                                                left_should_rc,
                                                &left_key,
                                                &map_segments,
                                                &batch_local_groups,
                                                &reference_orientations,
                                            );
                                        let left_buffered = BufferedSegment {
                                            sample_name: task.sample_name.clone(),
                                            contig_name: task.contig_name.clone(),
                                            seg_part_no: seg_part,
                                            data: fixed_left_data,
                                            is_rev_comp: fixed_left_rc,
                                            sample_priority: task.sample_priority,
                                        };
                                        left_buffer.segments.push(left_buffered);
                                        // Flush pack if full (matches C++ AGC write-as-you-go behavior)
                                        if left_buffer.should_flush_pack(config.pack_size) {
                                            flush_pack(
                                                left_buffer,
                                                &collection,
                                                &archive,
                                                &config,
                                                &reference_segments,
                                            )
                                            .context("Failed to flush left pack")?;
                                        }
                                    }
                                }
6374
6375                                // Optional: assert lengths vs C++ archive if provided
6376                                if let Some(assert_path) = crate::env_cache::assert_cpp_archive() {
6377                                    use crate::{Decompressor, DecompressorConfig};
6378                                    let mut dec = match Decompressor::open(
6379                                        &assert_path,
6380                                        DecompressorConfig { verbosity: 0 },
6381                                    ) {
6382                                        Ok(d) => d,
6383                                        Err(_) => {
6384                                            if config.verbosity > 1 {
6385                                                eprintln!(
6386                                                    "ASSERT_SKIP: cannot open {}",
6387                                                    assert_path
6388                                                );
6389                                            }
6390                                            return Ok(());
6391                                        }
6392                                    };
6393                                    if let Ok(all) = dec.get_all_segments() {
6394                                        if let Some((_, _, segs)) =
6395                                            all.into_iter().find(|(s, c, _)| {
6396                                                *s == task.sample_name && *c == task.contig_name
6397                                            })
6398                                        {
6399                                            // Compute our emitted lens and expected lens at indices
6400                                            let mut checks: Vec<(usize, usize)> = Vec::new();
6401                                            if emit_left_first {
6402                                                if !is_degenerate_left {
6403                                                    checks.push((place, left_data.len()));
6404                                                }
6405                                                if !is_degenerate_right {
6406                                                    checks.push((
6407                                                        if is_degenerate_left {
6408                                                            place
6409                                                        } else {
6410                                                            place + 1
6411                                                        },
6412                                                        right_data.len(),
6413                                                    ));
6414                                                }
6415                                            } else {
6416                                                if !is_degenerate_right {
6417                                                    checks.push((place, right_data.len()));
6418                                                }
6419                                                if !is_degenerate_left {
6420                                                    checks.push((
6421                                                        if is_degenerate_right {
6422                                                            place
6423                                                        } else {
6424                                                            place + 1
6425                                                        },
6426                                                        left_data.len(),
6427                                                    ));
6428                                                }
6429                                            }
6430
6431                                            // Derive segmentation geometry for detailed diagnostics
6432                                            let seg_len = segment_data.len();
6433                                            let right_len = right_data.len();
6434                                            let left_len = left_data.len();
6435                                            let seg2_start_derived =
6436                                                seg_len.saturating_sub(right_len);
6437                                            let left_end_derived = seg2_start_derived
6438                                                .saturating_add(config.k)
6439                                                .min(seg_len);
6440                                            let emit_idx_left = if emit_left_first {
6441                                                place
6442                                            } else {
6443                                                if is_degenerate_right {
6444                                                    place
6445                                                } else {
6446                                                    place + 1
6447                                                }
6448                                            };
6449                                            let emit_idx_right = if emit_left_first {
6450                                                if is_degenerate_left {
6451                                                    place
6452                                                } else {
6453                                                    place + 1
6454                                                }
6455                                            } else {
6456                                                place
6457                                            };
6458
6459                                            for (idx, got) in checks {
6460                                                if idx < segs.len() {
6461                                                    let exp = segs[idx].raw_length as usize;
6462                                                    if exp != got {
6463                                                        eprintln!("ASSERT_LEN_MISMATCH: {}/{} idx={} got={} exp={} keys L=({:#x},{:#x}) R=({:#x},{:#x})",
6464                                                        task.sample_name, task.contig_name, idx, got, exp,
6465                                                        left_key.kmer_front, left_key.kmer_back,
6466                                                        right_key.kmer_front, right_key.kmer_back);
6467                                                        // Extended context (guarded by env to limit noise)
6468                                                        if crate::env_cache::assert_verbose() {
6469                                                            eprintln!("  CONTEXT: place={} orig_place={} emit_left_first={} should_reverse={}",
6470                                                            place, original_place, emit_left_first, should_reverse);
6471                                                            eprintln!("  GEOM: seg_len={} left_len={} right_len={} seg2_start={} left_end={}",
6472                                                            seg_len, left_len, right_len, seg2_start_derived, left_end_derived);
6473                                                            eprintln!("  EMIT_IDX: left_at={} right_at={}", emit_idx_left, emit_idx_right);
6474                                                        }
6475                                                    }
6476                                                } else {
6477                                                    eprintln!(
6478                                                        "ASSERT_IDX_OOB: {}/{} idx={} (segs={})",
6479                                                        task.sample_name,
6480                                                        task.contig_name,
6481                                                        idx,
6482                                                        segs.len()
6483                                                    );
6484                                                }
6485                                            }
6486                                        }
6487                                    }
6488                                }
6489
6490                                // Record this split so subsequent segments from this contig get shifted
6491                                // (matches C++ AGC lines 2033-2036: ++seg_part_no twice when split)
6492                                // For degenerate splits, only increment once (no actual split)
6493                                if !is_degenerate_left && !is_degenerate_right {
6494                                    // OPTIMIZATION: Track locally for this task AND globally for other workers
6495                                    local_splits.insert(original_place);
6496                                    let mut offsets = split_offsets.lock().unwrap();
6497                                    offsets.insert(
6498                                        (
6499                                            task.sample_name.clone(),
6500                                            task.contig_name.clone(),
6501                                            original_place,
6502                                        ),
6503                                        1,
6504                                    );
6505                                }
6506
6507                                // Skip adding original segment - we've added the split/reclassified segment
6508                                continue;
6509                            }
6510                            // If split_result was None, fall through to normal path
6511                        } // end of else { both groups exist }
6512                    }
6513                }
6514
6515                // Phase 2.5: Secondary fallback attempt (C++ AGC lines 1477-1494)
6516                // If the group doesn't exist yet, try fallback minimizers one more time with min_shared=2
6517                // This helps segments find existing groups that share internal k-mers
6518                let (key, key_front, key_back, should_reverse) = {
6519                    // Re-check if key exists (may have changed since split logic ran)
6520                    let key_exists_now = {
6521                        let seg_map = map_segments.read().unwrap();
6522                        seg_map.contains_key(&key)
6523                    };
6524
6525                    // Debug: count how many segments could be eligible for secondary fallback
6526                    if crate::env_cache::debug_fallback2_enabled() {
6527                        if !key_exists_now
6528                            && key.kmer_front != MISSING_KMER
6529                            && key.kmer_back != MISSING_KMER
6530                        {
6531                            eprintln!(
6532                                "SECONDARY_FB_CANDIDATE: sample={} contig={} place={} key=({},{})",
6533                                task.sample_name,
6534                                task.contig_name,
6535                                place,
6536                                key.kmer_front,
6537                                key.kmer_back
6538                            );
6539                        }
6540                    }
6541
6542                    if !key_exists_now
6543                        && key.kmer_front != MISSING_KMER
6544                        && key.kmer_back != MISSING_KMER
6545                        && fallback_filter.is_enabled()
6546                    {
6547                        // Generate reverse complement for fallback lookup
6548                        let segment_data_rc_fb: Vec<u8> = segment_data
6549                            .iter()
6550                            .rev()
6551                            .map(|&b| if b > 3 { b } else { 3 - b })
6552                            .collect();
6553
6554                        let (fb_kf, fb_kb, fb_sr) = find_cand_segment_using_fallback_minimizers(
6555                            &segment_data,
6556                            &segment_data_rc_fb,
6557                            config.k,
6558                            2, // min_shared_kmers = 2 for secondary fallback (C++ AGC line 1482)
6559                            &fallback_filter,
6560                            &map_fallback_minimizers,
6561                            &map_segments,
6562                            &segment_groups,
6563                            &reference_segments,
6564                            &config,
6565                        );
6566
6567                        if crate::env_cache::debug_fallback2_enabled() {
6568                            if fb_kf == MISSING_KMER || fb_kb == MISSING_KMER {
6569                                eprintln!(
6570                                    "SECONDARY_FB_NO_MATCH: orig_key=({},{})",
6571                                    key.kmer_front, key.kmer_back
6572                                );
6573                            } else {
6574                                eprintln!(
6575                                    "SECONDARY_FB_FOUND: orig=({},{}) found=({},{}) rc={}",
6576                                    key.kmer_front, key.kmer_back, fb_kf, fb_kb, fb_sr
6577                                );
6578                            }
6579                        }
6580
6581                        if fb_kf != MISSING_KMER && fb_kb != MISSING_KMER {
6582                            // Verify the found group actually exists
6583                            let found_key = SegmentGroupKey {
6584                                kmer_front: fb_kf,
6585                                kmer_back: fb_kb,
6586                            };
6587                            let found_exists = {
6588                                let seg_map = map_segments.read().unwrap();
6589                                seg_map.contains_key(&found_key)
6590                            };
6591
6592                            if found_exists {
6593                                if config.verbosity > 1 {
6594                                    eprintln!(
6595                                        "SECONDARY_FALLBACK_SUCCESS: ({},{}) -> ({},{}) sr={}->{}",
6596                                        key_front, key_back, fb_kf, fb_kb, should_reverse, fb_sr
6597                                    );
6598                                }
6599                                (found_key, fb_kf, fb_kb, fb_sr)
6600                            } else {
6601                                // Fallback found k-mers but group doesn't exist - keep original
6602                                (key, key_front, key_back, should_reverse)
6603                            }
6604                        } else {
6605                            // Fallback didn't find anything - keep original
6606                            (key, key_front, key_back, should_reverse)
6607                        }
6608                    } else {
6609                        // Group exists or not eligible for fallback - keep original
6610                        (key, key_front, key_back, should_reverse)
6611                    }
6612                };
6613
6614                // Phase 3: Normal path - add segment to group as-is (group exists, or split failed/impossible)
6615
6616                // FIX 18: Do NOT adjust orientation to match reference - C++ AGC stores each segment
6617                // with its own computed is_rev_comp based on k-mer comparison. Segments in the same
6618                // group can have different is_rev_comp values.
6619                let (final_should_reverse, final_segment_data) = (should_reverse, segment_data);
6620
6621                if config.verbosity > 2 {
6622                    eprintln!(
6623                        "DEFER_SEGMENT: front={} back={} sample={} contig={} place={}",
6624                        key_front, key_back, task.sample_name, task.contig_name, place
6625                    );
6626                }
6627
6628                // PHASE 1 (PARALLEL): Add segment to buffered_seg_part
6629                // Check if group exists (brief read lock on map_segments)
6630                let group_id_opt = {
6631                    let seg_map = map_segments.read().unwrap();
6632                    seg_map.get(&key).copied()
6633                };
6634
6635                if let Some(group_id) = group_id_opt {
6636                    // KNOWN: add to per-group buffer (per-group lock only - PARALLEL)
6637                    buffered_seg_part.add_known(
6638                        group_id,
6639                        BufferedSegment {
6640                            sample_name: task.sample_name.clone(),
6641                            contig_name: task.contig_name.clone(),
6642                            seg_part_no: place,
6643                            data: final_segment_data,
6644                            is_rev_comp: final_should_reverse,
6645                            sample_priority: task.sample_priority,
6646                        },
6647                    );
6648                } else {
6649                    // NEW: add to s_seg_part (brief global lock on BTreeSet)
6650                    buffered_seg_part.add_new(NewSegment {
6651                        kmer_front: key.kmer_front,
6652                        kmer_back: key.kmer_back,
6653                        sample_priority: task.sample_priority,
6654                        sample_name: task.sample_name.clone(),
6655                        contig_name: task.contig_name.clone(),
6656                        seg_part_no: place,
6657                        data: final_segment_data,
6658                        should_reverse: final_should_reverse,
6659                    });
6660                }
6661
6662                // Segment will be handled in flush_batch at barrier synchronization point
6663            }
6664        }
6665
6666        processed_count += 1;
6667    }
6668
6669    Ok(())
6670}
6671
6672// ========== SEGMENT SPLITTING HELPER FUNCTIONS ==========
6673// (Phase 3-6 implementation)
6674
6675/// Phase 3: Find a k-mer that connects both front and back
6676/// Returns the first k-mer that appears in the terminator lists of BOTH front and back
6677/// (matches C++ AGC find_cand_segment_with_missing_middle_splitter lines 1531-1554)
6678fn find_middle_splitter(
6679    front_kmer: u64,
6680    back_kmer: u64,
6681    terminators: &BTreeMap<u64, Vec<u64>>,
6682) -> Option<u64> {
6683    let front_connections = terminators.get(&front_kmer)?;
6684    let back_connections = terminators.get(&back_kmer)?;
6685
6686    #[cfg(feature = "cpp_agc")]
6687    {
6688        if let Some(m) = crate::ragc_ffi::find_middle(front_connections, back_connections) {
6689            return Some(m);
6690        }
6691        if crate::env_cache::debug_split_find() {
6692            eprintln!(
6693                "DEBUG_FIND_MIDDLE_MISS: front={} back={} front_conn={} back_conn={} shared=0",
6694                front_kmer,
6695                back_kmer,
6696                front_connections.len(),
6697                back_connections.len()
6698            );
6699        }
6700        None
6701    }
6702
6703    #[cfg(not(feature = "cpp_agc"))]
6704    {
6705        // Fallback: local set_intersection
6706        let mut i = 0;
6707        let mut j = 0;
6708        while i < front_connections.len() && j < back_connections.len() {
6709            let a = front_connections[i];
6710            let b = back_connections[j];
6711            if a == b {
6712                if a != MISSING_KMER {
6713                    return Some(a);
6714                }
6715                i += 1;
6716                j += 1;
6717            } else if a < b {
6718                i += 1;
6719            } else {
6720                j += 1;
6721            }
6722        }
6723        if crate::env_cache::debug_split_find() {
6724            eprintln!(
6725                "DEBUG_FIND_MIDDLE_MISS: front={} back={} front_conn={} back_conn={} shared=0",
6726                front_kmer,
6727                back_kmer,
6728                front_connections.len(),
6729                back_connections.len()
6730            );
6731            eprintln!(
6732                "  front_connections: {:?}",
6733                &front_connections[..front_connections.len().min(5)]
6734            );
6735            eprintln!(
6736                "  back_connections: {:?}",
6737                &back_connections[..back_connections.len().min(5)]
6738            );
6739        }
6740        None
6741    }
6742}
6743
6744/// Find split position by searching for k-mers from the right reference segment
6745/// This is more robust than searching for a single middle k-mer, as mutations
6746/// may eliminate that specific k-mer but preserve nearby ones.
6747///
6748/// The algorithm:
6749/// 1. Extract the first few k-mers from the right reference segment
6750/// 2. Search for these k-mers in the MIDDLE portion of the current segment
6751/// 3. Return the position closest to the expected split (based on reference proportions)
6752fn find_split_by_kmer_match(segment_data: &[u8], right_ref_data: &[u8], k: usize) -> Option<usize> {
6753    use crate::kmer::{Kmer, KmerMode};
6754    use ahash::AHashSet;
6755
6756    let seg_len = segment_data.len();
6757
6758    // Both parts must be at least this size for a valid split
6759    let min_segment_size = 500; // Minimum 500 bytes per part
6760
6761    if seg_len < 2 * min_segment_size || right_ref_data.len() < k {
6762        return None;
6763    }
6764
6765    // Valid split range: ensure both parts are >= min_segment_size
6766    let min_pos = min_segment_size;
6767    let max_pos = seg_len.saturating_sub(min_segment_size);
6768
6769    if max_pos <= min_pos {
6770        return None;
6771    }
6772
6773    // Extract first N k-mers from the right reference segment
6774    // These are k-mers that should appear at the START of the right part
6775    let num_ref_kmers = 50.min(right_ref_data.len() / 2); // First 50 k-mers or half of ref
6776    let mut ref_kmers: AHashSet<u64> = AHashSet::new();
6777    let mut ref_kmer = Kmer::new(k as u32, KmerMode::Canonical);
6778
6779    for &base in right_ref_data.iter().take(num_ref_kmers + k) {
6780        ref_kmer.insert(base as u64);
6781        if ref_kmer.is_full() {
6782            ref_kmers.insert(ref_kmer.data());
6783        }
6784    }
6785
6786    if ref_kmers.is_empty() {
6787        return None;
6788    }
6789
6790    // Search for k-mers only in the VALID RANGE of the segment
6791    // This ensures we don't create tiny segments
6792    let mut seg_kmer = Kmer::new(k as u32, KmerMode::Canonical);
6793    let mut best_match: Option<usize> = None;
6794
6795    for (pos, &base) in segment_data.iter().enumerate() {
6796        seg_kmer.insert(base as u64);
6797
6798        if seg_kmer.is_full() {
6799            let current_kmer = seg_kmer.data();
6800            let split_pos = pos + 1;
6801
6802            if split_pos >= min_pos && split_pos <= max_pos && ref_kmers.contains(&current_kmer) {
6803                // Found a valid match - return the first one (earliest valid split)
6804                best_match = Some(split_pos);
6805                break;
6806            }
6807        }
6808    }
6809
6810    best_match
6811}
6812
/// Result of cost-based split analysis
/// Matches C++ AGC's find_cand_segment_with_missing_middle_splitter behavior (lines 1400-1454)
///
/// NOTE: produced by `find_split_by_cost`, whose mapping is:
/// best_pos == 0 -> AssignToRight, best_pos >= seg_len -> AssignToLeft.
#[derive(Debug, Clone, Copy)]
enum SplitDecision {
    /// Assign entire segment to left group (best_pos == seg_len)
    AssignToLeft,
    /// Assign entire segment to right group (best_pos == 0)
    AssignToRight,
    /// Actually split at this position (0 < pos < seg_len)
    SplitAt(usize),
    /// Cannot determine (refs empty or segment too small)
    NoDecision,
}
6826
6827/// Find optimal split position using LZ encoding cost
6828/// Matches C++ AGC's find_cand_segment_with_missing_middle_splitter (lines 1502-1621)
6829///
6830/// This computes the LZ encoding cost at every position for both the left and right
6831/// reference segments, then finds the position where the total cost is minimized.
6832///
6833/// # Arguments
6834/// * `segment_dir` - The segment in original (forward) orientation
6835/// * `segment_rc` - The segment in reverse complement orientation
6836/// * `left_ref` - Reference data for the left segment (front_kmer -> middle_kmer)
6837/// * `right_ref` - Reference data for the right segment (middle_kmer -> back_kmer)
6838/// * `kmer_front` - Original front k-mer (not normalized)
6839/// * `kmer_back` - Original back k-mer (not normalized)
6840/// * `middle` - The middle splitter k-mer
6841/// * `k` - K-mer length
6842/// * `min_match_len` - Minimum match length for LZ encoding
6843///
6844/// # Returns
6845/// SplitDecision indicating whether to assign to left, right, or split at position
6846fn find_split_by_cost(
6847    segment_dir: &[u8],
6848    segment_rc: &[u8],
6849    left_ref: &[u8],
6850    right_ref: &[u8],
6851    kmer_front: u64,
6852    kmer_back: u64,
6853    middle: u64,
6854    k: usize,
6855    min_match_len: u32,
6856) -> SplitDecision {
6857    use crate::lz_diff::LZDiff;
6858
6859    let seg_len = segment_dir.len();
6860
6861    // C++ AGC uses kmer_length + 1 as minimum split size
6862    let min_size = k + 1;
6863
6864    // Need enough data on both sides
6865    if seg_len < 2 * min_size || left_ref.is_empty() || right_ref.is_empty() {
6866        return SplitDecision::NoDecision;
6867    }
6868
6869    // Compute left costs (seg1): cost of encoding segment against left reference
6870    // C++ AGC lines 1539-1548: choose orientation based on kmer_front vs middle
6871    let mut lz_left = LZDiff::new(min_match_len);
6872    lz_left.prepare(&left_ref.to_vec());
6873
6874    let left_cumsum: Vec<u32> = if kmer_front < middle {
6875        // C++ AGC line 1540: use segment_dir with prefix_costs=true
6876        let left_costs = lz_left.get_coding_cost_vector(&segment_dir.to_vec(), true);
6877        // Apply partial_sum forward
6878        let mut cumsum_vec = Vec::with_capacity(left_costs.len());
6879        let mut cumsum = 0u32;
6880        for &cost in &left_costs {
6881            cumsum = cumsum.saturating_add(cost);
6882            cumsum_vec.push(cumsum);
6883        }
6884        cumsum_vec
6885    } else {
6886        // C++ AGC lines 1543-1545: use segment_rc with prefix_costs=false
6887        // IMPORTANT: reverse the COSTS first, then partial_sum
6888        let mut left_costs = lz_left.get_coding_cost_vector(&segment_rc.to_vec(), false);
6889        // Reverse the costs BEFORE partial_sum (C++ line 1544)
6890        left_costs.reverse();
6891        // Apply partial_sum forward (C++ line 1547)
6892        let mut cumsum_vec = Vec::with_capacity(left_costs.len());
6893        let mut cumsum = 0u32;
6894        for &cost in &left_costs {
6895            cumsum = cumsum.saturating_add(cost);
6896            cumsum_vec.push(cumsum);
6897        }
6898        cumsum_vec
6899    };
6900
6901    // Compute right costs (seg2): cost of encoding segment against right reference
6902    // C++ AGC lines 1563-1573: choose orientation based on middle vs kmer_back
6903    let mut lz_right = LZDiff::new(min_match_len);
6904    lz_right.prepare(&right_ref.to_vec());
6905
6906    let right_cumsum: Vec<u32> = if middle < kmer_back {
6907        // C++ AGC lines 1565-1566: use segment_dir with prefix_costs=false
6908        // then partial_sum in reverse direction
6909        let right_costs = lz_right.get_coding_cost_vector(&segment_dir.to_vec(), false);
6910        // Apply partial_sum from the right (reverse direction)
6911        let mut cumsum_vec = vec![0u32; right_costs.len()];
6912        let mut cumsum = 0u32;
6913        for (i, &cost) in right_costs.iter().enumerate().rev() {
6914            cumsum = cumsum.saturating_add(cost);
6915            cumsum_vec[i] = cumsum;
6916        }
6917        cumsum_vec
6918    } else {
6919        // C++ AGC lines 1570-1572: use segment_rc with prefix_costs=true
6920        // partial_sum forward, then reverse
6921        let right_costs = lz_right.get_coding_cost_vector(&segment_rc.to_vec(), true);
6922        // Apply partial_sum forward
6923        let mut cumsum_vec = Vec::with_capacity(right_costs.len());
6924        let mut cumsum = 0u32;
6925        for &cost in &right_costs {
6926            cumsum = cumsum.saturating_add(cost);
6927            cumsum_vec.push(cumsum);
6928        }
6929        // Reverse the cumulative sums
6930        cumsum_vec.reverse();
6931        cumsum_vec
6932    };
6933
6934    // Find position with minimum combined cost
6935    // C++ AGC lines 1606-1614: loop over ALL positions, not just valid split range
6936    let mut best_sum = u32::MAX;
6937    let mut best_pos = 0usize;
6938
6939    // IMPORTANT: C++ AGC loops from 0 to size(), then post-processes
6940    // We must do the same to get equivalent AssignToLeft/AssignToRight decisions
6941    let cost_len = left_cumsum.len().min(right_cumsum.len());
6942    for i in 0..cost_len {
6943        let cs = left_cumsum[i].saturating_add(right_cumsum[i]);
6944        if cs < best_sum {
6945            best_sum = cs;
6946            best_pos = i;
6947        }
6948    }
6949
6950    // Post-process: if best_pos is too close to edges, set to 0 or seg_len
6951    // C++ AGC lines 1616-1619
6952    if best_pos < min_size {
6953        best_pos = 0;
6954    }
6955    if best_pos + min_size > seg_len {
6956        best_pos = seg_len;
6957    }
6958
6959    // Return decision based on best_pos
6960    // C++ AGC lines 1400-1454: left_size==0 means assign to right, right_size==0 means assign to left
6961    if best_pos == 0 {
6962        SplitDecision::AssignToRight
6963    } else if best_pos >= seg_len {
6964        SplitDecision::AssignToLeft
6965    } else {
6966        SplitDecision::SplitAt(best_pos)
6967    }
6968}
6969
6970/// Phase 4: Find split position by scanning for middle k-mer
6971/// Scans the segment to find where the middle k-mer actually occurs
6972/// Returns the split position (in bytes) at the END of the middle k-mer
6973#[allow(dead_code)]
6974fn find_split_position(
6975    segment_data: &[u8],
6976    middle_kmer: u64,
6977    segment_len: usize,
6978    k: usize,
6979) -> Option<usize> {
6980    use crate::kmer::{Kmer, KmerMode};
6981
6982    // Ensure we don't split too close to the ends
6983    // Need at least k+1 bytes on each side for valid segments
6984    if segment_len < 2 * (k + 1) {
6985        return None;
6986    }
6987
6988    // Scan segment to find where middle_kmer occurs
6989    // Use data() to get canonical k-mer (matching how segment boundaries are computed)
6990    let mut kmer = Kmer::new(k as u32, KmerMode::Canonical);
6991
6992    for (pos, &base) in segment_data.iter().enumerate() {
6993        kmer.insert(base as u64);
6994
6995        if kmer.is_full() {
6996            let current_kmer = kmer.data();
6997
6998            if current_kmer == middle_kmer {
6999                // Found the middle k-mer! Position is at the end of the k-mer
7000                let split_pos = pos + 1;
7001
7002                // Validate: ensure we have enough space on both sides
7003                let left_size = split_pos;
7004                let right_size = segment_len - split_pos + k;
7005
7006                if left_size >= k + 1 && right_size >= k + 1 {
7007                    return Some(split_pos);
7008                }
7009            }
7010        }
7011    }
7012
7013    // Not found - that's OK, the middle k-mer may not exist in this sample due to mutations
7014    None
7015}
7016
/// Phase 5: Split segment into two overlapping segments
/// Returns (left_segment, right_segment) with k-mer overlap
/// (matches C++ AGC lines 1461-1464)
///
/// The right segment starts `ceil(k/2)` bytes before `split_pos`; the left
/// segment extends `k` bytes past that start, giving the two pieces a k-byte
/// overlap: roughly [split_pos - k/2 .. split_pos + k/2].
///
/// All indices are clamped to the slice length (matching
/// `split_segment_from_start`), so a `split_pos` close to — or past — the end
/// of the data cannot panic; it simply yields a short (possibly empty) right
/// segment and a left segment covering the whole input.
fn split_segment_at_position(
    segment_data: &[u8],
    split_pos: usize,
    k: usize,
) -> (Vec<u8>, Vec<u8>) {
    // C++ AGC creates overlap of k bytes (not k/2!):
    //   seg2_start_pos = left_size - ceil(kmer_length / 2)
    //   segment2 starts at seg2_start_pos
    //   segment ends at seg2_start_pos + kmer_length
    let half_ceil = (k + 1) / 2;
    // Clamp into bounds: the unclamped value could exceed the slice length
    // when split_pos is beyond the data, which previously panicked.
    let seg2_start_pos = split_pos
        .saturating_sub(half_ceil)
        .min(segment_data.len());

    // Right segment: [seg2_start_pos .. end]
    let right = segment_data[seg2_start_pos..].to_vec();

    // Left segment: [0 .. seg2_start_pos + k], clamped so a split near the
    // end of the data cannot index out of bounds.
    let left_end = seg2_start_pos.saturating_add(k).min(segment_data.len());
    let left = segment_data[..left_end].to_vec();

    (left, right)
}
7042
/// Split `segment_data` into (left, right) given the byte index where the
/// right segment begins, matching the C++ AGC layout: `right` starts at
/// `seg2_start` and `left` runs through `seg2_start + k`, so the two pieces
/// share a k-byte overlap. Both boundaries are clamped into the slice.
fn split_segment_from_start(
    segment_data: &[u8],
    seg2_start: usize,
    k: usize,
) -> (Vec<u8>, Vec<u8>) {
    let len = segment_data.len();
    // Clamp the right-hand start into bounds.
    let start = seg2_start.min(len);
    // Left piece extends k bytes past `start` (saturating), capped at `len`.
    let end = start.saturating_add(k).min(len);
    (segment_data[..end].to_vec(), segment_data[start..].to_vec())
}
7055
7056/// Phase 6: Attempt to split using compression cost heuristic (EXACT C++ AGC algorithm)
7057/// Matches agc_compressor.cpp lines 1387-1503 and 1531-1663
7058/// Returns Some((left_data, right_data, middle_kmer)) if split is beneficial
7059/// Returns None if split would be degenerate (creates segments too small)
7060fn try_split_segment_with_cost(
7061    segment_data: &Contig,
7062    front_kmer: u64,
7063    back_kmer: u64,
7064    middle_kmer: u64,
7065    left_key: &SegmentGroupKey,
7066    right_key: &SegmentGroupKey,
7067    map_segments: &Arc<RwLock<BTreeMap<SegmentGroupKey, u32>>>,
7068    map_segments_terminators: &Arc<RwLock<BTreeMap<u64, Vec<u64>>>>,
7069    reference_segments: &Arc<RwLock<BTreeMap<u32, Vec<u8>>>>,
7070    config: &StreamingQueueConfig,
7071    should_reverse: bool,
7072    force_split_on_empty_refs: bool, // When true, split at middle k-mer position even if FFI says no
7073) -> Option<(Vec<u8>, Vec<u8>, u64)> {
7074    if config.verbosity > 1 {
7075        eprintln!(
7076            "SPLIT_ATTEMPT: front={} back={} middle={}",
7077            front_kmer, back_kmer, middle_kmer
7078        );
7079    }
7080
7081    // Debug: trace split attempt
7082    if crate::env_cache::debug_split() {
7083        eprintln!(
7084            "RAGC_SPLIT_TRY: front={} back={} middle={} left_key=({},{}) right_key=({},{})",
7085            front_kmer,
7086            back_kmer,
7087            middle_kmer,
7088            left_key.kmer_front,
7089            left_key.kmer_back,
7090            right_key.kmer_front,
7091            right_key.kmer_back
7092        );
7093    }
7094
7095    // Prepare LZDiff for both groups from persistent storage
7096    // C++ AGC uses global v_segments[segment_id] (agc_compressor.cpp:1535-1536)
7097    // RAGC uses reference_segments HashMap - ALWAYS prepare on-demand
7098    // Don't require groups to be in local buffer (other workers may have created them)
7099
7100    // Helper to prepare LZDiff from global reference_segments
7101    let prepare_on_demand = |key: &SegmentGroupKey, label: &str| -> Option<LZDiff> {
7102        let map_segments_locked = map_segments.read().unwrap();
7103        let ref_segments_locked = reference_segments.read().unwrap();
7104
7105        // C++ AGC uses map_segments[key] which returns 0 (default) if key doesn't exist.
7106        // v_segments[0] is a raw group initialized with empty_ctg = { 0x7f } (1 byte).
7107        // This gives maximum LZ cost (no compression) for non-existent groups.
7108        // To match C++ AGC behavior, use the actual reference if available,
7109        // otherwise use empty reference (gives max cost like C++ AGC's v_segments[0]).
7110        let segment_id = map_segments_locked.get(key).copied();
7111
7112        if let Some(ref_data) = segment_id.and_then(|id| ref_segments_locked.get(&id)) {
7113            // Reference exists! Prepare LZDiff on-demand
7114            if crate::env_cache::debug_split_ref() {
7115                eprintln!(
7116                    "RAGC_SPLIT_REF: {}_key=({},{}) segment_id={:?} ref_size={} (ACTUAL)",
7117                    label,
7118                    key.kmer_front,
7119                    key.kmer_back,
7120                    segment_id,
7121                    ref_data.len()
7122                );
7123            }
7124
7125            let mut lz = LZDiff::new(config.min_match_len as u32);
7126            lz.prepare(ref_data);
7127            return Some(lz);
7128        } else {
7129            // No reference data available for this group
7130            // C++ AGC uses v_segments[0] which is initialized with empty_ctg = { 0x7f } (1 byte)
7131            // This gives maximum LZ cost (no compression matches possible)
7132            // Return LZDiff prepared with empty reference to match C++ AGC behavior
7133            if crate::env_cache::debug_split_ref() {
7134                eprintln!(
7135                    "RAGC_SPLIT_REF: {}_key=({},{}) segment_id={:?} ref_size=1 (EMPTY FALLBACK)",
7136                    label, key.kmer_front, key.kmer_back, segment_id
7137                );
7138            }
7139
7140            // Use 1-byte dummy reference like C++ AGC's empty_ctg = { 0x7f }
7141            let empty_ref: Vec<u8> = vec![0x7f];
7142            let mut lz = LZDiff::new(config.min_match_len as u32);
7143            lz.prepare(&empty_ref);
7144            return Some(lz);
7145        }
7146    };
7147
7148    // Build segment in both orientations once
7149    let segment_dir = segment_data; // &Vec<u8>
7150                                    // Reverse-complement once
7151    let segment_rc_vec: Vec<u8> = reverse_complement_sequence(segment_data);
7152
7153    // Calculate compression costs and best split position using C++ FFI if enabled
7154    // Falls back to Rust implementation otherwise
7155    let maybe_best: Option<(usize, usize)> = None; // (best_pos, seg2_start)
7156    #[cfg(feature = "cpp_agc")]
7157    {
7158        // Inspect availability of left/right references and log keys
7159        let (left_seg_id_opt, right_seg_id_opt) = {
7160            let map_segments_locked = map_segments.read().unwrap();
7161            (
7162                map_segments_locked.get(left_key).copied(),
7163                map_segments_locked.get(right_key).copied(),
7164            )
7165        };
7166        let (left_have_ref, right_have_ref) = {
7167            let ref_segments_locked = reference_segments.read().unwrap();
7168            (
7169                left_seg_id_opt
7170                    .and_then(|id| ref_segments_locked.get(&id))
7171                    .is_some(),
7172                right_seg_id_opt
7173                    .and_then(|id| ref_segments_locked.get(&id))
7174                    .is_some(),
7175            )
7176        };
7177
7178        if config.verbosity > 1 {
7179            eprintln!(
7180                "SPLIT_KEYS: left=({:#x},{:#x}) right=({:#x},{:#x}) left_seg_id={:?} right_seg_id={:?} have_left_ref={} have_right_ref={}",
7181                left_key.kmer_front, middle_kmer, middle_kmer, right_key.kmer_back,
7182                left_seg_id_opt, right_seg_id_opt, left_have_ref, right_have_ref
7183            );
7184        }
7185
7186        // Prepare neighbor lists for FFI decision
7187        let (front_neighbors, back_neighbors) = {
7188            let term_map = map_segments_terminators.read().unwrap();
7189            (
7190                term_map.get(&front_kmer).cloned().unwrap_or_default(),
7191                term_map.get(&back_kmer).cloned().unwrap_or_default(),
7192            )
7193        };
7194
7195        // Always attempt FFI decision; if refs are missing, C++ will decide no-split
7196        let (ref_left_opt, ref_right_opt) = {
7197            let ref_segments_locked = reference_segments.read().unwrap();
7198            let l = left_seg_id_opt.and_then(|id| ref_segments_locked.get(&id).cloned());
7199            let r = right_seg_id_opt.and_then(|id| ref_segments_locked.get(&id).cloned());
7200            (l, r)
7201        };
7202        let empty: Vec<u8> = Vec::new();
7203        let ref_left = ref_left_opt.as_ref().unwrap_or(&empty);
7204        let ref_right = ref_right_opt.as_ref().unwrap_or(&empty);
7205
7206        if let Some((has_mid, mid, bp, s2, should)) = crate::ragc_ffi::decide_split(
7207            &front_neighbors,
7208            &back_neighbors,
7209            ref_left,
7210            ref_right,
7211            segment_dir,
7212            front_kmer,
7213            back_kmer,
7214            config.min_match_len as u32,
7215            config.k as u32,
7216            should_reverse,
7217        ) {
7218            if config.verbosity > 1 {
7219                eprintln!("FFI_DECIDE: has_middle={} middle={:#x} best_pos={} seg2_start={} should_split={} refs L={} R={}", has_mid, mid, bp, s2, should, ref_left.len(), ref_right.len());
7220            }
7221            if !has_mid {
7222                return None;
7223            }
7224
7225            // FFI found middle k-mer but may have said !should due to empty refs
7226            if should {
7227                maybe_best = Some((bp, s2));
7228            } else if force_split_on_empty_refs && ref_left.is_empty() && ref_right.is_empty() {
7229                // FALLBACK: FFI can't compute costs because refs are empty, but we want to
7230                // create new groups. Search for ANY terminator k-mer in the segment that can serve as a split point.
7231                // This handles the case where the exact middle_kmer from reference has a mutation in this sample.
7232                if config.verbosity > 1 {
7233                    eprintln!("SPLIT_FALLBACK: FFI said no but force_split_on_empty_refs=true, searching for any terminator k-mer in segment");
7234                }
7235
7236                // Build a set of potential middle k-mers from both neighbor lists
7237                let mut potential_middles: AHashSet<u64> = AHashSet::new();
7238                for &kmer in front_neighbors.iter() {
7239                    if kmer != MISSING_KMER && kmer != front_kmer && kmer != back_kmer {
7240                        potential_middles.insert(kmer);
7241                    }
7242                }
7243                for &kmer in back_neighbors.iter() {
7244                    if kmer != MISSING_KMER && kmer != front_kmer && kmer != back_kmer {
7245                        potential_middles.insert(kmer);
7246                    }
7247                }
7248
7249                if config.verbosity > 1 {
7250                    eprintln!(
7251                        "SPLIT_FALLBACK: {} potential middle k-mers from terminators",
7252                        potential_middles.len()
7253                    );
7254                    for &pm in potential_middles.iter().take(5) {
7255                        eprintln!("  potential_middle: {:#x}", pm);
7256                    }
7257                }
7258
7259                // Search for ANY terminator k-mer in the segment
7260                let k = config.k;
7261                if segment_dir.len() >= k && !potential_middles.is_empty() {
7262                    let mut found_pos: Option<(usize, u64)> = None; // (pos, kmer)
7263                    let mut kmer_obj =
7264                        crate::kmer::Kmer::new(k as u32, crate::kmer::KmerMode::Canonical);
7265                    for (i, &base) in segment_dir.iter().enumerate() {
7266                        if base > 3 {
7267                            kmer_obj.reset();
7268                        } else {
7269                            kmer_obj.insert(base as u64);
7270                            if kmer_obj.is_full() {
7271                                let kmer_at_pos = kmer_obj.data();
7272                                let pos = i + 1 - k; // Position of k-mer start
7273                                                     // Check if this k-mer is in our set of potential middles
7274                                if potential_middles.contains(&kmer_at_pos) {
7275                                    // Ensure we're not at the very beginning or end
7276                                    if pos > k && pos + k + k < segment_dir.len() {
7277                                        found_pos = Some((pos, kmer_at_pos));
7278                                        break;
7279                                    }
7280                                }
7281                            }
7282                        }
7283                    }
7284
7285                    if let Some((pos, found_kmer)) = found_pos {
7286                        // Split at position just after the found k-mer
7287                        let split_pos = pos + k;
7288                        if split_pos > k + 1 && split_pos + k + 1 < segment_dir.len() {
7289                            if config.verbosity > 1 {
7290                                eprintln!("SPLIT_FALLBACK_FOUND: terminator kmer={:#x} found at pos={}, splitting at {}", found_kmer, pos, split_pos);
7291                            }
7292                            maybe_best = Some((split_pos, split_pos));
7293                        } else if config.verbosity > 1 {
7294                            eprintln!(
7295                                "SPLIT_FALLBACK_DEGENERATE: pos={} split_pos={} segment_len={}",
7296                                pos,
7297                                split_pos,
7298                                segment_dir.len()
7299                            );
7300                        }
7301                    } else {
7302                        // FALLBACK 2: Terminators not found - discover a NEW singleton k-mer in the segment
7303                        // Similar to C++ AGC's find_new_splitters() but simpler: just find any singleton
7304                        if config.verbosity > 1 {
7305                            eprintln!("SPLIT_FALLBACK_DISCOVER: trying to find singleton k-mer in segment (len={})", segment_dir.len());
7306                        }
7307
7308                        // Collect all k-mers in the middle region of the segment
7309                        let min_margin = k * 2; // Don't split too close to edges
7310                        let search_start = min_margin;
7311                        let search_end = segment_dir.len().saturating_sub(min_margin);
7312
7313                        if search_end > search_start + k {
7314                            // Enumerate k-mers and find singletons
7315                            let mut kmer_positions: Vec<(u64, usize)> = Vec::new();
7316                            let mut kmer_obj2 =
7317                                crate::kmer::Kmer::new(k as u32, crate::kmer::KmerMode::Canonical);
7318
7319                            for (i, &base) in
7320                                segment_dir[search_start..search_end].iter().enumerate()
7321                            {
7322                                if base > 3 {
7323                                    kmer_obj2.reset();
7324                                } else {
7325                                    kmer_obj2.insert(base as u64);
7326                                    if kmer_obj2.is_full() {
7327                                        let kmer_val = kmer_obj2.data();
7328                                        let pos = search_start + i + 1 - k;
7329                                        kmer_positions.push((kmer_val, pos));
7330                                    }
7331                                }
7332                            }
7333
7334                            // Sort by k-mer value to find duplicates
7335                            kmer_positions.sort_by_key(|&(kmer, _)| kmer);
7336
7337                            // Find first singleton (k-mer that appears exactly once)
7338                            let mut singleton_pos: Option<usize> = None;
7339                            let mut i = 0;
7340                            while i < kmer_positions.len() {
7341                                let (kmer, pos) = kmer_positions[i];
7342                                let mut j = i + 1;
7343                                while j < kmer_positions.len() && kmer_positions[j].0 == kmer {
7344                                    j += 1;
7345                                }
7346                                // If exactly one occurrence, it's a singleton
7347                                if j == i + 1 {
7348                                    singleton_pos = Some(pos);
7349                                    if config.verbosity > 1 {
7350                                        eprintln!("SPLIT_FALLBACK_SINGLETON: found singleton kmer={:#x} at pos={}", kmer, pos);
7351                                    }
7352                                    break;
7353                                }
7354                                i = j;
7355                            }
7356
7357                            if let Some(pos) = singleton_pos {
7358                                let split_pos = pos + k;
7359                                if config.verbosity > 1 {
7360                                    eprintln!(
7361                                        "SPLIT_FALLBACK_SINGLETON_SPLIT: splitting at {}",
7362                                        split_pos
7363                                    );
7364                                }
7365                                maybe_best = Some((split_pos, split_pos));
7366                            } else if config.verbosity > 1 {
7367                                eprintln!("SPLIT_FALLBACK_NO_SINGLETON: no singleton k-mers found in middle region");
7368                            }
7369                        } else if config.verbosity > 1 {
7370                            eprintln!(
7371                                "SPLIT_FALLBACK_TOO_SHORT: segment too short for singleton search"
7372                            );
7373                        }
7374                    }
7375                }
7376            } else {
7377                return None;
7378            }
7379        } else if config.verbosity > 1 {
7380            eprintln!("FFI_DECIDE: unavailable (decide_split returned None)");
7381        }
7382    }
7383
7384    // If FFI provided best position, use it; otherwise compute costs in Rust
7385    let mut v_costs1 = if maybe_best.is_none() {
7386        if let Some(lz_left) = prepare_on_demand(left_key, "left") {
7387            #[cfg(feature = "cpp_agc")]
7388            {
7389                // Unused path when FFI returns best split; kept for completeness
7390                let ref_left = {
7391                    let map_segments_locked = map_segments.read().unwrap();
7392                    let ref_segments_locked = reference_segments.read().unwrap();
7393                    let seg_id = map_segments_locked.get(left_key).copied().unwrap_or(0);
7394                    ref_segments_locked.get(&seg_id).cloned()
7395                };
7396                if let Some(ref_data) = ref_left {
7397                    if front_kmer < middle_kmer {
7398                        crate::ragc_ffi::cost_vector(
7399                            true,
7400                            &ref_data,
7401                            segment_dir,
7402                            config.min_match_len as u32,
7403                        )
7404                    } else {
7405                        let mut v = crate::ragc_ffi::cost_vector(
7406                            false,
7407                            &ref_data,
7408                            &segment_rc_vec,
7409                            config.min_match_len as u32,
7410                        );
7411                        v.reverse();
7412                        v
7413                    }
7414                } else {
7415                    if config.verbosity > 1 {
7416                        eprintln!("SPLIT_SKIP: left group has no reference yet");
7417                    }
7418                    return None;
7419                }
7420            }
7421            #[cfg(not(feature = "cpp_agc"))]
7422            {
7423                if front_kmer < middle_kmer {
7424                    lz_left.get_coding_cost_vector(segment_dir, true)
7425                } else {
7426                    let mut v = lz_left.get_coding_cost_vector(&segment_rc_vec, false);
7427                    v.reverse();
7428                    v
7429                }
7430            }
7431        } else {
7432            if config.verbosity > 1 {
7433                eprintln!("SPLIT_SKIP: left group has no reference yet");
7434            }
7435            if crate::env_cache::debug_split() {
7436                eprintln!(
7437                    "RAGC_SPLIT_SKIP_LEFT: left_key=({},{}) has no reference",
7438                    left_key.kmer_front, left_key.kmer_back
7439                );
7440            }
7441            return None;
7442        }
7443    } else {
7444        Vec::new()
7445    };
7446
7447    // Cumulative sum forward for v_costs1
7448    let mut sum = 0u32;
7449    for cost in v_costs1.iter_mut() {
7450        sum = sum.saturating_add(*cost);
7451        *cost = sum;
7452    }
7453
7454    let v_costs2 = if maybe_best.is_none() {
7455        if let Some(lz_right) = prepare_on_demand(right_key, "right") {
7456            #[cfg(feature = "cpp_agc")]
7457            {
7458                let ref_right = {
7459                    let map_segments_locked = map_segments.read().unwrap();
7460                    let ref_segments_locked = reference_segments.read().unwrap();
7461                    let seg_id = map_segments_locked.get(right_key).copied().unwrap_or(0);
7462                    ref_segments_locked.get(&seg_id).cloned()
7463                };
7464                if let Some(ref_data) = ref_right {
7465                    let mut v = if middle_kmer < back_kmer {
7466                        // Suffix placement, cumulative sum right-to-left
7467                        crate::ragc_ffi::cost_vector(
7468                            false,
7469                            &ref_data,
7470                            segment_dir,
7471                            config.min_match_len as u32,
7472                        )
7473                    } else {
7474                        // RC + prefix placement; cumulative sum left-to-right then reverse
7475                        crate::ragc_ffi::cost_vector(
7476                            true,
7477                            &ref_data,
7478                            &segment_rc_vec,
7479                            config.min_match_len as u32,
7480                        )
7481                    };
7482                    if middle_kmer < back_kmer {
7483                        // Reverse cumulative sum
7484                        let mut acc = 0u32;
7485                        for cost in v.iter_mut().rev() {
7486                            acc = acc.saturating_add(*cost);
7487                            *cost = acc;
7488                        }
7489                        v
7490                    } else {
7491                        // Forward cumulative then reverse
7492                        let mut acc = 0u32;
7493                        for cost in v.iter_mut() {
7494                            acc = acc.saturating_add(*cost);
7495                            *cost = acc;
7496                        }
7497                        v.reverse();
7498                        v
7499                    }
7500                } else {
7501                    if config.verbosity > 1 {
7502                        eprintln!("SPLIT_SKIP: right group has no reference yet");
7503                    }
7504                    return None;
7505                }
7506            }
7507            #[cfg(not(feature = "cpp_agc"))]
7508            {
7509                if middle_kmer < back_kmer {
7510                    let mut v = lz_right.get_coding_cost_vector(segment_dir, false);
7511                    let mut acc = 0u32;
7512                    for cost in v.iter_mut().rev() {
7513                        acc = acc.saturating_add(*cost);
7514                        *cost = acc;
7515                    }
7516                    v
7517                } else {
7518                    let mut v = lz_right.get_coding_cost_vector(&segment_rc_vec, true);
7519                    let mut acc = 0u32;
7520                    for cost in v.iter_mut() {
7521                        acc = acc.saturating_add(*cost);
7522                        *cost = acc;
7523                    }
7524                    v.reverse();
7525                    v
7526                }
7527            }
7528        } else {
7529            if config.verbosity > 1 {
7530                eprintln!("SPLIT_SKIP: right group has no reference yet");
7531            }
7532            return None;
7533        }
7534    } else {
7535        Vec::new()
7536    };
7537
7538    if maybe_best.is_none() && (v_costs1.is_empty() || v_costs2.is_empty()) {
7539        if config.verbosity > 1 {
7540            eprintln!("SPLIT_SKIP: cost vectors empty");
7541        }
7542        return None;
7543    }
7544
7545    if maybe_best.is_none() && v_costs1.len() != v_costs2.len() {
7546        if config.verbosity > 1 {
7547            eprintln!("SPLIT_SKIP: cost vector length mismatch");
7548        }
7549        return None;
7550    }
7551
7552    // Find position with minimum combined cost
7553    // Matches C++ AGC agc_compressor.cpp:1663-1674
7554    let mut best_pos = if let Some((p, _)) = maybe_best {
7555        p
7556    } else {
7557        let mut best_sum = u32::MAX;
7558        let mut pos = 0usize;
7559        for i in 0..v_costs1.len() {
7560            let cs = v_costs1[i].saturating_add(v_costs2[i]);
7561            if cs < best_sum {
7562                best_sum = cs;
7563                pos = i;
7564            }
7565        }
7566        pos
7567    };
7568
7569    #[cfg(feature = "verbose_debug")]
7570    if crate::env_cache::debug_split_map() && maybe_best.is_none() {
7571        let start = best_pos.saturating_sub(3);
7572        let end = (best_pos + 4).min(v_costs1.len());
7573        eprintln!(
7574            "RAGC_COST_WINDOW: len={} best_pos={}",
7575            v_costs1.len(),
7576            best_pos
7577        );
7578        for i in start..end {
7579            eprintln!(
7580                "  i={} Lcum={} Rcum={} Sum={}{}",
7581                i,
7582                v_costs1[i],
7583                v_costs2[i],
7584                v_costs1[i].saturating_add(v_costs2[i]),
7585                if i == best_pos { "  <--" } else { "" }
7586            );
7587        }
7588    }
7589
7590    // Apply degenerate position rules ALWAYS to prevent tiny segments at boundaries.
7591    // Even when FFI or fallback paths provide best_pos, we must enforce this constraint
7592    // to match C++ AGC behavior (agc_compressor.cpp:1685-1688).
7593    let k = config.k;
7594    let original_best_pos = best_pos; // Save for logging
7595    if best_pos < k + 1 {
7596        best_pos = 0; // Too close to start
7597    }
7598    if best_pos + k + 1 > v_costs1.len() {
7599        best_pos = v_costs1.len(); // Too close to end
7600    }
7601
7602    if config.verbosity > 1 && original_best_pos != best_pos {
7603        eprintln!(
7604            "BOUNDARY_CLAMP: original_best_pos={} clamped_to={} (len={}, k+1={}) source={}",
7605            original_best_pos,
7606            best_pos,
7607            v_costs1.len(),
7608            k + 1,
7609            if maybe_best.is_some() {
7610                "FFI/fallback"
7611            } else {
7612                "cost_calc"
7613            }
7614        );
7615    }
7616
7617    // Check if split is degenerate (C++ AGC agc_compressor.cpp:1400-1415)
7618    // C++ AGC ACCEPTS degenerate splits and assigns whole segment to one group
7619    // First compute sizes with exact best_pos; map to bytes afterward.
7620    let left_size_pre = best_pos;
7621    let right_size_pre = segment_data.len().saturating_sub(best_pos);
7622
7623    if left_size_pre == 0 {
7624        // Degenerate: whole segment matches RIGHT group
7625        // Return empty left, full segment as right (C++ AGC line 1400-1407)
7626        if config.verbosity > 1 {
7627            eprintln!("SPLIT_DEGENERATE_RIGHT: best_pos=0, assigning whole segment to RIGHT group");
7628        }
7629        return Some((Vec::new(), segment_data.to_vec(), middle_kmer));
7630    }
7631
7632    if right_size_pre == 0 {
7633        // Degenerate: whole segment matches LEFT group
7634        // Return full segment as left, empty right (C++ AGC line 1408-1415)
7635        if config.verbosity > 1 {
7636            eprintln!("SPLIT_DEGENERATE_LEFT: best_pos=len, assigning whole segment to LEFT group");
7637        }
7638        return Some((segment_data.to_vec(), Vec::new(), middle_kmer));
7639    }
7640
7641    // Non-degenerate split: use FFI seg2_start directly (it already accounts for orientation)
7642    let (left_data, right_data) = if let Some((bp, s2)) = maybe_best {
7643        if config.verbosity > 1 {
7644            eprintln!(
7645                "SPLIT_GEOM_SELECT(FFI): best_pos={} seg2_start={} should_reverse={}",
7646                bp, s2, should_reverse
7647            );
7648        }
7649        split_segment_from_start(segment_data.as_slice(), s2, config.k)
7650    } else {
7651        let half = if should_reverse {
7652            (config.k + 1) / 2
7653        } else {
7654            config.k / 2
7655        };
7656        let seg2_start = best_pos.saturating_sub(half);
7657        if config.verbosity > 1 {
7658            eprintln!(
7659                "SPLIT_GEOM_SELECT(local): best_pos={} k={} half={} seg2_start={} should_reverse={}",
7660                best_pos, config.k, half, seg2_start, should_reverse
7661            );
7662        }
7663        split_segment_from_start(segment_data.as_slice(), seg2_start, config.k)
7664    };
7665
7666    if config.verbosity > 1 {
7667        eprintln!(
7668            "SPLIT_SUCCESS: best_pos={} cost={} left_len={} right_len={}",
7669            best_pos,
7670            0u32, // best_sum not available under FFI path; placeholder
7671            left_data.len(),
7672            right_data.len()
7673        );
7674    }
7675
7676    Some((left_data, right_data, middle_kmer))
7677}
7678
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an archive path inside the OS temp directory.
    ///
    /// The previous tests hard-coded `/tmp/...`, which does not exist on
    /// Windows and can collide with other users' files on shared hosts.
    /// `std::env::temp_dir()` resolves the correct location per platform.
    fn temp_archive_path(name: &str) -> String {
        std::env::temp_dir().join(name).to_string_lossy().into_owned()
    }

    #[test]
    fn test_create_compressor() {
        // Constructing a compressor with default config and no splitters
        // should succeed (archive file is created/truncated at the path).
        let config = StreamingQueueConfig::default();
        let splitters = AHashSet::new();
        let compressor = StreamingQueueCompressor::with_splitters(
            &temp_archive_path("test_stream.agc"),
            config,
            splitters,
        );
        assert!(compressor.is_ok());
    }

    #[test]
    fn test_queue_stats() {
        let config = StreamingQueueConfig::default();
        let splitters = AHashSet::new();
        let compressor = StreamingQueueCompressor::with_splitters(
            &temp_archive_path("test_stats.agc"),
            config,
            splitters,
        )
        .unwrap();

        // A freshly-created compressor has an empty, open queue with the
        // default 2 GiB capacity.
        let stats = compressor.queue_stats();
        assert_eq!(stats.current_size_bytes, 0);
        assert_eq!(stats.current_items, 0);
        assert_eq!(stats.capacity_bytes, 2 * 1024 * 1024 * 1024);
        assert!(!stats.is_closed);
    }

    #[test]
    fn test_push_and_finalize() {
        let config = StreamingQueueConfig {
            verbosity: 0, // Quiet for tests
            ..Default::default()
        };
        let splitters = AHashSet::new();
        let mut compressor = StreamingQueueCompressor::with_splitters(
            &temp_archive_path("test_push.agc"),
            config,
            splitters,
        )
        .unwrap();

        // Push a small contig, then finalize; both must succeed end-to-end.
        let data = vec![b'A'; 1000];
        compressor
            .push("sample1".to_string(), "chr1".to_string(), data)
            .unwrap();

        compressor.finalize().unwrap();
    }
}
7727}