1#![allow(clippy::all)]
2#![allow(dead_code)]
3#![allow(unused_variables)]
4#![allow(unused_imports)]
5#![allow(private_interfaces)]
6#![allow(unexpected_cfgs)]
7pub mod agc_compressor;
121pub mod bloom_filter;
122pub mod contig_compression;
123pub mod contig_iterator;
124pub mod decompressor;
125pub mod env_cache;
126pub mod genome_io;
127pub mod kmer;
128pub mod kmer_extract;
129pub mod lz_diff;
130pub mod lz_matcher;
131pub mod memory_bounded_queue;
132pub mod preprocessing;
133pub mod priority_queue;
134pub mod segment;
135pub mod segment_buffer;
136pub mod segment_compression;
137pub mod splitters;
138pub mod task;
139pub mod tuple_packing;
140pub mod worker;
141pub mod zstd_pool;
142
143#[path = "ffi/agc_index.rs"]
145pub mod agc_index_ffi;
146
147#[path = "ffi/agc_compress.rs"]
148pub mod agc_compress_ffi;
149
150#[path = "ffi/splitters.rs"]
151pub mod splitters_ffi;
152
153#[path = "ffi/segment_helpers.rs"]
154pub mod segment_helpers_ffi;
155
156#[path = "ffi/kmer_helpers.rs"]
157pub mod kmer_helpers_ffi;
158
159#[path = "ffi/splitter_check.rs"]
160pub mod splitter_check_ffi;
161
162#[path = "ffi/segment_boundary.rs"]
163pub mod segment_boundary_ffi;
164
165#[path = "ffi/base_validation.rs"]
166pub mod base_validation_ffi;
167
168#[path = "ffi/reverse_complement.rs"]
169pub mod reverse_complement_ffi;
170
171#[path = "ffi/segment_split.rs"]
172pub mod segment_split_ffi;
173
174#[path = "ffi/kmer_pair.rs"]
175pub mod kmer_pair_ffi;
176
177#[path = "ffi/preprocessing.rs"]
178pub mod preprocessing_ffi;
179
180#[path = "ffi/find_splitters_in_contig.rs"]
181pub mod find_splitters_in_contig_ffi;
182
183#[cfg(feature = "cpp_agc")]
184pub mod ragc_ffi {
185 extern "C" {
186 pub fn agc_cost_vector(
187 prefix: i32,
188 ref_ptr: *const u8,
189 ref_len: usize,
190 text_ptr: *const u8,
191 text_len: usize,
192 min_match_len: u32,
193 out_costs: *mut u32,
194 ) -> usize;
195
196 pub fn agc_best_split(
197 left_ref: *const u8,
198 left_len: usize,
199 right_ref: *const u8,
200 right_len: usize,
201 text_ptr: *const u8,
202 text_len: usize,
203 min_match_len: u32,
204 k: u32,
205 front_lt_mid: i32,
206 mid_lt_back: i32,
207 should_reverse: i32,
208 out_best_pos: *mut u32,
209 out_seg2_start: *mut u32,
210 out_should_split: *mut i32,
211 ) -> i32;
212
213 pub fn agc_find_middle(
214 front_list: *const u64,
215 n_front: usize,
216 back_list: *const u64,
217 n_back: usize,
218 out_middle: *mut u64,
219 ) -> i32;
220
221 pub fn agc_decide_split(
222 front_list: *const u64,
223 n_front: usize,
224 back_list: *const u64,
225 n_back: usize,
226 left_ref: *const u8,
227 left_len: usize,
228 right_ref: *const u8,
229 right_len: usize,
230 text_ptr: *const u8,
231 text_len: usize,
232 front_kmer: u64,
233 back_kmer: u64,
234 min_match_len: u32,
235 k: u32,
236 should_reverse: i32,
237 out_has_middle: *mut i32,
238 out_middle: *mut u64,
239 out_best_pos: *mut u32,
240 out_seg2_start: *mut u32,
241 out_should_split: *mut i32,
242 ) -> i32;
243
244 pub fn agc_grouping_engine_create(k: u32, start_group_id: u32) -> *mut std::ffi::c_void;
246 pub fn agc_grouping_engine_destroy(engine: *mut std::ffi::c_void);
247 pub fn agc_grouping_engine_register(
248 engine: *mut std::ffi::c_void,
249 kmer_front: u64,
250 kmer_back: u64,
251 group_id: u32,
252 );
253 pub fn agc_grouping_engine_find_middle(
254 engine: *mut std::ffi::c_void,
255 front: u64,
256 back: u64,
257 out_middle: *mut u64,
258 ) -> i32;
259 pub fn agc_grouping_engine_group_exists(
260 engine: *mut std::ffi::c_void,
261 kmer_front: u64,
262 kmer_back: u64,
263 ) -> i32;
264 pub fn agc_grouping_engine_get_group_id(
265 engine: *mut std::ffi::c_void,
266 kmer_front: u64,
267 kmer_back: u64,
268 ) -> u32;
269 pub fn agc_grouping_engine_alloc_id(engine: *mut std::ffi::c_void) -> u32;
270
271 pub fn agc_estimate(
273 ref_ptr: *const u8,
274 ref_len: usize,
275 text_ptr: *const u8,
276 text_len: usize,
277 min_match_len: u32,
278 bound: u32,
279 ) -> u32;
280
281 pub fn agc_lzdiff_v2_estimate(
284 ref_ptr: *const u8,
285 ref_len: usize,
286 text_ptr: *const u8,
287 text_len: usize,
288 min_match_len: u32,
289 bound: u32,
290 ) -> u32;
291
292 pub fn agc_lzdiff_v2_encode(
294 ref_ptr: *const u8,
295 ref_len: usize,
296 text_ptr: *const u8,
297 text_len: usize,
298 min_match_len: u32,
299 out_buf: *mut u8,
300 out_buf_len: usize,
301 ) -> u32;
302 }
303
304 pub fn estimate(reference: &[u8], text: &[u8], min_match_len: u32, bound: u32) -> u32 {
306 unsafe {
307 agc_estimate(
308 reference.as_ptr(),
309 reference.len(),
310 text.as_ptr(),
311 text.len(),
312 min_match_len,
313 bound,
314 )
315 }
316 }
317
318 pub fn lzdiff_v2_estimate(
321 reference: &[u8],
322 text: &[u8],
323 min_match_len: u32,
324 bound: u32,
325 ) -> u32 {
326 unsafe {
327 agc_lzdiff_v2_estimate(
328 reference.as_ptr(),
329 reference.len(),
330 text.as_ptr(),
331 text.len(),
332 min_match_len,
333 bound,
334 )
335 }
336 }
337
338 pub fn lzdiff_v2_encode(reference: &[u8], text: &[u8], min_match_len: u32) -> Option<Vec<u8>> {
341 let mut out_buf = vec![0u8; text.len() * 2 + 1024];
343 unsafe {
344 let result = agc_lzdiff_v2_encode(
345 reference.as_ptr(),
346 reference.len(),
347 text.as_ptr(),
348 text.len(),
349 min_match_len,
350 out_buf.as_mut_ptr(),
351 out_buf.len(),
352 );
353 if result == u32::MAX {
354 None
355 } else {
356 out_buf.truncate(result as usize);
357 Some(out_buf)
358 }
359 }
360 }
361
362 pub fn cost_vector(
363 prefix: bool,
364 reference: &[u8],
365 text: &[u8],
366 min_match_len: u32,
367 ) -> Vec<u32> {
368 unsafe {
369 let mut out = vec![0u32; text.len()];
370 let _ = agc_cost_vector(
371 if prefix { 1 } else { 0 },
372 reference.as_ptr(),
373 reference.len(),
374 text.as_ptr(),
375 text.len(),
376 min_match_len,
377 out.as_mut_ptr(),
378 );
379 out
380 }
381 }
382
383 pub fn best_split(
384 left_ref: &[u8],
385 right_ref: &[u8],
386 text: &[u8],
387 min_match_len: u32,
388 k: u32,
389 front_lt_mid: bool,
390 mid_lt_back: bool,
391 should_reverse: bool,
392 ) -> Option<(usize, usize, bool)> {
393 unsafe {
394 let mut best: u32 = 0;
395 let mut seg2: u32 = 0;
396 let mut should: i32 = 0;
397 let ok = agc_best_split(
398 left_ref.as_ptr(),
399 left_ref.len(),
400 right_ref.as_ptr(),
401 right_ref.len(),
402 text.as_ptr(),
403 text.len(),
404 min_match_len,
405 k,
406 if front_lt_mid { 1 } else { 0 },
407 if mid_lt_back { 1 } else { 0 },
408 if should_reverse { 1 } else { 0 },
409 &mut best as *mut u32,
410 &mut seg2 as *mut u32,
411 &mut should as *mut i32,
412 );
413 if ok != 0 {
414 Some((best as usize, seg2 as usize, should != 0))
415 } else {
416 None
417 }
418 }
419 }
420
421 pub fn find_middle(front_neighbors: &[u64], back_neighbors: &[u64]) -> Option<u64> {
422 unsafe {
423 let mut out: u64 = 0;
424 let ok = agc_find_middle(
425 front_neighbors.as_ptr(),
426 front_neighbors.len(),
427 back_neighbors.as_ptr(),
428 back_neighbors.len(),
429 &mut out as *mut u64,
430 );
431 if ok != 0 {
432 Some(out)
433 } else {
434 None
435 }
436 }
437 }
438
439 pub fn decide_split(
440 front_neighbors: &[u64],
441 back_neighbors: &[u64],
442 left_ref: &[u8],
443 right_ref: &[u8],
444 text: &[u8],
445 front_kmer: u64,
446 back_kmer: u64,
447 min_match_len: u32,
448 k: u32,
449 should_reverse: bool,
450 ) -> Option<(bool, u64, usize, usize, bool)> {
451 unsafe {
452 let mut has_mid: i32 = 0;
453 let mut middle: u64 = 0;
454 let mut best: u32 = 0;
455 let mut seg2: u32 = 0;
456 let mut should: i32 = 0;
457 let ok = agc_decide_split(
458 front_neighbors.as_ptr(),
459 front_neighbors.len(),
460 back_neighbors.as_ptr(),
461 back_neighbors.len(),
462 left_ref.as_ptr(),
463 left_ref.len(),
464 right_ref.as_ptr(),
465 right_ref.len(),
466 text.as_ptr(),
467 text.len(),
468 front_kmer,
469 back_kmer,
470 min_match_len,
471 k,
472 if should_reverse { 1 } else { 0 },
473 &mut has_mid as *mut i32,
474 &mut middle as *mut u64,
475 &mut best as *mut u32,
476 &mut seg2 as *mut u32,
477 &mut should as *mut i32,
478 );
479 if ok != 0 {
480 Some((
481 has_mid != 0,
482 middle,
483 best as usize,
484 seg2 as usize,
485 should != 0,
486 ))
487 } else {
488 None
489 }
490 }
491 }
492
493 pub struct GroupingEngine {
495 ptr: *mut std::ffi::c_void,
496 }
497
498 impl GroupingEngine {
499 pub fn new(k: u32, start_group_id: u32) -> Self {
500 unsafe {
501 Self {
502 ptr: agc_grouping_engine_create(k, start_group_id),
503 }
504 }
505 }
506
507 pub fn register_group(&mut self, kmer_front: u64, kmer_back: u64, group_id: u32) {
508 unsafe {
509 agc_grouping_engine_register(self.ptr, kmer_front, kmer_back, group_id);
510 }
511 }
512
513 pub fn find_middle(&self, front: u64, back: u64) -> Option<u64> {
514 unsafe {
515 let mut out: u64 = 0;
516 let ok =
517 agc_grouping_engine_find_middle(self.ptr, front, back, &mut out as *mut u64);
518 if ok != 0 {
519 Some(out)
520 } else {
521 None
522 }
523 }
524 }
525
526 pub fn group_exists(&self, kmer_front: u64, kmer_back: u64) -> bool {
527 unsafe { agc_grouping_engine_group_exists(self.ptr, kmer_front, kmer_back) != 0 }
528 }
529
530 pub fn get_group_id(&self, kmer_front: u64, kmer_back: u64) -> Option<u32> {
531 unsafe {
532 let gid = agc_grouping_engine_get_group_id(self.ptr, kmer_front, kmer_back);
533 if gid == u32::MAX {
534 None
535 } else {
536 Some(gid)
537 }
538 }
539 }
540
541 pub fn alloc_group_id(&mut self) -> u32 {
542 unsafe { agc_grouping_engine_alloc_id(self.ptr) }
543 }
544 }
545
546 impl Drop for GroupingEngine {
547 fn drop(&mut self) {
548 unsafe {
549 agc_grouping_engine_destroy(self.ptr);
550 }
551 }
552 }
553
554 unsafe impl Send for GroupingEngine {}
556 unsafe impl Sync for GroupingEngine {}
557}
558
559pub use agc_compressor::{QueueStats, StreamingQueueCompressor, StreamingQueueConfig};
561pub use contig_iterator::{MultiFileIterator, PansnFileIterator};
562pub use decompressor::{Decompressor, DecompressorConfig};
563pub use genome_io::{GenomeIO, GenomeWriter};
564pub use kmer::{
565 canonical_kmer, decode_base, encode_base, reverse_complement, reverse_complement_kmer,
566};
567pub use kmer::{Kmer, KmerMode};
568pub use kmer_extract::{enumerate_kmers, find_candidate_kmers, remove_non_singletons};
569pub use lz_diff::LZDiff;
570pub use memory_bounded_queue::MemoryBoundedQueue;
571pub use segment::{split_at_splitters, split_at_splitters_with_size, Segment};
572pub use segment_compression::{
573 compress_reference_segment, compress_segment, compress_segment_configured, decompress_segment,
574 decompress_segment_with_marker,
575};
576pub use splitters::{
577 determine_splitters, determine_splitters_streaming, determine_splitters_streaming_first_sample,
578 find_candidate_kmers_multi, is_hard_contig, is_splitter, two_pass_splitter_discovery,
579};
580pub use worker::create_agc_archive;