rype 1.0.0-rc.1

High-performance genomic sequence classification using minimizer-based k-mer sketching in RY space
Documentation
//! Rype: High-performance genomic sequence classification using minimizer-based k-mer sketching.
//!
//! This library provides efficient classification of DNA sequences against reference indices
//! using RY-space (purine/pyrimidine) encoding and minimizer sketching.
//!
//! # Core Concepts
//!
//! - **RY Encoding**: Reduces the 4-base DNA alphabet to a 2-symbol alphabet, one bit per base (purines → 1, pyrimidines → 0)
//! - **Minimizers**: Representative k-mers selected from sliding windows for efficient sketching
//! - **Inverted Index**: Enables O(Q log U) lookups instead of O(B × Q × log M)
//!
//! # Main Types
//!
//! - [`InvertedIndex`]: Minimizer → bucket mappings for fast classification
//! - [`ShardedInvertedIndex`]: Memory-efficient sharded inverted index
//! - [`MinimizerWorkspace`]: Reusable workspace for minimizer extraction
//!
//! # Classification Functions
//!
//! - [`classify_batch_sharded_merge_join`]: Classify with sharded inverted index using merge-join (default)
//! - [`classify_batch_sharded_parallel_rg`]: Classify with parallel row group processing

// Internal modules
mod classify;
mod constants;
mod core;
mod error;
mod indices;
mod types;

// Public modules
pub mod c_api;
pub mod config;
pub mod memory;

// ============================================================================
// Timing utilities (cross-cutting concern)
// ============================================================================

/// Global switch that controls whether timing diagnostics are printed to stderr.
///
/// The flag starts out `false` (timing output disabled). Because it is an
/// atomic, it is changed at runtime through its `store` method, e.g.
/// `ENABLE_TIMING.store(true, std::sync::atomic::Ordering::Relaxed)`.
///
/// [`log_timing`] reads this flag with `Relaxed` ordering on every call and
/// prints only while it is `true`. This is useful for debugging and
/// performance analysis.
pub static ENABLE_TIMING: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);

/// Emit one `[TIMING]` diagnostic line on stderr for a measured phase.
///
/// The call is a no-op unless [`ENABLE_TIMING`] currently reads `true`.
///
/// # Arguments
///
/// * `phase` - Label printed in front of the duration.
/// * `elapsed_ms` - Duration to report; printed followed by an `ms` suffix.
#[inline]
pub fn log_timing(phase: &str, elapsed_ms: u128) {
    // Check the global switch first so the disabled path does no formatting.
    let timing_enabled = ENABLE_TIMING.load(std::sync::atomic::Ordering::Relaxed);
    if !timing_enabled {
        return;
    }

    eprintln!("[TIMING] {}: {}ms", phase, elapsed_ms);
}

// Arrow FFI integration (optional feature for exporting RecordBatches)
#[cfg(feature = "arrow-ffi")]
pub mod arrow;

// ============================================================================
// Essential types (commonly used in most workflows)
// ============================================================================

// Core types
pub use error::{FirstErrorCapture, Result as RypeResult, RypeError};
pub use types::{BucketFileStats, HitResult, IndexMetadata, QueryRecord};

// Primary index types
pub use indices::{InvertedIndex, ShardedInvertedIndex};

// Minimizer extraction
pub use core::{extract_into, get_paired_minimizers_into, MinimizerWorkspace, Strand};

// Classification functions
pub use classify::{
    classify_batch_sharded_merge_join, classify_batch_sharded_parallel_rg,
    classify_from_extracted_minimizers, classify_from_extracted_minimizers_parallel_rg,
    classify_from_query_index, classify_from_query_index_parallel_rg,
    classify_with_sharded_negative, extract_batch_minimizers, filter_best_hits,
};

// Log-ratio types and functions
pub use classify::log_ratio::{
    classify_log_ratio_batch, compute_log_ratio, partition_by_numerator_score,
    validate_compatible_indices, validate_log_ratio_indices, validate_single_bucket_index,
    FastPath, LogRatioResult, PartitionResult,
};

// ============================================================================
// Specialized types (for advanced use cases - consider using qualified paths,
// e.g., `rype::parquet_index::ParquetWriteOptions` or `rype::parquet_index::merge::MergeOptions`)
// ============================================================================

// Core utilities (low-level extraction)
pub use core::{
    base_to_bit, count_hits, extract_dual_strand_into, extract_minimizer_set,
    extract_strand_minimizers, StrandMinimizers,
};

// Orientation utilities (for bucket building)
pub use core::{choose_orientation, choose_orientation_sampled, Orientation, ORIENTATION_FIRST_N};

// Merge utilities
pub use core::{kway_merge_dedup, merge_sorted_into};

// Constants
pub use constants::BUCKET_SOURCE_DELIM;

// Sharded index internals
pub use indices::{
    // Inverted index internals
    QueryInvertedIndex,
    ShardInfo,
    ShardManifest,
};

// Parquet index types (also available via `rype::parquet_index::*`)
pub use indices::{
    compute_source_hash, create_parquet_inverted_index, is_parquet_index, BucketData,
    BucketMetadata, InvertedManifest, InvertedShardInfo, ParquetCompression, ParquetManifest,
    ParquetReadOptions, ParquetWriteOptions,
};

// Index merge types (also available via `rype::parquet_index::merge::*`)
pub use indices::parquet::merge::{
    load_all_minimizers, merge_indices_streaming, MergeOptions, MergeStats,
};

// Re-export parquet module for qualified access
pub use indices::parquet as parquet_index;