chromsize/
lib.rs

1//! chromsize: fast chromosome size extraction from FASTA and 2bit.
2//!
3//! chromsize reads sequence data (FASTA or 2bit) from a file or stdin, detects
4//! the input format by content, and returns chromosome sizes as `(name, size)`
5//! pairs. Gzip-compressed input is auto-detected and decompressed for both
6//! files and stdin.
7//!
8//! ## CLI
9//! ```text
10//! chromsize [--sequence <SEQUENCE>] --output <OUTPUT> [-t <THREADS>]
11//!
12//! -s, --sequence <SEQUENCE>  Sequence file (FASTA/2bit, use '-' or omit to read stdin)
13//! -o, --output <OUTPUT>      Output path for chrom.sizes
14//! -t, --threads <THREADS>    Number of threads (default: all cores)
15//! ```
16//!
17//! Examples:
18//! - stream FASTA: `cat genome.fa | chromsize -o chrom.sizes`
19//! - stream gzip FASTA (auto-detected): `cat genome.fa.gz | chromsize -o chrom.sizes`
20//! - file input: `chromsize -s genome.fa -o chrom.sizes`
21//! - 2bit from stdin: `cat genome.2bit | chromsize -s - -o chrom.sizes`
22//!
23//! ## Library
24//! ```rust,no_run
25//! use std::path::PathBuf;
26//!
27//! let input = PathBuf::from("/path/to/genome.fa");
28//! let output = PathBuf::from("/path/to/chrom.sizes");
29//!
30//! let sizes = chromsize::get_sizes(&input).expect("failed to read input");
31//! chromsize::writer(&sizes, &output).expect("failed to write sizes");
32//! ```
33//!
34//! The `get_sizes` function auto-detects FASTA vs 2bit by content and supports
35//! stdin when the input path is `-`.
36pub mod size;
37pub use size::*;