Skip to main content

cyanea_seq/
lib.rs

1//! Sequence I/O and manipulation for the Cyanea bioinformatics ecosystem.
2//!
3//! Provides strongly typed, validated biological sequence types with full IUPAC
4//! alphabet support, plus FASTA/FASTQ parsing:
5//!
6//! - **Alphabets** — [`DnaAlphabet`], [`RnaAlphabet`], [`ProteinAlphabet`]
7//! - **Sequences** — [`DnaSequence`], [`RnaSequence`], [`ProteinSequence`]
8//! - **Codon translation** — Standard genetic code (NCBI Table 1)
9//! - **K-mer iteration** — Zero-allocation [`KmerIter`]
10//! - **Quality scores** — [`QualityScores`] with Phred+33/64 support
11//! - **FASTA parsing** — [`FastaStats`] via [`parse_fasta_stats`]
12//! - **FASTQ parsing** — [`FastqRecord`], [`FastqStats`] via [`parse_fastq_file`]
13//!
14//! # Example
15//!
16//! ```
17//! use cyanea_seq::{DnaSequence, RnaSequence, ProteinSequence};
18//! use cyanea_core::Sequence;
19//!
20//! // Create a DNA sequence (lowercase input is normalized to uppercase)
21//! let dna = DnaSequence::new(b"atgaaagcttaa").unwrap();
22//! assert_eq!(dna.as_bytes(), b"ATGAAAGCTTAA");
23//!
24//! // Reverse complement
25//! let rc = dna.reverse_complement();
26//! assert_eq!(rc.as_bytes(), b"TTAAGCTTTCAT");
27//!
28//! // Transcribe DNA → RNA
29//! let rna = dna.transcribe();
30//! assert_eq!(rna.as_bytes(), b"AUGAAAGCUUAA");
31//!
32//! // Translate RNA → Protein (stops at UAA)
33//! let protein = rna.translate().unwrap();
34//! assert_eq!(protein.as_bytes(), b"MKA");
35//! ```
36
37pub mod alphabet;
38pub mod assembly;
39pub mod bwt;
40pub mod codon;
41pub mod debruijn;
42pub mod fasta;
43#[cfg(feature = "std")]
44pub mod fasta_index;
45pub mod fastq;
46#[cfg(feature = "std")]
47pub mod paired;
48pub mod fm_index;
49pub mod fmd_index;
50pub mod kmer;
51pub mod masking;
52pub mod minhash;
53pub mod motif;
54pub mod motif_io;
55pub mod orf;
56pub mod pattern;
57pub mod protein_properties;
58pub mod pssm;
59pub mod restriction;
60pub mod rna_structure;
61pub mod quality;
62pub mod read_sim;
63pub mod seq;
64pub mod suffix;
65pub mod taxonomy;
66pub mod trim;
67pub mod twobit;
68pub mod types;
69
70// Re-export alphabet types
71pub use alphabet::{Alphabet, DnaAlphabet, ProteinAlphabet, RnaAlphabet};
72
73// Re-export the generic sequence type
74pub use seq::ValidatedSeq;
75
76// Re-export concrete type aliases and their methods
77pub use types::{DnaSequence, ProteinSequence, RnaSequence};
78
79// Re-export codon translation and analysis
80pub use codon::{
81    classify_substitution, codon_adaptation_index, count_syn_nonsyn_sites, translate_codon,
82    translate_sequence, CodonUsage, GeneticCode, GeneticCodeId, SubstitutionClass,
83};
84
85// Re-export k-mer iterator
86pub use kmer::KmerIter;
87
88// Re-export quality scores
89pub use quality::{PhredEncoding, QualityScores};
90
91// Re-export FASTA types
92pub use fasta::{parse_fasta_stats, FastaStats};
93
94// Re-export indexed FASTA types
95#[cfg(feature = "std")]
96pub use fasta_index::{FastaIndex, FastaIndexEntry, IndexedFastaReader};
97
98// Re-export FASTQ types
99pub use fastq::{parse_fastq_file, parse_fastq_stats, FastqRecord, FastqStats};
100
101// Re-export compact encoding and indexing types
102pub use twobit::TwoBitSequence;
103pub use suffix::SuffixArray;
104pub use fm_index::FmIndex;
105pub use fmd_index::{FmdIndex, BiInterval};
106
107// Re-export MinHash sketching types
108pub use minhash::{MinHash, FracMinHash};
109
110// Re-export pattern matching algorithms
111pub use pattern::{bndm, bom, horspool, kmp, myers_bitparallel, shift_and, ukkonen};
112
113// Re-export PSSM types and helpers
114pub use pssm::{dna_mapping, protein_mapping, Pssm, PssmDna, PssmProtein};
115
116// Re-export ORF finder
117pub use orf::{find_orfs, find_orfs_both_strands, find_orfs_with_codons, OrfResult, Strand};
118
119// Re-export BWT
120pub use bwt::Bwt;
121
122// Re-export quality trimming and filtering
123pub use trim::{TrimPipeline, TrimRange, TrimReport};
124#[cfg(feature = "std")]
125pub use trim::{OrphanPolicy, PairedTrimReport, PairedTrimResult};
126
127// Re-export paired-end FASTQ types
128#[cfg(feature = "std")]
129pub use paired::{
130    deinterleave_fastq_file, interleave_fastq_files, parse_interleaved_fastq,
131    parse_paired_fastq_files, parse_paired_fastq_stats, strip_read_suffix, validate_mate_pair,
132    validate_mate_pair_strict, write_interleaved_fastq, write_paired_fastq, MateValidation,
133    PairedFastqRecord, PairedFastqStats,
134};
135
136// Re-export protein sequence properties
137pub use protein_properties::{
138    amino_acid_composition, chou_fasman, extinction_coefficient, gor, gravy,
139    hydrophobicity_profile, isoelectric_point, predict_disorder, AminoAcidComposition,
140    DisorderPrediction, ExtinctionCoefficient, HydrophobicityScale, SecondaryStructure,
141    SecondaryStructurePrediction,
142};
143
144// Re-export RNA secondary structure prediction
145pub use rna_structure::{
146    base_pair_distance, mccaskill, mountain_distance, nussinov, zuker_mfe, MfeResult,
147    NussinovResult, PartitionResult, RnaSecondaryStructure,
148};
149
150// Re-export masking types
151pub use masking::{
152    apply_mask, dust, find_tandem_repeats, mask_dust, mask_seg, seg, DustParams, MaskMode,
153    MaskResult, MaskSource, MaskedRegion, SegParams, TandemRepeatParams,
154};
155
156// Re-export de Bruijn graph types
157pub use debruijn::{DeBruijnGraph, Unitig};
158
159// Re-export assembly QC
160pub use assembly::{assembly_stats, nx_values, AssemblyStats};
161
162// Re-export taxonomy types
163pub use taxonomy::{KmerClassifier, TaxonRank, TaxonomyNode, TaxonomyTree};
164
165// Re-export restriction enzyme types
166pub use restriction::{
167    common_enzymes, digest, find_cut_sites, fragment_sizes, CutSite, Fragment, Overhang,
168    RestrictionEnzyme,
169};
170
171// Re-export motif discovery types
172pub use motif::{discover_motifs, DiscoveredMotif, Pwm};
173
174// Re-export read simulator types
175pub use read_sim::{simulate_reads, ReadSimConfig, SimulatedRead};
176
177// Re-export motif format I/O
178pub use motif_io::{
179    motif_similarity, parse_jaspar, parse_meme, parse_transfac, write_jaspar, write_meme,
180    write_transfac, Motif, MotifAlphabet,
181};