Expand description
Sequence I/O and manipulation for the Cyanea bioinformatics ecosystem.
Provides strongly-typed, validated biological sequence types with full IUPAC alphabet support, plus FASTA/FASTQ parsing:
- Alphabets — DnaAlphabet, RnaAlphabet, ProteinAlphabet
- Sequences — DnaSequence, RnaSequence, ProteinSequence
- Codon translation — Standard genetic code (NCBI Table 1)
- K-mer iteration — Zero-allocation KmerIter
- Quality scores — QualityScores with Phred+33/64 support
- FASTA parsing — FastaStats via parse_fasta_stats
- FASTQ parsing — FastqRecord, FastqStats via parse_fastq_file
§Example
use cyanea_seq::{DnaSequence, RnaSequence, ProteinSequence};
use cyanea_core::Sequence;
// Create a DNA sequence (lowercase input is normalized to uppercase)
let dna = DnaSequence::new(b"atgaaagcttaa").unwrap();
assert_eq!(dna.as_bytes(), b"ATGAAAGCTTAA");
// Reverse complement
let rc = dna.reverse_complement();
assert_eq!(rc.as_bytes(), b"TTAAGCTTTCAT");
// Transcribe DNA → RNA
let rna = dna.transcribe();
assert_eq!(rna.as_bytes(), b"AUGAAAGCUUAA");
// Translate RNA → Protein (stops at UAA)
let protein = rna.translate().unwrap();
assert_eq!(protein.as_bytes(), b"MKA");
Re-exports§
pub use alphabet::Alphabet;
pub use alphabet::DnaAlphabet;
pub use alphabet::ProteinAlphabet;
pub use alphabet::RnaAlphabet;
pub use seq::ValidatedSeq;
pub use types::DnaSequence;
pub use types::ProteinSequence;
pub use types::RnaSequence;
pub use codon::classify_substitution;
pub use codon::codon_adaptation_index;
pub use codon::count_syn_nonsyn_sites;
pub use codon::translate_codon;
pub use codon::translate_sequence;
pub use codon::CodonUsage;
pub use codon::GeneticCode;
pub use codon::GeneticCodeId;
pub use codon::SubstitutionClass;
pub use kmer::KmerIter;
pub use quality::PhredEncoding;
pub use quality::QualityScores;
pub use fasta::parse_fasta_stats;
pub use fasta::FastaStats;
pub use fasta_index::FastaIndex;
pub use fasta_index::FastaIndexEntry;
pub use fasta_index::IndexedFastaReader;
pub use fastq::parse_fastq_file;
pub use fastq::parse_fastq_stats;
pub use fastq::FastqRecord;
pub use fastq::FastqStats;
pub use twobit::TwoBitSequence;
pub use suffix::SuffixArray;
pub use fm_index::FmIndex;
pub use fmd_index::FmdIndex;
pub use fmd_index::BiInterval;
pub use minhash::MinHash;
pub use minhash::FracMinHash;
pub use pattern::bndm;
pub use pattern::bom;
pub use pattern::horspool;
pub use pattern::kmp;
pub use pattern::myers_bitparallel;
pub use pattern::shift_and;
pub use pattern::ukkonen;
pub use pssm::dna_mapping;
pub use pssm::protein_mapping;
pub use pssm::Pssm;
pub use pssm::PssmDna;
pub use pssm::PssmProtein;
pub use orf::find_orfs;
pub use orf::find_orfs_both_strands;
pub use orf::find_orfs_with_codons;
pub use orf::OrfResult;
pub use orf::Strand;
pub use bwt::Bwt;
pub use trim::TrimPipeline;
pub use trim::TrimRange;
pub use trim::TrimReport;
pub use trim::OrphanPolicy;
pub use trim::PairedTrimReport;
pub use trim::PairedTrimResult;
pub use paired::deinterleave_fastq_file;
pub use paired::interleave_fastq_files;
pub use paired::parse_interleaved_fastq;
pub use paired::parse_paired_fastq_files;
pub use paired::parse_paired_fastq_stats;
pub use paired::strip_read_suffix;
pub use paired::validate_mate_pair;
pub use paired::validate_mate_pair_strict;
pub use paired::write_interleaved_fastq;
pub use paired::write_paired_fastq;
pub use paired::MateValidation;
pub use paired::PairedFastqRecord;
pub use paired::PairedFastqStats;
pub use protein_properties::amino_acid_composition;
pub use protein_properties::chou_fasman;
pub use protein_properties::extinction_coefficient;
pub use protein_properties::gor;
pub use protein_properties::gravy;
pub use protein_properties::hydrophobicity_profile;
pub use protein_properties::isoelectric_point;
pub use protein_properties::predict_disorder;
pub use protein_properties::AminoAcidComposition;
pub use protein_properties::DisorderPrediction;
pub use protein_properties::ExtinctionCoefficient;
pub use protein_properties::HydrophobicityScale;
pub use protein_properties::SecondaryStructure;
pub use protein_properties::SecondaryStructurePrediction;
pub use rna_structure::base_pair_distance;
pub use rna_structure::mccaskill;
pub use rna_structure::mountain_distance;
pub use rna_structure::nussinov;
pub use rna_structure::zuker_mfe;
pub use rna_structure::MfeResult;
pub use rna_structure::NussinovResult;
pub use rna_structure::PartitionResult;
pub use rna_structure::RnaSecondaryStructure;
pub use masking::apply_mask;
pub use masking::dust;
pub use masking::find_tandem_repeats;
pub use masking::mask_dust;
pub use masking::mask_seg;
pub use masking::seg;
pub use masking::DustParams;
pub use masking::MaskMode;
pub use masking::MaskResult;
pub use masking::MaskSource;
pub use masking::MaskedRegion;
pub use masking::SegParams;
pub use masking::TandemRepeatParams;
pub use debruijn::DeBruijnGraph;
pub use debruijn::Unitig;
pub use assembly::assembly_stats;
pub use assembly::nx_values;
pub use assembly::AssemblyStats;
pub use taxonomy::KmerClassifier;
pub use taxonomy::TaxonRank;
pub use taxonomy::TaxonomyNode;
pub use taxonomy::TaxonomyTree;
pub use restriction::common_enzymes;
pub use restriction::digest;
pub use restriction::find_cut_sites;
pub use restriction::fragment_sizes;
pub use restriction::CutSite;
pub use restriction::Fragment;
pub use restriction::Overhang;
pub use restriction::RestrictionEnzyme;
pub use motif::discover_motifs;
pub use motif::DiscoveredMotif;
pub use motif::Pwm;
pub use read_sim::simulate_reads;
pub use read_sim::ReadSimConfig;
pub use read_sim::SimulatedRead;
pub use motif_io::motif_similarity;
pub use motif_io::parse_jaspar;
pub use motif_io::parse_meme;
pub use motif_io::parse_transfac;
pub use motif_io::write_jaspar;
pub use motif_io::write_meme;
pub use motif_io::write_transfac;
pub use motif_io::Motif;
pub use motif_io::MotifAlphabet;
Modules§
- alphabet
- Alphabet definitions for biological sequence validation.
- assembly
- Assembly quality-control metrics.
- bwt
- Burrows-Wheeler Transform (BWT) for arbitrary byte sequences.
- codon
- Codon translation, genetic codes, codon usage analysis, and CAI.
- debruijn
- De Bruijn graph construction and unitig extraction.
- fasta
- FASTA/FASTQ parsing and statistics.
- fasta_index
- Indexed FASTA reading with random access via .fai index files.
- fastq
- FASTQ record type and parsing.
- fm_index
- FM-Index for fast substring search on DNA sequences.
- fmd_index
- Bidirectional FM-Index (FMD-Index) for strand-aware DNA search.
- kmer
- Zero-allocation k-mer iterator.
- masking
- Low-complexity and repeat masking for biological sequences.
- minhash
- MinHash and FracMinHash sketching for rapid genome comparison.
- motif
- DNA motif discovery — PWM construction, scanning, and EM-based de novo discovery.
- motif_io
- Motif format I/O — MEME, TRANSFAC, and JASPAR parsers/writers plus PWM comparison.
- orf
- Open Reading Frame (ORF) finder.
- paired
- Paired-end FASTQ support.
- pattern
- Pattern matching algorithms for biological sequences.
- protein_properties
- Protein sequence property analysis.
- pssm
- Position-Specific Scoring Matrices (PSSMs) for motif representation and scanning.
- quality
- Phred quality scores for sequencing reads.
- read_sim
- Illumina-style short-read simulator.
- restriction
- Restriction enzyme recognition, cut-site finding, and in-silico digestion.
- rna_structure
- RNA secondary structure prediction.
- seq
- Generic validated sequence type.
- suffix
- Suffix array construction via the SA-IS algorithm.
- taxonomy
- Taxonomic classification — taxonomy trees, LCA queries, k-mer classifiers.
- trim
- Quality trimming, adapter removal, and read filtering for FASTQ records.
- twobit
- 2-bit DNA encoding for compact storage.
- types
- Concrete sequence type aliases and biologically meaningful operations.