Crate gxf2bed

Crate gxf2bed 

Source
Expand description

§gxf2bed

The fastest GTF/GFF-to-BED converter around.

This library provides high-performance conversion of GTF and GFF3 files to various BED formats. It leverages parallel processing and memory mapping for optimal performance on large genomic annotation files.

§Usage

use gxf2bed::{Config, run};
use std::path::PathBuf;

let config = Config {
    input: PathBuf::from("annotations.gtf"),
    output: PathBuf::from("output.bed"),
    threads: 4,
    parent_feature: Some("transcript".to_string()),
    child_features: Some(vec!["exon".to_string()]),
    parent_attribute: Some("transcript_id".to_string()),
    child_attribute: None,
    bed_type: gxf2bed::BedType::Bed12,
    additional_fields: None,
    chunks: 15000,
};

let stats = run(&config)?;
println!("Conversion completed in {:?}", stats.elapsed);
println!("Memory used: {:.2} MB", stats.mem_delta_mb);

§Examples

§Basic conversion

use gxf2bed::{Config, run, BedType};
use std::path::PathBuf;

let config = Config {
    input: PathBuf::from("input.gtf"),
    output: PathBuf::from("output.bed"),
    threads: num_cpus::get(),
    parent_feature: Some("transcript".to_string()),
    child_features: Some(vec!["exon".to_string()]),
    parent_attribute: Some("transcript_id".to_string()),
    child_attribute: None,
    bed_type: BedType::Bed12,
    additional_fields: None,
    chunks: 15000,
};

let stats = run(&config)?;

§Conversion with additional fields

use gxf2bed::{Config, run, BedType};
use std::path::PathBuf;

let config = Config {
    input: PathBuf::from("input.gtf"),
    output: PathBuf::from("output.bed"),
    threads: 4,
    parent_feature: Some("transcript".to_string()),
    child_features: Some(vec!["exon".to_string()]),
    parent_attribute: Some("transcript_id".to_string()),
    child_attribute: None,
    bed_type: BedType::Bed12,
    additional_fields: Some(vec!["gene_name".to_string(), "gene_biotype".to_string()]),
    chunks: 15000,
};

let stats = run(&config)?;

§Converting GFF3 files

use gxf2bed::{Config, run, BedType};
use std::path::PathBuf;

let config = Config {
    input: PathBuf::from("input.gff3"),
    output: PathBuf::from("output.bed"),
    threads: 4,
    parent_feature: Some("mRNA".to_string()),
    child_features: Some(vec!["exon".to_string()]),
    parent_attribute: Some("ID".to_string()),
    child_attribute: None,
    bed_type: BedType::Bed12,
    additional_fields: None,
    chunks: 15000,
};

let stats = run(&config)?;

Re-exports§

pub use cli::Args;
pub use cli::BedType;
pub use config::Config;
pub use convert::run;
pub use convert::RunStats;
pub use error::Gxf2BedError;
pub use error::Result;
pub use memory::max_mem_usage_mb;

Modules§

cli
config
convert
detect
error
memory