// genomicframe-core 0.2.0
//
// High-performance genomics I/O and interoperability layer
// Documentation
//! Conversions from genomic record types to intervals
//!
//! This module provides `From` trait implementations for converting
//! format-specific records (BED, VCF, BAM, etc.) into GenomicInterval.

use super::GenomicInterval;

// BED conversions
impl From<&crate::formats::bed::BedRecord> for GenomicInterval {
    /// Builds an interval from a borrowed BED record.
    ///
    /// The chromosome name is cloned and `start`/`end` are copied through
    /// unchanged; no coordinate adjustment is applied.
    fn from(record: &crate::formats::bed::BedRecord) -> Self {
        let chrom = record.chrom.clone();
        let (start, end) = (record.start, record.end);
        Self { chrom, start, end }
    }
}

impl From<crate::formats::bed::BedRecord> for GenomicInterval {
    /// Builds an interval by consuming a BED record.
    ///
    /// The chromosome name is moved out of the record (no clone) and
    /// `start`/`end` are carried over unchanged.
    fn from(record: crate::formats::bed::BedRecord) -> Self {
        let crate::formats::bed::BedRecord {
            chrom, start, end, ..
        } = record;
        Self { chrom, start, end }
    }
}

// VCF conversions
impl From<&crate::formats::vcf::VcfRecord> for GenomicInterval {
    /// Builds an interval from a borrowed VCF record.
    ///
    /// VCF positions are 1-based, so the interval starts at `pos - 1`
    /// (a `pos` of 0 stays at 0) and extends for the length of the
    /// reference allele. The chromosome name is cloned.
    fn from(record: &crate::formats::vcf::VcfRecord) -> Self {
        // Shift to 0-based; saturating keeps a zero position at zero.
        let start = record.pos.saturating_sub(1);
        // SNP: one base wide. Indel: as wide as the reference allele.
        let ref_span = record.reference.len() as u64;

        Self {
            chrom: record.chrom.clone(),
            start,
            end: start + ref_span,
        }
    }
}

impl From<crate::formats::vcf::VcfRecord> for GenomicInterval {
    /// Builds an interval by consuming a VCF record.
    ///
    /// The start is `pos - 1` (or 0 when `pos` is 0) and the end is the
    /// start plus the reference allele length. The chromosome name is
    /// moved out of the record rather than cloned.
    fn from(record: crate::formats::vcf::VcfRecord) -> Self {
        // Measure the reference allele before the record is taken apart.
        let ref_span = record.reference.len() as u64;
        let start = match record.pos {
            0 => 0,
            one_based => one_based - 1,
        };

        Self {
            chrom: record.chrom,
            start,
            end: start + ref_span,
        }
    }
}

// BAM conversions
// BamRecord.rname is resolved during parsing from refid using the header
impl From<&crate::formats::bam::BamRecord> for GenomicInterval {
    /// Builds an interval from a borrowed BAM record.
    ///
    /// The start is the record's `pos` (negative values are clamped to 0).
    /// The end is the start plus the summed lengths of the CIGAR
    /// operations that consume reference bases: `Match`, `Del`,
    /// `RefSkip`, `Equal` and `Diff`. The reference name is cloned.
    fn from(record: &crate::formats::bam::BamRecord) -> Self {
        use crate::formats::bam::CigarOp;

        // Position is already 0-based here; a negative value maps to 0.
        let start = match record.pos {
            p if p >= 0 => p as u64,
            _ => 0,
        };

        // Every other operation (insertions, clips, ...) contributes nothing.
        let ref_span: u64 = record
            .cigar
            .iter()
            .map(|op| match op {
                CigarOp::Match(n)
                | CigarOp::Del(n)
                | CigarOp::RefSkip(n)
                | CigarOp::Equal(n)
                | CigarOp::Diff(n) => *n as u64,
                _ => 0,
            })
            .sum();

        Self {
            chrom: record.rname.clone(),
            start,
            end: start + ref_span,
        }
    }
}

impl From<crate::formats::bam::BamRecord> for GenomicInterval {
    /// Builds an interval by consuming a BAM record.
    ///
    /// Delegates to the borrowed-record conversion, so the result is the
    /// same as converting `&record`.
    fn from(record: crate::formats::bam::BamRecord) -> Self {
        Self::from(&record)
    }
}

// SAM conversions
impl From<&crate::formats::sam::SamRecord> for GenomicInterval {
    /// Builds an interval from a borrowed SAM record.
    ///
    /// SAM positions are 1-based, so the start is `pos - 1`; a
    /// non-positive `pos` (0 means unmapped) yields a start of 0. The end
    /// is the start plus the summed lengths of the reference-consuming
    /// CIGAR operations (`Match`, `Del`, `RefSkip`, `Equal`, `Diff`).
    /// The reference name is cloned.
    fn from(record: &crate::formats::sam::SamRecord) -> Self {
        use crate::formats::bam::CigarOp;

        let start = match record.pos {
            one_based if one_based > 0 => (one_based - 1) as u64,
            _ => 0, // pos = 0 means unmapped
        };

        // Accumulate only the operations that advance along the reference.
        let ref_span = record.cigar.iter().fold(0u64, |acc, op| match op {
            CigarOp::Match(n)
            | CigarOp::Del(n)
            | CigarOp::RefSkip(n)
            | CigarOp::Equal(n)
            | CigarOp::Diff(n) => acc + *n as u64,
            _ => acc,
        });

        Self {
            chrom: record.rname.clone(),
            start,
            end: start + ref_span,
        }
    }
}

impl From<crate::formats::sam::SamRecord> for GenomicInterval {
    /// Builds an interval by consuming a SAM record.
    ///
    /// The start is `pos - 1` for positive positions and 0 otherwise. The
    /// end is the start plus the total length of the `Match`, `Del`,
    /// `RefSkip`, `Equal` and `Diff` CIGAR operations. The reference name
    /// is moved out of the record rather than cloned.
    fn from(record: crate::formats::sam::SamRecord) -> Self {
        use crate::formats::bam::CigarOp;

        // Walk the CIGAR first; the record is taken apart afterwards.
        let mut ref_span: u64 = 0;
        for op in &record.cigar {
            if let CigarOp::Match(n)
            | CigarOp::Del(n)
            | CigarOp::RefSkip(n)
            | CigarOp::Equal(n)
            | CigarOp::Diff(n) = op
            {
                ref_span += *n as u64;
            }
        }

        // SAM uses 1-based coordinates, convert to 0-based.
        let start = match record.pos {
            one_based if one_based > 0 => (one_based - 1) as u64,
            _ => 0,
        };

        Self {
            chrom: record.rname,
            start,
            end: start + ref_span,
        }
    }
}

// GFF conversions
impl From<&crate::formats::gff::GffRecord> for GenomicInterval {
    /// Builds an interval from a borrowed GFF record.
    ///
    /// GFF coordinates are 1-based and inclusive; the interval is 0-based
    /// and half-open. The start is therefore shifted down by one (a start
    /// of 0 stays at 0), while the end is reused as-is. The sequence id is
    /// cloned into `chrom`.
    fn from(record: &crate::formats::gff::GffRecord) -> Self {
        let start = match record.start {
            0 => 0,
            one_based => one_based - 1,
        };
        // A 1-based inclusive end has the same value as a 0-based exclusive end.
        let end = record.end;

        Self {
            chrom: record.seqid.clone(),
            start,
            end,
        }
    }
}

impl From<crate::formats::gff::GffRecord> for GenomicInterval {
    /// Builds an interval by consuming a GFF record.
    ///
    /// The start is `record.start - 1` (0 stays 0) and the end is
    /// `record.end` unchanged. The sequence id is moved into `chrom`
    /// rather than cloned.
    fn from(record: crate::formats::gff::GffRecord) -> Self {
        // Saturating subtraction keeps a zero start at zero.
        let start = record.start.saturating_sub(1);
        let end = record.end;

        Self {
            chrom: record.seqid,
            start,
            end,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::formats::bed::BedRecord;

    /// A borrowed BED record converts to an interval carrying the same
    /// chromosome, start and end.
    #[test]
    fn test_bed_conversion() {
        // Only the three coordinate fields are populated.
        let record = BedRecord {
            chrom: String::from("chr1"),
            start: 100,
            end: 200,
            name: None,
            score: None,
            strand: None,
            thick_start: None,
            thick_end: None,
            item_rgb: None,
            block_count: None,
            block_sizes: None,
            block_starts: None,
        };

        let converted = GenomicInterval::from(&record);

        assert_eq!(converted.chrom, "chr1");
        assert_eq!(converted.start, 100);
        assert_eq!(converted.end, 200);
    }

    // The remaining conversions need fully populated VCF/BAM/SAM/GFF
    // records, which take more setup to build; they are left to the
    // integration tests.
}