Skip to main content

genomicframe_core/interval/
conversions.rs

//! Conversions from genomic record types to intervals.
//!
//! This module provides `From` trait implementations that convert
//! format-specific records (BED, VCF, BAM, SAM, GFF) into a
//! 0-based, half-open `GenomicInterval`.
5
6use super::GenomicInterval;
7
8// BED conversions
9impl From<&crate::formats::bed::BedRecord> for GenomicInterval {
10    fn from(record: &crate::formats::bed::BedRecord) -> Self {
11        GenomicInterval {
12            chrom: record.chrom.clone(),
13            start: record.start,
14            end: record.end,
15        }
16    }
17}
18
19impl From<crate::formats::bed::BedRecord> for GenomicInterval {
20    fn from(record: crate::formats::bed::BedRecord) -> Self {
21        GenomicInterval {
22            chrom: record.chrom,
23            start: record.start,
24            end: record.end,
25        }
26    }
27}
28
29// VCF conversions
30impl From<&crate::formats::vcf::VcfRecord> for GenomicInterval {
31    fn from(record: &crate::formats::vcf::VcfRecord) -> Self {
32        // VCF uses 1-based coordinates, convert to 0-based
33        // For SNPs: pos - 1 .. pos
34        // For indels: depends on representation, but start at pos - 1
35        let start = if record.pos > 0 {
36            record.pos - 1
37        } else {
38            0
39        };
40        let end = start + record.reference.len() as u64;
41
42        GenomicInterval {
43            chrom: record.chrom.clone(),
44            start,
45            end,
46        }
47    }
48}
49
50impl From<crate::formats::vcf::VcfRecord> for GenomicInterval {
51    fn from(record: crate::formats::vcf::VcfRecord) -> Self {
52        let start = if record.pos > 0 {
53            record.pos - 1
54        } else {
55            0
56        };
57        let end = start + record.reference.len() as u64;
58
59        GenomicInterval {
60            chrom: record.chrom,
61            start,
62            end,
63        }
64    }
65}
66
67// BAM conversions
68// BamRecord.rname is resolved during parsing from refid using the header
69impl From<&crate::formats::bam::BamRecord> for GenomicInterval {
70    fn from(record: &crate::formats::bam::BamRecord) -> Self {
71        // BAM uses 0-based coordinates
72        // pos is the leftmost mapping position (0-based in our representation)
73        // Calculate end from CIGAR string (reference-consuming operations)
74        let start = if record.pos >= 0 {
75            record.pos as u64
76        } else {
77            0
78        };
79        let mut length: u64 = 0;
80
81        for op in &record.cigar {
82            use crate::formats::bam::CigarOp;
83            match op {
84                CigarOp::Match(len) | CigarOp::Del(len) | CigarOp::RefSkip(len)
85                | CigarOp::Equal(len) | CigarOp::Diff(len) => {
86                    length += *len as u64;
87                }
88                _ => {} // Insertions, clips don't consume reference
89            }
90        }
91
92        GenomicInterval {
93            chrom: record.rname.clone(),
94            start,
95            end: start + length,
96        }
97    }
98}
99
100impl From<crate::formats::bam::BamRecord> for GenomicInterval {
101    fn from(record: crate::formats::bam::BamRecord) -> Self {
102        (&record).into()
103    }
104}
105
106// SAM conversions
107impl From<&crate::formats::sam::SamRecord> for GenomicInterval {
108    fn from(record: &crate::formats::sam::SamRecord) -> Self {
109        // SAM uses 1-based coordinates, convert to 0-based
110        let start = if record.pos > 0 {
111            (record.pos - 1) as u64
112        } else {
113            0 // pos = 0 means unmapped
114        };
115        let mut length: u64 = 0;
116
117        for op in &record.cigar {
118            use crate::formats::bam::CigarOp;
119            match op {
120                CigarOp::Match(len) | CigarOp::Del(len) | CigarOp::RefSkip(len)
121                | CigarOp::Equal(len) | CigarOp::Diff(len) => {
122                    length += *len as u64;
123                }
124                _ => {}
125            }
126        }
127
128        GenomicInterval {
129            chrom: record.rname.clone(),
130            start,
131            end: start + length,
132        }
133    }
134}
135
136impl From<crate::formats::sam::SamRecord> for GenomicInterval {
137    fn from(record: crate::formats::sam::SamRecord) -> Self {
138        // SAM uses 1-based coordinates, convert to 0-based
139        let start = if record.pos > 0 {
140            (record.pos - 1) as u64
141        } else {
142            0
143        };
144        let mut length: u64 = 0;
145
146        for op in &record.cigar {
147            use crate::formats::bam::CigarOp;
148            match op {
149                CigarOp::Match(len) | CigarOp::Del(len) | CigarOp::RefSkip(len)
150                | CigarOp::Equal(len) | CigarOp::Diff(len) => {
151                    length += *len as u64;
152                }
153                _ => {}
154            }
155        }
156
157        GenomicInterval {
158            chrom: record.rname,
159            start,
160            end: start + length,
161        }
162    }
163}
164
165// GFF conversions
166impl From<&crate::formats::gff::GffRecord> for GenomicInterval {
167    fn from(record: &crate::formats::gff::GffRecord) -> Self {
168        // GFF uses 1-based, inclusive coordinates
169        // Convert to 0-based, half-open
170        let start = if record.start > 0 {
171            record.start - 1
172        } else {
173            0
174        };
175
176        GenomicInterval {
177            chrom: record.seqid.clone(),
178            start,
179            end: record.end, // GFF end is inclusive, but we want exclusive
180        }
181    }
182}
183
184impl From<crate::formats::gff::GffRecord> for GenomicInterval {
185    fn from(record: crate::formats::gff::GffRecord) -> Self {
186        let start = if record.start > 0 {
187            record.start - 1
188        } else {
189            0
190        };
191
192        GenomicInterval {
193            chrom: record.seqid,
194            start,
195            end: record.end,
196        }
197    }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use crate::formats::bed::BedRecord;

    /// A borrowed BED record converts to an interval with the same
    /// chromosome, start, and end.
    #[test]
    fn test_bed_conversion() {
        let record = BedRecord {
            chrom: String::from("chr1"),
            start: 100,
            end: 200,
            name: None,
            score: None,
            strand: None,
            thick_start: None,
            thick_end: None,
            item_rgb: None,
            block_count: None,
            block_sizes: None,
            block_starts: None,
        };

        let converted = GenomicInterval::from(&record);

        assert_eq!(converted.chrom, "chr1");
        assert_eq!(converted.start, 100);
        assert_eq!(converted.end, 200);
    }

    // The remaining conversions need fully populated record instances,
    // which take more setup; they are left to integration tests.
}