Skip to main content

rustalign_io/
sam.rs

1//! SAM format output
2
3use rustalign_common::Nuc;
4use std::io::Write;
5
/// Switches selecting which optional tags and coordinate style appear in SAM output.
///
/// Each `print_*` flag corresponds to one optional SAM tag; `true` requests that
/// the tag be emitted. See the `Default` implementation for the stock selection.
#[derive(Debug, Clone)]
pub struct SamConfig {
    /// Emit `AS:i` (alignment score).
    pub print_as: bool,

    /// Emit `XS:i` (secondary alignment score).
    pub print_xs: bool,

    /// Emit `YS:i` (alignment score of the mate).
    pub print_ys: bool,

    /// Emit `X0:i` (number of best alignments).
    pub print_x0: bool,

    /// Emit `X1:i` (number of suboptimal alignments).
    pub print_x1: bool,

    /// Emit `XN:i` (number of Ns in the reference).
    pub print_xn: bool,

    /// Emit `XM:i` (number of mismatches).
    pub print_xm: bool,

    /// Emit `XO:i` (number of gap opens).
    pub print_xo: bool,

    /// Emit `XG:i` (number of gap extensions).
    pub print_xg: bool,

    /// Emit `NM:i` (edit distance).
    pub print_nm: bool,

    /// Emit `MD:Z` (mismatch string).
    pub print_md: bool,

    /// Emit `YF:Z` (mate alignment flags).
    pub print_yf: bool,

    /// Emit `YD:i` (mate distance).
    pub print_yd: bool,

    /// Emit `YT:Z` (mate type).
    pub print_yt: bool,

    /// Emit `ZS:Z` (strand).
    pub print_zs: bool,

    /// Emit `NH:i` (hit count).
    pub print_nh: bool,

    /// Emit `HI:i` (hit index).
    pub print_hi: bool,

    /// Emit `nM:i` (genome mismatches).
    pub print_nm_genome: bool,

    /// Emit `MD:n` (mismatch expressed as a number).
    pub print_md_number: bool,

    /// Emit `jM:i` (junction mismatch).
    pub print_jm: bool,

    /// Emit `jI:i` (junction insertions).
    pub print_ji: bool,

    /// Print full reference coordinates.
    pub full_ref: bool,
}
75
76impl Default for SamConfig {
77    fn default() -> Self {
78        Self {
79            print_as: true,
80            print_xs: true,
81            print_ys: false,
82            print_x0: true,
83            print_x1: false,
84            print_xn: false,
85            print_xm: true,
86            print_xo: true,
87            print_xg: false,
88            print_nm: true,
89            print_md: true,
90            print_yf: false,
91            print_yd: false,
92            print_yt: true,
93            print_zs: false,
94            print_nh: true,
95            print_hi: false,
96            print_nm_genome: false,
97            print_md_number: false,
98            print_jm: false,
99            print_ji: false,
100            full_ref: false,
101        }
102    }
103}
104
105/// A SAM record
106#[derive(Debug, Clone)]
107pub struct SamRecord {
108    /// Query name
109    pub qname: String,
110
111    /// Bitwise flag
112    pub flag: u16,
113
114    /// Reference name
115    pub rname: String,
116
117    /// Position (1-based)
118    pub pos: i32,
119
120    /// Mapping quality
121    pub mapq: u8,
122
123    /// CIGAR string
124    pub cigar: String,
125
126    /// Reference name of mate
127    pub rnext: String,
128
129    /// Position of mate
130    pub pnext: i32,
131
132    /// Template length
133    pub tlen: i32,
134
135    /// Sequence
136    pub seq: Vec<Nuc>,
137
138    /// Quality scores
139    pub qual: Vec<u8>,
140
141    /// Optional fields
142    pub opt: Vec<SamOpt>,
143}
144
/// A single optional tag attached to a SAM record.
///
/// In every variant the first element is the tag name and the second is its
/// value; the variant determines the SAM type code used when printing.
#[derive(Debug, Clone)]
pub enum SamOpt {
    /// Integer-valued tag (printed with type code `i`).
    Int(String, i64),
    /// Text-valued tag (printed with type code `Z`).
    String(String, String),
    /// Floating-point tag (printed with type code `f`).
    Float(String, f64),
}
155
156impl SamRecord {
157    /// Create a new SAM record
158    pub fn new(qname: String) -> Self {
159        Self {
160            qname,
161            flag: 0,
162            rname: "*".to_string(),
163            pos: 0,
164            mapq: 255,
165            cigar: "*".to_string(),
166            rnext: "*".to_string(),
167            pnext: 0,
168            tlen: 0,
169            seq: vec![Nuc::N],
170            qual: vec![255],
171            opt: Vec::new(),
172        }
173    }
174
175    /// Add an optional field
176    pub fn add_opt(&mut self, opt: SamOpt) {
177        self.opt.push(opt);
178    }
179}
180
181impl std::fmt::Display for SamRecord {
182    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
183        // Required fields
184        write!(
185            f,
186            "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t",
187            self.qname,
188            self.flag,
189            self.rname,
190            self.pos,
191            self.mapq,
192            self.cigar,
193            self.rnext,
194            self.pnext,
195            self.tlen,
196            self.seq
197                .iter()
198                .map(|n| n.to_ascii() as char)
199                .collect::<String>()
200        )?;
201
202        // Quality
203        let qual_str: String = self.qual.iter().map(|&q| (q + 33) as char).collect();
204        write!(f, "{}", qual_str)?;
205
206        // Optional fields
207        for opt in &self.opt {
208            match opt {
209                SamOpt::Int(tag, val) => write!(f, "\t{}:i:{}", tag, val)?,
210                SamOpt::String(tag, val) => write!(f, "\t{}:Z:{}", tag, val)?,
211                SamOpt::Float(tag, val) => write!(f, "\t{}:f:{}", tag, val)?,
212            }
213        }
214
215        Ok(())
216    }
217}
218
219/// SAM file writer
220pub struct SamWriter<W: Write> {
221    writer: W,
222    #[allow(dead_code)]
223    config: SamConfig,
224}
225
226impl<W: Write> SamWriter<W> {
227    /// Create a new SAM writer
228    pub fn new(writer: W, config: SamConfig) -> Self {
229        Self { writer, config }
230    }
231
232    /// Write the SAM header
233    ///
234    /// # Arguments
235    /// * `ref_names` - Reference sequence names
236    /// * `ref_lengths` - Reference sequence lengths (must be same length as ref_names)
237    pub fn write_header(
238        &mut self,
239        ref_names: &[String],
240        ref_lengths: &[u32],
241    ) -> std::io::Result<()> {
242        writeln!(self.writer, "@HD\tVN:1.0\tSO:unsorted")?;
243        for (name, len) in ref_names.iter().zip(ref_lengths.iter()) {
244            writeln!(self.writer, "@SQ\tSN:{}\tLN:{}", name, len)?;
245        }
246        writeln!(self.writer, "@PG\tID:rustalign\tPN:rustalign\tVN:0.1.0")?;
247        Ok(())
248    }
249
250    /// Write a SAM record
251    pub fn write(&mut self, record: &SamRecord) -> std::io::Result<()> {
252        writeln!(self.writer, "{}", record)?;
253        Ok(())
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// The stock configuration turns on `AS` and `MD` and leaves `YS` off.
    #[test]
    fn test_sam_config_default() {
        let SamConfig {
            print_as,
            print_md,
            print_ys,
            ..
        } = SamConfig::default();

        assert!(print_as);
        assert!(print_md);
        assert!(!print_ys);
    }

    /// A freshly built record keeps its query name and has a zero flag.
    #[test]
    fn test_sam_record_new() {
        let SamRecord { qname, flag, .. } = SamRecord::new(String::from("read1"));

        assert_eq!(qname, "read1");
        assert_eq!(flag, 0);
    }
}