//! SAM/BAM/CRAM header parsing (`ref_solver/parsing/sam.rs`).

1use std::borrow::Cow;
2use std::io::BufReader;
3use std::path::Path;
4use thiserror::Error;
5use tracing::warn;
6
7use crate::core::contig::Contig;
8use crate::core::header::QueryHeader;
9use crate::utils::validation::{check_contig_limit, normalize_md5};
10
/// Errors produced while parsing SAM/BAM/CRAM files or raw header text.
#[derive(Error, Debug)]
pub enum ParseError {
    /// Underlying I/O failure while opening or reading a file.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// The header text is structurally invalid (e.g. no `@SQ` lines found).
    #[error("Invalid SAM header format: {0}")]
    InvalidFormat(String),

    /// An error reported by the `noodles` parser, carried as its string form.
    #[error("noodles error: {0}")]
    Noodles(String),

    /// The file extension is not one of `sam`, `bam`, or `cram`.
    #[error("Unsupported file format: {0}")]
    UnsupportedFormat(String),

    /// Contig count exceeded the safety limit (DOS protection).
    // NOTE(review): the message hard-codes 100000 — confirm this matches the
    // limit actually enforced by `check_contig_limit`.
    #[error("Too many contigs: {0} exceeds maximum allowed (100000)")]
    TooManyContigs(usize),
}
28
29/// Parse a SAM/BAM/CRAM file and extract the header
30///
31/// # Errors
32///
33/// Returns `ParseError::Io` if the file cannot be read, `ParseError::Noodles` if
34/// parsing fails, `ParseError::UnsupportedFormat` for unknown extensions,
35/// `ParseError::InvalidFormat` if no contigs are found, or
36/// `ParseError::TooManyContigs` if the limit is exceeded.
37pub fn parse_file(path: &Path) -> Result<QueryHeader, ParseError> {
38    let extension = path
39        .extension()
40        .and_then(|e| e.to_str())
41        .map(str::to_lowercase);
42
43    match extension.as_deref() {
44        Some("sam") => parse_sam_file(path),
45        Some("bam") => parse_bam_file(path),
46        Some("cram") => parse_cram_file(path),
47        Some(ext) => Err(ParseError::UnsupportedFormat(ext.to_string())),
48        None => {
49            // Try to detect from content - default to SAM
50            parse_sam_file(path)
51        }
52    }
53}
54
55/// Parse a SAM file (text format)
56fn parse_sam_file(path: &Path) -> Result<QueryHeader, ParseError> {
57    use noodles::sam;
58
59    let mut reader = std::fs::File::open(path)
60        .map(BufReader::new)
61        .map(sam::io::Reader::new)?;
62
63    let header = reader
64        .read_header()
65        .map_err(|e| ParseError::Noodles(e.to_string()))?;
66
67    header_to_query(&header, Some(path))
68}
69
70/// Parse a BAM file (binary format)
71fn parse_bam_file(path: &Path) -> Result<QueryHeader, ParseError> {
72    use noodles::bam;
73
74    let mut reader = std::fs::File::open(path).map(bam::io::Reader::new)?;
75
76    let header = reader
77        .read_header()
78        .map_err(|e| ParseError::Noodles(e.to_string()))?;
79
80    header_to_query(&header, Some(path))
81}
82
83/// Parse a CRAM file
84fn parse_cram_file(path: &Path) -> Result<QueryHeader, ParseError> {
85    use noodles::cram;
86
87    let mut reader = std::fs::File::open(path).map(cram::io::Reader::new)?;
88
89    // Read file definition
90    reader
91        .read_file_definition()
92        .map_err(|e| ParseError::Noodles(e.to_string()))?;
93
94    let header = reader
95        .read_file_header()
96        .map_err(|e| ParseError::Noodles(e.to_string()))?;
97
98    header_to_query(&header, Some(path))
99}
100
101/// Parse a BAM header from any reader (no file path required).
102///
103/// This enables parsing from in-memory buffers (e.g. `Cursor<Vec<u8>>`)
104/// without writing to a temporary file. Only the header is read;
105/// the reader does not need to contain complete record data.
106///
107/// # Errors
108///
109/// Returns `ParseError::Noodles` if the BAM header cannot be parsed,
110/// `ParseError::InvalidFormat` if no contigs are found, or
111/// `ParseError::TooManyContigs` if the limit is exceeded.
112pub fn parse_bam_from_reader<R: std::io::Read>(reader: R) -> Result<QueryHeader, ParseError> {
113    use noodles::bam;
114
115    let mut reader = bam::io::Reader::new(reader);
116
117    let header = reader
118        .read_header()
119        .map_err(|e| ParseError::Noodles(e.to_string()))?;
120
121    header_to_query(&header, None)
122}
123
124/// Parse a CRAM header from any reader (no file path required).
125///
126/// This enables parsing from in-memory buffers (e.g. `Cursor<Vec<u8>>`)
127/// without writing to a temporary file. Only the file definition and
128/// header are read.
129///
130/// # Errors
131///
132/// Returns `ParseError::Noodles` if the CRAM header cannot be parsed,
133/// `ParseError::InvalidFormat` if no contigs are found, or
134/// `ParseError::TooManyContigs` if the limit is exceeded.
135pub fn parse_cram_from_reader<R: std::io::Read>(reader: R) -> Result<QueryHeader, ParseError> {
136    use noodles::cram;
137
138    let mut reader = cram::io::Reader::new(reader);
139
140    reader
141        .read_file_definition()
142        .map_err(|e| ParseError::Noodles(e.to_string()))?;
143
144    let header = reader
145        .read_file_header()
146        .map_err(|e| ParseError::Noodles(e.to_string()))?;
147
148    header_to_query(&header, None)
149}
150
151/// Convert noodles header to `QueryHeader`
152fn header_to_query(
153    header: &noodles::sam::Header,
154    source: Option<&Path>,
155) -> Result<QueryHeader, ParseError> {
156    use noodles::sam::header::record::value::map::tag::Other;
157
158    let mut contigs = Vec::new();
159
160    for (name, map) in header.reference_sequences() {
161        let name_str = name.to_string();
162        let length = map.length().get() as u64;
163
164        let mut contig = Contig::new(name_str, length);
165
166        // Extract M5 (MD5) tag from other_fields
167        // The M5 tag contains the MD5 checksum of the sequence
168        if let Ok(m5_tag) = Other::try_from(*b"M5") {
169            if let Some(md5_value) = map.other_fields().get(&m5_tag) {
170                let md5_str = md5_value.to_string();
171                // Validate and normalize MD5 using centralized helper
172                if let Some(normalized) = normalize_md5(&md5_str) {
173                    contig.md5 = Some(normalized);
174                } else {
175                    warn!(
176                        contig = %contig.name,
177                        md5 = %md5_str,
178                        "Invalid MD5 checksum format, ignoring"
179                    );
180                }
181            }
182        }
183
184        // Extract AS (Assembly) tag
185        if let Ok(as_tag) = Other::try_from(*b"AS") {
186            if let Some(assembly_value) = map.other_fields().get(&as_tag) {
187                contig.assembly = Some(assembly_value.to_string());
188            }
189        }
190
191        // Extract UR (URI) tag
192        if let Ok(ur_tag) = Other::try_from(*b"UR") {
193            if let Some(uri_value) = map.other_fields().get(&ur_tag) {
194                contig.uri = Some(uri_value.to_string());
195            }
196        }
197
198        // Extract SP (Species) tag
199        if let Ok(sp_tag) = Other::try_from(*b"SP") {
200            if let Some(species_value) = map.other_fields().get(&sp_tag) {
201                contig.species = Some(species_value.to_string());
202            }
203        }
204
205        // Extract AN (Alternate Names) tag - comma-separated list of aliases
206        if let Ok(an_tag) = Other::try_from(*b"AN") {
207            if let Some(aliases_value) = map.other_fields().get(&an_tag) {
208                let aliases: Vec<String> = aliases_value
209                    .to_string()
210                    .split(',')
211                    .map(|s| s.trim().to_string())
212                    .filter(|s| !s.is_empty())
213                    .collect();
214                if !aliases.is_empty() {
215                    contig.aliases = aliases;
216                }
217            }
218        }
219
220        // Check contig limit for DOS protection
221        if check_contig_limit(contigs.len()).is_some() {
222            return Err(ParseError::TooManyContigs(contigs.len()));
223        }
224
225        contigs.push(contig);
226    }
227
228    let mut query = QueryHeader::new(contigs);
229    if let Some(path) = source {
230        query = query.with_source(path.display().to_string());
231    }
232
233    Ok(query)
234}
235
/// Normalize SAM header lines that use spaces instead of tabs.
///
/// Browsers and copy-paste often convert tabs to spaces. This function detects
/// SAM header lines (`@XX` prefix) where fields are space-separated instead of
/// tab-separated and converts the spaces to tabs.
///
/// Only normalizes lines that start with a SAM header record type (`@HD`, `@SQ`,
/// `@RG`, `@PG`) followed by a space and a TAG: pattern. Lines that already
/// contain tabs are left unchanged. `@CO` (comment) lines are not normalized
/// because their content is free-form text where spaces are meaningful.
///
/// Returns the (possibly normalized) text and a boolean indicating whether any
/// normalization was performed. Uses `Cow` to avoid allocation when no
/// normalization is needed (the common case for well-formed input).
#[must_use]
pub fn normalize_sam_whitespace(text: &str) -> (Cow<'_, str>, bool) {
    // Fast path: check if any line needs normalization before allocating
    if !text.lines().any(needs_space_to_tab_normalization) {
        return (Cow::Borrowed(text), false);
    }

    let mut normalized = String::with_capacity(text.len());

    for (index, line) in text.lines().enumerate() {
        // Join on the line index, NOT on `normalized.is_empty()`: the latter
        // silently drops leading blank lines (after appending an empty first
        // line the buffer is still empty, so the separator before line 2 was
        // skipped and the blank line vanished from the output).
        if index > 0 {
            normalized.push('\n');
        }

        if needs_space_to_tab_normalization(line) {
            // Re-join the whitespace-separated fields with single tabs.
            let mut fields = line.split_whitespace();
            if let Some(first) = fields.next() {
                normalized.push_str(first);
            }
            for field in fields {
                normalized.push('\t');
                normalized.push_str(field);
            }
        } else {
            normalized.push_str(line);
        }
    }

    // Preserve trailing newline if present (`lines()` drops it)
    if text.ends_with('\n') {
        normalized.push('\n');
    }

    (Cow::Owned(normalized), true)
}

/// Check if a line is a SAM header line that uses spaces instead of tabs.
///
/// Returns true only when the line starts with a recognized SAM record type
/// followed by whitespace and a TAG: pattern, and the line contains NO tab
/// characters.
fn needs_space_to_tab_normalization(line: &str) -> bool {
    // Any tab means the line is already (at least partially) tab-delimited —
    // leave it alone rather than guess.
    if line.contains('\t') {
        return false;
    }

    // @CO lines are free-form comments — do not normalize
    let sam_prefixes = ["@HD ", "@SQ ", "@RG ", "@PG "];
    if !sam_prefixes.iter().any(|p| line.starts_with(p)) {
        return false;
    }

    // Must have at least one TAG:VALUE pattern after the record type
    // (two ASCII uppercase letters followed by ':').
    line.split_whitespace().skip(1).any(|field| {
        field.len() >= 3
            && field.as_bytes().get(2) == Some(&b':')
            && field.as_bytes()[0].is_ascii_uppercase()
            && field.as_bytes()[1].is_ascii_uppercase()
    })
}
312
313/// Parse header from raw text (stdin or pasted)
314///
315/// # Errors
316///
317/// Returns `ParseError::InvalidFormat` if the text has invalid format, missing
318/// required fields, or no contigs are found, or `ParseError::TooManyContigs`
319/// if the limit is exceeded.
320pub fn parse_header_text(text: &str) -> Result<QueryHeader, ParseError> {
321    let (normalized_text, _) = normalize_sam_whitespace(text);
322    let text = &normalized_text;
323    let mut contigs = Vec::new();
324
325    for line in text.lines() {
326        if !line.starts_with("@SQ") {
327            continue;
328        }
329
330        let mut name: Option<String> = None;
331        let mut length: Option<u64> = None;
332        let mut md5_raw: Option<String> = None;
333        let mut assembly: Option<String> = None;
334        let mut uri: Option<String> = None;
335        let mut species: Option<String> = None;
336        let mut aliases: Vec<String> = Vec::new();
337
338        for field in line.split('\t').skip(1) {
339            if let Some((tag, value)) = field.split_once(':') {
340                match tag {
341                    "SN" => name = Some(value.to_string()),
342                    "LN" => length = value.parse().ok(),
343                    "M5" => md5_raw = Some(value.to_string()),
344                    "AS" => assembly = Some(value.to_string()),
345                    "UR" => uri = Some(value.to_string()),
346                    "SP" => species = Some(value.to_string()),
347                    "AN" => {
348                        // Alternate names (aliases), comma-separated
349                        aliases = value
350                            .split(',')
351                            .map(|s| s.trim().to_string())
352                            .filter(|s| !s.is_empty())
353                            .collect();
354                    }
355                    _ => {}
356                }
357            }
358        }
359
360        if let (Some(ref name_str), Some(length)) = (&name, length) {
361            // Check contig limit for DOS protection
362            if check_contig_limit(contigs.len()).is_some() {
363                return Err(ParseError::TooManyContigs(contigs.len()));
364            }
365
366            // Validate and normalize MD5, warn if invalid
367            let md5 = if let Some(ref raw) = md5_raw {
368                if let Some(normalized) = normalize_md5(raw) {
369                    Some(normalized)
370                } else {
371                    warn!(
372                        contig = %name_str,
373                        md5 = %raw,
374                        "Invalid MD5 checksum format, ignoring"
375                    );
376                    None
377                }
378            } else {
379                None
380            };
381
382            let mut contig = Contig::new(name_str.clone(), length);
383            contig.md5 = md5;
384            contig.assembly = assembly;
385            contig.uri = uri;
386            contig.species = species;
387            contig.aliases = aliases;
388            contigs.push(contig);
389        }
390    }
391
392    if contigs.is_empty() {
393        return Err(ParseError::InvalidFormat(
394            "No @SQ lines found in header".to_string(),
395        ));
396    }
397
398    Ok(QueryHeader::new(contigs))
399}
400
// Unit tests for `parse_header_text` and `normalize_sam_whitespace`.
// The raw strings below contain literal tab characters between SAM fields.
#[cfg(test)]
mod tests {
    use super::*;

    // Happy path: @SQ lines become contigs; @HD/@RG lines are skipped.
    #[test]
    fn test_parse_header_text() {
        let header = r"@HD	VN:1.6	SO:coordinate
@SQ	SN:chr1	LN:248956422	M5:6aef897c3d6ff0c78aff06ac189178dd
@SQ	SN:chr2	LN:242193529	M5:f98db672eb0993dcfdabafe2a882905c
@SQ	SN:chrM	LN:16569
@RG	ID:sample1
";

        let query = parse_header_text(header).unwrap();
        assert_eq!(query.contigs.len(), 3);

        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(query.contigs[0].length, 248_956_422);
        assert_eq!(
            query.contigs[0].md5,
            Some("6aef897c3d6ff0c78aff06ac189178dd".to_string())
        );

        assert_eq!(query.contigs[1].name, "chr2");
        assert_eq!(query.contigs[2].name, "chrM");
        // chrM carries no M5 tag, so md5 stays None.
        assert!(query.contigs[2].md5.is_none());
    }

    // No @SQ lines at all must be an InvalidFormat error, not an empty result.
    #[test]
    fn test_parse_header_text_no_sq() {
        let header = "@HD\tVN:1.6\n@RG\tID:sample1\n";
        let result = parse_header_text(header);
        assert!(result.is_err());
    }

    // AN tag: comma-separated aliases are split and trimmed per contig.
    #[test]
    fn test_parse_header_text_with_aliases() {
        let header = r"@HD	VN:1.6
@SQ	SN:chr1	LN:248956422	M5:6aef897c3d6ff0c78aff06ac189178dd	AN:1,NC_000001.11
@SQ	SN:chrM	LN:16569	AN:MT,chrMT,NC_012920.1
";

        let query = parse_header_text(header).unwrap();
        assert_eq!(query.contigs.len(), 2);

        // Check chr1 aliases
        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(
            query.contigs[0].aliases,
            vec!["1".to_string(), "NC_000001.11".to_string()]
        );

        // Check chrM aliases
        assert_eq!(query.contigs[1].name, "chrM");
        assert_eq!(
            query.contigs[1].aliases,
            vec![
                "MT".to_string(),
                "chrMT".to_string(),
                "NC_012920.1".to_string()
            ]
        );
    }

    // Space-delimited header line is rewritten with tab separators.
    #[test]
    fn test_normalize_sam_whitespace_spaces_to_tabs() {
        let input = "@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(was_normalized);
        assert_eq!(
            normalized,
            "@SQ\tSN:chr1\tLN:248956422\tM5:6aef897c3d6ff0c78aff06ac189178dd\n"
        );
    }

    // Well-formed input takes the borrowed fast path and is untouched.
    #[test]
    fn test_normalize_sam_whitespace_already_tabs() {
        let input = "@SQ\tSN:chr1\tLN:248956422\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(!was_normalized);
        assert_eq!(normalized, input);
    }

    // Multiple space-delimited lines are all normalized in one pass.
    #[test]
    fn test_normalize_sam_whitespace_mixed_lines() {
        let input =
            "@HD VN:1.6 SO:coordinate\n@SQ SN:chr1 LN:248956422\n@SQ SN:chr2 LN:242193529\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(was_normalized);
        assert!(normalized.contains("@HD\tVN:1.6\tSO:coordinate"));
        assert!(normalized.contains("@SQ\tSN:chr1\tLN:248956422"));
        assert!(normalized.contains("@SQ\tSN:chr2\tLN:242193529"));
    }

    // Runs of spaces collapse to a single tab per field boundary.
    #[test]
    fn test_normalize_sam_whitespace_multiple_spaces() {
        let input = "@SQ  SN:chr1  LN:248956422\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(was_normalized);
        assert_eq!(normalized, "@SQ\tSN:chr1\tLN:248956422\n");
    }

    // Lines without a recognized @XX record type are passed through verbatim.
    #[test]
    fn test_normalize_sam_whitespace_preserves_non_header_lines() {
        let input = "some random text with spaces\n@SQ SN:chr1 LN:100\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(was_normalized);
        assert!(normalized.starts_with("some random text with spaces\n"));
        assert!(normalized.contains("@SQ\tSN:chr1\tLN:100"));
    }

    #[test]
    fn test_normalize_sam_whitespace_tabs_and_spaces_mixed() {
        // Line has some tabs and some spaces — leave it alone
        let input = "@SQ\tSN:chr1 LN:248956422\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(!was_normalized);
        assert_eq!(normalized, input);
    }

    // @CO content is free-form text, so its spaces must be preserved.
    #[test]
    fn test_normalize_sam_whitespace_skips_comment_lines() {
        let input = "@CO This is a comment with VN:1.0 mentioned\n@SQ SN:chr1 LN:100\n";
        let (normalized, was_normalized) = normalize_sam_whitespace(input);
        assert!(was_normalized);
        // Comment line should be preserved as-is
        assert!(normalized.starts_with("@CO This is a comment with VN:1.0 mentioned\n"));
        assert!(normalized.contains("@SQ\tSN:chr1\tLN:100"));
    }

    // End-to-end: whitespace normalization runs inside parse_header_text,
    // so space-delimited pasted headers still parse.
    #[test]
    fn test_parse_header_text_with_spaces() {
        let header = "@SQ SN:chr1 LN:248956422 M5:6aef897c3d6ff0c78aff06ac189178dd\n\
                      @SQ SN:chr2 LN:242193529\n";
        let query = parse_header_text(header).unwrap();
        assert_eq!(query.contigs.len(), 2);
        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(query.contigs[0].length, 248_956_422);
        assert_eq!(
            query.contigs[0].md5,
            Some("6aef897c3d6ff0c78aff06ac189178dd".to_string())
        );
        assert_eq!(query.contigs[1].name, "chr2");
        assert_eq!(query.contigs[1].length, 242_193_529);
    }
}
547
// Tests for the reader-based (in-memory) BAM/CRAM header parsing entry points.
// Fixtures are synthesized with the noodles writers so no files are touched.
#[cfg(test)]
mod reader_tests {
    use super::*;
    use std::io::Cursor;

    /// Helper: create a minimal BAM file in memory with the given contigs.
    fn create_test_bam(contigs: &[(&str, usize)]) -> Vec<u8> {
        use noodles::bam;
        use noodles::sam;
        use noodles::sam::header::record::value::map::ReferenceSequence;
        use noodles::sam::header::record::value::Map;
        use std::num::NonZeroUsize;

        let mut header = sam::Header::builder();
        for &(name, length) in contigs {
            header = header.add_reference_sequence(
                name,
                Map::<ReferenceSequence>::new(NonZeroUsize::new(length).unwrap()),
            );
        }
        let header = header.build();

        let mut buf = Vec::new();
        {
            // Scope the writer so it is dropped (and flushed) before buf is returned.
            let mut writer = bam::io::Writer::new(&mut buf);
            writer.write_header(&header).unwrap();
        }
        buf
    }

    #[test]
    fn test_parse_bam_from_reader_basic() {
        let bam_bytes = create_test_bam(&[("chr1", 248_956_422), ("chr2", 242_193_529)]);
        let cursor = Cursor::new(&bam_bytes);
        let query = parse_bam_from_reader(cursor).unwrap();
        assert_eq!(query.contigs.len(), 2);
        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(query.contigs[0].length, 248_956_422);
        assert_eq!(query.contigs[1].name, "chr2");
        assert_eq!(query.contigs[1].length, 242_193_529);
    }

    // Trailing garbage after the header must not matter: only the header is read.
    #[test]
    fn test_parse_bam_from_reader_truncated_after_header() {
        let mut bam_bytes = create_test_bam(&[("chr1", 248_956_422)]);
        bam_bytes.extend_from_slice(&[0u8; 100]);
        let cursor = Cursor::new(&bam_bytes);
        let query = parse_bam_from_reader(cursor).unwrap();
        assert_eq!(query.contigs.len(), 1);
        assert_eq!(query.contigs[0].name, "chr1");
    }

    // An empty stream is not a valid BAM and must error, not panic.
    #[test]
    fn test_parse_bam_from_reader_empty() {
        let cursor = Cursor::new(Vec::<u8>::new());
        let result = parse_bam_from_reader(cursor);
        assert!(result.is_err());
    }

    /// Helper: create a minimal CRAM file in memory with the given contigs.
    ///
    /// Each contig is `(name, length, md5)`.  The CRAM writer requires MD5
    /// checksums on every reference sequence, so we must supply them here.
    fn create_test_cram(contigs: &[(&str, usize, &str)]) -> Vec<u8> {
        use noodles::cram;
        use noodles::sam;
        use noodles::sam::header::record::value::map::reference_sequence::tag;
        use noodles::sam::header::record::value::map::ReferenceSequence;
        use noodles::sam::header::record::value::Map;
        use std::num::NonZeroUsize;

        let mut header = sam::Header::builder();
        for &(name, length, md5) in contigs {
            let map = Map::<ReferenceSequence>::builder()
                .set_length(NonZeroUsize::new(length).unwrap())
                .insert(tag::MD5_CHECKSUM, md5)
                .build()
                .unwrap();
            header = header.add_reference_sequence(name, map);
        }
        let header = header.build();

        let mut buf = Vec::new();
        {
            // Scope the writer so it is dropped (and flushed) before buf is returned.
            let mut writer = cram::io::Writer::new(&mut buf);
            writer.write_file_definition().unwrap();
            writer.write_file_header(&header).unwrap();
        }
        buf
    }

    // Names, lengths, and M5 checksums must all round-trip through the header.
    #[test]
    fn test_parse_cram_from_reader_basic() {
        let cram_bytes = create_test_cram(&[
            ("chr1", 248_956_422, "6aef897c3d6ff0c78aff06ac189178dd"),
            ("chrX", 156_040_895, "01234567890abcdef01234567890abcd"),
        ]);
        let cursor = Cursor::new(&cram_bytes);
        let query = parse_cram_from_reader(cursor).unwrap();
        assert_eq!(query.contigs.len(), 2);
        assert_eq!(query.contigs[0].name, "chr1");
        assert_eq!(query.contigs[0].length, 248_956_422);
        assert_eq!(
            query.contigs[0].md5,
            Some("6aef897c3d6ff0c78aff06ac189178dd".to_string())
        );
        assert_eq!(query.contigs[1].name, "chrX");
        assert_eq!(query.contigs[1].length, 156_040_895);
        assert_eq!(
            query.contigs[1].md5,
            Some("01234567890abcdef01234567890abcd".to_string())
        );
    }

    #[test]
    fn test_parse_cram_from_reader_truncated_after_header() {
        let mut cram_bytes =
            create_test_cram(&[("chr1", 248_956_422, "6aef897c3d6ff0c78aff06ac189178dd")]);
        // Append garbage bytes to simulate a truncated upload that ends mid-record.
        // parse_cram_from_reader only reads the header, so this must still succeed.
        cram_bytes.extend_from_slice(&[0u8; 100]);
        let cursor = Cursor::new(&cram_bytes);
        let query = parse_cram_from_reader(cursor).unwrap();
        assert_eq!(query.contigs.len(), 1);
        assert_eq!(query.contigs[0].name, "chr1");
    }

    // An empty stream has no CRAM file definition and must error, not panic.
    #[test]
    fn test_parse_cram_from_reader_empty() {
        let cursor = Cursor::new(Vec::<u8>::new());
        let result = parse_cram_from_reader(cursor);
        assert!(result.is_err());
    }
}