Skip to main content

ref_solver/parsing/
dict.rs

1use std::path::Path;
2
3use crate::core::header::QueryHeader;
4use crate::parsing::sam::ParseError;
5
6/// Parse a Picard sequence dictionary (.dict) file
7///
8/// # Errors
9///
10/// Returns `ParseError::Io` if the file cannot be read, or other parse errors
11/// if the content is invalid.
12pub fn parse_dict_file(path: &Path) -> Result<QueryHeader, ParseError> {
13    let content = std::fs::read_to_string(path)?;
14    parse_dict_text(&content)
15}
16
17/// Parse dictionary from text
18///
19/// # Errors
20///
21/// Returns `ParseError::InvalidFormat` if the text is not valid dictionary format,
22/// or `ParseError::TooManyContigs` if the number of contigs exceeds the maximum.
23pub fn parse_dict_text(text: &str) -> Result<QueryHeader, ParseError> {
24    // .dict files are essentially SAM headers with only @HD and @SQ lines
25    crate::parsing::sam::parse_header_text(text)
26}
27
#[cfg(test)]
mod tests {
    use super::*;

    /// A two-contig dictionary yields both contigs, preserving the name and
    /// the `UR` value of the first one.
    #[test]
    fn test_parse_dict_text() {
        // Fields are tab-separated; the raw string keeps the tabs literal.
        let dict_text = r"@HD	VN:1.6
@SQ	SN:chr1	LN:248956422	M5:6aef897c3d6ff0c78aff06ac189178dd	UR:file:///reference/hg38.fa
@SQ	SN:chr2	LN:242193529	M5:f98db672eb0993dcfdabafe2a882905c	UR:file:///reference/hg38.fa
";

        let header = parse_dict_text(dict_text).unwrap();
        assert_eq!(header.contigs.len(), 2);

        let first_contig = &header.contigs[0];
        assert_eq!(first_contig.name, "chr1");
        assert_eq!(
            first_contig.uri,
            Some(String::from("file:///reference/hg38.fa"))
        );
    }
}