1use std::{
2 cmp::{max, min},
3 collections::HashMap,
4 io::{Read, Write},
5};
6
7use gen_core::{HashId, Strand, is_terminal};
8use gen_graph::{GraphNode, project_path};
9use gen_models::{
10 block_group::BlockGroup,
11 db::GraphConnection,
12 reference_alias::{ReferenceAlias, ReferenceAliasError},
13 sample::Sample,
14};
15use interavl::IntervalTree;
16use noodles::{
17 bed,
18 bed::feature::record_buf::{OtherFields, other_fields::Value},
19 core::Position,
20};
21use thiserror::Error;
22
23#[derive(Debug, Error)]
24pub enum BedError {
25 #[error("Couldn't translate BED entry: {0}")]
26 Io(#[from] std::io::Error),
27 #[error("Error loading reference aliases: {0}")]
28 ReferenceAliasError(#[from] ReferenceAliasError),
29}
30
31pub fn translate_bed<R, W>(
32 conn: &GraphConnection,
33 collection: &str,
34 sample: &str,
35 reader: R,
36 writer: &mut W,
37) -> Result<(), BedError>
38where
39 R: Read,
40 W: Write,
41{
42 let mut record = bed::Record::default();
43 let mut bed_reader = bed::io::reader::Builder::<3>.build_from_reader(reader);
44 let mut bed_writer = bed::io::Writer::<3, _>::new(writer);
45
46 let bgs = Sample::get_block_groups(conn, collection, sample);
47 let sample_bgs: HashMap<String, &BlockGroup> = HashMap::from_iter(
48 bgs.iter()
49 .map(|bg| (bg.name.clone(), bg))
50 .collect::<Vec<(String, &BlockGroup)>>(),
51 );
52
53 let references = sample_bgs.keys().cloned().collect::<Vec<String>>();
55 let references_by_alias = ReferenceAlias::get_references_by_alias(conn, references)?;
56
57 let mut paths: HashMap<HashId, IntervalTree<i64, (GraphNode, Strand)>> = HashMap::new();
58
59 while bed_reader.read_record(&mut record)? != 0 {
60 let ref_name = record.reference_sequence_name().to_string();
61 let ref_name = references_by_alias
62 .get(&ref_name)
63 .unwrap_or(&ref_name)
64 .to_string();
65 let start = record.feature_start().unwrap().get() as i64 - 1;
67 let end = record.feature_end().unwrap().unwrap().get() as i64;
68 if let Some(bg) = sample_bgs.get(&ref_name) {
69 let projection = paths.entry(bg.id).or_insert_with(|| {
70 let path = BlockGroup::get_current_path(conn, &bg.id);
71 let graph = BlockGroup::get_graph(conn, &bg.id);
72 let mut tree = IntervalTree::default();
73 let mut position: i64 = 0;
74 for (node, strand) in project_path(&graph, &path.blocks(conn)) {
75 if !is_terminal(node.node_id) {
76 let end_position = position + node.length();
77 tree.insert(position..end_position, (node, strand));
78 position = end_position;
79 }
80 }
81 tree
82 });
83 let range = start..end;
84 let values: Vec<_> = record.other_fields().iter().map(Value::from).collect();
85 let other_fields = OtherFields::from(values);
86 for (overlap, (node, _strand)) in projection.iter_overlaps(&range) {
87 let overlap_start = max(start, overlap.start) as usize;
88 let overlap_end = min(end, overlap.end) as usize;
89 let out_record = bed::feature::RecordBuf::<3>::builder()
90 .set_reference_sequence_name(format!("{nid}", nid = node.node_id))
91 .set_feature_start(Position::try_from(overlap_start + 1).unwrap())
92 .set_feature_end(Position::try_from(overlap_end).unwrap())
93 .set_other_fields(other_fields.clone())
94 .build();
95 bed_writer.write_feature_record(&out_record)?;
96 }
97 }
98 }
99 Ok(())
100}
101
102#[cfg(test)]
103mod tests {
104 use std::{fs::File, path::PathBuf};
105
106 use gen_models::{reference_alias::ReferenceAlias, sample::Sample};
107
108 use super::translate_bed;
109 use crate::test_helpers::{get_connection, setup_test_data};
110
111 #[test]
112 fn translates_coordinates_to_nodes() {
113 let conn = get_connection();
114 setup_test_data(&conn);
115
116 let bed_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("./fixtures/simple.bed");
117 let collection = "test".to_string();
118
119 let mut buffer = Vec::new();
120 translate_bed(
121 &conn,
122 &collection,
123 "foo",
124 File::open(bed_path.clone()).expect("should open fixture bed"),
125 &mut buffer,
126 )
127 .expect("should translate bed for sample foo");
128 let results = String::from_utf8(buffer).expect("translated output should be valid UTF-8");
129 assert_eq!(
130 results,
131 concat!(
132 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t1\t3\tabc123.1\t0\t-\t1\t10\t0,0,0\t3\t102,188,129,\t0,3508,4691,\n",
133 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t3\t4\tabc123.1\t0\t-\t1\t10\t0,0,0\t3\t102,188,129,\t0,3508,4691,\n",
134 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t4\t10\tabc123.1\t0\t-\t1\t10\t0,0,0\t3\t102,188,129,\t0,3508,4691,\n",
135 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t5\t8\txyz.1\t0\t-\t5\t8\t0,0,0\t1\t113,\t0,\n",
136 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t10\t16\txyz.2\t0\t+\t10\t16\t0,0,0\t2\t142,326,\t0,10710,\n",
137 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t14\t17\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
138 "086ae30894dda8efdc19d4dfadd5e6e24af8066e9ee63e56abe897993bebd112\t17\t23\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
139 )
140 );
141
142 let mut buffer = Vec::new();
143 translate_bed(
144 &conn,
145 &collection,
146 Sample::DEFAULT_NAME,
147 File::open(bed_path).expect("should open fixture bed"),
148 &mut buffer,
149 )
150 .expect("should translate bed for reference sample");
151 let results = String::from_utf8(buffer).expect("translated output should be valid UTF-8");
152 assert_eq!(
153 results,
154 concat!(
155 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t1\t10\tabc123.1\t0\t-\t1\t10\t0,0,0\t3\t102,188,129,\t0,3508,4691,\n",
156 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t5\t8\txyz.1\t0\t-\t5\t8\t0,0,0\t1\t113,\t0,\n",
157 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t10\t16\txyz.2\t0\t+\t10\t16\t0,0,0\t2\t142,326,\t0,10710,\n",
158 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t14\t17\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
159 "086ae30894dda8efdc19d4dfadd5e6e24af8066e9ee63e56abe897993bebd112\t17\t23\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
160 )
161 );
162 }
163
164 #[test]
165 fn translates_bed_using_reference_aliases() {
166 let conn = get_connection();
167 setup_test_data(&conn);
168
169 let bed_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
170 .join("./fixtures/simple-with-reference-alias.bed");
171 let collection = "test".to_string();
172
173 ReferenceAlias::create(
175 &conn,
176 "alternate contig",
177 Some("m456".to_string()),
178 Some("m123".to_string()),
179 Some("m789".to_string()),
180 Some("chr1".to_string()),
181 None,
182 None,
183 )
184 .expect("should create reference alias");
185
186 let mut buffer = Vec::new();
187 translate_bed(
188 &conn,
189 &collection,
190 Sample::DEFAULT_NAME,
191 File::open(bed_path).expect("should open fixture bed"),
192 &mut buffer,
193 )
194 .expect("should translate bed for reference sample");
195
196 let results = String::from_utf8(buffer).expect("translated output should be valid UTF-8");
197 assert_eq!(
198 results,
199 concat!(
200 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t1\t10\tabc123.1\t0\t-\t1\t10\t0,0,0\t3\t102,188,129,\t0,3508,4691,\n",
201 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t5\t8\txyz.1\t0\t-\t5\t8\t0,0,0\t1\t113,\t0,\n",
202 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t10\t16\txyz.2\t0\t+\t10\t16\t0,0,0\t2\t142,326,\t0,10710,\n",
203 "0cbb0b7e8171228e0ea97287f369c0099f0ccabc6a9950320650bc27bd974c8a\t14\t17\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
204 "086ae30894dda8efdc19d4dfadd5e6e24af8066e9ee63e56abe897993bebd112\t17\t23\tfoo.1\t0\t+\t14\t23\t0,0,0\t2\t142,326,\t0,10710,\n",
205 )
206 );
207 }
208}