1use std::{
2 collections::{HashMap, HashSet},
3 hash::Hash,
4 rc::Rc,
5};
6
7use gen_core::{
8 HashId, NO_CHROMOSOME_INDEX, NodeIntervalBlock, PATH_END_NODE_ID, PATH_START_NODE_ID,
9 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX, PathBlock, Strand, calculate_hash, is_end_node,
10 is_start_node, is_terminal, traits::Capnp,
11};
12use gen_graph::{
13 GenGraph, GraphNode, all_intermediate_edges, all_reachable_nodes, all_simple_paths,
14 flatten_to_interval_tree,
15};
16use intervaltree::IntervalTree;
17use rusqlite::{Row, params, types::Value as SQLValue};
18use serde::{Deserialize, Serialize};
19
20use crate::{
21 accession::{Accession, AccessionEdge, AccessionEdgeData, AccessionPath},
22 block_group_edge::{AugmentedEdgeData, BlockGroupEdge, BlockGroupEdgeData},
23 db::GraphConnection,
24 edge::{Edge, EdgeData, GroupBlock},
25 errors::{ChangeError, QueryError},
26 gen_models_capnp::block_group,
27 node::Node,
28 path::{Path, PathData},
29 path_edge::PathEdge,
30 traits::*,
31};
32
33#[derive(Clone, Debug, Deserialize, Eq, Hash, Serialize, PartialEq)]
34pub struct BlockGroup {
35 pub id: HashId,
36 pub collection_name: String,
37 pub sample_name: Option<String>,
38 pub name: String,
39 pub created_on: i64,
40}
41
42impl<'a> Capnp<'a> for BlockGroup {
43 type Builder = block_group::Builder<'a>;
44 type Reader = block_group::Reader<'a>;
45
46 fn write_capnp(&self, builder: &mut Self::Builder) {
47 builder.set_id(&self.id.0).unwrap();
48 builder.set_collection_name(&self.collection_name);
49 match &self.sample_name {
50 None => {
51 builder.reborrow().get_sample_name().set_none(());
52 }
53 Some(n) => {
54 builder.reborrow().get_sample_name().set_some(n.clone());
55 }
56 }
57 builder.set_name(&self.name);
58 builder.set_created_on(self.created_on);
59 }
60
61 fn read_capnp(reader: Self::Reader) -> Self {
62 let id = reader
63 .get_id()
64 .unwrap()
65 .as_slice()
66 .unwrap()
67 .try_into()
68 .unwrap();
69 let collection_name = reader.get_collection_name().unwrap().to_string().unwrap();
70 let sample_name: Option<String> = match reader.get_sample_name().which().unwrap() {
71 block_group::sample_name::None(()) => None,
72 block_group::sample_name::Some(n) => Some(n.unwrap().to_string().unwrap()),
73 };
74 let name = reader.get_name().unwrap().to_string().unwrap();
75 let created_on = reader.get_created_on();
76
77 BlockGroup {
78 id,
79 collection_name,
80 sample_name,
81 name,
82 created_on,
83 }
84 }
85}
86
87#[derive(Clone, Debug, Eq, Hash, PartialEq)]
88pub struct BlockGroupData<'a> {
89 pub collection_name: &'a str,
90 pub sample_name: Option<&'a str>,
91 pub name: String,
92}
93
94#[derive(Clone, Debug)]
95pub struct PathChange {
96 pub block_group_id: HashId,
97 pub path: Path,
98 pub path_accession: Option<String>,
99 pub start: i64,
100 pub end: i64,
101 pub block: PathBlock,
102 pub chromosome_index: i64,
103 pub phased: i64,
104 pub preserve_edge: bool,
105}
106
107pub struct PathCache<'a> {
108 pub cache: HashMap<PathData, Path>,
109 pub intervaltree_cache: HashMap<Path, IntervalTree<i64, NodeIntervalBlock>>,
110 pub conn: &'a GraphConnection,
111}
112
113impl<'a> PathCache<'a> {
114 pub fn new(conn: &'a GraphConnection) -> PathCache<'a> {
115 PathCache {
116 cache: HashMap::<PathData, Path>::new(),
117 intervaltree_cache: HashMap::<Path, IntervalTree<i64, NodeIntervalBlock>>::new(),
118 conn,
119 }
120 }
121
122 pub fn lookup(path_cache: &mut PathCache, block_group_id: &HashId, name: String) -> Path {
123 let path_key = PathData {
124 name: name.clone(),
125 block_group_id: *block_group_id,
126 };
127 let path_lookup = path_cache.cache.get(&path_key);
128 if let Some(path) = path_lookup {
129 path.clone()
130 } else {
131 let conn = path_cache.conn;
132 let new_path = Path::query(
133 conn,
134 "select * from paths where block_group_id = ?1 AND name = ?2",
135 params![block_group_id, name],
136 )[0]
137 .clone();
138
139 path_cache.cache.insert(path_key, new_path.clone());
140 let tree = new_path.intervaltree(conn);
141 path_cache.intervaltree_cache.insert(new_path.clone(), tree);
142 new_path
143 }
144 }
145
146 pub fn get_intervaltree<'b>(
147 path_cache: &'b PathCache<'_>,
148 path: &Path,
149 ) -> Option<&'b IntervalTree<i64, NodeIntervalBlock>> {
150 path_cache.intervaltree_cache.get(path)
151 }
152}
153
154impl BlockGroup {
155 pub fn create(
156 conn: &GraphConnection,
157 collection_name: &str,
158 sample_name: Option<&str>,
159 name: &str,
160 ) -> BlockGroup {
161 let hash = HashId(calculate_hash(&format!(
162 "{collection_name}:{sample_name:?}:{name}"
163 )));
164 let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
165 let query = "INSERT INTO block_groups (id, collection_name, sample_name, name, created_on) VALUES (?1, ?2, ?3, ?4, ?5);";
166 let mut stmt = conn.prepare(query).unwrap();
167 let bg = BlockGroup {
168 id: hash,
169 collection_name: collection_name.to_string(),
170 sample_name: sample_name.map(|s| s.to_string()),
171 name: name.to_string(),
172 created_on: timestamp,
173 };
174 match stmt.execute((hash, collection_name, sample_name, name, timestamp)) {
175 Ok(_) => bg,
176 Err(rusqlite::Error::SqliteFailure(err, _details)) => {
177 if err.code == rusqlite::ErrorCode::ConstraintViolation {
178 bg
179 } else {
180 panic!("something bad happened querying the database")
181 }
182 }
183 Err(_) => {
184 panic!("something bad happened querying the database")
185 }
186 }
187 }
188
189 pub fn delete(
190 conn: &GraphConnection,
191 collection_name: &str,
192 sample_name: Option<&str>,
193 name: &str,
194 ) {
195 if let Some(n) = sample_name {
196 let query = "delete from block_groups where collection_name = ?1 and sample_name = ?2 and name = ?3;";
197 conn.execute(
198 query,
199 params![collection_name.to_string(), n.to_string(), name.to_string()],
200 )
201 .unwrap();
202 } else {
203 let query = "delete from block_groups where collection_name = ?1 and sample_name is null and name = ?2;";
204 conn.execute(
205 query,
206 params![collection_name.to_string(), name.to_string()],
207 )
208 .unwrap();
209 }
210 }
211
212 pub fn get_by_id(conn: &GraphConnection, id: &HashId) -> BlockGroup {
213 let query = "SELECT * FROM block_groups WHERE id = ?1";
214 let mut stmt = conn.prepare(query).unwrap();
215 match stmt.query_row([id], |row| Ok(Self::process_row(row))) {
216 Ok(res) => res,
217 Err(rusqlite::Error::QueryReturnedNoRows) => panic!("No block group with id {id}"),
218 Err(_) => {
219 panic!("something bad happened querying the database")
220 }
221 }
222 }
223
224 pub fn duplicate(&self, conn: &GraphConnection, target_block_group_id: &HashId) {
225 let existing_paths = Path::query(
226 conn,
227 "SELECT * from paths where block_group_id = ?1;",
228 params![self.id],
229 );
230
231 let augmented_edges = BlockGroupEdge::edges_for_block_group(conn, &self.id);
232 let edge_ids = augmented_edges
233 .iter()
234 .map(|edge| edge.edge.id)
235 .collect::<Vec<_>>();
236 let new_block_group_edges = edge_ids
237 .iter()
238 .enumerate()
239 .map(|(i, edge_id)| BlockGroupEdgeData {
240 block_group_id: *target_block_group_id,
241 edge_id: *edge_id,
242 chromosome_index: augmented_edges[i].chromosome_index,
243 phased: augmented_edges[i].phased,
244 })
245 .collect::<Vec<_>>();
246 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
247
248 let mut path_map = HashMap::new();
249
250 for path in existing_paths.iter() {
251 let edge_ids = PathEdge::edges_for_path(conn, &path.id)
252 .into_iter()
253 .map(|edge| edge.id)
254 .collect::<Vec<_>>();
255 let new_path = Path::create(conn, &path.name, target_block_group_id, &edge_ids);
256 path_map.insert(&path.id, new_path.id);
257 }
258
259 for accession in Accession::query(
260 conn,
261 "select * from accessions where path_id IN rarray(?1);",
262 rusqlite::params!(Rc::new(
263 existing_paths
264 .iter()
265 .map(|path| SQLValue::from(path.id))
266 .collect::<Vec<SQLValue>>()
267 )),
268 ) {
269 let edges = AccessionPath::query(
270 conn,
271 "Select * from accession_paths where accession_id = ?1 order by index_in_path ASC;",
272 rusqlite::params!(SQLValue::from(accession.id)),
273 );
274 let new_path_id = &path_map[&accession.path_id];
275 let obj = Accession::create(
276 conn,
277 &accession.name,
278 new_path_id,
279 accession.parent_accession_id.as_ref(),
280 )
281 .expect("Unable to create accession in clone.");
282 AccessionPath::create(
283 conn,
284 &obj.id,
285 &edges.iter().map(|ap| ap.edge_id).collect::<Vec<_>>(),
286 );
287 }
288 }
289
290 pub fn get_or_create_sample_block_group(
291 conn: &GraphConnection,
292 collection_name: &str,
293 sample_name: &str,
294 group_name: &str,
295 parent_sample: Option<&str>,
296 ) -> Result<HashId, QueryError> {
297 let binding = BlockGroup::query(
298 conn,
299 "select * from block_groups where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
300 params![collection_name, sample_name, group_name],
301 );
302 let initial_query_result = binding.first();
303
304 if let Some(block_group) = initial_query_result {
305 return Ok(block_group.id);
306 }
307
308 let block_groups = if let Some(parent_sample_name) = parent_sample {
309 BlockGroup::query(
311 conn,
312 "select * from block_groups where collection_name = ?1 AND sample_name = ?2 AND name = ?3",
313 params![collection_name, parent_sample_name, group_name],
314 )
315 } else {
316 BlockGroup::query(
317 conn,
318 "select * from block_groups where collection_name = ?1 AND sample_name IS null AND name = ?2",
319 params![collection_name, group_name],
320 )
321 };
322
323 if let Some(parent_block_group) = block_groups.first() {
324 let new_block_group =
325 BlockGroup::create(conn, collection_name, Some(sample_name), group_name);
326
327 parent_block_group.duplicate(conn, &new_block_group.id);
329
330 Ok(new_block_group.id)
331 } else {
332 let error_message = if let Some(parent_sample_name) = parent_sample {
333 format!(
334 "Block group not found for either new sample ({sample_name}) or parent sample ({parent_sample_name})"
335 )
336 } else {
337 format!(
338 "Block group not found for new sample ({sample_name}) and default parent sample"
339 )
340 };
341 Err(QueryError::ResultsNotFound(error_message))
342 }
343 }
344
345 pub fn get_id(collection_name: &str, sample_name: Option<&str>, group_name: &str) -> HashId {
346 HashId(calculate_hash(&format!(
347 "{collection_name}:{sample_name:?}:{group_name}"
348 )))
349 }
350
351 pub fn get_graph(conn: &GraphConnection, block_group_id: &HashId) -> GenGraph {
352 let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
353 let blocks = Edge::blocks_from_edges(conn, &edges);
354 let (graph, _) = Edge::build_graph(&edges, &blocks);
355 graph
356 }
357
358 pub fn prune_graph(graph: &mut GenGraph) {
359 let mut root_nodes = HashSet::new();
362 let mut edges_to_remove: Vec<(GraphNode, GraphNode)> = vec![];
363 for node in graph.nodes() {
364 if node.node_id == PATH_START_NODE_ID {
365 root_nodes.insert(node);
366 }
367 let mut edges_by_ci: HashMap<i64, (GraphNode, GraphNode, i64)> = HashMap::new();
368 for (source_node, target_node, edge_weights) in graph.edges(node) {
369 for edge_weight in edge_weights {
370 if edge_weight.chromosome_index == NO_CHROMOSOME_INDEX {
371 continue;
372 }
373 if edge_weight.chromosome_index == PRESERVE_EDIT_SITE_CHROMOSOME_INDEX {
374 edges_to_remove.push((source_node, target_node));
375 continue;
376 }
377 edges_by_ci
378 .entry(edge_weight.chromosome_index)
379 .and_modify(|(source, target, created_on)| {
380 if edge_weight.created_on > *created_on {
381 edges_to_remove.push((*source, *target));
382 *source = source_node;
383 *target = target_node;
384 *created_on = edge_weight.created_on;
385 } else {
386 edges_to_remove.push((source_node, target_node));
387 }
388 })
389 .or_insert((source_node, target_node, edge_weight.created_on));
390 }
391 }
392 }
393
394 for (source, target) in edges_to_remove.iter() {
395 graph.remove_edge(*source, *target);
396 }
397
398 let reachable_nodes = all_reachable_nodes(&*graph, &Vec::from_iter(root_nodes));
399 let mut to_remove = vec![];
400 for node in graph.nodes() {
401 if !reachable_nodes.contains(&node) {
402 to_remove.push(node);
403 }
404 }
405 for node in to_remove {
406 graph.remove_node(node);
407 }
408 }
409
410 pub fn get_all_sequences(
411 conn: &GraphConnection,
412 block_group_id: &HashId,
413 _prune: bool,
414 ) -> HashSet<String> {
415 let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id)
416 .into_iter()
417 .filter(|edge| edge.chromosome_index != PRESERVE_EDIT_SITE_CHROMOSOME_INDEX)
418 .collect::<Vec<_>>();
419 let blocks = Edge::blocks_from_edges(conn, &edges);
420
421 let (mut graph, _) = Edge::build_graph(&edges, &blocks);
422 BlockGroup::prune_graph(&mut graph);
423
424 let mut start_nodes = vec![];
425 let mut end_nodes = vec![];
426 for node in graph.nodes() {
427 if is_start_node(node.node_id) {
428 start_nodes.push(node);
429 } else if is_end_node(node.node_id) {
430 end_nodes.push(node);
431 }
432 }
433 let blocks_by_id = blocks
434 .clone()
435 .into_iter()
436 .map(|block| (block.id, block))
437 .collect::<HashMap<i64, GroupBlock>>();
438 let mut sequences = HashSet::<String>::new();
439
440 for start_node in start_nodes {
441 for end_node in &end_nodes {
442 if start_node == *end_node {
444 let block = blocks_by_id.get(&start_node.block_id).unwrap();
445 if block.node_id != PATH_START_NODE_ID && block.node_id != PATH_END_NODE_ID {
446 sequences.insert(block.sequence());
447 }
448 } else {
449 for path in all_simple_paths(&graph, start_node, *end_node) {
450 let mut current_sequence = "".to_string();
451 for node in path {
452 let block = blocks_by_id.get(&node.block_id).unwrap();
453 let block_sequence = block.sequence();
454 current_sequence.push_str(&block_sequence);
455 }
456 sequences.insert(current_sequence);
457 }
458 }
459 }
460 }
461
462 sequences
463 }
464
465 pub fn add_accession(
466 conn: &GraphConnection,
467 path: &Path,
468 name: &str,
469 start: i64,
470 end: i64,
471 cache: &mut PathCache,
472 ) -> Accession {
473 let tree = PathCache::get_intervaltree(cache, path).unwrap();
474 let start_blocks: Vec<&NodeIntervalBlock> =
475 tree.query_point(start).map(|x| &x.value).collect();
476 assert_eq!(start_blocks.len(), 1);
477 let start_block = start_blocks[0];
478 let end_blocks: Vec<&NodeIntervalBlock> = tree.query_point(end).map(|x| &x.value).collect();
479 assert_eq!(end_blocks.len(), 1);
480 let end_block = end_blocks[0];
481 let start_edge = AccessionEdgeData {
484 source_node_id: PATH_START_NODE_ID,
485 source_coordinate: -1,
486 source_strand: Strand::Forward,
487 target_node_id: start_block.node_id,
488 target_coordinate: start - start_block.start + start_block.sequence_start,
489 target_strand: Strand::Forward,
490 chromosome_index: 0,
491 };
492 let end_edge = AccessionEdgeData {
493 source_node_id: end_block.node_id,
494 source_coordinate: end - end_block.start + end_block.sequence_start,
495 source_strand: Strand::Forward,
496 target_node_id: PATH_END_NODE_ID,
497 target_coordinate: -1,
498 target_strand: Strand::Forward,
499 chromosome_index: 0,
500 };
501 let accession =
502 Accession::create(conn, name, &path.id, None).expect("Unable to create accession.");
503 let mut path_edges = vec![start_edge];
504 if start_block == end_block {
505 path_edges.push(end_edge);
506 } else {
507 let mut in_range = false;
508 let path_blocks: Vec<&NodeIntervalBlock> = tree
509 .iter_sorted()
510 .map(|x| &x.value)
511 .filter(|block| {
512 if block.block_id == start_block.block_id {
513 in_range = true;
514 } else if block.block_id == end_block.block_id {
515 in_range = false;
516 return true;
517 }
518 in_range
519 })
520 .collect::<Vec<_>>();
521 for (block, next_block) in path_blocks.iter().zip(path_blocks[1..].iter()) {
523 path_edges.push(AccessionEdgeData {
524 source_node_id: block.node_id,
525 source_coordinate: block.sequence_end,
526 source_strand: block.strand,
527 target_node_id: next_block.node_id,
528 target_coordinate: next_block.sequence_start,
529 target_strand: next_block.strand,
530 chromosome_index: 0,
531 })
532 }
533 path_edges.push(end_edge);
534 }
535 AccessionPath::create(
536 conn,
537 &accession.id,
538 &AccessionEdge::bulk_create(conn, &path_edges),
539 );
540 accession
541 }
542
543 pub fn insert_changes(
544 conn: &GraphConnection,
545 changes: &[PathChange],
546 cache: &mut PathCache,
547 modify_blockgroup: bool,
548 ) -> Result<(), ChangeError> {
549 let mut new_augmented_edges_by_block_group =
550 HashMap::<&HashId, Vec<AugmentedEdgeData>>::new();
551 let mut new_accession_edges = HashMap::new();
552 let mut tree_map = HashMap::new();
553 for change in changes {
554 let tree = if modify_blockgroup {
555 tree_map.entry(change.block_group_id).or_insert_with(|| {
556 BlockGroup::intervaltree_for(conn, &change.block_group_id, true)
557 })
558 } else {
559 PathCache::get_intervaltree(cache, &change.path).unwrap()
560 };
561 let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
562 new_augmented_edges_by_block_group
563 .entry(&change.block_group_id)
564 .and_modify(|new_edge_data| new_edge_data.extend(new_augmented_edges.clone()))
565 .or_insert_with(|| new_augmented_edges.clone());
566 if let Some(accession) = &change.path_accession {
567 new_accession_edges
568 .entry((&change.path, accession))
569 .and_modify(|new_edge_data: &mut Vec<AugmentedEdgeData>| {
570 new_edge_data.extend(new_augmented_edges.clone())
571 })
572 .or_insert_with(|| new_augmented_edges.clone());
573 }
574 }
575
576 let mut edge_data_map = HashMap::new();
577
578 for (block_group_id, new_augmented_edges) in new_augmented_edges_by_block_group {
579 let new_edges = new_augmented_edges
580 .iter()
581 .map(|augmented_edge| augmented_edge.edge_data)
582 .collect::<Vec<_>>();
583 let edge_ids = Edge::bulk_create(conn, &new_edges);
584 for (i, edge_data) in new_edges.iter().enumerate() {
585 edge_data_map.insert(*edge_data, edge_ids[i]);
586 }
587 let new_block_group_edges = edge_ids
588 .iter()
589 .enumerate()
590 .map(|(i, edge_id)| BlockGroupEdgeData {
591 block_group_id: *block_group_id,
592 edge_id: *edge_id,
593 chromosome_index: new_augmented_edges[i].chromosome_index,
594 phased: new_augmented_edges[i].phased,
595 })
596 .collect::<Vec<_>>();
597 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
598 }
599
600 for ((path, accession_name), path_edges) in new_accession_edges {
601 match Accession::get(
602 conn,
603 "select * from accessions where name = ?1 AND path_id = ?2",
604 params![accession_name, path.id],
605 ) {
606 Ok(_) => {
607 println!(
608 "accession already exists, consider a better matching algorithm to determine if this is an error."
609 );
610 }
611 Err(_) => {
612 let acc_edges = AccessionEdge::bulk_create(
613 conn,
614 &path_edges
615 .iter()
616 .map(AccessionEdgeData::from)
617 .collect::<Vec<_>>(),
618 );
619 let acc = Accession::create(conn, accession_name, &path.id, None)
620 .expect("Accession could not be created.");
621 AccessionPath::create(conn, &acc.id, &acc_edges);
622 }
623 }
624 }
625 Ok(())
626 }
627
628 #[allow(clippy::ptr_arg)]
629 #[allow(clippy::needless_late_init)]
630 pub fn insert_change(
631 conn: &GraphConnection,
632 change: &PathChange,
633 tree: &IntervalTree<i64, NodeIntervalBlock>,
634 ) -> Result<(), ChangeError> {
635 let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
636 let new_edges = new_augmented_edges
637 .iter()
638 .map(|augmented_edge| augmented_edge.edge_data)
639 .collect::<Vec<_>>();
640 let edge_ids = Edge::bulk_create(conn, &new_edges);
641 let new_block_group_edges = edge_ids
642 .iter()
643 .enumerate()
644 .map(|(i, edge_id)| BlockGroupEdgeData {
645 block_group_id: change.block_group_id,
646 edge_id: *edge_id,
647 chromosome_index: new_augmented_edges[i].chromosome_index,
648 phased: new_augmented_edges[i].phased,
649 })
650 .collect::<Vec<_>>();
651 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
652 Ok(())
653 }
654
655 fn set_up_new_edges(
656 change: &PathChange,
657 tree: &IntervalTree<i64, NodeIntervalBlock>,
658 ) -> Result<Vec<AugmentedEdgeData>, ChangeError> {
659 let start_blocks: Vec<&NodeIntervalBlock> =
660 tree.query_point(change.start).map(|x| &x.value).collect();
661 assert_eq!(start_blocks.len(), 1);
662 let previous_start_blocks: Vec<&NodeIntervalBlock> = tree
665 .query_point(change.start - 1)
666 .map(|x| &x.value)
667 .collect();
668 assert_eq!(previous_start_blocks.len(), 1);
669 let start_block = if start_blocks[0].start == change.start {
670 previous_start_blocks[0]
673 } else {
674 start_blocks[0]
675 };
676
677 if is_start_node(start_block.node_id) && change.start < start_block.end {
684 return Err(ChangeError::OutOfBounds(format!(
685 "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
686 pos = change.start,
687 path_pos = start_block.end
688 )));
689 }
690
691 let end_blocks: Vec<&NodeIntervalBlock> =
692 tree.query_point(change.end).map(|x| &x.value).collect();
693 assert_eq!(end_blocks.len(), 1);
694 let end_block = end_blocks[0];
695
696 if is_end_node(end_block.node_id) && change.end > end_block.start {
697 return Err(ChangeError::OutOfBounds(format!(
698 "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
699 pos = change.end,
700 path_pos = end_block.start
701 )));
702 }
703
704 let mut new_edges = vec![];
705
706 if change.block.sequence_start == change.block.sequence_end {
707 let source_coordinate = change.start - start_block.start + start_block.sequence_start;
709 let target_coordinate = change.end - end_block.start + end_block.sequence_start;
710 let mut aug_edges = vec![];
711 let new_edge = EdgeData {
712 source_node_id: start_block.node_id,
713 source_coordinate,
714 source_strand: Strand::Forward,
715 target_node_id: end_block.node_id,
716 target_coordinate,
717 target_strand: Strand::Forward,
718 };
719 aug_edges.push(AugmentedEdgeData {
720 edge_data: new_edge,
721 chromosome_index: change.chromosome_index,
722 phased: change.phased,
723 });
724
725 if change.start == 0 {
729 let target_coordinate = change.end - end_block.start + end_block.sequence_start;
730 let new_beginning_edge = EdgeData {
731 source_node_id: PATH_START_NODE_ID,
732 source_coordinate: 0,
733 source_strand: Strand::Forward,
734 target_node_id: end_block.node_id,
735 target_coordinate,
736 target_strand: Strand::Forward,
737 };
738 aug_edges.push(AugmentedEdgeData {
739 edge_data: new_beginning_edge,
740 chromosome_index: change.chromosome_index,
741 phased: change.phased,
742 });
743 if !is_terminal(end_block.node_id) {
744 new_edges.push(AugmentedEdgeData {
745 edge_data: EdgeData {
746 source_node_id: end_block.node_id,
747 source_coordinate: target_coordinate,
748 source_strand: Strand::Forward,
749 target_node_id: end_block.node_id,
750 target_coordinate,
751 target_strand: Strand::Forward,
752 },
753 chromosome_index: if change.preserve_edge {
754 0
755 } else {
756 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
757 },
758 phased: 0,
759 });
760 }
761 } else {
762 if !is_terminal(start_block.node_id) {
763 new_edges.push(AugmentedEdgeData {
764 edge_data: EdgeData {
765 source_node_id: start_block.node_id,
766 source_coordinate,
767 source_strand: Strand::Forward,
768 target_node_id: start_block.node_id,
769 target_coordinate: source_coordinate,
770 target_strand: Strand::Forward,
771 },
772 chromosome_index: if change.preserve_edge {
773 0
774 } else {
775 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
776 },
777 phased: 0,
778 });
779 };
780 if !is_terminal(end_block.node_id) {
781 new_edges.push(AugmentedEdgeData {
782 edge_data: EdgeData {
783 source_node_id: end_block.node_id,
784 source_coordinate: target_coordinate,
785 source_strand: Strand::Forward,
786 target_node_id: end_block.node_id,
787 target_coordinate,
788 target_strand: Strand::Forward,
789 },
790 chromosome_index: if change.preserve_edge {
791 0
792 } else {
793 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
794 },
795 phased: 0,
796 });
797 }
798 }
799 new_edges.extend(aug_edges);
800 } else {
804 let insertion_start_coordinate =
806 change.start - start_block.start + start_block.sequence_start;
807 let new_start_edge = EdgeData {
808 source_node_id: start_block.node_id,
809 source_coordinate: insertion_start_coordinate,
810 source_strand: Strand::Forward,
811 target_node_id: change.block.node_id,
812 target_coordinate: change.block.sequence_start,
813 target_strand: Strand::Forward,
814 };
815 let new_augmented_start_edge = AugmentedEdgeData {
816 edge_data: new_start_edge,
817 chromosome_index: change.chromosome_index,
818 phased: change.phased,
819 };
820 let insertion_end_coordinate = change.end - end_block.start + end_block.sequence_start;
821 let new_end_edge = EdgeData {
822 source_node_id: change.block.node_id,
823 source_coordinate: change.block.sequence_end,
824 source_strand: Strand::Forward,
825 target_node_id: end_block.node_id,
826 target_coordinate: insertion_end_coordinate,
827 target_strand: Strand::Forward,
828 };
829 let new_augmented_end_edge = AugmentedEdgeData {
830 edge_data: new_end_edge,
831 chromosome_index: change.chromosome_index,
832 phased: change.phased,
833 };
834
835 if change.start == 0 {
836 new_edges.push(AugmentedEdgeData {
837 edge_data: EdgeData {
838 source_node_id: PATH_START_NODE_ID,
839 source_coordinate: 0,
840 source_strand: Strand::Forward,
841 target_node_id: change.block.node_id,
842 target_coordinate: change.block.sequence_start,
843 target_strand: Strand::Forward,
844 },
845 chromosome_index: change.chromosome_index,
846 phased: 0,
847 });
848 }
849
850 if !is_terminal(start_block.node_id) {
851 new_edges.push(AugmentedEdgeData {
852 edge_data: EdgeData {
853 source_node_id: start_block.node_id,
854 source_coordinate: insertion_start_coordinate,
855 source_strand: Strand::Forward,
856 target_node_id: start_block.node_id,
857 target_coordinate: insertion_start_coordinate,
858 target_strand: Strand::Forward,
859 },
860 chromosome_index: if change.preserve_edge {
861 0
862 } else {
863 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
864 },
865 phased: 0,
866 });
867 }
868 if !is_terminal(end_block.node_id) {
869 new_edges.push(AugmentedEdgeData {
870 edge_data: EdgeData {
871 source_node_id: end_block.node_id,
872 source_coordinate: insertion_end_coordinate,
873 source_strand: Strand::Forward,
874 target_node_id: end_block.node_id,
875 target_coordinate: insertion_end_coordinate,
876 target_strand: Strand::Forward,
877 },
878 chromosome_index: if change.preserve_edge {
879 0
880 } else {
881 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
882 },
883 phased: 0,
884 });
885 }
886
887 new_edges.push(new_augmented_start_edge);
888 new_edges.push(new_augmented_end_edge);
889 }
890
891 Ok(new_edges)
892 }
893
894 pub fn intervaltree_for(
895 conn: &GraphConnection,
896 block_group_id: &HashId,
897 remove_ambiguous_positions: bool,
898 ) -> IntervalTree<i64, NodeIntervalBlock> {
899 let mut graph = BlockGroup::get_graph(conn, block_group_id);
901 BlockGroup::prune_graph(&mut graph);
902 flatten_to_interval_tree(&graph, remove_ambiguous_positions)
903 }
904
905 pub fn get_current_path(conn: &GraphConnection, block_group_id: &HashId) -> Path {
906 let paths = Path::query(
907 conn,
908 "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
909 params![block_group_id],
910 );
911 paths[0].clone()
912 }
913
914 pub fn get_path_by_name(
915 conn: &GraphConnection,
916 block_group_id: &HashId,
917 path_name: &str,
918 ) -> Option<Path> {
919 let paths = Path::query(
920 conn,
921 "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
922 params![block_group_id],
923 );
924
925 for path in &paths {
926 if path.name == path_name {
927 return Some(path.clone());
928 }
929 }
930
931 None
932 }
933
934 #[allow(clippy::too_many_arguments)]
935 pub fn derive_subgraph(
936 conn: &GraphConnection,
937 collection_name: &str,
938 child_sample_name: &str,
939 source_block_group_id: &HashId,
940 start_block: &NodeIntervalBlock,
941 end_block: &NodeIntervalBlock,
942 start_node_coordinate: i64,
943 end_node_coordinate: i64,
944 target_block_group_id: &HashId,
945 ) -> HashMap<HashId, HashId> {
946 let current_graph = BlockGroup::get_graph(conn, source_block_group_id);
947 let start_node = current_graph
948 .nodes()
949 .find(|node| {
950 node.node_id == start_block.node_id
951 && node.sequence_start <= start_node_coordinate
952 && node.sequence_end >= start_node_coordinate
953 })
954 .unwrap();
955 let end_node = current_graph
956 .nodes()
957 .find(|node| {
958 node.node_id == end_block.node_id
959 && node.sequence_start <= end_node_coordinate
960 && node.sequence_end >= end_node_coordinate
961 })
962 .unwrap();
963 let subgraph_edges = all_intermediate_edges(¤t_graph, start_node, end_node);
964
965 let subgraph_edge_ids = subgraph_edges
967 .iter()
968 .map(|(_to, _from, edge_info)| edge_info[0].edge_id)
969 .collect::<Vec<_>>();
970 let source_edges = Edge::query_by_ids(conn, &subgraph_edge_ids);
971
972 let mut old_node_ids = HashSet::new();
979 for source_edge in &source_edges {
980 old_node_ids.insert(source_edge.source_node_id);
981 old_node_ids.insert(source_edge.target_node_id);
982 }
983
984 old_node_ids.insert(start_block.node_id);
985 old_node_ids.insert(end_block.node_id);
986
987 let old_nodes = Node::query_by_ids(conn, &old_node_ids);
988 let old_nodes_by_id = old_nodes
989 .iter()
990 .map(|node| (node.id, node))
991 .collect::<HashMap<_, _>>();
992
993 let mut new_node_ids_by_old = HashMap::new();
994 let old_start_node = old_nodes_by_id.get(&start_block.node_id).unwrap();
995 let new_start_node_hash = HashId(calculate_hash(&format!(
996 "{}.{}.{}.bg-{}",
997 collection_name, child_sample_name, old_start_node.id, target_block_group_id
998 )));
999 new_node_ids_by_old.insert(
1000 old_start_node.id,
1001 Node::create(conn, &old_start_node.sequence_hash, &new_start_node_hash),
1002 );
1003 let old_end_node = old_nodes_by_id.get(&end_block.node_id).unwrap();
1004 let new_end_node_hash = HashId(calculate_hash(&format!(
1005 "{}.{}.{}.bg-{}",
1006 collection_name, child_sample_name, old_end_node.id, target_block_group_id
1007 )));
1008 new_node_ids_by_old.insert(
1009 old_end_node.id,
1010 Node::create(conn, &old_end_node.sequence_hash, &new_end_node_hash),
1011 );
1012
1013 let mut new_edges = vec![];
1014 for source_edge in &source_edges {
1015 let new_source_node_id = *new_node_ids_by_old
1016 .entry(source_edge.source_node_id)
1017 .or_insert_with(|| {
1018 let old_source_node = old_nodes_by_id.get(&source_edge.source_node_id).unwrap();
1019 let new_source_node_hash = HashId(calculate_hash(&format!(
1020 "{}.{}.{}.bg-{}",
1021 collection_name,
1022 child_sample_name,
1023 old_source_node.id,
1024 target_block_group_id
1025 )));
1026 Node::create(conn, &old_source_node.sequence_hash, &new_source_node_hash)
1027 });
1028 let new_target_node_id = *new_node_ids_by_old
1029 .entry(source_edge.target_node_id)
1030 .or_insert_with(|| {
1031 let old_target_node = old_nodes_by_id.get(&source_edge.target_node_id).unwrap();
1032 let new_target_node_hash = HashId(calculate_hash(&format!(
1033 "{}.{}.{}.bg-{}",
1034 collection_name,
1035 child_sample_name,
1036 old_target_node.id,
1037 target_block_group_id
1038 )));
1039 Node::create(conn, &old_target_node.sequence_hash, &new_target_node_hash)
1040 });
1041
1042 new_edges.push(EdgeData {
1043 source_node_id: new_source_node_id,
1044 source_coordinate: source_edge.source_coordinate,
1045 source_strand: source_edge.source_strand,
1046 target_node_id: new_target_node_id,
1047 target_coordinate: source_edge.target_coordinate,
1048 target_strand: source_edge.target_strand,
1049 });
1050 }
1051
1052 let new_edge_ids = Edge::bulk_create(conn, &new_edges);
1053 let new_edge_ids_by_old = source_edges
1054 .iter()
1055 .zip(new_edge_ids.iter())
1056 .map(|(source_edge, new_edge_id)| (&source_edge.id, new_edge_id))
1057 .collect::<HashMap<_, _>>();
1058
1059 let source_block_group_edges = BlockGroupEdge::specific_edges_for_block_group(
1060 conn,
1061 source_block_group_id,
1062 &subgraph_edge_ids,
1063 );
1064 let source_edge_ids = source_edges
1065 .iter()
1066 .map(|edge| &edge.id)
1067 .collect::<HashSet<_>>();
1068 let source_block_group_edges = source_block_group_edges
1069 .iter()
1070 .filter(|block_group_edge| source_edge_ids.contains(&block_group_edge.edge.id))
1071 .collect::<Vec<_>>();
1072 let source_block_group_edges_by_edge_id = source_block_group_edges
1073 .iter()
1074 .map(|block_group_edge| (&block_group_edge.edge.id, block_group_edge))
1075 .collect::<HashMap<_, _>>();
1076
1077 let subgraph_edge_inputs = source_block_group_edges
1078 .iter()
1079 .map(|edge| {
1080 let block_group_edge = source_block_group_edges_by_edge_id
1081 .get(&edge.edge.id)
1082 .unwrap();
1083 let new_edge_id = new_edge_ids_by_old.get(&edge.edge.id).unwrap();
1084 BlockGroupEdgeData {
1085 block_group_id: *target_block_group_id,
1086 edge_id: **new_edge_id,
1087 chromosome_index: block_group_edge.chromosome_index,
1088 phased: block_group_edge.phased,
1089 }
1090 })
1091 .collect::<Vec<_>>();
1092
1093 let new_start_node_id = new_node_ids_by_old.get(&start_block.node_id).unwrap();
1094 let new_start_edge = Edge::create(
1095 conn,
1096 PATH_START_NODE_ID,
1097 0,
1098 Strand::Forward,
1099 *new_start_node_id,
1100 start_node_coordinate,
1101 start_block.strand,
1102 );
1103 let new_start_edge_data = BlockGroupEdgeData {
1104 block_group_id: *target_block_group_id,
1105 edge_id: new_start_edge.id,
1106 chromosome_index: 0,
1107 phased: 0,
1108 };
1109 let new_end_node_id = new_node_ids_by_old.get(&end_block.node_id).unwrap();
1110 let new_end_edge = Edge::create(
1111 conn,
1112 *new_end_node_id,
1113 end_node_coordinate,
1114 end_block.strand,
1115 PATH_END_NODE_ID,
1116 0,
1117 Strand::Forward,
1118 );
1119 let new_end_edge_data = BlockGroupEdgeData {
1120 block_group_id: *target_block_group_id,
1121 edge_id: new_end_edge.id,
1122 chromosome_index: 0,
1123 phased: 0,
1124 };
1125 let mut all_edges = subgraph_edge_inputs.clone();
1126 all_edges.push(new_start_edge_data);
1127 all_edges.push(new_end_edge_data);
1128 BlockGroupEdge::bulk_create(conn, &all_edges);
1129
1130 new_node_ids_by_old
1131 }
1132}
1133
1134impl Query for BlockGroup {
1135 type Model = BlockGroup;
1136
1137 const TABLE_NAME: &'static str = "block_groups";
1138
1139 fn process_row(row: &Row) -> Self::Model {
1140 BlockGroup {
1141 id: row.get(0).unwrap(),
1142 collection_name: row.get(1).unwrap(),
1143 sample_name: row.get(2).unwrap(),
1144 name: row.get(3).unwrap(),
1145 created_on: row.get(4).unwrap(),
1146 }
1147 }
1148}
1149
1150#[cfg(test)]
1151mod tests {
1152 use core::ops::Range;
1153
1154 use capnp::message::TypedBuilder;
1155 use chrono::Utc;
1156 use gen_core::NO_CHROMOSOME_INDEX;
1157
1158 use super::*;
1159 use crate::{
1160 collection::Collection,
1161 node::Node,
1162 sample::Sample,
1163 sequence::Sequence,
1164 test_helpers::{get_connection, interval_tree_verify, setup_block_group},
1165 };
1166
1167 #[test]
1168 fn test_capnp_serialization() {
1169 let block_group = BlockGroup {
1170 id: HashId::pad_str(42),
1171 collection_name: "test_collection".to_string(),
1172 sample_name: Some("test_sample".to_string()),
1173 name: "test_block_group".to_string(),
1174 created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1175 };
1176
1177 let mut message = TypedBuilder::<block_group::Owned>::new_default();
1178 let mut root = message.init_root();
1179 block_group.write_capnp(&mut root);
1180
1181 let deserialized = BlockGroup::read_capnp(root.into_reader());
1182 assert_eq!(block_group, deserialized);
1183 }
1184
1185 #[test]
1186 fn test_capnp_serialization_no_sample() {
1187 let block_group = BlockGroup {
1188 id: HashId::pad_str(43),
1189 collection_name: "test_collection_2".to_string(),
1190 sample_name: None,
1191 name: "test_block_group_2".to_string(),
1192 created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1193 };
1194
1195 let mut message = TypedBuilder::<block_group::Owned>::new_default();
1196 let mut root = message.init_root();
1197 block_group.write_capnp(&mut root);
1198
1199 let deserialized = BlockGroup::read_capnp(root.into_reader());
1200 assert_eq!(block_group, deserialized);
1201 }
1202
1203 #[test]
1204 fn test_blockgroup_create() {
1205 let conn = &get_connection(None).unwrap();
1206 Collection::create(conn, "test");
1207 let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1208 assert_eq!(bg1.collection_name, "test");
1209 assert_eq!(bg1.name, "hg19");
1210 Sample::get_or_create(conn, "sample");
1211 let bg2 = BlockGroup::create(conn, "test", Some("sample"), "hg19");
1212 assert_eq!(bg2.collection_name, "test");
1213 assert_eq!(bg2.name, "hg19");
1214 assert_eq!(bg2.sample_name, Some("sample".to_string()));
1215 assert_ne!(&bg1.id, &bg2.id);
1216 }
1217
1218 #[test]
1219 fn test_blockgroup_delete_without_sample() {
1220 let conn = &get_connection(None).unwrap();
1221 Collection::create(conn, "test");
1222 let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1223 let bg2 = BlockGroup::create(conn, "test", None, "hg38");
1224
1225 BlockGroup::delete(conn, "test", None, &bg1.name);
1226
1227 let bgs = BlockGroup::all(conn);
1228 assert_eq!(bgs.len(), 1);
1229 assert_eq!(bgs[0], bg2);
1230 }
1231
1232 #[test]
1233 fn test_blockgroup_delete_with_sample() {
1234 let conn = &get_connection(None).unwrap();
1235 Collection::create(conn, "test");
1236 Sample::get_or_create(conn, "sample1");
1237 Sample::get_or_create(conn, "sample2");
1238 let bg1 = BlockGroup::create(conn, "test", Some("sample1"), "hg19");
1239 let bg2 = BlockGroup::create(conn, "test", Some("sample2"), "hg19");
1240
1241 BlockGroup::delete(conn, "test", bg1.sample_name.as_deref(), &bg1.name);
1242
1243 let bgs = BlockGroup::all(conn);
1244 assert_eq!(bgs.len(), 1);
1245 assert_eq!(bgs[0], bg2);
1246 }
1247
1248 #[test]
1249 fn test_blockgroup_clone() {
1250 let conn = &get_connection(None).unwrap();
1251 Collection::create(conn, "test");
1252 let bg1 = BlockGroup::create(conn, "test", None, "hg19");
1253 assert_eq!(bg1.collection_name, "test");
1254 assert_eq!(bg1.name, "hg19");
1255 Sample::get_or_create(conn, "sample");
1256 let bg2 =
1257 BlockGroup::get_or_create_sample_block_group(conn, "test", "sample", "hg19", None)
1258 .unwrap();
1259 assert_eq!(
1260 BlockGroupEdge::edges_for_block_group(conn, &bg1.id),
1261 BlockGroupEdge::edges_for_block_group(conn, &bg2)
1262 );
1263 }
1264
1265 #[test]
1266 fn test_blockgroup_clone_passes_accessions() {
1267 let conn = &get_connection(None).unwrap();
1268 let (bg_1, path) = setup_block_group(conn);
1269 let mut path_cache = PathCache::new(conn);
1270 PathCache::lookup(&mut path_cache, &bg_1, path.name.clone());
1271 let acc_1 = BlockGroup::add_accession(conn, &path, "test", 3, 7, &mut path_cache);
1272 assert_eq!(
1273 Accession::query(
1274 conn,
1275 "select * from accessions where name = ?1",
1276 rusqlite::params!(SQLValue::from("test".to_string())),
1277 ),
1278 vec![Accession {
1279 id: acc_1.id,
1280 name: "test".to_string(),
1281 path_id: path.id,
1282 parent_accession_id: None,
1283 }]
1284 );
1285
1286 Sample::get_or_create(conn, "sample2");
1287 let _bg2 =
1288 BlockGroup::get_or_create_sample_block_group(conn, "test", "sample2", "chr1", None)
1289 .unwrap();
1290 assert_eq!(
1291 Accession::query(
1292 conn,
1293 "select * from accessions where name = ?1",
1294 rusqlite::params!(SQLValue::from("test".to_string())),
1295 )
1296 .len(),
1297 2
1298 );
1299 }
1300
1301 #[test]
1302 fn insert_and_deletion_get_all() {
1303 let conn = get_connection(None).unwrap();
1304 let (block_group_id, path) = setup_block_group(&conn);
1305 let insert_sequence = Sequence::new()
1306 .sequence_type("DNA")
1307 .sequence("NNNN")
1308 .save(&conn);
1309 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1310 let insert = PathBlock {
1311 id: 0,
1312 node_id: insert_node_id,
1313 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1314 sequence_start: 0,
1315 sequence_end: 4,
1316 path_start: 7,
1317 path_end: 15,
1318 strand: Strand::Forward,
1319 };
1320 let change = PathChange {
1321 block_group_id,
1322 path: path.clone(),
1323 path_accession: None,
1324 start: 7,
1325 end: 15,
1326 block: insert,
1327 chromosome_index: 1,
1328 phased: 0,
1329 preserve_edge: true,
1330 };
1331 let tree = path.intervaltree(&conn);
1332 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1333
1334 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1335 assert_eq!(
1336 all_sequences,
1337 HashSet::from_iter(vec![
1338 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1339 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1340 ])
1341 );
1342
1343 let deletion_sequence = Sequence::new()
1344 .sequence_type("DNA")
1345 .sequence("")
1346 .save(&conn);
1347 let deletion_node_id =
1348 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("2"));
1349 let deletion = PathBlock {
1350 id: 0,
1351 node_id: deletion_node_id,
1352 block_sequence: deletion_sequence.get_sequence(None, None),
1353 sequence_start: 0,
1354 sequence_end: 0,
1355 path_start: 19,
1356 path_end: 31,
1357 strand: Strand::Forward,
1358 };
1359
1360 let change = PathChange {
1361 block_group_id,
1362 path: path.clone(),
1363 path_accession: None,
1364 start: 19,
1365 end: 31,
1366 block: deletion,
1367 chromosome_index: 1,
1368 phased: 0,
1369 preserve_edge: true,
1370 };
1371 let tree = path.intervaltree(&conn);
1373 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1374 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1375 assert_eq!(
1376 all_sequences,
1377 HashSet::from_iter(vec![
1378 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1379 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1380 "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1381 "AAAAAAANNNNTTTTGGGGGGGGG".to_string(),
1382 ])
1383 )
1384 }
1385
1386 #[test]
1387 fn simple_insert_get_all() {
1388 let conn = get_connection(None).unwrap();
1389 let (block_group_id, path) = setup_block_group(&conn);
1390 let insert_sequence = Sequence::new()
1391 .sequence_type("DNA")
1392 .sequence("NNNN")
1393 .save(&conn);
1394 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1395 let insert = PathBlock {
1396 id: 0,
1397 node_id: insert_node_id,
1398 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1399 sequence_start: 0,
1400 sequence_end: 4,
1401 path_start: 7,
1402 path_end: 15,
1403 strand: Strand::Forward,
1404 };
1405 let change = PathChange {
1406 block_group_id,
1407 path: path.clone(),
1408 path_accession: None,
1409 start: 7,
1410 end: 15,
1411 block: insert,
1412 chromosome_index: 1,
1413 phased: 0,
1414 preserve_edge: true,
1415 };
1416 let tree = path.intervaltree(&conn);
1417 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1418
1419 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1420 assert_eq!(
1421 all_sequences,
1422 HashSet::from_iter(vec![
1423 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1424 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1425 ])
1426 );
1427 }
1428
1429 #[test]
1430 fn insert_on_block_boundary_middle() {
1431 let conn = get_connection(None).unwrap();
1432 let (block_group_id, path) = setup_block_group(&conn);
1433 let insert_sequence = Sequence::new()
1434 .sequence_type("DNA")
1435 .sequence("NNNN")
1436 .save(&conn);
1437 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1438 let insert = PathBlock {
1439 id: 0,
1440 node_id: insert_node_id,
1441 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1442 sequence_start: 0,
1443 sequence_end: 4,
1444 path_start: 15,
1445 path_end: 15,
1446 strand: Strand::Forward,
1447 };
1448 let change = PathChange {
1449 block_group_id,
1450 path: path.clone(),
1451 path_accession: None,
1452 start: 15,
1453 end: 15,
1454 block: insert,
1455 chromosome_index: 1,
1456 phased: 0,
1457 preserve_edge: true,
1458 };
1459 let tree = path.intervaltree(&conn);
1460 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1461
1462 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1463 assert_eq!(
1464 all_sequences,
1465 HashSet::from_iter(vec![
1466 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1467 "AAAAAAAAAATTTTTNNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1468 ])
1469 );
1470 }
1471
1472 #[test]
1473 fn insert_within_block() {
1474 let conn = get_connection(None).unwrap();
1475 let (block_group_id, path) = setup_block_group(&conn);
1476 let insert_sequence = Sequence::new()
1477 .sequence_type("DNA")
1478 .sequence("NNNN")
1479 .save(&conn);
1480 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1481 let insert = PathBlock {
1482 id: 0,
1483 node_id: insert_node_id,
1484 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1485 sequence_start: 0,
1486 sequence_end: 4,
1487 path_start: 12,
1488 path_end: 17,
1489 strand: Strand::Forward,
1490 };
1491 let change = PathChange {
1492 block_group_id,
1493 path: path.clone(),
1494 path_accession: None,
1495 start: 12,
1496 end: 17,
1497 block: insert,
1498 chromosome_index: 1,
1499 phased: 0,
1500 preserve_edge: true,
1501 };
1502 let tree = path.intervaltree(&conn);
1503 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1504
1505 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1506 assert_eq!(
1507 all_sequences,
1508 HashSet::from_iter(vec![
1509 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1510 "AAAAAAAAAATTNNNNTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1511 ])
1512 );
1513 }
1514
1515 #[test]
1516 fn insert_on_block_boundary_start() {
1517 let conn = get_connection(None).unwrap();
1518 let (block_group_id, path) = setup_block_group(&conn);
1519 let insert_sequence = Sequence::new()
1520 .sequence_type("DNA")
1521 .sequence("NNNN")
1522 .save(&conn);
1523 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1524 let insert = PathBlock {
1525 id: 0,
1526 node_id: insert_node_id,
1527 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1528 sequence_start: 0,
1529 sequence_end: 4,
1530 path_start: 10,
1531 path_end: 10,
1532 strand: Strand::Forward,
1533 };
1534 let change = PathChange {
1535 block_group_id,
1536 path: path.clone(),
1537 path_accession: None,
1538 start: 10,
1539 end: 10,
1540 block: insert,
1541 chromosome_index: 1,
1542 phased: 0,
1543 preserve_edge: true,
1544 };
1545 let tree = path.intervaltree(&conn);
1546 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1547
1548 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1549 assert_eq!(
1550 all_sequences,
1551 HashSet::from_iter(vec![
1552 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1553 "AAAAAAAAAANNNNTTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1554 ])
1555 );
1556 }
1557
1558 #[test]
1559 fn insert_on_block_boundary_end() {
1560 let conn = get_connection(None).unwrap();
1561 let (block_group_id, path) = setup_block_group(&conn);
1562 let insert_sequence = Sequence::new()
1563 .sequence_type("DNA")
1564 .sequence("NNNN")
1565 .save(&conn);
1566 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1567 let insert = PathBlock {
1568 id: 0,
1569 node_id: insert_node_id,
1570 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1571 sequence_start: 0,
1572 sequence_end: 4,
1573 path_start: 9,
1574 path_end: 9,
1575 strand: Strand::Forward,
1576 };
1577 let change = PathChange {
1578 block_group_id,
1579 path: path.clone(),
1580 path_accession: None,
1581 start: 9,
1582 end: 9,
1583 block: insert,
1584 chromosome_index: 1,
1585 phased: 0,
1586 preserve_edge: true,
1587 };
1588 let tree = path.intervaltree(&conn);
1589 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1590
1591 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1592 assert_eq!(
1593 all_sequences,
1594 HashSet::from_iter(vec![
1595 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1596 "AAAAAAAAANNNNATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1597 ])
1598 );
1599 }
1600
1601 #[test]
1602 fn insert_across_entire_block_boundary() {
1603 let conn = get_connection(None).unwrap();
1604 let (block_group_id, path) = setup_block_group(&conn);
1605 let insert_sequence = Sequence::new()
1606 .sequence_type("DNA")
1607 .sequence("NNNN")
1608 .save(&conn);
1609 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1610 let insert = PathBlock {
1611 id: 0,
1612 node_id: insert_node_id,
1613 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1614 sequence_start: 0,
1615 sequence_end: 4,
1616 path_start: 10,
1617 path_end: 20,
1618 strand: Strand::Forward,
1619 };
1620 let change = PathChange {
1621 block_group_id,
1622 path: path.clone(),
1623 path_accession: None,
1624 start: 10,
1625 end: 20,
1626 block: insert,
1627 chromosome_index: 1,
1628 phased: 0,
1629 preserve_edge: true,
1630 };
1631 let tree = path.intervaltree(&conn);
1632 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1633
1634 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1635 assert_eq!(
1636 all_sequences,
1637 HashSet::from_iter(vec![
1638 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1639 "AAAAAAAAAANNNNCCCCCCCCCCGGGGGGGGGG".to_string()
1640 ])
1641 );
1642 }
1643
1644 #[test]
1645 fn insert_across_two_blocks() {
1646 let conn = get_connection(None).unwrap();
1647 let (block_group_id, path) = setup_block_group(&conn);
1648 let insert_sequence = Sequence::new()
1649 .sequence_type("DNA")
1650 .sequence("NNNN")
1651 .save(&conn);
1652 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1653 let insert = PathBlock {
1654 id: 0,
1655 node_id: insert_node_id,
1656 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1657 sequence_start: 0,
1658 sequence_end: 4,
1659 path_start: 15,
1660 path_end: 25,
1661 strand: Strand::Forward,
1662 };
1663 let change = PathChange {
1664 block_group_id,
1665 path: path.clone(),
1666 path_accession: None,
1667 start: 15,
1668 end: 25,
1669 block: insert,
1670 chromosome_index: 1,
1671 phased: 0,
1672 preserve_edge: true,
1673 };
1674 let tree = path.intervaltree(&conn);
1675 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1676
1677 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1678 assert_eq!(
1679 all_sequences,
1680 HashSet::from_iter(vec![
1681 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1682 "AAAAAAAAAATTTTTNNNNCCCCCGGGGGGGGGG".to_string()
1683 ])
1684 );
1685 }
1686
1687 #[test]
1688 fn insert_spanning_blocks() {
1689 let conn = get_connection(None).unwrap();
1690 let (block_group_id, path) = setup_block_group(&conn);
1691 let insert_sequence = Sequence::new()
1692 .sequence_type("DNA")
1693 .sequence("NNNN")
1694 .save(&conn);
1695 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1696 let insert = PathBlock {
1697 id: 0,
1698 node_id: insert_node_id,
1699 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1700 sequence_start: 0,
1701 sequence_end: 4,
1702 path_start: 5,
1703 path_end: 35,
1704 strand: Strand::Forward,
1705 };
1706 let change = PathChange {
1707 block_group_id,
1708 path: path.clone(),
1709 path_accession: None,
1710 start: 5,
1711 end: 35,
1712 block: insert,
1713 chromosome_index: 1,
1714 phased: 0,
1715 preserve_edge: true,
1716 };
1717 let tree = path.intervaltree(&conn);
1718 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1719
1720 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1721 assert_eq!(
1722 all_sequences,
1723 HashSet::from_iter(vec![
1724 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1725 "AAAAANNNNGGGGG".to_string()
1726 ])
1727 );
1728 }
1729
1730 #[test]
1731 fn simple_deletion() {
1732 let conn = get_connection(None).unwrap();
1733 let (block_group_id, path) = setup_block_group(&conn);
1734 let deletion_sequence = Sequence::new()
1735 .sequence_type("DNA")
1736 .sequence("")
1737 .save(&conn);
1738 let deletion_node_id =
1739 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
1740 let deletion = PathBlock {
1741 id: 0,
1742 node_id: deletion_node_id,
1743 block_sequence: deletion_sequence.get_sequence(None, None),
1744 sequence_start: 0,
1745 sequence_end: 0,
1746 path_start: 19,
1747 path_end: 31,
1748 strand: Strand::Forward,
1749 };
1750
1751 let change = PathChange {
1752 block_group_id,
1753 path: path.clone(),
1754 path_accession: None,
1755 start: 19,
1756 end: 31,
1757 block: deletion,
1758 chromosome_index: 1,
1759 phased: 0,
1760 preserve_edge: true,
1761 };
1762
1763 let tree = path.intervaltree(&conn);
1765 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1766 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1767 assert_eq!(
1768 all_sequences,
1769 HashSet::from_iter(vec![
1770 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1771 "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1772 ])
1773 )
1774 }
1775
1776 #[test]
1777 fn doesnt_apply_same_insert_twice() {
1778 let conn = get_connection(None).unwrap();
1779 let (block_group_id, path) = setup_block_group(&conn);
1780 let insert_sequence = Sequence::new()
1781 .sequence_type("DNA")
1782 .sequence("NNNN")
1783 .save(&conn);
1784 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1785 let insert = PathBlock {
1786 id: 0,
1787 node_id: insert_node_id,
1788 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1789 sequence_start: 0,
1790 sequence_end: 4,
1791 path_start: 7,
1792 path_end: 15,
1793 strand: Strand::Forward,
1794 };
1795 let change = PathChange {
1796 block_group_id,
1797 path: path.clone(),
1798 path_accession: None,
1799 start: 7,
1800 end: 15,
1801 block: insert,
1802 chromosome_index: 1,
1803 phased: 0,
1804 preserve_edge: true,
1805 };
1806 let tree = path.intervaltree(&conn);
1807 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1808
1809 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1810 assert_eq!(
1811 all_sequences,
1812 HashSet::from_iter(vec![
1813 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1814 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1815 ])
1816 );
1817
1818 let tree = path.intervaltree(&conn);
1819 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1820
1821 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1822 assert_eq!(
1823 all_sequences,
1824 HashSet::from_iter(vec![
1825 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1826 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1827 ])
1828 );
1829 }
1830
1831 #[test]
1832 fn insert_at_beginning_of_path() {
1833 let conn = get_connection(None).unwrap();
1834 let (block_group_id, path) = setup_block_group(&conn);
1835 let insert_sequence = Sequence::new()
1836 .sequence_type("DNA")
1837 .sequence("NNNN")
1838 .save(&conn);
1839 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1840 let insert = PathBlock {
1841 id: 0,
1842 node_id: insert_node_id,
1843 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1844 sequence_start: 0,
1845 sequence_end: 4,
1846 path_start: 0,
1847 path_end: 0,
1848 strand: Strand::Forward,
1849 };
1850 let change = PathChange {
1851 block_group_id,
1852 path: path.clone(),
1853 path_accession: None,
1854 start: 0,
1855 end: 0,
1856 block: insert,
1857 chromosome_index: 1,
1858 phased: 0,
1859 preserve_edge: true,
1860 };
1861 let tree = path.intervaltree(&conn);
1862 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1863
1864 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1865 assert_eq!(
1866 all_sequences,
1867 HashSet::from_iter(vec![
1868 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1869 "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1870 ])
1871 );
1872 }
1873
1874 #[test]
1875 fn homozygous_insert_at_beginning_of_path() {
1876 let conn = get_connection(None).unwrap();
1877 let (block_group_id, path) = setup_block_group(&conn);
1878 let insert_sequence = Sequence::new()
1879 .sequence_type("DNA")
1880 .sequence("NNNN")
1881 .save(&conn);
1882 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1883 let insert = PathBlock {
1884 id: 0,
1885 node_id: insert_node_id,
1886 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1887 sequence_start: 0,
1888 sequence_end: 4,
1889 path_start: 0,
1890 path_end: 0,
1891 strand: Strand::Forward,
1892 };
1893 let change = PathChange {
1894 block_group_id,
1895 path: path.clone(),
1896 path_accession: None,
1897 start: 0,
1898 end: 0,
1899 block: insert,
1900 chromosome_index: 0,
1901 phased: 0,
1902 preserve_edge: true,
1903 };
1904 let tree = path.intervaltree(&conn);
1905 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1906
1907 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1908 assert_eq!(
1909 all_sequences,
1910 HashSet::from_iter(vec![
1911 "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1912 ])
1913 );
1914 }
1915
1916 #[test]
1917 fn insert_at_end_of_path() {
1918 let conn = get_connection(None).unwrap();
1919 let (block_group_id, path) = setup_block_group(&conn);
1920
1921 let insert_sequence = Sequence::new()
1922 .sequence_type("DNA")
1923 .sequence("NNNN")
1924 .save(&conn);
1925 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1926 let insert = PathBlock {
1927 id: 0,
1928 node_id: insert_node_id,
1929 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1930 sequence_start: 0,
1931 sequence_end: 4,
1932 path_start: 40,
1933 path_end: 40,
1934 strand: Strand::Forward,
1935 };
1936 let change = PathChange {
1937 block_group_id,
1938 path: path.clone(),
1939 path_accession: None,
1940 start: 40,
1941 end: 40,
1942 block: insert,
1943 chromosome_index: 1,
1944 phased: 0,
1945 preserve_edge: true,
1946 };
1947 let tree = path.intervaltree(&conn);
1948 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1949
1950 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1951 assert_eq!(
1952 all_sequences,
1953 HashSet::from_iter(vec![
1954 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1955 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGNNNN".to_string(),
1956 ])
1957 );
1958 }
1959
1960 #[test]
1961 fn insert_at_one_bp_into_block() {
1962 let conn = get_connection(None).unwrap();
1963 let (block_group_id, path) = setup_block_group(&conn);
1964 let insert_sequence = Sequence::new()
1965 .sequence_type("DNA")
1966 .sequence("NNNN")
1967 .save(&conn);
1968 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1969 let insert = PathBlock {
1970 id: 0,
1971 node_id: insert_node_id,
1972 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1973 sequence_start: 0,
1974 sequence_end: 4,
1975 path_start: 10,
1976 path_end: 11,
1977 strand: Strand::Forward,
1978 };
1979 let change = PathChange {
1980 block_group_id,
1981 path: path.clone(),
1982 path_accession: None,
1983 start: 10,
1984 end: 11,
1985 block: insert,
1986 chromosome_index: 1,
1987 phased: 0,
1988 preserve_edge: true,
1989 };
1990 let tree = path.intervaltree(&conn);
1991 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1992
1993 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1994 assert_eq!(
1995 all_sequences,
1996 HashSet::from_iter(vec![
1997 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1998 "AAAAAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1999 ])
2000 );
2001 }
2002
2003 #[test]
2004 fn insert_at_one_bp_from_end_of_block() {
2005 let conn = get_connection(None).unwrap();
2006 let (block_group_id, path) = setup_block_group(&conn);
2007 let insert_sequence = Sequence::new()
2008 .sequence_type("DNA")
2009 .sequence("NNNN")
2010 .save(&conn);
2011 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2012 let insert = PathBlock {
2013 id: 0,
2014 node_id: insert_node_id,
2015 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2016 sequence_start: 0,
2017 sequence_end: 4,
2018 path_start: 19,
2019 path_end: 20,
2020 strand: Strand::Forward,
2021 };
2022 let change = PathChange {
2023 block_group_id,
2024 path: path.clone(),
2025 path_accession: None,
2026 start: 19,
2027 end: 20,
2028 block: insert,
2029 chromosome_index: 1,
2030 phased: 0,
2031 preserve_edge: true,
2032 };
2033 let tree = path.intervaltree(&conn);
2034 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2035
2036 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2037 assert_eq!(
2038 all_sequences,
2039 HashSet::from_iter(vec![
2040 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2041 "AAAAAAAAAATTTTTTTTTNNNNCCCCCCCCCCGGGGGGGGGG".to_string(),
2042 ])
2043 );
2044 }
2045
2046 #[test]
2047 fn delete_at_beginning_of_path() {
2048 let conn = get_connection(None).unwrap();
2049 let (block_group_id, path) = setup_block_group(&conn);
2050 let deletion_sequence = Sequence::new()
2051 .sequence_type("DNA")
2052 .sequence("")
2053 .save(&conn);
2054 let deletion_node_id =
2055 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2056 let deletion = PathBlock {
2057 id: 0,
2058 node_id: deletion_node_id,
2059 block_sequence: deletion_sequence.get_sequence(None, None),
2060 sequence_start: 0,
2061 sequence_end: 0,
2062 path_start: 0,
2063 path_end: 1,
2064 strand: Strand::Forward,
2065 };
2066 let change = PathChange {
2067 block_group_id,
2068 path: path.clone(),
2069 path_accession: None,
2070 start: 0,
2071 end: 1,
2072 block: deletion,
2073 chromosome_index: 1,
2074 phased: 0,
2075 preserve_edge: true,
2076 };
2077 let tree = path.intervaltree(&conn);
2078 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2079
2080 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2081 assert_eq!(
2082 all_sequences,
2083 HashSet::from_iter(vec![
2084 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2085 "AAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2086 ])
2087 );
2088 }
2089
2090 #[test]
2091 fn delete_at_end_of_path() {
2092 let conn = get_connection(None).unwrap();
2093 let (block_group_id, path) = setup_block_group(&conn);
2094 let deletion_sequence = Sequence::new()
2095 .sequence_type("DNA")
2096 .sequence("")
2097 .save(&conn);
2098 let deletion_node_id =
2099 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2100 let deletion = PathBlock {
2101 id: 0,
2102 node_id: deletion_node_id,
2103 block_sequence: deletion_sequence.get_sequence(None, None),
2104 sequence_start: 0,
2105 sequence_end: 0,
2106 path_start: 35,
2107 path_end: 40,
2108 strand: Strand::Forward,
2109 };
2110 let change = PathChange {
2111 block_group_id,
2112 path: path.clone(),
2113 path_accession: None,
2114 start: 35,
2115 end: 40,
2116 block: deletion,
2117 chromosome_index: 1,
2118 phased: 0,
2119 preserve_edge: true,
2120 };
2121 let tree = path.intervaltree(&conn);
2122 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2123
2124 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2125 assert_eq!(
2126 all_sequences,
2127 HashSet::from_iter(vec![
2128 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGG".to_string(),
2129 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2130 ])
2131 );
2132 }
2133
2134 #[test]
2135 fn error_on_out_of_bounds_change() {
2136 let conn = get_connection(None).unwrap();
2137 let (block_group_id, path) = setup_block_group(&conn);
2138 let deletion_sequence = Sequence::new()
2139 .sequence_type("DNA")
2140 .sequence("")
2141 .save(&conn);
2142 let deletion_node_id =
2143 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2144 let deletion = PathBlock {
2145 id: 0,
2146 node_id: deletion_node_id,
2147 block_sequence: deletion_sequence.get_sequence(None, None),
2148 sequence_start: 0,
2149 sequence_end: 0,
2150 path_start: 350,
2151 path_end: 400,
2152 strand: Strand::Forward,
2153 };
2154 let after_end_change = PathChange {
2155 block_group_id,
2156 path: path.clone(),
2157 path_accession: None,
2158 start: 350,
2159 end: 400,
2160 block: deletion.clone(),
2161 chromosome_index: 1,
2162 phased: 0,
2163 preserve_edge: true,
2164 };
2165 let before_start_change = PathChange {
2166 block_group_id,
2167 path: path.clone(),
2168 path_accession: None,
2169 start: -300,
2170 end: 400,
2171 block: deletion,
2172 chromosome_index: 1,
2173 phased: 0,
2174 preserve_edge: true,
2175 };
2176 let tree = path.intervaltree(&conn);
2177 let res = BlockGroup::insert_change(&conn, &after_end_change, &tree);
2178 assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2179 let res = BlockGroup::insert_change(&conn, &before_start_change, &tree);
2180 assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2181 }
2182
2183 #[test]
2184 fn deletion_starting_at_block_boundary() {
2185 let conn = get_connection(None).unwrap();
2186 let (block_group_id, path) = setup_block_group(&conn);
2187 let deletion_sequence = Sequence::new()
2188 .sequence_type("DNA")
2189 .sequence("")
2190 .save(&conn);
2191 let deletion_node_id =
2192 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2193 let deletion = PathBlock {
2194 id: 0,
2195 node_id: deletion_node_id,
2196 block_sequence: deletion_sequence.get_sequence(None, None),
2197 sequence_start: 0,
2198 sequence_end: 0,
2199 path_start: 10,
2200 path_end: 12,
2201 strand: Strand::Forward,
2202 };
2203 let change = PathChange {
2204 block_group_id,
2205 path: path.clone(),
2206 path_accession: None,
2207 start: 10,
2208 end: 12,
2209 block: deletion,
2210 chromosome_index: 1,
2211 phased: 0,
2212 preserve_edge: true,
2213 };
2214 let tree = path.intervaltree(&conn);
2215 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2216
2217 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2218 assert_eq!(
2219 all_sequences,
2220 HashSet::from_iter(vec![
2221 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2222 "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2223 ])
2224 );
2225 }
2226
2227 #[test]
2228 fn deletion_ending_at_block_boundary() {
2229 let conn = get_connection(None).unwrap();
2230 let (block_group_id, path) = setup_block_group(&conn);
2231 let deletion_sequence = Sequence::new()
2232 .sequence_type("DNA")
2233 .sequence("")
2234 .save(&conn);
2235 let deletion_node_id =
2236 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2237 let deletion = PathBlock {
2238 id: 0,
2239 node_id: deletion_node_id,
2240 block_sequence: deletion_sequence.get_sequence(None, None),
2241 sequence_start: 0,
2242 sequence_end: 0,
2243 path_start: 18,
2244 path_end: 20,
2245 strand: Strand::Forward,
2246 };
2247 let change = PathChange {
2248 block_group_id,
2249 path: path.clone(),
2250 path_accession: None,
2251 start: 18,
2252 end: 20,
2253 block: deletion,
2254 chromosome_index: 1,
2255 phased: 0,
2256 preserve_edge: true,
2257 };
2258 let tree = path.intervaltree(&conn);
2259 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2260
2261 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2262 assert_eq!(
2263 all_sequences,
2264 HashSet::from_iter(vec![
2265 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2266 "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2267 ])
2268 );
2269 }
2270
2271 #[test]
2272 fn test_blockgroup_interval_tree() {
2273 let conn = &get_connection(None).unwrap();
2274 let (block_group_id, path) = setup_block_group(conn);
2275 let _new_sample = Sample::get_or_create(conn, "child");
2276 let new_bg_id =
2277 BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2278 .unwrap();
2279 let new_path = Path::query(
2280 conn,
2281 "select * from paths where block_group_id = ?1",
2282 params![new_bg_id],
2283 );
2284 let insert_sequence = Sequence::new()
2285 .sequence_type("DNA")
2286 .sequence("NNNN")
2287 .save(conn);
2288 let insert_node_id = Node::create(
2289 conn,
2290 &insert_sequence.hash,
2291 &HashId::convert_str("insert-node"),
2292 );
2293 let insert = PathBlock {
2294 id: 0,
2295 node_id: insert_node_id,
2296 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2297 sequence_start: 0,
2298 sequence_end: 4,
2299 path_start: 7,
2300 path_end: 15,
2301 strand: Strand::Forward,
2302 };
2303 let change = PathChange {
2304 block_group_id: new_bg_id,
2305 path: new_path[0].clone(),
2306 path_accession: None,
2307 start: 7,
2308 end: 15,
2309 block: insert,
2310 chromosome_index: 1,
2311 phased: 0,
2312 preserve_edge: true,
2313 };
2314 let tree = path.intervaltree(conn);
2315 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2316
2317 let tree = BlockGroup::intervaltree_for(conn, &block_group_id, false);
2318 let tree2 = BlockGroup::intervaltree_for(conn, &block_group_id, true);
2319 interval_tree_verify(
2320 &tree,
2321 3,
2322 &[NodeIntervalBlock {
2323 block_id: 3,
2324 node_id: HashId::convert_str("test-a-node"),
2325 start: 0,
2326 end: 10,
2327 sequence_start: 0,
2328 sequence_end: 10,
2329 strand: Strand::Forward,
2330 }],
2331 );
2332 interval_tree_verify(
2333 &tree2,
2334 3,
2335 &[NodeIntervalBlock {
2336 block_id: 3,
2337 node_id: HashId::convert_str("test-a-node"),
2338 start: 0,
2339 end: 10,
2340 sequence_start: 0,
2341 sequence_end: 10,
2342 strand: Strand::Forward,
2343 }],
2344 );
2345 interval_tree_verify(
2346 &tree,
2347 35,
2348 &[NodeIntervalBlock {
2349 block_id: 0,
2350 node_id: HashId::convert_str("test-g-node"),
2351 start: 30,
2352 end: 40,
2353 sequence_start: 0,
2354 sequence_end: 10,
2355 strand: Strand::Forward,
2356 }],
2357 );
2358 interval_tree_verify(
2359 &tree2,
2360 35,
2361 &[NodeIntervalBlock {
2362 block_id: 0,
2363 node_id: HashId::convert_str("test-g-node"),
2364 start: 30,
2365 end: 40,
2366 sequence_start: 0,
2367 sequence_end: 10,
2368 strand: Strand::Forward,
2369 }],
2370 );
2371
2372 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, false);
2374 let tree2 = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2375 interval_tree_verify(
2376 &tree,
2377 3,
2378 &[NodeIntervalBlock {
2379 block_id: 5,
2380 node_id: HashId::convert_str("test-a-node"),
2381 start: 0,
2382 end: 7,
2383 sequence_start: 0,
2384 sequence_end: 7,
2385 strand: Strand::Forward,
2386 }],
2387 );
2388 interval_tree_verify(
2389 &tree2,
2390 3,
2391 &[NodeIntervalBlock {
2392 block_id: 5,
2393 node_id: HashId::convert_str("test-a-node"),
2394 start: 0,
2395 end: 7,
2396 sequence_start: 0,
2397 sequence_end: 7,
2398 strand: Strand::Forward,
2399 }],
2400 );
2401 interval_tree_verify(
2402 &tree,
2403 30,
2404 &[
2405 NodeIntervalBlock {
2406 block_id: 0,
2407 node_id: HashId::convert_str("test-g-node"),
2408 start: 26,
2409 end: 36,
2410 sequence_start: 0,
2411 sequence_end: 10,
2412 strand: Strand::Forward,
2413 },
2414 NodeIntervalBlock {
2415 block_id: 0,
2416 node_id: HashId::convert_str("test-g-node"),
2417 start: 30,
2418 end: 40,
2419 sequence_start: 0,
2420 sequence_end: 10,
2421 strand: Strand::Forward,
2422 },
2423 ],
2424 );
2425 interval_tree_verify(&tree2, 30, &[]);
2426 interval_tree_verify(
2429 &tree2,
2430 9,
2431 &[
2432 NodeIntervalBlock {
2433 block_id: 4,
2434 node_id: HashId::convert_str("insert-node"),
2435 start: 7,
2436 end: 11,
2437 sequence_start: 0,
2438 sequence_end: 4,
2439 strand: Strand::Forward,
2440 },
2441 NodeIntervalBlock {
2442 block_id: 6,
2443 node_id: HashId::convert_str("test-a-node"),
2444 start: 7,
2445 end: 10,
2446 sequence_start: 7,
2447 sequence_end: 10,
2448 strand: Strand::Forward,
2449 },
2450 ],
2451 );
2452 }
2453
2454 #[test]
2455 fn test_changes_against_derivative_blockgroups() {
2456 let conn = &get_connection(None).unwrap();
2457 let (_block_group_id, _path) = setup_block_group(conn);
2458 let _new_sample = Sample::get_or_create(conn, "child");
2459 let new_bg_id =
2460 BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2461 .unwrap();
2462 let new_path = Path::query(
2463 conn,
2464 "select * from paths where block_group_id = ?1",
2465 rusqlite::params!(SQLValue::from(new_bg_id)),
2466 );
2467 let insert_sequence = Sequence::new()
2468 .sequence_type("DNA")
2469 .sequence("NNNN")
2470 .save(conn);
2471 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2472 let insert = PathBlock {
2473 id: 0,
2474 node_id: insert_node_id,
2475 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2476 sequence_start: 0,
2477 sequence_end: 4,
2478 path_start: 7,
2479 path_end: 15,
2480 strand: Strand::Forward,
2481 };
2482 let change = PathChange {
2483 block_group_id: new_bg_id,
2484 path: new_path[0].clone(),
2485 path_accession: None,
2486 start: 7,
2487 end: 15,
2488 block: insert,
2489 chromosome_index: 1,
2490 phased: 0,
2491 preserve_edge: false,
2492 };
2493 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2495 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2496 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2497 assert_eq!(
2498 all_sequences,
2499 HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2500 );
2501
2502 let _new_sample = Sample::get_or_create(conn, "grandchild");
2504 let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2505 conn,
2506 "test",
2507 "grandchild",
2508 "chr1",
2509 Some("child"),
2510 )
2511 .unwrap();
2512 let new_path = Path::query(
2513 conn,
2514 "select * from paths where block_group_id = ?1",
2515 rusqlite::params!(SQLValue::from(gc_bg_id)),
2516 );
2517
2518 let insert = PathBlock {
2519 id: 0,
2520 node_id: insert_node_id,
2521 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2522 sequence_start: 0,
2523 sequence_end: 4,
2524 path_start: 7,
2525 path_end: 15,
2526 strand: Strand::Forward,
2527 };
2528 let change = PathChange {
2529 block_group_id: gc_bg_id,
2530 path: new_path[0].clone(),
2531 path_accession: None,
2532 start: 7,
2533 end: 15,
2534 block: insert,
2535 chromosome_index: 1,
2536 phased: 0,
2537 preserve_edge: false,
2538 };
2539 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2541 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2542 let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
2543 assert_eq!(
2544 all_sequences,
2545 HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2546 )
2547 }
2548
2549 #[test]
2550 fn test_changes_against_derivative_diploid_blockgroups() {
2551 let conn = &get_connection(None).unwrap();
2554 let (_block_group_id, _path) = setup_block_group(conn);
2555 let _new_sample = Sample::get_or_create(conn, "child");
2556 let new_bg_id =
2557 BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2558 .unwrap();
2559 let new_path = Path::query(
2560 conn,
2561 "select * from paths where block_group_id = ?1",
2562 params![new_bg_id],
2563 );
2564 let insert_sequence = Sequence::new()
2565 .sequence_type("DNA")
2566 .sequence("NNNN")
2567 .save(conn);
2568 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2569 let insert = PathBlock {
2570 id: 0,
2571 node_id: insert_node_id,
2572 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2573 sequence_start: 0,
2574 sequence_end: 4,
2575 path_start: 7,
2576 path_end: 11,
2577 strand: Strand::Forward,
2578 };
2579 let change = PathChange {
2580 block_group_id: new_bg_id,
2581 path: new_path[0].clone(),
2582 path_accession: None,
2583 start: 7,
2584 end: 11,
2585 block: insert,
2586 chromosome_index: 1,
2587 phased: 0,
2588 preserve_edge: true,
2589 };
2590 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2592 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2593 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2594 assert_eq!(
2595 all_sequences,
2596 HashSet::from_iter(vec![
2597 "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2598 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2599 ])
2600 );
2601
2602 let _new_sample = Sample::get_or_create(conn, "grandchild");
2604 let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2605 conn,
2606 "test",
2607 "grandchild",
2608 "chr1",
2609 Some("child"),
2610 )
2611 .unwrap();
2612 let new_path = Path::query(
2613 conn,
2614 "select * from paths where block_group_id = ?1",
2615 params![gc_bg_id],
2616 );
2617
2618 let insert_sequence = Sequence::new()
2619 .sequence_type("DNA")
2620 .sequence("NNNN")
2621 .save(conn);
2622 let insert_node_id = Node::create(
2623 conn,
2624 &insert_sequence.hash,
2625 &HashId::convert_str("new-hash"),
2626 );
2627
2628 let insert = PathBlock {
2629 id: 0,
2630 node_id: insert_node_id,
2631 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2632 sequence_start: 0,
2633 sequence_end: 4,
2634 path_start: 20,
2635 path_end: 24,
2636 strand: Strand::Forward,
2637 };
2638 let change = PathChange {
2639 block_group_id: gc_bg_id,
2640 path: new_path[0].clone(),
2641 path_accession: None,
2642 start: 20,
2643 end: 24,
2644 block: insert,
2645 chromosome_index: 1,
2646 phased: 0,
2647 preserve_edge: true,
2648 };
2649 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2651 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2652 let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
2653 assert_eq!(
2654 all_sequences,
2655 HashSet::from_iter(vec![
2656 "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2657 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2658 "AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
2659 "AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
2660 ])
2661 )
2662 }
2663
2664 #[test]
2665 #[should_panic]
2666 fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
2667 let conn = &get_connection(None).unwrap();
2669 let (_block_group_id, _path) = setup_block_group(conn);
2670 let _new_sample = Sample::get_or_create(conn, "child");
2671 let new_bg_id =
2672 BlockGroup::get_or_create_sample_block_group(conn, "test", "child", "chr1", None)
2673 .unwrap();
2674 let new_path = Path::query(
2675 conn,
2676 "select * from paths where block_group_id = ?1",
2677 rusqlite::params!(SQLValue::from(new_bg_id)),
2678 );
2679 let insert_sequence = Sequence::new()
2682 .sequence_type("DNA")
2683 .sequence("NNNN")
2684 .save(conn);
2685 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2686 let insert = PathBlock {
2687 id: 0,
2688 node_id: insert_node_id,
2689 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2690 sequence_start: 0,
2691 sequence_end: 4,
2692 path_start: 7,
2693 path_end: 12,
2694 strand: Strand::Forward,
2695 };
2696 let change = PathChange {
2697 block_group_id: new_bg_id,
2698 path: new_path[0].clone(),
2699 path_accession: None,
2700 start: 7,
2701 end: 12,
2702 block: insert,
2703 chromosome_index: 1,
2704 phased: 0,
2705 preserve_edge: true,
2706 };
2707 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2709 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2710 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2711 assert_eq!(
2712 all_sequences,
2713 HashSet::from_iter(vec![
2714 "AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2715 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2716 ])
2717 );
2718
2719 let _new_sample = Sample::get_or_create(conn, "grandchild");
2721 let gc_bg_id = BlockGroup::get_or_create_sample_block_group(
2722 conn,
2723 "test",
2724 "grandchild",
2725 "chr1",
2726 Some("child"),
2727 )
2728 .unwrap();
2729 let new_path = Path::query(
2730 conn,
2731 "select * from paths where block_group_id = ?1",
2732 rusqlite::params!(SQLValue::from(gc_bg_id)),
2733 );
2734
2735 let insert_sequence = Sequence::new()
2736 .sequence_type("DNA")
2737 .sequence("NNNN")
2738 .save(conn);
2739 let insert_node_id =
2740 Node::create(conn, &insert_sequence.hash, &HashId::pad_str("new-hash"));
2741
2742 let insert = PathBlock {
2743 id: 0,
2744 node_id: insert_node_id,
2745 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2746 sequence_start: 0,
2747 sequence_end: 4,
2748 path_start: 20,
2749 path_end: 24,
2750 strand: Strand::Forward,
2751 };
2752 let change = PathChange {
2753 block_group_id: gc_bg_id,
2754 path: new_path[0].clone(),
2755 path_accession: None,
2756 start: 20,
2757 end: 24,
2758 block: insert,
2759 chromosome_index: 1,
2760 phased: 0,
2761 preserve_edge: true,
2762 };
2763 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
2765 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2766 }
2767
2768 mod test_derive_subgraph {
2769
2770 use super::*;
2771 use crate::{
2772 collection::Collection,
2773 node::Node,
2774 sequence::Sequence,
2775 test_helpers::{get_connection, setup_block_group},
2776 };
2777
2778 #[test]
2779 fn test_derive_subgraph_one_insertion() {
2780 let conn = &get_connection(None).unwrap();
2787 Collection::create(conn, "test");
2788 let (block_group1_id, original_path) = setup_block_group(conn);
2789
2790 let intervaltree = original_path.intervaltree(conn);
2791 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
2792 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
2793
2794 let insert_sequence = Sequence::new()
2795 .sequence_type("DNA")
2796 .sequence("AAAAAAAA")
2797 .save(conn);
2798 let insert_node_id = Node::create(
2799 conn,
2800 &insert_sequence.hash,
2801 &HashId(calculate_hash(&format!(
2802 "test-insert-a-node.{}",
2803 insert_sequence.hash
2804 ))),
2805 );
2806 let edge_into_insert = Edge::create(
2807 conn,
2808 insert_start_node_id,
2809 6,
2810 Strand::Forward,
2811 insert_node_id,
2812 0,
2813 Strand::Forward,
2814 );
2815 let edge_out_of_insert = Edge::create(
2816 conn,
2817 insert_node_id,
2818 8,
2819 Strand::Forward,
2820 insert_end_node_id,
2821 4,
2822 Strand::Forward,
2823 );
2824 let ref_heal_1 = Edge::create(
2825 conn,
2826 insert_start_node_id,
2827 6,
2828 Strand::Forward,
2829 insert_start_node_id,
2830 6,
2831 Strand::Forward,
2832 );
2833 let ref_heal_2 = Edge::create(
2834 conn,
2835 insert_end_node_id,
2836 4,
2837 Strand::Forward,
2838 insert_end_node_id,
2839 4,
2840 Strand::Forward,
2841 );
2842
2843 let edge_ids = [
2844 &edge_into_insert.id,
2845 &edge_out_of_insert.id,
2846 &ref_heal_1.id,
2847 &ref_heal_2.id,
2848 ];
2849 let block_group_edges = edge_ids
2850 .iter()
2851 .enumerate()
2852 .map(|(i, edge_id)| BlockGroupEdgeData {
2853 block_group_id: block_group1_id,
2854 edge_id: *(*edge_id),
2855 chromosome_index: if i < 2 { 1 } else { 0 },
2856 phased: 0,
2857 })
2858 .collect::<Vec<BlockGroupEdgeData>>();
2859 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2860
2861 let insert_path =
2862 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
2863 assert_eq!(
2864 insert_path.sequence(conn),
2865 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
2866 );
2867
2868 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
2869 assert_eq!(
2870 all_sequences,
2871 HashSet::from_iter(vec![
2872 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2873 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
2874 ])
2875 );
2876
2877 let mut blocks = intervaltree
2878 .query(Range { start: 15, end: 25 })
2879 .map(|x| x.value)
2880 .collect::<Vec<_>>();
2881 blocks.sort_by(|a, b| a.start.cmp(&b.start));
2882 let start_block = blocks[0];
2883 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
2884 let end_block = blocks[blocks.len() - 1];
2885 let end_node_coordinate = 25 - end_block.start + end_block.sequence_start;
2886
2887 let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
2888 BlockGroup::derive_subgraph(
2889 conn,
2890 "test",
2891 "test",
2892 &block_group1_id,
2893 &start_block,
2894 &end_block,
2895 start_node_coordinate,
2896 end_node_coordinate,
2897 &block_group2.id,
2898 );
2899 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
2900 assert_eq!(
2901 all_sequences2,
2902 HashSet::from_iter(vec!["TTTTTCCCCC".to_string(), "TAAAAAAAAC".to_string(),])
2903 );
2904 }
2905
2906 #[test]
2907 fn test_derive_subgraph_two_independent_insertions() {
2908 let conn = &get_connection(None).unwrap();
2914 Collection::create(conn, "test");
2915 let (block_group1_id, original_path) = setup_block_group(conn);
2916
2917 let intervaltree = original_path.intervaltree(conn);
2918 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
2919 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
2920
2921 let insert_sequence = Sequence::new()
2922 .sequence_type("DNA")
2923 .sequence("AAAAAAAA")
2924 .save(conn);
2925 let insert_node_id = Node::create(
2926 conn,
2927 &insert_sequence.hash,
2928 &HashId(calculate_hash(&format!(
2929 "test-insert-a-node.{}",
2930 insert_sequence.hash
2931 ))),
2932 );
2933 let edge_into_insert = Edge::create(
2934 conn,
2935 insert_start_node_id,
2936 6,
2937 Strand::Forward,
2938 insert_node_id,
2939 0,
2940 Strand::Forward,
2941 );
2942 let edge_out_of_insert = Edge::create(
2943 conn,
2944 insert_node_id,
2945 8,
2946 Strand::Forward,
2947 insert_end_node_id,
2948 4,
2949 Strand::Forward,
2950 );
2951 let ref_heal_1 = Edge::create(
2952 conn,
2953 insert_start_node_id,
2954 6,
2955 Strand::Forward,
2956 insert_start_node_id,
2957 6,
2958 Strand::Forward,
2959 );
2960 let ref_heal_2 = Edge::create(
2961 conn,
2962 insert_end_node_id,
2963 4,
2964 Strand::Forward,
2965 insert_end_node_id,
2966 4,
2967 Strand::Forward,
2968 );
2969
2970 let edge_ids = [
2971 &edge_into_insert.id,
2972 &edge_out_of_insert.id,
2973 &ref_heal_1.id,
2974 &ref_heal_2.id,
2975 ];
2976 let block_group_edges = edge_ids
2977 .iter()
2978 .enumerate()
2979 .map(|(i, edge_id)| BlockGroupEdgeData {
2980 block_group_id: block_group1_id,
2981 edge_id: *(*edge_id),
2982 chromosome_index: if i < 2 { 1 } else { 0 },
2983 phased: 0,
2984 })
2985 .collect::<Vec<BlockGroupEdgeData>>();
2986 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2987
2988 let insert_path =
2989 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
2990 assert_eq!(
2991 insert_path.sequence(conn),
2992 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
2993 );
2994
2995 let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
2996 let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
2997
2998 let insert2_sequence = Sequence::new()
2999 .sequence_type("DNA")
3000 .sequence("TTTTTTTT")
3001 .save(conn);
3002 let insert2_node_id = Node::create(
3003 conn,
3004 &insert2_sequence.hash,
3005 &HashId(calculate_hash(&format!(
3006 "test-insert-t-node.{}",
3007 insert2_sequence.hash
3008 ))),
3009 );
3010 let edge_into_insert2 = Edge::create(
3011 conn,
3012 insert2_start_node_id,
3013 6,
3014 Strand::Forward,
3015 insert2_node_id,
3016 0,
3017 Strand::Forward,
3018 );
3019 let edge_out_of_insert2 = Edge::create(
3020 conn,
3021 insert2_node_id,
3022 8,
3023 Strand::Forward,
3024 insert2_end_node_id,
3025 4,
3026 Strand::Forward,
3027 );
3028 let ref_heal_1 = Edge::create(
3029 conn,
3030 insert2_start_node_id,
3031 6,
3032 Strand::Forward,
3033 insert2_start_node_id,
3034 6,
3035 Strand::Forward,
3036 );
3037 let ref_heal_2 = Edge::create(
3038 conn,
3039 insert2_end_node_id,
3040 4,
3041 Strand::Forward,
3042 insert2_end_node_id,
3043 4,
3044 Strand::Forward,
3045 );
3046
3047 let edge_ids = [
3048 &edge_into_insert2.id,
3049 &edge_out_of_insert2.id,
3050 &ref_heal_1.id,
3051 &ref_heal_2.id,
3052 ];
3053 let block_group_edges = edge_ids
3054 .iter()
3055 .enumerate()
3056 .map(|(i, edge_id)| BlockGroupEdgeData {
3057 block_group_id: block_group1_id,
3058 edge_id: *(*edge_id),
3059 chromosome_index: if i < 2 { 1 } else { 0 },
3060 phased: 0,
3061 })
3062 .collect::<Vec<BlockGroupEdgeData>>();
3063 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3064
3065 let insert2_path =
3066 insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3067 assert_eq!(
3068 insert2_path.sequence(conn),
3069 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3070 );
3071
3072 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3073 assert_eq!(
3074 all_sequences,
3075 HashSet::from_iter(vec![
3076 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3077 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3078 "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3079 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3080 ])
3081 );
3082
3083 let mut blocks = intervaltree
3084 .query(Range { start: 15, end: 36 })
3085 .map(|x| x.value)
3086 .collect::<Vec<_>>();
3087 blocks.sort_by(|a, b| a.start.cmp(&b.start));
3088 let start_block = blocks[0];
3089 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3090 let end_block = blocks[blocks.len() - 1];
3091 let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3092
3093 let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
3094 BlockGroup::derive_subgraph(
3095 conn,
3096 "test",
3097 "test",
3098 &block_group1_id,
3099 &start_block,
3100 &end_block,
3101 start_node_coordinate,
3102 end_node_coordinate,
3103 &block_group2.id,
3104 );
3105 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3106 assert_eq!(
3107 all_sequences2,
3108 HashSet::from_iter(vec![
3109 "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3110 "TAAAAAAAACCCCCCGGGGGG".to_string(),
3111 "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3112 "TAAAAAAAACCTTTTTTTTGG".to_string(),
3113 ])
3114 );
3115 }
3116
3117 #[test]
3118 fn test_derive_subgraph_two_independent_insertions_and_one_deletion() {
3119 let conn = &get_connection(None).unwrap();
3128 Collection::create(conn, "test");
3129 let (block_group1_id, original_path) = setup_block_group(conn);
3130
3131 let intervaltree = original_path.intervaltree(conn);
3132 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3133 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3134
3135 let insert_sequence = Sequence::new()
3136 .sequence_type("DNA")
3137 .sequence("AAAAAAAA")
3138 .save(conn);
3139 let insert_node_id = Node::create(
3140 conn,
3141 &insert_sequence.hash,
3142 &HashId(calculate_hash(&format!(
3143 "test-insert-a-node.{}",
3144 insert_sequence.hash
3145 ))),
3146 );
3147 let edge_into_insert = Edge::create(
3148 conn,
3149 insert_start_node_id,
3150 6,
3151 Strand::Forward,
3152 insert_node_id,
3153 0,
3154 Strand::Forward,
3155 );
3156 let edge_out_of_insert = Edge::create(
3157 conn,
3158 insert_node_id,
3159 8,
3160 Strand::Forward,
3161 insert_end_node_id,
3162 4,
3163 Strand::Forward,
3164 );
3165 let ref_heal_1 = Edge::create(
3166 conn,
3167 insert_start_node_id,
3168 6,
3169 Strand::Forward,
3170 insert_start_node_id,
3171 6,
3172 Strand::Forward,
3173 );
3174 let ref_heal_2 = Edge::create(
3175 conn,
3176 insert_end_node_id,
3177 4,
3178 Strand::Forward,
3179 insert_end_node_id,
3180 4,
3181 Strand::Forward,
3182 );
3183
3184 let edge_ids = [
3185 &edge_into_insert.id,
3186 &edge_out_of_insert.id,
3187 &ref_heal_1.id,
3188 &ref_heal_2.id,
3189 ];
3190 let block_group_edges = edge_ids
3191 .iter()
3192 .map(|edge_id| BlockGroupEdgeData {
3193 block_group_id: block_group1_id,
3194 edge_id: *(*edge_id),
3195 chromosome_index: NO_CHROMOSOME_INDEX,
3196 phased: 0,
3197 })
3198 .collect::<Vec<BlockGroupEdgeData>>();
3199 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3200
3201 let insert_path =
3202 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3203 assert_eq!(
3204 insert_path.sequence(conn),
3205 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3206 );
3207
3208 let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3209 let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3210
3211 let insert2_sequence = Sequence::new()
3212 .sequence_type("DNA")
3213 .sequence("TTTTTTTT")
3214 .save(conn);
3215 let insert2_node_id = Node::create(
3216 conn,
3217 &insert2_sequence.hash,
3218 &HashId(calculate_hash(&format!(
3219 "test-insert-t-node.{}",
3220 insert2_sequence.hash
3221 ))),
3222 );
3223 let edge_into_insert2 = Edge::create(
3224 conn,
3225 insert2_start_node_id,
3226 6,
3227 Strand::Forward,
3228 insert2_node_id,
3229 0,
3230 Strand::Forward,
3231 );
3232 let edge_out_of_insert2 = Edge::create(
3233 conn,
3234 insert2_node_id,
3235 8,
3236 Strand::Forward,
3237 insert2_end_node_id,
3238 4,
3239 Strand::Forward,
3240 );
3241 let ref_heal_1 = Edge::create(
3242 conn,
3243 insert2_start_node_id,
3244 6,
3245 Strand::Forward,
3246 insert2_start_node_id,
3247 6,
3248 Strand::Forward,
3249 );
3250 let ref_heal_2 = Edge::create(
3251 conn,
3252 insert2_end_node_id,
3253 4,
3254 Strand::Forward,
3255 insert2_end_node_id,
3256 4,
3257 Strand::Forward,
3258 );
3259
3260 let edge_ids = [
3261 &edge_into_insert2.id,
3262 &edge_out_of_insert2.id,
3263 &ref_heal_1.id,
3264 &ref_heal_2.id,
3265 ];
3266 let block_group_edges = edge_ids
3267 .iter()
3268 .map(|edge_id| BlockGroupEdgeData {
3269 block_group_id: block_group1_id,
3270 edge_id: *(*edge_id),
3271 chromosome_index: NO_CHROMOSOME_INDEX,
3272 phased: 0,
3273 })
3274 .collect::<Vec<BlockGroupEdgeData>>();
3275 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3276
3277 let insert2_path =
3278 insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3279 assert_eq!(
3280 insert2_path.sequence(conn),
3281 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3282 );
3283
3284 let deletion_end_node_id = intervaltree.query_point(38).next().unwrap().value.node_id;
3285 let deletion_edge = Edge::create(
3286 conn,
3287 insert_node_id,
3288 8,
3289 Strand::Forward,
3290 deletion_end_node_id,
3291 8,
3292 Strand::Forward,
3293 );
3294 let ref_heal_1 = Edge::create(
3295 conn,
3296 insert_node_id,
3297 8,
3298 Strand::Forward,
3299 insert_node_id,
3300 8,
3301 Strand::Forward,
3302 );
3303 let ref_heal_2 = Edge::create(
3304 conn,
3305 deletion_end_node_id,
3306 8,
3307 Strand::Forward,
3308 deletion_end_node_id,
3309 8,
3310 Strand::Forward,
3311 );
3312 let block_group_edges = [
3313 BlockGroupEdgeData {
3314 block_group_id: block_group1_id,
3315 edge_id: deletion_edge.id,
3316 chromosome_index: NO_CHROMOSOME_INDEX,
3317 phased: 0,
3318 },
3319 BlockGroupEdgeData {
3320 block_group_id: block_group1_id,
3321 edge_id: ref_heal_1.id,
3322 chromosome_index: NO_CHROMOSOME_INDEX,
3323 phased: 0,
3324 },
3325 BlockGroupEdgeData {
3326 block_group_id: block_group1_id,
3327 edge_id: ref_heal_2.id,
3328 chromosome_index: NO_CHROMOSOME_INDEX,
3329 phased: 0,
3330 },
3331 ];
3332 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3333
3334 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3335 assert_eq!(
3336 all_sequences,
3337 HashSet::from_iter(vec![
3338 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3339 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3340 "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3341 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3342 "AAAAAAAAAATTTTTTAAAAAAAAGG".to_string(), ])
3344 );
3345
3346 let mut blocks = intervaltree
3347 .query(Range { start: 15, end: 36 })
3348 .map(|x| x.value)
3349 .collect::<Vec<_>>();
3350 blocks.sort_by(|a, b| a.start.cmp(&b.start));
3351 let start_block = blocks[0];
3352 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3353 let end_block = blocks[blocks.len() - 1];
3354 let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3355
3356 let block_group2 = BlockGroup::create(conn, "test", None, "chr1.1");
3357 BlockGroup::derive_subgraph(
3358 conn,
3359 "test",
3360 "test",
3361 &block_group1_id,
3362 &start_block,
3363 &end_block,
3364 start_node_coordinate,
3365 end_node_coordinate,
3366 &block_group2.id,
3367 );
3368 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3369 assert_eq!(
3370 all_sequences2,
3371 HashSet::from_iter(vec![
3374 "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3375 "TAAAAAAAACCCCCCGGGGGG".to_string(),
3376 "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3377 "TAAAAAAAACCTTTTTTTTGG".to_string(),
3378 ])
3379 );
3380 }
3381 }
3382}