1use std::{
2 collections::{HashMap, HashSet},
3 hash::Hash,
4 rc::Rc,
5};
6
7use gen_core::{
8 HashId, INDETERMINATE_CHROMOSOME_INDEX, NO_CHROMOSOME_INDEX, NodeIntervalBlock,
9 PATH_END_NODE_ID, PATH_START_NODE_ID, PRESERVE_EDIT_SITE_CHROMOSOME_INDEX, PathBlock, Strand,
10 calculate_hash, is_end_node, is_start_node, is_terminal, traits::Capnp,
11};
12use gen_graph::{
13 GenGraph, GraphNode, all_intermediate_edges, all_reachable_nodes, all_simple_paths,
14 flatten_to_interval_tree,
15};
16use intervaltree::IntervalTree;
17use rusqlite::{Row, params, types::Value as SQLValue};
18use serde::{Deserialize, Serialize};
19
20use crate::{
21 accession::{Accession, AccessionEdge, AccessionEdgeData, AccessionPath},
22 block_group_edge::{AugmentedEdgeData, BlockGroupEdge, BlockGroupEdgeData},
23 db::GraphConnection,
24 edge::{Edge, EdgeData, GroupBlock},
25 errors::{ChangeError, QueryError},
26 gen_models_capnp::block_group,
27 node::Node,
28 path::{Path, PathData},
29 path_edge::PathEdge,
30 sample::Sample,
31 traits::*,
32};
33
34#[derive(Clone, Debug, Deserialize, Eq, Hash, Serialize, PartialEq)]
35pub struct BlockGroup {
36 pub id: HashId,
37 pub collection_name: String,
38 pub sample_name: String,
39 pub name: String,
40 pub created_on: i64,
41 pub parent_block_group_id: Option<HashId>,
42 pub is_default: bool,
43}
44
45impl<'a> Capnp<'a> for BlockGroup {
46 type Builder = block_group::Builder<'a>;
47 type Reader = block_group::Reader<'a>;
48
49 fn write_capnp(&self, builder: &mut Self::Builder) {
50 builder.set_id(&self.id.0).unwrap();
51 builder.set_collection_name(&self.collection_name);
52 builder.set_sample_name(&self.sample_name);
53 builder.set_name(&self.name);
54 builder.set_created_on(self.created_on);
55 match &self.parent_block_group_id {
56 None => {
57 builder.reborrow().get_parent_block_group_id().set_none(());
58 }
59 Some(parent_block_group_id) => {
60 builder
61 .reborrow()
62 .get_parent_block_group_id()
63 .set_some(&parent_block_group_id.0)
64 .unwrap();
65 }
66 }
67 builder.set_is_default(self.is_default);
68 }
69
70 fn read_capnp(reader: Self::Reader) -> Self {
71 let id = reader
72 .get_id()
73 .unwrap()
74 .as_slice()
75 .unwrap()
76 .try_into()
77 .unwrap();
78 let collection_name = reader.get_collection_name().unwrap().to_string().unwrap();
79 let sample_name = reader.get_sample_name().unwrap().to_string().unwrap();
80 let name = reader.get_name().unwrap().to_string().unwrap();
81 let created_on = reader.get_created_on();
82 let parent_block_group_id =
83 reader
84 .get_parent_block_group_id()
85 .which()
86 .ok()
87 .and_then(|parent_block_group_id| match parent_block_group_id {
88 block_group::parent_block_group_id::None(()) => None,
89 block_group::parent_block_group_id::Some(parent_block_group_id) => {
90 if let Ok(parent_block_group_id) = parent_block_group_id {
91 if let Some(parent_block_group_id) = parent_block_group_id.as_slice() {
92 parent_block_group_id.try_into().ok()
93 } else {
94 None
95 }
96 } else {
97 None
98 }
99 }
100 });
101 let is_default = reader.get_is_default();
102
103 BlockGroup {
104 id,
105 collection_name,
106 sample_name,
107 name,
108 created_on,
109 parent_block_group_id,
110 is_default,
111 }
112 }
113}
114
115#[derive(Clone, Debug, Eq, Hash, PartialEq)]
116pub struct BlockGroupData<'a> {
117 pub collection_name: &'a str,
118 pub sample_name: &'a str,
119 pub name: String,
120}
121
122#[derive(Clone, Debug, Default)]
123pub struct NewBlockGroup<'a> {
124 pub collection_name: &'a str,
125 pub sample_name: &'a str,
126 pub name: &'a str,
127 pub parent_block_group_id: Option<&'a HashId>,
128 pub is_default: bool,
129}
130
131#[derive(Clone, Debug)]
132pub struct PathChange {
133 pub block_group_id: HashId,
134 pub path: Path,
135 pub path_accession: Option<String>,
136 pub start: i64,
137 pub end: i64,
138 pub block: PathBlock,
139 pub chromosome_index: i64,
140 pub phased: i64,
141 pub preserve_edge: bool,
142}
143
144pub struct PathCache<'a> {
145 pub cache: HashMap<PathData, Path>,
146 pub intervaltree_cache: HashMap<Path, IntervalTree<i64, NodeIntervalBlock>>,
147 pub conn: &'a GraphConnection,
148}
149
150impl<'a> PathCache<'a> {
151 pub fn new(conn: &'a GraphConnection) -> PathCache<'a> {
152 PathCache {
153 cache: HashMap::<PathData, Path>::new(),
154 intervaltree_cache: HashMap::<Path, IntervalTree<i64, NodeIntervalBlock>>::new(),
155 conn,
156 }
157 }
158
159 pub fn lookup(path_cache: &mut PathCache, block_group_id: &HashId, name: String) -> Path {
160 let path_key = PathData {
161 name: name.clone(),
162 block_group_id: *block_group_id,
163 };
164 let path_lookup = path_cache.cache.get(&path_key);
165 if let Some(path) = path_lookup {
166 path.clone()
167 } else {
168 let conn = path_cache.conn;
169 let new_path = Path::query(
170 conn,
171 "select * from paths where block_group_id = ?1 AND name = ?2",
172 params![block_group_id, name],
173 )[0]
174 .clone();
175
176 path_cache.cache.insert(path_key, new_path.clone());
177 let tree = new_path.intervaltree(conn);
178 path_cache.intervaltree_cache.insert(new_path.clone(), tree);
179 new_path
180 }
181 }
182
183 pub fn get_intervaltree<'b>(
184 path_cache: &'b mut PathCache<'_>,
185 path: &Path,
186 ) -> &'b IntervalTree<i64, NodeIntervalBlock> {
187 if !path_cache.intervaltree_cache.contains_key(path) {
188 let tree = path.intervaltree(path_cache.conn);
189 path_cache.intervaltree_cache.insert(path.clone(), tree);
190 }
191
192 path_cache.intervaltree_cache.get(path).unwrap()
193 }
194}
195
196impl BlockGroup {
197 pub fn create(conn: &GraphConnection, new_block_group: NewBlockGroup<'_>) -> BlockGroup {
198 Sample::get_or_create(conn, new_block_group.sample_name);
199 let hash = BlockGroup::get_id(
200 new_block_group.collection_name,
201 new_block_group.sample_name,
202 new_block_group.name,
203 new_block_group.parent_block_group_id,
204 );
205 let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
206 let query = "INSERT INTO block_groups (
207 id,
208 collection_name,
209 sample_name,
210 name,
211 created_on,
212 parent_block_group_id,
213 is_default
214 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);";
215 let mut stmt = conn.prepare(query).unwrap();
216 let bg = BlockGroup {
217 id: hash,
218 collection_name: new_block_group.collection_name.to_string(),
219 sample_name: new_block_group.sample_name.to_string(),
220 name: new_block_group.name.to_string(),
221 created_on: timestamp,
222 parent_block_group_id: new_block_group.parent_block_group_id.copied(),
223 is_default: new_block_group.is_default,
224 };
225 match stmt.execute(params![
226 hash,
227 new_block_group.collection_name,
228 new_block_group.sample_name,
229 new_block_group.name,
230 timestamp,
231 new_block_group.parent_block_group_id,
232 new_block_group.is_default,
233 ]) {
234 Ok(_) => bg,
235 Err(rusqlite::Error::SqliteFailure(err, _details)) => {
236 if err.code == rusqlite::ErrorCode::ConstraintViolation {
237 bg
238 } else {
239 panic!("something bad happened querying the database")
240 }
241 }
242 Err(_) => {
243 panic!("something bad happened querying the database")
244 }
245 }
246 }
247
248 pub fn delete(conn: &GraphConnection, collection_name: &str, sample_name: &str, name: &str) {
249 let query = "delete from block_groups where collection_name = ?1 and sample_name = ?2 and name = ?3;";
250 conn.execute(
251 query,
252 params![
253 collection_name.to_string(),
254 sample_name.to_string(),
255 name.to_string()
256 ],
257 )
258 .unwrap();
259 }
260
261 pub fn get_by_id(conn: &GraphConnection, id: &HashId) -> BlockGroup {
262 let query = "SELECT * FROM block_groups WHERE id = ?1";
263 let mut stmt = conn.prepare(query).unwrap();
264 match stmt.query_row([id], |row| Ok(Self::process_row(row))) {
265 Ok(res) => res,
266 Err(rusqlite::Error::QueryReturnedNoRows) => panic!("No block group with id {id}"),
267 Err(_) => {
268 panic!("something bad happened querying the database")
269 }
270 }
271 }
272
273 fn copy_paths_and_accessions_into(
274 &self,
275 conn: &GraphConnection,
276 target_block_group_id: &HashId,
277 ) {
278 let existing_paths = Path::query(
279 conn,
280 "SELECT * from paths where block_group_id = ?1;",
281 params![self.id],
282 );
283
284 let mut path_map = HashMap::new();
285 for path in &existing_paths {
286 let edge_ids = PathEdge::edges_for_path(conn, &path.id)
287 .into_iter()
288 .map(|edge| edge.id)
289 .collect::<Vec<_>>();
290 let new_path = Path::create(conn, &path.name, target_block_group_id, &edge_ids);
291 path_map.insert(path.id, new_path.id);
292 }
293
294 for accession in Accession::query(
295 conn,
296 "select * from accessions where path_id IN rarray(?1);",
297 rusqlite::params!(Rc::new(
298 existing_paths
299 .iter()
300 .map(|path| SQLValue::from(path.id))
301 .collect::<Vec<SQLValue>>()
302 )),
303 ) {
304 let edges = AccessionPath::query(
305 conn,
306 "Select * from accession_paths where accession_id = ?1 order by index_in_path ASC;",
307 rusqlite::params!(SQLValue::from(accession.id)),
308 );
309 let new_path_id = path_map.get(&accession.path_id).unwrap();
310 let obj = Accession::get_or_create(
311 conn,
312 &accession.name,
313 new_path_id,
314 accession.parent_accession_id.as_ref(),
315 );
316 AccessionPath::create(
317 conn,
318 &obj.id,
319 &edges.iter().map(|ap| ap.edge_id).collect::<Vec<_>>(),
320 );
321 }
322 }
323
324 pub fn get_or_create_sample_block_groups(
325 conn: &GraphConnection,
326 collection_name: &str,
327 sample_name: &str,
328 group_name: &str,
329 parent_samples: Vec<String>,
330 ) -> Result<Vec<BlockGroup>, QueryError> {
331 let existing_block_groups = BlockGroup::query(
332 conn,
333 "select * from block_groups
334 where collection_name = ?1 AND sample_name = ?2 AND name = ?3
335 order by created_on, id",
336 params![collection_name, sample_name, group_name],
337 );
338
339 if !existing_block_groups.is_empty() {
340 return Ok(existing_block_groups);
341 }
342
343 let parent_block_groups = BlockGroup::find_parent_block_groups(
344 conn,
345 collection_name,
346 sample_name,
347 group_name,
348 &parent_samples,
349 );
350
351 if parent_block_groups.is_empty() {
352 return Ok(vec![BlockGroup::create(
353 conn,
354 NewBlockGroup {
355 collection_name,
356 sample_name,
357 name: group_name,
358 ..Default::default()
359 },
360 )]);
361 }
362
363 let new_block_groups = parent_block_groups
364 .iter()
365 .map(|parent_block_group| {
366 let new_block_group = BlockGroup::create(
367 conn,
368 NewBlockGroup {
369 collection_name,
370 sample_name,
371 name: group_name,
372 parent_block_group_id: Some(&parent_block_group.id),
373 ..Default::default()
374 },
375 );
376 new_block_group.copy_contents_from(conn, parent_block_group);
377 new_block_group
378 })
379 .collect::<Vec<_>>();
380
381 Ok(new_block_groups)
382 }
383
384 pub fn find_parent_block_groups(
385 conn: &GraphConnection,
386 collection_name: &str,
387 sample_name: &str,
388 group_name: &str,
389 parent_samples: &[String],
390 ) -> Vec<BlockGroup> {
391 let parent_samples = if parent_samples.is_empty() {
392 Sample::get_parent_names(conn, sample_name)
393 } else {
394 parent_samples.to_vec()
395 };
396 if parent_samples.is_empty() {
397 return vec![];
398 }
399
400 BlockGroup::query(
401 conn,
402 "select * from block_groups
403 where collection_name = ?1 AND sample_name IN rarray(?2) AND name = ?3
404 order by sample_name, created_on, id",
405 params![
406 collection_name,
407 Rc::new(
408 parent_samples
409 .iter()
410 .cloned()
411 .map(SQLValue::from)
412 .collect::<Vec<_>>()
413 ),
414 group_name
415 ],
416 )
417 }
418
419 fn copy_contents_from(&self, conn: &GraphConnection, source_block_group: &BlockGroup) {
420 let new_block_group_edges =
421 BlockGroupEdge::edges_for_block_group(conn, &source_block_group.id)
422 .into_iter()
423 .map(|edge| BlockGroupEdgeData {
424 block_group_id: self.id,
425 edge_id: edge.edge.id,
426 chromosome_index: edge.chromosome_index,
427 phased: edge.phased,
428 })
429 .collect::<Vec<_>>();
430 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
431 source_block_group.copy_paths_and_accessions_into(conn, &self.id);
432 }
433
434 pub fn get_id(
435 collection_name: &str,
436 sample_name: &str,
437 group_name: &str,
438 parent_block_group_id: Option<&HashId>,
439 ) -> HashId {
440 let lineage_key = parent_block_group_id
441 .map(ToString::to_string)
442 .unwrap_or_default();
443 HashId(calculate_hash(&format!(
444 "{collection_name}:{sample_name}:{group_name}:{lineage_key}"
445 )))
446 }
447
448 pub fn get_graph(conn: &GraphConnection, block_group_id: &HashId) -> GenGraph {
449 let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
450 let blocks = Edge::blocks_from_edges(conn, &edges);
451 let (graph, _) = Edge::build_graph(&edges, &blocks);
452 graph
453 }
454
455 pub fn prune_graph(graph: &mut GenGraph) {
456 let mut root_nodes = HashSet::new();
459 let mut edges_to_remove: Vec<(GraphNode, GraphNode)> = vec![];
460 for node in graph.nodes() {
461 if node.node_id == PATH_START_NODE_ID {
462 root_nodes.insert(node);
463 }
464 let mut edges_by_ci: HashMap<i64, (GraphNode, GraphNode, i64)> = HashMap::new();
465 for (source_node, target_node, edge_weights) in graph.edges(node) {
466 for edge_weight in edge_weights {
467 if edge_weight.chromosome_index == NO_CHROMOSOME_INDEX {
468 continue;
469 }
470 if edge_weight.chromosome_index == INDETERMINATE_CHROMOSOME_INDEX {
471 continue;
472 }
473 if edge_weight.chromosome_index == PRESERVE_EDIT_SITE_CHROMOSOME_INDEX {
474 edges_to_remove.push((source_node, target_node));
475 continue;
476 }
477 edges_by_ci
478 .entry(edge_weight.chromosome_index)
479 .and_modify(|(source, target, created_on)| {
480 if edge_weight.created_on > *created_on {
481 edges_to_remove.push((*source, *target));
482 *source = source_node;
483 *target = target_node;
484 *created_on = edge_weight.created_on;
485 } else {
486 edges_to_remove.push((source_node, target_node));
487 }
488 })
489 .or_insert((source_node, target_node, edge_weight.created_on));
490 }
491 }
492 }
493
494 for (source, target) in edges_to_remove.iter() {
495 graph.remove_edge(*source, *target);
496 }
497
498 let reachable_nodes = all_reachable_nodes(&*graph, &Vec::from_iter(root_nodes));
499 let mut to_remove = vec![];
500 for node in graph.nodes() {
501 if !reachable_nodes.contains(&node) {
502 to_remove.push(node);
503 }
504 }
505 for node in to_remove {
506 graph.remove_node(node);
507 }
508 }
509
510 pub fn get_all_sequences(
511 conn: &GraphConnection,
512 block_group_id: &HashId,
513 _prune: bool,
514 ) -> HashSet<String> {
515 let edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id)
516 .into_iter()
517 .filter(|edge| edge.chromosome_index != PRESERVE_EDIT_SITE_CHROMOSOME_INDEX)
518 .collect::<Vec<_>>();
519 let blocks = Edge::blocks_from_edges(conn, &edges);
520
521 let (mut graph, _) = Edge::build_graph(&edges, &blocks);
522 BlockGroup::prune_graph(&mut graph);
523
524 let mut start_nodes = vec![];
525 let mut end_nodes = vec![];
526 for node in graph.nodes() {
527 if is_start_node(node.node_id) {
528 start_nodes.push(node);
529 } else if is_end_node(node.node_id) {
530 end_nodes.push(node);
531 }
532 }
533 let blocks_by_node = blocks
534 .iter()
535 .map(|block| {
536 (
537 GraphNode {
538 node_id: block.node_id,
539 sequence_start: block.start,
540 sequence_end: block.end,
541 },
542 block,
543 )
544 })
545 .collect::<HashMap<GraphNode, &GroupBlock>>();
546 let mut sequences = HashSet::<String>::new();
547
548 for start_node in start_nodes {
549 for end_node in &end_nodes {
550 if start_node == *end_node {
552 let block = blocks_by_node.get(&start_node).unwrap();
553 if block.node_id != PATH_START_NODE_ID && block.node_id != PATH_END_NODE_ID {
554 sequences.insert(block.sequence());
555 }
556 } else {
557 for path in all_simple_paths(&graph, start_node, *end_node) {
558 let mut current_sequence = "".to_string();
559 for node in path {
560 let block = blocks_by_node.get(&node).unwrap();
561 let block_sequence = block.sequence();
562 current_sequence.push_str(&block_sequence);
563 }
564 sequences.insert(current_sequence);
565 }
566 }
567 }
568 }
569
570 sequences
571 }
572
573 pub fn add_accession(
574 conn: &GraphConnection,
575 path: &Path,
576 name: &str,
577 start: i64,
578 end: i64,
579 cache: &mut PathCache,
580 ) -> Accession {
581 let tree = PathCache::get_intervaltree(cache, path);
582 let start_blocks: Vec<&NodeIntervalBlock> =
583 tree.query_point(start).map(|x| &x.value).collect();
584 assert_eq!(start_blocks.len(), 1);
585 let start_block = start_blocks[0];
586 let end_blocks: Vec<&NodeIntervalBlock> = tree.query_point(end).map(|x| &x.value).collect();
587 assert_eq!(end_blocks.len(), 1);
588 let end_block = end_blocks[0];
589 let start_edge = AccessionEdgeData {
592 source_node_id: PATH_START_NODE_ID,
593 source_coordinate: -1,
594 source_strand: Strand::Forward,
595 target_node_id: start_block.node_id,
596 target_coordinate: start - start_block.start + start_block.sequence_start,
597 target_strand: Strand::Forward,
598 chromosome_index: 0,
599 };
600 let end_edge = AccessionEdgeData {
601 source_node_id: end_block.node_id,
602 source_coordinate: end - end_block.start + end_block.sequence_start,
603 source_strand: Strand::Forward,
604 target_node_id: PATH_END_NODE_ID,
605 target_coordinate: -1,
606 target_strand: Strand::Forward,
607 chromosome_index: 0,
608 };
609 let accession =
610 Accession::create(conn, name, &path.id, None).expect("Unable to create accession.");
611 let mut path_edges = vec![start_edge];
612 if start_block == end_block {
613 path_edges.push(end_edge);
614 } else {
615 let mut in_range = false;
616 let path_blocks: Vec<&NodeIntervalBlock> = tree
617 .iter_sorted()
618 .map(|x| &x.value)
619 .filter(|block| {
620 if *block == start_block {
621 in_range = true;
622 } else if *block == end_block {
623 in_range = false;
624 return true;
625 }
626 in_range
627 })
628 .collect::<Vec<_>>();
629 for (block, next_block) in path_blocks.iter().zip(path_blocks[1..].iter()) {
631 path_edges.push(AccessionEdgeData {
632 source_node_id: block.node_id,
633 source_coordinate: block.sequence_end,
634 source_strand: block.strand,
635 target_node_id: next_block.node_id,
636 target_coordinate: next_block.sequence_start,
637 target_strand: next_block.strand,
638 chromosome_index: 0,
639 })
640 }
641 path_edges.push(end_edge);
642 }
643 AccessionPath::create(
644 conn,
645 &accession.id,
646 &AccessionEdge::bulk_create(conn, &path_edges),
647 );
648 accession
649 }
650
651 pub fn insert_changes(
652 conn: &GraphConnection,
653 changes: &[PathChange],
654 cache: &mut PathCache,
655 modify_blockgroup: bool,
656 ) -> Result<(), ChangeError> {
657 let mut new_augmented_edges_by_block_group =
658 HashMap::<&HashId, Vec<AugmentedEdgeData>>::new();
659 let mut new_accession_edges = HashMap::new();
660 let mut tree_map = HashMap::new();
661 for change in changes {
662 let tree = if modify_blockgroup {
663 tree_map.entry(change.block_group_id).or_insert_with(|| {
664 BlockGroup::intervaltree_for(conn, &change.block_group_id, true)
665 })
666 } else {
667 PathCache::get_intervaltree(cache, &change.path)
668 };
669 let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
670 new_augmented_edges_by_block_group
671 .entry(&change.block_group_id)
672 .and_modify(|new_edge_data| new_edge_data.extend(new_augmented_edges.clone()))
673 .or_insert_with(|| new_augmented_edges.clone());
674 if let Some(accession) = &change.path_accession {
675 new_accession_edges
676 .entry((&change.path, accession))
677 .and_modify(|new_edge_data: &mut Vec<AugmentedEdgeData>| {
678 new_edge_data.extend(new_augmented_edges.clone())
679 })
680 .or_insert_with(|| new_augmented_edges.clone());
681 }
682 }
683
684 let mut edge_data_map = HashMap::new();
685
686 for (block_group_id, new_augmented_edges) in new_augmented_edges_by_block_group {
687 let new_edges = new_augmented_edges
688 .iter()
689 .map(|augmented_edge| augmented_edge.edge_data)
690 .collect::<Vec<_>>();
691 let edge_ids = Edge::bulk_create(conn, &new_edges);
692 for (i, edge_data) in new_edges.iter().enumerate() {
693 edge_data_map.insert(*edge_data, edge_ids[i]);
694 }
695 let new_block_group_edges = edge_ids
696 .iter()
697 .enumerate()
698 .map(|(i, edge_id)| BlockGroupEdgeData {
699 block_group_id: *block_group_id,
700 edge_id: *edge_id,
701 chromosome_index: new_augmented_edges[i].chromosome_index,
702 phased: new_augmented_edges[i].phased,
703 })
704 .collect::<Vec<_>>();
705 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
706 }
707
708 for ((path, accession_name), path_edges) in new_accession_edges {
709 match Accession::get(
710 conn,
711 "select * from accessions where name = ?1 AND path_id = ?2",
712 params![accession_name, path.id],
713 ) {
714 Ok(_) => {
715 println!(
716 "accession already exists, consider a better matching algorithm to determine if this is an error."
717 );
718 }
719 Err(_) => {
720 let acc_edges = AccessionEdge::bulk_create(
721 conn,
722 &path_edges
723 .iter()
724 .map(AccessionEdgeData::from)
725 .collect::<Vec<_>>(),
726 );
727 let acc = Accession::create(conn, accession_name, &path.id, None)
728 .expect("Accession could not be created.");
729 AccessionPath::create(conn, &acc.id, &acc_edges);
730 }
731 }
732 }
733 Ok(())
734 }
735
736 #[allow(clippy::ptr_arg)]
737 #[allow(clippy::needless_late_init)]
738 pub fn insert_change(
739 conn: &GraphConnection,
740 change: &PathChange,
741 tree: &IntervalTree<i64, NodeIntervalBlock>,
742 ) -> Result<(), ChangeError> {
743 let new_augmented_edges = BlockGroup::set_up_new_edges(change, tree)?;
744 let new_edges = new_augmented_edges
745 .iter()
746 .map(|augmented_edge| augmented_edge.edge_data)
747 .collect::<Vec<_>>();
748 let edge_ids = Edge::bulk_create(conn, &new_edges);
749 let new_block_group_edges = edge_ids
750 .iter()
751 .enumerate()
752 .map(|(i, edge_id)| BlockGroupEdgeData {
753 block_group_id: change.block_group_id,
754 edge_id: *edge_id,
755 chromosome_index: new_augmented_edges[i].chromosome_index,
756 phased: new_augmented_edges[i].phased,
757 })
758 .collect::<Vec<_>>();
759 BlockGroupEdge::bulk_create(conn, &new_block_group_edges);
760 Ok(())
761 }
762
763 fn set_up_new_edges(
764 change: &PathChange,
765 tree: &IntervalTree<i64, NodeIntervalBlock>,
766 ) -> Result<Vec<AugmentedEdgeData>, ChangeError> {
767 let start_blocks: Vec<&NodeIntervalBlock> =
768 tree.query_point(change.start).map(|x| &x.value).collect();
769 assert_eq!(start_blocks.len(), 1);
770 let previous_start_blocks: Vec<&NodeIntervalBlock> = tree
773 .query_point(change.start - 1)
774 .map(|x| &x.value)
775 .collect();
776 assert_eq!(previous_start_blocks.len(), 1);
777 let start_block = if start_blocks[0].start == change.start {
778 previous_start_blocks[0]
781 } else {
782 start_blocks[0]
783 };
784
785 if is_start_node(start_block.node_id) && change.start < start_block.end {
792 return Err(ChangeError::OutOfBounds(format!(
793 "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
794 pos = change.start,
795 path_pos = start_block.end
796 )));
797 }
798 let end_blocks: Vec<&NodeIntervalBlock> =
799 tree.query_point(change.end).map(|x| &x.value).collect();
800 assert_eq!(end_blocks.len(), 1);
801 let end_block = end_blocks[0];
802
803 if is_end_node(end_block.node_id) && change.end > end_block.start {
804 return Err(ChangeError::OutOfBounds(format!(
805 "Invalid change specified. Coordinate {pos} is before start of path range ({path_pos}).",
806 pos = change.end,
807 path_pos = end_block.start
808 )));
809 }
810
811 let mut new_edges = vec![];
812
813 if change.block.sequence_start == change.block.sequence_end {
814 let source_coordinate = change.start - start_block.start + start_block.sequence_start;
816 let target_coordinate = change.end - end_block.start + end_block.sequence_start;
817 let mut aug_edges = vec![];
818 let new_edge = EdgeData {
819 source_node_id: start_block.node_id,
820 source_coordinate,
821 source_strand: Strand::Forward,
822 target_node_id: end_block.node_id,
823 target_coordinate,
824 target_strand: Strand::Forward,
825 };
826 aug_edges.push(AugmentedEdgeData {
827 edge_data: new_edge,
828 chromosome_index: change.chromosome_index,
829 phased: change.phased,
830 });
831
832 if change.start == 0 {
836 let target_coordinate = change.end - end_block.start + end_block.sequence_start;
837 let new_beginning_edge = EdgeData {
838 source_node_id: PATH_START_NODE_ID,
839 source_coordinate: 0,
840 source_strand: Strand::Forward,
841 target_node_id: end_block.node_id,
842 target_coordinate,
843 target_strand: Strand::Forward,
844 };
845 aug_edges.push(AugmentedEdgeData {
846 edge_data: new_beginning_edge,
847 chromosome_index: change.chromosome_index,
848 phased: change.phased,
849 });
850 if !is_terminal(end_block.node_id) {
851 new_edges.push(AugmentedEdgeData {
852 edge_data: EdgeData {
853 source_node_id: end_block.node_id,
854 source_coordinate: target_coordinate,
855 source_strand: Strand::Forward,
856 target_node_id: end_block.node_id,
857 target_coordinate,
858 target_strand: Strand::Forward,
859 },
860 chromosome_index: if change.preserve_edge {
861 0
862 } else {
863 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
864 },
865 phased: 0,
866 });
867 }
868 } else {
869 if !is_terminal(start_block.node_id) {
870 new_edges.push(AugmentedEdgeData {
871 edge_data: EdgeData {
872 source_node_id: start_block.node_id,
873 source_coordinate,
874 source_strand: Strand::Forward,
875 target_node_id: start_block.node_id,
876 target_coordinate: source_coordinate,
877 target_strand: Strand::Forward,
878 },
879 chromosome_index: if change.preserve_edge {
880 0
881 } else {
882 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
883 },
884 phased: 0,
885 });
886 };
887 if !is_terminal(end_block.node_id) {
888 new_edges.push(AugmentedEdgeData {
889 edge_data: EdgeData {
890 source_node_id: end_block.node_id,
891 source_coordinate: target_coordinate,
892 source_strand: Strand::Forward,
893 target_node_id: end_block.node_id,
894 target_coordinate,
895 target_strand: Strand::Forward,
896 },
897 chromosome_index: if change.preserve_edge {
898 0
899 } else {
900 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
901 },
902 phased: 0,
903 });
904 }
905 }
906 new_edges.extend(aug_edges);
907 } else {
911 let insertion_start_coordinate =
913 change.start - start_block.start + start_block.sequence_start;
914 let new_start_edge = EdgeData {
915 source_node_id: start_block.node_id,
916 source_coordinate: insertion_start_coordinate,
917 source_strand: Strand::Forward,
918 target_node_id: change.block.node_id,
919 target_coordinate: change.block.sequence_start,
920 target_strand: Strand::Forward,
921 };
922 let new_augmented_start_edge = AugmentedEdgeData {
923 edge_data: new_start_edge,
924 chromosome_index: change.chromosome_index,
925 phased: change.phased,
926 };
927 let insertion_end_coordinate = change.end - end_block.start + end_block.sequence_start;
928 let new_end_edge = EdgeData {
929 source_node_id: change.block.node_id,
930 source_coordinate: change.block.sequence_end,
931 source_strand: Strand::Forward,
932 target_node_id: end_block.node_id,
933 target_coordinate: insertion_end_coordinate,
934 target_strand: Strand::Forward,
935 };
936 let new_augmented_end_edge = AugmentedEdgeData {
937 edge_data: new_end_edge,
938 chromosome_index: change.chromosome_index,
939 phased: change.phased,
940 };
941
942 if change.start == 0 {
943 new_edges.push(AugmentedEdgeData {
944 edge_data: EdgeData {
945 source_node_id: PATH_START_NODE_ID,
946 source_coordinate: 0,
947 source_strand: Strand::Forward,
948 target_node_id: change.block.node_id,
949 target_coordinate: change.block.sequence_start,
950 target_strand: Strand::Forward,
951 },
952 chromosome_index: change.chromosome_index,
953 phased: 0,
954 });
955 }
956
957 if !is_terminal(start_block.node_id) {
958 new_edges.push(AugmentedEdgeData {
959 edge_data: EdgeData {
960 source_node_id: start_block.node_id,
961 source_coordinate: insertion_start_coordinate,
962 source_strand: Strand::Forward,
963 target_node_id: start_block.node_id,
964 target_coordinate: insertion_start_coordinate,
965 target_strand: Strand::Forward,
966 },
967 chromosome_index: if change.preserve_edge {
968 0
969 } else {
970 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
971 },
972 phased: 0,
973 });
974 }
975 if !is_terminal(end_block.node_id) {
976 new_edges.push(AugmentedEdgeData {
977 edge_data: EdgeData {
978 source_node_id: end_block.node_id,
979 source_coordinate: insertion_end_coordinate,
980 source_strand: Strand::Forward,
981 target_node_id: end_block.node_id,
982 target_coordinate: insertion_end_coordinate,
983 target_strand: Strand::Forward,
984 },
985 chromosome_index: if change.preserve_edge {
986 0
987 } else {
988 PRESERVE_EDIT_SITE_CHROMOSOME_INDEX
989 },
990 phased: 0,
991 });
992 }
993
994 new_edges.push(new_augmented_start_edge);
995 new_edges.push(new_augmented_end_edge);
996 }
997
998 Ok(new_edges)
999 }
1000
1001 pub fn intervaltree_for(
1002 conn: &GraphConnection,
1003 block_group_id: &HashId,
1004 remove_ambiguous_positions: bool,
1005 ) -> IntervalTree<i64, NodeIntervalBlock> {
1006 let mut graph = BlockGroup::get_graph(conn, block_group_id);
1008 BlockGroup::prune_graph(&mut graph);
1009 flatten_to_interval_tree(&graph, remove_ambiguous_positions)
1010 }
1011
1012 pub fn get_current_path(conn: &GraphConnection, block_group_id: &HashId) -> Path {
1013 let paths = Path::query(
1014 conn,
1015 "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
1016 params![block_group_id],
1017 );
1018 paths[0].clone()
1019 }
1020
1021 pub fn get_path_by_name(
1022 conn: &GraphConnection,
1023 block_group_id: &HashId,
1024 path_name: &str,
1025 ) -> Option<Path> {
1026 let paths = Path::query(
1027 conn,
1028 "SELECT * FROM paths WHERE block_group_id = ?1 ORDER BY created_on DESC",
1029 params![block_group_id],
1030 );
1031
1032 for path in &paths {
1033 if path.name == path_name {
1034 return Some(path.clone());
1035 }
1036 }
1037
1038 None
1039 }
1040
1041 #[allow(clippy::too_many_arguments)]
1042 pub fn derive_subgraph(
1043 conn: &GraphConnection,
1044 collection_name: &str,
1045 child_sample_name: &str,
1046 source_block_group_id: &HashId,
1047 start_block: &NodeIntervalBlock,
1048 end_block: &NodeIntervalBlock,
1049 start_node_coordinate: i64,
1050 end_node_coordinate: i64,
1051 target_block_group_id: &HashId,
1052 create_terminal_edges: bool,
1053 ) -> HashMap<HashId, HashId> {
1054 let current_graph = BlockGroup::get_graph(conn, source_block_group_id);
1055 let start_node = current_graph
1056 .nodes()
1057 .find(|node| {
1058 node.node_id == start_block.node_id
1059 && node.sequence_start <= start_node_coordinate
1060 && node.sequence_end >= start_node_coordinate
1061 })
1062 .unwrap();
1063 let end_node = current_graph
1064 .nodes()
1065 .find(|node| {
1066 node.node_id == end_block.node_id
1067 && node.sequence_start <= end_node_coordinate
1068 && node.sequence_end >= end_node_coordinate
1069 })
1070 .unwrap();
1071 let subgraph_edges = all_intermediate_edges(¤t_graph, start_node, end_node);
1072
1073 let subgraph_edge_ids = subgraph_edges
1075 .iter()
1076 .map(|(_to, _from, edge_info)| edge_info[0].edge_id)
1077 .collect::<Vec<_>>();
1078 let source_edges = Edge::query_by_ids(conn, &subgraph_edge_ids);
1079
1080 let mut old_node_ids = HashSet::new();
1087 for source_edge in &source_edges {
1088 old_node_ids.insert(source_edge.source_node_id);
1089 old_node_ids.insert(source_edge.target_node_id);
1090 }
1091
1092 old_node_ids.insert(start_block.node_id);
1093 old_node_ids.insert(end_block.node_id);
1094
1095 let old_nodes = Node::query_by_ids(conn, &old_node_ids);
1096 let old_nodes_by_id = old_nodes
1097 .iter()
1098 .map(|node| (node.id, node))
1099 .collect::<HashMap<_, _>>();
1100
1101 let mut new_node_ids_by_old = HashMap::new();
1102 let old_start_node = old_nodes_by_id.get(&start_block.node_id).unwrap();
1103 let new_start_node_hash = HashId(calculate_hash(&format!(
1104 "{}.{}.{}.bg-{}",
1105 collection_name, child_sample_name, old_start_node.id, target_block_group_id
1106 )));
1107 new_node_ids_by_old.insert(
1108 old_start_node.id,
1109 Node::create(conn, &old_start_node.sequence_hash, &new_start_node_hash),
1110 );
1111 let old_end_node = old_nodes_by_id.get(&end_block.node_id).unwrap();
1112 let new_end_node_hash = HashId(calculate_hash(&format!(
1113 "{}.{}.{}.bg-{}",
1114 collection_name, child_sample_name, old_end_node.id, target_block_group_id
1115 )));
1116 new_node_ids_by_old.insert(
1117 old_end_node.id,
1118 Node::create(conn, &old_end_node.sequence_hash, &new_end_node_hash),
1119 );
1120
1121 let mut new_edges = vec![];
1122 for source_edge in &source_edges {
1123 let new_source_node_id = *new_node_ids_by_old
1124 .entry(source_edge.source_node_id)
1125 .or_insert_with(|| {
1126 let old_source_node = old_nodes_by_id.get(&source_edge.source_node_id).unwrap();
1127 let new_source_node_hash = HashId(calculate_hash(&format!(
1128 "{}.{}.{}.bg-{}",
1129 collection_name,
1130 child_sample_name,
1131 old_source_node.id,
1132 target_block_group_id
1133 )));
1134 Node::create(conn, &old_source_node.sequence_hash, &new_source_node_hash)
1135 });
1136 let new_target_node_id = *new_node_ids_by_old
1137 .entry(source_edge.target_node_id)
1138 .or_insert_with(|| {
1139 let old_target_node = old_nodes_by_id.get(&source_edge.target_node_id).unwrap();
1140 let new_target_node_hash = HashId(calculate_hash(&format!(
1141 "{}.{}.{}.bg-{}",
1142 collection_name,
1143 child_sample_name,
1144 old_target_node.id,
1145 target_block_group_id
1146 )));
1147 Node::create(conn, &old_target_node.sequence_hash, &new_target_node_hash)
1148 });
1149
1150 new_edges.push(EdgeData {
1151 source_node_id: new_source_node_id,
1152 source_coordinate: source_edge.source_coordinate,
1153 source_strand: source_edge.source_strand,
1154 target_node_id: new_target_node_id,
1155 target_coordinate: source_edge.target_coordinate,
1156 target_strand: source_edge.target_strand,
1157 });
1158 }
1159
1160 let new_edge_ids = Edge::bulk_create(conn, &new_edges);
1161 let new_edge_ids_by_old = source_edges
1162 .iter()
1163 .zip(new_edge_ids.iter())
1164 .map(|(source_edge, new_edge_id)| (&source_edge.id, new_edge_id))
1165 .collect::<HashMap<_, _>>();
1166
1167 let source_block_group_edges = BlockGroupEdge::specific_edges_for_block_group(
1168 conn,
1169 source_block_group_id,
1170 &subgraph_edge_ids,
1171 );
1172 let source_edge_ids = source_edges
1173 .iter()
1174 .map(|edge| &edge.id)
1175 .collect::<HashSet<_>>();
1176 let source_block_group_edges = source_block_group_edges
1177 .iter()
1178 .filter(|block_group_edge| source_edge_ids.contains(&block_group_edge.edge.id))
1179 .collect::<Vec<_>>();
1180 let source_block_group_edges_by_edge_id = source_block_group_edges
1181 .iter()
1182 .map(|block_group_edge| (&block_group_edge.edge.id, block_group_edge))
1183 .collect::<HashMap<_, _>>();
1184
1185 let subgraph_edge_inputs = source_block_group_edges
1186 .iter()
1187 .map(|edge| {
1188 let block_group_edge = source_block_group_edges_by_edge_id
1189 .get(&edge.edge.id)
1190 .unwrap();
1191 let new_edge_id = new_edge_ids_by_old.get(&edge.edge.id).unwrap();
1192 BlockGroupEdgeData {
1193 block_group_id: *target_block_group_id,
1194 edge_id: **new_edge_id,
1195 chromosome_index: block_group_edge.chromosome_index,
1196 phased: block_group_edge.phased,
1197 }
1198 })
1199 .collect::<Vec<_>>();
1200
1201 let mut all_edges = subgraph_edge_inputs.clone();
1202
1203 if create_terminal_edges {
1204 let new_start_node_id = new_node_ids_by_old.get(&start_block.node_id).unwrap();
1205 let new_start_edge = Edge::create(
1206 conn,
1207 PATH_START_NODE_ID,
1208 0,
1209 Strand::Forward,
1210 *new_start_node_id,
1211 start_node_coordinate,
1212 start_block.strand,
1213 );
1214 let new_start_edge_data = BlockGroupEdgeData {
1215 block_group_id: *target_block_group_id,
1216 edge_id: new_start_edge.id,
1217 chromosome_index: 0,
1218 phased: 0,
1219 };
1220 let new_end_node_id = new_node_ids_by_old.get(&end_block.node_id).unwrap();
1221 let new_end_edge = Edge::create(
1222 conn,
1223 *new_end_node_id,
1224 end_node_coordinate,
1225 end_block.strand,
1226 PATH_END_NODE_ID,
1227 0,
1228 Strand::Forward,
1229 );
1230 let new_end_edge_data = BlockGroupEdgeData {
1231 block_group_id: *target_block_group_id,
1232 edge_id: new_end_edge.id,
1233 chromosome_index: 0,
1234 phased: 0,
1235 };
1236
1237 all_edges.push(new_start_edge_data);
1238 all_edges.push(new_end_edge_data);
1239 }
1240
1241 BlockGroupEdge::bulk_create(conn, &all_edges);
1242
1243 new_node_ids_by_old
1244 }
1245}
1246
1247impl Query for BlockGroup {
1248 type Model = BlockGroup;
1249
1250 const TABLE_NAME: &'static str = "block_groups";
1251
1252 fn process_row(row: &Row) -> Self::Model {
1253 BlockGroup {
1254 id: row.get(0).unwrap(),
1255 collection_name: row.get(1).unwrap(),
1256 sample_name: row.get(2).unwrap(),
1257 name: row.get(3).unwrap(),
1258 created_on: row.get(4).unwrap(),
1259 parent_block_group_id: row.get(5).unwrap(),
1260 is_default: row.get(6).unwrap(),
1261 }
1262 }
1263}
1264
1265#[cfg(test)]
1266mod tests {
1267 use core::ops::Range;
1268 use std::collections::HashSet;
1269
1270 use capnp::message::TypedBuilder;
1271 use chrono::Utc;
1272 use gen_core::NO_CHROMOSOME_INDEX;
1273
1274 use super::*;
1275 use crate::{
1276 collection::Collection,
1277 node::Node,
1278 sample::Sample,
1279 sequence::Sequence,
1280 test_helpers::{create_bg, get_connection, interval_tree_verify, setup_block_group},
1281 };
1282
1283 fn get_single_bg_id(
1284 conn: &GraphConnection,
1285 collection_name: &str,
1286 sample_name: &str,
1287 group_name: &str,
1288 parent_samples: Vec<String>,
1289 ) -> HashId {
1290 BlockGroup::get_or_create_sample_block_groups(
1291 conn,
1292 collection_name,
1293 sample_name,
1294 group_name,
1295 parent_samples,
1296 )
1297 .unwrap()
1298 .into_iter()
1299 .next()
1300 .unwrap()
1301 .id
1302 }
1303
1304 #[test]
1305 fn test_capnp_serialization() {
1306 let block_group = BlockGroup {
1307 id: HashId::pad_str(42),
1308 collection_name: "test_collection".to_string(),
1309 sample_name: "test_sample".to_string(),
1310 name: "test_block_group".to_string(),
1311 created_on: Utc::now().timestamp_nanos_opt().unwrap(),
1312 parent_block_group_id: None,
1313 is_default: false,
1314 };
1315
1316 let mut message = TypedBuilder::<block_group::Owned>::new_default();
1317 let mut root = message.init_root();
1318 block_group.write_capnp(&mut root);
1319
1320 let deserialized = BlockGroup::read_capnp(root.into_reader());
1321 assert_eq!(block_group, deserialized);
1322 }
1323
1324 #[test]
1325 fn test_capnp_deserialization_defaults_missing_parent_to_none() {
1326 let created_on = Utc::now().timestamp_nanos_opt().unwrap();
1327
1328 let mut message = TypedBuilder::<block_group::Owned>::new_default();
1329 let mut root = message.init_root();
1330 root.set_id(&HashId::pad_str(42).0).unwrap();
1331 root.set_collection_name("test_collection");
1332 root.set_sample_name("test_sample");
1333 root.set_name("test_block_group");
1334 root.set_created_on(created_on);
1335
1336 let deserialized = BlockGroup::read_capnp(root.into_reader());
1337 assert_eq!(deserialized.parent_block_group_id, None);
1338 assert!(!deserialized.is_default);
1339 }
1340
1341 #[test]
1342 fn test_blockgroup_create() {
1343 let conn = &get_connection(None).unwrap();
1344 Collection::create(conn, "test");
1345 Sample::get_or_create(conn, Sample::DEFAULT_NAME);
1346 let bg1 = create_bg(conn, "test", Sample::DEFAULT_NAME, "hg19");
1347 assert_eq!(bg1.collection_name, "test");
1348 assert_eq!(bg1.name, "hg19");
1349 Sample::get_or_create(conn, "sample");
1350 let bg2 = create_bg(conn, "test", "sample", "hg19");
1351 assert_eq!(bg2.collection_name, "test");
1352 assert_eq!(bg2.name, "hg19");
1353 assert_eq!(bg2.sample_name, "sample".to_string());
1354 assert_ne!(&bg1.id, &bg2.id);
1355 }
1356
1357 #[test]
1358 fn test_blockgroup_delete() {
1359 let conn = &get_connection(None).unwrap();
1360 Collection::create(conn, "test");
1361 Sample::get_or_create(conn, "sample1");
1362 Sample::get_or_create(conn, "sample2");
1363 let bg1 = create_bg(conn, "test", "sample1", "hg19");
1364 let bg2 = create_bg(conn, "test", "sample2", "hg19");
1365
1366 BlockGroup::delete(conn, "test", &bg1.sample_name, &bg1.name);
1367
1368 let bgs = BlockGroup::all(conn);
1369 assert_eq!(bgs.len(), 1);
1370 assert_eq!(bgs[0], bg2);
1371 }
1372
1373 #[test]
1374 fn test_blockgroup_clone() {
1375 let conn = &get_connection(None).unwrap();
1376 Collection::create(conn, "test");
1377 Sample::get_or_create(conn, Sample::DEFAULT_NAME);
1378 let bg1 = create_bg(conn, "test", Sample::DEFAULT_NAME, "hg19");
1379 assert_eq!(bg1.collection_name, "test");
1380 assert_eq!(bg1.name, "hg19");
1381 Sample::get_or_create(conn, "sample");
1382 let bg2 = get_single_bg_id(
1383 conn,
1384 "test",
1385 "sample",
1386 "hg19",
1387 vec![Sample::DEFAULT_NAME.to_string()],
1388 );
1389 assert_eq!(
1390 BlockGroupEdge::edges_for_block_group(conn, &bg1.id),
1391 BlockGroupEdge::edges_for_block_group(conn, &bg2)
1392 );
1393 }
1394
1395 #[test]
1396 fn test_blockgroup_copies_immediate_parent_block_groups() {
1397 let conn = &get_connection(None).unwrap();
1398 Collection::create(conn, "test");
1399 Sample::get_or_create(conn, "parent_a");
1400 Sample::get_or_create(conn, "parent_b");
1401 Sample::get_or_create(conn, "child");
1402
1403 let parent_a_bg = create_bg(conn, "test", "parent_a", "chr1");
1404 let parent_b_bg = create_bg(conn, "test", "parent_b", "chr1");
1405
1406 let seq_a = Sequence::new()
1407 .sequence_type("DNA")
1408 .sequence("AAAA")
1409 .save(conn);
1410 let seq_b = Sequence::new()
1411 .sequence_type("DNA")
1412 .sequence("CCCC")
1413 .save(conn);
1414 let node_a = Node::create(conn, &seq_a.hash, &HashId::convert_str("merge-parent-a"));
1415 let node_b = Node::create(conn, &seq_b.hash, &HashId::convert_str("merge-parent-b"));
1416
1417 let parent_a_edges = [
1418 Edge::create(
1419 conn,
1420 PATH_START_NODE_ID,
1421 0,
1422 Strand::Forward,
1423 node_a,
1424 0,
1425 Strand::Forward,
1426 ),
1427 Edge::create(
1428 conn,
1429 node_a,
1430 4,
1431 Strand::Forward,
1432 PATH_END_NODE_ID,
1433 0,
1434 Strand::Forward,
1435 ),
1436 ];
1437 let parent_b_edges = [
1438 Edge::create(
1439 conn,
1440 PATH_START_NODE_ID,
1441 0,
1442 Strand::Forward,
1443 node_b,
1444 0,
1445 Strand::Forward,
1446 ),
1447 Edge::create(
1448 conn,
1449 node_b,
1450 4,
1451 Strand::Forward,
1452 PATH_END_NODE_ID,
1453 0,
1454 Strand::Forward,
1455 ),
1456 ];
1457
1458 BlockGroupEdge::bulk_create(
1459 conn,
1460 &parent_a_edges
1461 .iter()
1462 .map(|edge| BlockGroupEdgeData {
1463 block_group_id: parent_a_bg.id,
1464 edge_id: edge.id,
1465 chromosome_index: 0,
1466 phased: 0,
1467 })
1468 .collect::<Vec<_>>(),
1469 );
1470 BlockGroupEdge::bulk_create(
1471 conn,
1472 &parent_b_edges
1473 .iter()
1474 .map(|edge| BlockGroupEdgeData {
1475 block_group_id: parent_b_bg.id,
1476 edge_id: edge.id,
1477 chromosome_index: 0,
1478 phased: 0,
1479 })
1480 .collect::<Vec<_>>(),
1481 );
1482
1483 let child_block_groups = BlockGroup::get_or_create_sample_block_groups(
1484 conn,
1485 "test",
1486 "child",
1487 "chr1",
1488 vec!["parent_a".to_string(), "parent_b".to_string()],
1489 )
1490 .unwrap();
1491 assert_eq!(child_block_groups.len(), 2);
1492
1493 let child_by_parent = child_block_groups
1494 .iter()
1495 .map(|block_group| (block_group.parent_block_group_id.unwrap(), block_group))
1496 .collect::<HashMap<_, _>>();
1497
1498 let child_a = child_by_parent.get(&parent_a_bg.id).unwrap();
1499 let child_b = child_by_parent.get(&parent_b_bg.id).unwrap();
1500
1501 let child_a_edges = BlockGroupEdge::query(
1502 conn,
1503 "select * from block_group_edges where block_group_id = ?1",
1504 params![child_a.id],
1505 );
1506 let child_b_edges = BlockGroupEdge::query(
1507 conn,
1508 "select * from block_group_edges where block_group_id = ?1",
1509 params![child_b.id],
1510 );
1511 assert_eq!(
1512 child_a_edges
1513 .iter()
1514 .map(|edge| edge.edge_id)
1515 .collect::<HashSet<_>>(),
1516 parent_a_edges
1517 .iter()
1518 .map(|edge| edge.id)
1519 .collect::<HashSet<_>>()
1520 );
1521 assert_eq!(
1522 child_b_edges
1523 .iter()
1524 .map(|edge| edge.edge_id)
1525 .collect::<HashSet<_>>(),
1526 parent_b_edges
1527 .iter()
1528 .map(|edge| edge.id)
1529 .collect::<HashSet<_>>()
1530 );
1531
1532 assert_eq!(
1533 BlockGroup::get_all_sequences(conn, &child_a.id, false),
1534 HashSet::from_iter(vec!["AAAA".to_string()])
1535 );
1536 assert_eq!(
1537 BlockGroup::get_all_sequences(conn, &child_b.id, false),
1538 HashSet::from_iter(vec!["CCCC".to_string()])
1539 );
1540 assert_eq!(
1541 Sample::get_all_sequences(conn, "test", "child", false),
1542 HashSet::from_iter(vec!["AAAA".to_string(), "CCCC".to_string()])
1543 );
1544 }
1545
1546 #[test]
1547 fn test_get_or_create_sample_block_groups_creates_root_block_group_if_no_parents() {
1548 let conn = &get_connection(None).unwrap();
1549 Collection::create(conn, "test");
1550 Sample::get_or_create(conn, "root_sample");
1551
1552 let block_groups = BlockGroup::get_or_create_sample_block_groups(
1553 conn,
1554 "test",
1555 "root_sample",
1556 "chr1",
1557 vec![],
1558 )
1559 .unwrap();
1560
1561 assert_eq!(block_groups.len(), 1);
1562 assert_eq!(block_groups[0].collection_name, "test");
1563 assert_eq!(block_groups[0].sample_name, "root_sample");
1564 assert_eq!(block_groups[0].name, "chr1");
1565 assert!(block_groups[0].parent_block_group_id.is_none());
1566 }
1567
1568 #[test]
1569 fn test_get_or_create_sample_block_groups_seeds_from_parents_without_block_groups() {
1570 let conn = &get_connection(None).unwrap();
1571 Collection::create(conn, "test");
1572 Sample::get_or_create(conn, "parent_sample");
1573 Sample::get_or_create(conn, "child_sample");
1574
1575 let block_groups = BlockGroup::get_or_create_sample_block_groups(
1576 conn,
1577 "test",
1578 "child_sample",
1579 "chr1",
1580 vec!["parent_sample".to_string()],
1581 )
1582 .unwrap();
1583
1584 assert_eq!(block_groups.len(), 1);
1585 assert_eq!(block_groups[0].collection_name, "test");
1586 assert_eq!(block_groups[0].sample_name, "child_sample");
1587 assert_eq!(block_groups[0].name, "chr1");
1588 assert!(block_groups[0].parent_block_group_id.is_none());
1589 assert!(BlockGroupEdge::edges_for_block_group(conn, &block_groups[0].id).is_empty());
1590 }
1591
1592 #[test]
1593 fn test_blockgroup_merge_from_multiple_parents_preserves_paths_and_accessions() {
1594 let conn = &get_connection(None).unwrap();
1595 Collection::create(conn, "test");
1596 Sample::get_or_create(conn, "parent_a");
1597 Sample::get_or_create(conn, "parent_b");
1598 Sample::get_or_create(conn, "child");
1599
1600 let parent_a_bg = create_bg(conn, "test", "parent_a", "chr1");
1601 let parent_b_bg = create_bg(conn, "test", "parent_b", "chr1");
1602
1603 let seq_a = Sequence::new()
1604 .sequence_type("DNA")
1605 .sequence("AAAA")
1606 .save(conn);
1607 let seq_b = Sequence::new()
1608 .sequence_type("DNA")
1609 .sequence("CCCC")
1610 .save(conn);
1611 let node_a = Node::create(conn, &seq_a.hash, &HashId::convert_str("metadata-parent-a"));
1612 let node_b = Node::create(conn, &seq_b.hash, &HashId::convert_str("metadata-parent-b"));
1613
1614 let parent_a_edges = [
1615 Edge::create(
1616 conn,
1617 PATH_START_NODE_ID,
1618 0,
1619 Strand::Forward,
1620 node_a,
1621 0,
1622 Strand::Forward,
1623 ),
1624 Edge::create(
1625 conn,
1626 node_a,
1627 4,
1628 Strand::Forward,
1629 PATH_END_NODE_ID,
1630 0,
1631 Strand::Forward,
1632 ),
1633 ];
1634 let parent_b_edges = [
1635 Edge::create(
1636 conn,
1637 PATH_START_NODE_ID,
1638 0,
1639 Strand::Forward,
1640 node_b,
1641 0,
1642 Strand::Forward,
1643 ),
1644 Edge::create(
1645 conn,
1646 node_b,
1647 4,
1648 Strand::Forward,
1649 PATH_END_NODE_ID,
1650 0,
1651 Strand::Forward,
1652 ),
1653 ];
1654
1655 BlockGroupEdge::bulk_create(
1656 conn,
1657 &parent_a_edges
1658 .iter()
1659 .map(|edge| BlockGroupEdgeData {
1660 block_group_id: parent_a_bg.id,
1661 edge_id: edge.id,
1662 chromosome_index: 0,
1663 phased: 0,
1664 })
1665 .collect::<Vec<_>>(),
1666 );
1667 BlockGroupEdge::bulk_create(
1668 conn,
1669 &parent_b_edges
1670 .iter()
1671 .map(|edge| BlockGroupEdgeData {
1672 block_group_id: parent_b_bg.id,
1673 edge_id: edge.id,
1674 chromosome_index: 0,
1675 phased: 0,
1676 })
1677 .collect::<Vec<_>>(),
1678 );
1679
1680 let parent_a_path = Path::create(
1681 conn,
1682 "chr1",
1683 &parent_a_bg.id,
1684 &parent_a_edges
1685 .iter()
1686 .map(|edge| edge.id)
1687 .collect::<Vec<_>>(),
1688 );
1689 let parent_b_path = Path::create(
1690 conn,
1691 "chr1",
1692 &parent_b_bg.id,
1693 &parent_b_edges
1694 .iter()
1695 .map(|edge| edge.id)
1696 .collect::<Vec<_>>(),
1697 );
1698 let parent_b_alt_path = Path::create(
1699 conn,
1700 "chr1-alt",
1701 &parent_b_bg.id,
1702 &parent_b_edges
1703 .iter()
1704 .map(|edge| edge.id)
1705 .collect::<Vec<_>>(),
1706 );
1707
1708 let mut path_cache = PathCache::new(conn);
1709 let parent_a_path_len = parent_a_path.length(conn);
1710 let parent_b_path_len = parent_b_path.length(conn);
1711 let parent_b_alt_path_len = parent_b_alt_path.length(conn);
1712 BlockGroup::add_accession(
1713 conn,
1714 &parent_a_path,
1715 "parent-a-acc",
1716 0,
1717 parent_a_path_len,
1718 &mut path_cache,
1719 );
1720 BlockGroup::add_accession(
1721 conn,
1722 &parent_b_path,
1723 "parent-b-acc",
1724 0,
1725 parent_b_path_len,
1726 &mut path_cache,
1727 );
1728 BlockGroup::add_accession(
1729 conn,
1730 &parent_b_alt_path,
1731 "parent-b-alt-acc",
1732 0,
1733 parent_b_alt_path_len,
1734 &mut path_cache,
1735 );
1736
1737 let child_block_groups = BlockGroup::get_or_create_sample_block_groups(
1738 conn,
1739 "test",
1740 "child",
1741 "chr1",
1742 vec!["parent_a".to_string(), "parent_b".to_string()],
1743 )
1744 .unwrap();
1745 let child_by_parent = child_block_groups
1746 .iter()
1747 .map(|block_group| (block_group.parent_block_group_id.unwrap(), block_group))
1748 .collect::<HashMap<_, _>>();
1749 let child_a = child_by_parent.get(&parent_a_bg.id).unwrap();
1750 let child_b = child_by_parent.get(&parent_b_bg.id).unwrap();
1751
1752 let child_a_paths = Path::query(
1753 conn,
1754 "select * from paths where block_group_id = ?1 order by name",
1755 params![child_a.id],
1756 );
1757 assert_eq!(
1758 child_a_paths
1759 .iter()
1760 .map(|path| path.name.as_str())
1761 .collect::<Vec<_>>(),
1762 vec!["chr1"]
1763 );
1764
1765 let child_b_paths = Path::query(
1766 conn,
1767 "select * from paths where block_group_id = ?1 order by name",
1768 params![child_b.id],
1769 );
1770 assert_eq!(
1771 child_b_paths
1772 .iter()
1773 .map(|path| path.name.as_str())
1774 .collect::<Vec<_>>(),
1775 vec!["chr1", "chr1-alt"]
1776 );
1777
1778 let child_a_accessions = Accession::query(
1779 conn,
1780 "select accessions.* from accessions join paths on accessions.path_id = paths.id where paths.block_group_id = ?1 order by accessions.name",
1781 params![child_a.id],
1782 );
1783 assert_eq!(
1784 child_a_accessions
1785 .iter()
1786 .map(|accession| accession.name.as_str())
1787 .collect::<Vec<_>>(),
1788 vec!["parent-a-acc"]
1789 );
1790
1791 let child_b_accessions = Accession::query(
1792 conn,
1793 "select accessions.* from accessions join paths on accessions.path_id = paths.id where paths.block_group_id = ?1 order by accessions.name",
1794 params![child_b.id],
1795 );
1796 assert_eq!(
1797 child_b_accessions
1798 .iter()
1799 .map(|accession| accession.name.as_str())
1800 .collect::<Vec<_>>(),
1801 vec!["parent-b-acc", "parent-b-alt-acc"]
1802 );
1803 }
1804
1805 #[test]
1806 fn test_blockgroup_clone_passes_accessions() {
1807 let conn = &get_connection(None).unwrap();
1808 let (_bg_1, path) = setup_block_group(conn);
1809 let mut path_cache = PathCache::new(conn);
1810 let acc_1 = BlockGroup::add_accession(conn, &path, "test", 3, 7, &mut path_cache);
1811 assert_eq!(
1812 Accession::query(
1813 conn,
1814 "select * from accessions where name = ?1",
1815 rusqlite::params!(SQLValue::from("test".to_string())),
1816 ),
1817 vec![Accession {
1818 id: acc_1.id,
1819 name: "test".to_string(),
1820 path_id: path.id,
1821 parent_accession_id: None,
1822 }]
1823 );
1824
1825 Sample::get_or_create(conn, "sample2");
1826 let _bg2 = get_single_bg_id(conn, "test", "sample2", "chr1", vec!["test".to_string()]);
1827 assert_eq!(
1828 Accession::query(
1829 conn,
1830 "select * from accessions where name = ?1",
1831 rusqlite::params!(SQLValue::from("test".to_string())),
1832 )
1833 .len(),
1834 2
1835 );
1836 }
1837
1838 #[test]
1839 fn insert_and_deletion_get_all() {
1840 let conn = get_connection(None).unwrap();
1841 let (block_group_id, path) = setup_block_group(&conn);
1842 let insert_sequence = Sequence::new()
1843 .sequence_type("DNA")
1844 .sequence("NNNN")
1845 .save(&conn);
1846 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1847 let insert = PathBlock {
1848 node_id: insert_node_id,
1849 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1850 sequence_start: 0,
1851 sequence_end: 4,
1852 path_start: 7,
1853 path_end: 15,
1854 strand: Strand::Forward,
1855 };
1856 let change = PathChange {
1857 block_group_id,
1858 path: path.clone(),
1859 path_accession: None,
1860 start: 7,
1861 end: 15,
1862 block: insert,
1863 chromosome_index: 1,
1864 phased: 0,
1865 preserve_edge: true,
1866 };
1867 let tree = path.intervaltree(&conn);
1868 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1869
1870 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1871 assert_eq!(
1872 all_sequences,
1873 HashSet::from_iter(vec![
1874 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1875 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1876 ])
1877 );
1878
1879 let deletion_sequence = Sequence::new()
1880 .sequence_type("DNA")
1881 .sequence("")
1882 .save(&conn);
1883 let deletion_node_id =
1884 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("2"));
1885 let deletion = PathBlock {
1886 node_id: deletion_node_id,
1887 block_sequence: deletion_sequence.get_sequence(None, None),
1888 sequence_start: 0,
1889 sequence_end: 0,
1890 path_start: 19,
1891 path_end: 31,
1892 strand: Strand::Forward,
1893 };
1894
1895 let change = PathChange {
1896 block_group_id,
1897 path: path.clone(),
1898 path_accession: None,
1899 start: 19,
1900 end: 31,
1901 block: deletion,
1902 chromosome_index: 1,
1903 phased: 0,
1904 preserve_edge: true,
1905 };
1906 let tree = path.intervaltree(&conn);
1908 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1909 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1910 assert_eq!(
1911 all_sequences,
1912 HashSet::from_iter(vec![
1913 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1914 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1915 "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
1916 "AAAAAAANNNNTTTTGGGGGGGGG".to_string(),
1917 ])
1918 )
1919 }
1920
1921 #[test]
1922 fn simple_insert_get_all() {
1923 let conn = get_connection(None).unwrap();
1924 let (block_group_id, path) = setup_block_group(&conn);
1925 let insert_sequence = Sequence::new()
1926 .sequence_type("DNA")
1927 .sequence("NNNN")
1928 .save(&conn);
1929 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1930 let insert = PathBlock {
1931 node_id: insert_node_id,
1932 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1933 sequence_start: 0,
1934 sequence_end: 4,
1935 path_start: 7,
1936 path_end: 15,
1937 strand: Strand::Forward,
1938 };
1939 let change = PathChange {
1940 block_group_id,
1941 path: path.clone(),
1942 path_accession: None,
1943 start: 7,
1944 end: 15,
1945 block: insert,
1946 chromosome_index: 1,
1947 phased: 0,
1948 preserve_edge: true,
1949 };
1950 let tree = path.intervaltree(&conn);
1951 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1952
1953 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1954 assert_eq!(
1955 all_sequences,
1956 HashSet::from_iter(vec![
1957 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
1958 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
1959 ])
1960 );
1961 }
1962
1963 #[test]
1964 fn insert_on_block_boundary_middle() {
1965 let conn = get_connection(None).unwrap();
1966 let (block_group_id, path) = setup_block_group(&conn);
1967 let insert_sequence = Sequence::new()
1968 .sequence_type("DNA")
1969 .sequence("NNNN")
1970 .save(&conn);
1971 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
1972 let insert = PathBlock {
1973 node_id: insert_node_id,
1974 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
1975 sequence_start: 0,
1976 sequence_end: 4,
1977 path_start: 15,
1978 path_end: 15,
1979 strand: Strand::Forward,
1980 };
1981 let change = PathChange {
1982 block_group_id,
1983 path: path.clone(),
1984 path_accession: None,
1985 start: 15,
1986 end: 15,
1987 block: insert,
1988 chromosome_index: 1,
1989 phased: 0,
1990 preserve_edge: true,
1991 };
1992 let tree = path.intervaltree(&conn);
1993 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
1994
1995 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
1996 assert_eq!(
1997 all_sequences,
1998 HashSet::from_iter(vec![
1999 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2000 "AAAAAAAAAATTTTTNNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2001 ])
2002 );
2003 }
2004
2005 #[test]
2006 fn insert_within_block() {
2007 let conn = get_connection(None).unwrap();
2008 let (block_group_id, path) = setup_block_group(&conn);
2009 let insert_sequence = Sequence::new()
2010 .sequence_type("DNA")
2011 .sequence("NNNN")
2012 .save(&conn);
2013 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2014 let insert = PathBlock {
2015 node_id: insert_node_id,
2016 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2017 sequence_start: 0,
2018 sequence_end: 4,
2019 path_start: 12,
2020 path_end: 17,
2021 strand: Strand::Forward,
2022 };
2023 let change = PathChange {
2024 block_group_id,
2025 path: path.clone(),
2026 path_accession: None,
2027 start: 12,
2028 end: 17,
2029 block: insert,
2030 chromosome_index: 1,
2031 phased: 0,
2032 preserve_edge: true,
2033 };
2034 let tree = path.intervaltree(&conn);
2035 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2036
2037 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2038 assert_eq!(
2039 all_sequences,
2040 HashSet::from_iter(vec![
2041 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2042 "AAAAAAAAAATTNNNNTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2043 ])
2044 );
2045 }
2046
2047 #[test]
2048 fn insert_on_block_boundary_start() {
2049 let conn = get_connection(None).unwrap();
2050 let (block_group_id, path) = setup_block_group(&conn);
2051 let insert_sequence = Sequence::new()
2052 .sequence_type("DNA")
2053 .sequence("NNNN")
2054 .save(&conn);
2055 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2056 let insert = PathBlock {
2057 node_id: insert_node_id,
2058 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2059 sequence_start: 0,
2060 sequence_end: 4,
2061 path_start: 10,
2062 path_end: 10,
2063 strand: Strand::Forward,
2064 };
2065 let change = PathChange {
2066 block_group_id,
2067 path: path.clone(),
2068 path_accession: None,
2069 start: 10,
2070 end: 10,
2071 block: insert,
2072 chromosome_index: 1,
2073 phased: 0,
2074 preserve_edge: true,
2075 };
2076 let tree = path.intervaltree(&conn);
2077 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2078
2079 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2080 assert_eq!(
2081 all_sequences,
2082 HashSet::from_iter(vec![
2083 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2084 "AAAAAAAAAANNNNTTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2085 ])
2086 );
2087 }
2088
2089 #[test]
2090 fn insert_on_block_boundary_end() {
2091 let conn = get_connection(None).unwrap();
2092 let (block_group_id, path) = setup_block_group(&conn);
2093 let insert_sequence = Sequence::new()
2094 .sequence_type("DNA")
2095 .sequence("NNNN")
2096 .save(&conn);
2097 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2098 let insert = PathBlock {
2099 node_id: insert_node_id,
2100 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2101 sequence_start: 0,
2102 sequence_end: 4,
2103 path_start: 9,
2104 path_end: 9,
2105 strand: Strand::Forward,
2106 };
2107 let change = PathChange {
2108 block_group_id,
2109 path: path.clone(),
2110 path_accession: None,
2111 start: 9,
2112 end: 9,
2113 block: insert,
2114 chromosome_index: 1,
2115 phased: 0,
2116 preserve_edge: true,
2117 };
2118 let tree = path.intervaltree(&conn);
2119 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2120
2121 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2122 assert_eq!(
2123 all_sequences,
2124 HashSet::from_iter(vec![
2125 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2126 "AAAAAAAAANNNNATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2127 ])
2128 );
2129 }
2130
2131 #[test]
2132 fn insert_across_entire_block_boundary() {
2133 let conn = get_connection(None).unwrap();
2134 let (block_group_id, path) = setup_block_group(&conn);
2135 let insert_sequence = Sequence::new()
2136 .sequence_type("DNA")
2137 .sequence("NNNN")
2138 .save(&conn);
2139 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2140 let insert = PathBlock {
2141 node_id: insert_node_id,
2142 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2143 sequence_start: 0,
2144 sequence_end: 4,
2145 path_start: 10,
2146 path_end: 20,
2147 strand: Strand::Forward,
2148 };
2149 let change = PathChange {
2150 block_group_id,
2151 path: path.clone(),
2152 path_accession: None,
2153 start: 10,
2154 end: 20,
2155 block: insert,
2156 chromosome_index: 1,
2157 phased: 0,
2158 preserve_edge: true,
2159 };
2160 let tree = path.intervaltree(&conn);
2161 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2162
2163 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2164 assert_eq!(
2165 all_sequences,
2166 HashSet::from_iter(vec![
2167 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2168 "AAAAAAAAAANNNNCCCCCCCCCCGGGGGGGGGG".to_string()
2169 ])
2170 );
2171 }
2172
2173 #[test]
2174 fn insert_across_two_blocks() {
2175 let conn = get_connection(None).unwrap();
2176 let (block_group_id, path) = setup_block_group(&conn);
2177 let insert_sequence = Sequence::new()
2178 .sequence_type("DNA")
2179 .sequence("NNNN")
2180 .save(&conn);
2181 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2182 let insert = PathBlock {
2183 node_id: insert_node_id,
2184 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2185 sequence_start: 0,
2186 sequence_end: 4,
2187 path_start: 15,
2188 path_end: 25,
2189 strand: Strand::Forward,
2190 };
2191 let change = PathChange {
2192 block_group_id,
2193 path: path.clone(),
2194 path_accession: None,
2195 start: 15,
2196 end: 25,
2197 block: insert,
2198 chromosome_index: 1,
2199 phased: 0,
2200 preserve_edge: true,
2201 };
2202 let tree = path.intervaltree(&conn);
2203 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2204
2205 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2206 assert_eq!(
2207 all_sequences,
2208 HashSet::from_iter(vec![
2209 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2210 "AAAAAAAAAATTTTTNNNNCCCCCGGGGGGGGGG".to_string()
2211 ])
2212 );
2213 }
2214
2215 #[test]
2216 fn insert_spanning_blocks() {
2217 let conn = get_connection(None).unwrap();
2218 let (block_group_id, path) = setup_block_group(&conn);
2219 let insert_sequence = Sequence::new()
2220 .sequence_type("DNA")
2221 .sequence("NNNN")
2222 .save(&conn);
2223 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2224 let insert = PathBlock {
2225 node_id: insert_node_id,
2226 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2227 sequence_start: 0,
2228 sequence_end: 4,
2229 path_start: 5,
2230 path_end: 35,
2231 strand: Strand::Forward,
2232 };
2233 let change = PathChange {
2234 block_group_id,
2235 path: path.clone(),
2236 path_accession: None,
2237 start: 5,
2238 end: 35,
2239 block: insert,
2240 chromosome_index: 1,
2241 phased: 0,
2242 preserve_edge: true,
2243 };
2244 let tree = path.intervaltree(&conn);
2245 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2246
2247 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2248 assert_eq!(
2249 all_sequences,
2250 HashSet::from_iter(vec![
2251 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2252 "AAAAANNNNGGGGG".to_string()
2253 ])
2254 );
2255 }
2256
2257 #[test]
2258 fn simple_deletion() {
2259 let conn = get_connection(None).unwrap();
2260 let (block_group_id, path) = setup_block_group(&conn);
2261 let deletion_sequence = Sequence::new()
2262 .sequence_type("DNA")
2263 .sequence("")
2264 .save(&conn);
2265 let deletion_node_id =
2266 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2267 let deletion = PathBlock {
2268 node_id: deletion_node_id,
2269 block_sequence: deletion_sequence.get_sequence(None, None),
2270 sequence_start: 0,
2271 sequence_end: 0,
2272 path_start: 19,
2273 path_end: 31,
2274 strand: Strand::Forward,
2275 };
2276
2277 let change = PathChange {
2278 block_group_id,
2279 path: path.clone(),
2280 path_accession: None,
2281 start: 19,
2282 end: 31,
2283 block: deletion,
2284 chromosome_index: 1,
2285 phased: 0,
2286 preserve_edge: true,
2287 };
2288
2289 let tree = path.intervaltree(&conn);
2291 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2292 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2293 assert_eq!(
2294 all_sequences,
2295 HashSet::from_iter(vec![
2296 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2297 "AAAAAAAAAATTTTTTTTTGGGGGGGGG".to_string(),
2298 ])
2299 )
2300 }
2301
2302 #[test]
2303 fn doesnt_apply_same_insert_twice() {
2304 let conn = get_connection(None).unwrap();
2305 let (block_group_id, path) = setup_block_group(&conn);
2306 let insert_sequence = Sequence::new()
2307 .sequence_type("DNA")
2308 .sequence("NNNN")
2309 .save(&conn);
2310 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2311 let insert = PathBlock {
2312 node_id: insert_node_id,
2313 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2314 sequence_start: 0,
2315 sequence_end: 4,
2316 path_start: 7,
2317 path_end: 15,
2318 strand: Strand::Forward,
2319 };
2320 let change = PathChange {
2321 block_group_id,
2322 path: path.clone(),
2323 path_accession: None,
2324 start: 7,
2325 end: 15,
2326 block: insert,
2327 chromosome_index: 1,
2328 phased: 0,
2329 preserve_edge: true,
2330 };
2331 let tree = path.intervaltree(&conn);
2332 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2333
2334 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2335 assert_eq!(
2336 all_sequences,
2337 HashSet::from_iter(vec![
2338 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2339 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2340 ])
2341 );
2342
2343 let tree = path.intervaltree(&conn);
2344 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2345
2346 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2347 assert_eq!(
2348 all_sequences,
2349 HashSet::from_iter(vec![
2350 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2351 "AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string()
2352 ])
2353 );
2354 }
2355
2356 #[test]
2357 fn insert_at_beginning_of_path() {
2358 let conn = get_connection(None).unwrap();
2359 let (block_group_id, path) = setup_block_group(&conn);
2360 let insert_sequence = Sequence::new()
2361 .sequence_type("DNA")
2362 .sequence("NNNN")
2363 .save(&conn);
2364 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2365 let insert = PathBlock {
2366 node_id: insert_node_id,
2367 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2368 sequence_start: 0,
2369 sequence_end: 4,
2370 path_start: 0,
2371 path_end: 0,
2372 strand: Strand::Forward,
2373 };
2374 let change = PathChange {
2375 block_group_id,
2376 path: path.clone(),
2377 path_accession: None,
2378 start: 0,
2379 end: 0,
2380 block: insert,
2381 chromosome_index: 1,
2382 phased: 0,
2383 preserve_edge: true,
2384 };
2385 let tree = path.intervaltree(&conn);
2386 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2387
2388 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2389 assert_eq!(
2390 all_sequences,
2391 HashSet::from_iter(vec![
2392 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2393 "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2394 ])
2395 );
2396 }
2397
2398 #[test]
2399 fn homozygous_insert_at_beginning_of_path() {
2400 let conn = get_connection(None).unwrap();
2401 let (block_group_id, path) = setup_block_group(&conn);
2402 let insert_sequence = Sequence::new()
2403 .sequence_type("DNA")
2404 .sequence("NNNN")
2405 .save(&conn);
2406 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2407 let insert = PathBlock {
2408 node_id: insert_node_id,
2409 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2410 sequence_start: 0,
2411 sequence_end: 4,
2412 path_start: 0,
2413 path_end: 0,
2414 strand: Strand::Forward,
2415 };
2416 let change = PathChange {
2417 block_group_id,
2418 path: path.clone(),
2419 path_accession: None,
2420 start: 0,
2421 end: 0,
2422 block: insert,
2423 chromosome_index: 0,
2424 phased: 0,
2425 preserve_edge: true,
2426 };
2427 let tree = path.intervaltree(&conn);
2428 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2429
2430 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2431 assert_eq!(
2432 all_sequences,
2433 HashSet::from_iter(vec![
2434 "NNNNAAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2435 ])
2436 );
2437 }
2438
2439 #[test]
2440 fn insert_at_end_of_path() {
2441 let conn = get_connection(None).unwrap();
2442 let (block_group_id, path) = setup_block_group(&conn);
2443
2444 let insert_sequence = Sequence::new()
2445 .sequence_type("DNA")
2446 .sequence("NNNN")
2447 .save(&conn);
2448 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2449 let insert = PathBlock {
2450 node_id: insert_node_id,
2451 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2452 sequence_start: 0,
2453 sequence_end: 4,
2454 path_start: 40,
2455 path_end: 40,
2456 strand: Strand::Forward,
2457 };
2458 let change = PathChange {
2459 block_group_id,
2460 path: path.clone(),
2461 path_accession: None,
2462 start: 40,
2463 end: 40,
2464 block: insert,
2465 chromosome_index: 1,
2466 phased: 0,
2467 preserve_edge: true,
2468 };
2469 let tree = path.intervaltree(&conn);
2470 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2471
2472 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2473 assert_eq!(
2474 all_sequences,
2475 HashSet::from_iter(vec![
2476 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2477 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGGNNNN".to_string(),
2478 ])
2479 );
2480 }
2481
2482 #[test]
2483 fn insert_at_one_bp_into_block() {
2484 let conn = get_connection(None).unwrap();
2485 let (block_group_id, path) = setup_block_group(&conn);
2486 let insert_sequence = Sequence::new()
2487 .sequence_type("DNA")
2488 .sequence("NNNN")
2489 .save(&conn);
2490 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2491 let insert = PathBlock {
2492 node_id: insert_node_id,
2493 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2494 sequence_start: 0,
2495 sequence_end: 4,
2496 path_start: 10,
2497 path_end: 11,
2498 strand: Strand::Forward,
2499 };
2500 let change = PathChange {
2501 block_group_id,
2502 path: path.clone(),
2503 path_accession: None,
2504 start: 10,
2505 end: 11,
2506 block: insert,
2507 chromosome_index: 1,
2508 phased: 0,
2509 preserve_edge: true,
2510 };
2511 let tree = path.intervaltree(&conn);
2512 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2513
2514 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2515 assert_eq!(
2516 all_sequences,
2517 HashSet::from_iter(vec![
2518 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2519 "AAAAAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2520 ])
2521 );
2522 }
2523
2524 #[test]
2525 fn insert_at_one_bp_from_end_of_block() {
2526 let conn = get_connection(None).unwrap();
2527 let (block_group_id, path) = setup_block_group(&conn);
2528 let insert_sequence = Sequence::new()
2529 .sequence_type("DNA")
2530 .sequence("NNNN")
2531 .save(&conn);
2532 let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
2533 let insert = PathBlock {
2534 node_id: insert_node_id,
2535 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2536 sequence_start: 0,
2537 sequence_end: 4,
2538 path_start: 19,
2539 path_end: 20,
2540 strand: Strand::Forward,
2541 };
2542 let change = PathChange {
2543 block_group_id,
2544 path: path.clone(),
2545 path_accession: None,
2546 start: 19,
2547 end: 20,
2548 block: insert,
2549 chromosome_index: 1,
2550 phased: 0,
2551 preserve_edge: true,
2552 };
2553 let tree = path.intervaltree(&conn);
2554 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2555
2556 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2557 assert_eq!(
2558 all_sequences,
2559 HashSet::from_iter(vec![
2560 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2561 "AAAAAAAAAATTTTTTTTTNNNNCCCCCCCCCCGGGGGGGGGG".to_string(),
2562 ])
2563 );
2564 }
2565
2566 #[test]
2567 fn delete_at_beginning_of_path() {
2568 let conn = get_connection(None).unwrap();
2569 let (block_group_id, path) = setup_block_group(&conn);
2570 let deletion_sequence = Sequence::new()
2571 .sequence_type("DNA")
2572 .sequence("")
2573 .save(&conn);
2574 let deletion_node_id =
2575 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2576 let deletion = PathBlock {
2577 node_id: deletion_node_id,
2578 block_sequence: deletion_sequence.get_sequence(None, None),
2579 sequence_start: 0,
2580 sequence_end: 0,
2581 path_start: 0,
2582 path_end: 1,
2583 strand: Strand::Forward,
2584 };
2585 let change = PathChange {
2586 block_group_id,
2587 path: path.clone(),
2588 path_accession: None,
2589 start: 0,
2590 end: 1,
2591 block: deletion,
2592 chromosome_index: 1,
2593 phased: 0,
2594 preserve_edge: true,
2595 };
2596 let tree = path.intervaltree(&conn);
2597 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2598
2599 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2600 assert_eq!(
2601 all_sequences,
2602 HashSet::from_iter(vec![
2603 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2604 "AAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2605 ])
2606 );
2607 }
2608
2609 #[test]
2610 fn delete_at_end_of_path() {
2611 let conn = get_connection(None).unwrap();
2612 let (block_group_id, path) = setup_block_group(&conn);
2613 let deletion_sequence = Sequence::new()
2614 .sequence_type("DNA")
2615 .sequence("")
2616 .save(&conn);
2617 let deletion_node_id =
2618 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2619 let deletion = PathBlock {
2620 node_id: deletion_node_id,
2621 block_sequence: deletion_sequence.get_sequence(None, None),
2622 sequence_start: 0,
2623 sequence_end: 0,
2624 path_start: 35,
2625 path_end: 40,
2626 strand: Strand::Forward,
2627 };
2628 let change = PathChange {
2629 block_group_id,
2630 path: path.clone(),
2631 path_accession: None,
2632 start: 35,
2633 end: 40,
2634 block: deletion,
2635 chromosome_index: 1,
2636 phased: 0,
2637 preserve_edge: true,
2638 };
2639 let tree = path.intervaltree(&conn);
2640 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2641
2642 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2643 assert_eq!(
2644 all_sequences,
2645 HashSet::from_iter(vec![
2646 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGG".to_string(),
2647 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2648 ])
2649 );
2650 }
2651
2652 #[test]
2653 fn error_on_out_of_bounds_change() {
2654 let conn = get_connection(None).unwrap();
2655 let (block_group_id, path) = setup_block_group(&conn);
2656 let deletion_sequence = Sequence::new()
2657 .sequence_type("DNA")
2658 .sequence("")
2659 .save(&conn);
2660 let deletion_node_id =
2661 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2662 let deletion = PathBlock {
2663 node_id: deletion_node_id,
2664 block_sequence: deletion_sequence.get_sequence(None, None),
2665 sequence_start: 0,
2666 sequence_end: 0,
2667 path_start: 350,
2668 path_end: 400,
2669 strand: Strand::Forward,
2670 };
2671 let after_end_change = PathChange {
2672 block_group_id,
2673 path: path.clone(),
2674 path_accession: None,
2675 start: 350,
2676 end: 400,
2677 block: deletion.clone(),
2678 chromosome_index: 1,
2679 phased: 0,
2680 preserve_edge: true,
2681 };
2682 let before_start_change = PathChange {
2683 block_group_id,
2684 path: path.clone(),
2685 path_accession: None,
2686 start: -300,
2687 end: 400,
2688 block: deletion,
2689 chromosome_index: 1,
2690 phased: 0,
2691 preserve_edge: true,
2692 };
2693 let tree = path.intervaltree(&conn);
2694 let res = BlockGroup::insert_change(&conn, &after_end_change, &tree);
2695 assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2696 let res = BlockGroup::insert_change(&conn, &before_start_change, &tree);
2697 assert!(matches!(res, Err(ChangeError::OutOfBounds(_))));
2698 }
2699
2700 #[test]
2701 fn deletion_starting_at_block_boundary() {
2702 let conn = get_connection(None).unwrap();
2703 let (block_group_id, path) = setup_block_group(&conn);
2704 let deletion_sequence = Sequence::new()
2705 .sequence_type("DNA")
2706 .sequence("")
2707 .save(&conn);
2708 let deletion_node_id =
2709 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2710 let deletion = PathBlock {
2711 node_id: deletion_node_id,
2712 block_sequence: deletion_sequence.get_sequence(None, None),
2713 sequence_start: 0,
2714 sequence_end: 0,
2715 path_start: 10,
2716 path_end: 12,
2717 strand: Strand::Forward,
2718 };
2719 let change = PathChange {
2720 block_group_id,
2721 path: path.clone(),
2722 path_accession: None,
2723 start: 10,
2724 end: 12,
2725 block: deletion,
2726 chromosome_index: 1,
2727 phased: 0,
2728 preserve_edge: true,
2729 };
2730 let tree = path.intervaltree(&conn);
2731 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2732
2733 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2734 assert_eq!(
2735 all_sequences,
2736 HashSet::from_iter(vec![
2737 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2738 "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2739 ])
2740 );
2741 }
2742
2743 #[test]
2744 fn deletion_ending_at_block_boundary() {
2745 let conn = get_connection(None).unwrap();
2746 let (block_group_id, path) = setup_block_group(&conn);
2747 let deletion_sequence = Sequence::new()
2748 .sequence_type("DNA")
2749 .sequence("")
2750 .save(&conn);
2751 let deletion_node_id =
2752 Node::create(&conn, &deletion_sequence.hash, &HashId::convert_str("1"));
2753 let deletion = PathBlock {
2754 node_id: deletion_node_id,
2755 block_sequence: deletion_sequence.get_sequence(None, None),
2756 sequence_start: 0,
2757 sequence_end: 0,
2758 path_start: 18,
2759 path_end: 20,
2760 strand: Strand::Forward,
2761 };
2762 let change = PathChange {
2763 block_group_id,
2764 path: path.clone(),
2765 path_accession: None,
2766 start: 18,
2767 end: 20,
2768 block: deletion,
2769 chromosome_index: 1,
2770 phased: 0,
2771 preserve_edge: true,
2772 };
2773 let tree = path.intervaltree(&conn);
2774 BlockGroup::insert_change(&conn, &change, &tree).unwrap();
2775
2776 let all_sequences = BlockGroup::get_all_sequences(&conn, &block_group_id, false);
2777 assert_eq!(
2778 all_sequences,
2779 HashSet::from_iter(vec![
2780 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2781 "AAAAAAAAAATTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
2782 ])
2783 );
2784 }
2785
2786 #[test]
2787 fn test_blockgroup_interval_tree() {
2788 let conn = &get_connection(None).unwrap();
2789 let (block_group_id, path) = setup_block_group(conn);
2790 let _new_sample = Sample::get_or_create(conn, "child");
2791 let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
2792 let new_path = Path::query(
2793 conn,
2794 "select * from paths where block_group_id = ?1",
2795 params![new_bg_id],
2796 );
2797 let insert_sequence = Sequence::new()
2798 .sequence_type("DNA")
2799 .sequence("NNNN")
2800 .save(conn);
2801 let insert_node_id = Node::create(
2802 conn,
2803 &insert_sequence.hash,
2804 &HashId::convert_str("insert-node"),
2805 );
2806 let insert = PathBlock {
2807 node_id: insert_node_id,
2808 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2809 sequence_start: 0,
2810 sequence_end: 4,
2811 path_start: 7,
2812 path_end: 15,
2813 strand: Strand::Forward,
2814 };
2815 let change = PathChange {
2816 block_group_id: new_bg_id,
2817 path: new_path[0].clone(),
2818 path_accession: None,
2819 start: 7,
2820 end: 15,
2821 block: insert,
2822 chromosome_index: 1,
2823 phased: 0,
2824 preserve_edge: true,
2825 };
2826 let tree = path.intervaltree(conn);
2827 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2828
2829 let tree = BlockGroup::intervaltree_for(conn, &block_group_id, false);
2830 let tree2 = BlockGroup::intervaltree_for(conn, &block_group_id, true);
2831 interval_tree_verify(
2832 &tree,
2833 3,
2834 &[NodeIntervalBlock {
2835 node_id: HashId::convert_str("test-a-node"),
2836 start: 0,
2837 end: 10,
2838 sequence_start: 0,
2839 sequence_end: 10,
2840 strand: Strand::Forward,
2841 }],
2842 );
2843 interval_tree_verify(
2844 &tree2,
2845 3,
2846 &[NodeIntervalBlock {
2847 node_id: HashId::convert_str("test-a-node"),
2848 start: 0,
2849 end: 10,
2850 sequence_start: 0,
2851 sequence_end: 10,
2852 strand: Strand::Forward,
2853 }],
2854 );
2855 interval_tree_verify(
2856 &tree,
2857 35,
2858 &[NodeIntervalBlock {
2859 node_id: HashId::convert_str("test-g-node"),
2860 start: 30,
2861 end: 40,
2862 sequence_start: 0,
2863 sequence_end: 10,
2864 strand: Strand::Forward,
2865 }],
2866 );
2867 interval_tree_verify(
2868 &tree2,
2869 35,
2870 &[NodeIntervalBlock {
2871 node_id: HashId::convert_str("test-g-node"),
2872 start: 30,
2873 end: 40,
2874 sequence_start: 0,
2875 sequence_end: 10,
2876 strand: Strand::Forward,
2877 }],
2878 );
2879
2880 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, false);
2882 let tree2 = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2883 interval_tree_verify(
2884 &tree,
2885 3,
2886 &[NodeIntervalBlock {
2887 node_id: HashId::convert_str("test-a-node"),
2888 start: 0,
2889 end: 7,
2890 sequence_start: 0,
2891 sequence_end: 7,
2892 strand: Strand::Forward,
2893 }],
2894 );
2895 interval_tree_verify(
2896 &tree2,
2897 3,
2898 &[NodeIntervalBlock {
2899 node_id: HashId::convert_str("test-a-node"),
2900 start: 0,
2901 end: 7,
2902 sequence_start: 0,
2903 sequence_end: 7,
2904 strand: Strand::Forward,
2905 }],
2906 );
2907 interval_tree_verify(
2908 &tree,
2909 30,
2910 &[
2911 NodeIntervalBlock {
2912 node_id: HashId::convert_str("test-g-node"),
2913 start: 26,
2914 end: 36,
2915 sequence_start: 0,
2916 sequence_end: 10,
2917 strand: Strand::Forward,
2918 },
2919 NodeIntervalBlock {
2920 node_id: HashId::convert_str("test-g-node"),
2921 start: 30,
2922 end: 40,
2923 sequence_start: 0,
2924 sequence_end: 10,
2925 strand: Strand::Forward,
2926 },
2927 ],
2928 );
2929 interval_tree_verify(&tree2, 30, &[]);
2930 interval_tree_verify(
2933 &tree2,
2934 9,
2935 &[
2936 NodeIntervalBlock {
2937 node_id: HashId::convert_str("insert-node"),
2938 start: 7,
2939 end: 11,
2940 sequence_start: 0,
2941 sequence_end: 4,
2942 strand: Strand::Forward,
2943 },
2944 NodeIntervalBlock {
2945 node_id: HashId::convert_str("test-a-node"),
2946 start: 7,
2947 end: 10,
2948 sequence_start: 7,
2949 sequence_end: 10,
2950 strand: Strand::Forward,
2951 },
2952 ],
2953 );
2954 }
2955
2956 #[test]
2957 fn test_changes_against_derivative_blockgroups() {
2958 let conn = &get_connection(None).unwrap();
2959 let (_block_group_id, _path) = setup_block_group(conn);
2960 let _new_sample = Sample::get_or_create(conn, "child");
2961 let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
2962 let new_path = Path::query(
2963 conn,
2964 "select * from paths where block_group_id = ?1",
2965 rusqlite::params!(SQLValue::from(new_bg_id)),
2966 );
2967 let insert_sequence = Sequence::new()
2968 .sequence_type("DNA")
2969 .sequence("NNNN")
2970 .save(conn);
2971 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
2972 let insert = PathBlock {
2973 node_id: insert_node_id,
2974 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
2975 sequence_start: 0,
2976 sequence_end: 4,
2977 path_start: 7,
2978 path_end: 15,
2979 strand: Strand::Forward,
2980 };
2981 let change = PathChange {
2982 block_group_id: new_bg_id,
2983 path: new_path[0].clone(),
2984 path_accession: None,
2985 start: 7,
2986 end: 15,
2987 block: insert,
2988 chromosome_index: 1,
2989 phased: 0,
2990 preserve_edge: false,
2991 };
2992 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
2994 BlockGroup::insert_change(conn, &change, &tree).unwrap();
2995 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
2996 assert_eq!(
2997 all_sequences,
2998 HashSet::from_iter(vec!["AAAAAAANNNNTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),])
2999 );
3000
3001 let _new_sample = Sample::get_or_create(conn, "grandchild");
3003 let gc_bg_id = get_single_bg_id(
3004 conn,
3005 "test",
3006 "grandchild",
3007 "chr1",
3008 vec!["child".to_string()],
3009 );
3010 let new_path = Path::query(
3011 conn,
3012 "select * from paths where block_group_id = ?1",
3013 rusqlite::params!(SQLValue::from(gc_bg_id)),
3014 );
3015
3016 let insert = PathBlock {
3017 node_id: insert_node_id,
3018 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3019 sequence_start: 0,
3020 sequence_end: 4,
3021 path_start: 7,
3022 path_end: 15,
3023 strand: Strand::Forward,
3024 };
3025 let change = PathChange {
3026 block_group_id: gc_bg_id,
3027 path: new_path[0].clone(),
3028 path_accession: None,
3029 start: 7,
3030 end: 15,
3031 block: insert,
3032 chromosome_index: 1,
3033 phased: 0,
3034 preserve_edge: false,
3035 };
3036 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3038 BlockGroup::insert_change(conn, &change, &tree).unwrap();
3039 let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
3040 assert_eq!(
3041 all_sequences,
3042 HashSet::from_iter(vec!["AAAAAAANNNNTCCCCCCCCCCGGGGGGGGGG".to_string(),])
3043 )
3044 }
3045
3046 #[test]
3047 fn test_changes_against_derivative_diploid_blockgroups() {
3048 let conn = &get_connection(None).unwrap();
3051 let (_block_group_id, _path) = setup_block_group(conn);
3052 let _new_sample = Sample::get_or_create(conn, "child");
3053 let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
3054 let new_path = Path::query(
3055 conn,
3056 "select * from paths where block_group_id = ?1",
3057 params![new_bg_id],
3058 );
3059 let insert_sequence = Sequence::new()
3060 .sequence_type("DNA")
3061 .sequence("NNNN")
3062 .save(conn);
3063 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
3064 let insert = PathBlock {
3065 node_id: insert_node_id,
3066 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3067 sequence_start: 0,
3068 sequence_end: 4,
3069 path_start: 7,
3070 path_end: 11,
3071 strand: Strand::Forward,
3072 };
3073 let change = PathChange {
3074 block_group_id: new_bg_id,
3075 path: new_path[0].clone(),
3076 path_accession: None,
3077 start: 7,
3078 end: 11,
3079 block: insert,
3080 chromosome_index: 1,
3081 phased: 0,
3082 preserve_edge: true,
3083 };
3084 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
3086 BlockGroup::insert_change(conn, &change, &tree).unwrap();
3087 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
3088 assert_eq!(
3089 all_sequences,
3090 HashSet::from_iter(vec![
3091 "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3092 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3093 ])
3094 );
3095
3096 let _new_sample = Sample::get_or_create(conn, "grandchild");
3098 let gc_bg_id = get_single_bg_id(
3099 conn,
3100 "test",
3101 "grandchild",
3102 "chr1",
3103 vec!["child".to_string()],
3104 );
3105 let new_path = Path::query(
3106 conn,
3107 "select * from paths where block_group_id = ?1",
3108 params![gc_bg_id],
3109 );
3110
3111 let insert_sequence = Sequence::new()
3112 .sequence_type("DNA")
3113 .sequence("NNNN")
3114 .save(conn);
3115 let insert_node_id = Node::create(
3116 conn,
3117 &insert_sequence.hash,
3118 &HashId::convert_str("new-hash"),
3119 );
3120
3121 let insert = PathBlock {
3122 node_id: insert_node_id,
3123 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3124 sequence_start: 0,
3125 sequence_end: 4,
3126 path_start: 20,
3127 path_end: 24,
3128 strand: Strand::Forward,
3129 };
3130 let change = PathChange {
3131 block_group_id: gc_bg_id,
3132 path: new_path[0].clone(),
3133 path_accession: None,
3134 start: 20,
3135 end: 24,
3136 block: insert,
3137 chromosome_index: 1,
3138 phased: 0,
3139 preserve_edge: true,
3140 };
3141 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3143 BlockGroup::insert_change(conn, &change, &tree).unwrap();
3144 let all_sequences = BlockGroup::get_all_sequences(conn, &gc_bg_id, true);
3145 assert_eq!(
3146 all_sequences,
3147 HashSet::from_iter(vec![
3148 "AAAAAAANNNNTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3149 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3150 "AAAAAAAAAATTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string(),
3151 "AAAAAAANNNNTTTTTTTTTNNNNCCCCCCGGGGGGGGGG".to_string()
3152 ])
3153 )
3154 }
3155
3156 #[test]
3157 #[should_panic]
3158 fn test_prohibits_out_of_frame_changes_against_derivative_diploid_blockgroups() {
3159 let conn = &get_connection(None).unwrap();
3161 let (_block_group_id, _path) = setup_block_group(conn);
3162 let _new_sample = Sample::get_or_create(conn, "child");
3163 let new_bg_id = get_single_bg_id(conn, "test", "child", "chr1", vec!["test".to_string()]);
3164 let new_path = Path::query(
3165 conn,
3166 "select * from paths where block_group_id = ?1",
3167 rusqlite::params!(SQLValue::from(new_bg_id)),
3168 );
3169 let insert_sequence = Sequence::new()
3172 .sequence_type("DNA")
3173 .sequence("NNNN")
3174 .save(conn);
3175 let insert_node_id = Node::create(conn, &insert_sequence.hash, &HashId::convert_str("1"));
3176 let insert = PathBlock {
3177 node_id: insert_node_id,
3178 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3179 sequence_start: 0,
3180 sequence_end: 4,
3181 path_start: 7,
3182 path_end: 12,
3183 strand: Strand::Forward,
3184 };
3185 let change = PathChange {
3186 block_group_id: new_bg_id,
3187 path: new_path[0].clone(),
3188 path_accession: None,
3189 start: 7,
3190 end: 12,
3191 block: insert,
3192 chromosome_index: 1,
3193 phased: 0,
3194 preserve_edge: true,
3195 };
3196 let tree = BlockGroup::intervaltree_for(conn, &new_bg_id, true);
3198 BlockGroup::insert_change(conn, &change, &tree).unwrap();
3199 let all_sequences = BlockGroup::get_all_sequences(conn, &new_bg_id, true);
3200 assert_eq!(
3201 all_sequences,
3202 HashSet::from_iter(vec![
3203 "AAAAAAANNNNTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3204 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3205 ])
3206 );
3207
3208 let _new_sample = Sample::get_or_create(conn, "grandchild");
3210 let gc_bg_id = get_single_bg_id(
3211 conn,
3212 "test",
3213 "grandchild",
3214 "chr1",
3215 vec!["child".to_string()],
3216 );
3217 let new_path = Path::query(
3218 conn,
3219 "select * from paths where block_group_id = ?1",
3220 rusqlite::params!(SQLValue::from(gc_bg_id)),
3221 );
3222
3223 let insert_sequence = Sequence::new()
3224 .sequence_type("DNA")
3225 .sequence("NNNN")
3226 .save(conn);
3227 let insert_node_id =
3228 Node::create(conn, &insert_sequence.hash, &HashId::pad_str("new-hash"));
3229
3230 let insert = PathBlock {
3231 node_id: insert_node_id,
3232 block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
3233 sequence_start: 0,
3234 sequence_end: 4,
3235 path_start: 20,
3236 path_end: 24,
3237 strand: Strand::Forward,
3238 };
3239 let change = PathChange {
3240 block_group_id: gc_bg_id,
3241 path: new_path[0].clone(),
3242 path_accession: None,
3243 start: 20,
3244 end: 24,
3245 block: insert,
3246 chromosome_index: 1,
3247 phased: 0,
3248 preserve_edge: true,
3249 };
3250 let tree = BlockGroup::intervaltree_for(conn, &gc_bg_id, true);
3252 BlockGroup::insert_change(conn, &change, &tree).unwrap();
3253 }
3254
3255 mod test_derive_subgraph {
3256
3257 use super::*;
3258 use crate::{
3259 collection::Collection,
3260 node::Node,
3261 sequence::Sequence,
3262 test_helpers::{get_connection, setup_block_group},
3263 };
3264
3265 #[test]
3266 fn test_derive_subgraph_one_insertion() {
3267 let conn = &get_connection(None).unwrap();
3274 Collection::create(conn, "test");
3275 let (block_group1_id, original_path) = setup_block_group(conn);
3276
3277 let intervaltree = original_path.intervaltree(conn);
3278 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3279 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3280
3281 let insert_sequence = Sequence::new()
3282 .sequence_type("DNA")
3283 .sequence("AAAAAAAA")
3284 .save(conn);
3285 let insert_node_id = Node::create(
3286 conn,
3287 &insert_sequence.hash,
3288 &HashId(calculate_hash(&format!(
3289 "test-insert-a-node.{}",
3290 insert_sequence.hash
3291 ))),
3292 );
3293 let edge_into_insert = Edge::create(
3294 conn,
3295 insert_start_node_id,
3296 6,
3297 Strand::Forward,
3298 insert_node_id,
3299 0,
3300 Strand::Forward,
3301 );
3302 let edge_out_of_insert = Edge::create(
3303 conn,
3304 insert_node_id,
3305 8,
3306 Strand::Forward,
3307 insert_end_node_id,
3308 4,
3309 Strand::Forward,
3310 );
3311 let ref_heal_1 = Edge::create(
3312 conn,
3313 insert_start_node_id,
3314 6,
3315 Strand::Forward,
3316 insert_start_node_id,
3317 6,
3318 Strand::Forward,
3319 );
3320 let ref_heal_2 = Edge::create(
3321 conn,
3322 insert_end_node_id,
3323 4,
3324 Strand::Forward,
3325 insert_end_node_id,
3326 4,
3327 Strand::Forward,
3328 );
3329
3330 let edge_ids = [
3331 &edge_into_insert.id,
3332 &edge_out_of_insert.id,
3333 &ref_heal_1.id,
3334 &ref_heal_2.id,
3335 ];
3336 let block_group_edges = edge_ids
3337 .iter()
3338 .enumerate()
3339 .map(|(i, edge_id)| BlockGroupEdgeData {
3340 block_group_id: block_group1_id,
3341 edge_id: *(*edge_id),
3342 chromosome_index: if i < 2 { 1 } else { 0 },
3343 phased: 0,
3344 })
3345 .collect::<Vec<BlockGroupEdgeData>>();
3346 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3347
3348 let insert_path =
3349 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3350 assert_eq!(
3351 insert_path.sequence(conn),
3352 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3353 );
3354
3355 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3356 assert_eq!(
3357 all_sequences,
3358 HashSet::from_iter(vec![
3359 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3360 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3361 ])
3362 );
3363
3364 let mut blocks = intervaltree
3365 .query(Range { start: 15, end: 25 })
3366 .map(|x| x.value)
3367 .collect::<Vec<_>>();
3368 blocks.sort_by(|a, b| a.start.cmp(&b.start));
3369 let start_block = blocks[0];
3370 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3371 let end_block = blocks[blocks.len() - 1];
3372 let end_node_coordinate = 25 - end_block.start + end_block.sequence_start;
3373
3374 let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3375 BlockGroup::derive_subgraph(
3376 conn,
3377 "test",
3378 "test",
3379 &block_group1_id,
3380 &start_block,
3381 &end_block,
3382 start_node_coordinate,
3383 end_node_coordinate,
3384 &block_group2.id,
3385 true,
3386 );
3387 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3388 assert_eq!(
3389 all_sequences2,
3390 HashSet::from_iter(vec!["TTTTTCCCCC".to_string(), "TAAAAAAAAC".to_string(),])
3391 );
3392 }
3393
3394 #[test]
3395 fn test_derive_subgraph_two_independent_insertions() {
3396 let conn = &get_connection(None).unwrap();
3402 Collection::create(conn, "test");
3403 let (block_group1_id, original_path) = setup_block_group(conn);
3404
3405 let intervaltree = original_path.intervaltree(conn);
3406 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3407 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3408
3409 let insert_sequence = Sequence::new()
3410 .sequence_type("DNA")
3411 .sequence("AAAAAAAA")
3412 .save(conn);
3413 let insert_node_id = Node::create(
3414 conn,
3415 &insert_sequence.hash,
3416 &HashId(calculate_hash(&format!(
3417 "test-insert-a-node.{}",
3418 insert_sequence.hash
3419 ))),
3420 );
3421 let edge_into_insert = Edge::create(
3422 conn,
3423 insert_start_node_id,
3424 6,
3425 Strand::Forward,
3426 insert_node_id,
3427 0,
3428 Strand::Forward,
3429 );
3430 let edge_out_of_insert = Edge::create(
3431 conn,
3432 insert_node_id,
3433 8,
3434 Strand::Forward,
3435 insert_end_node_id,
3436 4,
3437 Strand::Forward,
3438 );
3439 let ref_heal_1 = Edge::create(
3440 conn,
3441 insert_start_node_id,
3442 6,
3443 Strand::Forward,
3444 insert_start_node_id,
3445 6,
3446 Strand::Forward,
3447 );
3448 let ref_heal_2 = Edge::create(
3449 conn,
3450 insert_end_node_id,
3451 4,
3452 Strand::Forward,
3453 insert_end_node_id,
3454 4,
3455 Strand::Forward,
3456 );
3457
3458 let edge_ids = [
3459 &edge_into_insert.id,
3460 &edge_out_of_insert.id,
3461 &ref_heal_1.id,
3462 &ref_heal_2.id,
3463 ];
3464 let block_group_edges = edge_ids
3465 .iter()
3466 .enumerate()
3467 .map(|(i, edge_id)| BlockGroupEdgeData {
3468 block_group_id: block_group1_id,
3469 edge_id: *(*edge_id),
3470 chromosome_index: if i < 2 { 1 } else { 0 },
3471 phased: 0,
3472 })
3473 .collect::<Vec<BlockGroupEdgeData>>();
3474 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3475
3476 let insert_path =
3477 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3478 assert_eq!(
3479 insert_path.sequence(conn),
3480 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3481 );
3482
3483 let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3484 let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3485
3486 let insert2_sequence = Sequence::new()
3487 .sequence_type("DNA")
3488 .sequence("TTTTTTTT")
3489 .save(conn);
3490 let insert2_node_id = Node::create(
3491 conn,
3492 &insert2_sequence.hash,
3493 &HashId(calculate_hash(&format!(
3494 "test-insert-t-node.{}",
3495 insert2_sequence.hash
3496 ))),
3497 );
3498 let edge_into_insert2 = Edge::create(
3499 conn,
3500 insert2_start_node_id,
3501 6,
3502 Strand::Forward,
3503 insert2_node_id,
3504 0,
3505 Strand::Forward,
3506 );
3507 let edge_out_of_insert2 = Edge::create(
3508 conn,
3509 insert2_node_id,
3510 8,
3511 Strand::Forward,
3512 insert2_end_node_id,
3513 4,
3514 Strand::Forward,
3515 );
3516 let ref_heal_1 = Edge::create(
3517 conn,
3518 insert2_start_node_id,
3519 6,
3520 Strand::Forward,
3521 insert2_start_node_id,
3522 6,
3523 Strand::Forward,
3524 );
3525 let ref_heal_2 = Edge::create(
3526 conn,
3527 insert2_end_node_id,
3528 4,
3529 Strand::Forward,
3530 insert2_end_node_id,
3531 4,
3532 Strand::Forward,
3533 );
3534
3535 let edge_ids = [
3536 &edge_into_insert2.id,
3537 &edge_out_of_insert2.id,
3538 &ref_heal_1.id,
3539 &ref_heal_2.id,
3540 ];
3541 let block_group_edges = edge_ids
3542 .iter()
3543 .enumerate()
3544 .map(|(i, edge_id)| BlockGroupEdgeData {
3545 block_group_id: block_group1_id,
3546 edge_id: *(*edge_id),
3547 chromosome_index: if i < 2 { 1 } else { 0 },
3548 phased: 0,
3549 })
3550 .collect::<Vec<BlockGroupEdgeData>>();
3551 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3552
3553 let insert2_path =
3554 insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3555 assert_eq!(
3556 insert2_path.sequence(conn),
3557 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3558 );
3559
3560 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3561 assert_eq!(
3562 all_sequences,
3563 HashSet::from_iter(vec![
3564 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3565 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3566 "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3567 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3568 ])
3569 );
3570
3571 let mut blocks = intervaltree
3572 .query(Range { start: 15, end: 36 })
3573 .map(|x| x.value)
3574 .collect::<Vec<_>>();
3575 blocks.sort_by(|a, b| a.start.cmp(&b.start));
3576 let start_block = blocks[0];
3577 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3578 let end_block = blocks[blocks.len() - 1];
3579 let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3580
3581 let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3582 BlockGroup::derive_subgraph(
3583 conn,
3584 "test",
3585 "test",
3586 &block_group1_id,
3587 &start_block,
3588 &end_block,
3589 start_node_coordinate,
3590 end_node_coordinate,
3591 &block_group2.id,
3592 true,
3593 );
3594 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3595 assert_eq!(
3596 all_sequences2,
3597 HashSet::from_iter(vec![
3598 "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3599 "TAAAAAAAACCCCCCGGGGGG".to_string(),
3600 "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3601 "TAAAAAAAACCTTTTTTTTGG".to_string(),
3602 ])
3603 );
3604 }
3605
3606 #[test]
3607 fn test_derive_subgraph_two_independent_insertions_and_one_deletion() {
3608 let conn = &get_connection(None).unwrap();
3617 Collection::create(conn, "test");
3618 let (block_group1_id, original_path) = setup_block_group(conn);
3619
3620 let intervaltree = original_path.intervaltree(conn);
3621 let insert_start_node_id = intervaltree.query_point(16).next().unwrap().value.node_id;
3622 let insert_end_node_id = intervaltree.query_point(24).next().unwrap().value.node_id;
3623
3624 let insert_sequence = Sequence::new()
3625 .sequence_type("DNA")
3626 .sequence("AAAAAAAA")
3627 .save(conn);
3628 let insert_node_id = Node::create(
3629 conn,
3630 &insert_sequence.hash,
3631 &HashId(calculate_hash(&format!(
3632 "test-insert-a-node.{}",
3633 insert_sequence.hash
3634 ))),
3635 );
3636 let edge_into_insert = Edge::create(
3637 conn,
3638 insert_start_node_id,
3639 6,
3640 Strand::Forward,
3641 insert_node_id,
3642 0,
3643 Strand::Forward,
3644 );
3645 let edge_out_of_insert = Edge::create(
3646 conn,
3647 insert_node_id,
3648 8,
3649 Strand::Forward,
3650 insert_end_node_id,
3651 4,
3652 Strand::Forward,
3653 );
3654 let ref_heal_1 = Edge::create(
3655 conn,
3656 insert_start_node_id,
3657 6,
3658 Strand::Forward,
3659 insert_start_node_id,
3660 6,
3661 Strand::Forward,
3662 );
3663 let ref_heal_2 = Edge::create(
3664 conn,
3665 insert_end_node_id,
3666 4,
3667 Strand::Forward,
3668 insert_end_node_id,
3669 4,
3670 Strand::Forward,
3671 );
3672
3673 let edge_ids = [
3674 &edge_into_insert.id,
3675 &edge_out_of_insert.id,
3676 &ref_heal_1.id,
3677 &ref_heal_2.id,
3678 ];
3679 let block_group_edges = edge_ids
3680 .iter()
3681 .map(|edge_id| BlockGroupEdgeData {
3682 block_group_id: block_group1_id,
3683 edge_id: *(*edge_id),
3684 chromosome_index: NO_CHROMOSOME_INDEX,
3685 phased: 0,
3686 })
3687 .collect::<Vec<BlockGroupEdgeData>>();
3688 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3689
3690 let insert_path =
3691 original_path.new_path_with(conn, 16, 24, &edge_into_insert, &edge_out_of_insert);
3692 assert_eq!(
3693 insert_path.sequence(conn),
3694 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG"
3695 );
3696
3697 let insert2_start_node_id = intervaltree.query_point(28).next().unwrap().value.node_id;
3698 let insert2_end_node_id = intervaltree.query_point(32).next().unwrap().value.node_id;
3699
3700 let insert2_sequence = Sequence::new()
3701 .sequence_type("DNA")
3702 .sequence("TTTTTTTT")
3703 .save(conn);
3704 let insert2_node_id = Node::create(
3705 conn,
3706 &insert2_sequence.hash,
3707 &HashId(calculate_hash(&format!(
3708 "test-insert-t-node.{}",
3709 insert2_sequence.hash
3710 ))),
3711 );
3712 let edge_into_insert2 = Edge::create(
3713 conn,
3714 insert2_start_node_id,
3715 6,
3716 Strand::Forward,
3717 insert2_node_id,
3718 0,
3719 Strand::Forward,
3720 );
3721 let edge_out_of_insert2 = Edge::create(
3722 conn,
3723 insert2_node_id,
3724 8,
3725 Strand::Forward,
3726 insert2_end_node_id,
3727 4,
3728 Strand::Forward,
3729 );
3730 let ref_heal_1 = Edge::create(
3731 conn,
3732 insert2_start_node_id,
3733 6,
3734 Strand::Forward,
3735 insert2_start_node_id,
3736 6,
3737 Strand::Forward,
3738 );
3739 let ref_heal_2 = Edge::create(
3740 conn,
3741 insert2_end_node_id,
3742 4,
3743 Strand::Forward,
3744 insert2_end_node_id,
3745 4,
3746 Strand::Forward,
3747 );
3748
3749 let edge_ids = [
3750 &edge_into_insert2.id,
3751 &edge_out_of_insert2.id,
3752 &ref_heal_1.id,
3753 &ref_heal_2.id,
3754 ];
3755 let block_group_edges = edge_ids
3756 .iter()
3757 .map(|edge_id| BlockGroupEdgeData {
3758 block_group_id: block_group1_id,
3759 edge_id: *(*edge_id),
3760 chromosome_index: NO_CHROMOSOME_INDEX,
3761 phased: 0,
3762 })
3763 .collect::<Vec<BlockGroupEdgeData>>();
3764 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3765
3766 let insert2_path =
3767 insert_path.new_path_with(conn, 28, 32, &edge_into_insert2, &edge_out_of_insert2);
3768 assert_eq!(
3769 insert2_path.sequence(conn),
3770 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG"
3771 );
3772
3773 let deletion_end_node_id = intervaltree.query_point(38).next().unwrap().value.node_id;
3774 let deletion_edge = Edge::create(
3775 conn,
3776 insert_node_id,
3777 8,
3778 Strand::Forward,
3779 deletion_end_node_id,
3780 8,
3781 Strand::Forward,
3782 );
3783 let ref_heal_1 = Edge::create(
3784 conn,
3785 insert_node_id,
3786 8,
3787 Strand::Forward,
3788 insert_node_id,
3789 8,
3790 Strand::Forward,
3791 );
3792 let ref_heal_2 = Edge::create(
3793 conn,
3794 deletion_end_node_id,
3795 8,
3796 Strand::Forward,
3797 deletion_end_node_id,
3798 8,
3799 Strand::Forward,
3800 );
3801 let block_group_edges = [
3802 BlockGroupEdgeData {
3803 block_group_id: block_group1_id,
3804 edge_id: deletion_edge.id,
3805 chromosome_index: NO_CHROMOSOME_INDEX,
3806 phased: 0,
3807 },
3808 BlockGroupEdgeData {
3809 block_group_id: block_group1_id,
3810 edge_id: ref_heal_1.id,
3811 chromosome_index: NO_CHROMOSOME_INDEX,
3812 phased: 0,
3813 },
3814 BlockGroupEdgeData {
3815 block_group_id: block_group1_id,
3816 edge_id: ref_heal_2.id,
3817 chromosome_index: NO_CHROMOSOME_INDEX,
3818 phased: 0,
3819 },
3820 ];
3821 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3822
3823 let all_sequences = BlockGroup::get_all_sequences(conn, &block_group1_id, false);
3824 assert_eq!(
3825 all_sequences,
3826 HashSet::from_iter(vec![
3827 "AAAAAAAAAATTTTTTTTTTCCCCCCCCCCGGGGGGGGGG".to_string(),
3828 "AAAAAAAAAATTTTTTAAAAAAAACCCCCCGGGGGGGGGG".to_string(),
3829 "AAAAAAAAAATTTTTTTTTTCCCCCCTTTTTTTTGGGGGG".to_string(),
3830 "AAAAAAAAAATTTTTTAAAAAAAACCTTTTTTTTGGGGGG".to_string(),
3831 "AAAAAAAAAATTTTTTAAAAAAAAGG".to_string(), ])
3833 );
3834
3835 let mut blocks = intervaltree
3836 .query(Range { start: 15, end: 36 })
3837 .map(|x| x.value)
3838 .collect::<Vec<_>>();
3839 blocks.sort_by(|a, b| a.start.cmp(&b.start));
3840 let start_block = blocks[0];
3841 let start_node_coordinate = 15 - start_block.start + start_block.sequence_start;
3842 let end_block = blocks[blocks.len() - 1];
3843 let end_node_coordinate = 36 - end_block.start + end_block.sequence_start;
3844
3845 let block_group2 = create_bg(conn, "test", "test", "chr1.1");
3846 BlockGroup::derive_subgraph(
3847 conn,
3848 "test",
3849 "test",
3850 &block_group1_id,
3851 &start_block,
3852 &end_block,
3853 start_node_coordinate,
3854 end_node_coordinate,
3855 &block_group2.id,
3856 true,
3857 );
3858 let all_sequences2 = BlockGroup::get_all_sequences(conn, &block_group2.id, false);
3859 assert_eq!(
3860 all_sequences2,
3861 HashSet::from_iter(vec![
3864 "TTTTTCCCCCCCCCCGGGGGG".to_string(),
3865 "TAAAAAAAACCCCCCGGGGGG".to_string(),
3866 "TTTTTCCCCCCTTTTTTTTGG".to_string(),
3867 "TAAAAAAAACCTTTTTTTTGG".to_string(),
3868 ])
3869 );
3870 }
3871 }
3872}