1use core::ops::Range as RustRange;
2use std::collections::{HashMap, HashSet};
3
4use gen_core::{
5 HashId, NodeIntervalBlock, PATH_END_NODE_ID, PATH_START_NODE_ID, PathBlock, Strand,
6 calculate_hash, is_end_node, is_start_node,
7 range::{Range, RangeMapping},
8 traits::Capnp,
9};
10use intervaltree::IntervalTree;
11use itertools::Itertools;
12use rusqlite::{Row, params, types::Value as SQLValue};
13use serde::{Deserialize, Serialize};
14
15use crate::{
16 block_group_edge::BlockGroupEdge, db::GraphConnection, edge::Edge, errors::QueryError,
17 gen_models_capnp::path as PathCapnp, node::Node, path_edge::PathEdge, sequence::Sequence,
18 traits::*,
19};
20
21#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
22pub struct Path {
23 pub id: HashId,
24 pub block_group_id: HashId,
25 pub name: String,
26 pub created_on: i64,
27}
28
29impl<'a> Capnp<'a> for Path {
30 type Builder = PathCapnp::Builder<'a>;
31 type Reader = PathCapnp::Reader<'a>;
32
33 fn write_capnp(&self, builder: &mut Self::Builder) {
34 builder.set_id(&self.id.0).unwrap();
35 builder.set_block_group_id(&self.block_group_id.0).unwrap();
36 builder.set_name(&self.name);
37 builder.set_created_on(self.created_on);
38 }
39
40 fn read_capnp(reader: Self::Reader) -> Self {
41 let id = reader
42 .get_id()
43 .unwrap()
44 .as_slice()
45 .unwrap()
46 .try_into()
47 .unwrap();
48 let block_group_id = reader
49 .get_block_group_id()
50 .unwrap()
51 .as_slice()
52 .unwrap()
53 .try_into()
54 .unwrap();
55 let name = reader.get_name().unwrap().to_string().unwrap();
56 let created_on = reader.get_created_on();
57
58 Path {
59 id,
60 block_group_id,
61 name,
62 created_on,
63 }
64 }
65}
66
67#[derive(Clone, Debug, Eq, Hash, PartialEq)]
68pub struct PathData {
69 pub name: String,
70 pub block_group_id: HashId,
71}
72
73pub fn revcomp(seq: &str) -> String {
76 String::from_utf8(
77 seq.chars()
78 .rev()
79 .map(|c| -> u8 {
80 let is_upper = c.is_ascii_uppercase();
81 let rc = c as u8;
82 let v = if rc == 78 {
83 rc
85 } else if rc == 110 {
86 rc
88 } else if rc & 2 != 0 {
89 rc ^ 4
91 } else {
92 rc ^ 21
94 };
95 if is_upper { v } else { v.to_ascii_lowercase() }
96 })
97 .collect(),
98 )
99 .unwrap()
100}
101
102#[derive(Clone, Debug)]
103pub struct Annotation {
104 pub name: String,
105 pub start: i64,
106 pub end: i64,
107}
108
109impl Path {
110 pub fn validate_edges(conn: &GraphConnection, edge_ids: &[HashId], block_group_id: &HashId) {
111 let edge_id_set = edge_ids.iter().copied().collect::<HashSet<HashId>>();
112
113 if edge_id_set.len() != edge_ids.len() {
115 println!("Duplicate edge IDs detected in path creation");
116 }
117
118 let augmented_edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
120 let bg_edge_ids = augmented_edges
121 .iter()
122 .map(|augmented_edge| augmented_edge.edge.id)
123 .collect::<HashSet<_>>();
124
125 assert!(
126 edge_id_set.is_subset(&bg_edge_ids),
127 "Not all edges are in the block group ({block_group_id})"
128 );
129
130 let edges_by_id = augmented_edges
131 .iter()
132 .map(|augmented_edge| (&augmented_edge.edge.id, augmented_edge.edge.clone()))
133 .collect::<HashMap<_, _>>();
134
135 for (edge1_id, edge2_id) in edge_ids.iter().tuple_windows() {
138 let edge1 = edges_by_id.get(edge1_id).unwrap();
139 let edge2 = edges_by_id.get(edge2_id).unwrap();
140 assert!(
141 edge1.target_node_id == edge2.source_node_id,
142 "Edges {} and {} don't share the same node ({} vs. {})",
143 edge1.id,
144 edge2.id,
145 edge1.target_node_id,
146 edge2.source_node_id
147 );
148
149 assert!(
150 edge1.target_coordinate < edge2.source_coordinate,
151 "Source coordinate {} for edge {} is before target coordinate {} for edge {}",
152 edge2.source_coordinate,
153 edge2.id,
154 edge1.target_coordinate,
155 edge1.id
156 );
157
158 assert!(
159 edge1.target_strand == edge2.source_strand,
160 "Strand mismatch between consecutive edges {} and {}",
161 edge1.id,
162 edge2.id,
163 );
164 }
165 }
166
167 pub fn create(
168 conn: &GraphConnection,
169 name: &str,
170 block_group_id: &HashId,
171 edge_ids: &[HashId],
172 ) -> Path {
173 Path::validate_edges(conn, edge_ids, block_group_id);
174 let hash = HashId(calculate_hash(&format!("{block_group_id}:{name}")));
175 let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
176 let query =
180 "INSERT INTO paths (id, name, block_group_id, created_on) VALUES (?1, ?2, ?3, ?4);";
181 let mut stmt = conn.prepare(query).unwrap();
182
183 let path = match stmt.execute(params![hash, name, block_group_id, timestamp]) {
184 Ok(_) => Path {
185 id: hash,
186 name: name.to_string(),
187 block_group_id: *block_group_id,
188 created_on: timestamp,
189 },
190 Err(rusqlite::Error::SqliteFailure(err, _details)) => {
191 if err.code == rusqlite::ErrorCode::ConstraintViolation {
192 Path {
193 id: hash,
194 name: name.to_string(),
195 block_group_id: *block_group_id,
196 created_on: timestamp,
197 }
198 } else {
199 panic!("something bad happened querying the database")
200 }
201 }
202 Err(_) => {
203 panic!("something bad happened querying the database")
204 }
205 };
206
207 PathEdge::bulk_create(conn, &path.id, edge_ids);
208
209 path
210 }
211
212 pub fn delete(conn: &GraphConnection, name: &str, block_group_id: &HashId) {
213 let path = Path::get(
214 conn,
215 "select * from paths where name = ?1 and block_group_id = ?2",
216 params![name, block_group_id],
217 )
218 .unwrap();
219 PathEdge::delete(conn, &path.id);
220
221 let query = "DELETE FROM paths where name = ?1 AND block_group_id = ?2;";
222 conn.execute(query, params![name.to_string(), block_group_id])
223 .unwrap();
224 }
225
226 pub fn get_by_id(conn: &GraphConnection, path_id: &HashId) -> Path {
227 Path::get(conn, "select * from paths where id = ?1;", params![path_id]).unwrap()
228 }
229
230 pub fn query_for_collection(conn: &GraphConnection, collection_name: &str) -> Vec<Path> {
231 let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1";
232 Path::query(conn, query, params![collection_name])
233 }
234
235 pub fn query_for_collection_and_sample(
236 conn: &GraphConnection,
237 collection_name: &str,
238 sample_name: &str,
239 ) -> Vec<Path> {
240 let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1 AND block_groups.sample_name = ?2";
241 Path::query(conn, query, params![collection_name, sample_name])
242 }
243
244 pub fn sequence(&self, conn: &GraphConnection) -> String {
245 let blocks = self.blocks(conn);
246 blocks
247 .into_iter()
248 .map(|block| block.block_sequence)
249 .collect::<Vec<_>>()
250 .join("")
251 }
252
253 pub fn length(&self, conn: &GraphConnection) -> i64 {
254 let blocks = self.blocks(conn);
255 let end_block = blocks.last().unwrap();
256 end_block.path_start
258 }
259
260 pub fn edge_pairs_to_block(
261 &self,
262 into: Edge,
263 out_of: Edge,
264 sequences_by_node_id: &HashMap<HashId, Sequence>,
265 current_path_length: i64,
266 ) -> PathBlock {
267 let sequence = sequences_by_node_id.get(&into.target_node_id).unwrap();
268 let start = into.target_coordinate;
269 let end = out_of.source_coordinate;
270
271 let strand = into.target_strand;
272 let block_sequence_length = end - start;
273
274 let block_sequence = if strand == Strand::Reverse {
275 revcomp(&sequence.get_sequence(start, end))
276 } else {
277 sequence.get_sequence(start, end)
278 };
279
280 PathBlock {
281 node_id: into.target_node_id,
282 block_sequence,
283 sequence_start: start,
284 sequence_end: end,
285 path_start: current_path_length,
286 path_end: current_path_length + block_sequence_length,
287 strand,
288 }
289 }
290
291 pub fn blocks(&self, conn: &GraphConnection) -> Vec<PathBlock> {
292 let edges = PathEdge::edges_for_path(conn, &self.id);
293
294 let mut sequence_node_ids = HashSet::new();
295 for edge in &edges {
296 if !is_start_node(edge.source_node_id) {
297 sequence_node_ids.insert(edge.source_node_id);
298 }
299 if !is_end_node(edge.target_node_id) {
300 sequence_node_ids.insert(edge.target_node_id);
301 }
302 }
303 let sequences_by_node_id = Node::get_sequences_by_node_ids(
304 conn,
305 &sequence_node_ids.into_iter().collect::<Vec<_>>(),
306 );
307
308 let mut blocks = vec![];
309 let mut path_length = 0;
310
311 blocks.push(PathBlock {
315 node_id: PATH_START_NODE_ID,
316 block_sequence: "".to_string(),
317 sequence_start: 0,
318 sequence_end: 0,
319 path_start: i64::MIN + 1,
320 path_end: 0,
321 strand: Strand::Forward,
322 });
323
324 for (into, out_of) in edges.into_iter().tuple_windows() {
325 let block = self.edge_pairs_to_block(into, out_of, &sequences_by_node_id, path_length);
326 path_length += block.block_sequence.len() as i64;
327 blocks.push(block);
328 }
329
330 blocks.push(PathBlock {
334 node_id: PATH_END_NODE_ID,
335 block_sequence: "".to_string(),
336 sequence_start: 0,
337 sequence_end: 0,
338 path_start: path_length,
339 path_end: i64::MAX - 1,
340 strand: Strand::Forward,
341 });
342
343 blocks
344 }
345
346 pub fn intervaltree(&self, conn: &GraphConnection) -> IntervalTree<i64, NodeIntervalBlock> {
347 let blocks = self.blocks(conn);
348 let tree: IntervalTree<i64, NodeIntervalBlock> = blocks
349 .into_iter()
350 .map(|block| {
351 (
352 block.path_start..block.path_end,
353 NodeIntervalBlock {
354 node_id: block.node_id,
355 start: block.path_start,
356 end: block.path_end,
357 sequence_start: block.sequence_start,
358 sequence_end: block.sequence_end,
359 strand: block.strand,
360 },
361 )
362 })
363 .collect();
364 tree
365 }
366
367 pub fn find_block_mappings(
368 &self,
369 conn: &GraphConnection,
370 other_path: &Path,
371 ) -> Vec<RangeMapping> {
372 let our_blocks = self.blocks(conn);
375 let their_blocks = other_path.blocks(conn);
376
377 let our_node_ids = our_blocks
378 .iter()
379 .map(|block| block.node_id)
380 .collect::<HashSet<_>>();
381 let their_node_ids = their_blocks
382 .iter()
383 .map(|block| block.node_id)
384 .collect::<HashSet<_>>();
385 let common_node_ids = our_node_ids
386 .intersection(&their_node_ids)
387 .copied()
388 .collect::<HashSet<_>>();
389
390 let mut our_blocks_by_node_id = HashMap::new();
391 for block in our_blocks
392 .iter()
393 .filter(|block| common_node_ids.contains(&block.node_id))
394 {
395 our_blocks_by_node_id
396 .entry(block.node_id)
397 .or_insert(vec![])
398 .push(block);
399 }
400
401 let mut their_blocks_by_node_id = HashMap::new();
402 for block in their_blocks
403 .iter()
404 .filter(|block| common_node_ids.contains(&block.node_id))
405 {
406 their_blocks_by_node_id
407 .entry(block.node_id)
408 .or_insert(vec![])
409 .push(block);
410 }
411
412 let mut mappings = vec![];
413 for node_id in common_node_ids {
414 let our_blocks = our_blocks_by_node_id.get(&node_id).unwrap();
415 let our_sorted_blocks = our_blocks
416 .clone()
417 .into_iter()
418 .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
419 .collect::<Vec<&PathBlock>>();
420 let their_blocks = their_blocks_by_node_id.get(&node_id).unwrap();
421 let their_sorted_blocks = their_blocks
422 .clone()
423 .into_iter()
424 .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
425 .collect::<Vec<&PathBlock>>();
426
427 for our_block in our_sorted_blocks {
428 let mut their_block_index = 0;
429
430 while their_block_index < their_sorted_blocks.len() {
431 let their_block = their_sorted_blocks[their_block_index];
432 if their_block.sequence_end <= our_block.sequence_start {
433 their_block_index += 1;
435 } else {
436 let our_range = Range {
437 start: our_block.sequence_start,
438 end: our_block.sequence_end,
439 };
440 let their_range = Range {
441 start: their_block.sequence_start,
442 end: their_block.sequence_end,
443 };
444
445 let common_ranges = our_range.overlap(&their_range);
446 if !common_ranges.is_empty() {
447 if common_ranges.len() > 1 {
448 panic!(
449 "Found more than one common range for blocks with node {node_id}"
450 );
451 }
452
453 let common_range = &common_ranges[0];
454 let our_start = our_block.path_start
455 + (common_range.start - our_block.sequence_start);
456 let our_end = our_block.path_start
457 + (common_range.end - our_block.sequence_start);
458 let their_start = their_block.path_start
459 + (common_range.start - their_block.sequence_start);
460 let their_end = their_block.path_start
461 + (common_range.end - their_block.sequence_start);
462
463 let mapping = RangeMapping {
464 source_range: Range {
465 start: our_start,
466 end: our_end,
467 },
468 target_range: Range {
469 start: their_start,
470 end: their_end,
471 },
472 };
473 mappings.push(mapping);
474 }
475
476 if their_block.sequence_end < our_block.sequence_end {
477 their_block_index += 1;
479 } else {
480 break;
481 }
482 }
483 }
484 }
485 }
486
487 mappings
488 .into_iter()
489 .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
490 .collect::<Vec<RangeMapping>>()
491 }
492
493 pub fn propagate_annotation(
494 annotation: Annotation,
495 mapping_tree: &IntervalTree<i64, RangeMapping>,
496 sequence_length: i64,
497 ) -> Option<Annotation> {
498 let start = annotation.start;
516 let end = annotation.end;
517 let mappings: Vec<RangeMapping> = mapping_tree
518 .query(RustRange { start, end })
519 .map(|x| x.value.clone())
520 .collect();
521 if mappings.is_empty() {
522 return None;
523 }
524
525 let sorted_mappings: Vec<RangeMapping> = mappings
526 .into_iter()
527 .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
528 .collect();
529 let first_mapping = sorted_mappings.first().unwrap();
530 let last_mapping = sorted_mappings.last().unwrap();
531 let translated_start = if first_mapping.source_range.contains(start) {
532 first_mapping.source_range.translate_index(
533 start,
534 &first_mapping.target_range,
535 sequence_length,
536 false,
537 )
538 } else {
539 Ok(first_mapping.target_range.start)
540 };
541
542 let translated_end = if last_mapping.source_range.contains(end) {
543 last_mapping.source_range.translate_index(
544 end,
545 &last_mapping.target_range,
546 sequence_length,
547 false,
548 )
549 } else {
550 Ok(last_mapping.target_range.end)
551 };
552
553 if translated_start.is_err() || translated_end.is_err() {
554 return None;
555 }
556
557 Some(Annotation {
558 name: annotation.name,
559 start: translated_start.expect("Failed to translate start"),
560 end: translated_end.expect("Failed to translate end"),
561 })
562 }
563
564 pub fn get_mapping_tree(
565 &self,
566 conn: &GraphConnection,
567 path: &Path,
568 ) -> IntervalTree<i64, RangeMapping> {
569 let mappings = self.find_block_mappings(conn, path);
570 mappings
571 .into_iter()
572 .map(|mapping| {
573 (
574 mapping.source_range.start..mapping.source_range.end,
575 mapping,
576 )
577 })
578 .collect()
579 }
580
581 pub fn propagate_annotations(
582 &self,
583 conn: &GraphConnection,
584 path: &Path,
585 annotations: Vec<Annotation>,
586 ) -> Vec<Annotation> {
587 let mapping_tree = self.get_mapping_tree(conn, path);
588 let sequence_length = path.sequence(conn).len();
589 annotations
590 .into_iter()
591 .filter_map(|annotation| {
592 Path::propagate_annotation(annotation, &mapping_tree, sequence_length as i64)
593 })
594 .clone()
595 .collect()
596 }
597
598 pub fn new_path_with(
599 &self,
600 conn: &GraphConnection,
601 path_start: i64,
602 path_end: i64,
603 edge_to_new_node: &Edge,
604 edge_from_new_node: &Edge,
605 ) -> Path {
606 let tree = self.intervaltree(conn);
609 let block_with_start = tree.query_point(path_start).next().unwrap().value;
610 let block_with_end = tree.query_point(path_end).next().unwrap().value;
611
612 let edges = PathEdge::edges_for_path(conn, &self.id);
613 let edges_by_source = edges
614 .iter()
615 .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
616 .collect::<HashMap<(_, i64), &Edge>>();
617 let edges_by_target = edges
618 .iter()
619 .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
620 .collect::<HashMap<(_, i64), &Edge>>();
621
622 let edge_before_new_node = if edge_to_new_node.source_node_id == PATH_START_NODE_ID {
623 None
624 } else {
625 let edge = edges_by_target
626 .get(&(block_with_start.node_id, block_with_start.sequence_start))
627 .unwrap();
628 Some(edge)
629 };
630 let edge_after_new_node = if edge_from_new_node.target_node_id == PATH_END_NODE_ID {
631 None
632 } else {
633 let edge = edges_by_source
634 .get(&(block_with_end.node_id, block_with_end.sequence_end))
635 .unwrap();
636 Some(edge)
637 };
638
639 let mut new_edge_ids = vec![];
640 if let Some(edge_before_new_node) = edge_before_new_node {
641 for edge in &edges {
642 new_edge_ids.push(edge.id);
643 if edge.id == edge_before_new_node.id {
644 break;
645 }
646 }
647 }
648
649 new_edge_ids.push(edge_to_new_node.id);
650 new_edge_ids.push(edge_from_new_node.id);
651
652 if let Some(edge_after_new_node) = edge_after_new_node {
653 let mut after_new_node = false;
654 for edge in &edges {
655 if edge.id == edge_after_new_node.id {
656 after_new_node = true;
657 }
658 if after_new_node {
659 new_edge_ids.push(edge.id);
660 }
661 }
662 }
663
664 let new_name = format!(
665 "{}-start-{}-end-{}-node-{}",
666 self.name, path_start, path_end, edge_to_new_node.target_node_id
667 );
668 Path::create(conn, &new_name, &self.block_group_id, &new_edge_ids)
669 }
670
671 pub fn new_path_with_deletion(
672 &self,
673 conn: &GraphConnection,
674 deletion_start: i64,
675 deletion_end: i64,
676 ) -> Result<Path, QueryError> {
677 let tree = self.intervaltree(conn);
680 let block_with_start = tree.query_point(deletion_start).next().unwrap().value;
681 let block_with_end = tree.query_point(deletion_end).next().unwrap().value;
682
683 let node_deletion_start = deletion_start - block_with_start.start;
684 let node_deletion_end = deletion_end - block_with_end.start;
685 let deletion_edge_result = Edge::query(
686 conn,
687 "SELECT * FROM edges WHERE source_node_id = ?1 AND source_coordinate = ?2 AND target_node_id = ?3 AND target_coordinate = ?4",
688 rusqlite::params!(
689 SQLValue::from(block_with_start.node_id),
690 SQLValue::from(node_deletion_start),
691 SQLValue::from(block_with_end.node_id),
692 SQLValue::from(node_deletion_end)
693 ),
694 );
695
696 if deletion_edge_result.is_empty() {
697 let error_string = format!(
698 "No edge found from node {}:{node_deletion_start} to node {}:{node_deletion_end}",
699 block_with_start.node_id, block_with_end.node_id
700 );
701 return Err(QueryError::ResultsNotFound(error_string));
702 }
703
704 let deletion_edge = deletion_edge_result[0].clone();
705
706 let edges = PathEdge::edges_for_path(conn, &self.id);
707 let edges_by_source = edges
708 .iter()
709 .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
710 .collect::<HashMap<(_, i64), &Edge>>();
711 let edges_by_target = edges
712 .iter()
713 .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
714 .collect::<HashMap<(_, i64), &Edge>>();
715 let edge_before_deletion = edges_by_target
716 .get(&(block_with_start.node_id, block_with_start.sequence_start))
717 .unwrap();
718 let edge_after_deletion = edges_by_source
719 .get(&(block_with_end.node_id, block_with_end.sequence_end))
720 .unwrap();
721
722 let mut new_edge_ids = vec![];
723 let mut before_deletion = true;
724 let mut after_deletion = false;
725 for edge in &edges {
726 if before_deletion {
727 new_edge_ids.push(edge.id);
728 if edge.id == edge_before_deletion.id {
729 before_deletion = false;
730 new_edge_ids.push(deletion_edge.id);
731 }
732 } else if after_deletion {
733 new_edge_ids.push(edge.id);
734 } else if edge.id == edge_after_deletion.id {
735 after_deletion = true;
736 new_edge_ids.push(edge.id);
737 }
738 }
739
740 let new_name = format!(
741 "{}-start-{}-end-{}-node-{}",
742 self.name,
743 deletion_start,
744 deletion_end,
745 HashId::convert_str("")
746 );
747
748 Ok(Path::create(
749 conn,
750 &new_name,
751 &self.block_group_id,
752 &new_edge_ids,
753 ))
754 }
755
756 fn node_blocks_for_range(
757 &self,
758 intervaltree: &IntervalTree<i64, NodeIntervalBlock>,
759 start: i64,
760 end: i64,
761 ) -> Vec<NodeIntervalBlock> {
762 let node_blocks: Vec<NodeIntervalBlock> = intervaltree
764 .query(RustRange { start, end })
765 .map(|x| x.value)
766 .sorted_by(|a, b| a.start.cmp(&b.start))
767 .collect();
768
769 if node_blocks.is_empty() {
770 return vec![];
771 }
772
773 let mut result_node_blocks = vec![];
774 let start_offset = if node_blocks[0].start < start {
775 start - node_blocks[0].start
776 } else {
777 0
778 };
779
780 let mut consolidated_block = NodeIntervalBlock {
781 node_id: node_blocks[0].node_id,
782 start: node_blocks[0].start + start_offset,
783 end: node_blocks[0].end,
784 sequence_start: node_blocks[0].sequence_start + start_offset,
785 sequence_end: node_blocks[0].sequence_end,
786 strand: node_blocks[0].strand,
787 };
788
789 for block in &node_blocks[1..] {
790 if consolidated_block.node_id == block.node_id && consolidated_block.end == block.start
791 {
792 consolidated_block = NodeIntervalBlock {
795 node_id: consolidated_block.node_id,
796 start: consolidated_block.start,
797 end: block.end,
798 sequence_start: consolidated_block.sequence_start,
799 sequence_end: block.sequence_end,
800 strand: consolidated_block.strand,
801 };
802 } else {
803 result_node_blocks.push(consolidated_block);
804 consolidated_block = *block;
805 }
806 }
807
808 let end_offset = if consolidated_block.end > end {
809 consolidated_block.end - end
810 } else {
811 0
812 };
813
814 result_node_blocks.push(NodeIntervalBlock {
815 node_id: consolidated_block.node_id,
816 start: consolidated_block.start,
817 end: consolidated_block.end - end_offset,
818 sequence_start: consolidated_block.sequence_start,
819 sequence_end: consolidated_block.sequence_end - end_offset,
820 strand: consolidated_block.strand,
821 });
822
823 result_node_blocks
824 }
825
826 pub fn node_block_partition(
827 &self,
828 conn: &GraphConnection,
829 ranges: Vec<Range>,
830 ) -> Vec<NodeIntervalBlock> {
831 let intervaltree = self.intervaltree(conn);
832 let mut partitioned_nodes = vec![];
833 for range in ranges {
834 let node_blocks = self.node_blocks_for_range(&intervaltree, range.start, range.end);
835 for node_block in &node_blocks {
836 partitioned_nodes.push(*node_block);
837 }
838 }
839 partitioned_nodes
840 }
841}
842
843impl Query for Path {
844 type Model = Path;
845
846 const TABLE_NAME: &'static str = "paths";
847
848 fn process_row(row: &Row) -> Self::Model {
849 Path {
850 id: row.get(0).unwrap(),
851 block_group_id: row.get(1).unwrap(),
852 name: row.get(2).unwrap(),
853 created_on: row.get(3).unwrap(),
854 }
855 }
856}
857
858#[cfg(test)]
859mod tests {
860 use capnp::message::TypedBuilder;
862 use chrono::Utc;
863
864 use super::*;
865 use crate::{
866 block_group::{BlockGroup, NewBlockGroup},
867 block_group_edge::BlockGroupEdgeData,
868 collection::Collection,
869 test_helpers::get_connection,
870 };
871
872 fn create_test_block_group(conn: &GraphConnection) -> BlockGroup {
873 BlockGroup::create(
874 conn,
875 NewBlockGroup {
876 collection_name: "test collection",
877 sample_name: "test-sample",
878 name: "test block group",
879 ..Default::default()
880 },
881 )
882 }
883
884 #[test]
885 fn test_path_capnp_serialization() {
886 let path = Path {
887 id: HashId::pad_str(100),
888 block_group_id: HashId::pad_str(50),
889 name: "test_path".to_string(),
890 created_on: Utc::now().timestamp_nanos_opt().unwrap(),
891 };
892
893 let mut message = TypedBuilder::<PathCapnp::Owned>::new_default();
894 let mut root = message.init_root();
895 path.write_capnp(&mut root);
896
897 let deserialized = Path::read_capnp(root.into_reader());
898 assert_eq!(path, deserialized);
899 }
900
901 #[test]
902 fn test_path_delete() {
903 let conn = &get_connection(None).unwrap();
904 Collection::create(conn, "test collection");
905 let block_group = create_test_block_group(conn);
906
907 let sequence1 = Sequence::new()
909 .sequence_type("DNA")
910 .sequence("ATCGATCG")
911 .save(conn);
912 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
913 let edge1 = Edge::create(
914 conn,
915 PATH_START_NODE_ID,
916 -1,
917 Strand::Forward,
918 node1_id,
919 0,
920 Strand::Forward,
921 );
922 let edge2 = Edge::create(
923 conn,
924 node1_id,
925 8,
926 Strand::Forward,
927 PATH_END_NODE_ID,
928 -1,
929 Strand::Forward,
930 );
931
932 let edge_ids1 = vec![edge1.id, edge2.id];
933 let block_group_edges1 = edge_ids1
934 .iter()
935 .map(|edge_id| BlockGroupEdgeData {
936 block_group_id: block_group.id,
937 edge_id: (*edge_id),
938 chromosome_index: 0,
939 phased: 0,
940 })
941 .collect::<Vec<BlockGroupEdgeData>>();
942 BlockGroupEdge::bulk_create(conn, &block_group_edges1);
943
944 let _ = Path::create(conn, "chr1", &block_group.id, &edge_ids1);
945
946 let sequence2 = Sequence::new()
948 .sequence_type("DNA")
949 .sequence("TTTTTTTT")
950 .save(conn);
951 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
952 let edge3 = Edge::create(
953 conn,
954 PATH_START_NODE_ID,
955 -1,
956 Strand::Forward,
957 node2_id,
958 0,
959 Strand::Forward,
960 );
961 let edge4 = Edge::create(
962 conn,
963 node2_id,
964 8,
965 Strand::Forward,
966 PATH_END_NODE_ID,
967 -1,
968 Strand::Forward,
969 );
970
971 let edge_ids2 = vec![edge3.id, edge4.id];
972 let block_group_edges2 = edge_ids2
973 .iter()
974 .map(|edge_id| BlockGroupEdgeData {
975 block_group_id: block_group.id,
976 edge_id: (*edge_id),
977 chromosome_index: 0,
978 phased: 0,
979 })
980 .collect::<Vec<BlockGroupEdgeData>>();
981 BlockGroupEdge::bulk_create(conn, &block_group_edges2);
982
983 let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids2);
984
985 let paths_before = Path::query_for_collection(conn, "test collection");
986 assert_eq!(paths_before.len(), 2);
987
988 Path::delete(conn, "chr1", &block_group.id);
989
990 let paths_after = Path::query_for_collection(conn, "test collection");
991 assert_eq!(paths_after.len(), 1);
992 assert_eq!(paths_after[0], path2);
993 }
994
995 #[test]
996 fn test_gets_sequence() {
997 let conn = &get_connection(None).unwrap();
998 Collection::create(conn, "test collection");
999 let block_group = create_test_block_group(conn);
1000 let sequence1 = Sequence::new()
1001 .sequence_type("DNA")
1002 .sequence("ATCGATCG")
1003 .save(conn);
1004 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1005 let edge1 = Edge::create(
1006 conn,
1007 PATH_START_NODE_ID,
1008 -123,
1009 Strand::Forward,
1010 node1_id,
1011 0,
1012 Strand::Forward,
1013 );
1014 let sequence2 = Sequence::new()
1015 .sequence_type("DNA")
1016 .sequence("AAAAAAAA")
1017 .save(conn);
1018 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1019 let edge2 = Edge::create(
1020 conn,
1021 node1_id,
1022 8,
1023 Strand::Forward,
1024 node2_id,
1025 1,
1026 Strand::Forward,
1027 );
1028 let sequence3 = Sequence::new()
1029 .sequence_type("DNA")
1030 .sequence("CCCCCCCC")
1031 .save(conn);
1032 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1033 let edge3 = Edge::create(
1034 conn,
1035 node2_id,
1036 8,
1037 Strand::Forward,
1038 node3_id,
1039 1,
1040 Strand::Forward,
1041 );
1042 let sequence4 = Sequence::new()
1043 .sequence_type("DNA")
1044 .sequence("GGGGGGGG")
1045 .save(conn);
1046 let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1047 let edge4 = Edge::create(
1048 conn,
1049 node3_id,
1050 8,
1051 Strand::Forward,
1052 node4_id,
1053 1,
1054 Strand::Forward,
1055 );
1056 let edge5 = Edge::create(
1057 conn,
1058 node4_id,
1059 8,
1060 Strand::Forward,
1061 PATH_END_NODE_ID,
1062 -1,
1063 Strand::Forward,
1064 );
1065
1066 let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1067 let block_group_edges = edge_ids
1068 .iter()
1069 .map(|edge_id| BlockGroupEdgeData {
1070 block_group_id: block_group.id,
1071 edge_id: *edge_id,
1072 chromosome_index: 0,
1073 phased: 0,
1074 })
1075 .collect::<Vec<BlockGroupEdgeData>>();
1076 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1077
1078 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1079 assert_eq!(path.sequence(conn), "ATCGATCGAAAAAAACCCCCCCGGGGGGG");
1080 }
1081
1082 #[test]
1083 fn test_gets_sequence_with_rc() {
1084 let conn = &get_connection(None).unwrap();
1085 Collection::create(conn, "test collection");
1086 let block_group = create_test_block_group(conn);
1087 let sequence1 = Sequence::new()
1088 .sequence_type("DNA")
1089 .sequence("ATCGATCG")
1090 .save(conn);
1091 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1092 let edge5 = Edge::create(
1093 conn,
1094 node1_id,
1095 8,
1096 Strand::Reverse,
1097 PATH_END_NODE_ID,
1098 0,
1099 Strand::Reverse,
1100 );
1101 let sequence2 = Sequence::new()
1102 .sequence_type("DNA")
1103 .sequence("AAAAAAAA")
1104 .save(conn);
1105 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1106 let edge4 = Edge::create(
1107 conn,
1108 node2_id,
1109 7,
1110 Strand::Reverse,
1111 node1_id,
1112 0,
1113 Strand::Reverse,
1114 );
1115 let sequence3 = Sequence::new()
1116 .sequence_type("DNA")
1117 .sequence("CCCCCCCC")
1118 .save(conn);
1119 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1120 let edge3 = Edge::create(
1121 conn,
1122 node3_id,
1123 7,
1124 Strand::Reverse,
1125 node2_id,
1126 0,
1127 Strand::Reverse,
1128 );
1129 let sequence4 = Sequence::new()
1130 .sequence_type("DNA")
1131 .sequence("GGGGGGGG")
1132 .save(conn);
1133 let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1134 let edge2 = Edge::create(
1135 conn,
1136 node4_id,
1137 7,
1138 Strand::Reverse,
1139 node3_id,
1140 0,
1141 Strand::Reverse,
1142 );
1143 let edge1 = Edge::create(
1144 conn,
1145 PATH_START_NODE_ID,
1146 -1,
1147 Strand::Reverse,
1148 node4_id,
1149 0,
1150 Strand::Reverse,
1151 );
1152
1153 let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1154 let block_group_edges = edge_ids
1155 .iter()
1156 .map(|edge_id| BlockGroupEdgeData {
1157 block_group_id: block_group.id,
1158 edge_id: *edge_id,
1159 chromosome_index: 0,
1160 phased: 0,
1161 })
1162 .collect::<Vec<BlockGroupEdgeData>>();
1163 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1164
1165 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1166 assert_eq!(path.sequence(conn), "CCCCCCCGGGGGGGTTTTTTTCGATCGAT");
1167 }
1168
1169 #[test]
1170 fn test_reverse_complement() {
1171 assert_eq!(revcomp("ATCCGG"), "CCGGAT");
1172 assert_eq!(revcomp("CNNNNA"), "TNNNNG");
1173 assert_eq!(revcomp("cNNgnAt"), "aTncNNg");
1174 }
1175
1176 #[test]
1177 fn test_intervaltree() {
1178 let conn = &get_connection(None).unwrap();
1179 Collection::create(conn, "test collection");
1180 let block_group = create_test_block_group(conn);
1181 let sequence1 = Sequence::new()
1182 .sequence_type("DNA")
1183 .sequence("ATCGATCG")
1184 .save(conn);
1185 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1186 let edge1 = Edge::create(
1187 conn,
1188 PATH_START_NODE_ID,
1189 -1,
1190 Strand::Forward,
1191 node1_id,
1192 0,
1193 Strand::Forward,
1194 );
1195 let sequence2 = Sequence::new()
1196 .sequence_type("DNA")
1197 .sequence("AAAAAAAA")
1198 .save(conn);
1199 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1200 let edge2 = Edge::create(
1201 conn,
1202 node1_id,
1203 8,
1204 Strand::Forward,
1205 node2_id,
1206 1,
1207 Strand::Forward,
1208 );
1209 let sequence3 = Sequence::new()
1210 .sequence_type("DNA")
1211 .sequence("CCCCCCCC")
1212 .save(conn);
1213 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1214 let edge3 = Edge::create(
1215 conn,
1216 node2_id,
1217 8,
1218 Strand::Forward,
1219 node3_id,
1220 1,
1221 Strand::Forward,
1222 );
1223 let sequence4 = Sequence::new()
1224 .sequence_type("DNA")
1225 .sequence("GGGGGGGG")
1226 .save(conn);
1227 let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1228 let edge4 = Edge::create(
1229 conn,
1230 node3_id,
1231 8,
1232 Strand::Forward,
1233 node4_id,
1234 1,
1235 Strand::Forward,
1236 );
1237 let edge5 = Edge::create(
1238 conn,
1239 node4_id,
1240 8,
1241 Strand::Forward,
1242 PATH_END_NODE_ID,
1243 -1,
1244 Strand::Forward,
1245 );
1246
1247 let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1248 let block_group_edges = edge_ids
1249 .iter()
1250 .map(|edge_id| BlockGroupEdgeData {
1251 block_group_id: block_group.id,
1252 edge_id: *edge_id,
1253 chromosome_index: 0,
1254 phased: 0,
1255 })
1256 .collect::<Vec<BlockGroupEdgeData>>();
1257 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1258
1259 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1260 let tree = path.intervaltree(conn);
1261 let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1262 assert_eq!(blocks1.len(), 1);
1263 let block1 = &blocks1[0];
1264 assert_eq!(block1.node_id, node1_id);
1265 assert_eq!(block1.sequence_start, 0);
1266 assert_eq!(block1.sequence_end, 8);
1267 assert_eq!(block1.start, 0);
1268 assert_eq!(block1.end, 8);
1269 assert_eq!(block1.strand, Strand::Forward);
1270
1271 let blocks2: Vec<NodeIntervalBlock> = tree.query_point(12).map(|x| x.value).collect();
1272 assert_eq!(blocks2.len(), 1);
1273 let block2 = &blocks2[0];
1274 assert_eq!(block2.node_id, node2_id);
1275 assert_eq!(block2.sequence_start, 1);
1276 assert_eq!(block2.sequence_end, 8);
1277 assert_eq!(block2.start, 8);
1278 assert_eq!(block2.end, 15);
1279 assert_eq!(block2.strand, Strand::Forward);
1280
1281 let blocks4: Vec<NodeIntervalBlock> = tree.query_point(25).map(|x| x.value).collect();
1282 assert_eq!(blocks4.len(), 1);
1283 let block4 = &blocks4[0];
1284 assert_eq!(block4.node_id, node4_id);
1285 assert_eq!(block4.sequence_start, 1);
1286 assert_eq!(block4.sequence_end, 8);
1287 assert_eq!(block4.start, 22);
1288 assert_eq!(block4.end, 29);
1289 assert_eq!(block4.strand, Strand::Forward);
1290 }
1291
1292 #[test]
1293 fn test_gets_sequence_with_edges_into_node_middles() {
1294 let conn = &get_connection(None).unwrap();
1298 Collection::create(conn, "test collection");
1299 let block_group = create_test_block_group(conn);
1300 let sequence1 = Sequence::new()
1301 .sequence_type("DNA")
1302 .sequence("ATCGATCG")
1303 .save(conn);
1304 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1305 let edge1 = Edge::create(
1306 conn,
1307 PATH_START_NODE_ID,
1308 -1,
1309 Strand::Forward,
1310 node1_id,
1311 4,
1312 Strand::Forward,
1313 );
1314 let sequence2 = Sequence::new()
1315 .sequence_type("DNA")
1316 .sequence("AAAAAAAA")
1317 .save(conn);
1318 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1319 let edge2 = Edge::create(
1320 conn,
1321 node1_id,
1322 8,
1323 Strand::Forward,
1324 node2_id,
1325 0,
1326 Strand::Forward,
1327 );
1328 let sequence3 = Sequence::new()
1329 .sequence_type("DNA")
1330 .sequence("CCCCCCCC")
1331 .save(conn);
1332 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1333 let edge3 = Edge::create(
1334 conn,
1335 node2_id,
1336 8,
1337 Strand::Forward,
1338 node3_id,
1339 0,
1340 Strand::Forward,
1341 );
1342 let sequence4 = Sequence::new()
1343 .sequence_type("DNA")
1344 .sequence("GGGGGGGG")
1345 .save(conn);
1346 let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1347 let edge4 = Edge::create(
1348 conn,
1349 node3_id,
1350 8,
1351 Strand::Forward,
1352 node4_id,
1353 0,
1354 Strand::Forward,
1355 );
1356 let edge5 = Edge::create(
1357 conn,
1358 node4_id,
1359 4,
1360 Strand::Forward,
1361 PATH_END_NODE_ID,
1362 -1,
1363 Strand::Forward,
1364 );
1365
1366 let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1367 let block_group_edges = edge_ids
1368 .iter()
1369 .map(|edge_id| BlockGroupEdgeData {
1370 block_group_id: block_group.id,
1371 edge_id: *edge_id,
1372 chromosome_index: 0,
1373 phased: 0,
1374 })
1375 .collect::<Vec<BlockGroupEdgeData>>();
1376 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1377
1378 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1379 let tree = path.intervaltree(conn);
1380 let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1381 assert_eq!(blocks1.len(), 1);
1382 let block1 = &blocks1[0];
1383 assert_eq!(block1.node_id, node1_id);
1384 assert_eq!(block1.sequence_start, 4);
1385 assert_eq!(block1.sequence_end, 8);
1386 assert_eq!(block1.start, 0);
1387 assert_eq!(block1.end, 4);
1388 assert_eq!(block1.strand, Strand::Forward);
1389
1390 let blocks2: Vec<NodeIntervalBlock> = tree.query_point(10).map(|x| x.value).collect();
1391 assert_eq!(blocks2.len(), 1);
1392 let block2 = &blocks2[0];
1393 assert_eq!(block2.node_id, node2_id);
1394 assert_eq!(block2.sequence_start, 0);
1395 assert_eq!(block2.sequence_end, 8);
1396 assert_eq!(block2.start, 4);
1397 assert_eq!(block2.end, 12);
1398 assert_eq!(block2.strand, Strand::Forward);
1399
1400 let blocks4: Vec<NodeIntervalBlock> = tree.query_point(22).map(|x| x.value).collect();
1401 assert_eq!(blocks4.len(), 1);
1402 let block4 = &blocks4[0];
1403 assert_eq!(block4.node_id, node4_id);
1404 assert_eq!(block4.sequence_start, 0);
1405 assert_eq!(block4.sequence_end, 4);
1406 assert_eq!(block4.start, 20);
1407 assert_eq!(block4.end, 24);
1408 assert_eq!(block4.strand, Strand::Forward);
1409
1410 assert_eq!(path.sequence(conn), "ATCGAAAAAAAACCCCCCCCGGGG");
1411 }
1412
1413 #[test]
1414 fn test_full_block_mapping() {
1415 let conn = &get_connection(None).unwrap();
1423 Collection::create(conn, "test collection");
1424 let block_group = create_test_block_group(conn);
1425 let sequence1 = Sequence::new()
1426 .sequence_type("DNA")
1427 .sequence("ATCGATCG")
1428 .save(conn);
1429 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1430 let edge1 = Edge::create(
1431 conn,
1432 PATH_START_NODE_ID,
1433 -1,
1434 Strand::Forward,
1435 node1_id,
1436 0,
1437 Strand::Forward,
1438 );
1439 let edge2 = Edge::create(
1440 conn,
1441 node1_id,
1442 8,
1443 Strand::Forward,
1444 PATH_END_NODE_ID,
1445 -1,
1446 Strand::Forward,
1447 );
1448
1449 let edge_ids = vec![edge1.id, edge2.id];
1450 let block_group_edges = edge_ids
1451 .iter()
1452 .map(|edge_id| BlockGroupEdgeData {
1453 block_group_id: block_group.id,
1454 edge_id: *edge_id,
1455 chromosome_index: 0,
1456 phased: 0,
1457 })
1458 .collect::<Vec<BlockGroupEdgeData>>();
1459 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1460
1461 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1462
1463 let mappings = path.find_block_mappings(conn, &path);
1464 assert_eq!(mappings.len(), 1);
1465 let mapping = &mappings[0];
1466 assert_eq!(mapping.source_range, mapping.target_range);
1467 assert_eq!(mapping.source_range.start, 0);
1468 assert_eq!(mapping.source_range.end, 8);
1469 assert_eq!(mapping.target_range.start, 0);
1470 assert_eq!(mapping.target_range.end, 8);
1471 }
1472
1473 #[test]
1474 fn test_no_block_mapping_overlap() {
1475 let conn = &get_connection(None).unwrap();
1485 Collection::create(conn, "test collection");
1486 let block_group = create_test_block_group(conn);
1487 let sequence1 = Sequence::new()
1488 .sequence_type("DNA")
1489 .sequence("ATCGATCG")
1490 .save(conn);
1491 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1492 let edge1 = Edge::create(
1493 conn,
1494 PATH_START_NODE_ID,
1495 -1,
1496 Strand::Forward,
1497 node1_id,
1498 0,
1499 Strand::Forward,
1500 );
1501 let edge2 = Edge::create(
1502 conn,
1503 node1_id,
1504 8,
1505 Strand::Forward,
1506 PATH_END_NODE_ID,
1507 -1,
1508 Strand::Forward,
1509 );
1510
1511 let edge_ids = vec![edge1.id, edge2.id];
1512 let block_group_edges = edge_ids
1513 .iter()
1514 .map(|edge_id| BlockGroupEdgeData {
1515 block_group_id: block_group.id,
1516 edge_id: *edge_id,
1517 chromosome_index: 0,
1518 phased: 0,
1519 })
1520 .collect::<Vec<BlockGroupEdgeData>>();
1521 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1522 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1523
1524 let sequence2 = Sequence::new()
1525 .sequence_type("DNA")
1526 .sequence("TTTTTTTT")
1527 .save(conn);
1528 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1529 let edge3 = Edge::create(
1530 conn,
1531 PATH_START_NODE_ID,
1532 -1,
1533 Strand::Forward,
1534 node2_id,
1535 0,
1536 Strand::Forward,
1537 );
1538 let edge4 = Edge::create(
1539 conn,
1540 node2_id,
1541 8,
1542 Strand::Forward,
1543 PATH_END_NODE_ID,
1544 -1,
1545 Strand::Forward,
1546 );
1547
1548 let edge_ids = vec![edge3.id, edge4.id];
1549 let block_group_edges = edge_ids
1550 .iter()
1551 .map(|edge_id| BlockGroupEdgeData {
1552 block_group_id: block_group.id,
1553 edge_id: *edge_id,
1554 chromosome_index: 0,
1555 phased: 0,
1556 })
1557 .collect::<Vec<BlockGroupEdgeData>>();
1558 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1559
1560 let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1561
1562 let mappings = path1.find_block_mappings(conn, &path2);
1563 assert_eq!(mappings.len(), 0);
1564 }
1565
1566 #[test]
1567 fn test_partial_overlap_block_mapping() {
1568 let conn = &get_connection(None).unwrap();
1582 Collection::create(conn, "test collection");
1583 let block_group = create_test_block_group(conn);
1584 let sequence1 = Sequence::new()
1585 .sequence_type("DNA")
1586 .sequence("ATCGATCG")
1587 .save(conn);
1588 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1589 let edge1 = Edge::create(
1590 conn,
1591 PATH_START_NODE_ID,
1592 -1,
1593 Strand::Forward,
1594 node1_id,
1595 0,
1596 Strand::Forward,
1597 );
1598 let edge2 = Edge::create(
1599 conn,
1600 node1_id,
1601 8,
1602 Strand::Forward,
1603 PATH_END_NODE_ID,
1604 -1,
1605 Strand::Forward,
1606 );
1607
1608 let edge_ids = vec![edge1.id, edge2.id];
1609 let block_group_edges = edge_ids
1610 .iter()
1611 .map(|edge_id| BlockGroupEdgeData {
1612 block_group_id: block_group.id,
1613 edge_id: *edge_id,
1614 chromosome_index: 0,
1615 phased: 0,
1616 })
1617 .collect::<Vec<BlockGroupEdgeData>>();
1618 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1619
1620 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1621
1622 let sequence2 = Sequence::new()
1623 .sequence_type("DNA")
1624 .sequence("TTTTTTTT")
1625 .save(conn);
1626 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1627 let edge3 = Edge::create(
1628 conn,
1629 PATH_START_NODE_ID,
1630 -1,
1631 Strand::Forward,
1632 node1_id,
1633 0,
1634 Strand::Forward,
1635 );
1636 let edge4 = Edge::create(
1637 conn,
1638 node1_id,
1639 4,
1640 Strand::Forward,
1641 node2_id,
1642 0,
1643 Strand::Forward,
1644 );
1645 let edge5 = Edge::create(
1646 conn,
1647 node2_id,
1648 8,
1649 Strand::Forward,
1650 PATH_END_NODE_ID,
1651 -1,
1652 Strand::Forward,
1653 );
1654
1655 let edge_ids = vec![edge3.id, edge4.id, edge5.id];
1656 let block_group_edges = edge_ids
1657 .iter()
1658 .map(|edge_id| BlockGroupEdgeData {
1659 block_group_id: block_group.id,
1660 edge_id: *edge_id,
1661 chromosome_index: 0,
1662 phased: 0,
1663 })
1664 .collect::<Vec<BlockGroupEdgeData>>();
1665 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1666
1667 let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1668
1669 assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
1670
1671 let mappings = path1.find_block_mappings(conn, &path2);
1672 assert_eq!(mappings.len(), 1);
1673 let mapping = &mappings[0];
1674 assert_eq!(mapping.source_range, mapping.target_range);
1675 assert_eq!(mapping.source_range.start, 0);
1676 assert_eq!(mapping.source_range.end, 4);
1677 assert_eq!(mapping.target_range.start, 0);
1678 assert_eq!(mapping.target_range.end, 4);
1679 }
1680
1681 #[test]
1682 fn test_insertion_block_mapping() {
1683 let conn = &get_connection(None).unwrap();
1697 Collection::create(conn, "test collection");
1698 let block_group = create_test_block_group(conn);
1699 let sequence1 = Sequence::new()
1700 .sequence_type("DNA")
1701 .sequence("ATCGATCG")
1702 .save(conn);
1703 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1704 let edge1 = Edge::create(
1705 conn,
1706 PATH_START_NODE_ID,
1707 -1,
1708 Strand::Forward,
1709 node1_id,
1710 0,
1711 Strand::Forward,
1712 );
1713 let edge2 = Edge::create(
1714 conn,
1715 node1_id,
1716 8,
1717 Strand::Forward,
1718 PATH_END_NODE_ID,
1719 -1,
1720 Strand::Forward,
1721 );
1722
1723 let edge_ids = vec![edge1.id, edge2.id];
1724 let block_group_edges = edge_ids
1725 .iter()
1726 .map(|edge_id| BlockGroupEdgeData {
1727 block_group_id: block_group.id,
1728 edge_id: *edge_id,
1729 chromosome_index: 0,
1730 phased: 0,
1731 })
1732 .collect::<Vec<BlockGroupEdgeData>>();
1733 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1734
1735 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1736
1737 let sequence2 = Sequence::new()
1738 .sequence_type("DNA")
1739 .sequence("TTTTTTTT")
1740 .save(conn);
1741 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1742 let edge4 = Edge::create(
1743 conn,
1744 node1_id,
1745 4,
1746 Strand::Forward,
1747 node2_id,
1748 0,
1749 Strand::Forward,
1750 );
1751 let edge5 = Edge::create(
1752 conn,
1753 node2_id,
1754 8,
1755 Strand::Forward,
1756 node1_id,
1757 4,
1758 Strand::Forward,
1759 );
1760
1761 let edge_ids = [edge4.id, edge5.id];
1762 let block_group_edges = edge_ids
1763 .iter()
1764 .map(|edge_id| BlockGroupEdgeData {
1765 block_group_id: block_group.id,
1766 edge_id: *edge_id,
1767 chromosome_index: 0,
1768 phased: 0,
1769 })
1770 .collect::<Vec<BlockGroupEdgeData>>();
1771 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1772
1773 let path2 = Path::create(
1774 conn,
1775 "chr2",
1776 &block_group.id,
1777 &[edge1.id, edge4.id, edge5.id, edge2.id],
1778 );
1779
1780 assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
1781
1782 let mappings = path1.find_block_mappings(conn, &path2);
1783 assert_eq!(mappings.len(), 2);
1784 let mapping1 = &mappings[0];
1785 assert_eq!(mapping1.source_range, mapping1.target_range);
1786 assert_eq!(mapping1.source_range.start, 0);
1787 assert_eq!(mapping1.source_range.end, 4);
1788 assert_eq!(mapping1.target_range.start, 0);
1789 assert_eq!(mapping1.target_range.end, 4);
1790
1791 let mapping2 = &mappings[1];
1792 assert_eq!(mapping2.source_range.start, 4);
1793 assert_eq!(mapping2.source_range.end, 8);
1794 assert_eq!(mapping2.target_range.start, 12);
1795 assert_eq!(mapping2.target_range.end, 16);
1796 }
1797
1798 #[test]
1799 fn test_replacement_block_mapping() {
1800 let conn = &get_connection(None).unwrap();
1814 Collection::create(conn, "test collection");
1815 let block_group = create_test_block_group(conn);
1816 let sequence1 = Sequence::new()
1817 .sequence_type("DNA")
1818 .sequence("ATCGATCG")
1819 .save(conn);
1820 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1821 let edge1 = Edge::create(
1822 conn,
1823 PATH_START_NODE_ID,
1824 -1,
1825 Strand::Forward,
1826 node1_id,
1827 0,
1828 Strand::Forward,
1829 );
1830 let edge2 = Edge::create(
1831 conn,
1832 node1_id,
1833 8,
1834 Strand::Forward,
1835 PATH_END_NODE_ID,
1836 -1,
1837 Strand::Forward,
1838 );
1839
1840 let edge_ids = [edge1.id, edge2.id];
1841 let block_group_edges = edge_ids
1842 .iter()
1843 .map(|edge_id| BlockGroupEdgeData {
1844 block_group_id: block_group.id,
1845 edge_id: *edge_id,
1846 chromosome_index: 0,
1847 phased: 0,
1848 })
1849 .collect::<Vec<BlockGroupEdgeData>>();
1850 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1851
1852 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1853
1854 let sequence2 = Sequence::new()
1855 .sequence_type("DNA")
1856 .sequence("TTTTTTTT")
1857 .save(conn);
1858 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1859 let edge4 = Edge::create(
1860 conn,
1861 node1_id,
1862 2,
1863 Strand::Forward,
1864 node2_id,
1865 0,
1866 Strand::Forward,
1867 );
1868 let edge5 = Edge::create(
1869 conn,
1870 node2_id,
1871 8,
1872 Strand::Forward,
1873 node1_id,
1874 6,
1875 Strand::Forward,
1876 );
1877
1878 let edge_ids = [edge4.id, edge5.id];
1879 let block_group_edges = edge_ids
1880 .iter()
1881 .map(|edge_id| BlockGroupEdgeData {
1882 block_group_id: block_group.id,
1883 edge_id: *edge_id,
1884 chromosome_index: 0,
1885 phased: 0,
1886 })
1887 .collect::<Vec<BlockGroupEdgeData>>();
1888 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1889
1890 let path2 = Path::create(
1891 conn,
1892 "chr2",
1893 &block_group.id,
1894 &[edge1.id, edge4.id, edge5.id, edge2.id],
1895 );
1896
1897 assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
1898
1899 let mappings = path1.find_block_mappings(conn, &path2);
1900 assert_eq!(mappings.len(), 2);
1901 let mapping1 = &mappings[0];
1902 assert_eq!(mapping1.source_range, mapping1.target_range);
1903 assert_eq!(mapping1.source_range.start, 0);
1904 assert_eq!(mapping1.source_range.end, 2);
1905 assert_eq!(mapping1.target_range.start, 0);
1906 assert_eq!(mapping1.target_range.end, 2);
1907
1908 let mapping2 = &mappings[1];
1909 assert_eq!(mapping2.source_range.start, 6);
1910 assert_eq!(mapping2.source_range.end, 8);
1911 assert_eq!(mapping2.target_range.start, 10);
1912 assert_eq!(mapping2.target_range.end, 12);
1913 }
1914
1915 #[test]
1916 fn test_deletion_block_mapping() {
1917 let conn = &get_connection(None).unwrap();
1930 Collection::create(conn, "test collection");
1931 let block_group = create_test_block_group(conn);
1932 let sequence1 = Sequence::new()
1933 .sequence_type("DNA")
1934 .sequence("ATCGATCG")
1935 .save(conn);
1936 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1937 let edge1 = Edge::create(
1938 conn,
1939 PATH_START_NODE_ID,
1940 -1,
1941 Strand::Forward,
1942 node1_id,
1943 0,
1944 Strand::Forward,
1945 );
1946 let edge2 = Edge::create(
1947 conn,
1948 node1_id,
1949 8,
1950 Strand::Forward,
1951 PATH_END_NODE_ID,
1952 -1,
1953 Strand::Forward,
1954 );
1955
1956 let edge_ids = [edge1.id, edge2.id];
1957 let block_group_edges = edge_ids
1958 .iter()
1959 .map(|edge_id| BlockGroupEdgeData {
1960 block_group_id: block_group.id,
1961 edge_id: *edge_id,
1962 chromosome_index: 0,
1963 phased: 0,
1964 })
1965 .collect::<Vec<BlockGroupEdgeData>>();
1966 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1967
1968 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1969
1970 let edge4 = Edge::create(
1971 conn,
1972 node1_id,
1973 2,
1974 Strand::Forward,
1975 node1_id,
1976 6,
1977 Strand::Forward,
1978 );
1979
1980 let edge_ids = [edge4.id];
1981 let block_group_edges = edge_ids
1982 .iter()
1983 .map(|edge_id| BlockGroupEdgeData {
1984 block_group_id: block_group.id,
1985 edge_id: *edge_id,
1986 chromosome_index: 0,
1987 phased: 0,
1988 })
1989 .collect::<Vec<BlockGroupEdgeData>>();
1990 BlockGroupEdge::bulk_create(conn, &block_group_edges);
1991
1992 let path2 = Path::create(
1993 conn,
1994 "chr2",
1995 &block_group.id,
1996 &[edge1.id, edge4.id, edge2.id],
1997 );
1998
1999 assert_eq!(path2.sequence(conn), "ATCG");
2000
2001 let mappings = path1.find_block_mappings(conn, &path2);
2002 assert_eq!(mappings.len(), 2);
2003 let mapping1 = &mappings[0];
2004 assert_eq!(mapping1.source_range, mapping1.target_range);
2005 assert_eq!(mapping1.source_range.start, 0);
2006 assert_eq!(mapping1.source_range.end, 2);
2007 assert_eq!(mapping1.target_range.start, 0);
2008 assert_eq!(mapping1.target_range.end, 2);
2009
2010 let mapping2 = &mappings[1];
2011 assert_eq!(mapping2.source_range.start, 6);
2012 assert_eq!(mapping2.source_range.end, 8);
2013 assert_eq!(mapping2.target_range.start, 2);
2014 assert_eq!(mapping2.target_range.end, 4);
2015 }
2016
2017 #[test]
2018 fn test_two_block_insertion_mapping() {
2019 let conn = &get_connection(None).unwrap();
2034 Collection::create(conn, "test collection");
2035 let block_group = create_test_block_group(conn);
2036 let sequence1 = Sequence::new()
2037 .sequence_type("DNA")
2038 .sequence("ATCGATCG")
2039 .save(conn);
2040 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2041 let sequence2 = Sequence::new()
2042 .sequence_type("DNA")
2043 .sequence("TTTTTTTT")
2044 .save(conn);
2045 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2046 let edge1 = Edge::create(
2047 conn,
2048 PATH_START_NODE_ID,
2049 -1,
2050 Strand::Forward,
2051 node1_id,
2052 0,
2053 Strand::Forward,
2054 );
2055 let edge2 = Edge::create(
2056 conn,
2057 node1_id,
2058 8,
2059 Strand::Forward,
2060 node2_id,
2061 0,
2062 Strand::Forward,
2063 );
2064 let edge3 = Edge::create(
2065 conn,
2066 node2_id,
2067 8,
2068 Strand::Forward,
2069 PATH_END_NODE_ID,
2070 -1,
2071 Strand::Forward,
2072 );
2073
2074 let edge_ids = [edge1.id, edge2.id, edge3.id];
2075 let block_group_edges = edge_ids
2076 .iter()
2077 .map(|edge_id| BlockGroupEdgeData {
2078 block_group_id: block_group.id,
2079 edge_id: *edge_id,
2080 chromosome_index: 0,
2081 phased: 0,
2082 })
2083 .collect::<Vec<BlockGroupEdgeData>>();
2084 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2085
2086 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2087
2088 let sequence3 = Sequence::new()
2089 .sequence_type("DNA")
2090 .sequence("AAAAAAAA")
2091 .save(conn);
2092 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2093 let edge4 = Edge::create(
2094 conn,
2095 node1_id,
2096 8,
2097 Strand::Forward,
2098 node3_id,
2099 0,
2100 Strand::Forward,
2101 );
2102 let edge5 = Edge::create(
2103 conn,
2104 node3_id,
2105 8,
2106 Strand::Forward,
2107 node2_id,
2108 0,
2109 Strand::Forward,
2110 );
2111
2112 let edge_ids = [edge4.id, edge5.id];
2113 let block_group_edges = edge_ids
2114 .iter()
2115 .map(|edge_id| BlockGroupEdgeData {
2116 block_group_id: block_group.id,
2117 edge_id: *edge_id,
2118 chromosome_index: 0,
2119 phased: 0,
2120 })
2121 .collect::<Vec<BlockGroupEdgeData>>();
2122 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2123
2124 let path2 = Path::create(
2125 conn,
2126 "chr2",
2127 &block_group.id,
2128 &[edge1.id, edge4.id, edge5.id, edge3.id],
2129 );
2130
2131 assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
2132
2133 let mappings = path1.find_block_mappings(conn, &path2);
2134 assert_eq!(mappings.len(), 2);
2135 let mapping1 = &mappings[0];
2136 assert_eq!(mapping1.source_range, mapping1.target_range);
2137 assert_eq!(mapping1.source_range.start, 0);
2138 assert_eq!(mapping1.source_range.end, 8);
2139 assert_eq!(mapping1.target_range.start, 0);
2140 assert_eq!(mapping1.target_range.end, 8);
2141
2142 let mapping2 = &mappings[1];
2143 assert_eq!(mapping2.source_range.start, 8);
2144 assert_eq!(mapping2.source_range.end, 16);
2145 assert_eq!(mapping2.target_range.start, 16);
2146 assert_eq!(mapping2.target_range.end, 24);
2147 }
2148
2149 #[test]
2150 fn test_two_block_replacement_mapping() {
2151 let conn = &get_connection(None).unwrap();
2166 Collection::create(conn, "test collection");
2167 let block_group = create_test_block_group(conn);
2168 let sequence1 = Sequence::new()
2169 .sequence_type("DNA")
2170 .sequence("ATCGATCG")
2171 .save(conn);
2172 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2173 let sequence2 = Sequence::new()
2174 .sequence_type("DNA")
2175 .sequence("TTTTTTTT")
2176 .save(conn);
2177 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2178 let edge1 = Edge::create(
2179 conn,
2180 PATH_START_NODE_ID,
2181 -1,
2182 Strand::Forward,
2183 node1_id,
2184 0,
2185 Strand::Forward,
2186 );
2187 let edge2 = Edge::create(
2188 conn,
2189 node1_id,
2190 8,
2191 Strand::Forward,
2192 node2_id,
2193 0,
2194 Strand::Forward,
2195 );
2196 let edge3 = Edge::create(
2197 conn,
2198 node2_id,
2199 8,
2200 Strand::Forward,
2201 PATH_END_NODE_ID,
2202 -1,
2203 Strand::Forward,
2204 );
2205
2206 let edge_ids = [edge1.id, edge2.id, edge3.id];
2207 let block_group_edges = edge_ids
2208 .iter()
2209 .map(|edge_id| BlockGroupEdgeData {
2210 block_group_id: block_group.id,
2211 edge_id: *edge_id,
2212 chromosome_index: 0,
2213 phased: 0,
2214 })
2215 .collect::<Vec<BlockGroupEdgeData>>();
2216 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2217
2218 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2219
2220 let sequence3 = Sequence::new()
2221 .sequence_type("DNA")
2222 .sequence("AAAAAAAA")
2223 .save(conn);
2224 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2225 let edge4 = Edge::create(
2226 conn,
2227 node1_id,
2228 4,
2229 Strand::Forward,
2230 node3_id,
2231 0,
2232 Strand::Forward,
2233 );
2234 let edge5 = Edge::create(
2235 conn,
2236 node3_id,
2237 8,
2238 Strand::Forward,
2239 node2_id,
2240 4,
2241 Strand::Forward,
2242 );
2243
2244 let edge_ids = [edge4.id, edge5.id];
2245 let block_group_edges = edge_ids
2246 .iter()
2247 .map(|edge_id| BlockGroupEdgeData {
2248 block_group_id: block_group.id,
2249 edge_id: *edge_id,
2250 chromosome_index: 0,
2251 phased: 0,
2252 })
2253 .collect::<Vec<BlockGroupEdgeData>>();
2254 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2255
2256 let path2 = Path::create(
2257 conn,
2258 "chr2",
2259 &block_group.id,
2260 &[edge1.id, edge4.id, edge5.id, edge3.id],
2261 );
2262
2263 assert_eq!(path2.sequence(conn), "ATCGAAAAAAAATTTT");
2264
2265 let mappings = path1.find_block_mappings(conn, &path2);
2266 assert_eq!(mappings.len(), 2);
2267 let mapping1 = &mappings[0];
2268 assert_eq!(mapping1.source_range, mapping1.target_range);
2269 assert_eq!(mapping1.source_range.start, 0);
2270 assert_eq!(mapping1.source_range.end, 4);
2271 assert_eq!(mapping1.target_range.start, 0);
2272 assert_eq!(mapping1.target_range.end, 4);
2273
2274 let mapping2 = &mappings[1];
2275 assert_eq!(mapping2.source_range.start, 12);
2276 assert_eq!(mapping2.source_range.end, 16);
2277 assert_eq!(mapping2.target_range.start, 12);
2278 assert_eq!(mapping2.target_range.end, 16);
2279 }
2280
2281 #[test]
2282 fn test_two_block_deletion_mapping() {
2283 let conn = &get_connection(None).unwrap();
2297 Collection::create(conn, "test collection");
2298 let block_group = create_test_block_group(conn);
2299 let sequence1 = Sequence::new()
2300 .sequence_type("DNA")
2301 .sequence("ATCGATCG")
2302 .save(conn);
2303 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2304 let sequence2 = Sequence::new()
2305 .sequence_type("DNA")
2306 .sequence("TTTTTTTT")
2307 .save(conn);
2308 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2309 let edge1 = Edge::create(
2310 conn,
2311 PATH_START_NODE_ID,
2312 -1,
2313 Strand::Forward,
2314 node1_id,
2315 0,
2316 Strand::Forward,
2317 );
2318 let edge2 = Edge::create(
2319 conn,
2320 node1_id,
2321 8,
2322 Strand::Forward,
2323 node2_id,
2324 0,
2325 Strand::Forward,
2326 );
2327 let edge3 = Edge::create(
2328 conn,
2329 node2_id,
2330 8,
2331 Strand::Forward,
2332 PATH_END_NODE_ID,
2333 -1,
2334 Strand::Forward,
2335 );
2336
2337 let edge_ids = [edge1.id, edge2.id, edge3.id];
2338 let block_group_edges = edge_ids
2339 .iter()
2340 .map(|edge_id| BlockGroupEdgeData {
2341 block_group_id: block_group.id,
2342 edge_id: *edge_id,
2343 chromosome_index: 0,
2344 phased: 0,
2345 })
2346 .collect::<Vec<BlockGroupEdgeData>>();
2347 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2348
2349 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2350
2351 let edge4 = Edge::create(
2352 conn,
2353 node1_id,
2354 4,
2355 Strand::Forward,
2356 node2_id,
2357 4,
2358 Strand::Forward,
2359 );
2360
2361 let edge_ids = [edge4.id];
2362 let block_group_edges = edge_ids
2363 .iter()
2364 .map(|edge_id| BlockGroupEdgeData {
2365 block_group_id: block_group.id,
2366 edge_id: *edge_id,
2367 chromosome_index: 0,
2368 phased: 0,
2369 })
2370 .collect::<Vec<BlockGroupEdgeData>>();
2371 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2372
2373 let path2 = Path::create(
2374 conn,
2375 "chr2",
2376 &block_group.id,
2377 &[edge1.id, edge4.id, edge3.id],
2378 );
2379
2380 assert_eq!(path2.sequence(conn), "ATCGTTTT");
2381
2382 let mappings = path1.find_block_mappings(conn, &path2);
2383 assert_eq!(mappings.len(), 2);
2384 let mapping1 = &mappings[0];
2385 assert_eq!(mapping1.source_range, mapping1.target_range);
2386 assert_eq!(mapping1.source_range.start, 0);
2387 assert_eq!(mapping1.source_range.end, 4);
2388 assert_eq!(mapping1.target_range.start, 0);
2389 assert_eq!(mapping1.target_range.end, 4);
2390
2391 let mapping2 = &mappings[1];
2392 assert_eq!(mapping2.source_range.start, 12);
2393 assert_eq!(mapping2.source_range.end, 16);
2394 assert_eq!(mapping2.target_range.start, 4);
2395 assert_eq!(mapping2.target_range.end, 8);
2396 }
2397
2398 #[test]
2399 fn test_annotation_propagation_full_overlap() {
2400 let conn = &get_connection(None).unwrap();
2408 Collection::create(conn, "test collection");
2409 let block_group = create_test_block_group(conn);
2410 let sequence1 = Sequence::new()
2411 .sequence_type("DNA")
2412 .sequence("ATCGATCG")
2413 .save(conn);
2414 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2415 let edge1 = Edge::create(
2416 conn,
2417 PATH_START_NODE_ID,
2418 -1,
2419 Strand::Forward,
2420 node1_id,
2421 0,
2422 Strand::Forward,
2423 );
2424 let edge2 = Edge::create(
2425 conn,
2426 node1_id,
2427 8,
2428 Strand::Forward,
2429 PATH_END_NODE_ID,
2430 -1,
2431 Strand::Forward,
2432 );
2433
2434 let edge_ids = [edge1.id, edge2.id];
2435 let block_group_edges = edge_ids
2436 .iter()
2437 .map(|edge_id| BlockGroupEdgeData {
2438 block_group_id: block_group.id,
2439 edge_id: *edge_id,
2440 chromosome_index: 0,
2441 phased: 0,
2442 })
2443 .collect::<Vec<BlockGroupEdgeData>>();
2444 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2445
2446 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2447
2448 let annotation = Annotation {
2449 name: "foo".to_string(),
2450 start: 0,
2451 end: 8,
2452 };
2453 let annotations = path.propagate_annotations(conn, &path, vec![annotation]);
2454 assert_eq!(annotations.len(), 1);
2455 let result_annotation = &annotations[0];
2456 assert_eq!(result_annotation.name, "foo");
2457 assert_eq!(result_annotation.start, 0);
2458 assert_eq!(result_annotation.end, 8);
2459 }
2460
2461 #[test]
2462 fn test_propagate_annotations_no_overlap() {
2463 let conn = &get_connection(None).unwrap();
2473 Collection::create(conn, "test collection");
2474 let block_group = create_test_block_group(conn);
2475 let sequence1 = Sequence::new()
2476 .sequence_type("DNA")
2477 .sequence("ATCGATCG")
2478 .save(conn);
2479 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2480 let edge1 = Edge::create(
2481 conn,
2482 PATH_START_NODE_ID,
2483 -1,
2484 Strand::Forward,
2485 node1_id,
2486 0,
2487 Strand::Forward,
2488 );
2489 let edge2 = Edge::create(
2490 conn,
2491 node1_id,
2492 8,
2493 Strand::Forward,
2494 PATH_END_NODE_ID,
2495 -1,
2496 Strand::Forward,
2497 );
2498
2499 let edge_ids = [edge1.id, edge2.id];
2500 let block_group_edges = edge_ids
2501 .iter()
2502 .map(|edge_id| BlockGroupEdgeData {
2503 block_group_id: block_group.id,
2504 edge_id: *edge_id,
2505 chromosome_index: 0,
2506 phased: 0,
2507 })
2508 .collect::<Vec<BlockGroupEdgeData>>();
2509 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2510
2511 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2512
2513 let sequence2 = Sequence::new()
2514 .sequence_type("DNA")
2515 .sequence("TTTTTTTT")
2516 .save(conn);
2517 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2518 let edge3 = Edge::create(
2519 conn,
2520 PATH_START_NODE_ID,
2521 -1,
2522 Strand::Forward,
2523 node2_id,
2524 0,
2525 Strand::Forward,
2526 );
2527 let edge4 = Edge::create(
2528 conn,
2529 node2_id,
2530 8,
2531 Strand::Forward,
2532 PATH_END_NODE_ID,
2533 -1,
2534 Strand::Forward,
2535 );
2536
2537 let edge_ids = [edge3.id, edge4.id];
2538 let block_group_edges = edge_ids
2539 .iter()
2540 .map(|edge_id| BlockGroupEdgeData {
2541 block_group_id: block_group.id,
2542 edge_id: *edge_id,
2543 chromosome_index: 0,
2544 phased: 0,
2545 })
2546 .collect::<Vec<BlockGroupEdgeData>>();
2547 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2548
2549 let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2550
2551 let annotation = Annotation {
2552 name: "foo".to_string(),
2553 start: 0,
2554 end: 8,
2555 };
2556 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2557 assert_eq!(annotations.len(), 0);
2558 }
2559
2560 #[test]
2561 fn test_propagate_annotations_partial_overlap() {
2562 let conn = &get_connection(None).unwrap();
2576 Collection::create(conn, "test collection");
2577 let block_group = create_test_block_group(conn);
2578 let sequence1 = Sequence::new()
2579 .sequence_type("DNA")
2580 .sequence("ATCGATCG")
2581 .save(conn);
2582 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2583 let edge1 = Edge::create(
2584 conn,
2585 PATH_START_NODE_ID,
2586 -1,
2587 Strand::Forward,
2588 node1_id,
2589 0,
2590 Strand::Forward,
2591 );
2592 let edge2 = Edge::create(
2593 conn,
2594 node1_id,
2595 8,
2596 Strand::Forward,
2597 PATH_END_NODE_ID,
2598 -1,
2599 Strand::Forward,
2600 );
2601
2602 let edge_ids = vec![edge1.id, edge2.id];
2603 let block_group_edges = edge_ids
2604 .iter()
2605 .map(|edge_id| BlockGroupEdgeData {
2606 block_group_id: block_group.id,
2607 edge_id: *edge_id,
2608 chromosome_index: 0,
2609 phased: 0,
2610 })
2611 .collect::<Vec<BlockGroupEdgeData>>();
2612 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2613
2614 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2615
2616 let sequence2 = Sequence::new()
2617 .sequence_type("DNA")
2618 .sequence("TTTTTTTT")
2619 .save(conn);
2620 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2621 let edge3 = Edge::create(
2622 conn,
2623 PATH_START_NODE_ID,
2624 -1,
2625 Strand::Forward,
2626 node1_id,
2627 0,
2628 Strand::Forward,
2629 );
2630 let edge4 = Edge::create(
2631 conn,
2632 node1_id,
2633 4,
2634 Strand::Forward,
2635 node2_id,
2636 0,
2637 Strand::Forward,
2638 );
2639 let edge5 = Edge::create(
2640 conn,
2641 node2_id,
2642 8,
2643 Strand::Forward,
2644 PATH_END_NODE_ID,
2645 -1,
2646 Strand::Forward,
2647 );
2648
2649 let edge_ids = vec![edge3.id, edge4.id, edge5.id];
2650 let block_group_edges = edge_ids
2651 .iter()
2652 .map(|edge_id| BlockGroupEdgeData {
2653 block_group_id: block_group.id,
2654 edge_id: *edge_id,
2655 chromosome_index: 0,
2656 phased: 0,
2657 })
2658 .collect::<Vec<BlockGroupEdgeData>>();
2659 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2660
2661 let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2662
2663 assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
2664
2665 let annotation = Annotation {
2666 name: "foo".to_string(),
2667 start: 0,
2668 end: 8,
2669 };
2670 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2671 assert_eq!(annotations.len(), 1);
2672 let result_annotation = &annotations[0];
2673 assert_eq!(result_annotation.name, "foo");
2674 assert_eq!(result_annotation.start, 0);
2675 assert_eq!(result_annotation.end, 4);
2676 }
2677
2678 #[test]
2679 fn test_propagate_annotations_with_insertion() {
2680 let conn = &get_connection(None).unwrap();
2694 Collection::create(conn, "test collection");
2695 let block_group = create_test_block_group(conn);
2696 let sequence1 = Sequence::new()
2697 .sequence_type("DNA")
2698 .sequence("ATCGATCG")
2699 .save(conn);
2700 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2701 let edge1 = Edge::create(
2702 conn,
2703 PATH_START_NODE_ID,
2704 -1,
2705 Strand::Forward,
2706 node1_id,
2707 0,
2708 Strand::Forward,
2709 );
2710 let edge2 = Edge::create(
2711 conn,
2712 node1_id,
2713 8,
2714 Strand::Forward,
2715 PATH_END_NODE_ID,
2716 -1,
2717 Strand::Forward,
2718 );
2719
2720 let edge_ids = vec![edge1.id, edge2.id];
2721 let block_group_edges = edge_ids
2722 .iter()
2723 .map(|edge_id| BlockGroupEdgeData {
2724 block_group_id: block_group.id,
2725 edge_id: *edge_id,
2726 chromosome_index: 0,
2727 phased: 0,
2728 })
2729 .collect::<Vec<BlockGroupEdgeData>>();
2730 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2731
2732 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2733
2734 let sequence2 = Sequence::new()
2735 .sequence_type("DNA")
2736 .sequence("TTTTTTTT")
2737 .save(conn);
2738 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2739 let edge4 = Edge::create(
2740 conn,
2741 node1_id,
2742 4,
2743 Strand::Forward,
2744 node2_id,
2745 0,
2746 Strand::Forward,
2747 );
2748 let edge5 = Edge::create(
2749 conn,
2750 node2_id,
2751 8,
2752 Strand::Forward,
2753 node1_id,
2754 4,
2755 Strand::Forward,
2756 );
2757
2758 let edge_ids = [edge4.id, edge5.id];
2759 let block_group_edges = edge_ids
2760 .iter()
2761 .map(|edge_id| BlockGroupEdgeData {
2762 block_group_id: block_group.id,
2763 edge_id: *edge_id,
2764 chromosome_index: 0,
2765 phased: 0,
2766 })
2767 .collect::<Vec<BlockGroupEdgeData>>();
2768 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2769
2770 let path2 = Path::create(
2771 conn,
2772 "chr2",
2773 &block_group.id,
2774 &[edge1.id, edge4.id, edge5.id, edge2.id],
2775 );
2776
2777 assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
2778
2779 let annotation = Annotation {
2780 name: "foo".to_string(),
2781 start: 0,
2782 end: 8,
2783 };
2784
2785 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2786 assert_eq!(annotations.len(), 1);
2787
2788 let result_annotation = &annotations[0];
2791 assert_eq!(result_annotation.name, "foo");
2792 assert_eq!(result_annotation.start, 0);
2793 assert_eq!(result_annotation.end, 16);
2794 }
2795
2796 #[test]
2797 fn test_propagate_annotations_with_replacement() {
2798 let conn = &get_connection(None).unwrap();
2812 Collection::create(conn, "test collection");
2813 let block_group = create_test_block_group(conn);
2814 let sequence1 = Sequence::new()
2815 .sequence_type("DNA")
2816 .sequence("ATCGATCG")
2817 .save(conn);
2818 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2819 let edge1 = Edge::create(
2820 conn,
2821 PATH_START_NODE_ID,
2822 -1,
2823 Strand::Forward,
2824 node1_id,
2825 0,
2826 Strand::Forward,
2827 );
2828 let edge2 = Edge::create(
2829 conn,
2830 node1_id,
2831 8,
2832 Strand::Forward,
2833 PATH_END_NODE_ID,
2834 -1,
2835 Strand::Forward,
2836 );
2837
2838 let edge_ids = [edge1.id, edge2.id];
2839 let block_group_edges = edge_ids
2840 .iter()
2841 .map(|edge_id| BlockGroupEdgeData {
2842 block_group_id: block_group.id,
2843 edge_id: *edge_id,
2844 chromosome_index: 0,
2845 phased: 0,
2846 })
2847 .collect::<Vec<BlockGroupEdgeData>>();
2848 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2849
2850 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2851
2852 let sequence2 = Sequence::new()
2853 .sequence_type("DNA")
2854 .sequence("TTTTTTTT")
2855 .save(conn);
2856 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2857 let edge4 = Edge::create(
2858 conn,
2859 node1_id,
2860 2,
2861 Strand::Forward,
2862 node2_id,
2863 0,
2864 Strand::Forward,
2865 );
2866 let edge5 = Edge::create(
2867 conn,
2868 node2_id,
2869 8,
2870 Strand::Forward,
2871 node1_id,
2872 6,
2873 Strand::Forward,
2874 );
2875
2876 let edge_ids = [edge4.id, edge5.id];
2877 let block_group_edges = edge_ids
2878 .iter()
2879 .map(|edge_id| BlockGroupEdgeData {
2880 block_group_id: block_group.id,
2881 edge_id: *edge_id,
2882 chromosome_index: 0,
2883 phased: 0,
2884 })
2885 .collect::<Vec<BlockGroupEdgeData>>();
2886 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2887
2888 let path2 = Path::create(
2889 conn,
2890 "chr2",
2891 &block_group.id,
2892 &[edge1.id, edge4.id, edge5.id, edge2.id],
2893 );
2894
2895 assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
2896
2897 let annotation = Annotation {
2898 name: "foo".to_string(),
2899 start: 0,
2900 end: 4,
2901 };
2902
2903 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2904 assert_eq!(annotations.len(), 1);
2905
2906 let result_annotation = &annotations[0];
2908 assert_eq!(result_annotation.name, "foo");
2909 assert_eq!(result_annotation.start, 0);
2910 assert_eq!(result_annotation.end, 2);
2911 }
2912
2913 #[test]
2914 fn test_propagate_annotations_with_insertion_across_two_blocks() {
2915 let conn = &get_connection(None).unwrap();
2930 Collection::create(conn, "test collection");
2931 let block_group = create_test_block_group(conn);
2932 let sequence1 = Sequence::new()
2933 .sequence_type("DNA")
2934 .sequence("ATCGATCG")
2935 .save(conn);
2936 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2937 let sequence2 = Sequence::new()
2938 .sequence_type("DNA")
2939 .sequence("TTTTTTTT")
2940 .save(conn);
2941 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2942 let edge1 = Edge::create(
2943 conn,
2944 PATH_START_NODE_ID,
2945 -1,
2946 Strand::Forward,
2947 node1_id,
2948 0,
2949 Strand::Forward,
2950 );
2951 let edge2 = Edge::create(
2952 conn,
2953 node1_id,
2954 8,
2955 Strand::Forward,
2956 node2_id,
2957 0,
2958 Strand::Forward,
2959 );
2960 let edge3 = Edge::create(
2961 conn,
2962 node2_id,
2963 8,
2964 Strand::Forward,
2965 PATH_END_NODE_ID,
2966 -1,
2967 Strand::Forward,
2968 );
2969
2970 let edge_ids = [edge1.id, edge2.id, edge3.id];
2971 let block_group_edges = edge_ids
2972 .iter()
2973 .map(|edge_id| BlockGroupEdgeData {
2974 block_group_id: block_group.id,
2975 edge_id: *edge_id,
2976 chromosome_index: 0,
2977 phased: 0,
2978 })
2979 .collect::<Vec<BlockGroupEdgeData>>();
2980 BlockGroupEdge::bulk_create(conn, &block_group_edges);
2981
2982 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2983
2984 let sequence3 = Sequence::new()
2985 .sequence_type("DNA")
2986 .sequence("AAAAAAAA")
2987 .save(conn);
2988 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2989 let edge4 = Edge::create(
2990 conn,
2991 node1_id,
2992 8,
2993 Strand::Forward,
2994 node3_id,
2995 0,
2996 Strand::Forward,
2997 );
2998 let edge5 = Edge::create(
2999 conn,
3000 node3_id,
3001 8,
3002 Strand::Forward,
3003 node2_id,
3004 0,
3005 Strand::Forward,
3006 );
3007
3008 let edge_ids = [edge4.id, edge5.id];
3009 let block_group_edges = edge_ids
3010 .iter()
3011 .map(|edge_id| BlockGroupEdgeData {
3012 block_group_id: block_group.id,
3013 edge_id: *edge_id,
3014 chromosome_index: 0,
3015 phased: 0,
3016 })
3017 .collect::<Vec<BlockGroupEdgeData>>();
3018 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3019
3020 let path2 = Path::create(
3021 conn,
3022 "chr2",
3023 &block_group.id,
3024 &[edge1.id, edge4.id, edge5.id, edge3.id],
3025 );
3026
3027 assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
3028
3029 let annotation = Annotation {
3030 name: "foo".to_string(),
3031 start: 0,
3032 end: 16,
3033 };
3034
3035 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3036 assert_eq!(annotations.len(), 1);
3037
3038 let result_annotation = &annotations[0];
3041 assert_eq!(result_annotation.name, "foo");
3042 assert_eq!(result_annotation.start, 0);
3043 assert_eq!(result_annotation.end, 24);
3044 }
3045
3046 #[test]
3047 fn test_propagate_annotations_with_deletion_across_two_blocks() {
3048 let conn = &get_connection(None).unwrap();
3062 Collection::create(conn, "test collection");
3063 let block_group = create_test_block_group(conn);
3064 let sequence1 = Sequence::new()
3065 .sequence_type("DNA")
3066 .sequence("ATCGATCG")
3067 .save(conn);
3068 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3069 let sequence2 = Sequence::new()
3070 .sequence_type("DNA")
3071 .sequence("TTTTTTTT")
3072 .save(conn);
3073 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3074 let edge1 = Edge::create(
3075 conn,
3076 PATH_START_NODE_ID,
3077 -1,
3078 Strand::Forward,
3079 node1_id,
3080 0,
3081 Strand::Forward,
3082 );
3083 let edge2 = Edge::create(
3084 conn,
3085 node1_id,
3086 8,
3087 Strand::Forward,
3088 node2_id,
3089 0,
3090 Strand::Forward,
3091 );
3092 let edge3 = Edge::create(
3093 conn,
3094 node2_id,
3095 8,
3096 Strand::Forward,
3097 PATH_END_NODE_ID,
3098 -1,
3099 Strand::Forward,
3100 );
3101
3102 let edge_ids = [edge1.id, edge2.id, edge3.id];
3103 let block_group_edges = edge_ids
3104 .iter()
3105 .map(|edge_id| BlockGroupEdgeData {
3106 block_group_id: block_group.id,
3107 edge_id: *edge_id,
3108 chromosome_index: 0,
3109 phased: 0,
3110 })
3111 .collect::<Vec<BlockGroupEdgeData>>();
3112 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3113
3114 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3115
3116 let edge4 = Edge::create(
3117 conn,
3118 node1_id,
3119 4,
3120 Strand::Forward,
3121 node2_id,
3122 4,
3123 Strand::Forward,
3124 );
3125
3126 let edge_ids = [edge4.id];
3127 let block_group_edges = edge_ids
3128 .iter()
3129 .map(|edge_id| BlockGroupEdgeData {
3130 block_group_id: block_group.id,
3131 edge_id: *edge_id,
3132 chromosome_index: 0,
3133 phased: 0,
3134 })
3135 .collect::<Vec<BlockGroupEdgeData>>();
3136 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3137
3138 let path2 = Path::create(
3139 conn,
3140 "chr2",
3141 &block_group.id,
3142 &[edge1.id, edge4.id, edge3.id],
3143 );
3144
3145 assert_eq!(path2.sequence(conn), "ATCGTTTT");
3146
3147 let annotation = Annotation {
3148 name: "foo".to_string(),
3149 start: 0,
3150 end: 12,
3151 };
3152
3153 let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3154 assert_eq!(annotations.len(), 1);
3155
3156 let result_annotation = &annotations[0];
3158 assert_eq!(result_annotation.name, "foo");
3159 assert_eq!(result_annotation.start, 0);
3160 assert_eq!(result_annotation.end, 4);
3161 }
3162
3163 #[test]
3164 fn test_new_path_with() {
3165 let conn = &get_connection(None).unwrap();
3166 Collection::create(conn, "test collection");
3167 let block_group = create_test_block_group(conn);
3168 let sequence1 = Sequence::new()
3169 .sequence_type("DNA")
3170 .sequence("ATCGATCG")
3171 .save(conn);
3172 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3173 let edge1 = Edge::create(
3174 conn,
3175 PATH_START_NODE_ID,
3176 -123,
3177 Strand::Forward,
3178 node1_id,
3179 0,
3180 Strand::Forward,
3181 );
3182 let sequence2 = Sequence::new()
3183 .sequence_type("DNA")
3184 .sequence("AAAAAAAA")
3185 .save(conn);
3186 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3187 let edge2 = Edge::create(
3188 conn,
3189 node1_id,
3190 8,
3191 Strand::Forward,
3192 node2_id,
3193 0,
3194 Strand::Forward,
3195 );
3196 let edge3 = Edge::create(
3197 conn,
3198 node2_id,
3199 8,
3200 Strand::Forward,
3201 PATH_END_NODE_ID,
3202 -1,
3203 Strand::Forward,
3204 );
3205
3206 let edge_ids = [edge1.id, edge2.id, edge3.id];
3207 let block_group_edges = edge_ids
3208 .iter()
3209 .map(|edge_id| BlockGroupEdgeData {
3210 block_group_id: block_group.id,
3211 edge_id: *edge_id,
3212 chromosome_index: 0,
3213 phased: 0,
3214 })
3215 .collect::<Vec<BlockGroupEdgeData>>();
3216 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3217
3218 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3219 assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3220
3221 let sequence3 = Sequence::new()
3222 .sequence_type("DNA")
3223 .sequence("CCCCCCCC")
3224 .save(conn);
3225 let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
3226 let edge4 = Edge::create(
3227 conn,
3228 node1_id,
3229 4,
3230 Strand::Forward,
3231 node3_id,
3232 0,
3233 Strand::Forward,
3234 );
3235 let edge5 = Edge::create(
3236 conn,
3237 node3_id,
3238 8,
3239 Strand::Forward,
3240 node2_id,
3241 3,
3242 Strand::Forward,
3243 );
3244
3245 let edge_ids = [edge4.id, edge5.id];
3246 let block_group_edges = edge_ids
3247 .iter()
3248 .map(|edge_id| BlockGroupEdgeData {
3249 block_group_id: block_group.id,
3250 edge_id: *edge_id,
3251 chromosome_index: 0,
3252 phased: 0,
3253 })
3254 .collect::<Vec<BlockGroupEdgeData>>();
3255 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3256
3257 let path2 = path1.new_path_with(conn, 4, 11, &edge4, &edge5);
3258 assert_eq!(path2.sequence(conn), "ATCGCCCCCCCCAAAAA");
3259
3260 let edge6 = Edge::create(
3261 conn,
3262 node1_id,
3263 4,
3264 Strand::Forward,
3265 node3_id,
3266 0,
3267 Strand::Forward,
3268 );
3269 let edge7 = Edge::create(
3270 conn,
3271 node3_id,
3272 8,
3273 Strand::Forward,
3274 node1_id,
3275 7,
3276 Strand::Forward,
3277 );
3278
3279 let edge_ids = [edge6.id, edge7.id];
3280 let block_group_edges = edge_ids
3281 .iter()
3282 .map(|edge_id| BlockGroupEdgeData {
3283 block_group_id: block_group.id,
3284 edge_id: *edge_id,
3285 chromosome_index: 0,
3286 phased: 0,
3287 })
3288 .collect::<Vec<BlockGroupEdgeData>>();
3289 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3290
3291 let path3 = path1.new_path_with(conn, 4, 7, &edge6, &edge7);
3292 assert_eq!(path3.sequence(conn), "ATCGCCCCCCCCGAAAAAAAA");
3293 }
3294
3295 #[test]
3296 fn test_new_path_with_deletion() {
3297 let conn = &get_connection(None).unwrap();
3298 Collection::create(conn, "test collection");
3299 let block_group = create_test_block_group(conn);
3300 let sequence1 = Sequence::new()
3301 .sequence_type("DNA")
3302 .sequence("ATCGATCG")
3303 .save(conn);
3304 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3305 let edge1 = Edge::create(
3306 conn,
3307 PATH_START_NODE_ID,
3308 -123,
3309 Strand::Forward,
3310 node1_id,
3311 0,
3312 Strand::Forward,
3313 );
3314 let sequence2 = Sequence::new()
3315 .sequence_type("DNA")
3316 .sequence("AAAAAAAA")
3317 .save(conn);
3318 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3319 let edge2 = Edge::create(
3320 conn,
3321 node1_id,
3322 8,
3323 Strand::Forward,
3324 node2_id,
3325 0,
3326 Strand::Forward,
3327 );
3328 let edge3 = Edge::create(
3329 conn,
3330 node2_id,
3331 8,
3332 Strand::Forward,
3333 PATH_END_NODE_ID,
3334 -1,
3335 Strand::Forward,
3336 );
3337
3338 let edge_ids = [edge1.id, edge2.id, edge3.id];
3339 let block_group_edges = edge_ids
3340 .iter()
3341 .map(|edge_id| BlockGroupEdgeData {
3342 block_group_id: block_group.id,
3343 edge_id: *edge_id,
3344 chromosome_index: 0,
3345 phased: 0,
3346 })
3347 .collect::<Vec<BlockGroupEdgeData>>();
3348 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3349
3350 let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3351 assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3352
3353 let deletion_edge = Edge::create(
3354 conn,
3355 node1_id,
3356 4,
3357 Strand::Forward,
3358 node2_id,
3359 3,
3360 Strand::Forward,
3361 );
3362
3363 let block_group_edge = BlockGroupEdgeData {
3364 block_group_id: block_group.id,
3365 edge_id: deletion_edge.id,
3366 chromosome_index: 0,
3367 phased: 0,
3368 };
3369
3370 BlockGroupEdge::bulk_create(conn, &[block_group_edge]);
3371
3372 let path2 = path1.new_path_with_deletion(conn, 4, 11).unwrap();
3373 assert_eq!(path2.sequence(conn), "ATCGAAAAA");
3374 }
3375
3376 #[test]
3377 fn test_duplicate_edge_warning() {
3378 let conn = &get_connection(None).unwrap();
3379 Collection::create(conn, "test collection");
3380 let block_group = create_test_block_group(conn);
3381 let sequence1 = Sequence::new()
3382 .sequence_type("DNA")
3383 .sequence("ATCGATCG")
3384 .save(conn);
3385 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3386 let edge1 = Edge::create(
3387 conn,
3388 PATH_START_NODE_ID,
3389 -123,
3390 Strand::Forward,
3391 node1_id,
3392 0,
3393 Strand::Forward,
3394 );
3395 let edge2 = Edge::create(
3396 conn,
3397 node1_id,
3398 8,
3399 Strand::Forward,
3400 PATH_END_NODE_ID,
3401 -1,
3402 Strand::Forward,
3403 );
3404
3405 let edge_ids = vec![edge1.id, edge2.id];
3406 let block_group_edges = edge_ids
3407 .iter()
3408 .map(|edge_id| BlockGroupEdgeData {
3409 block_group_id: block_group.id,
3410 edge_id: *edge_id,
3411 chromosome_index: 0,
3412 phased: 0,
3413 })
3414 .collect::<Vec<BlockGroupEdgeData>>();
3415 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3416
3417 let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3419 }
3420
3421 #[test]
3422 #[should_panic(expected = "Not all edges are in the block group")]
3423 fn test_edges_must_be_in_path_block_group() {
3424 let conn = &get_connection(None).unwrap();
3425 Collection::create(conn, "test collection");
3426 let block_group = create_test_block_group(conn);
3427 let sequence1 = Sequence::new()
3428 .sequence_type("DNA")
3429 .sequence("ATCGATCG")
3430 .save(conn);
3431 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3432 let edge1 = Edge::create(
3433 conn,
3434 PATH_START_NODE_ID,
3435 -123,
3436 Strand::Forward,
3437 node1_id,
3438 0,
3439 Strand::Forward,
3440 );
3441 let edge2 = Edge::create(
3442 conn,
3443 node1_id,
3444 8,
3445 Strand::Forward,
3446 PATH_END_NODE_ID,
3447 -1,
3448 Strand::Forward,
3449 );
3450
3451 let edge_ids = [edge1.id, edge2.id];
3452 let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3453 }
3454
3455 #[test]
3456 #[should_panic]
3457 fn test_consecutive_edges_must_share_a_node() {
3459 let conn = &get_connection(None).unwrap();
3460 Collection::create(conn, "test collection");
3461 let block_group = create_test_block_group(conn);
3462 let sequence1 = Sequence::new()
3463 .sequence_type("DNA")
3464 .sequence("ATCGATCG")
3465 .save(conn);
3466 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3467 let edge1 = Edge::create(
3468 conn,
3469 PATH_START_NODE_ID,
3470 -123,
3471 Strand::Forward,
3472 node1_id,
3473 0,
3474 Strand::Forward,
3475 );
3476 let sequence2 = Sequence::new()
3477 .sequence_type("DNA")
3478 .sequence("AAAAAAAA")
3479 .save(conn);
3480 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3481 let edge2 = Edge::create(
3482 conn,
3483 node2_id,
3484 8,
3485 Strand::Forward,
3486 PATH_END_NODE_ID,
3487 -1,
3488 Strand::Forward,
3489 );
3490
3491 let block_group_edges = vec![
3492 BlockGroupEdgeData {
3493 block_group_id: block_group.id,
3494 edge_id: edge1.id,
3495 chromosome_index: 0,
3496 phased: 0,
3497 },
3498 BlockGroupEdgeData {
3499 block_group_id: block_group.id,
3500 edge_id: edge2.id,
3501 chromosome_index: 0,
3502 phased: 0,
3503 },
3504 ];
3505 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3506
3507 let edge_ids = vec![edge1.id, edge2.id];
3508
3509 let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3510 }
3511
3512 #[test]
3513 #[should_panic]
3514 fn test_consecutive_edges_must_share_the_same_strand() {
3516 let conn = &get_connection(None).unwrap();
3517 Collection::create(conn, "test collection");
3518 let block_group = create_test_block_group(conn);
3519 let sequence1 = Sequence::new()
3520 .sequence_type("DNA")
3521 .sequence("ATCGATCG")
3522 .save(conn);
3523 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3524 let edge1 = Edge::create(
3525 conn,
3526 PATH_START_NODE_ID,
3527 -123,
3528 Strand::Forward,
3529 node1_id,
3530 0,
3531 Strand::Forward,
3532 );
3533 let sequence2 = Sequence::new()
3534 .sequence_type("DNA")
3535 .sequence("AAAAAAAA")
3536 .save(conn);
3537 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3538 let edge2 = Edge::create(
3539 conn,
3540 node2_id,
3541 8,
3542 Strand::Forward,
3543 PATH_END_NODE_ID,
3544 -1,
3545 Strand::Reverse,
3546 );
3547
3548 let edge_ids = vec![edge1.id, edge2.id];
3549 let block_group_edges = edge_ids
3550 .iter()
3551 .map(|edge_id| BlockGroupEdgeData {
3552 block_group_id: block_group.id,
3553 edge_id: *edge_id,
3554 chromosome_index: 0,
3555 phased: 0,
3556 })
3557 .collect::<Vec<BlockGroupEdgeData>>();
3558
3559 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3560
3561 let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3562 }
3563
3564 #[test]
3565 #[should_panic]
3566 fn test_consecutive_edges_must_have_different_coordinates_on_a_node() {
3568 let conn = &get_connection(None).unwrap();
3569 Collection::create(conn, "test collection");
3570 let block_group = create_test_block_group(conn);
3571 let sequence1 = Sequence::new()
3572 .sequence_type("DNA")
3573 .sequence("ATCGATCG")
3574 .save(conn);
3575 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3576 let edge1 = Edge::create(
3577 conn,
3578 PATH_START_NODE_ID,
3579 -123,
3580 Strand::Forward,
3581 node1_id,
3582 4,
3583 Strand::Forward,
3584 );
3585 let sequence2 = Sequence::new()
3586 .sequence_type("DNA")
3587 .sequence("AAAAAAAA")
3588 .save(conn);
3589 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3590 let edge2 = Edge::create(
3592 conn,
3593 node1_id,
3594 2,
3595 Strand::Forward,
3596 node2_id,
3597 0,
3598 Strand::Forward,
3599 );
3600
3601 let edge_ids = vec![edge1.id, edge2.id];
3602 let block_group_edges = edge_ids
3603 .iter()
3604 .map(|edge_id| BlockGroupEdgeData {
3605 block_group_id: block_group.id,
3606 edge_id: *edge_id,
3607 chromosome_index: 0,
3608 phased: 0,
3609 })
3610 .collect::<Vec<BlockGroupEdgeData>>();
3611 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3612
3613 let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3614 }
3615
3616 #[test]
3617 fn test_node_blocks_for_range() {
3618 let conn = &get_connection(None).unwrap();
3619 Collection::create(conn, "test collection");
3620 let block_group = create_test_block_group(conn);
3621 let sequence1 = Sequence::new()
3622 .sequence_type("DNA")
3623 .sequence("ATCGATCG")
3624 .save(conn);
3625 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3626 let edge1 = Edge::create(
3627 conn,
3628 PATH_START_NODE_ID,
3629 -123,
3630 Strand::Forward,
3631 node1_id,
3632 0,
3633 Strand::Forward,
3634 );
3635 let sequence2 = Sequence::new()
3636 .sequence_type("DNA")
3637 .sequence("AAAAAAAA")
3638 .save(conn);
3639 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3640 let edge2 = Edge::create(
3641 conn,
3642 node1_id,
3643 8,
3644 Strand::Forward,
3645 node2_id,
3646 0,
3647 Strand::Forward,
3648 );
3649 let edge3 = Edge::create(
3650 conn,
3651 node2_id,
3652 8,
3653 Strand::Forward,
3654 PATH_END_NODE_ID,
3655 -1,
3656 Strand::Forward,
3657 );
3658
3659 let edge_ids = vec![edge1.id, edge2.id, edge3.id];
3660 let block_group_edges = edge_ids
3661 .iter()
3662 .map(|edge_id| BlockGroupEdgeData {
3663 block_group_id: block_group.id,
3664 edge_id: *edge_id,
3665 chromosome_index: 0,
3666 phased: 0,
3667 })
3668 .collect::<Vec<BlockGroupEdgeData>>();
3669 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3670
3671 let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3672
3673 let intervaltree = path.intervaltree(conn);
3674
3675 let node_blocks1 = path.node_blocks_for_range(&intervaltree, 0, 8);
3676 let expected_node_blocks1 = vec![NodeIntervalBlock {
3677 node_id: node1_id,
3678 start: 0,
3679 end: 8,
3680 sequence_start: 0,
3681 sequence_end: 8,
3682 strand: Strand::Forward,
3683 }];
3684 assert_eq!(node_blocks1, expected_node_blocks1);
3685
3686 let node_blocks2 = path.node_blocks_for_range(&intervaltree, 0, 4);
3687 let expected_node_blocks2 = vec![NodeIntervalBlock {
3688 node_id: node1_id,
3689 start: 0,
3690 end: 4,
3691 sequence_start: 0,
3692 sequence_end: 4,
3693 strand: Strand::Forward,
3694 }];
3695 assert_eq!(node_blocks2, expected_node_blocks2);
3696
3697 let node_blocks3 = path.node_blocks_for_range(&intervaltree, 2, 6);
3698 let expected_node_blocks3 = vec![NodeIntervalBlock {
3699 node_id: node1_id,
3700 start: 2,
3701 end: 6,
3702 sequence_start: 2,
3703 sequence_end: 6,
3704 strand: Strand::Forward,
3705 }];
3706 assert_eq!(node_blocks3, expected_node_blocks3);
3707
3708 let node_blocks4 = path.node_blocks_for_range(&intervaltree, 3, 8);
3709 let expected_node_blocks4 = vec![NodeIntervalBlock {
3710 node_id: node1_id,
3711 start: 3,
3712 end: 8,
3713 sequence_start: 3,
3714 sequence_end: 8,
3715 strand: Strand::Forward,
3716 }];
3717 assert_eq!(node_blocks4, expected_node_blocks4);
3718
3719 let node_blocks5 = path.node_blocks_for_range(&intervaltree, 6, 10);
3720 let expected_node_blocks5 = vec![
3721 NodeIntervalBlock {
3722 node_id: node1_id,
3723 start: 6,
3724 end: 8,
3725 sequence_start: 6,
3726 sequence_end: 8,
3727 strand: Strand::Forward,
3728 },
3729 NodeIntervalBlock {
3730 node_id: node2_id,
3731 start: 8,
3732 end: 10,
3733 sequence_start: 0,
3734 sequence_end: 2,
3735 strand: Strand::Forward,
3736 },
3737 ];
3738 assert_eq!(node_blocks5, expected_node_blocks5);
3739
3740 let node_blocks6 = path.node_blocks_for_range(&intervaltree, 12, 16);
3741 let expected_node_blocks6 = vec![NodeIntervalBlock {
3742 node_id: node2_id,
3743 start: 12,
3744 end: 16,
3745 sequence_start: 4,
3746 sequence_end: 8,
3747 strand: Strand::Forward,
3748 }];
3749 assert_eq!(node_blocks6, expected_node_blocks6);
3750 }
3751
3752 #[test]
3753 fn test_node_blocks_for_range_with_node_parts() {
3754 let conn = &get_connection(None).unwrap();
3755 Collection::create(conn, "test collection");
3756 let block_group = create_test_block_group(conn);
3757 let sequence1 = Sequence::new()
3758 .sequence_type("DNA")
3759 .sequence("AAAAAAAA")
3760 .save(conn);
3761 let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3762 let edge1 = Edge::create(
3763 conn,
3764 PATH_START_NODE_ID,
3765 -123,
3766 Strand::Forward,
3767 node1_id,
3768 0,
3769 Strand::Forward,
3770 );
3771 let sequence2 = Sequence::new()
3772 .sequence_type("DNA")
3773 .sequence("TTTTTTTT")
3774 .save(conn);
3775 let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3776 let edge2 = Edge::create(
3777 conn,
3778 node1_id,
3779 5,
3780 Strand::Forward,
3781 node2_id,
3782 0,
3783 Strand::Forward,
3784 );
3785 let edge3 = Edge::create(
3786 conn,
3787 node2_id,
3788 8,
3789 Strand::Forward,
3790 node1_id,
3791 6,
3792 Strand::Forward,
3793 );
3794 let edge4 = Edge::create(
3795 conn,
3796 node1_id,
3797 8,
3798 Strand::Forward,
3799 PATH_END_NODE_ID,
3800 -1,
3801 Strand::Forward,
3802 );
3803
3804 let edge_ids = &[edge1.id, edge2.id, edge3.id, edge4.id];
3805 let block_group_edges = edge_ids
3806 .iter()
3807 .map(|edge_id| BlockGroupEdgeData {
3808 block_group_id: block_group.id,
3809 edge_id: *edge_id,
3810 chromosome_index: 0,
3811 phased: 0,
3812 })
3813 .collect::<Vec<BlockGroupEdgeData>>();
3814 BlockGroupEdge::bulk_create(conn, &block_group_edges);
3815
3816 let path1 = Path::create(conn, "chr1.1", &block_group.id, &[edge1.id, edge4.id]);
3817 let path2 = Path::create(conn, "chr1.2", &block_group.id, edge_ids);
3818
3819 let intervaltree1 = path1.intervaltree(conn);
3820
3821 let node_blocks1 = path1.node_blocks_for_range(&intervaltree1, 0, 8);
3822 let expected_node_blocks1 = vec![NodeIntervalBlock {
3823 node_id: node1_id,
3824 start: 0,
3825 end: 8,
3826 sequence_start: 0,
3827 sequence_end: 8,
3828 strand: Strand::Forward,
3829 }];
3830 assert_eq!(node_blocks1, expected_node_blocks1);
3831
3832 let intervaltree2 = path2.intervaltree(conn);
3833
3834 let node_blocks2 = path2.node_blocks_for_range(&intervaltree2, 0, 8);
3835 let expected_node_blocks2 = vec![
3836 NodeIntervalBlock {
3837 node_id: node1_id,
3838 start: 0,
3839 end: 5,
3840 sequence_start: 0,
3841 sequence_end: 5,
3842 strand: Strand::Forward,
3843 },
3844 NodeIntervalBlock {
3845 node_id: node2_id,
3846 start: 5,
3847 end: 8,
3848 sequence_start: 0,
3849 sequence_end: 3,
3850 strand: Strand::Forward,
3851 },
3852 ];
3853 assert_eq!(node_blocks2, expected_node_blocks2);
3854
3855 let node_blocks3 = path2.node_blocks_for_range(&intervaltree2, 4, 14);
3856 let expected_node_blocks3 = vec![
3857 NodeIntervalBlock {
3858 node_id: node1_id,
3859 start: 4,
3860 end: 5,
3861 sequence_start: 4,
3862 sequence_end: 5,
3863 strand: Strand::Forward,
3864 },
3865 NodeIntervalBlock {
3866 node_id: node2_id,
3867 start: 5,
3868 end: 13,
3869 sequence_start: 0,
3870 sequence_end: 8,
3871 strand: Strand::Forward,
3872 },
3873 NodeIntervalBlock {
3874 node_id: node1_id,
3875 start: 13,
3876 end: 14,
3877 sequence_start: 6,
3878 sequence_end: 7,
3879 strand: Strand::Forward,
3880 },
3881 ];
3882 assert_eq!(node_blocks3, expected_node_blocks3);
3883 }
3884}