Skip to main content

gen_models/
path.rs

1use core::ops::Range as RustRange;
2use std::collections::{HashMap, HashSet};
3
4use gen_core::{
5    HashId, NodeIntervalBlock, PATH_END_NODE_ID, PATH_START_NODE_ID, PathBlock, Strand,
6    calculate_hash, is_end_node, is_start_node,
7    range::{Range, RangeMapping},
8    traits::Capnp,
9};
10use intervaltree::IntervalTree;
11use itertools::Itertools;
12use rusqlite::{Row, params, types::Value as SQLValue};
13use serde::{Deserialize, Serialize};
14
15use crate::{
16    block_group_edge::BlockGroupEdge, db::GraphConnection, edge::Edge, errors::QueryError,
17    gen_models_capnp::path as PathCapnp, node::Node, path_edge::PathEdge, sequence::Sequence,
18    traits::*,
19};
20
21#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
22pub struct Path {
23    pub id: HashId,
24    pub block_group_id: HashId,
25    pub name: String,
26    pub created_on: i64,
27}
28
29impl<'a> Capnp<'a> for Path {
30    type Builder = PathCapnp::Builder<'a>;
31    type Reader = PathCapnp::Reader<'a>;
32
33    fn write_capnp(&self, builder: &mut Self::Builder) {
34        builder.set_id(&self.id.0).unwrap();
35        builder.set_block_group_id(&self.block_group_id.0).unwrap();
36        builder.set_name(&self.name);
37        builder.set_created_on(self.created_on);
38    }
39
40    fn read_capnp(reader: Self::Reader) -> Self {
41        let id = reader
42            .get_id()
43            .unwrap()
44            .as_slice()
45            .unwrap()
46            .try_into()
47            .unwrap();
48        let block_group_id = reader
49            .get_block_group_id()
50            .unwrap()
51            .as_slice()
52            .unwrap()
53            .try_into()
54            .unwrap();
55        let name = reader.get_name().unwrap().to_string().unwrap();
56        let created_on = reader.get_created_on();
57
58        Path {
59            id,
60            block_group_id,
61            name,
62            created_on,
63        }
64    }
65}
66
67#[derive(Clone, Debug, Eq, Hash, PartialEq)]
68pub struct PathData {
69    pub name: String,
70    pub block_group_id: HashId,
71}
72
73// interesting gist here: https://gist.github.com/mbhall88/cd900add6335c96127efea0e0f6a9f48, see if we
74// can expand this to ambiguous bases/keep case
75pub fn revcomp(seq: &str) -> String {
76    String::from_utf8(
77        seq.chars()
78            .rev()
79            .map(|c| -> u8 {
80                let is_upper = c.is_ascii_uppercase();
81                let rc = c as u8;
82                let v = if rc == 78 {
83                    // N
84                    rc
85                } else if rc == 110 {
86                    // n
87                    rc
88                } else if rc & 2 != 0 {
89                    // CG
90                    rc ^ 4
91                } else {
92                    // AT
93                    rc ^ 21
94                };
95                if is_upper { v } else { v.to_ascii_lowercase() }
96            })
97            .collect(),
98    )
99    .unwrap()
100}
101
102#[derive(Clone, Debug)]
103pub struct Annotation {
104    pub name: String,
105    pub start: i64,
106    pub end: i64,
107}
108
109impl Path {
110    pub fn validate_edges(conn: &GraphConnection, edge_ids: &[HashId], block_group_id: &HashId) {
111        let edge_id_set = edge_ids.iter().copied().collect::<HashSet<HashId>>();
112
113        // No duplicate edges allowed
114        if edge_id_set.len() != edge_ids.len() {
115            println!("Duplicate edge IDs detected in path creation");
116        }
117
118        // All path edges must be in the path's block group
119        let augmented_edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
120        let bg_edge_ids = augmented_edges
121            .iter()
122            .map(|augmented_edge| augmented_edge.edge.id)
123            .collect::<HashSet<_>>();
124
125        assert!(
126            edge_id_set.is_subset(&bg_edge_ids),
127            "Not all edges are in the block group ({block_group_id})"
128        );
129
130        let edges_by_id = augmented_edges
131            .iter()
132            .map(|augmented_edge| (&augmented_edge.edge.id, augmented_edge.edge.clone()))
133            .collect::<HashMap<_, _>>();
134
135        // Two consecutive edges must share a node
136        // Two consecutive edges must not go into and out of a node at the same coordinate
137        for (edge1_id, edge2_id) in edge_ids.iter().tuple_windows() {
138            let edge1 = edges_by_id.get(edge1_id).unwrap();
139            let edge2 = edges_by_id.get(edge2_id).unwrap();
140            assert!(
141                edge1.target_node_id == edge2.source_node_id,
142                "Edges {} and {} don't share the same node ({} vs. {})",
143                edge1.id,
144                edge2.id,
145                edge1.target_node_id,
146                edge2.source_node_id
147            );
148
149            assert!(
150                edge1.target_coordinate < edge2.source_coordinate,
151                "Source coordinate {} for edge {} is before target coordinate {} for edge {}",
152                edge2.source_coordinate,
153                edge2.id,
154                edge1.target_coordinate,
155                edge1.id
156            );
157
158            assert!(
159                edge1.target_strand == edge2.source_strand,
160                "Strand mismatch between consecutive edges {} and {}",
161                edge1.id,
162                edge2.id,
163            );
164        }
165    }
166
167    pub fn create(
168        conn: &GraphConnection,
169        name: &str,
170        block_group_id: &HashId,
171        edge_ids: &[HashId],
172    ) -> Path {
173        Path::validate_edges(conn, edge_ids, block_group_id);
174        let hash = HashId(calculate_hash(&format!("{block_group_id}:{name}")));
175        let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
176        // TODO: Should we do something if edge_ids don't match here? Suppose we have a path
177        // for a block group with edges 1,2,3. And then the same path is added again with edges
178        // 5,6,7, should this be an error? Should we just keep adding edges?
179        let query =
180            "INSERT INTO paths (id, name, block_group_id, created_on) VALUES (?1, ?2, ?3, ?4);";
181        let mut stmt = conn.prepare(query).unwrap();
182
183        let path = match stmt.execute(params![hash, name, block_group_id, timestamp]) {
184            Ok(_) => Path {
185                id: hash,
186                name: name.to_string(),
187                block_group_id: *block_group_id,
188                created_on: timestamp,
189            },
190            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
191                if err.code == rusqlite::ErrorCode::ConstraintViolation {
192                    Path {
193                        id: hash,
194                        name: name.to_string(),
195                        block_group_id: *block_group_id,
196                        created_on: timestamp,
197                    }
198                } else {
199                    panic!("something bad happened querying the database")
200                }
201            }
202            Err(_) => {
203                panic!("something bad happened querying the database")
204            }
205        };
206
207        PathEdge::bulk_create(conn, &path.id, edge_ids);
208
209        path
210    }
211
212    pub fn delete(conn: &GraphConnection, name: &str, block_group_id: &HashId) {
213        let path = Path::get(
214            conn,
215            "select * from paths where name = ?1 and block_group_id = ?2",
216            params![name, block_group_id],
217        )
218        .unwrap();
219        PathEdge::delete(conn, &path.id);
220
221        let query = "DELETE FROM paths where name = ?1 AND block_group_id = ?2;";
222        conn.execute(query, params![name.to_string(), block_group_id])
223            .unwrap();
224    }
225
226    pub fn get_by_id(conn: &GraphConnection, path_id: &HashId) -> Path {
227        Path::get(conn, "select * from paths where id = ?1;", params![path_id]).unwrap()
228    }
229
230    pub fn query_for_collection(conn: &GraphConnection, collection_name: &str) -> Vec<Path> {
231        let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1";
232        Path::query(conn, query, params![collection_name])
233    }
234
235    pub fn query_for_collection_and_sample(
236        conn: &GraphConnection,
237        collection_name: &str,
238        sample_name: &str,
239    ) -> Vec<Path> {
240        let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1 AND block_groups.sample_name = ?2";
241        Path::query(conn, query, params![collection_name, sample_name])
242    }
243
244    pub fn sequence(&self, conn: &GraphConnection) -> String {
245        let blocks = self.blocks(conn);
246        blocks
247            .into_iter()
248            .map(|block| block.block_sequence)
249            .collect::<Vec<_>>()
250            .join("")
251    }
252
253    pub fn length(&self, conn: &GraphConnection) -> i64 {
254        let blocks = self.blocks(conn);
255        let end_block = blocks.last().unwrap();
256        // the last block is the terminal node, which starts at the end of the path
257        end_block.path_start
258    }
259
260    pub fn edge_pairs_to_block(
261        &self,
262        into: Edge,
263        out_of: Edge,
264        sequences_by_node_id: &HashMap<HashId, Sequence>,
265        current_path_length: i64,
266    ) -> PathBlock {
267        let sequence = sequences_by_node_id.get(&into.target_node_id).unwrap();
268        let start = into.target_coordinate;
269        let end = out_of.source_coordinate;
270
271        let strand = into.target_strand;
272        let block_sequence_length = end - start;
273
274        let block_sequence = if strand == Strand::Reverse {
275            revcomp(&sequence.get_sequence(start, end))
276        } else {
277            sequence.get_sequence(start, end)
278        };
279
280        PathBlock {
281            node_id: into.target_node_id,
282            block_sequence,
283            sequence_start: start,
284            sequence_end: end,
285            path_start: current_path_length,
286            path_end: current_path_length + block_sequence_length,
287            strand,
288        }
289    }
290
291    pub fn blocks(&self, conn: &GraphConnection) -> Vec<PathBlock> {
292        let edges = PathEdge::edges_for_path(conn, &self.id);
293
294        let mut sequence_node_ids = HashSet::new();
295        for edge in &edges {
296            if !is_start_node(edge.source_node_id) {
297                sequence_node_ids.insert(edge.source_node_id);
298            }
299            if !is_end_node(edge.target_node_id) {
300                sequence_node_ids.insert(edge.target_node_id);
301            }
302        }
303        let sequences_by_node_id = Node::get_sequences_by_node_ids(
304            conn,
305            &sequence_node_ids.into_iter().collect::<Vec<_>>(),
306        );
307
308        let mut blocks = vec![];
309        let mut path_length = 0;
310
311        // NOTE: Adding a "start block" for the dedicated start sequence with a range from i64::MIN
312        // to 0 makes interval tree lookups work better.  If the point being looked up is -1 (or
313        // below), it will return this block.
314        blocks.push(PathBlock {
315            node_id: PATH_START_NODE_ID,
316            block_sequence: "".to_string(),
317            sequence_start: 0,
318            sequence_end: 0,
319            path_start: i64::MIN + 1,
320            path_end: 0,
321            strand: Strand::Forward,
322        });
323
324        for (into, out_of) in edges.into_iter().tuple_windows() {
325            let block = self.edge_pairs_to_block(into, out_of, &sequences_by_node_id, path_length);
326            path_length += block.block_sequence.len() as i64;
327            blocks.push(block);
328        }
329
330        // NOTE: Adding an "end block" for the dedicated end sequence with a range from the path
331        // length to i64::MAX makes interval tree lookups work better.  If the point being looked up
332        // is the path length (or higher), it will return this block.
333        blocks.push(PathBlock {
334            node_id: PATH_END_NODE_ID,
335            block_sequence: "".to_string(),
336            sequence_start: 0,
337            sequence_end: 0,
338            path_start: path_length,
339            path_end: i64::MAX - 1,
340            strand: Strand::Forward,
341        });
342
343        blocks
344    }
345
346    pub fn intervaltree(&self, conn: &GraphConnection) -> IntervalTree<i64, NodeIntervalBlock> {
347        let blocks = self.blocks(conn);
348        let tree: IntervalTree<i64, NodeIntervalBlock> = blocks
349            .into_iter()
350            .map(|block| {
351                (
352                    block.path_start..block.path_end,
353                    NodeIntervalBlock {
354                        node_id: block.node_id,
355                        start: block.path_start,
356                        end: block.path_end,
357                        sequence_start: block.sequence_start,
358                        sequence_end: block.sequence_end,
359                        strand: block.strand,
360                    },
361                )
362            })
363            .collect();
364        tree
365    }
366
367    pub fn find_block_mappings(
368        &self,
369        conn: &GraphConnection,
370        other_path: &Path,
371    ) -> Vec<RangeMapping> {
372        // Given two paths, find the overlapping parts of common nodes/blocks and return a list af
373        // mappings from subranges of one path to corresponding shared subranges of the other path
374        let our_blocks = self.blocks(conn);
375        let their_blocks = other_path.blocks(conn);
376
377        let our_node_ids = our_blocks
378            .iter()
379            .map(|block| block.node_id)
380            .collect::<HashSet<_>>();
381        let their_node_ids = their_blocks
382            .iter()
383            .map(|block| block.node_id)
384            .collect::<HashSet<_>>();
385        let common_node_ids = our_node_ids
386            .intersection(&their_node_ids)
387            .copied()
388            .collect::<HashSet<_>>();
389
390        let mut our_blocks_by_node_id = HashMap::new();
391        for block in our_blocks
392            .iter()
393            .filter(|block| common_node_ids.contains(&block.node_id))
394        {
395            our_blocks_by_node_id
396                .entry(block.node_id)
397                .or_insert(vec![])
398                .push(block);
399        }
400
401        let mut their_blocks_by_node_id = HashMap::new();
402        for block in their_blocks
403            .iter()
404            .filter(|block| common_node_ids.contains(&block.node_id))
405        {
406            their_blocks_by_node_id
407                .entry(block.node_id)
408                .or_insert(vec![])
409                .push(block);
410        }
411
412        let mut mappings = vec![];
413        for node_id in common_node_ids {
414            let our_blocks = our_blocks_by_node_id.get(&node_id).unwrap();
415            let our_sorted_blocks = our_blocks
416                .clone()
417                .into_iter()
418                .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
419                .collect::<Vec<&PathBlock>>();
420            let their_blocks = their_blocks_by_node_id.get(&node_id).unwrap();
421            let their_sorted_blocks = their_blocks
422                .clone()
423                .into_iter()
424                .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
425                .collect::<Vec<&PathBlock>>();
426
427            for our_block in our_sorted_blocks {
428                let mut their_block_index = 0;
429
430                while their_block_index < their_sorted_blocks.len() {
431                    let their_block = their_sorted_blocks[their_block_index];
432                    if their_block.sequence_end <= our_block.sequence_start {
433                        // If their block is before ours, move along to the next one
434                        their_block_index += 1;
435                    } else {
436                        let our_range = Range {
437                            start: our_block.sequence_start,
438                            end: our_block.sequence_end,
439                        };
440                        let their_range = Range {
441                            start: their_block.sequence_start,
442                            end: their_block.sequence_end,
443                        };
444
445                        let common_ranges = our_range.overlap(&their_range);
446                        if !common_ranges.is_empty() {
447                            if common_ranges.len() > 1 {
448                                panic!(
449                                    "Found more than one common range for blocks with node {node_id}"
450                                );
451                            }
452
453                            let common_range = &common_ranges[0];
454                            let our_start = our_block.path_start
455                                + (common_range.start - our_block.sequence_start);
456                            let our_end = our_block.path_start
457                                + (common_range.end - our_block.sequence_start);
458                            let their_start = their_block.path_start
459                                + (common_range.start - their_block.sequence_start);
460                            let their_end = their_block.path_start
461                                + (common_range.end - their_block.sequence_start);
462
463                            let mapping = RangeMapping {
464                                source_range: Range {
465                                    start: our_start,
466                                    end: our_end,
467                                },
468                                target_range: Range {
469                                    start: their_start,
470                                    end: their_end,
471                                },
472                            };
473                            mappings.push(mapping);
474                        }
475
476                        if their_block.sequence_end < our_block.sequence_end {
477                            // If their block ends before ours, move along to the next one
478                            their_block_index += 1;
479                        } else {
480                            break;
481                        }
482                    }
483                }
484            }
485        }
486
487        mappings
488            .into_iter()
489            .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
490            .collect::<Vec<RangeMapping>>()
491    }
492
493    pub fn propagate_annotation(
494        annotation: Annotation,
495        mapping_tree: &IntervalTree<i64, RangeMapping>,
496        sequence_length: i64,
497    ) -> Option<Annotation> {
498        /*
499        This method contains the core logic for propagating an annotation from one path to another.
500        The core rules are:
501
502        1. If the annotation can be fully propagated to a matching subregion of the other path,
503            we propagate it
504
505        2. If only part of the annotation can be propagated to a partial subregion of the other
506            path, we propagate just that part and truncate the rest
507
508        3. If the first and last parts of the annotation can be propagated to subregions of the
509            other path (but not one or more parts of the middle of the annotation), we propagate the
510            entire annotation, including across the parts that don't match those of this path
511         */
512
513        // TODO: Add support for different propagation strategies
514        // TODO: Handle circular contigs
515        let start = annotation.start;
516        let end = annotation.end;
517        let mappings: Vec<RangeMapping> = mapping_tree
518            .query(RustRange { start, end })
519            .map(|x| x.value.clone())
520            .collect();
521        if mappings.is_empty() {
522            return None;
523        }
524
525        let sorted_mappings: Vec<RangeMapping> = mappings
526            .into_iter()
527            .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
528            .collect();
529        let first_mapping = sorted_mappings.first().unwrap();
530        let last_mapping = sorted_mappings.last().unwrap();
531        let translated_start = if first_mapping.source_range.contains(start) {
532            first_mapping.source_range.translate_index(
533                start,
534                &first_mapping.target_range,
535                sequence_length,
536                false,
537            )
538        } else {
539            Ok(first_mapping.target_range.start)
540        };
541
542        let translated_end = if last_mapping.source_range.contains(end) {
543            last_mapping.source_range.translate_index(
544                end,
545                &last_mapping.target_range,
546                sequence_length,
547                false,
548            )
549        } else {
550            Ok(last_mapping.target_range.end)
551        };
552
553        if translated_start.is_err() || translated_end.is_err() {
554            return None;
555        }
556
557        Some(Annotation {
558            name: annotation.name,
559            start: translated_start.expect("Failed to translate start"),
560            end: translated_end.expect("Failed to translate end"),
561        })
562    }
563
564    pub fn get_mapping_tree(
565        &self,
566        conn: &GraphConnection,
567        path: &Path,
568    ) -> IntervalTree<i64, RangeMapping> {
569        let mappings = self.find_block_mappings(conn, path);
570        mappings
571            .into_iter()
572            .map(|mapping| {
573                (
574                    mapping.source_range.start..mapping.source_range.end,
575                    mapping,
576                )
577            })
578            .collect()
579    }
580
581    pub fn propagate_annotations(
582        &self,
583        conn: &GraphConnection,
584        path: &Path,
585        annotations: Vec<Annotation>,
586    ) -> Vec<Annotation> {
587        let mapping_tree = self.get_mapping_tree(conn, path);
588        let sequence_length = path.sequence(conn).len();
589        annotations
590            .into_iter()
591            .filter_map(|annotation| {
592                Path::propagate_annotation(annotation, &mapping_tree, sequence_length as i64)
593            })
594            .clone()
595            .collect()
596    }
597
598    pub fn new_path_with(
599        &self,
600        conn: &GraphConnection,
601        path_start: i64,
602        path_end: i64,
603        edge_to_new_node: &Edge,
604        edge_from_new_node: &Edge,
605    ) -> Path {
606        // Creates a new path from the current one by replacing all edges between path_start and
607        // path_end with the input edges that are to and from a new node
608        let tree = self.intervaltree(conn);
609        let block_with_start = tree.query_point(path_start).next().unwrap().value;
610        let block_with_end = tree.query_point(path_end).next().unwrap().value;
611
612        let edges = PathEdge::edges_for_path(conn, &self.id);
613        let edges_by_source = edges
614            .iter()
615            .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
616            .collect::<HashMap<(_, i64), &Edge>>();
617        let edges_by_target = edges
618            .iter()
619            .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
620            .collect::<HashMap<(_, i64), &Edge>>();
621
622        let edge_before_new_node = if edge_to_new_node.source_node_id == PATH_START_NODE_ID {
623            None
624        } else {
625            let edge = edges_by_target
626                .get(&(block_with_start.node_id, block_with_start.sequence_start))
627                .unwrap();
628            Some(edge)
629        };
630        let edge_after_new_node = if edge_from_new_node.target_node_id == PATH_END_NODE_ID {
631            None
632        } else {
633            let edge = edges_by_source
634                .get(&(block_with_end.node_id, block_with_end.sequence_end))
635                .unwrap();
636            Some(edge)
637        };
638
639        let mut new_edge_ids = vec![];
640        if let Some(edge_before_new_node) = edge_before_new_node {
641            for edge in &edges {
642                new_edge_ids.push(edge.id);
643                if edge.id == edge_before_new_node.id {
644                    break;
645                }
646            }
647        }
648
649        new_edge_ids.push(edge_to_new_node.id);
650        new_edge_ids.push(edge_from_new_node.id);
651
652        if let Some(edge_after_new_node) = edge_after_new_node {
653            let mut after_new_node = false;
654            for edge in &edges {
655                if edge.id == edge_after_new_node.id {
656                    after_new_node = true;
657                }
658                if after_new_node {
659                    new_edge_ids.push(edge.id);
660                }
661            }
662        }
663
664        let new_name = format!(
665            "{}-start-{}-end-{}-node-{}",
666            self.name, path_start, path_end, edge_to_new_node.target_node_id
667        );
668        Path::create(conn, &new_name, &self.block_group_id, &new_edge_ids)
669    }
670
671    pub fn new_path_with_deletion(
672        &self,
673        conn: &GraphConnection,
674        deletion_start: i64,
675        deletion_end: i64,
676    ) -> Result<Path, QueryError> {
677        // Creates a new path from the current one by replacing all edges between deletion_start and
678        // deletion_end with a single edge spanning the deletion.
679        let tree = self.intervaltree(conn);
680        let block_with_start = tree.query_point(deletion_start).next().unwrap().value;
681        let block_with_end = tree.query_point(deletion_end).next().unwrap().value;
682
683        let node_deletion_start = deletion_start - block_with_start.start;
684        let node_deletion_end = deletion_end - block_with_end.start;
685        let deletion_edge_result = Edge::query(
686            conn,
687            "SELECT * FROM edges WHERE source_node_id = ?1 AND source_coordinate = ?2 AND target_node_id = ?3 AND target_coordinate = ?4",
688            rusqlite::params!(
689                SQLValue::from(block_with_start.node_id),
690                SQLValue::from(node_deletion_start),
691                SQLValue::from(block_with_end.node_id),
692                SQLValue::from(node_deletion_end)
693            ),
694        );
695
696        if deletion_edge_result.is_empty() {
697            let error_string = format!(
698                "No edge found from node {}:{node_deletion_start} to node {}:{node_deletion_end}",
699                block_with_start.node_id, block_with_end.node_id
700            );
701            return Err(QueryError::ResultsNotFound(error_string));
702        }
703
704        let deletion_edge = deletion_edge_result[0].clone();
705
706        let edges = PathEdge::edges_for_path(conn, &self.id);
707        let edges_by_source = edges
708            .iter()
709            .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
710            .collect::<HashMap<(_, i64), &Edge>>();
711        let edges_by_target = edges
712            .iter()
713            .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
714            .collect::<HashMap<(_, i64), &Edge>>();
715        let edge_before_deletion = edges_by_target
716            .get(&(block_with_start.node_id, block_with_start.sequence_start))
717            .unwrap();
718        let edge_after_deletion = edges_by_source
719            .get(&(block_with_end.node_id, block_with_end.sequence_end))
720            .unwrap();
721
722        let mut new_edge_ids = vec![];
723        let mut before_deletion = true;
724        let mut after_deletion = false;
725        for edge in &edges {
726            if before_deletion {
727                new_edge_ids.push(edge.id);
728                if edge.id == edge_before_deletion.id {
729                    before_deletion = false;
730                    new_edge_ids.push(deletion_edge.id);
731                }
732            } else if after_deletion {
733                new_edge_ids.push(edge.id);
734            } else if edge.id == edge_after_deletion.id {
735                after_deletion = true;
736                new_edge_ids.push(edge.id);
737            }
738        }
739
740        let new_name = format!(
741            "{}-start-{}-end-{}-node-{}",
742            self.name,
743            deletion_start,
744            deletion_end,
745            HashId::convert_str("")
746        );
747
748        Ok(Path::create(
749            conn,
750            &new_name,
751            &self.block_group_id,
752            &new_edge_ids,
753        ))
754    }
755
756    fn node_blocks_for_range(
757        &self,
758        intervaltree: &IntervalTree<i64, NodeIntervalBlock>,
759        start: i64,
760        end: i64,
761    ) -> Vec<NodeIntervalBlock> {
762        // TODO: Handle start/end values that are in the middle of blocks
763        let node_blocks: Vec<NodeIntervalBlock> = intervaltree
764            .query(RustRange { start, end })
765            .map(|x| x.value)
766            .sorted_by(|a, b| a.start.cmp(&b.start))
767            .collect();
768
769        if node_blocks.is_empty() {
770            return vec![];
771        }
772
773        let mut result_node_blocks = vec![];
774        let start_offset = if node_blocks[0].start < start {
775            start - node_blocks[0].start
776        } else {
777            0
778        };
779
780        let mut consolidated_block = NodeIntervalBlock {
781            node_id: node_blocks[0].node_id,
782            start: node_blocks[0].start + start_offset,
783            end: node_blocks[0].end,
784            sequence_start: node_blocks[0].sequence_start + start_offset,
785            sequence_end: node_blocks[0].sequence_end,
786            strand: node_blocks[0].strand,
787        };
788
789        for block in &node_blocks[1..] {
790            if consolidated_block.node_id == block.node_id && consolidated_block.end == block.start
791            {
792                // If the current block is immediately adjacent to the previous one (as recorded
793                // in the consolidated block), extend the consolidated block
794                consolidated_block = NodeIntervalBlock {
795                    node_id: consolidated_block.node_id,
796                    start: consolidated_block.start,
797                    end: block.end,
798                    sequence_start: consolidated_block.sequence_start,
799                    sequence_end: block.sequence_end,
800                    strand: consolidated_block.strand,
801                };
802            } else {
803                result_node_blocks.push(consolidated_block);
804                consolidated_block = *block;
805            }
806        }
807
808        let end_offset = if consolidated_block.end > end {
809            consolidated_block.end - end
810        } else {
811            0
812        };
813
814        result_node_blocks.push(NodeIntervalBlock {
815            node_id: consolidated_block.node_id,
816            start: consolidated_block.start,
817            end: consolidated_block.end - end_offset,
818            sequence_start: consolidated_block.sequence_start,
819            sequence_end: consolidated_block.sequence_end - end_offset,
820            strand: consolidated_block.strand,
821        });
822
823        result_node_blocks
824    }
825
826    pub fn node_block_partition(
827        &self,
828        conn: &GraphConnection,
829        ranges: Vec<Range>,
830    ) -> Vec<NodeIntervalBlock> {
831        let intervaltree = self.intervaltree(conn);
832        let mut partitioned_nodes = vec![];
833        for range in ranges {
834            let node_blocks = self.node_blocks_for_range(&intervaltree, range.start, range.end);
835            for node_block in &node_blocks {
836                partitioned_nodes.push(*node_block);
837            }
838        }
839        partitioned_nodes
840    }
841}
842
843impl Query for Path {
844    type Model = Path;
845
846    const TABLE_NAME: &'static str = "paths";
847
848    fn process_row(row: &Row) -> Self::Model {
849        Path {
850            id: row.get(0).unwrap(),
851            block_group_id: row.get(1).unwrap(),
852            name: row.get(2).unwrap(),
853            created_on: row.get(3).unwrap(),
854        }
855    }
856}
857
858#[cfg(test)]
859mod tests {
860    // Note this useful idiom: importing names from outer (for mod tests) scope.
861    use capnp::message::TypedBuilder;
862    use chrono::Utc;
863
864    use super::*;
865    use crate::{
866        block_group::{BlockGroup, NewBlockGroup},
867        block_group_edge::BlockGroupEdgeData,
868        collection::Collection,
869        test_helpers::get_connection,
870    };
871
872    fn create_test_block_group(conn: &GraphConnection) -> BlockGroup {
873        BlockGroup::create(
874            conn,
875            NewBlockGroup {
876                collection_name: "test collection",
877                sample_name: "test-sample",
878                name: "test block group",
879                ..Default::default()
880            },
881        )
882    }
883
884    #[test]
885    fn test_path_capnp_serialization() {
886        let path = Path {
887            id: HashId::pad_str(100),
888            block_group_id: HashId::pad_str(50),
889            name: "test_path".to_string(),
890            created_on: Utc::now().timestamp_nanos_opt().unwrap(),
891        };
892
893        let mut message = TypedBuilder::<PathCapnp::Owned>::new_default();
894        let mut root = message.init_root();
895        path.write_capnp(&mut root);
896
897        let deserialized = Path::read_capnp(root.into_reader());
898        assert_eq!(path, deserialized);
899    }
900
901    #[test]
902    fn test_path_delete() {
903        let conn = &get_connection(None).unwrap();
904        Collection::create(conn, "test collection");
905        let block_group = create_test_block_group(conn);
906
907        // Create first path
908        let sequence1 = Sequence::new()
909            .sequence_type("DNA")
910            .sequence("ATCGATCG")
911            .save(conn);
912        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
913        let edge1 = Edge::create(
914            conn,
915            PATH_START_NODE_ID,
916            -1,
917            Strand::Forward,
918            node1_id,
919            0,
920            Strand::Forward,
921        );
922        let edge2 = Edge::create(
923            conn,
924            node1_id,
925            8,
926            Strand::Forward,
927            PATH_END_NODE_ID,
928            -1,
929            Strand::Forward,
930        );
931
932        let edge_ids1 = vec![edge1.id, edge2.id];
933        let block_group_edges1 = edge_ids1
934            .iter()
935            .map(|edge_id| BlockGroupEdgeData {
936                block_group_id: block_group.id,
937                edge_id: (*edge_id),
938                chromosome_index: 0,
939                phased: 0,
940            })
941            .collect::<Vec<BlockGroupEdgeData>>();
942        BlockGroupEdge::bulk_create(conn, &block_group_edges1);
943
944        let _ = Path::create(conn, "chr1", &block_group.id, &edge_ids1);
945
946        // Create second path
947        let sequence2 = Sequence::new()
948            .sequence_type("DNA")
949            .sequence("TTTTTTTT")
950            .save(conn);
951        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
952        let edge3 = Edge::create(
953            conn,
954            PATH_START_NODE_ID,
955            -1,
956            Strand::Forward,
957            node2_id,
958            0,
959            Strand::Forward,
960        );
961        let edge4 = Edge::create(
962            conn,
963            node2_id,
964            8,
965            Strand::Forward,
966            PATH_END_NODE_ID,
967            -1,
968            Strand::Forward,
969        );
970
971        let edge_ids2 = vec![edge3.id, edge4.id];
972        let block_group_edges2 = edge_ids2
973            .iter()
974            .map(|edge_id| BlockGroupEdgeData {
975                block_group_id: block_group.id,
976                edge_id: (*edge_id),
977                chromosome_index: 0,
978                phased: 0,
979            })
980            .collect::<Vec<BlockGroupEdgeData>>();
981        BlockGroupEdge::bulk_create(conn, &block_group_edges2);
982
983        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids2);
984
985        let paths_before = Path::query_for_collection(conn, "test collection");
986        assert_eq!(paths_before.len(), 2);
987
988        Path::delete(conn, "chr1", &block_group.id);
989
990        let paths_after = Path::query_for_collection(conn, "test collection");
991        assert_eq!(paths_after.len(), 1);
992        assert_eq!(paths_after[0], path2);
993    }
994
995    #[test]
996    fn test_gets_sequence() {
997        let conn = &get_connection(None).unwrap();
998        Collection::create(conn, "test collection");
999        let block_group = create_test_block_group(conn);
1000        let sequence1 = Sequence::new()
1001            .sequence_type("DNA")
1002            .sequence("ATCGATCG")
1003            .save(conn);
1004        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1005        let edge1 = Edge::create(
1006            conn,
1007            PATH_START_NODE_ID,
1008            -123,
1009            Strand::Forward,
1010            node1_id,
1011            0,
1012            Strand::Forward,
1013        );
1014        let sequence2 = Sequence::new()
1015            .sequence_type("DNA")
1016            .sequence("AAAAAAAA")
1017            .save(conn);
1018        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1019        let edge2 = Edge::create(
1020            conn,
1021            node1_id,
1022            8,
1023            Strand::Forward,
1024            node2_id,
1025            1,
1026            Strand::Forward,
1027        );
1028        let sequence3 = Sequence::new()
1029            .sequence_type("DNA")
1030            .sequence("CCCCCCCC")
1031            .save(conn);
1032        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1033        let edge3 = Edge::create(
1034            conn,
1035            node2_id,
1036            8,
1037            Strand::Forward,
1038            node3_id,
1039            1,
1040            Strand::Forward,
1041        );
1042        let sequence4 = Sequence::new()
1043            .sequence_type("DNA")
1044            .sequence("GGGGGGGG")
1045            .save(conn);
1046        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1047        let edge4 = Edge::create(
1048            conn,
1049            node3_id,
1050            8,
1051            Strand::Forward,
1052            node4_id,
1053            1,
1054            Strand::Forward,
1055        );
1056        let edge5 = Edge::create(
1057            conn,
1058            node4_id,
1059            8,
1060            Strand::Forward,
1061            PATH_END_NODE_ID,
1062            -1,
1063            Strand::Forward,
1064        );
1065
1066        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1067        let block_group_edges = edge_ids
1068            .iter()
1069            .map(|edge_id| BlockGroupEdgeData {
1070                block_group_id: block_group.id,
1071                edge_id: *edge_id,
1072                chromosome_index: 0,
1073                phased: 0,
1074            })
1075            .collect::<Vec<BlockGroupEdgeData>>();
1076        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1077
1078        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1079        assert_eq!(path.sequence(conn), "ATCGATCGAAAAAAACCCCCCCGGGGGGG");
1080    }
1081
1082    #[test]
1083    fn test_gets_sequence_with_rc() {
1084        let conn = &get_connection(None).unwrap();
1085        Collection::create(conn, "test collection");
1086        let block_group = create_test_block_group(conn);
1087        let sequence1 = Sequence::new()
1088            .sequence_type("DNA")
1089            .sequence("ATCGATCG")
1090            .save(conn);
1091        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1092        let edge5 = Edge::create(
1093            conn,
1094            node1_id,
1095            8,
1096            Strand::Reverse,
1097            PATH_END_NODE_ID,
1098            0,
1099            Strand::Reverse,
1100        );
1101        let sequence2 = Sequence::new()
1102            .sequence_type("DNA")
1103            .sequence("AAAAAAAA")
1104            .save(conn);
1105        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1106        let edge4 = Edge::create(
1107            conn,
1108            node2_id,
1109            7,
1110            Strand::Reverse,
1111            node1_id,
1112            0,
1113            Strand::Reverse,
1114        );
1115        let sequence3 = Sequence::new()
1116            .sequence_type("DNA")
1117            .sequence("CCCCCCCC")
1118            .save(conn);
1119        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1120        let edge3 = Edge::create(
1121            conn,
1122            node3_id,
1123            7,
1124            Strand::Reverse,
1125            node2_id,
1126            0,
1127            Strand::Reverse,
1128        );
1129        let sequence4 = Sequence::new()
1130            .sequence_type("DNA")
1131            .sequence("GGGGGGGG")
1132            .save(conn);
1133        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1134        let edge2 = Edge::create(
1135            conn,
1136            node4_id,
1137            7,
1138            Strand::Reverse,
1139            node3_id,
1140            0,
1141            Strand::Reverse,
1142        );
1143        let edge1 = Edge::create(
1144            conn,
1145            PATH_START_NODE_ID,
1146            -1,
1147            Strand::Reverse,
1148            node4_id,
1149            0,
1150            Strand::Reverse,
1151        );
1152
1153        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1154        let block_group_edges = edge_ids
1155            .iter()
1156            .map(|edge_id| BlockGroupEdgeData {
1157                block_group_id: block_group.id,
1158                edge_id: *edge_id,
1159                chromosome_index: 0,
1160                phased: 0,
1161            })
1162            .collect::<Vec<BlockGroupEdgeData>>();
1163        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1164
1165        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1166        assert_eq!(path.sequence(conn), "CCCCCCCGGGGGGGTTTTTTTCGATCGAT");
1167    }
1168
1169    #[test]
1170    fn test_reverse_complement() {
1171        assert_eq!(revcomp("ATCCGG"), "CCGGAT");
1172        assert_eq!(revcomp("CNNNNA"), "TNNNNG");
1173        assert_eq!(revcomp("cNNgnAt"), "aTncNNg");
1174    }
1175
1176    #[test]
1177    fn test_intervaltree() {
1178        let conn = &get_connection(None).unwrap();
1179        Collection::create(conn, "test collection");
1180        let block_group = create_test_block_group(conn);
1181        let sequence1 = Sequence::new()
1182            .sequence_type("DNA")
1183            .sequence("ATCGATCG")
1184            .save(conn);
1185        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1186        let edge1 = Edge::create(
1187            conn,
1188            PATH_START_NODE_ID,
1189            -1,
1190            Strand::Forward,
1191            node1_id,
1192            0,
1193            Strand::Forward,
1194        );
1195        let sequence2 = Sequence::new()
1196            .sequence_type("DNA")
1197            .sequence("AAAAAAAA")
1198            .save(conn);
1199        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1200        let edge2 = Edge::create(
1201            conn,
1202            node1_id,
1203            8,
1204            Strand::Forward,
1205            node2_id,
1206            1,
1207            Strand::Forward,
1208        );
1209        let sequence3 = Sequence::new()
1210            .sequence_type("DNA")
1211            .sequence("CCCCCCCC")
1212            .save(conn);
1213        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1214        let edge3 = Edge::create(
1215            conn,
1216            node2_id,
1217            8,
1218            Strand::Forward,
1219            node3_id,
1220            1,
1221            Strand::Forward,
1222        );
1223        let sequence4 = Sequence::new()
1224            .sequence_type("DNA")
1225            .sequence("GGGGGGGG")
1226            .save(conn);
1227        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1228        let edge4 = Edge::create(
1229            conn,
1230            node3_id,
1231            8,
1232            Strand::Forward,
1233            node4_id,
1234            1,
1235            Strand::Forward,
1236        );
1237        let edge5 = Edge::create(
1238            conn,
1239            node4_id,
1240            8,
1241            Strand::Forward,
1242            PATH_END_NODE_ID,
1243            -1,
1244            Strand::Forward,
1245        );
1246
1247        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1248        let block_group_edges = edge_ids
1249            .iter()
1250            .map(|edge_id| BlockGroupEdgeData {
1251                block_group_id: block_group.id,
1252                edge_id: *edge_id,
1253                chromosome_index: 0,
1254                phased: 0,
1255            })
1256            .collect::<Vec<BlockGroupEdgeData>>();
1257        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1258
1259        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1260        let tree = path.intervaltree(conn);
1261        let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1262        assert_eq!(blocks1.len(), 1);
1263        let block1 = &blocks1[0];
1264        assert_eq!(block1.node_id, node1_id);
1265        assert_eq!(block1.sequence_start, 0);
1266        assert_eq!(block1.sequence_end, 8);
1267        assert_eq!(block1.start, 0);
1268        assert_eq!(block1.end, 8);
1269        assert_eq!(block1.strand, Strand::Forward);
1270
1271        let blocks2: Vec<NodeIntervalBlock> = tree.query_point(12).map(|x| x.value).collect();
1272        assert_eq!(blocks2.len(), 1);
1273        let block2 = &blocks2[0];
1274        assert_eq!(block2.node_id, node2_id);
1275        assert_eq!(block2.sequence_start, 1);
1276        assert_eq!(block2.sequence_end, 8);
1277        assert_eq!(block2.start, 8);
1278        assert_eq!(block2.end, 15);
1279        assert_eq!(block2.strand, Strand::Forward);
1280
1281        let blocks4: Vec<NodeIntervalBlock> = tree.query_point(25).map(|x| x.value).collect();
1282        assert_eq!(blocks4.len(), 1);
1283        let block4 = &blocks4[0];
1284        assert_eq!(block4.node_id, node4_id);
1285        assert_eq!(block4.sequence_start, 1);
1286        assert_eq!(block4.sequence_end, 8);
1287        assert_eq!(block4.start, 22);
1288        assert_eq!(block4.end, 29);
1289        assert_eq!(block4.strand, Strand::Forward);
1290    }
1291
1292    #[test]
1293    fn test_gets_sequence_with_edges_into_node_middles() {
1294        // Tests that if the edge from the virtual start node goes into the middle of the first
1295        // node, and the edge to the virtual end node comes from the middle of the last node, the
1296        // sequence is correctly generated
1297        let conn = &get_connection(None).unwrap();
1298        Collection::create(conn, "test collection");
1299        let block_group = create_test_block_group(conn);
1300        let sequence1 = Sequence::new()
1301            .sequence_type("DNA")
1302            .sequence("ATCGATCG")
1303            .save(conn);
1304        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1305        let edge1 = Edge::create(
1306            conn,
1307            PATH_START_NODE_ID,
1308            -1,
1309            Strand::Forward,
1310            node1_id,
1311            4,
1312            Strand::Forward,
1313        );
1314        let sequence2 = Sequence::new()
1315            .sequence_type("DNA")
1316            .sequence("AAAAAAAA")
1317            .save(conn);
1318        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1319        let edge2 = Edge::create(
1320            conn,
1321            node1_id,
1322            8,
1323            Strand::Forward,
1324            node2_id,
1325            0,
1326            Strand::Forward,
1327        );
1328        let sequence3 = Sequence::new()
1329            .sequence_type("DNA")
1330            .sequence("CCCCCCCC")
1331            .save(conn);
1332        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1333        let edge3 = Edge::create(
1334            conn,
1335            node2_id,
1336            8,
1337            Strand::Forward,
1338            node3_id,
1339            0,
1340            Strand::Forward,
1341        );
1342        let sequence4 = Sequence::new()
1343            .sequence_type("DNA")
1344            .sequence("GGGGGGGG")
1345            .save(conn);
1346        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1347        let edge4 = Edge::create(
1348            conn,
1349            node3_id,
1350            8,
1351            Strand::Forward,
1352            node4_id,
1353            0,
1354            Strand::Forward,
1355        );
1356        let edge5 = Edge::create(
1357            conn,
1358            node4_id,
1359            4,
1360            Strand::Forward,
1361            PATH_END_NODE_ID,
1362            -1,
1363            Strand::Forward,
1364        );
1365
1366        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1367        let block_group_edges = edge_ids
1368            .iter()
1369            .map(|edge_id| BlockGroupEdgeData {
1370                block_group_id: block_group.id,
1371                edge_id: *edge_id,
1372                chromosome_index: 0,
1373                phased: 0,
1374            })
1375            .collect::<Vec<BlockGroupEdgeData>>();
1376        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1377
1378        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1379        let tree = path.intervaltree(conn);
1380        let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1381        assert_eq!(blocks1.len(), 1);
1382        let block1 = &blocks1[0];
1383        assert_eq!(block1.node_id, node1_id);
1384        assert_eq!(block1.sequence_start, 4);
1385        assert_eq!(block1.sequence_end, 8);
1386        assert_eq!(block1.start, 0);
1387        assert_eq!(block1.end, 4);
1388        assert_eq!(block1.strand, Strand::Forward);
1389
1390        let blocks2: Vec<NodeIntervalBlock> = tree.query_point(10).map(|x| x.value).collect();
1391        assert_eq!(blocks2.len(), 1);
1392        let block2 = &blocks2[0];
1393        assert_eq!(block2.node_id, node2_id);
1394        assert_eq!(block2.sequence_start, 0);
1395        assert_eq!(block2.sequence_end, 8);
1396        assert_eq!(block2.start, 4);
1397        assert_eq!(block2.end, 12);
1398        assert_eq!(block2.strand, Strand::Forward);
1399
1400        let blocks4: Vec<NodeIntervalBlock> = tree.query_point(22).map(|x| x.value).collect();
1401        assert_eq!(blocks4.len(), 1);
1402        let block4 = &blocks4[0];
1403        assert_eq!(block4.node_id, node4_id);
1404        assert_eq!(block4.sequence_start, 0);
1405        assert_eq!(block4.sequence_end, 4);
1406        assert_eq!(block4.start, 20);
1407        assert_eq!(block4.end, 24);
1408        assert_eq!(block4.strand, Strand::Forward);
1409
1410        assert_eq!(path.sequence(conn), "ATCGAAAAAAAACCCCCCCCGGGG");
1411    }
1412
1413    #[test]
1414    fn test_full_block_mapping() {
1415        /*
1416            |--------| path: 1 sequence, (0, 8)
1417            |ATCGATCG|
1418            |--------| Same path: 1 sequence, (0, 8)
1419
1420            Mapping: (0, 8) -> (0, 8)
1421        */
1422        let conn = &get_connection(None).unwrap();
1423        Collection::create(conn, "test collection");
1424        let block_group = create_test_block_group(conn);
1425        let sequence1 = Sequence::new()
1426            .sequence_type("DNA")
1427            .sequence("ATCGATCG")
1428            .save(conn);
1429        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1430        let edge1 = Edge::create(
1431            conn,
1432            PATH_START_NODE_ID,
1433            -1,
1434            Strand::Forward,
1435            node1_id,
1436            0,
1437            Strand::Forward,
1438        );
1439        let edge2 = Edge::create(
1440            conn,
1441            node1_id,
1442            8,
1443            Strand::Forward,
1444            PATH_END_NODE_ID,
1445            -1,
1446            Strand::Forward,
1447        );
1448
1449        let edge_ids = vec![edge1.id, edge2.id];
1450        let block_group_edges = edge_ids
1451            .iter()
1452            .map(|edge_id| BlockGroupEdgeData {
1453                block_group_id: block_group.id,
1454                edge_id: *edge_id,
1455                chromosome_index: 0,
1456                phased: 0,
1457            })
1458            .collect::<Vec<BlockGroupEdgeData>>();
1459        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1460
1461        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1462
1463        let mappings = path.find_block_mappings(conn, &path);
1464        assert_eq!(mappings.len(), 1);
1465        let mapping = &mappings[0];
1466        assert_eq!(mapping.source_range, mapping.target_range);
1467        assert_eq!(mapping.source_range.start, 0);
1468        assert_eq!(mapping.source_range.end, 8);
1469        assert_eq!(mapping.target_range.start, 0);
1470        assert_eq!(mapping.target_range.end, 8);
1471    }
1472
1473    #[test]
1474    fn test_no_block_mapping_overlap() {
1475        /*
1476            |--------| -> path 1 (one node)
1477            |ATCGATCG| -> sequence
1478
1479            |--------| -> path 2 (one node, totally different sequence)
1480            |TTTTTTTT| -> other sequence
1481
1482            Mappings: empty
1483        */
1484        let conn = &get_connection(None).unwrap();
1485        Collection::create(conn, "test collection");
1486        let block_group = create_test_block_group(conn);
1487        let sequence1 = Sequence::new()
1488            .sequence_type("DNA")
1489            .sequence("ATCGATCG")
1490            .save(conn);
1491        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1492        let edge1 = Edge::create(
1493            conn,
1494            PATH_START_NODE_ID,
1495            -1,
1496            Strand::Forward,
1497            node1_id,
1498            0,
1499            Strand::Forward,
1500        );
1501        let edge2 = Edge::create(
1502            conn,
1503            node1_id,
1504            8,
1505            Strand::Forward,
1506            PATH_END_NODE_ID,
1507            -1,
1508            Strand::Forward,
1509        );
1510
1511        let edge_ids = vec![edge1.id, edge2.id];
1512        let block_group_edges = edge_ids
1513            .iter()
1514            .map(|edge_id| BlockGroupEdgeData {
1515                block_group_id: block_group.id,
1516                edge_id: *edge_id,
1517                chromosome_index: 0,
1518                phased: 0,
1519            })
1520            .collect::<Vec<BlockGroupEdgeData>>();
1521        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1522        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1523
1524        let sequence2 = Sequence::new()
1525            .sequence_type("DNA")
1526            .sequence("TTTTTTTT")
1527            .save(conn);
1528        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1529        let edge3 = Edge::create(
1530            conn,
1531            PATH_START_NODE_ID,
1532            -1,
1533            Strand::Forward,
1534            node2_id,
1535            0,
1536            Strand::Forward,
1537        );
1538        let edge4 = Edge::create(
1539            conn,
1540            node2_id,
1541            8,
1542            Strand::Forward,
1543            PATH_END_NODE_ID,
1544            -1,
1545            Strand::Forward,
1546        );
1547
1548        let edge_ids = vec![edge3.id, edge4.id];
1549        let block_group_edges = edge_ids
1550            .iter()
1551            .map(|edge_id| BlockGroupEdgeData {
1552                block_group_id: block_group.id,
1553                edge_id: *edge_id,
1554                chromosome_index: 0,
1555                phased: 0,
1556            })
1557            .collect::<Vec<BlockGroupEdgeData>>();
1558        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1559
1560        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1561
1562        let mappings = path1.find_block_mappings(conn, &path2);
1563        assert_eq!(mappings.len(), 0);
1564    }
1565
1566    #[test]
1567    fn test_partial_overlap_block_mapping() {
1568        /*
1569            path 1 (one node/sequence):
1570            |--------|
1571            |ATCGATCG| -> sequence (0, 8)
1572
1573            path 2:
1574            |----| -> (0, 4)
1575                |--------| -> (4, 12)
1576            |ATCG| -> shared with path 1
1577                |TTTTTTTT| -> unrelated sequence
1578
1579            Mapping: (0, 4) -> (0, 4)
1580        */
1581        let conn = &get_connection(None).unwrap();
1582        Collection::create(conn, "test collection");
1583        let block_group = create_test_block_group(conn);
1584        let sequence1 = Sequence::new()
1585            .sequence_type("DNA")
1586            .sequence("ATCGATCG")
1587            .save(conn);
1588        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1589        let edge1 = Edge::create(
1590            conn,
1591            PATH_START_NODE_ID,
1592            -1,
1593            Strand::Forward,
1594            node1_id,
1595            0,
1596            Strand::Forward,
1597        );
1598        let edge2 = Edge::create(
1599            conn,
1600            node1_id,
1601            8,
1602            Strand::Forward,
1603            PATH_END_NODE_ID,
1604            -1,
1605            Strand::Forward,
1606        );
1607
1608        let edge_ids = vec![edge1.id, edge2.id];
1609        let block_group_edges = edge_ids
1610            .iter()
1611            .map(|edge_id| BlockGroupEdgeData {
1612                block_group_id: block_group.id,
1613                edge_id: *edge_id,
1614                chromosome_index: 0,
1615                phased: 0,
1616            })
1617            .collect::<Vec<BlockGroupEdgeData>>();
1618        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1619
1620        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1621
1622        let sequence2 = Sequence::new()
1623            .sequence_type("DNA")
1624            .sequence("TTTTTTTT")
1625            .save(conn);
1626        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1627        let edge3 = Edge::create(
1628            conn,
1629            PATH_START_NODE_ID,
1630            -1,
1631            Strand::Forward,
1632            node1_id,
1633            0,
1634            Strand::Forward,
1635        );
1636        let edge4 = Edge::create(
1637            conn,
1638            node1_id,
1639            4,
1640            Strand::Forward,
1641            node2_id,
1642            0,
1643            Strand::Forward,
1644        );
1645        let edge5 = Edge::create(
1646            conn,
1647            node2_id,
1648            8,
1649            Strand::Forward,
1650            PATH_END_NODE_ID,
1651            -1,
1652            Strand::Forward,
1653        );
1654
1655        let edge_ids = vec![edge3.id, edge4.id, edge5.id];
1656        let block_group_edges = edge_ids
1657            .iter()
1658            .map(|edge_id| BlockGroupEdgeData {
1659                block_group_id: block_group.id,
1660                edge_id: *edge_id,
1661                chromosome_index: 0,
1662                phased: 0,
1663            })
1664            .collect::<Vec<BlockGroupEdgeData>>();
1665        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1666
1667        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1668
1669        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
1670
1671        let mappings = path1.find_block_mappings(conn, &path2);
1672        assert_eq!(mappings.len(), 1);
1673        let mapping = &mappings[0];
1674        assert_eq!(mapping.source_range, mapping.target_range);
1675        assert_eq!(mapping.source_range.start, 0);
1676        assert_eq!(mapping.source_range.end, 4);
1677        assert_eq!(mapping.target_range.start, 0);
1678        assert_eq!(mapping.target_range.end, 4);
1679    }
1680
1681    #[test]
1682    fn test_insertion_block_mapping() {
1683        /*
1684            path 1 (one node/sequence):
1685            |ATCGATCG| -> sequence (0, 8)
1686
1687            path 2:	Mimics a pure insertion
1688            |ATCG| -> (0, 4) shared with first half of path 1
1689                |TTTTTTTT| -> (4, 12) unrelated sequence
1690                        |ATCG| -> (12, 16) shared with second half of path 1
1691
1692            Mappings:
1693            (0, 4) -> (0, 4)
1694            (4, 8) -> (12, 16)
1695        */
1696        let conn = &get_connection(None).unwrap();
1697        Collection::create(conn, "test collection");
1698        let block_group = create_test_block_group(conn);
1699        let sequence1 = Sequence::new()
1700            .sequence_type("DNA")
1701            .sequence("ATCGATCG")
1702            .save(conn);
1703        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1704        let edge1 = Edge::create(
1705            conn,
1706            PATH_START_NODE_ID,
1707            -1,
1708            Strand::Forward,
1709            node1_id,
1710            0,
1711            Strand::Forward,
1712        );
1713        let edge2 = Edge::create(
1714            conn,
1715            node1_id,
1716            8,
1717            Strand::Forward,
1718            PATH_END_NODE_ID,
1719            -1,
1720            Strand::Forward,
1721        );
1722
1723        let edge_ids = vec![edge1.id, edge2.id];
1724        let block_group_edges = edge_ids
1725            .iter()
1726            .map(|edge_id| BlockGroupEdgeData {
1727                block_group_id: block_group.id,
1728                edge_id: *edge_id,
1729                chromosome_index: 0,
1730                phased: 0,
1731            })
1732            .collect::<Vec<BlockGroupEdgeData>>();
1733        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1734
1735        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1736
1737        let sequence2 = Sequence::new()
1738            .sequence_type("DNA")
1739            .sequence("TTTTTTTT")
1740            .save(conn);
1741        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1742        let edge4 = Edge::create(
1743            conn,
1744            node1_id,
1745            4,
1746            Strand::Forward,
1747            node2_id,
1748            0,
1749            Strand::Forward,
1750        );
1751        let edge5 = Edge::create(
1752            conn,
1753            node2_id,
1754            8,
1755            Strand::Forward,
1756            node1_id,
1757            4,
1758            Strand::Forward,
1759        );
1760
1761        let edge_ids = [edge4.id, edge5.id];
1762        let block_group_edges = edge_ids
1763            .iter()
1764            .map(|edge_id| BlockGroupEdgeData {
1765                block_group_id: block_group.id,
1766                edge_id: *edge_id,
1767                chromosome_index: 0,
1768                phased: 0,
1769            })
1770            .collect::<Vec<BlockGroupEdgeData>>();
1771        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1772
1773        let path2 = Path::create(
1774            conn,
1775            "chr2",
1776            &block_group.id,
1777            &[edge1.id, edge4.id, edge5.id, edge2.id],
1778        );
1779
1780        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
1781
1782        let mappings = path1.find_block_mappings(conn, &path2);
1783        assert_eq!(mappings.len(), 2);
1784        let mapping1 = &mappings[0];
1785        assert_eq!(mapping1.source_range, mapping1.target_range);
1786        assert_eq!(mapping1.source_range.start, 0);
1787        assert_eq!(mapping1.source_range.end, 4);
1788        assert_eq!(mapping1.target_range.start, 0);
1789        assert_eq!(mapping1.target_range.end, 4);
1790
1791        let mapping2 = &mappings[1];
1792        assert_eq!(mapping2.source_range.start, 4);
1793        assert_eq!(mapping2.source_range.end, 8);
1794        assert_eq!(mapping2.target_range.start, 12);
1795        assert_eq!(mapping2.target_range.end, 16);
1796    }
1797
1798    #[test]
1799    fn test_replacement_block_mapping() {
1800        /*
1801            path 1 (one node/sequence):
1802            |ATCGATCG| -> sequence (0, 8)
1803
1804            path 2:	Mimics a replacement
1805            |AT| -> (0, 2) shared with first two bp of path 1
1806              |TTTTTTTT| -> (2, 10) unrelated sequence
1807                      |CG| -> (10, 12) shared with last 2 bp of path 1
1808
1809            Mappings:
1810            (0, 2) -> (0, 2)
1811            (6, 8) -> (10, 12)
1812        */
1813        let conn = &get_connection(None).unwrap();
1814        Collection::create(conn, "test collection");
1815        let block_group = create_test_block_group(conn);
1816        let sequence1 = Sequence::new()
1817            .sequence_type("DNA")
1818            .sequence("ATCGATCG")
1819            .save(conn);
1820        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1821        let edge1 = Edge::create(
1822            conn,
1823            PATH_START_NODE_ID,
1824            -1,
1825            Strand::Forward,
1826            node1_id,
1827            0,
1828            Strand::Forward,
1829        );
1830        let edge2 = Edge::create(
1831            conn,
1832            node1_id,
1833            8,
1834            Strand::Forward,
1835            PATH_END_NODE_ID,
1836            -1,
1837            Strand::Forward,
1838        );
1839
1840        let edge_ids = [edge1.id, edge2.id];
1841        let block_group_edges = edge_ids
1842            .iter()
1843            .map(|edge_id| BlockGroupEdgeData {
1844                block_group_id: block_group.id,
1845                edge_id: *edge_id,
1846                chromosome_index: 0,
1847                phased: 0,
1848            })
1849            .collect::<Vec<BlockGroupEdgeData>>();
1850        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1851
1852        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1853
1854        let sequence2 = Sequence::new()
1855            .sequence_type("DNA")
1856            .sequence("TTTTTTTT")
1857            .save(conn);
1858        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1859        let edge4 = Edge::create(
1860            conn,
1861            node1_id,
1862            2,
1863            Strand::Forward,
1864            node2_id,
1865            0,
1866            Strand::Forward,
1867        );
1868        let edge5 = Edge::create(
1869            conn,
1870            node2_id,
1871            8,
1872            Strand::Forward,
1873            node1_id,
1874            6,
1875            Strand::Forward,
1876        );
1877
1878        let edge_ids = [edge4.id, edge5.id];
1879        let block_group_edges = edge_ids
1880            .iter()
1881            .map(|edge_id| BlockGroupEdgeData {
1882                block_group_id: block_group.id,
1883                edge_id: *edge_id,
1884                chromosome_index: 0,
1885                phased: 0,
1886            })
1887            .collect::<Vec<BlockGroupEdgeData>>();
1888        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1889
1890        let path2 = Path::create(
1891            conn,
1892            "chr2",
1893            &block_group.id,
1894            &[edge1.id, edge4.id, edge5.id, edge2.id],
1895        );
1896
1897        assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
1898
1899        let mappings = path1.find_block_mappings(conn, &path2);
1900        assert_eq!(mappings.len(), 2);
1901        let mapping1 = &mappings[0];
1902        assert_eq!(mapping1.source_range, mapping1.target_range);
1903        assert_eq!(mapping1.source_range.start, 0);
1904        assert_eq!(mapping1.source_range.end, 2);
1905        assert_eq!(mapping1.target_range.start, 0);
1906        assert_eq!(mapping1.target_range.end, 2);
1907
1908        let mapping2 = &mappings[1];
1909        assert_eq!(mapping2.source_range.start, 6);
1910        assert_eq!(mapping2.source_range.end, 8);
1911        assert_eq!(mapping2.target_range.start, 10);
1912        assert_eq!(mapping2.target_range.end, 12);
1913    }
1914
1915    #[test]
1916    fn test_deletion_block_mapping() {
1917        /*
1918            path 1 (one node/sequence):
1919            |ATCGATCG| -> sequence (0, 8)
1920
1921            path 2: Mimics a pure deletion
1922            |AT| -> (0, 2) shared with first two bp of path 1
1923              |CG| -> (2, 4) shared with last 2 bp of path 1
1924
1925            Mappings:
1926            (0, 2) -> (0, 2)
1927            (6, 8) -> (2, 4)
1928        */
1929        let conn = &get_connection(None).unwrap();
1930        Collection::create(conn, "test collection");
1931        let block_group = create_test_block_group(conn);
1932        let sequence1 = Sequence::new()
1933            .sequence_type("DNA")
1934            .sequence("ATCGATCG")
1935            .save(conn);
1936        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1937        let edge1 = Edge::create(
1938            conn,
1939            PATH_START_NODE_ID,
1940            -1,
1941            Strand::Forward,
1942            node1_id,
1943            0,
1944            Strand::Forward,
1945        );
1946        let edge2 = Edge::create(
1947            conn,
1948            node1_id,
1949            8,
1950            Strand::Forward,
1951            PATH_END_NODE_ID,
1952            -1,
1953            Strand::Forward,
1954        );
1955
1956        let edge_ids = [edge1.id, edge2.id];
1957        let block_group_edges = edge_ids
1958            .iter()
1959            .map(|edge_id| BlockGroupEdgeData {
1960                block_group_id: block_group.id,
1961                edge_id: *edge_id,
1962                chromosome_index: 0,
1963                phased: 0,
1964            })
1965            .collect::<Vec<BlockGroupEdgeData>>();
1966        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1967
1968        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1969
1970        let edge4 = Edge::create(
1971            conn,
1972            node1_id,
1973            2,
1974            Strand::Forward,
1975            node1_id,
1976            6,
1977            Strand::Forward,
1978        );
1979
1980        let edge_ids = [edge4.id];
1981        let block_group_edges = edge_ids
1982            .iter()
1983            .map(|edge_id| BlockGroupEdgeData {
1984                block_group_id: block_group.id,
1985                edge_id: *edge_id,
1986                chromosome_index: 0,
1987                phased: 0,
1988            })
1989            .collect::<Vec<BlockGroupEdgeData>>();
1990        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1991
1992        let path2 = Path::create(
1993            conn,
1994            "chr2",
1995            &block_group.id,
1996            &[edge1.id, edge4.id, edge2.id],
1997        );
1998
1999        assert_eq!(path2.sequence(conn), "ATCG");
2000
2001        let mappings = path1.find_block_mappings(conn, &path2);
2002        assert_eq!(mappings.len(), 2);
2003        let mapping1 = &mappings[0];
2004        assert_eq!(mapping1.source_range, mapping1.target_range);
2005        assert_eq!(mapping1.source_range.start, 0);
2006        assert_eq!(mapping1.source_range.end, 2);
2007        assert_eq!(mapping1.target_range.start, 0);
2008        assert_eq!(mapping1.target_range.end, 2);
2009
2010        let mapping2 = &mappings[1];
2011        assert_eq!(mapping2.source_range.start, 6);
2012        assert_eq!(mapping2.source_range.end, 8);
2013        assert_eq!(mapping2.target_range.start, 2);
2014        assert_eq!(mapping2.target_range.end, 4);
2015    }
2016
2017    #[test]
2018    fn test_two_block_insertion_mapping() {
2019        /*
2020            path 1 (two nodes/sequences):
2021            |ATCGATCG| -> sequence (0, 8)
2022                    |TTTTTTTT| -> sequence (8, 16)
2023
2024            path 2: Mimics a pure insertion in the middle of the two blocks
2025            |ATCGATCG| -> sequence (0, 8)
2026                    |AAAAAAAA| -> sequence (8, 16)
2027                            |TTTTTTTT| -> sequence (16, 24)
2028
2029            Mappings:
2030            (0, 8) -> (0, 8)
2031            (8, 16) -> (16, 24)
2032        */
2033        let conn = &get_connection(None).unwrap();
2034        Collection::create(conn, "test collection");
2035        let block_group = create_test_block_group(conn);
2036        let sequence1 = Sequence::new()
2037            .sequence_type("DNA")
2038            .sequence("ATCGATCG")
2039            .save(conn);
2040        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2041        let sequence2 = Sequence::new()
2042            .sequence_type("DNA")
2043            .sequence("TTTTTTTT")
2044            .save(conn);
2045        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2046        let edge1 = Edge::create(
2047            conn,
2048            PATH_START_NODE_ID,
2049            -1,
2050            Strand::Forward,
2051            node1_id,
2052            0,
2053            Strand::Forward,
2054        );
2055        let edge2 = Edge::create(
2056            conn,
2057            node1_id,
2058            8,
2059            Strand::Forward,
2060            node2_id,
2061            0,
2062            Strand::Forward,
2063        );
2064        let edge3 = Edge::create(
2065            conn,
2066            node2_id,
2067            8,
2068            Strand::Forward,
2069            PATH_END_NODE_ID,
2070            -1,
2071            Strand::Forward,
2072        );
2073
2074        let edge_ids = [edge1.id, edge2.id, edge3.id];
2075        let block_group_edges = edge_ids
2076            .iter()
2077            .map(|edge_id| BlockGroupEdgeData {
2078                block_group_id: block_group.id,
2079                edge_id: *edge_id,
2080                chromosome_index: 0,
2081                phased: 0,
2082            })
2083            .collect::<Vec<BlockGroupEdgeData>>();
2084        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2085
2086        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2087
2088        let sequence3 = Sequence::new()
2089            .sequence_type("DNA")
2090            .sequence("AAAAAAAA")
2091            .save(conn);
2092        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2093        let edge4 = Edge::create(
2094            conn,
2095            node1_id,
2096            8,
2097            Strand::Forward,
2098            node3_id,
2099            0,
2100            Strand::Forward,
2101        );
2102        let edge5 = Edge::create(
2103            conn,
2104            node3_id,
2105            8,
2106            Strand::Forward,
2107            node2_id,
2108            0,
2109            Strand::Forward,
2110        );
2111
2112        let edge_ids = [edge4.id, edge5.id];
2113        let block_group_edges = edge_ids
2114            .iter()
2115            .map(|edge_id| BlockGroupEdgeData {
2116                block_group_id: block_group.id,
2117                edge_id: *edge_id,
2118                chromosome_index: 0,
2119                phased: 0,
2120            })
2121            .collect::<Vec<BlockGroupEdgeData>>();
2122        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2123
2124        let path2 = Path::create(
2125            conn,
2126            "chr2",
2127            &block_group.id,
2128            &[edge1.id, edge4.id, edge5.id, edge3.id],
2129        );
2130
2131        assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
2132
2133        let mappings = path1.find_block_mappings(conn, &path2);
2134        assert_eq!(mappings.len(), 2);
2135        let mapping1 = &mappings[0];
2136        assert_eq!(mapping1.source_range, mapping1.target_range);
2137        assert_eq!(mapping1.source_range.start, 0);
2138        assert_eq!(mapping1.source_range.end, 8);
2139        assert_eq!(mapping1.target_range.start, 0);
2140        assert_eq!(mapping1.target_range.end, 8);
2141
2142        let mapping2 = &mappings[1];
2143        assert_eq!(mapping2.source_range.start, 8);
2144        assert_eq!(mapping2.source_range.end, 16);
2145        assert_eq!(mapping2.target_range.start, 16);
2146        assert_eq!(mapping2.target_range.end, 24);
2147    }
2148
2149    #[test]
2150    fn test_two_block_replacement_mapping() {
2151        /*
2152            path 1 (two nodes/sequences):
2153            |ATCGATCG| -> sequence (0, 8)
2154                    |TTTTTTTT| -> sequence (8, 16)
2155
2156            path 2: Mimics a replacement across the two blocks
2157            |ATCG| -> sequence (0, 4)
2158                |AAAAAAAA| -> sequence (4, 12)
2159                        |TTTT| -> sequence (12, 16)
2160
2161            Mappings:
2162            (0, 4) -> (0, 4)
2163            (12, 16) -> (12, 16)
2164        */
2165        let conn = &get_connection(None).unwrap();
2166        Collection::create(conn, "test collection");
2167        let block_group = create_test_block_group(conn);
2168        let sequence1 = Sequence::new()
2169            .sequence_type("DNA")
2170            .sequence("ATCGATCG")
2171            .save(conn);
2172        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2173        let sequence2 = Sequence::new()
2174            .sequence_type("DNA")
2175            .sequence("TTTTTTTT")
2176            .save(conn);
2177        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2178        let edge1 = Edge::create(
2179            conn,
2180            PATH_START_NODE_ID,
2181            -1,
2182            Strand::Forward,
2183            node1_id,
2184            0,
2185            Strand::Forward,
2186        );
2187        let edge2 = Edge::create(
2188            conn,
2189            node1_id,
2190            8,
2191            Strand::Forward,
2192            node2_id,
2193            0,
2194            Strand::Forward,
2195        );
2196        let edge3 = Edge::create(
2197            conn,
2198            node2_id,
2199            8,
2200            Strand::Forward,
2201            PATH_END_NODE_ID,
2202            -1,
2203            Strand::Forward,
2204        );
2205
2206        let edge_ids = [edge1.id, edge2.id, edge3.id];
2207        let block_group_edges = edge_ids
2208            .iter()
2209            .map(|edge_id| BlockGroupEdgeData {
2210                block_group_id: block_group.id,
2211                edge_id: *edge_id,
2212                chromosome_index: 0,
2213                phased: 0,
2214            })
2215            .collect::<Vec<BlockGroupEdgeData>>();
2216        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2217
2218        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2219
2220        let sequence3 = Sequence::new()
2221            .sequence_type("DNA")
2222            .sequence("AAAAAAAA")
2223            .save(conn);
2224        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2225        let edge4 = Edge::create(
2226            conn,
2227            node1_id,
2228            4,
2229            Strand::Forward,
2230            node3_id,
2231            0,
2232            Strand::Forward,
2233        );
2234        let edge5 = Edge::create(
2235            conn,
2236            node3_id,
2237            8,
2238            Strand::Forward,
2239            node2_id,
2240            4,
2241            Strand::Forward,
2242        );
2243
2244        let edge_ids = [edge4.id, edge5.id];
2245        let block_group_edges = edge_ids
2246            .iter()
2247            .map(|edge_id| BlockGroupEdgeData {
2248                block_group_id: block_group.id,
2249                edge_id: *edge_id,
2250                chromosome_index: 0,
2251                phased: 0,
2252            })
2253            .collect::<Vec<BlockGroupEdgeData>>();
2254        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2255
2256        let path2 = Path::create(
2257            conn,
2258            "chr2",
2259            &block_group.id,
2260            &[edge1.id, edge4.id, edge5.id, edge3.id],
2261        );
2262
2263        assert_eq!(path2.sequence(conn), "ATCGAAAAAAAATTTT");
2264
2265        let mappings = path1.find_block_mappings(conn, &path2);
2266        assert_eq!(mappings.len(), 2);
2267        let mapping1 = &mappings[0];
2268        assert_eq!(mapping1.source_range, mapping1.target_range);
2269        assert_eq!(mapping1.source_range.start, 0);
2270        assert_eq!(mapping1.source_range.end, 4);
2271        assert_eq!(mapping1.target_range.start, 0);
2272        assert_eq!(mapping1.target_range.end, 4);
2273
2274        let mapping2 = &mappings[1];
2275        assert_eq!(mapping2.source_range.start, 12);
2276        assert_eq!(mapping2.source_range.end, 16);
2277        assert_eq!(mapping2.target_range.start, 12);
2278        assert_eq!(mapping2.target_range.end, 16);
2279    }
2280
2281    #[test]
2282    fn test_two_block_deletion_mapping() {
2283        /*
2284            path 1 (two nodes/sequences):
2285            |ATCGATCG| -> sequence (0, 8)
2286                    |TTTTTTTT| -> sequence (8, 16)
2287
2288            path 2: Mimics a deletion across the two blocks
2289            |ATCG| -> sequence (0, 4)
2290                |TTTT| -> sequence (4, 8)
2291
2292            Mappings:
2293            (0, 4) -> (0, 4)
2294            (12, 16) -> (4, 8)
2295        */
2296        let conn = &get_connection(None).unwrap();
2297        Collection::create(conn, "test collection");
2298        let block_group = create_test_block_group(conn);
2299        let sequence1 = Sequence::new()
2300            .sequence_type("DNA")
2301            .sequence("ATCGATCG")
2302            .save(conn);
2303        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2304        let sequence2 = Sequence::new()
2305            .sequence_type("DNA")
2306            .sequence("TTTTTTTT")
2307            .save(conn);
2308        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2309        let edge1 = Edge::create(
2310            conn,
2311            PATH_START_NODE_ID,
2312            -1,
2313            Strand::Forward,
2314            node1_id,
2315            0,
2316            Strand::Forward,
2317        );
2318        let edge2 = Edge::create(
2319            conn,
2320            node1_id,
2321            8,
2322            Strand::Forward,
2323            node2_id,
2324            0,
2325            Strand::Forward,
2326        );
2327        let edge3 = Edge::create(
2328            conn,
2329            node2_id,
2330            8,
2331            Strand::Forward,
2332            PATH_END_NODE_ID,
2333            -1,
2334            Strand::Forward,
2335        );
2336
2337        let edge_ids = [edge1.id, edge2.id, edge3.id];
2338        let block_group_edges = edge_ids
2339            .iter()
2340            .map(|edge_id| BlockGroupEdgeData {
2341                block_group_id: block_group.id,
2342                edge_id: *edge_id,
2343                chromosome_index: 0,
2344                phased: 0,
2345            })
2346            .collect::<Vec<BlockGroupEdgeData>>();
2347        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2348
2349        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2350
2351        let edge4 = Edge::create(
2352            conn,
2353            node1_id,
2354            4,
2355            Strand::Forward,
2356            node2_id,
2357            4,
2358            Strand::Forward,
2359        );
2360
2361        let edge_ids = [edge4.id];
2362        let block_group_edges = edge_ids
2363            .iter()
2364            .map(|edge_id| BlockGroupEdgeData {
2365                block_group_id: block_group.id,
2366                edge_id: *edge_id,
2367                chromosome_index: 0,
2368                phased: 0,
2369            })
2370            .collect::<Vec<BlockGroupEdgeData>>();
2371        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2372
2373        let path2 = Path::create(
2374            conn,
2375            "chr2",
2376            &block_group.id,
2377            &[edge1.id, edge4.id, edge3.id],
2378        );
2379
2380        assert_eq!(path2.sequence(conn), "ATCGTTTT");
2381
2382        let mappings = path1.find_block_mappings(conn, &path2);
2383        assert_eq!(mappings.len(), 2);
2384        let mapping1 = &mappings[0];
2385        assert_eq!(mapping1.source_range, mapping1.target_range);
2386        assert_eq!(mapping1.source_range.start, 0);
2387        assert_eq!(mapping1.source_range.end, 4);
2388        assert_eq!(mapping1.target_range.start, 0);
2389        assert_eq!(mapping1.target_range.end, 4);
2390
2391        let mapping2 = &mappings[1];
2392        assert_eq!(mapping2.source_range.start, 12);
2393        assert_eq!(mapping2.source_range.end, 16);
2394        assert_eq!(mapping2.target_range.start, 4);
2395        assert_eq!(mapping2.target_range.end, 8);
2396    }
2397
2398    #[test]
2399    fn test_annotation_propagation_full_overlap() {
2400        /*
2401            |--------| path: 1 sequence, (0, 8)
2402            |ATCGATCG|
2403            |--------| Same path: 1 sequence, (0, 8)
2404
2405            Mapping: (0, 8) -> (0, 8)
2406        */
2407        let conn = &get_connection(None).unwrap();
2408        Collection::create(conn, "test collection");
2409        let block_group = create_test_block_group(conn);
2410        let sequence1 = Sequence::new()
2411            .sequence_type("DNA")
2412            .sequence("ATCGATCG")
2413            .save(conn);
2414        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2415        let edge1 = Edge::create(
2416            conn,
2417            PATH_START_NODE_ID,
2418            -1,
2419            Strand::Forward,
2420            node1_id,
2421            0,
2422            Strand::Forward,
2423        );
2424        let edge2 = Edge::create(
2425            conn,
2426            node1_id,
2427            8,
2428            Strand::Forward,
2429            PATH_END_NODE_ID,
2430            -1,
2431            Strand::Forward,
2432        );
2433
2434        let edge_ids = [edge1.id, edge2.id];
2435        let block_group_edges = edge_ids
2436            .iter()
2437            .map(|edge_id| BlockGroupEdgeData {
2438                block_group_id: block_group.id,
2439                edge_id: *edge_id,
2440                chromosome_index: 0,
2441                phased: 0,
2442            })
2443            .collect::<Vec<BlockGroupEdgeData>>();
2444        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2445
2446        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2447
2448        let annotation = Annotation {
2449            name: "foo".to_string(),
2450            start: 0,
2451            end: 8,
2452        };
2453        let annotations = path.propagate_annotations(conn, &path, vec![annotation]);
2454        assert_eq!(annotations.len(), 1);
2455        let result_annotation = &annotations[0];
2456        assert_eq!(result_annotation.name, "foo");
2457        assert_eq!(result_annotation.start, 0);
2458        assert_eq!(result_annotation.end, 8);
2459    }
2460
2461    #[test]
2462    fn test_propagate_annotations_no_overlap() {
2463        /*
2464            |--------| -> path 1 (one node)
2465            |ATCGATCG| -> sequence
2466
2467            |--------| -> path 2 (one node, totally different sequence)
2468            |TTTTTTTT| -> other sequence
2469
2470            Mappings: empty
2471        */
2472        let conn = &get_connection(None).unwrap();
2473        Collection::create(conn, "test collection");
2474        let block_group = create_test_block_group(conn);
2475        let sequence1 = Sequence::new()
2476            .sequence_type("DNA")
2477            .sequence("ATCGATCG")
2478            .save(conn);
2479        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2480        let edge1 = Edge::create(
2481            conn,
2482            PATH_START_NODE_ID,
2483            -1,
2484            Strand::Forward,
2485            node1_id,
2486            0,
2487            Strand::Forward,
2488        );
2489        let edge2 = Edge::create(
2490            conn,
2491            node1_id,
2492            8,
2493            Strand::Forward,
2494            PATH_END_NODE_ID,
2495            -1,
2496            Strand::Forward,
2497        );
2498
2499        let edge_ids = [edge1.id, edge2.id];
2500        let block_group_edges = edge_ids
2501            .iter()
2502            .map(|edge_id| BlockGroupEdgeData {
2503                block_group_id: block_group.id,
2504                edge_id: *edge_id,
2505                chromosome_index: 0,
2506                phased: 0,
2507            })
2508            .collect::<Vec<BlockGroupEdgeData>>();
2509        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2510
2511        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2512
2513        let sequence2 = Sequence::new()
2514            .sequence_type("DNA")
2515            .sequence("TTTTTTTT")
2516            .save(conn);
2517        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2518        let edge3 = Edge::create(
2519            conn,
2520            PATH_START_NODE_ID,
2521            -1,
2522            Strand::Forward,
2523            node2_id,
2524            0,
2525            Strand::Forward,
2526        );
2527        let edge4 = Edge::create(
2528            conn,
2529            node2_id,
2530            8,
2531            Strand::Forward,
2532            PATH_END_NODE_ID,
2533            -1,
2534            Strand::Forward,
2535        );
2536
2537        let edge_ids = [edge3.id, edge4.id];
2538        let block_group_edges = edge_ids
2539            .iter()
2540            .map(|edge_id| BlockGroupEdgeData {
2541                block_group_id: block_group.id,
2542                edge_id: *edge_id,
2543                chromosome_index: 0,
2544                phased: 0,
2545            })
2546            .collect::<Vec<BlockGroupEdgeData>>();
2547        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2548
2549        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2550
2551        let annotation = Annotation {
2552            name: "foo".to_string(),
2553            start: 0,
2554            end: 8,
2555        };
2556        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2557        assert_eq!(annotations.len(), 0);
2558    }
2559
2560    #[test]
2561    fn test_propagate_annotations_partial_overlap() {
2562        /*
2563            path 1 (one node/sequence):
2564            |--------|
2565            |ATCGATCG| -> sequence (0, 8)
2566
2567            path 2:
2568            |----| -> (0, 4)
2569                |--------| -> (4, 12)
2570            |ATCG| -> shared with path 1
2571                |TTTTTTTT| -> unrelated sequence
2572
2573            Mapping: (0, 4) -> (0, 4)
2574        */
2575        let conn = &get_connection(None).unwrap();
2576        Collection::create(conn, "test collection");
2577        let block_group = create_test_block_group(conn);
2578        let sequence1 = Sequence::new()
2579            .sequence_type("DNA")
2580            .sequence("ATCGATCG")
2581            .save(conn);
2582        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2583        let edge1 = Edge::create(
2584            conn,
2585            PATH_START_NODE_ID,
2586            -1,
2587            Strand::Forward,
2588            node1_id,
2589            0,
2590            Strand::Forward,
2591        );
2592        let edge2 = Edge::create(
2593            conn,
2594            node1_id,
2595            8,
2596            Strand::Forward,
2597            PATH_END_NODE_ID,
2598            -1,
2599            Strand::Forward,
2600        );
2601
2602        let edge_ids = vec![edge1.id, edge2.id];
2603        let block_group_edges = edge_ids
2604            .iter()
2605            .map(|edge_id| BlockGroupEdgeData {
2606                block_group_id: block_group.id,
2607                edge_id: *edge_id,
2608                chromosome_index: 0,
2609                phased: 0,
2610            })
2611            .collect::<Vec<BlockGroupEdgeData>>();
2612        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2613
2614        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2615
2616        let sequence2 = Sequence::new()
2617            .sequence_type("DNA")
2618            .sequence("TTTTTTTT")
2619            .save(conn);
2620        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2621        let edge3 = Edge::create(
2622            conn,
2623            PATH_START_NODE_ID,
2624            -1,
2625            Strand::Forward,
2626            node1_id,
2627            0,
2628            Strand::Forward,
2629        );
2630        let edge4 = Edge::create(
2631            conn,
2632            node1_id,
2633            4,
2634            Strand::Forward,
2635            node2_id,
2636            0,
2637            Strand::Forward,
2638        );
2639        let edge5 = Edge::create(
2640            conn,
2641            node2_id,
2642            8,
2643            Strand::Forward,
2644            PATH_END_NODE_ID,
2645            -1,
2646            Strand::Forward,
2647        );
2648
2649        let edge_ids = vec![edge3.id, edge4.id, edge5.id];
2650        let block_group_edges = edge_ids
2651            .iter()
2652            .map(|edge_id| BlockGroupEdgeData {
2653                block_group_id: block_group.id,
2654                edge_id: *edge_id,
2655                chromosome_index: 0,
2656                phased: 0,
2657            })
2658            .collect::<Vec<BlockGroupEdgeData>>();
2659        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2660
2661        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2662
2663        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
2664
2665        let annotation = Annotation {
2666            name: "foo".to_string(),
2667            start: 0,
2668            end: 8,
2669        };
2670        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2671        assert_eq!(annotations.len(), 1);
2672        let result_annotation = &annotations[0];
2673        assert_eq!(result_annotation.name, "foo");
2674        assert_eq!(result_annotation.start, 0);
2675        assert_eq!(result_annotation.end, 4);
2676    }
2677
2678    #[test]
2679    fn test_propagate_annotations_with_insertion() {
2680        /*
2681            path 1 (one node/sequence):
2682            |ATCGATCG| -> sequence (0, 8)
2683
2684            path 2:	Mimics a pure insertion
2685            |ATCG| -> (0, 4) shared with first half of path 1
2686                |TTTTTTTT| -> (4, 12) unrelated sequence
2687                        |ATCG| -> (12, 16) shared with second half of path 1
2688
2689            Mappings:
2690            (0, 4) -> (0, 4)
2691            (4, 8) -> (12, 16)
2692        */
2693        let conn = &get_connection(None).unwrap();
2694        Collection::create(conn, "test collection");
2695        let block_group = create_test_block_group(conn);
2696        let sequence1 = Sequence::new()
2697            .sequence_type("DNA")
2698            .sequence("ATCGATCG")
2699            .save(conn);
2700        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2701        let edge1 = Edge::create(
2702            conn,
2703            PATH_START_NODE_ID,
2704            -1,
2705            Strand::Forward,
2706            node1_id,
2707            0,
2708            Strand::Forward,
2709        );
2710        let edge2 = Edge::create(
2711            conn,
2712            node1_id,
2713            8,
2714            Strand::Forward,
2715            PATH_END_NODE_ID,
2716            -1,
2717            Strand::Forward,
2718        );
2719
2720        let edge_ids = vec![edge1.id, edge2.id];
2721        let block_group_edges = edge_ids
2722            .iter()
2723            .map(|edge_id| BlockGroupEdgeData {
2724                block_group_id: block_group.id,
2725                edge_id: *edge_id,
2726                chromosome_index: 0,
2727                phased: 0,
2728            })
2729            .collect::<Vec<BlockGroupEdgeData>>();
2730        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2731
2732        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2733
2734        let sequence2 = Sequence::new()
2735            .sequence_type("DNA")
2736            .sequence("TTTTTTTT")
2737            .save(conn);
2738        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2739        let edge4 = Edge::create(
2740            conn,
2741            node1_id,
2742            4,
2743            Strand::Forward,
2744            node2_id,
2745            0,
2746            Strand::Forward,
2747        );
2748        let edge5 = Edge::create(
2749            conn,
2750            node2_id,
2751            8,
2752            Strand::Forward,
2753            node1_id,
2754            4,
2755            Strand::Forward,
2756        );
2757
2758        let edge_ids = [edge4.id, edge5.id];
2759        let block_group_edges = edge_ids
2760            .iter()
2761            .map(|edge_id| BlockGroupEdgeData {
2762                block_group_id: block_group.id,
2763                edge_id: *edge_id,
2764                chromosome_index: 0,
2765                phased: 0,
2766            })
2767            .collect::<Vec<BlockGroupEdgeData>>();
2768        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2769
2770        let path2 = Path::create(
2771            conn,
2772            "chr2",
2773            &block_group.id,
2774            &[edge1.id, edge4.id, edge5.id, edge2.id],
2775        );
2776
2777        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
2778
2779        let annotation = Annotation {
2780            name: "foo".to_string(),
2781            start: 0,
2782            end: 8,
2783        };
2784
2785        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2786        assert_eq!(annotations.len(), 1);
2787
2788        // Under the default propagation strategy, the annotation is expanded to cover anything in
2789        // between parts it covers
2790        let result_annotation = &annotations[0];
2791        assert_eq!(result_annotation.name, "foo");
2792        assert_eq!(result_annotation.start, 0);
2793        assert_eq!(result_annotation.end, 16);
2794    }
2795
2796    #[test]
2797    fn test_propagate_annotations_with_replacement() {
2798        /*
2799            path 1 (one node/sequence):
2800            |ATCGATCG| -> sequence (0, 8)
2801
2802            path 2:	Mimics a replacement
2803            |AT| -> (0, 2) shared with first two bp of path 1
2804              |TTTTTTTT| -> (2, 10) unrelated sequence
2805                      |CG| -> (10, 12) shared with last 2 bp of path 1
2806
2807            Mappings:
2808            (0, 2) -> (0, 2)
2809            (6, 8) -> (10, 12)
2810        */
2811        let conn = &get_connection(None).unwrap();
2812        Collection::create(conn, "test collection");
2813        let block_group = create_test_block_group(conn);
2814        let sequence1 = Sequence::new()
2815            .sequence_type("DNA")
2816            .sequence("ATCGATCG")
2817            .save(conn);
2818        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2819        let edge1 = Edge::create(
2820            conn,
2821            PATH_START_NODE_ID,
2822            -1,
2823            Strand::Forward,
2824            node1_id,
2825            0,
2826            Strand::Forward,
2827        );
2828        let edge2 = Edge::create(
2829            conn,
2830            node1_id,
2831            8,
2832            Strand::Forward,
2833            PATH_END_NODE_ID,
2834            -1,
2835            Strand::Forward,
2836        );
2837
2838        let edge_ids = [edge1.id, edge2.id];
2839        let block_group_edges = edge_ids
2840            .iter()
2841            .map(|edge_id| BlockGroupEdgeData {
2842                block_group_id: block_group.id,
2843                edge_id: *edge_id,
2844                chromosome_index: 0,
2845                phased: 0,
2846            })
2847            .collect::<Vec<BlockGroupEdgeData>>();
2848        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2849
2850        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2851
2852        let sequence2 = Sequence::new()
2853            .sequence_type("DNA")
2854            .sequence("TTTTTTTT")
2855            .save(conn);
2856        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2857        let edge4 = Edge::create(
2858            conn,
2859            node1_id,
2860            2,
2861            Strand::Forward,
2862            node2_id,
2863            0,
2864            Strand::Forward,
2865        );
2866        let edge5 = Edge::create(
2867            conn,
2868            node2_id,
2869            8,
2870            Strand::Forward,
2871            node1_id,
2872            6,
2873            Strand::Forward,
2874        );
2875
2876        let edge_ids = [edge4.id, edge5.id];
2877        let block_group_edges = edge_ids
2878            .iter()
2879            .map(|edge_id| BlockGroupEdgeData {
2880                block_group_id: block_group.id,
2881                edge_id: *edge_id,
2882                chromosome_index: 0,
2883                phased: 0,
2884            })
2885            .collect::<Vec<BlockGroupEdgeData>>();
2886        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2887
2888        let path2 = Path::create(
2889            conn,
2890            "chr2",
2891            &block_group.id,
2892            &[edge1.id, edge4.id, edge5.id, edge2.id],
2893        );
2894
2895        assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
2896
2897        let annotation = Annotation {
2898            name: "foo".to_string(),
2899            start: 0,
2900            end: 4,
2901        };
2902
2903        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2904        assert_eq!(annotations.len(), 1);
2905
2906        // Under the default propagation strategy, the annotation is truncated
2907        let result_annotation = &annotations[0];
2908        assert_eq!(result_annotation.name, "foo");
2909        assert_eq!(result_annotation.start, 0);
2910        assert_eq!(result_annotation.end, 2);
2911    }
2912
2913    #[test]
2914    fn test_propagate_annotations_with_insertion_across_two_blocks() {
2915        /*
2916            path 1 (two nodes/sequences):
2917            |ATCGATCG| -> sequence (0, 8)
2918                    |TTTTTTTT| -> sequence (8, 16)
2919
2920            path 2: Mimics a pure insertion in the middle of the two blocks
2921            |ATCGATCG| -> sequence (0, 8)
2922                    |AAAAAAAA| -> sequence (8, 16)
2923                            |TTTTTTTT| -> sequence (16, 24)
2924
2925            Mappings:
2926            (0, 8) -> (0, 8)
2927            (8, 16) -> (16, 24)
2928        */
2929        let conn = &get_connection(None).unwrap();
2930        Collection::create(conn, "test collection");
2931        let block_group = create_test_block_group(conn);
2932        let sequence1 = Sequence::new()
2933            .sequence_type("DNA")
2934            .sequence("ATCGATCG")
2935            .save(conn);
2936        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2937        let sequence2 = Sequence::new()
2938            .sequence_type("DNA")
2939            .sequence("TTTTTTTT")
2940            .save(conn);
2941        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2942        let edge1 = Edge::create(
2943            conn,
2944            PATH_START_NODE_ID,
2945            -1,
2946            Strand::Forward,
2947            node1_id,
2948            0,
2949            Strand::Forward,
2950        );
2951        let edge2 = Edge::create(
2952            conn,
2953            node1_id,
2954            8,
2955            Strand::Forward,
2956            node2_id,
2957            0,
2958            Strand::Forward,
2959        );
2960        let edge3 = Edge::create(
2961            conn,
2962            node2_id,
2963            8,
2964            Strand::Forward,
2965            PATH_END_NODE_ID,
2966            -1,
2967            Strand::Forward,
2968        );
2969
2970        let edge_ids = [edge1.id, edge2.id, edge3.id];
2971        let block_group_edges = edge_ids
2972            .iter()
2973            .map(|edge_id| BlockGroupEdgeData {
2974                block_group_id: block_group.id,
2975                edge_id: *edge_id,
2976                chromosome_index: 0,
2977                phased: 0,
2978            })
2979            .collect::<Vec<BlockGroupEdgeData>>();
2980        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2981
2982        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2983
2984        let sequence3 = Sequence::new()
2985            .sequence_type("DNA")
2986            .sequence("AAAAAAAA")
2987            .save(conn);
2988        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2989        let edge4 = Edge::create(
2990            conn,
2991            node1_id,
2992            8,
2993            Strand::Forward,
2994            node3_id,
2995            0,
2996            Strand::Forward,
2997        );
2998        let edge5 = Edge::create(
2999            conn,
3000            node3_id,
3001            8,
3002            Strand::Forward,
3003            node2_id,
3004            0,
3005            Strand::Forward,
3006        );
3007
3008        let edge_ids = [edge4.id, edge5.id];
3009        let block_group_edges = edge_ids
3010            .iter()
3011            .map(|edge_id| BlockGroupEdgeData {
3012                block_group_id: block_group.id,
3013                edge_id: *edge_id,
3014                chromosome_index: 0,
3015                phased: 0,
3016            })
3017            .collect::<Vec<BlockGroupEdgeData>>();
3018        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3019
3020        let path2 = Path::create(
3021            conn,
3022            "chr2",
3023            &block_group.id,
3024            &[edge1.id, edge4.id, edge5.id, edge3.id],
3025        );
3026
3027        assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
3028
3029        let annotation = Annotation {
3030            name: "foo".to_string(),
3031            start: 0,
3032            end: 16,
3033        };
3034
3035        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3036        assert_eq!(annotations.len(), 1);
3037
3038        // Under the default propagation strategy, the annotation is extended across the inserted
3039        // region
3040        let result_annotation = &annotations[0];
3041        assert_eq!(result_annotation.name, "foo");
3042        assert_eq!(result_annotation.start, 0);
3043        assert_eq!(result_annotation.end, 24);
3044    }
3045
3046    #[test]
3047    fn test_propagate_annotations_with_deletion_across_two_blocks() {
3048        /*
3049            path 1 (two nodes/sequences):
3050            |ATCGATCG| -> sequence (0, 8)
3051                    |TTTTTTTT| -> sequence (8, 16)
3052
3053            path 2: Mimics a deletion across the two blocks
3054            |ATCG| -> sequence (0, 4)
3055                |TTTT| -> sequence (4, 8)
3056
3057            Mappings:
3058            (0, 4) -> (0, 4)
3059            (12, 16) -> (4, 8)
3060        */
3061        let conn = &get_connection(None).unwrap();
3062        Collection::create(conn, "test collection");
3063        let block_group = create_test_block_group(conn);
3064        let sequence1 = Sequence::new()
3065            .sequence_type("DNA")
3066            .sequence("ATCGATCG")
3067            .save(conn);
3068        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3069        let sequence2 = Sequence::new()
3070            .sequence_type("DNA")
3071            .sequence("TTTTTTTT")
3072            .save(conn);
3073        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3074        let edge1 = Edge::create(
3075            conn,
3076            PATH_START_NODE_ID,
3077            -1,
3078            Strand::Forward,
3079            node1_id,
3080            0,
3081            Strand::Forward,
3082        );
3083        let edge2 = Edge::create(
3084            conn,
3085            node1_id,
3086            8,
3087            Strand::Forward,
3088            node2_id,
3089            0,
3090            Strand::Forward,
3091        );
3092        let edge3 = Edge::create(
3093            conn,
3094            node2_id,
3095            8,
3096            Strand::Forward,
3097            PATH_END_NODE_ID,
3098            -1,
3099            Strand::Forward,
3100        );
3101
3102        let edge_ids = [edge1.id, edge2.id, edge3.id];
3103        let block_group_edges = edge_ids
3104            .iter()
3105            .map(|edge_id| BlockGroupEdgeData {
3106                block_group_id: block_group.id,
3107                edge_id: *edge_id,
3108                chromosome_index: 0,
3109                phased: 0,
3110            })
3111            .collect::<Vec<BlockGroupEdgeData>>();
3112        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3113
3114        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3115
3116        let edge4 = Edge::create(
3117            conn,
3118            node1_id,
3119            4,
3120            Strand::Forward,
3121            node2_id,
3122            4,
3123            Strand::Forward,
3124        );
3125
3126        let edge_ids = [edge4.id];
3127        let block_group_edges = edge_ids
3128            .iter()
3129            .map(|edge_id| BlockGroupEdgeData {
3130                block_group_id: block_group.id,
3131                edge_id: *edge_id,
3132                chromosome_index: 0,
3133                phased: 0,
3134            })
3135            .collect::<Vec<BlockGroupEdgeData>>();
3136        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3137
3138        let path2 = Path::create(
3139            conn,
3140            "chr2",
3141            &block_group.id,
3142            &[edge1.id, edge4.id, edge3.id],
3143        );
3144
3145        assert_eq!(path2.sequence(conn), "ATCGTTTT");
3146
3147        let annotation = Annotation {
3148            name: "foo".to_string(),
3149            start: 0,
3150            end: 12,
3151        };
3152
3153        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3154        assert_eq!(annotations.len(), 1);
3155
3156        // Under the default propagation strategy, the annotation is truncated
3157        let result_annotation = &annotations[0];
3158        assert_eq!(result_annotation.name, "foo");
3159        assert_eq!(result_annotation.start, 0);
3160        assert_eq!(result_annotation.end, 4);
3161    }
3162
3163    #[test]
3164    fn test_new_path_with() {
3165        let conn = &get_connection(None).unwrap();
3166        Collection::create(conn, "test collection");
3167        let block_group = create_test_block_group(conn);
3168        let sequence1 = Sequence::new()
3169            .sequence_type("DNA")
3170            .sequence("ATCGATCG")
3171            .save(conn);
3172        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3173        let edge1 = Edge::create(
3174            conn,
3175            PATH_START_NODE_ID,
3176            -123,
3177            Strand::Forward,
3178            node1_id,
3179            0,
3180            Strand::Forward,
3181        );
3182        let sequence2 = Sequence::new()
3183            .sequence_type("DNA")
3184            .sequence("AAAAAAAA")
3185            .save(conn);
3186        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3187        let edge2 = Edge::create(
3188            conn,
3189            node1_id,
3190            8,
3191            Strand::Forward,
3192            node2_id,
3193            0,
3194            Strand::Forward,
3195        );
3196        let edge3 = Edge::create(
3197            conn,
3198            node2_id,
3199            8,
3200            Strand::Forward,
3201            PATH_END_NODE_ID,
3202            -1,
3203            Strand::Forward,
3204        );
3205
3206        let edge_ids = [edge1.id, edge2.id, edge3.id];
3207        let block_group_edges = edge_ids
3208            .iter()
3209            .map(|edge_id| BlockGroupEdgeData {
3210                block_group_id: block_group.id,
3211                edge_id: *edge_id,
3212                chromosome_index: 0,
3213                phased: 0,
3214            })
3215            .collect::<Vec<BlockGroupEdgeData>>();
3216        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3217
3218        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3219        assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3220
3221        let sequence3 = Sequence::new()
3222            .sequence_type("DNA")
3223            .sequence("CCCCCCCC")
3224            .save(conn);
3225        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
3226        let edge4 = Edge::create(
3227            conn,
3228            node1_id,
3229            4,
3230            Strand::Forward,
3231            node3_id,
3232            0,
3233            Strand::Forward,
3234        );
3235        let edge5 = Edge::create(
3236            conn,
3237            node3_id,
3238            8,
3239            Strand::Forward,
3240            node2_id,
3241            3,
3242            Strand::Forward,
3243        );
3244
3245        let edge_ids = [edge4.id, edge5.id];
3246        let block_group_edges = edge_ids
3247            .iter()
3248            .map(|edge_id| BlockGroupEdgeData {
3249                block_group_id: block_group.id,
3250                edge_id: *edge_id,
3251                chromosome_index: 0,
3252                phased: 0,
3253            })
3254            .collect::<Vec<BlockGroupEdgeData>>();
3255        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3256
3257        let path2 = path1.new_path_with(conn, 4, 11, &edge4, &edge5);
3258        assert_eq!(path2.sequence(conn), "ATCGCCCCCCCCAAAAA");
3259
3260        let edge6 = Edge::create(
3261            conn,
3262            node1_id,
3263            4,
3264            Strand::Forward,
3265            node3_id,
3266            0,
3267            Strand::Forward,
3268        );
3269        let edge7 = Edge::create(
3270            conn,
3271            node3_id,
3272            8,
3273            Strand::Forward,
3274            node1_id,
3275            7,
3276            Strand::Forward,
3277        );
3278
3279        let edge_ids = [edge6.id, edge7.id];
3280        let block_group_edges = edge_ids
3281            .iter()
3282            .map(|edge_id| BlockGroupEdgeData {
3283                block_group_id: block_group.id,
3284                edge_id: *edge_id,
3285                chromosome_index: 0,
3286                phased: 0,
3287            })
3288            .collect::<Vec<BlockGroupEdgeData>>();
3289        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3290
3291        let path3 = path1.new_path_with(conn, 4, 7, &edge6, &edge7);
3292        assert_eq!(path3.sequence(conn), "ATCGCCCCCCCCGAAAAAAAA");
3293    }
3294
3295    #[test]
3296    fn test_new_path_with_deletion() {
3297        let conn = &get_connection(None).unwrap();
3298        Collection::create(conn, "test collection");
3299        let block_group = create_test_block_group(conn);
3300        let sequence1 = Sequence::new()
3301            .sequence_type("DNA")
3302            .sequence("ATCGATCG")
3303            .save(conn);
3304        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3305        let edge1 = Edge::create(
3306            conn,
3307            PATH_START_NODE_ID,
3308            -123,
3309            Strand::Forward,
3310            node1_id,
3311            0,
3312            Strand::Forward,
3313        );
3314        let sequence2 = Sequence::new()
3315            .sequence_type("DNA")
3316            .sequence("AAAAAAAA")
3317            .save(conn);
3318        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3319        let edge2 = Edge::create(
3320            conn,
3321            node1_id,
3322            8,
3323            Strand::Forward,
3324            node2_id,
3325            0,
3326            Strand::Forward,
3327        );
3328        let edge3 = Edge::create(
3329            conn,
3330            node2_id,
3331            8,
3332            Strand::Forward,
3333            PATH_END_NODE_ID,
3334            -1,
3335            Strand::Forward,
3336        );
3337
3338        let edge_ids = [edge1.id, edge2.id, edge3.id];
3339        let block_group_edges = edge_ids
3340            .iter()
3341            .map(|edge_id| BlockGroupEdgeData {
3342                block_group_id: block_group.id,
3343                edge_id: *edge_id,
3344                chromosome_index: 0,
3345                phased: 0,
3346            })
3347            .collect::<Vec<BlockGroupEdgeData>>();
3348        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3349
3350        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3351        assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3352
3353        let deletion_edge = Edge::create(
3354            conn,
3355            node1_id,
3356            4,
3357            Strand::Forward,
3358            node2_id,
3359            3,
3360            Strand::Forward,
3361        );
3362
3363        let block_group_edge = BlockGroupEdgeData {
3364            block_group_id: block_group.id,
3365            edge_id: deletion_edge.id,
3366            chromosome_index: 0,
3367            phased: 0,
3368        };
3369
3370        BlockGroupEdge::bulk_create(conn, &[block_group_edge]);
3371
3372        let path2 = path1.new_path_with_deletion(conn, 4, 11).unwrap();
3373        assert_eq!(path2.sequence(conn), "ATCGAAAAA");
3374    }
3375
3376    #[test]
3377    fn test_duplicate_edge_warning() {
3378        let conn = &get_connection(None).unwrap();
3379        Collection::create(conn, "test collection");
3380        let block_group = create_test_block_group(conn);
3381        let sequence1 = Sequence::new()
3382            .sequence_type("DNA")
3383            .sequence("ATCGATCG")
3384            .save(conn);
3385        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3386        let edge1 = Edge::create(
3387            conn,
3388            PATH_START_NODE_ID,
3389            -123,
3390            Strand::Forward,
3391            node1_id,
3392            0,
3393            Strand::Forward,
3394        );
3395        let edge2 = Edge::create(
3396            conn,
3397            node1_id,
3398            8,
3399            Strand::Forward,
3400            PATH_END_NODE_ID,
3401            -1,
3402            Strand::Forward,
3403        );
3404
3405        let edge_ids = vec![edge1.id, edge2.id];
3406        let block_group_edges = edge_ids
3407            .iter()
3408            .map(|edge_id| BlockGroupEdgeData {
3409                block_group_id: block_group.id,
3410                edge_id: *edge_id,
3411                chromosome_index: 0,
3412                phased: 0,
3413            })
3414            .collect::<Vec<BlockGroupEdgeData>>();
3415        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3416
3417        // Should print a warning that there are duplicate edges, but continue
3418        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3419    }
3420
3421    #[test]
3422    #[should_panic(expected = "Not all edges are in the block group")]
3423    fn test_edges_must_be_in_path_block_group() {
3424        let conn = &get_connection(None).unwrap();
3425        Collection::create(conn, "test collection");
3426        let block_group = create_test_block_group(conn);
3427        let sequence1 = Sequence::new()
3428            .sequence_type("DNA")
3429            .sequence("ATCGATCG")
3430            .save(conn);
3431        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3432        let edge1 = Edge::create(
3433            conn,
3434            PATH_START_NODE_ID,
3435            -123,
3436            Strand::Forward,
3437            node1_id,
3438            0,
3439            Strand::Forward,
3440        );
3441        let edge2 = Edge::create(
3442            conn,
3443            node1_id,
3444            8,
3445            Strand::Forward,
3446            PATH_END_NODE_ID,
3447            -1,
3448            Strand::Forward,
3449        );
3450
3451        let edge_ids = [edge1.id, edge2.id];
3452        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3453    }
3454
3455    #[test]
3456    #[should_panic]
3457    // Panic message is something like "Edges 1 and 2 don't share the same node (3 vs. 4)"
3458    fn test_consecutive_edges_must_share_a_node() {
3459        let conn = &get_connection(None).unwrap();
3460        Collection::create(conn, "test collection");
3461        let block_group = create_test_block_group(conn);
3462        let sequence1 = Sequence::new()
3463            .sequence_type("DNA")
3464            .sequence("ATCGATCG")
3465            .save(conn);
3466        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3467        let edge1 = Edge::create(
3468            conn,
3469            PATH_START_NODE_ID,
3470            -123,
3471            Strand::Forward,
3472            node1_id,
3473            0,
3474            Strand::Forward,
3475        );
3476        let sequence2 = Sequence::new()
3477            .sequence_type("DNA")
3478            .sequence("AAAAAAAA")
3479            .save(conn);
3480        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3481        let edge2 = Edge::create(
3482            conn,
3483            node2_id,
3484            8,
3485            Strand::Forward,
3486            PATH_END_NODE_ID,
3487            -1,
3488            Strand::Forward,
3489        );
3490
3491        let block_group_edges = vec![
3492            BlockGroupEdgeData {
3493                block_group_id: block_group.id,
3494                edge_id: edge1.id,
3495                chromosome_index: 0,
3496                phased: 0,
3497            },
3498            BlockGroupEdgeData {
3499                block_group_id: block_group.id,
3500                edge_id: edge2.id,
3501                chromosome_index: 0,
3502                phased: 0,
3503            },
3504        ];
3505        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3506
3507        let edge_ids = vec![edge1.id, edge2.id];
3508
3509        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3510    }
3511
3512    #[test]
3513    #[should_panic]
3514    // Panic message is something like "Strand mismatch between consecutive edges 1 and 2"
3515    fn test_consecutive_edges_must_share_the_same_strand() {
3516        let conn = &get_connection(None).unwrap();
3517        Collection::create(conn, "test collection");
3518        let block_group = create_test_block_group(conn);
3519        let sequence1 = Sequence::new()
3520            .sequence_type("DNA")
3521            .sequence("ATCGATCG")
3522            .save(conn);
3523        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3524        let edge1 = Edge::create(
3525            conn,
3526            PATH_START_NODE_ID,
3527            -123,
3528            Strand::Forward,
3529            node1_id,
3530            0,
3531            Strand::Forward,
3532        );
3533        let sequence2 = Sequence::new()
3534            .sequence_type("DNA")
3535            .sequence("AAAAAAAA")
3536            .save(conn);
3537        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3538        let edge2 = Edge::create(
3539            conn,
3540            node2_id,
3541            8,
3542            Strand::Forward,
3543            PATH_END_NODE_ID,
3544            -1,
3545            Strand::Reverse,
3546        );
3547
3548        let edge_ids = vec![edge1.id, edge2.id];
3549        let block_group_edges = edge_ids
3550            .iter()
3551            .map(|edge_id| BlockGroupEdgeData {
3552                block_group_id: block_group.id,
3553                edge_id: *edge_id,
3554                chromosome_index: 0,
3555                phased: 0,
3556            })
3557            .collect::<Vec<BlockGroupEdgeData>>();
3558
3559        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3560
3561        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3562    }
3563
3564    #[test]
3565    #[should_panic]
3566    // Panic message is something like "Source coordinate 2 for edge 2 is before target coordinate 4 for edge 1"
3567    fn test_consecutive_edges_must_have_different_coordinates_on_a_node() {
3568        let conn = &get_connection(None).unwrap();
3569        Collection::create(conn, "test collection");
3570        let block_group = create_test_block_group(conn);
3571        let sequence1 = Sequence::new()
3572            .sequence_type("DNA")
3573            .sequence("ATCGATCG")
3574            .save(conn);
3575        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3576        let edge1 = Edge::create(
3577            conn,
3578            PATH_START_NODE_ID,
3579            -123,
3580            Strand::Forward,
3581            node1_id,
3582            4,
3583            Strand::Forward,
3584        );
3585        let sequence2 = Sequence::new()
3586            .sequence_type("DNA")
3587            .sequence("AAAAAAAA")
3588            .save(conn);
3589        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3590        // Source coordinate on node 1 is before target coordinate on node1 for edge1
3591        let edge2 = Edge::create(
3592            conn,
3593            node1_id,
3594            2,
3595            Strand::Forward,
3596            node2_id,
3597            0,
3598            Strand::Forward,
3599        );
3600
3601        let edge_ids = vec![edge1.id, edge2.id];
3602        let block_group_edges = edge_ids
3603            .iter()
3604            .map(|edge_id| BlockGroupEdgeData {
3605                block_group_id: block_group.id,
3606                edge_id: *edge_id,
3607                chromosome_index: 0,
3608                phased: 0,
3609            })
3610            .collect::<Vec<BlockGroupEdgeData>>();
3611        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3612
3613        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3614    }
3615
3616    #[test]
3617    fn test_node_blocks_for_range() {
3618        let conn = &get_connection(None).unwrap();
3619        Collection::create(conn, "test collection");
3620        let block_group = create_test_block_group(conn);
3621        let sequence1 = Sequence::new()
3622            .sequence_type("DNA")
3623            .sequence("ATCGATCG")
3624            .save(conn);
3625        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3626        let edge1 = Edge::create(
3627            conn,
3628            PATH_START_NODE_ID,
3629            -123,
3630            Strand::Forward,
3631            node1_id,
3632            0,
3633            Strand::Forward,
3634        );
3635        let sequence2 = Sequence::new()
3636            .sequence_type("DNA")
3637            .sequence("AAAAAAAA")
3638            .save(conn);
3639        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3640        let edge2 = Edge::create(
3641            conn,
3642            node1_id,
3643            8,
3644            Strand::Forward,
3645            node2_id,
3646            0,
3647            Strand::Forward,
3648        );
3649        let edge3 = Edge::create(
3650            conn,
3651            node2_id,
3652            8,
3653            Strand::Forward,
3654            PATH_END_NODE_ID,
3655            -1,
3656            Strand::Forward,
3657        );
3658
3659        let edge_ids = vec![edge1.id, edge2.id, edge3.id];
3660        let block_group_edges = edge_ids
3661            .iter()
3662            .map(|edge_id| BlockGroupEdgeData {
3663                block_group_id: block_group.id,
3664                edge_id: *edge_id,
3665                chromosome_index: 0,
3666                phased: 0,
3667            })
3668            .collect::<Vec<BlockGroupEdgeData>>();
3669        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3670
3671        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3672
3673        let intervaltree = path.intervaltree(conn);
3674
3675        let node_blocks1 = path.node_blocks_for_range(&intervaltree, 0, 8);
3676        let expected_node_blocks1 = vec![NodeIntervalBlock {
3677            node_id: node1_id,
3678            start: 0,
3679            end: 8,
3680            sequence_start: 0,
3681            sequence_end: 8,
3682            strand: Strand::Forward,
3683        }];
3684        assert_eq!(node_blocks1, expected_node_blocks1);
3685
3686        let node_blocks2 = path.node_blocks_for_range(&intervaltree, 0, 4);
3687        let expected_node_blocks2 = vec![NodeIntervalBlock {
3688            node_id: node1_id,
3689            start: 0,
3690            end: 4,
3691            sequence_start: 0,
3692            sequence_end: 4,
3693            strand: Strand::Forward,
3694        }];
3695        assert_eq!(node_blocks2, expected_node_blocks2);
3696
3697        let node_blocks3 = path.node_blocks_for_range(&intervaltree, 2, 6);
3698        let expected_node_blocks3 = vec![NodeIntervalBlock {
3699            node_id: node1_id,
3700            start: 2,
3701            end: 6,
3702            sequence_start: 2,
3703            sequence_end: 6,
3704            strand: Strand::Forward,
3705        }];
3706        assert_eq!(node_blocks3, expected_node_blocks3);
3707
3708        let node_blocks4 = path.node_blocks_for_range(&intervaltree, 3, 8);
3709        let expected_node_blocks4 = vec![NodeIntervalBlock {
3710            node_id: node1_id,
3711            start: 3,
3712            end: 8,
3713            sequence_start: 3,
3714            sequence_end: 8,
3715            strand: Strand::Forward,
3716        }];
3717        assert_eq!(node_blocks4, expected_node_blocks4);
3718
3719        let node_blocks5 = path.node_blocks_for_range(&intervaltree, 6, 10);
3720        let expected_node_blocks5 = vec![
3721            NodeIntervalBlock {
3722                node_id: node1_id,
3723                start: 6,
3724                end: 8,
3725                sequence_start: 6,
3726                sequence_end: 8,
3727                strand: Strand::Forward,
3728            },
3729            NodeIntervalBlock {
3730                node_id: node2_id,
3731                start: 8,
3732                end: 10,
3733                sequence_start: 0,
3734                sequence_end: 2,
3735                strand: Strand::Forward,
3736            },
3737        ];
3738        assert_eq!(node_blocks5, expected_node_blocks5);
3739
3740        let node_blocks6 = path.node_blocks_for_range(&intervaltree, 12, 16);
3741        let expected_node_blocks6 = vec![NodeIntervalBlock {
3742            node_id: node2_id,
3743            start: 12,
3744            end: 16,
3745            sequence_start: 4,
3746            sequence_end: 8,
3747            strand: Strand::Forward,
3748        }];
3749        assert_eq!(node_blocks6, expected_node_blocks6);
3750    }
3751
3752    #[test]
3753    fn test_node_blocks_for_range_with_node_parts() {
3754        let conn = &get_connection(None).unwrap();
3755        Collection::create(conn, "test collection");
3756        let block_group = create_test_block_group(conn);
3757        let sequence1 = Sequence::new()
3758            .sequence_type("DNA")
3759            .sequence("AAAAAAAA")
3760            .save(conn);
3761        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3762        let edge1 = Edge::create(
3763            conn,
3764            PATH_START_NODE_ID,
3765            -123,
3766            Strand::Forward,
3767            node1_id,
3768            0,
3769            Strand::Forward,
3770        );
3771        let sequence2 = Sequence::new()
3772            .sequence_type("DNA")
3773            .sequence("TTTTTTTT")
3774            .save(conn);
3775        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3776        let edge2 = Edge::create(
3777            conn,
3778            node1_id,
3779            5,
3780            Strand::Forward,
3781            node2_id,
3782            0,
3783            Strand::Forward,
3784        );
3785        let edge3 = Edge::create(
3786            conn,
3787            node2_id,
3788            8,
3789            Strand::Forward,
3790            node1_id,
3791            6,
3792            Strand::Forward,
3793        );
3794        let edge4 = Edge::create(
3795            conn,
3796            node1_id,
3797            8,
3798            Strand::Forward,
3799            PATH_END_NODE_ID,
3800            -1,
3801            Strand::Forward,
3802        );
3803
3804        let edge_ids = &[edge1.id, edge2.id, edge3.id, edge4.id];
3805        let block_group_edges = edge_ids
3806            .iter()
3807            .map(|edge_id| BlockGroupEdgeData {
3808                block_group_id: block_group.id,
3809                edge_id: *edge_id,
3810                chromosome_index: 0,
3811                phased: 0,
3812            })
3813            .collect::<Vec<BlockGroupEdgeData>>();
3814        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3815
3816        let path1 = Path::create(conn, "chr1.1", &block_group.id, &[edge1.id, edge4.id]);
3817        let path2 = Path::create(conn, "chr1.2", &block_group.id, edge_ids);
3818
3819        let intervaltree1 = path1.intervaltree(conn);
3820
3821        let node_blocks1 = path1.node_blocks_for_range(&intervaltree1, 0, 8);
3822        let expected_node_blocks1 = vec![NodeIntervalBlock {
3823            node_id: node1_id,
3824            start: 0,
3825            end: 8,
3826            sequence_start: 0,
3827            sequence_end: 8,
3828            strand: Strand::Forward,
3829        }];
3830        assert_eq!(node_blocks1, expected_node_blocks1);
3831
3832        let intervaltree2 = path2.intervaltree(conn);
3833
3834        let node_blocks2 = path2.node_blocks_for_range(&intervaltree2, 0, 8);
3835        let expected_node_blocks2 = vec![
3836            NodeIntervalBlock {
3837                node_id: node1_id,
3838                start: 0,
3839                end: 5,
3840                sequence_start: 0,
3841                sequence_end: 5,
3842                strand: Strand::Forward,
3843            },
3844            NodeIntervalBlock {
3845                node_id: node2_id,
3846                start: 5,
3847                end: 8,
3848                sequence_start: 0,
3849                sequence_end: 3,
3850                strand: Strand::Forward,
3851            },
3852        ];
3853        assert_eq!(node_blocks2, expected_node_blocks2);
3854
3855        let node_blocks3 = path2.node_blocks_for_range(&intervaltree2, 4, 14);
3856        let expected_node_blocks3 = vec![
3857            NodeIntervalBlock {
3858                node_id: node1_id,
3859                start: 4,
3860                end: 5,
3861                sequence_start: 4,
3862                sequence_end: 5,
3863                strand: Strand::Forward,
3864            },
3865            NodeIntervalBlock {
3866                node_id: node2_id,
3867                start: 5,
3868                end: 13,
3869                sequence_start: 0,
3870                sequence_end: 8,
3871                strand: Strand::Forward,
3872            },
3873            NodeIntervalBlock {
3874                node_id: node1_id,
3875                start: 13,
3876                end: 14,
3877                sequence_start: 6,
3878                sequence_end: 7,
3879                strand: Strand::Forward,
3880            },
3881        ];
3882        assert_eq!(node_blocks3, expected_node_blocks3);
3883    }
3884}