Skip to main content

gen_models/
path.rs

1use core::ops::Range as RustRange;
2use std::collections::{HashMap, HashSet};
3
4use gen_core::{
5    HashId, NodeIntervalBlock, PATH_END_NODE_ID, PATH_START_NODE_ID, PathBlock, Strand,
6    calculate_hash,
7    range::{Range, RangeMapping},
8    traits::Capnp,
9};
10use intervaltree::IntervalTree;
11use itertools::Itertools;
12use rusqlite::{Row, params, types::Value as SQLValue};
13use serde::{Deserialize, Serialize};
14
15use crate::{
16    block_group_edge::BlockGroupEdge, db::GraphConnection, edge::Edge, errors::QueryError,
17    gen_models_capnp::path as PathCapnp, node::Node, path_edge::PathEdge, sequence::Sequence,
18    traits::*,
19};
20
21#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize)]
22pub struct Path {
23    pub id: HashId,
24    pub block_group_id: HashId,
25    pub name: String,
26    pub created_on: i64,
27}
28
29impl<'a> Capnp<'a> for Path {
30    type Builder = PathCapnp::Builder<'a>;
31    type Reader = PathCapnp::Reader<'a>;
32
33    fn write_capnp(&self, builder: &mut Self::Builder) {
34        builder.set_id(&self.id.0).unwrap();
35        builder.set_block_group_id(&self.block_group_id.0).unwrap();
36        builder.set_name(&self.name);
37        builder.set_created_on(self.created_on);
38    }
39
40    fn read_capnp(reader: Self::Reader) -> Self {
41        let id = reader
42            .get_id()
43            .unwrap()
44            .as_slice()
45            .unwrap()
46            .try_into()
47            .unwrap();
48        let block_group_id = reader
49            .get_block_group_id()
50            .unwrap()
51            .as_slice()
52            .unwrap()
53            .try_into()
54            .unwrap();
55        let name = reader.get_name().unwrap().to_string().unwrap();
56        let created_on = reader.get_created_on();
57
58        Path {
59            id,
60            block_group_id,
61            name,
62            created_on,
63        }
64    }
65}
66
67#[derive(Clone, Debug, Eq, Hash, PartialEq)]
68pub struct PathData {
69    pub name: String,
70    pub block_group_id: HashId,
71}
72
73// interesting gist here: https://gist.github.com/mbhall88/cd900add6335c96127efea0e0f6a9f48, see if we
74// can expand this to ambiguous bases/keep case
75pub fn revcomp(seq: &str) -> String {
76    String::from_utf8(
77        seq.chars()
78            .rev()
79            .map(|c| -> u8 {
80                let is_upper = c.is_ascii_uppercase();
81                let rc = c as u8;
82                let v = if rc == 78 {
83                    // N
84                    rc
85                } else if rc == 110 {
86                    // n
87                    rc
88                } else if rc & 2 != 0 {
89                    // CG
90                    rc ^ 4
91                } else {
92                    // AT
93                    rc ^ 21
94                };
95                if is_upper { v } else { v.to_ascii_lowercase() }
96            })
97            .collect(),
98    )
99    .unwrap()
100}
101
102#[derive(Clone, Debug)]
103pub struct Annotation {
104    pub name: String,
105    pub start: i64,
106    pub end: i64,
107}
108
109impl Path {
110    pub fn validate_edges(conn: &GraphConnection, edge_ids: &[HashId], block_group_id: &HashId) {
111        let edge_id_set = edge_ids.iter().copied().collect::<HashSet<HashId>>();
112
113        // No duplicate edges allowed
114        if edge_id_set.len() != edge_ids.len() {
115            println!("Duplicate edge IDs detected in path creation");
116        }
117
118        // All path edges must be in the path's block group
119        let augmented_edges = BlockGroupEdge::edges_for_block_group(conn, block_group_id);
120        let bg_edge_ids = augmented_edges
121            .iter()
122            .map(|augmented_edge| augmented_edge.edge.id)
123            .collect::<HashSet<_>>();
124        assert!(
125            edge_id_set.is_subset(&bg_edge_ids),
126            "Not all edges are in the block group ({block_group_id})"
127        );
128
129        let edges_by_id = augmented_edges
130            .iter()
131            .map(|augmented_edge| (&augmented_edge.edge.id, augmented_edge.edge.clone()))
132            .collect::<HashMap<_, _>>();
133
134        // Two consecutive edges must share a node
135        // Two consecutive edges must not go into and out of a node at the same coordinate
136        for (edge1_id, edge2_id) in edge_ids.iter().tuple_windows() {
137            let edge1 = edges_by_id.get(edge1_id).unwrap();
138            let edge2 = edges_by_id.get(edge2_id).unwrap();
139            assert!(
140                edge1.target_node_id == edge2.source_node_id,
141                "Edges {} and {} don't share the same node ({} vs. {})",
142                edge1.id,
143                edge2.id,
144                edge1.target_node_id,
145                edge2.source_node_id
146            );
147
148            assert!(
149                edge1.target_coordinate < edge2.source_coordinate,
150                "Source coordinate {} for edge {} is before target coordinate {} for edge {}",
151                edge2.source_coordinate,
152                edge2.id,
153                edge1.target_coordinate,
154                edge1.id
155            );
156
157            assert!(
158                edge1.target_strand == edge2.source_strand,
159                "Strand mismatch between consecutive edges {} and {}",
160                edge1.id,
161                edge2.id,
162            );
163        }
164
165        // An edge shouldn't start and end at the same coordinate on the same node
166        for edge_id in edge_ids {
167            let edge = edges_by_id.get(edge_id).unwrap();
168            assert!(
169                edge.source_node_id != edge.target_node_id
170                    || edge.source_coordinate != edge.target_coordinate,
171                "Edge {} goes into and out of the same node at the same coordinate",
172                edge.id
173            );
174        }
175    }
176
177    pub fn create(
178        conn: &GraphConnection,
179        name: &str,
180        block_group_id: &HashId,
181        edge_ids: &[HashId],
182    ) -> Path {
183        Path::validate_edges(conn, edge_ids, block_group_id);
184        let hash = HashId(calculate_hash(&format!("{block_group_id}:{name}")));
185        let timestamp = chrono::Utc::now().timestamp_nanos_opt().unwrap();
186        // TODO: Should we do something if edge_ids don't match here? Suppose we have a path
187        // for a block group with edges 1,2,3. And then the same path is added again with edges
188        // 5,6,7, should this be an error? Should we just keep adding edges?
189        let query =
190            "INSERT INTO paths (id, name, block_group_id, created_on) VALUES (?1, ?2, ?3, ?4);";
191        let mut stmt = conn.prepare(query).unwrap();
192
193        let path = match stmt.execute(params![hash, name, block_group_id, timestamp]) {
194            Ok(_) => Path {
195                id: hash,
196                name: name.to_string(),
197                block_group_id: *block_group_id,
198                created_on: timestamp,
199            },
200            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
201                if err.code == rusqlite::ErrorCode::ConstraintViolation {
202                    Path {
203                        id: hash,
204                        name: name.to_string(),
205                        block_group_id: *block_group_id,
206                        created_on: timestamp,
207                    }
208                } else {
209                    panic!("something bad happened querying the database")
210                }
211            }
212            Err(_) => {
213                panic!("something bad happened querying the database")
214            }
215        };
216
217        PathEdge::bulk_create(conn, &path.id, edge_ids);
218
219        path
220    }
221
222    pub fn delete(conn: &GraphConnection, name: &str, block_group_id: &HashId) {
223        let path = Path::get(
224            conn,
225            "select * from paths where name = ?1 and block_group_id = ?2",
226            params![name, block_group_id],
227        )
228        .unwrap();
229        PathEdge::delete(conn, &path.id);
230
231        let query = "DELETE FROM paths where name = ?1 AND block_group_id = ?2;";
232        conn.execute(query, params![name.to_string(), block_group_id])
233            .unwrap();
234    }
235
236    pub fn get_by_id(conn: &GraphConnection, path_id: &HashId) -> Path {
237        Path::get(conn, "select * from paths where id = ?1;", params![path_id]).unwrap()
238    }
239
240    pub fn query_for_collection(conn: &GraphConnection, collection_name: &str) -> Vec<Path> {
241        let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1";
242        Path::query(conn, query, params![collection_name])
243    }
244
245    pub fn query_for_collection_and_sample(
246        conn: &GraphConnection,
247        collection_name: &str,
248        sample_name: Option<String>,
249    ) -> Vec<Path> {
250        if let Some(actual_sample_name) = sample_name {
251            let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1 AND block_groups.sample_name = ?2";
252            Path::query(conn, query, params![collection_name, actual_sample_name])
253        } else {
254            let query = "SELECT * FROM paths JOIN block_groups ON paths.block_group_id = block_groups.id WHERE block_groups.collection_name = ?1 AND block_groups.sample_name IS NULL";
255            Path::query(conn, query, params![collection_name])
256        }
257    }
258
259    pub fn sequence(&self, conn: &GraphConnection) -> String {
260        let blocks = self.blocks(conn);
261        blocks
262            .into_iter()
263            .map(|block| block.block_sequence)
264            .collect::<Vec<_>>()
265            .join("")
266    }
267
268    pub fn length(&self, conn: &GraphConnection) -> i64 {
269        let blocks = self.blocks(conn);
270        let end_block = blocks.last().unwrap();
271        // the last block is the terminal node, which starts at the end of the path
272        end_block.path_start
273    }
274
275    pub fn edge_pairs_to_block(
276        &self,
277        block_id: i64,
278        into: Edge,
279        out_of: Edge,
280        sequences_by_node_id: &HashMap<HashId, Sequence>,
281        current_path_length: i64,
282    ) -> PathBlock {
283        let sequence = sequences_by_node_id.get(&into.target_node_id).unwrap();
284        let start = into.target_coordinate;
285        let end = out_of.source_coordinate;
286
287        let strand = into.target_strand;
288        let block_sequence_length = end - start;
289
290        let block_sequence = if strand == Strand::Reverse {
291            revcomp(&sequence.get_sequence(start, end))
292        } else {
293            sequence.get_sequence(start, end)
294        };
295
296        PathBlock {
297            id: block_id,
298            node_id: into.target_node_id,
299            block_sequence,
300            sequence_start: start,
301            sequence_end: end,
302            path_start: current_path_length,
303            path_end: current_path_length + block_sequence_length,
304            strand,
305        }
306    }
307
308    pub fn blocks(&self, conn: &GraphConnection) -> Vec<PathBlock> {
309        let edges = PathEdge::edges_for_path(conn, &self.id);
310
311        let mut sequence_node_ids = HashSet::new();
312        for edge in &edges {
313            if edge.source_node_id != PATH_START_NODE_ID {
314                sequence_node_ids.insert(edge.source_node_id);
315            }
316            if edge.target_node_id != PATH_END_NODE_ID {
317                sequence_node_ids.insert(edge.target_node_id);
318            }
319        }
320        let sequences_by_node_id = Node::get_sequences_by_node_ids(
321            conn,
322            &sequence_node_ids.into_iter().collect::<Vec<_>>(),
323        );
324
325        let mut blocks = vec![];
326        let mut path_length = 0;
327
328        // NOTE: Adding a "start block" for the dedicated start sequence with a range from i64::MIN
329        // to 0 makes interval tree lookups work better.  If the point being looked up is -1 (or
330        // below), it will return this block.
331        blocks.push(PathBlock {
332            id: -1,
333            node_id: PATH_START_NODE_ID,
334            block_sequence: "".to_string(),
335            sequence_start: 0,
336            sequence_end: 0,
337            path_start: i64::MIN + 1,
338            path_end: 0,
339            strand: Strand::Forward,
340        });
341
342        for (index, (into, out_of)) in edges.into_iter().tuple_windows().enumerate() {
343            let block = self.edge_pairs_to_block(
344                index as i64,
345                into,
346                out_of,
347                &sequences_by_node_id,
348                path_length,
349            );
350            path_length += block.block_sequence.len() as i64;
351            blocks.push(block);
352        }
353
354        // NOTE: Adding an "end block" for the dedicated end sequence with a range from the path
355        // length to i64::MAX makes interval tree lookups work better.  If the point being looked up
356        // is the path length (or higher), it will return this block.
357        blocks.push(PathBlock {
358            id: -2,
359            node_id: PATH_END_NODE_ID,
360            block_sequence: "".to_string(),
361            sequence_start: 0,
362            sequence_end: 0,
363            path_start: path_length,
364            path_end: i64::MAX - 1,
365            strand: Strand::Forward,
366        });
367
368        blocks
369    }
370
371    pub fn intervaltree(&self, conn: &GraphConnection) -> IntervalTree<i64, NodeIntervalBlock> {
372        let blocks = self.blocks(conn);
373        let tree: IntervalTree<i64, NodeIntervalBlock> = blocks
374            .into_iter()
375            .map(|block| {
376                (
377                    block.path_start..block.path_end,
378                    NodeIntervalBlock {
379                        block_id: block.id,
380                        node_id: block.node_id,
381                        start: block.path_start,
382                        end: block.path_end,
383                        sequence_start: block.sequence_start,
384                        sequence_end: block.sequence_end,
385                        strand: block.strand,
386                    },
387                )
388            })
389            .collect();
390        tree
391    }
392
393    pub fn find_block_mappings(
394        &self,
395        conn: &GraphConnection,
396        other_path: &Path,
397    ) -> Vec<RangeMapping> {
398        // Given two paths, find the overlapping parts of common nodes/blocks and return a list af
399        // mappings from subranges of one path to corresponding shared subranges of the other path
400        let our_blocks = self.blocks(conn);
401        let their_blocks = other_path.blocks(conn);
402
403        let our_node_ids = our_blocks
404            .iter()
405            .map(|block| block.node_id)
406            .collect::<HashSet<_>>();
407        let their_node_ids = their_blocks
408            .iter()
409            .map(|block| block.node_id)
410            .collect::<HashSet<_>>();
411        let common_node_ids = our_node_ids
412            .intersection(&their_node_ids)
413            .copied()
414            .collect::<HashSet<_>>();
415
416        let mut our_blocks_by_node_id = HashMap::new();
417        for block in our_blocks
418            .iter()
419            .filter(|block| common_node_ids.contains(&block.node_id))
420        {
421            our_blocks_by_node_id
422                .entry(block.node_id)
423                .or_insert(vec![])
424                .push(block);
425        }
426
427        let mut their_blocks_by_node_id = HashMap::new();
428        for block in their_blocks
429            .iter()
430            .filter(|block| common_node_ids.contains(&block.node_id))
431        {
432            their_blocks_by_node_id
433                .entry(block.node_id)
434                .or_insert(vec![])
435                .push(block);
436        }
437
438        let mut mappings = vec![];
439        for node_id in common_node_ids {
440            let our_blocks = our_blocks_by_node_id.get(&node_id).unwrap();
441            let our_sorted_blocks = our_blocks
442                .clone()
443                .into_iter()
444                .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
445                .collect::<Vec<&PathBlock>>();
446            let their_blocks = their_blocks_by_node_id.get(&node_id).unwrap();
447            let their_sorted_blocks = their_blocks
448                .clone()
449                .into_iter()
450                .sorted_by(|a, b| a.sequence_start.cmp(&b.sequence_start))
451                .collect::<Vec<&PathBlock>>();
452
453            for our_block in our_sorted_blocks {
454                let mut their_block_index = 0;
455
456                while their_block_index < their_sorted_blocks.len() {
457                    let their_block = their_sorted_blocks[their_block_index];
458                    if their_block.sequence_end <= our_block.sequence_start {
459                        // If their block is before ours, move along to the next one
460                        their_block_index += 1;
461                    } else {
462                        let our_range = Range {
463                            start: our_block.sequence_start,
464                            end: our_block.sequence_end,
465                        };
466                        let their_range = Range {
467                            start: their_block.sequence_start,
468                            end: their_block.sequence_end,
469                        };
470
471                        let common_ranges = our_range.overlap(&their_range);
472                        if !common_ranges.is_empty() {
473                            if common_ranges.len() > 1 {
474                                panic!(
475                                    "Found more than one common range for blocks with node {node_id}"
476                                );
477                            }
478
479                            let common_range = &common_ranges[0];
480                            let our_start = our_block.path_start
481                                + (common_range.start - our_block.sequence_start);
482                            let our_end = our_block.path_start
483                                + (common_range.end - our_block.sequence_start);
484                            let their_start = their_block.path_start
485                                + (common_range.start - their_block.sequence_start);
486                            let their_end = their_block.path_start
487                                + (common_range.end - their_block.sequence_start);
488
489                            let mapping = RangeMapping {
490                                source_range: Range {
491                                    start: our_start,
492                                    end: our_end,
493                                },
494                                target_range: Range {
495                                    start: their_start,
496                                    end: their_end,
497                                },
498                            };
499                            mappings.push(mapping);
500                        }
501
502                        if their_block.sequence_end < our_block.sequence_end {
503                            // If their block ends before ours, move along to the next one
504                            their_block_index += 1;
505                        } else {
506                            break;
507                        }
508                    }
509                }
510            }
511        }
512
513        mappings
514            .into_iter()
515            .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
516            .collect::<Vec<RangeMapping>>()
517    }
518
519    pub fn propagate_annotation(
520        annotation: Annotation,
521        mapping_tree: &IntervalTree<i64, RangeMapping>,
522        sequence_length: i64,
523    ) -> Option<Annotation> {
524        /*
525        This method contains the core logic for propagating an annotation from one path to another.
526        The core rules are:
527
528        1. If the annotation can be fully propagated to a matching subregion of the other path,
529            we propagate it
530
531        2. If only part of the annotation can be propagated to a partial subregion of the other
532            path, we propagate just that part and truncate the rest
533
534        3. If the first and last parts of the annotation can be propagated to subregions of the
535            other path (but not one or more parts of the middle of the annotation), we propagate the
536            entire annotation, including across the parts that don't match those of this path
537         */
538
539        // TODO: Add support for different propagation strategies
540        // TODO: Handle circular contigs
541        let start = annotation.start;
542        let end = annotation.end;
543        let mappings: Vec<RangeMapping> = mapping_tree
544            .query(RustRange { start, end })
545            .map(|x| x.value.clone())
546            .collect();
547        if mappings.is_empty() {
548            return None;
549        }
550
551        let sorted_mappings: Vec<RangeMapping> = mappings
552            .into_iter()
553            .sorted_by(|a, b| a.source_range.start.cmp(&b.source_range.start))
554            .collect();
555        let first_mapping = sorted_mappings.first().unwrap();
556        let last_mapping = sorted_mappings.last().unwrap();
557        let translated_start = if first_mapping.source_range.contains(start) {
558            first_mapping.source_range.translate_index(
559                start,
560                &first_mapping.target_range,
561                sequence_length,
562                false,
563            )
564        } else {
565            Ok(first_mapping.target_range.start)
566        };
567
568        let translated_end = if last_mapping.source_range.contains(end) {
569            last_mapping.source_range.translate_index(
570                end,
571                &last_mapping.target_range,
572                sequence_length,
573                false,
574            )
575        } else {
576            Ok(last_mapping.target_range.end)
577        };
578
579        if translated_start.is_err() || translated_end.is_err() {
580            return None;
581        }
582
583        Some(Annotation {
584            name: annotation.name,
585            start: translated_start.expect("Failed to translate start"),
586            end: translated_end.expect("Failed to translate end"),
587        })
588    }
589
590    pub fn get_mapping_tree(
591        &self,
592        conn: &GraphConnection,
593        path: &Path,
594    ) -> IntervalTree<i64, RangeMapping> {
595        let mappings = self.find_block_mappings(conn, path);
596        mappings
597            .into_iter()
598            .map(|mapping| {
599                (
600                    mapping.source_range.start..mapping.source_range.end,
601                    mapping,
602                )
603            })
604            .collect()
605    }
606
607    pub fn propagate_annotations(
608        &self,
609        conn: &GraphConnection,
610        path: &Path,
611        annotations: Vec<Annotation>,
612    ) -> Vec<Annotation> {
613        let mapping_tree = self.get_mapping_tree(conn, path);
614        let sequence_length = path.sequence(conn).len();
615        annotations
616            .into_iter()
617            .filter_map(|annotation| {
618                Path::propagate_annotation(annotation, &mapping_tree, sequence_length as i64)
619            })
620            .clone()
621            .collect()
622    }
623
624    pub fn new_path_with(
625        &self,
626        conn: &GraphConnection,
627        path_start: i64,
628        path_end: i64,
629        edge_to_new_node: &Edge,
630        edge_from_new_node: &Edge,
631    ) -> Path {
632        // Creates a new path from the current one by replacing all edges between path_start and
633        // path_end with the input edges that are to and from a new node
634        let tree = self.intervaltree(conn);
635        let block_with_start = tree.query_point(path_start).next().unwrap().value;
636        let block_with_end = tree.query_point(path_end).next().unwrap().value;
637
638        let edges = PathEdge::edges_for_path(conn, &self.id);
639        let edges_by_source = edges
640            .iter()
641            .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
642            .collect::<HashMap<(_, i64), &Edge>>();
643        let edges_by_target = edges
644            .iter()
645            .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
646            .collect::<HashMap<(_, i64), &Edge>>();
647        let edge_before_new_node = edges_by_target
648            .get(&(block_with_start.node_id, block_with_start.sequence_start))
649            .unwrap();
650        let edge_after_new_node = edges_by_source
651            .get(&(block_with_end.node_id, block_with_end.sequence_end))
652            .unwrap();
653
654        let mut new_edge_ids = vec![];
655        let mut before_new_node = true;
656        let mut after_new_node = false;
657        for edge in &edges {
658            if before_new_node {
659                new_edge_ids.push(edge.id);
660                if edge.id == edge_before_new_node.id {
661                    before_new_node = false;
662                    new_edge_ids.push(edge_to_new_node.id);
663                    new_edge_ids.push(edge_from_new_node.id);
664                }
665            } else if after_new_node {
666                new_edge_ids.push(edge.id);
667            } else if edge.id == edge_after_new_node.id {
668                after_new_node = true;
669                new_edge_ids.push(edge.id);
670            }
671        }
672
673        let new_name = format!(
674            "{}-start-{}-end-{}-node-{}",
675            self.name, path_start, path_end, edge_to_new_node.target_node_id
676        );
677        Path::create(conn, &new_name, &self.block_group_id, &new_edge_ids)
678    }
679
680    pub fn new_path_with_deletion(
681        &self,
682        conn: &GraphConnection,
683        deletion_start: i64,
684        deletion_end: i64,
685    ) -> Result<Path, QueryError> {
686        // Creates a new path from the current one by replacing all edges between deletion_start and
687        // deletion_end with a single edge spanning the deletion.
688        let tree = self.intervaltree(conn);
689        let block_with_start = tree.query_point(deletion_start).next().unwrap().value;
690        let block_with_end = tree.query_point(deletion_end).next().unwrap().value;
691
692        let node_deletion_start = deletion_start - block_with_start.start;
693        let node_deletion_end = deletion_end - block_with_end.start;
694        let deletion_edge_result = Edge::query(
695            conn,
696            "SELECT * FROM edges WHERE source_node_id = ?1 AND source_coordinate = ?2 AND target_node_id = ?3 AND target_coordinate = ?4",
697            rusqlite::params!(
698                SQLValue::from(block_with_start.node_id),
699                SQLValue::from(node_deletion_start),
700                SQLValue::from(block_with_end.node_id),
701                SQLValue::from(node_deletion_end)
702            ),
703        );
704
705        if deletion_edge_result.is_empty() {
706            let error_string = format!(
707                "No edge found from node {}:{node_deletion_start} to node {}:{node_deletion_end}",
708                block_with_start.node_id, block_with_end.node_id
709            );
710            return Err(QueryError::ResultsNotFound(error_string));
711        }
712
713        let deletion_edge = deletion_edge_result[0].clone();
714
715        let edges = PathEdge::edges_for_path(conn, &self.id);
716        let edges_by_source = edges
717            .iter()
718            .map(|edge| ((edge.source_node_id, edge.source_coordinate), edge))
719            .collect::<HashMap<(_, i64), &Edge>>();
720        let edges_by_target = edges
721            .iter()
722            .map(|edge| ((edge.target_node_id, edge.target_coordinate), edge))
723            .collect::<HashMap<(_, i64), &Edge>>();
724        let edge_before_deletion = edges_by_target
725            .get(&(block_with_start.node_id, block_with_start.sequence_start))
726            .unwrap();
727        let edge_after_deletion = edges_by_source
728            .get(&(block_with_end.node_id, block_with_end.sequence_end))
729            .unwrap();
730
731        let mut new_edge_ids = vec![];
732        let mut before_deletion = true;
733        let mut after_deletion = false;
734        for edge in &edges {
735            if before_deletion {
736                new_edge_ids.push(edge.id);
737                if edge.id == edge_before_deletion.id {
738                    before_deletion = false;
739                    new_edge_ids.push(deletion_edge.id);
740                }
741            } else if after_deletion {
742                new_edge_ids.push(edge.id);
743            } else if edge.id == edge_after_deletion.id {
744                after_deletion = true;
745                new_edge_ids.push(edge.id);
746            }
747        }
748
749        let new_name = format!(
750            "{}-start-{}-end-{}-node-{}",
751            self.name,
752            deletion_start,
753            deletion_end,
754            HashId::convert_str("")
755        );
756
757        Ok(Path::create(
758            conn,
759            &new_name,
760            &self.block_group_id,
761            &new_edge_ids,
762        ))
763    }
764
765    fn node_blocks_for_range(
766        &self,
767        intervaltree: &IntervalTree<i64, NodeIntervalBlock>,
768        start: i64,
769        end: i64,
770    ) -> Vec<NodeIntervalBlock> {
771        // TODO: Handle start/end values that are in the middle of blocks
772        let node_blocks: Vec<NodeIntervalBlock> = intervaltree
773            .query(RustRange { start, end })
774            .map(|x| x.value)
775            .sorted_by(|a, b| a.start.cmp(&b.start))
776            .collect();
777
778        if node_blocks.is_empty() {
779            return vec![];
780        }
781
782        let mut result_node_blocks = vec![];
783        let start_offset = if node_blocks[0].start < start {
784            start - node_blocks[0].start
785        } else {
786            0
787        };
788
789        let mut consolidated_block = NodeIntervalBlock {
790            block_id: 0,
791            node_id: node_blocks[0].node_id,
792            start: node_blocks[0].start + start_offset,
793            end: node_blocks[0].end,
794            sequence_start: node_blocks[0].sequence_start + start_offset,
795            sequence_end: node_blocks[0].sequence_end,
796            strand: node_blocks[0].strand,
797        };
798
799        for block in &node_blocks[1..] {
800            if consolidated_block.node_id == block.node_id && consolidated_block.end == block.start
801            {
802                // If the current block is immediately adjacent to the previous one (as recorded
803                // in the consolidated block), extend the consolidated block
804                consolidated_block = NodeIntervalBlock {
805                    block_id: consolidated_block.block_id,
806                    node_id: consolidated_block.node_id,
807                    start: consolidated_block.start,
808                    end: block.end,
809                    sequence_start: consolidated_block.sequence_start,
810                    sequence_end: block.sequence_end,
811                    strand: consolidated_block.strand,
812                };
813            } else {
814                result_node_blocks.push(consolidated_block);
815                consolidated_block = *block;
816            }
817        }
818
819        let end_offset = if consolidated_block.end > end {
820            consolidated_block.end - end
821        } else {
822            0
823        };
824
825        result_node_blocks.push(NodeIntervalBlock {
826            block_id: consolidated_block.block_id,
827            node_id: consolidated_block.node_id,
828            start: consolidated_block.start,
829            end: consolidated_block.end - end_offset,
830            sequence_start: consolidated_block.sequence_start,
831            sequence_end: consolidated_block.sequence_end - end_offset,
832            strand: consolidated_block.strand,
833        });
834
835        result_node_blocks
836    }
837
838    pub fn node_block_partition(
839        &self,
840        conn: &GraphConnection,
841        ranges: Vec<Range>,
842    ) -> Vec<NodeIntervalBlock> {
843        let intervaltree = self.intervaltree(conn);
844        let mut partitioned_nodes = vec![];
845        for range in ranges {
846            let node_blocks = self.node_blocks_for_range(&intervaltree, range.start, range.end);
847            for node_block in &node_blocks {
848                partitioned_nodes.push(*node_block);
849            }
850        }
851        partitioned_nodes
852    }
853}
854
855impl Query for Path {
856    type Model = Path;
857
858    const TABLE_NAME: &'static str = "paths";
859
860    fn process_row(row: &Row) -> Self::Model {
861        Path {
862            id: row.get(0).unwrap(),
863            block_group_id: row.get(1).unwrap(),
864            name: row.get(2).unwrap(),
865            created_on: row.get(3).unwrap(),
866        }
867    }
868}
869
870#[cfg(test)]
871mod tests {
872    // Note this useful idiom: importing names from outer (for mod tests) scope.
873    use capnp::message::TypedBuilder;
874    use chrono::Utc;
875
876    use super::*;
877    use crate::{
878        block_group::BlockGroup, block_group_edge::BlockGroupEdgeData, collection::Collection,
879        test_helpers::get_connection,
880    };
881
882    #[test]
883    fn test_path_capnp_serialization() {
884        let path = Path {
885            id: HashId::pad_str(100),
886            block_group_id: HashId::pad_str(50),
887            name: "test_path".to_string(),
888            created_on: Utc::now().timestamp_nanos_opt().unwrap(),
889        };
890
891        let mut message = TypedBuilder::<PathCapnp::Owned>::new_default();
892        let mut root = message.init_root();
893        path.write_capnp(&mut root);
894
895        let deserialized = Path::read_capnp(root.into_reader());
896        assert_eq!(path, deserialized);
897    }
898
899    #[test]
900    fn test_path_delete() {
901        let conn = &get_connection(None).unwrap();
902        Collection::create(conn, "test collection");
903        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
904
905        // Create first path
906        let sequence1 = Sequence::new()
907            .sequence_type("DNA")
908            .sequence("ATCGATCG")
909            .save(conn);
910        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
911        let edge1 = Edge::create(
912            conn,
913            PATH_START_NODE_ID,
914            -1,
915            Strand::Forward,
916            node1_id,
917            0,
918            Strand::Forward,
919        );
920        let edge2 = Edge::create(
921            conn,
922            node1_id,
923            8,
924            Strand::Forward,
925            PATH_END_NODE_ID,
926            -1,
927            Strand::Forward,
928        );
929
930        let edge_ids1 = vec![edge1.id, edge2.id];
931        let block_group_edges1 = edge_ids1
932            .iter()
933            .map(|edge_id| BlockGroupEdgeData {
934                block_group_id: block_group.id,
935                edge_id: (*edge_id),
936                chromosome_index: 0,
937                phased: 0,
938            })
939            .collect::<Vec<BlockGroupEdgeData>>();
940        BlockGroupEdge::bulk_create(conn, &block_group_edges1);
941
942        let _ = Path::create(conn, "chr1", &block_group.id, &edge_ids1);
943
944        // Create second path
945        let sequence2 = Sequence::new()
946            .sequence_type("DNA")
947            .sequence("TTTTTTTT")
948            .save(conn);
949        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
950        let edge3 = Edge::create(
951            conn,
952            PATH_START_NODE_ID,
953            -1,
954            Strand::Forward,
955            node2_id,
956            0,
957            Strand::Forward,
958        );
959        let edge4 = Edge::create(
960            conn,
961            node2_id,
962            8,
963            Strand::Forward,
964            PATH_END_NODE_ID,
965            -1,
966            Strand::Forward,
967        );
968
969        let edge_ids2 = vec![edge3.id, edge4.id];
970        let block_group_edges2 = edge_ids2
971            .iter()
972            .map(|edge_id| BlockGroupEdgeData {
973                block_group_id: block_group.id,
974                edge_id: (*edge_id),
975                chromosome_index: 0,
976                phased: 0,
977            })
978            .collect::<Vec<BlockGroupEdgeData>>();
979        BlockGroupEdge::bulk_create(conn, &block_group_edges2);
980
981        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids2);
982
983        let paths_before = Path::query_for_collection(conn, "test collection");
984        assert_eq!(paths_before.len(), 2);
985
986        Path::delete(conn, "chr1", &block_group.id);
987
988        let paths_after = Path::query_for_collection(conn, "test collection");
989        assert_eq!(paths_after.len(), 1);
990        assert_eq!(paths_after[0], path2);
991    }
992
993    #[test]
994    fn test_gets_sequence() {
995        let conn = &get_connection(None).unwrap();
996        Collection::create(conn, "test collection");
997        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
998        let sequence1 = Sequence::new()
999            .sequence_type("DNA")
1000            .sequence("ATCGATCG")
1001            .save(conn);
1002        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1003        let edge1 = Edge::create(
1004            conn,
1005            PATH_START_NODE_ID,
1006            -123,
1007            Strand::Forward,
1008            node1_id,
1009            0,
1010            Strand::Forward,
1011        );
1012        let sequence2 = Sequence::new()
1013            .sequence_type("DNA")
1014            .sequence("AAAAAAAA")
1015            .save(conn);
1016        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1017        let edge2 = Edge::create(
1018            conn,
1019            node1_id,
1020            8,
1021            Strand::Forward,
1022            node2_id,
1023            1,
1024            Strand::Forward,
1025        );
1026        let sequence3 = Sequence::new()
1027            .sequence_type("DNA")
1028            .sequence("CCCCCCCC")
1029            .save(conn);
1030        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1031        let edge3 = Edge::create(
1032            conn,
1033            node2_id,
1034            8,
1035            Strand::Forward,
1036            node3_id,
1037            1,
1038            Strand::Forward,
1039        );
1040        let sequence4 = Sequence::new()
1041            .sequence_type("DNA")
1042            .sequence("GGGGGGGG")
1043            .save(conn);
1044        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1045        let edge4 = Edge::create(
1046            conn,
1047            node3_id,
1048            8,
1049            Strand::Forward,
1050            node4_id,
1051            1,
1052            Strand::Forward,
1053        );
1054        let edge5 = Edge::create(
1055            conn,
1056            node4_id,
1057            8,
1058            Strand::Forward,
1059            PATH_END_NODE_ID,
1060            -1,
1061            Strand::Forward,
1062        );
1063
1064        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1065        let block_group_edges = edge_ids
1066            .iter()
1067            .map(|edge_id| BlockGroupEdgeData {
1068                block_group_id: block_group.id,
1069                edge_id: *edge_id,
1070                chromosome_index: 0,
1071                phased: 0,
1072            })
1073            .collect::<Vec<BlockGroupEdgeData>>();
1074        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1075
1076        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1077        assert_eq!(path.sequence(conn), "ATCGATCGAAAAAAACCCCCCCGGGGGGG");
1078    }
1079
1080    #[test]
1081    fn test_gets_sequence_with_rc() {
1082        let conn = &get_connection(None).unwrap();
1083        Collection::create(conn, "test collection");
1084        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1085        let sequence1 = Sequence::new()
1086            .sequence_type("DNA")
1087            .sequence("ATCGATCG")
1088            .save(conn);
1089        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1090        let edge5 = Edge::create(
1091            conn,
1092            node1_id,
1093            8,
1094            Strand::Reverse,
1095            PATH_END_NODE_ID,
1096            0,
1097            Strand::Reverse,
1098        );
1099        let sequence2 = Sequence::new()
1100            .sequence_type("DNA")
1101            .sequence("AAAAAAAA")
1102            .save(conn);
1103        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1104        let edge4 = Edge::create(
1105            conn,
1106            node2_id,
1107            7,
1108            Strand::Reverse,
1109            node1_id,
1110            0,
1111            Strand::Reverse,
1112        );
1113        let sequence3 = Sequence::new()
1114            .sequence_type("DNA")
1115            .sequence("CCCCCCCC")
1116            .save(conn);
1117        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1118        let edge3 = Edge::create(
1119            conn,
1120            node3_id,
1121            7,
1122            Strand::Reverse,
1123            node2_id,
1124            0,
1125            Strand::Reverse,
1126        );
1127        let sequence4 = Sequence::new()
1128            .sequence_type("DNA")
1129            .sequence("GGGGGGGG")
1130            .save(conn);
1131        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1132        let edge2 = Edge::create(
1133            conn,
1134            node4_id,
1135            7,
1136            Strand::Reverse,
1137            node3_id,
1138            0,
1139            Strand::Reverse,
1140        );
1141        let edge1 = Edge::create(
1142            conn,
1143            PATH_START_NODE_ID,
1144            -1,
1145            Strand::Reverse,
1146            node4_id,
1147            0,
1148            Strand::Reverse,
1149        );
1150
1151        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1152        let block_group_edges = edge_ids
1153            .iter()
1154            .map(|edge_id| BlockGroupEdgeData {
1155                block_group_id: block_group.id,
1156                edge_id: *edge_id,
1157                chromosome_index: 0,
1158                phased: 0,
1159            })
1160            .collect::<Vec<BlockGroupEdgeData>>();
1161        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1162
1163        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1164        assert_eq!(path.sequence(conn), "CCCCCCCGGGGGGGTTTTTTTCGATCGAT");
1165    }
1166
1167    #[test]
1168    fn test_reverse_complement() {
1169        assert_eq!(revcomp("ATCCGG"), "CCGGAT");
1170        assert_eq!(revcomp("CNNNNA"), "TNNNNG");
1171        assert_eq!(revcomp("cNNgnAt"), "aTncNNg");
1172    }
1173
1174    #[test]
1175    fn test_intervaltree() {
1176        let conn = &get_connection(None).unwrap();
1177        Collection::create(conn, "test collection");
1178        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1179        let sequence1 = Sequence::new()
1180            .sequence_type("DNA")
1181            .sequence("ATCGATCG")
1182            .save(conn);
1183        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1184        let edge1 = Edge::create(
1185            conn,
1186            PATH_START_NODE_ID,
1187            -1,
1188            Strand::Forward,
1189            node1_id,
1190            0,
1191            Strand::Forward,
1192        );
1193        let sequence2 = Sequence::new()
1194            .sequence_type("DNA")
1195            .sequence("AAAAAAAA")
1196            .save(conn);
1197        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1198        let edge2 = Edge::create(
1199            conn,
1200            node1_id,
1201            8,
1202            Strand::Forward,
1203            node2_id,
1204            1,
1205            Strand::Forward,
1206        );
1207        let sequence3 = Sequence::new()
1208            .sequence_type("DNA")
1209            .sequence("CCCCCCCC")
1210            .save(conn);
1211        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1212        let edge3 = Edge::create(
1213            conn,
1214            node2_id,
1215            8,
1216            Strand::Forward,
1217            node3_id,
1218            1,
1219            Strand::Forward,
1220        );
1221        let sequence4 = Sequence::new()
1222            .sequence_type("DNA")
1223            .sequence("GGGGGGGG")
1224            .save(conn);
1225        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1226        let edge4 = Edge::create(
1227            conn,
1228            node3_id,
1229            8,
1230            Strand::Forward,
1231            node4_id,
1232            1,
1233            Strand::Forward,
1234        );
1235        let edge5 = Edge::create(
1236            conn,
1237            node4_id,
1238            8,
1239            Strand::Forward,
1240            PATH_END_NODE_ID,
1241            -1,
1242            Strand::Forward,
1243        );
1244
1245        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1246        let block_group_edges = edge_ids
1247            .iter()
1248            .map(|edge_id| BlockGroupEdgeData {
1249                block_group_id: block_group.id,
1250                edge_id: *edge_id,
1251                chromosome_index: 0,
1252                phased: 0,
1253            })
1254            .collect::<Vec<BlockGroupEdgeData>>();
1255        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1256
1257        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1258        let tree = path.intervaltree(conn);
1259        let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1260        assert_eq!(blocks1.len(), 1);
1261        let block1 = &blocks1[0];
1262        assert_eq!(block1.node_id, node1_id);
1263        assert_eq!(block1.sequence_start, 0);
1264        assert_eq!(block1.sequence_end, 8);
1265        assert_eq!(block1.start, 0);
1266        assert_eq!(block1.end, 8);
1267        assert_eq!(block1.strand, Strand::Forward);
1268
1269        let blocks2: Vec<NodeIntervalBlock> = tree.query_point(12).map(|x| x.value).collect();
1270        assert_eq!(blocks2.len(), 1);
1271        let block2 = &blocks2[0];
1272        assert_eq!(block2.node_id, node2_id);
1273        assert_eq!(block2.sequence_start, 1);
1274        assert_eq!(block2.sequence_end, 8);
1275        assert_eq!(block2.start, 8);
1276        assert_eq!(block2.end, 15);
1277        assert_eq!(block2.strand, Strand::Forward);
1278
1279        let blocks4: Vec<NodeIntervalBlock> = tree.query_point(25).map(|x| x.value).collect();
1280        assert_eq!(blocks4.len(), 1);
1281        let block4 = &blocks4[0];
1282        assert_eq!(block4.node_id, node4_id);
1283        assert_eq!(block4.sequence_start, 1);
1284        assert_eq!(block4.sequence_end, 8);
1285        assert_eq!(block4.start, 22);
1286        assert_eq!(block4.end, 29);
1287        assert_eq!(block4.strand, Strand::Forward);
1288    }
1289
1290    #[test]
1291    fn test_gets_sequence_with_edges_into_node_middles() {
1292        // Tests that if the edge from the virtual start node goes into the middle of the first
1293        // node, and the edge to the virtual end node comes from the middle of the last node, the
1294        // sequence is correctly generated
1295        let conn = &get_connection(None).unwrap();
1296        Collection::create(conn, "test collection");
1297        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1298        let sequence1 = Sequence::new()
1299            .sequence_type("DNA")
1300            .sequence("ATCGATCG")
1301            .save(conn);
1302        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1303        let edge1 = Edge::create(
1304            conn,
1305            PATH_START_NODE_ID,
1306            -1,
1307            Strand::Forward,
1308            node1_id,
1309            4,
1310            Strand::Forward,
1311        );
1312        let sequence2 = Sequence::new()
1313            .sequence_type("DNA")
1314            .sequence("AAAAAAAA")
1315            .save(conn);
1316        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1317        let edge2 = Edge::create(
1318            conn,
1319            node1_id,
1320            8,
1321            Strand::Forward,
1322            node2_id,
1323            0,
1324            Strand::Forward,
1325        );
1326        let sequence3 = Sequence::new()
1327            .sequence_type("DNA")
1328            .sequence("CCCCCCCC")
1329            .save(conn);
1330        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
1331        let edge3 = Edge::create(
1332            conn,
1333            node2_id,
1334            8,
1335            Strand::Forward,
1336            node3_id,
1337            0,
1338            Strand::Forward,
1339        );
1340        let sequence4 = Sequence::new()
1341            .sequence_type("DNA")
1342            .sequence("GGGGGGGG")
1343            .save(conn);
1344        let node4_id = Node::create(conn, &sequence4.hash, &HashId::convert_str("4"));
1345        let edge4 = Edge::create(
1346            conn,
1347            node3_id,
1348            8,
1349            Strand::Forward,
1350            node4_id,
1351            0,
1352            Strand::Forward,
1353        );
1354        let edge5 = Edge::create(
1355            conn,
1356            node4_id,
1357            4,
1358            Strand::Forward,
1359            PATH_END_NODE_ID,
1360            -1,
1361            Strand::Forward,
1362        );
1363
1364        let edge_ids = vec![edge1.id, edge2.id, edge3.id, edge4.id, edge5.id];
1365        let block_group_edges = edge_ids
1366            .iter()
1367            .map(|edge_id| BlockGroupEdgeData {
1368                block_group_id: block_group.id,
1369                edge_id: *edge_id,
1370                chromosome_index: 0,
1371                phased: 0,
1372            })
1373            .collect::<Vec<BlockGroupEdgeData>>();
1374        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1375
1376        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1377        let tree = path.intervaltree(conn);
1378        let blocks1: Vec<NodeIntervalBlock> = tree.query_point(2).map(|x| x.value).collect();
1379        assert_eq!(blocks1.len(), 1);
1380        let block1 = &blocks1[0];
1381        assert_eq!(block1.node_id, node1_id);
1382        assert_eq!(block1.sequence_start, 4);
1383        assert_eq!(block1.sequence_end, 8);
1384        assert_eq!(block1.start, 0);
1385        assert_eq!(block1.end, 4);
1386        assert_eq!(block1.strand, Strand::Forward);
1387
1388        let blocks2: Vec<NodeIntervalBlock> = tree.query_point(10).map(|x| x.value).collect();
1389        assert_eq!(blocks2.len(), 1);
1390        let block2 = &blocks2[0];
1391        assert_eq!(block2.node_id, node2_id);
1392        assert_eq!(block2.sequence_start, 0);
1393        assert_eq!(block2.sequence_end, 8);
1394        assert_eq!(block2.start, 4);
1395        assert_eq!(block2.end, 12);
1396        assert_eq!(block2.strand, Strand::Forward);
1397
1398        let blocks4: Vec<NodeIntervalBlock> = tree.query_point(22).map(|x| x.value).collect();
1399        assert_eq!(blocks4.len(), 1);
1400        let block4 = &blocks4[0];
1401        assert_eq!(block4.node_id, node4_id);
1402        assert_eq!(block4.sequence_start, 0);
1403        assert_eq!(block4.sequence_end, 4);
1404        assert_eq!(block4.start, 20);
1405        assert_eq!(block4.end, 24);
1406        assert_eq!(block4.strand, Strand::Forward);
1407
1408        assert_eq!(path.sequence(conn), "ATCGAAAAAAAACCCCCCCCGGGG");
1409    }
1410
1411    #[test]
1412    fn test_full_block_mapping() {
1413        /*
1414            |--------| path: 1 sequence, (0, 8)
1415            |ATCGATCG|
1416            |--------| Same path: 1 sequence, (0, 8)
1417
1418            Mapping: (0, 8) -> (0, 8)
1419        */
1420        let conn = &get_connection(None).unwrap();
1421        Collection::create(conn, "test collection");
1422        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1423        let sequence1 = Sequence::new()
1424            .sequence_type("DNA")
1425            .sequence("ATCGATCG")
1426            .save(conn);
1427        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1428        let edge1 = Edge::create(
1429            conn,
1430            PATH_START_NODE_ID,
1431            -1,
1432            Strand::Forward,
1433            node1_id,
1434            0,
1435            Strand::Forward,
1436        );
1437        let edge2 = Edge::create(
1438            conn,
1439            node1_id,
1440            8,
1441            Strand::Forward,
1442            PATH_END_NODE_ID,
1443            -1,
1444            Strand::Forward,
1445        );
1446
1447        let edge_ids = vec![edge1.id, edge2.id];
1448        let block_group_edges = edge_ids
1449            .iter()
1450            .map(|edge_id| BlockGroupEdgeData {
1451                block_group_id: block_group.id,
1452                edge_id: *edge_id,
1453                chromosome_index: 0,
1454                phased: 0,
1455            })
1456            .collect::<Vec<BlockGroupEdgeData>>();
1457        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1458
1459        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1460
1461        let mappings = path.find_block_mappings(conn, &path);
1462        assert_eq!(mappings.len(), 1);
1463        let mapping = &mappings[0];
1464        assert_eq!(mapping.source_range, mapping.target_range);
1465        assert_eq!(mapping.source_range.start, 0);
1466        assert_eq!(mapping.source_range.end, 8);
1467        assert_eq!(mapping.target_range.start, 0);
1468        assert_eq!(mapping.target_range.end, 8);
1469    }
1470
1471    #[test]
1472    fn test_no_block_mapping_overlap() {
1473        /*
1474            |--------| -> path 1 (one node)
1475            |ATCGATCG| -> sequence
1476
1477            |--------| -> path 2 (one node, totally different sequence)
1478            |TTTTTTTT| -> other sequence
1479
1480            Mappings: empty
1481        */
1482        let conn = &get_connection(None).unwrap();
1483        Collection::create(conn, "test collection");
1484        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1485        let sequence1 = Sequence::new()
1486            .sequence_type("DNA")
1487            .sequence("ATCGATCG")
1488            .save(conn);
1489        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1490        let edge1 = Edge::create(
1491            conn,
1492            PATH_START_NODE_ID,
1493            -1,
1494            Strand::Forward,
1495            node1_id,
1496            0,
1497            Strand::Forward,
1498        );
1499        let edge2 = Edge::create(
1500            conn,
1501            node1_id,
1502            8,
1503            Strand::Forward,
1504            PATH_END_NODE_ID,
1505            -1,
1506            Strand::Forward,
1507        );
1508
1509        let edge_ids = vec![edge1.id, edge2.id];
1510        let block_group_edges = edge_ids
1511            .iter()
1512            .map(|edge_id| BlockGroupEdgeData {
1513                block_group_id: block_group.id,
1514                edge_id: *edge_id,
1515                chromosome_index: 0,
1516                phased: 0,
1517            })
1518            .collect::<Vec<BlockGroupEdgeData>>();
1519        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1520        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1521
1522        let sequence2 = Sequence::new()
1523            .sequence_type("DNA")
1524            .sequence("TTTTTTTT")
1525            .save(conn);
1526        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1527        let edge3 = Edge::create(
1528            conn,
1529            PATH_START_NODE_ID,
1530            -1,
1531            Strand::Forward,
1532            node2_id,
1533            0,
1534            Strand::Forward,
1535        );
1536        let edge4 = Edge::create(
1537            conn,
1538            node2_id,
1539            8,
1540            Strand::Forward,
1541            PATH_END_NODE_ID,
1542            -1,
1543            Strand::Forward,
1544        );
1545
1546        let edge_ids = vec![edge3.id, edge4.id];
1547        let block_group_edges = edge_ids
1548            .iter()
1549            .map(|edge_id| BlockGroupEdgeData {
1550                block_group_id: block_group.id,
1551                edge_id: *edge_id,
1552                chromosome_index: 0,
1553                phased: 0,
1554            })
1555            .collect::<Vec<BlockGroupEdgeData>>();
1556        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1557
1558        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1559
1560        let mappings = path1.find_block_mappings(conn, &path2);
1561        assert_eq!(mappings.len(), 0);
1562    }
1563
1564    #[test]
1565    fn test_partial_overlap_block_mapping() {
1566        /*
1567            path 1 (one node/sequence):
1568            |--------|
1569            |ATCGATCG| -> sequence (0, 8)
1570
1571            path 2:
1572            |----| -> (0, 4)
1573                |--------| -> (4, 12)
1574            |ATCG| -> shared with path 1
1575                |TTTTTTTT| -> unrelated sequence
1576
1577            Mapping: (0, 4) -> (0, 4)
1578        */
1579        let conn = &get_connection(None).unwrap();
1580        Collection::create(conn, "test collection");
1581        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1582        let sequence1 = Sequence::new()
1583            .sequence_type("DNA")
1584            .sequence("ATCGATCG")
1585            .save(conn);
1586        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1587        let edge1 = Edge::create(
1588            conn,
1589            PATH_START_NODE_ID,
1590            -1,
1591            Strand::Forward,
1592            node1_id,
1593            0,
1594            Strand::Forward,
1595        );
1596        let edge2 = Edge::create(
1597            conn,
1598            node1_id,
1599            8,
1600            Strand::Forward,
1601            PATH_END_NODE_ID,
1602            -1,
1603            Strand::Forward,
1604        );
1605
1606        let edge_ids = vec![edge1.id, edge2.id];
1607        let block_group_edges = edge_ids
1608            .iter()
1609            .map(|edge_id| BlockGroupEdgeData {
1610                block_group_id: block_group.id,
1611                edge_id: *edge_id,
1612                chromosome_index: 0,
1613                phased: 0,
1614            })
1615            .collect::<Vec<BlockGroupEdgeData>>();
1616        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1617
1618        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1619
1620        let sequence2 = Sequence::new()
1621            .sequence_type("DNA")
1622            .sequence("TTTTTTTT")
1623            .save(conn);
1624        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1625        let edge3 = Edge::create(
1626            conn,
1627            PATH_START_NODE_ID,
1628            -1,
1629            Strand::Forward,
1630            node1_id,
1631            0,
1632            Strand::Forward,
1633        );
1634        let edge4 = Edge::create(
1635            conn,
1636            node1_id,
1637            4,
1638            Strand::Forward,
1639            node2_id,
1640            0,
1641            Strand::Forward,
1642        );
1643        let edge5 = Edge::create(
1644            conn,
1645            node2_id,
1646            8,
1647            Strand::Forward,
1648            PATH_END_NODE_ID,
1649            -1,
1650            Strand::Forward,
1651        );
1652
1653        let edge_ids = vec![edge3.id, edge4.id, edge5.id];
1654        let block_group_edges = edge_ids
1655            .iter()
1656            .map(|edge_id| BlockGroupEdgeData {
1657                block_group_id: block_group.id,
1658                edge_id: *edge_id,
1659                chromosome_index: 0,
1660                phased: 0,
1661            })
1662            .collect::<Vec<BlockGroupEdgeData>>();
1663        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1664
1665        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
1666
1667        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
1668
1669        let mappings = path1.find_block_mappings(conn, &path2);
1670        assert_eq!(mappings.len(), 1);
1671        let mapping = &mappings[0];
1672        assert_eq!(mapping.source_range, mapping.target_range);
1673        assert_eq!(mapping.source_range.start, 0);
1674        assert_eq!(mapping.source_range.end, 4);
1675        assert_eq!(mapping.target_range.start, 0);
1676        assert_eq!(mapping.target_range.end, 4);
1677    }
1678
1679    #[test]
1680    fn test_insertion_block_mapping() {
1681        /*
1682            path 1 (one node/sequence):
1683            |ATCGATCG| -> sequence (0, 8)
1684
1685            path 2:	Mimics a pure insertion
1686            |ATCG| -> (0, 4) shared with first half of path 1
1687                |TTTTTTTT| -> (4, 12) unrelated sequence
1688                        |ATCG| -> (12, 16) shared with second half of path 1
1689
1690            Mappings:
1691            (0, 4) -> (0, 4)
1692            (4, 8) -> (12, 16)
1693        */
1694        let conn = &get_connection(None).unwrap();
1695        Collection::create(conn, "test collection");
1696        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1697        let sequence1 = Sequence::new()
1698            .sequence_type("DNA")
1699            .sequence("ATCGATCG")
1700            .save(conn);
1701        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1702        let edge1 = Edge::create(
1703            conn,
1704            PATH_START_NODE_ID,
1705            -1,
1706            Strand::Forward,
1707            node1_id,
1708            0,
1709            Strand::Forward,
1710        );
1711        let edge2 = Edge::create(
1712            conn,
1713            node1_id,
1714            8,
1715            Strand::Forward,
1716            PATH_END_NODE_ID,
1717            -1,
1718            Strand::Forward,
1719        );
1720
1721        let edge_ids = vec![edge1.id, edge2.id];
1722        let block_group_edges = edge_ids
1723            .iter()
1724            .map(|edge_id| BlockGroupEdgeData {
1725                block_group_id: block_group.id,
1726                edge_id: *edge_id,
1727                chromosome_index: 0,
1728                phased: 0,
1729            })
1730            .collect::<Vec<BlockGroupEdgeData>>();
1731        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1732
1733        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1734
1735        let sequence2 = Sequence::new()
1736            .sequence_type("DNA")
1737            .sequence("TTTTTTTT")
1738            .save(conn);
1739        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1740        let edge4 = Edge::create(
1741            conn,
1742            node1_id,
1743            4,
1744            Strand::Forward,
1745            node2_id,
1746            0,
1747            Strand::Forward,
1748        );
1749        let edge5 = Edge::create(
1750            conn,
1751            node2_id,
1752            8,
1753            Strand::Forward,
1754            node1_id,
1755            4,
1756            Strand::Forward,
1757        );
1758
1759        let edge_ids = [edge4.id, edge5.id];
1760        let block_group_edges = edge_ids
1761            .iter()
1762            .map(|edge_id| BlockGroupEdgeData {
1763                block_group_id: block_group.id,
1764                edge_id: *edge_id,
1765                chromosome_index: 0,
1766                phased: 0,
1767            })
1768            .collect::<Vec<BlockGroupEdgeData>>();
1769        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1770
1771        let path2 = Path::create(
1772            conn,
1773            "chr2",
1774            &block_group.id,
1775            &[edge1.id, edge4.id, edge5.id, edge2.id],
1776        );
1777
1778        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
1779
1780        let mappings = path1.find_block_mappings(conn, &path2);
1781        assert_eq!(mappings.len(), 2);
1782        let mapping1 = &mappings[0];
1783        assert_eq!(mapping1.source_range, mapping1.target_range);
1784        assert_eq!(mapping1.source_range.start, 0);
1785        assert_eq!(mapping1.source_range.end, 4);
1786        assert_eq!(mapping1.target_range.start, 0);
1787        assert_eq!(mapping1.target_range.end, 4);
1788
1789        let mapping2 = &mappings[1];
1790        assert_eq!(mapping2.source_range.start, 4);
1791        assert_eq!(mapping2.source_range.end, 8);
1792        assert_eq!(mapping2.target_range.start, 12);
1793        assert_eq!(mapping2.target_range.end, 16);
1794    }
1795
1796    #[test]
1797    fn test_replacement_block_mapping() {
1798        /*
1799            path 1 (one node/sequence):
1800            |ATCGATCG| -> sequence (0, 8)
1801
1802            path 2:	Mimics a replacement
1803            |AT| -> (0, 2) shared with first two bp of path 1
1804              |TTTTTTTT| -> (2, 10) unrelated sequence
1805                      |CG| -> (10, 12) shared with last 2 bp of path 1
1806
1807            Mappings:
1808            (0, 2) -> (0, 2)
1809            (6, 8) -> (10, 12)
1810        */
1811        let conn = &get_connection(None).unwrap();
1812        Collection::create(conn, "test collection");
1813        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1814        let sequence1 = Sequence::new()
1815            .sequence_type("DNA")
1816            .sequence("ATCGATCG")
1817            .save(conn);
1818        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1819        let edge1 = Edge::create(
1820            conn,
1821            PATH_START_NODE_ID,
1822            -1,
1823            Strand::Forward,
1824            node1_id,
1825            0,
1826            Strand::Forward,
1827        );
1828        let edge2 = Edge::create(
1829            conn,
1830            node1_id,
1831            8,
1832            Strand::Forward,
1833            PATH_END_NODE_ID,
1834            -1,
1835            Strand::Forward,
1836        );
1837
1838        let edge_ids = [edge1.id, edge2.id];
1839        let block_group_edges = edge_ids
1840            .iter()
1841            .map(|edge_id| BlockGroupEdgeData {
1842                block_group_id: block_group.id,
1843                edge_id: *edge_id,
1844                chromosome_index: 0,
1845                phased: 0,
1846            })
1847            .collect::<Vec<BlockGroupEdgeData>>();
1848        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1849
1850        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1851
1852        let sequence2 = Sequence::new()
1853            .sequence_type("DNA")
1854            .sequence("TTTTTTTT")
1855            .save(conn);
1856        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
1857        let edge4 = Edge::create(
1858            conn,
1859            node1_id,
1860            2,
1861            Strand::Forward,
1862            node2_id,
1863            0,
1864            Strand::Forward,
1865        );
1866        let edge5 = Edge::create(
1867            conn,
1868            node2_id,
1869            8,
1870            Strand::Forward,
1871            node1_id,
1872            6,
1873            Strand::Forward,
1874        );
1875
1876        let edge_ids = [edge4.id, edge5.id];
1877        let block_group_edges = edge_ids
1878            .iter()
1879            .map(|edge_id| BlockGroupEdgeData {
1880                block_group_id: block_group.id,
1881                edge_id: *edge_id,
1882                chromosome_index: 0,
1883                phased: 0,
1884            })
1885            .collect::<Vec<BlockGroupEdgeData>>();
1886        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1887
1888        let path2 = Path::create(
1889            conn,
1890            "chr2",
1891            &block_group.id,
1892            &[edge1.id, edge4.id, edge5.id, edge2.id],
1893        );
1894
1895        assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
1896
1897        let mappings = path1.find_block_mappings(conn, &path2);
1898        assert_eq!(mappings.len(), 2);
1899        let mapping1 = &mappings[0];
1900        assert_eq!(mapping1.source_range, mapping1.target_range);
1901        assert_eq!(mapping1.source_range.start, 0);
1902        assert_eq!(mapping1.source_range.end, 2);
1903        assert_eq!(mapping1.target_range.start, 0);
1904        assert_eq!(mapping1.target_range.end, 2);
1905
1906        let mapping2 = &mappings[1];
1907        assert_eq!(mapping2.source_range.start, 6);
1908        assert_eq!(mapping2.source_range.end, 8);
1909        assert_eq!(mapping2.target_range.start, 10);
1910        assert_eq!(mapping2.target_range.end, 12);
1911    }
1912
1913    #[test]
1914    fn test_deletion_block_mapping() {
1915        /*
1916            path 1 (one node/sequence):
1917            |ATCGATCG| -> sequence (0, 8)
1918
1919            path 2: Mimics a pure deletion
1920            |AT| -> (0, 2) shared with first two bp of path 1
1921              |CG| -> (2, 4) shared with last 2 bp of path 1
1922
1923            Mappings:
1924            (0, 2) -> (0, 2)
1925            (6, 8) -> (2, 4)
1926        */
1927        let conn = &get_connection(None).unwrap();
1928        Collection::create(conn, "test collection");
1929        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
1930        let sequence1 = Sequence::new()
1931            .sequence_type("DNA")
1932            .sequence("ATCGATCG")
1933            .save(conn);
1934        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
1935        let edge1 = Edge::create(
1936            conn,
1937            PATH_START_NODE_ID,
1938            -1,
1939            Strand::Forward,
1940            node1_id,
1941            0,
1942            Strand::Forward,
1943        );
1944        let edge2 = Edge::create(
1945            conn,
1946            node1_id,
1947            8,
1948            Strand::Forward,
1949            PATH_END_NODE_ID,
1950            -1,
1951            Strand::Forward,
1952        );
1953
1954        let edge_ids = [edge1.id, edge2.id];
1955        let block_group_edges = edge_ids
1956            .iter()
1957            .map(|edge_id| BlockGroupEdgeData {
1958                block_group_id: block_group.id,
1959                edge_id: *edge_id,
1960                chromosome_index: 0,
1961                phased: 0,
1962            })
1963            .collect::<Vec<BlockGroupEdgeData>>();
1964        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1965
1966        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
1967
1968        let edge4 = Edge::create(
1969            conn,
1970            node1_id,
1971            2,
1972            Strand::Forward,
1973            node1_id,
1974            6,
1975            Strand::Forward,
1976        );
1977
1978        let edge_ids = [edge4.id];
1979        let block_group_edges = edge_ids
1980            .iter()
1981            .map(|edge_id| BlockGroupEdgeData {
1982                block_group_id: block_group.id,
1983                edge_id: *edge_id,
1984                chromosome_index: 0,
1985                phased: 0,
1986            })
1987            .collect::<Vec<BlockGroupEdgeData>>();
1988        BlockGroupEdge::bulk_create(conn, &block_group_edges);
1989
1990        let path2 = Path::create(
1991            conn,
1992            "chr2",
1993            &block_group.id,
1994            &[edge1.id, edge4.id, edge2.id],
1995        );
1996
1997        assert_eq!(path2.sequence(conn), "ATCG");
1998
1999        let mappings = path1.find_block_mappings(conn, &path2);
2000        assert_eq!(mappings.len(), 2);
2001        let mapping1 = &mappings[0];
2002        assert_eq!(mapping1.source_range, mapping1.target_range);
2003        assert_eq!(mapping1.source_range.start, 0);
2004        assert_eq!(mapping1.source_range.end, 2);
2005        assert_eq!(mapping1.target_range.start, 0);
2006        assert_eq!(mapping1.target_range.end, 2);
2007
2008        let mapping2 = &mappings[1];
2009        assert_eq!(mapping2.source_range.start, 6);
2010        assert_eq!(mapping2.source_range.end, 8);
2011        assert_eq!(mapping2.target_range.start, 2);
2012        assert_eq!(mapping2.target_range.end, 4);
2013    }
2014
2015    #[test]
2016    fn test_two_block_insertion_mapping() {
2017        /*
2018            path 1 (two nodes/sequences):
2019            |ATCGATCG| -> sequence (0, 8)
2020                    |TTTTTTTT| -> sequence (8, 16)
2021
2022            path 2: Mimics a pure insertion in the middle of the two blocks
2023            |ATCGATCG| -> sequence (0, 8)
2024                    |AAAAAAAA| -> sequence (8, 16)
2025                            |TTTTTTTT| -> sequence (16, 24)
2026
2027            Mappings:
2028            (0, 8) -> (0, 8)
2029            (8, 16) -> (16, 24)
2030        */
2031        let conn = &get_connection(None).unwrap();
2032        Collection::create(conn, "test collection");
2033        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2034        let sequence1 = Sequence::new()
2035            .sequence_type("DNA")
2036            .sequence("ATCGATCG")
2037            .save(conn);
2038        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2039        let sequence2 = Sequence::new()
2040            .sequence_type("DNA")
2041            .sequence("TTTTTTTT")
2042            .save(conn);
2043        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2044        let edge1 = Edge::create(
2045            conn,
2046            PATH_START_NODE_ID,
2047            -1,
2048            Strand::Forward,
2049            node1_id,
2050            0,
2051            Strand::Forward,
2052        );
2053        let edge2 = Edge::create(
2054            conn,
2055            node1_id,
2056            8,
2057            Strand::Forward,
2058            node2_id,
2059            0,
2060            Strand::Forward,
2061        );
2062        let edge3 = Edge::create(
2063            conn,
2064            node2_id,
2065            8,
2066            Strand::Forward,
2067            PATH_END_NODE_ID,
2068            -1,
2069            Strand::Forward,
2070        );
2071
2072        let edge_ids = [edge1.id, edge2.id, edge3.id];
2073        let block_group_edges = edge_ids
2074            .iter()
2075            .map(|edge_id| BlockGroupEdgeData {
2076                block_group_id: block_group.id,
2077                edge_id: *edge_id,
2078                chromosome_index: 0,
2079                phased: 0,
2080            })
2081            .collect::<Vec<BlockGroupEdgeData>>();
2082        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2083
2084        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2085
2086        let sequence3 = Sequence::new()
2087            .sequence_type("DNA")
2088            .sequence("AAAAAAAA")
2089            .save(conn);
2090        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2091        let edge4 = Edge::create(
2092            conn,
2093            node1_id,
2094            8,
2095            Strand::Forward,
2096            node3_id,
2097            0,
2098            Strand::Forward,
2099        );
2100        let edge5 = Edge::create(
2101            conn,
2102            node3_id,
2103            8,
2104            Strand::Forward,
2105            node2_id,
2106            0,
2107            Strand::Forward,
2108        );
2109
2110        let edge_ids = [edge4.id, edge5.id];
2111        let block_group_edges = edge_ids
2112            .iter()
2113            .map(|edge_id| BlockGroupEdgeData {
2114                block_group_id: block_group.id,
2115                edge_id: *edge_id,
2116                chromosome_index: 0,
2117                phased: 0,
2118            })
2119            .collect::<Vec<BlockGroupEdgeData>>();
2120        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2121
2122        let path2 = Path::create(
2123            conn,
2124            "chr2",
2125            &block_group.id,
2126            &[edge1.id, edge4.id, edge5.id, edge3.id],
2127        );
2128
2129        assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
2130
2131        let mappings = path1.find_block_mappings(conn, &path2);
2132        assert_eq!(mappings.len(), 2);
2133        let mapping1 = &mappings[0];
2134        assert_eq!(mapping1.source_range, mapping1.target_range);
2135        assert_eq!(mapping1.source_range.start, 0);
2136        assert_eq!(mapping1.source_range.end, 8);
2137        assert_eq!(mapping1.target_range.start, 0);
2138        assert_eq!(mapping1.target_range.end, 8);
2139
2140        let mapping2 = &mappings[1];
2141        assert_eq!(mapping2.source_range.start, 8);
2142        assert_eq!(mapping2.source_range.end, 16);
2143        assert_eq!(mapping2.target_range.start, 16);
2144        assert_eq!(mapping2.target_range.end, 24);
2145    }
2146
2147    #[test]
2148    fn test_two_block_replacement_mapping() {
2149        /*
2150            path 1 (two nodes/sequences):
2151            |ATCGATCG| -> sequence (0, 8)
2152                    |TTTTTTTT| -> sequence (8, 16)
2153
2154            path 2: Mimics a replacement across the two blocks
2155            |ATCG| -> sequence (0, 4)
2156                |AAAAAAAA| -> sequence (4, 12)
2157                        |TTTT| -> sequence (12, 16)
2158
2159            Mappings:
2160            (0, 4) -> (0, 4)
2161            (12, 16) -> (12, 16)
2162        */
2163        let conn = &get_connection(None).unwrap();
2164        Collection::create(conn, "test collection");
2165        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2166        let sequence1 = Sequence::new()
2167            .sequence_type("DNA")
2168            .sequence("ATCGATCG")
2169            .save(conn);
2170        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2171        let sequence2 = Sequence::new()
2172            .sequence_type("DNA")
2173            .sequence("TTTTTTTT")
2174            .save(conn);
2175        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2176        let edge1 = Edge::create(
2177            conn,
2178            PATH_START_NODE_ID,
2179            -1,
2180            Strand::Forward,
2181            node1_id,
2182            0,
2183            Strand::Forward,
2184        );
2185        let edge2 = Edge::create(
2186            conn,
2187            node1_id,
2188            8,
2189            Strand::Forward,
2190            node2_id,
2191            0,
2192            Strand::Forward,
2193        );
2194        let edge3 = Edge::create(
2195            conn,
2196            node2_id,
2197            8,
2198            Strand::Forward,
2199            PATH_END_NODE_ID,
2200            -1,
2201            Strand::Forward,
2202        );
2203
2204        let edge_ids = [edge1.id, edge2.id, edge3.id];
2205        let block_group_edges = edge_ids
2206            .iter()
2207            .map(|edge_id| BlockGroupEdgeData {
2208                block_group_id: block_group.id,
2209                edge_id: *edge_id,
2210                chromosome_index: 0,
2211                phased: 0,
2212            })
2213            .collect::<Vec<BlockGroupEdgeData>>();
2214        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2215
2216        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2217
2218        let sequence3 = Sequence::new()
2219            .sequence_type("DNA")
2220            .sequence("AAAAAAAA")
2221            .save(conn);
2222        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2223        let edge4 = Edge::create(
2224            conn,
2225            node1_id,
2226            4,
2227            Strand::Forward,
2228            node3_id,
2229            0,
2230            Strand::Forward,
2231        );
2232        let edge5 = Edge::create(
2233            conn,
2234            node3_id,
2235            8,
2236            Strand::Forward,
2237            node2_id,
2238            4,
2239            Strand::Forward,
2240        );
2241
2242        let edge_ids = [edge4.id, edge5.id];
2243        let block_group_edges = edge_ids
2244            .iter()
2245            .map(|edge_id| BlockGroupEdgeData {
2246                block_group_id: block_group.id,
2247                edge_id: *edge_id,
2248                chromosome_index: 0,
2249                phased: 0,
2250            })
2251            .collect::<Vec<BlockGroupEdgeData>>();
2252        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2253
2254        let path2 = Path::create(
2255            conn,
2256            "chr2",
2257            &block_group.id,
2258            &[edge1.id, edge4.id, edge5.id, edge3.id],
2259        );
2260
2261        assert_eq!(path2.sequence(conn), "ATCGAAAAAAAATTTT");
2262
2263        let mappings = path1.find_block_mappings(conn, &path2);
2264        assert_eq!(mappings.len(), 2);
2265        let mapping1 = &mappings[0];
2266        assert_eq!(mapping1.source_range, mapping1.target_range);
2267        assert_eq!(mapping1.source_range.start, 0);
2268        assert_eq!(mapping1.source_range.end, 4);
2269        assert_eq!(mapping1.target_range.start, 0);
2270        assert_eq!(mapping1.target_range.end, 4);
2271
2272        let mapping2 = &mappings[1];
2273        assert_eq!(mapping2.source_range.start, 12);
2274        assert_eq!(mapping2.source_range.end, 16);
2275        assert_eq!(mapping2.target_range.start, 12);
2276        assert_eq!(mapping2.target_range.end, 16);
2277    }
2278
2279    #[test]
2280    fn test_two_block_deletion_mapping() {
2281        /*
2282            path 1 (two nodes/sequences):
2283            |ATCGATCG| -> sequence (0, 8)
2284                    |TTTTTTTT| -> sequence (8, 16)
2285
2286            path 2: Mimics a deletion across the two blocks
2287            |ATCG| -> sequence (0, 4)
2288                |TTTT| -> sequence (4, 8)
2289
2290            Mappings:
2291            (0, 4) -> (0, 4)
2292            (12, 16) -> (4, 8)
2293        */
2294        let conn = &get_connection(None).unwrap();
2295        Collection::create(conn, "test collection");
2296        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2297        let sequence1 = Sequence::new()
2298            .sequence_type("DNA")
2299            .sequence("ATCGATCG")
2300            .save(conn);
2301        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2302        let sequence2 = Sequence::new()
2303            .sequence_type("DNA")
2304            .sequence("TTTTTTTT")
2305            .save(conn);
2306        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2307        let edge1 = Edge::create(
2308            conn,
2309            PATH_START_NODE_ID,
2310            -1,
2311            Strand::Forward,
2312            node1_id,
2313            0,
2314            Strand::Forward,
2315        );
2316        let edge2 = Edge::create(
2317            conn,
2318            node1_id,
2319            8,
2320            Strand::Forward,
2321            node2_id,
2322            0,
2323            Strand::Forward,
2324        );
2325        let edge3 = Edge::create(
2326            conn,
2327            node2_id,
2328            8,
2329            Strand::Forward,
2330            PATH_END_NODE_ID,
2331            -1,
2332            Strand::Forward,
2333        );
2334
2335        let edge_ids = [edge1.id, edge2.id, edge3.id];
2336        let block_group_edges = edge_ids
2337            .iter()
2338            .map(|edge_id| BlockGroupEdgeData {
2339                block_group_id: block_group.id,
2340                edge_id: *edge_id,
2341                chromosome_index: 0,
2342                phased: 0,
2343            })
2344            .collect::<Vec<BlockGroupEdgeData>>();
2345        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2346
2347        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2348
2349        let edge4 = Edge::create(
2350            conn,
2351            node1_id,
2352            4,
2353            Strand::Forward,
2354            node2_id,
2355            4,
2356            Strand::Forward,
2357        );
2358
2359        let edge_ids = [edge4.id];
2360        let block_group_edges = edge_ids
2361            .iter()
2362            .map(|edge_id| BlockGroupEdgeData {
2363                block_group_id: block_group.id,
2364                edge_id: *edge_id,
2365                chromosome_index: 0,
2366                phased: 0,
2367            })
2368            .collect::<Vec<BlockGroupEdgeData>>();
2369        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2370
2371        let path2 = Path::create(
2372            conn,
2373            "chr2",
2374            &block_group.id,
2375            &[edge1.id, edge4.id, edge3.id],
2376        );
2377
2378        assert_eq!(path2.sequence(conn), "ATCGTTTT");
2379
2380        let mappings = path1.find_block_mappings(conn, &path2);
2381        assert_eq!(mappings.len(), 2);
2382        let mapping1 = &mappings[0];
2383        assert_eq!(mapping1.source_range, mapping1.target_range);
2384        assert_eq!(mapping1.source_range.start, 0);
2385        assert_eq!(mapping1.source_range.end, 4);
2386        assert_eq!(mapping1.target_range.start, 0);
2387        assert_eq!(mapping1.target_range.end, 4);
2388
2389        let mapping2 = &mappings[1];
2390        assert_eq!(mapping2.source_range.start, 12);
2391        assert_eq!(mapping2.source_range.end, 16);
2392        assert_eq!(mapping2.target_range.start, 4);
2393        assert_eq!(mapping2.target_range.end, 8);
2394    }
2395
2396    #[test]
2397    fn test_annotation_propagation_full_overlap() {
2398        /*
2399            |--------| path: 1 sequence, (0, 8)
2400            |ATCGATCG|
2401            |--------| Same path: 1 sequence, (0, 8)
2402
2403            Mapping: (0, 8) -> (0, 8)
2404        */
2405        let conn = &get_connection(None).unwrap();
2406        Collection::create(conn, "test collection");
2407        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2408        let sequence1 = Sequence::new()
2409            .sequence_type("DNA")
2410            .sequence("ATCGATCG")
2411            .save(conn);
2412        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2413        let edge1 = Edge::create(
2414            conn,
2415            PATH_START_NODE_ID,
2416            -1,
2417            Strand::Forward,
2418            node1_id,
2419            0,
2420            Strand::Forward,
2421        );
2422        let edge2 = Edge::create(
2423            conn,
2424            node1_id,
2425            8,
2426            Strand::Forward,
2427            PATH_END_NODE_ID,
2428            -1,
2429            Strand::Forward,
2430        );
2431
2432        let edge_ids = [edge1.id, edge2.id];
2433        let block_group_edges = edge_ids
2434            .iter()
2435            .map(|edge_id| BlockGroupEdgeData {
2436                block_group_id: block_group.id,
2437                edge_id: *edge_id,
2438                chromosome_index: 0,
2439                phased: 0,
2440            })
2441            .collect::<Vec<BlockGroupEdgeData>>();
2442        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2443
2444        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2445
2446        let annotation = Annotation {
2447            name: "foo".to_string(),
2448            start: 0,
2449            end: 8,
2450        };
2451        let annotations = path.propagate_annotations(conn, &path, vec![annotation]);
2452        assert_eq!(annotations.len(), 1);
2453        let result_annotation = &annotations[0];
2454        assert_eq!(result_annotation.name, "foo");
2455        assert_eq!(result_annotation.start, 0);
2456        assert_eq!(result_annotation.end, 8);
2457    }
2458
2459    #[test]
2460    fn test_propagate_annotations_no_overlap() {
2461        /*
2462            |--------| -> path 1 (one node)
2463            |ATCGATCG| -> sequence
2464
2465            |--------| -> path 2 (one node, totally different sequence)
2466            |TTTTTTTT| -> other sequence
2467
2468            Mappings: empty
2469        */
2470        let conn = &get_connection(None).unwrap();
2471        Collection::create(conn, "test collection");
2472        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2473        let sequence1 = Sequence::new()
2474            .sequence_type("DNA")
2475            .sequence("ATCGATCG")
2476            .save(conn);
2477        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2478        let edge1 = Edge::create(
2479            conn,
2480            PATH_START_NODE_ID,
2481            -1,
2482            Strand::Forward,
2483            node1_id,
2484            0,
2485            Strand::Forward,
2486        );
2487        let edge2 = Edge::create(
2488            conn,
2489            node1_id,
2490            8,
2491            Strand::Forward,
2492            PATH_END_NODE_ID,
2493            -1,
2494            Strand::Forward,
2495        );
2496
2497        let edge_ids = [edge1.id, edge2.id];
2498        let block_group_edges = edge_ids
2499            .iter()
2500            .map(|edge_id| BlockGroupEdgeData {
2501                block_group_id: block_group.id,
2502                edge_id: *edge_id,
2503                chromosome_index: 0,
2504                phased: 0,
2505            })
2506            .collect::<Vec<BlockGroupEdgeData>>();
2507        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2508
2509        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2510
2511        let sequence2 = Sequence::new()
2512            .sequence_type("DNA")
2513            .sequence("TTTTTTTT")
2514            .save(conn);
2515        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2516        let edge3 = Edge::create(
2517            conn,
2518            PATH_START_NODE_ID,
2519            -1,
2520            Strand::Forward,
2521            node2_id,
2522            0,
2523            Strand::Forward,
2524        );
2525        let edge4 = Edge::create(
2526            conn,
2527            node2_id,
2528            8,
2529            Strand::Forward,
2530            PATH_END_NODE_ID,
2531            -1,
2532            Strand::Forward,
2533        );
2534
2535        let edge_ids = [edge3.id, edge4.id];
2536        let block_group_edges = edge_ids
2537            .iter()
2538            .map(|edge_id| BlockGroupEdgeData {
2539                block_group_id: block_group.id,
2540                edge_id: *edge_id,
2541                chromosome_index: 0,
2542                phased: 0,
2543            })
2544            .collect::<Vec<BlockGroupEdgeData>>();
2545        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2546
2547        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2548
2549        let annotation = Annotation {
2550            name: "foo".to_string(),
2551            start: 0,
2552            end: 8,
2553        };
2554        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2555        assert_eq!(annotations.len(), 0);
2556    }
2557
2558    #[test]
2559    fn test_propagate_annotations_partial_overlap() {
2560        /*
2561            path 1 (one node/sequence):
2562            |--------|
2563            |ATCGATCG| -> sequence (0, 8)
2564
2565            path 2:
2566            |----| -> (0, 4)
2567                |--------| -> (4, 12)
2568            |ATCG| -> shared with path 1
2569                |TTTTTTTT| -> unrelated sequence
2570
2571            Mapping: (0, 4) -> (0, 4)
2572        */
2573        let conn = &get_connection(None).unwrap();
2574        Collection::create(conn, "test collection");
2575        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2576        let sequence1 = Sequence::new()
2577            .sequence_type("DNA")
2578            .sequence("ATCGATCG")
2579            .save(conn);
2580        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2581        let edge1 = Edge::create(
2582            conn,
2583            PATH_START_NODE_ID,
2584            -1,
2585            Strand::Forward,
2586            node1_id,
2587            0,
2588            Strand::Forward,
2589        );
2590        let edge2 = Edge::create(
2591            conn,
2592            node1_id,
2593            8,
2594            Strand::Forward,
2595            PATH_END_NODE_ID,
2596            -1,
2597            Strand::Forward,
2598        );
2599
2600        let edge_ids = vec![edge1.id, edge2.id];
2601        let block_group_edges = edge_ids
2602            .iter()
2603            .map(|edge_id| BlockGroupEdgeData {
2604                block_group_id: block_group.id,
2605                edge_id: *edge_id,
2606                chromosome_index: 0,
2607                phased: 0,
2608            })
2609            .collect::<Vec<BlockGroupEdgeData>>();
2610        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2611
2612        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2613
2614        let sequence2 = Sequence::new()
2615            .sequence_type("DNA")
2616            .sequence("TTTTTTTT")
2617            .save(conn);
2618        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2619        let edge3 = Edge::create(
2620            conn,
2621            PATH_START_NODE_ID,
2622            -1,
2623            Strand::Forward,
2624            node1_id,
2625            0,
2626            Strand::Forward,
2627        );
2628        let edge4 = Edge::create(
2629            conn,
2630            node1_id,
2631            4,
2632            Strand::Forward,
2633            node2_id,
2634            0,
2635            Strand::Forward,
2636        );
2637        let edge5 = Edge::create(
2638            conn,
2639            node2_id,
2640            8,
2641            Strand::Forward,
2642            PATH_END_NODE_ID,
2643            -1,
2644            Strand::Forward,
2645        );
2646
2647        let edge_ids = vec![edge3.id, edge4.id, edge5.id];
2648        let block_group_edges = edge_ids
2649            .iter()
2650            .map(|edge_id| BlockGroupEdgeData {
2651                block_group_id: block_group.id,
2652                edge_id: *edge_id,
2653                chromosome_index: 0,
2654                phased: 0,
2655            })
2656            .collect::<Vec<BlockGroupEdgeData>>();
2657        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2658
2659        let path2 = Path::create(conn, "chr2", &block_group.id, &edge_ids);
2660
2661        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTT");
2662
2663        let annotation = Annotation {
2664            name: "foo".to_string(),
2665            start: 0,
2666            end: 8,
2667        };
2668        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2669        assert_eq!(annotations.len(), 1);
2670        let result_annotation = &annotations[0];
2671        assert_eq!(result_annotation.name, "foo");
2672        assert_eq!(result_annotation.start, 0);
2673        assert_eq!(result_annotation.end, 4);
2674    }
2675
2676    #[test]
2677    fn test_propagate_annotations_with_insertion() {
2678        /*
2679            path 1 (one node/sequence):
2680            |ATCGATCG| -> sequence (0, 8)
2681
2682            path 2:	Mimics a pure insertion
2683            |ATCG| -> (0, 4) shared with first half of path 1
2684                |TTTTTTTT| -> (4, 12) unrelated sequence
2685                        |ATCG| -> (12, 16) shared with second half of path 1
2686
2687            Mappings:
2688            (0, 4) -> (0, 4)
2689            (4, 8) -> (12, 16)
2690        */
2691        let conn = &get_connection(None).unwrap();
2692        Collection::create(conn, "test collection");
2693        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2694        let sequence1 = Sequence::new()
2695            .sequence_type("DNA")
2696            .sequence("ATCGATCG")
2697            .save(conn);
2698        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2699        let edge1 = Edge::create(
2700            conn,
2701            PATH_START_NODE_ID,
2702            -1,
2703            Strand::Forward,
2704            node1_id,
2705            0,
2706            Strand::Forward,
2707        );
2708        let edge2 = Edge::create(
2709            conn,
2710            node1_id,
2711            8,
2712            Strand::Forward,
2713            PATH_END_NODE_ID,
2714            -1,
2715            Strand::Forward,
2716        );
2717
2718        let edge_ids = vec![edge1.id, edge2.id];
2719        let block_group_edges = edge_ids
2720            .iter()
2721            .map(|edge_id| BlockGroupEdgeData {
2722                block_group_id: block_group.id,
2723                edge_id: *edge_id,
2724                chromosome_index: 0,
2725                phased: 0,
2726            })
2727            .collect::<Vec<BlockGroupEdgeData>>();
2728        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2729
2730        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2731
2732        let sequence2 = Sequence::new()
2733            .sequence_type("DNA")
2734            .sequence("TTTTTTTT")
2735            .save(conn);
2736        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2737        let edge4 = Edge::create(
2738            conn,
2739            node1_id,
2740            4,
2741            Strand::Forward,
2742            node2_id,
2743            0,
2744            Strand::Forward,
2745        );
2746        let edge5 = Edge::create(
2747            conn,
2748            node2_id,
2749            8,
2750            Strand::Forward,
2751            node1_id,
2752            4,
2753            Strand::Forward,
2754        );
2755
2756        let edge_ids = [edge4.id, edge5.id];
2757        let block_group_edges = edge_ids
2758            .iter()
2759            .map(|edge_id| BlockGroupEdgeData {
2760                block_group_id: block_group.id,
2761                edge_id: *edge_id,
2762                chromosome_index: 0,
2763                phased: 0,
2764            })
2765            .collect::<Vec<BlockGroupEdgeData>>();
2766        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2767
2768        let path2 = Path::create(
2769            conn,
2770            "chr2",
2771            &block_group.id,
2772            &[edge1.id, edge4.id, edge5.id, edge2.id],
2773        );
2774
2775        assert_eq!(path2.sequence(conn), "ATCGTTTTTTTTATCG");
2776
2777        let annotation = Annotation {
2778            name: "foo".to_string(),
2779            start: 0,
2780            end: 8,
2781        };
2782
2783        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2784        assert_eq!(annotations.len(), 1);
2785
2786        // Under the default propagation strategy, the annotation is expanded to cover anything in
2787        // between parts it covers
2788        let result_annotation = &annotations[0];
2789        assert_eq!(result_annotation.name, "foo");
2790        assert_eq!(result_annotation.start, 0);
2791        assert_eq!(result_annotation.end, 16);
2792    }
2793
2794    #[test]
2795    fn test_propagate_annotations_with_replacement() {
2796        /*
2797            path 1 (one node/sequence):
2798            |ATCGATCG| -> sequence (0, 8)
2799
2800            path 2:	Mimics a replacement
2801            |AT| -> (0, 2) shared with first two bp of path 1
2802              |TTTTTTTT| -> (2, 10) unrelated sequence
2803                      |CG| -> (10, 12) shared with last 2 bp of path 1
2804
2805            Mappings:
2806            (0, 2) -> (0, 2)
2807            (6, 8) -> (10, 12)
2808        */
2809        let conn = &get_connection(None).unwrap();
2810        Collection::create(conn, "test collection");
2811        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2812        let sequence1 = Sequence::new()
2813            .sequence_type("DNA")
2814            .sequence("ATCGATCG")
2815            .save(conn);
2816        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2817        let edge1 = Edge::create(
2818            conn,
2819            PATH_START_NODE_ID,
2820            -1,
2821            Strand::Forward,
2822            node1_id,
2823            0,
2824            Strand::Forward,
2825        );
2826        let edge2 = Edge::create(
2827            conn,
2828            node1_id,
2829            8,
2830            Strand::Forward,
2831            PATH_END_NODE_ID,
2832            -1,
2833            Strand::Forward,
2834        );
2835
2836        let edge_ids = [edge1.id, edge2.id];
2837        let block_group_edges = edge_ids
2838            .iter()
2839            .map(|edge_id| BlockGroupEdgeData {
2840                block_group_id: block_group.id,
2841                edge_id: *edge_id,
2842                chromosome_index: 0,
2843                phased: 0,
2844            })
2845            .collect::<Vec<BlockGroupEdgeData>>();
2846        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2847
2848        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2849
2850        let sequence2 = Sequence::new()
2851            .sequence_type("DNA")
2852            .sequence("TTTTTTTT")
2853            .save(conn);
2854        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2855        let edge4 = Edge::create(
2856            conn,
2857            node1_id,
2858            2,
2859            Strand::Forward,
2860            node2_id,
2861            0,
2862            Strand::Forward,
2863        );
2864        let edge5 = Edge::create(
2865            conn,
2866            node2_id,
2867            8,
2868            Strand::Forward,
2869            node1_id,
2870            6,
2871            Strand::Forward,
2872        );
2873
2874        let edge_ids = [edge4.id, edge5.id];
2875        let block_group_edges = edge_ids
2876            .iter()
2877            .map(|edge_id| BlockGroupEdgeData {
2878                block_group_id: block_group.id,
2879                edge_id: *edge_id,
2880                chromosome_index: 0,
2881                phased: 0,
2882            })
2883            .collect::<Vec<BlockGroupEdgeData>>();
2884        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2885
2886        let path2 = Path::create(
2887            conn,
2888            "chr2",
2889            &block_group.id,
2890            &[edge1.id, edge4.id, edge5.id, edge2.id],
2891        );
2892
2893        assert_eq!(path2.sequence(conn), "ATTTTTTTTTCG");
2894
2895        let annotation = Annotation {
2896            name: "foo".to_string(),
2897            start: 0,
2898            end: 4,
2899        };
2900
2901        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
2902        assert_eq!(annotations.len(), 1);
2903
2904        // Under the default propagation strategy, the annotation is truncated
2905        let result_annotation = &annotations[0];
2906        assert_eq!(result_annotation.name, "foo");
2907        assert_eq!(result_annotation.start, 0);
2908        assert_eq!(result_annotation.end, 2);
2909    }
2910
2911    #[test]
2912    fn test_propagate_annotations_with_insertion_across_two_blocks() {
2913        /*
2914            path 1 (two nodes/sequences):
2915            |ATCGATCG| -> sequence (0, 8)
2916                    |TTTTTTTT| -> sequence (8, 16)
2917
2918            path 2: Mimics a pure insertion in the middle of the two blocks
2919            |ATCGATCG| -> sequence (0, 8)
2920                    |AAAAAAAA| -> sequence (8, 16)
2921                            |TTTTTTTT| -> sequence (16, 24)
2922
2923            Mappings:
2924            (0, 8) -> (0, 8)
2925            (8, 16) -> (16, 24)
2926        */
2927        let conn = &get_connection(None).unwrap();
2928        Collection::create(conn, "test collection");
2929        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
2930        let sequence1 = Sequence::new()
2931            .sequence_type("DNA")
2932            .sequence("ATCGATCG")
2933            .save(conn);
2934        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
2935        let sequence2 = Sequence::new()
2936            .sequence_type("DNA")
2937            .sequence("TTTTTTTT")
2938            .save(conn);
2939        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
2940        let edge1 = Edge::create(
2941            conn,
2942            PATH_START_NODE_ID,
2943            -1,
2944            Strand::Forward,
2945            node1_id,
2946            0,
2947            Strand::Forward,
2948        );
2949        let edge2 = Edge::create(
2950            conn,
2951            node1_id,
2952            8,
2953            Strand::Forward,
2954            node2_id,
2955            0,
2956            Strand::Forward,
2957        );
2958        let edge3 = Edge::create(
2959            conn,
2960            node2_id,
2961            8,
2962            Strand::Forward,
2963            PATH_END_NODE_ID,
2964            -1,
2965            Strand::Forward,
2966        );
2967
2968        let edge_ids = [edge1.id, edge2.id, edge3.id];
2969        let block_group_edges = edge_ids
2970            .iter()
2971            .map(|edge_id| BlockGroupEdgeData {
2972                block_group_id: block_group.id,
2973                edge_id: *edge_id,
2974                chromosome_index: 0,
2975                phased: 0,
2976            })
2977            .collect::<Vec<BlockGroupEdgeData>>();
2978        BlockGroupEdge::bulk_create(conn, &block_group_edges);
2979
2980        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
2981
2982        let sequence3 = Sequence::new()
2983            .sequence_type("DNA")
2984            .sequence("AAAAAAAA")
2985            .save(conn);
2986        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
2987        let edge4 = Edge::create(
2988            conn,
2989            node1_id,
2990            8,
2991            Strand::Forward,
2992            node3_id,
2993            0,
2994            Strand::Forward,
2995        );
2996        let edge5 = Edge::create(
2997            conn,
2998            node3_id,
2999            8,
3000            Strand::Forward,
3001            node2_id,
3002            0,
3003            Strand::Forward,
3004        );
3005
3006        let edge_ids = [edge4.id, edge5.id];
3007        let block_group_edges = edge_ids
3008            .iter()
3009            .map(|edge_id| BlockGroupEdgeData {
3010                block_group_id: block_group.id,
3011                edge_id: *edge_id,
3012                chromosome_index: 0,
3013                phased: 0,
3014            })
3015            .collect::<Vec<BlockGroupEdgeData>>();
3016        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3017
3018        let path2 = Path::create(
3019            conn,
3020            "chr2",
3021            &block_group.id,
3022            &[edge1.id, edge4.id, edge5.id, edge3.id],
3023        );
3024
3025        assert_eq!(path2.sequence(conn), "ATCGATCGAAAAAAAATTTTTTTT");
3026
3027        let annotation = Annotation {
3028            name: "foo".to_string(),
3029            start: 0,
3030            end: 16,
3031        };
3032
3033        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3034        assert_eq!(annotations.len(), 1);
3035
3036        // Under the default propagation strategy, the annotation is extended across the inserted
3037        // region
3038        let result_annotation = &annotations[0];
3039        assert_eq!(result_annotation.name, "foo");
3040        assert_eq!(result_annotation.start, 0);
3041        assert_eq!(result_annotation.end, 24);
3042    }
3043
3044    #[test]
3045    fn test_propagate_annotations_with_deletion_across_two_blocks() {
3046        /*
3047            path 1 (two nodes/sequences):
3048            |ATCGATCG| -> sequence (0, 8)
3049                    |TTTTTTTT| -> sequence (8, 16)
3050
3051            path 2: Mimics a deletion across the two blocks
3052            |ATCG| -> sequence (0, 4)
3053                |TTTT| -> sequence (4, 8)
3054
3055            Mappings:
3056            (0, 4) -> (0, 4)
3057            (12, 16) -> (4, 8)
3058        */
3059        let conn = &get_connection(None).unwrap();
3060        Collection::create(conn, "test collection");
3061        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3062        let sequence1 = Sequence::new()
3063            .sequence_type("DNA")
3064            .sequence("ATCGATCG")
3065            .save(conn);
3066        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3067        let sequence2 = Sequence::new()
3068            .sequence_type("DNA")
3069            .sequence("TTTTTTTT")
3070            .save(conn);
3071        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3072        let edge1 = Edge::create(
3073            conn,
3074            PATH_START_NODE_ID,
3075            -1,
3076            Strand::Forward,
3077            node1_id,
3078            0,
3079            Strand::Forward,
3080        );
3081        let edge2 = Edge::create(
3082            conn,
3083            node1_id,
3084            8,
3085            Strand::Forward,
3086            node2_id,
3087            0,
3088            Strand::Forward,
3089        );
3090        let edge3 = Edge::create(
3091            conn,
3092            node2_id,
3093            8,
3094            Strand::Forward,
3095            PATH_END_NODE_ID,
3096            -1,
3097            Strand::Forward,
3098        );
3099
3100        let edge_ids = [edge1.id, edge2.id, edge3.id];
3101        let block_group_edges = edge_ids
3102            .iter()
3103            .map(|edge_id| BlockGroupEdgeData {
3104                block_group_id: block_group.id,
3105                edge_id: *edge_id,
3106                chromosome_index: 0,
3107                phased: 0,
3108            })
3109            .collect::<Vec<BlockGroupEdgeData>>();
3110        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3111
3112        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3113
3114        let edge4 = Edge::create(
3115            conn,
3116            node1_id,
3117            4,
3118            Strand::Forward,
3119            node2_id,
3120            4,
3121            Strand::Forward,
3122        );
3123
3124        let edge_ids = [edge4.id];
3125        let block_group_edges = edge_ids
3126            .iter()
3127            .map(|edge_id| BlockGroupEdgeData {
3128                block_group_id: block_group.id,
3129                edge_id: *edge_id,
3130                chromosome_index: 0,
3131                phased: 0,
3132            })
3133            .collect::<Vec<BlockGroupEdgeData>>();
3134        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3135
3136        let path2 = Path::create(
3137            conn,
3138            "chr2",
3139            &block_group.id,
3140            &[edge1.id, edge4.id, edge3.id],
3141        );
3142
3143        assert_eq!(path2.sequence(conn), "ATCGTTTT");
3144
3145        let annotation = Annotation {
3146            name: "foo".to_string(),
3147            start: 0,
3148            end: 12,
3149        };
3150
3151        let annotations = path1.propagate_annotations(conn, &path2, vec![annotation]);
3152        assert_eq!(annotations.len(), 1);
3153
3154        // Under the default propagation strategy, the annotation is truncated
3155        let result_annotation = &annotations[0];
3156        assert_eq!(result_annotation.name, "foo");
3157        assert_eq!(result_annotation.start, 0);
3158        assert_eq!(result_annotation.end, 4);
3159    }
3160
3161    #[test]
3162    fn test_new_path_with() {
3163        let conn = &get_connection(None).unwrap();
3164        Collection::create(conn, "test collection");
3165        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3166        let sequence1 = Sequence::new()
3167            .sequence_type("DNA")
3168            .sequence("ATCGATCG")
3169            .save(conn);
3170        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3171        let edge1 = Edge::create(
3172            conn,
3173            PATH_START_NODE_ID,
3174            -123,
3175            Strand::Forward,
3176            node1_id,
3177            0,
3178            Strand::Forward,
3179        );
3180        let sequence2 = Sequence::new()
3181            .sequence_type("DNA")
3182            .sequence("AAAAAAAA")
3183            .save(conn);
3184        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3185        let edge2 = Edge::create(
3186            conn,
3187            node1_id,
3188            8,
3189            Strand::Forward,
3190            node2_id,
3191            0,
3192            Strand::Forward,
3193        );
3194        let edge3 = Edge::create(
3195            conn,
3196            node2_id,
3197            8,
3198            Strand::Forward,
3199            PATH_END_NODE_ID,
3200            -1,
3201            Strand::Forward,
3202        );
3203
3204        let edge_ids = [edge1.id, edge2.id, edge3.id];
3205        let block_group_edges = edge_ids
3206            .iter()
3207            .map(|edge_id| BlockGroupEdgeData {
3208                block_group_id: block_group.id,
3209                edge_id: *edge_id,
3210                chromosome_index: 0,
3211                phased: 0,
3212            })
3213            .collect::<Vec<BlockGroupEdgeData>>();
3214        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3215
3216        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3217        assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3218
3219        let sequence3 = Sequence::new()
3220            .sequence_type("DNA")
3221            .sequence("CCCCCCCC")
3222            .save(conn);
3223        let node3_id = Node::create(conn, &sequence3.hash, &HashId::convert_str("3"));
3224        let edge4 = Edge::create(
3225            conn,
3226            node1_id,
3227            4,
3228            Strand::Forward,
3229            node3_id,
3230            0,
3231            Strand::Forward,
3232        );
3233        let edge5 = Edge::create(
3234            conn,
3235            node3_id,
3236            8,
3237            Strand::Forward,
3238            node2_id,
3239            3,
3240            Strand::Forward,
3241        );
3242
3243        let edge_ids = [edge4.id, edge5.id];
3244        let block_group_edges = edge_ids
3245            .iter()
3246            .map(|edge_id| BlockGroupEdgeData {
3247                block_group_id: block_group.id,
3248                edge_id: *edge_id,
3249                chromosome_index: 0,
3250                phased: 0,
3251            })
3252            .collect::<Vec<BlockGroupEdgeData>>();
3253        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3254
3255        let path2 = path1.new_path_with(conn, 4, 11, &edge4, &edge5);
3256        assert_eq!(path2.sequence(conn), "ATCGCCCCCCCCAAAAA");
3257
3258        let edge6 = Edge::create(
3259            conn,
3260            node1_id,
3261            4,
3262            Strand::Forward,
3263            node3_id,
3264            0,
3265            Strand::Forward,
3266        );
3267        let edge7 = Edge::create(
3268            conn,
3269            node3_id,
3270            8,
3271            Strand::Forward,
3272            node1_id,
3273            7,
3274            Strand::Forward,
3275        );
3276
3277        let edge_ids = [edge6.id, edge7.id];
3278        let block_group_edges = edge_ids
3279            .iter()
3280            .map(|edge_id| BlockGroupEdgeData {
3281                block_group_id: block_group.id,
3282                edge_id: *edge_id,
3283                chromosome_index: 0,
3284                phased: 0,
3285            })
3286            .collect::<Vec<BlockGroupEdgeData>>();
3287        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3288
3289        let path3 = path1.new_path_with(conn, 4, 7, &edge6, &edge7);
3290        assert_eq!(path3.sequence(conn), "ATCGCCCCCCCCGAAAAAAAA");
3291    }
3292
3293    #[test]
3294    fn test_new_path_with_deletion() {
3295        let conn = &get_connection(None).unwrap();
3296        Collection::create(conn, "test collection");
3297        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3298        let sequence1 = Sequence::new()
3299            .sequence_type("DNA")
3300            .sequence("ATCGATCG")
3301            .save(conn);
3302        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3303        let edge1 = Edge::create(
3304            conn,
3305            PATH_START_NODE_ID,
3306            -123,
3307            Strand::Forward,
3308            node1_id,
3309            0,
3310            Strand::Forward,
3311        );
3312        let sequence2 = Sequence::new()
3313            .sequence_type("DNA")
3314            .sequence("AAAAAAAA")
3315            .save(conn);
3316        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3317        let edge2 = Edge::create(
3318            conn,
3319            node1_id,
3320            8,
3321            Strand::Forward,
3322            node2_id,
3323            0,
3324            Strand::Forward,
3325        );
3326        let edge3 = Edge::create(
3327            conn,
3328            node2_id,
3329            8,
3330            Strand::Forward,
3331            PATH_END_NODE_ID,
3332            -1,
3333            Strand::Forward,
3334        );
3335
3336        let edge_ids = [edge1.id, edge2.id, edge3.id];
3337        let block_group_edges = edge_ids
3338            .iter()
3339            .map(|edge_id| BlockGroupEdgeData {
3340                block_group_id: block_group.id,
3341                edge_id: *edge_id,
3342                chromosome_index: 0,
3343                phased: 0,
3344            })
3345            .collect::<Vec<BlockGroupEdgeData>>();
3346        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3347
3348        let path1 = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3349        assert_eq!(path1.sequence(conn), "ATCGATCGAAAAAAAA");
3350
3351        let deletion_edge = Edge::create(
3352            conn,
3353            node1_id,
3354            4,
3355            Strand::Forward,
3356            node2_id,
3357            3,
3358            Strand::Forward,
3359        );
3360
3361        let block_group_edge = BlockGroupEdgeData {
3362            block_group_id: block_group.id,
3363            edge_id: deletion_edge.id,
3364            chromosome_index: 0,
3365            phased: 0,
3366        };
3367
3368        BlockGroupEdge::bulk_create(conn, &[block_group_edge]);
3369
3370        let path2 = path1.new_path_with_deletion(conn, 4, 11).unwrap();
3371        assert_eq!(path2.sequence(conn), "ATCGAAAAA");
3372    }
3373
3374    #[test]
3375    fn test_duplicate_edge_warning() {
3376        let conn = &get_connection(None).unwrap();
3377        Collection::create(conn, "test collection");
3378        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3379        let sequence1 = Sequence::new()
3380            .sequence_type("DNA")
3381            .sequence("ATCGATCG")
3382            .save(conn);
3383        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3384        let edge1 = Edge::create(
3385            conn,
3386            PATH_START_NODE_ID,
3387            -123,
3388            Strand::Forward,
3389            node1_id,
3390            0,
3391            Strand::Forward,
3392        );
3393        let edge2 = Edge::create(
3394            conn,
3395            node1_id,
3396            8,
3397            Strand::Forward,
3398            PATH_END_NODE_ID,
3399            -1,
3400            Strand::Forward,
3401        );
3402
3403        let edge_ids = vec![edge1.id, edge2.id];
3404        let block_group_edges = edge_ids
3405            .iter()
3406            .map(|edge_id| BlockGroupEdgeData {
3407                block_group_id: block_group.id,
3408                edge_id: *edge_id,
3409                chromosome_index: 0,
3410                phased: 0,
3411            })
3412            .collect::<Vec<BlockGroupEdgeData>>();
3413        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3414
3415        // Should print a warning that there are duplicate edges, but continue
3416        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3417    }
3418
3419    #[test]
3420    #[should_panic(expected = "Not all edges are in the block group")]
3421    fn test_edges_must_be_in_path_block_group() {
3422        let conn = &get_connection(None).unwrap();
3423        Collection::create(conn, "test collection");
3424        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3425        let sequence1 = Sequence::new()
3426            .sequence_type("DNA")
3427            .sequence("ATCGATCG")
3428            .save(conn);
3429        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3430        let edge1 = Edge::create(
3431            conn,
3432            PATH_START_NODE_ID,
3433            -123,
3434            Strand::Forward,
3435            node1_id,
3436            0,
3437            Strand::Forward,
3438        );
3439        let edge2 = Edge::create(
3440            conn,
3441            node1_id,
3442            8,
3443            Strand::Forward,
3444            PATH_END_NODE_ID,
3445            -1,
3446            Strand::Forward,
3447        );
3448
3449        let edge_ids = [edge1.id, edge2.id];
3450        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3451    }
3452
3453    #[test]
3454    #[should_panic]
3455    // Panic message is something like "Edges 1 and 2 don't share the same node (3 vs. 4)"
3456    fn test_consecutive_edges_must_share_a_node() {
3457        let conn = &get_connection(None).unwrap();
3458        Collection::create(conn, "test collection");
3459        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3460        let sequence1 = Sequence::new()
3461            .sequence_type("DNA")
3462            .sequence("ATCGATCG")
3463            .save(conn);
3464        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3465        let edge1 = Edge::create(
3466            conn,
3467            PATH_START_NODE_ID,
3468            -123,
3469            Strand::Forward,
3470            node1_id,
3471            0,
3472            Strand::Forward,
3473        );
3474        let sequence2 = Sequence::new()
3475            .sequence_type("DNA")
3476            .sequence("AAAAAAAA")
3477            .save(conn);
3478        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3479        let edge2 = Edge::create(
3480            conn,
3481            node2_id,
3482            8,
3483            Strand::Forward,
3484            PATH_END_NODE_ID,
3485            -1,
3486            Strand::Forward,
3487        );
3488
3489        let block_group_edges = vec![
3490            BlockGroupEdgeData {
3491                block_group_id: block_group.id,
3492                edge_id: edge1.id,
3493                chromosome_index: 0,
3494                phased: 0,
3495            },
3496            BlockGroupEdgeData {
3497                block_group_id: block_group.id,
3498                edge_id: edge2.id,
3499                chromosome_index: 0,
3500                phased: 0,
3501            },
3502        ];
3503        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3504
3505        let edge_ids = vec![edge1.id, edge2.id];
3506
3507        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3508    }
3509
3510    #[test]
3511    #[should_panic]
3512    // Panic message is something like "Strand mismatch between consecutive edges 1 and 2"
3513    fn test_consecutive_edges_must_share_the_same_strand() {
3514        let conn = &get_connection(None).unwrap();
3515        Collection::create(conn, "test collection");
3516        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3517        let sequence1 = Sequence::new()
3518            .sequence_type("DNA")
3519            .sequence("ATCGATCG")
3520            .save(conn);
3521        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3522        let edge1 = Edge::create(
3523            conn,
3524            PATH_START_NODE_ID,
3525            -123,
3526            Strand::Forward,
3527            node1_id,
3528            0,
3529            Strand::Forward,
3530        );
3531        let sequence2 = Sequence::new()
3532            .sequence_type("DNA")
3533            .sequence("AAAAAAAA")
3534            .save(conn);
3535        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3536        let edge2 = Edge::create(
3537            conn,
3538            node2_id,
3539            8,
3540            Strand::Forward,
3541            PATH_END_NODE_ID,
3542            -1,
3543            Strand::Reverse,
3544        );
3545
3546        let edge_ids = vec![edge1.id, edge2.id];
3547        let block_group_edges = edge_ids
3548            .iter()
3549            .map(|edge_id| BlockGroupEdgeData {
3550                block_group_id: block_group.id,
3551                edge_id: *edge_id,
3552                chromosome_index: 0,
3553                phased: 0,
3554            })
3555            .collect::<Vec<BlockGroupEdgeData>>();
3556
3557        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3558
3559        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3560    }
3561
3562    #[test]
3563    #[should_panic]
3564    // Panic message is something like "Source coordinate 2 for edge 2 is before target coordinate 4 for edge 1"
3565    fn test_consecutive_edges_must_have_different_coordinates_on_a_node() {
3566        let conn = &get_connection(None).unwrap();
3567        Collection::create(conn, "test collection");
3568        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3569        let sequence1 = Sequence::new()
3570            .sequence_type("DNA")
3571            .sequence("ATCGATCG")
3572            .save(conn);
3573        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3574        let edge1 = Edge::create(
3575            conn,
3576            PATH_START_NODE_ID,
3577            -123,
3578            Strand::Forward,
3579            node1_id,
3580            4,
3581            Strand::Forward,
3582        );
3583        let sequence2 = Sequence::new()
3584            .sequence_type("DNA")
3585            .sequence("AAAAAAAA")
3586            .save(conn);
3587        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3588        // Source coordinate on node 1 is before target coordinate on node1 for edge1
3589        let edge2 = Edge::create(
3590            conn,
3591            node1_id,
3592            2,
3593            Strand::Forward,
3594            node2_id,
3595            0,
3596            Strand::Forward,
3597        );
3598
3599        let edge_ids = vec![edge1.id, edge2.id];
3600        let block_group_edges = edge_ids
3601            .iter()
3602            .map(|edge_id| BlockGroupEdgeData {
3603                block_group_id: block_group.id,
3604                edge_id: *edge_id,
3605                chromosome_index: 0,
3606                phased: 0,
3607            })
3608            .collect::<Vec<BlockGroupEdgeData>>();
3609        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3610
3611        let _path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3612    }
3613
3614    #[test]
3615    #[should_panic]
3616    // Panic message is something like: "Edge 1 goes into and out of the same node at the same coordinate"
3617    fn test_edge_does_not_start_and_end_on_same_bp() {
3618        let conn = &get_connection(None).unwrap();
3619        Collection::create(conn, "test collection");
3620        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3621        let sequence1 = Sequence::new()
3622            .sequence_type("DNA")
3623            .sequence("ATCGATCG")
3624            .save(conn);
3625        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3626        let edge1 = Edge::create(
3627            conn,
3628            node1_id,
3629            2,
3630            Strand::Forward,
3631            node1_id,
3632            2,
3633            Strand::Forward,
3634        );
3635
3636        let block_group_edges = vec![BlockGroupEdgeData {
3637            block_group_id: block_group.id,
3638            edge_id: edge1.id,
3639            chromosome_index: 0,
3640            phased: 0,
3641        }];
3642        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3643
3644        let _path = Path::create(conn, "chr1", &block_group.id, &[edge1.id]);
3645    }
3646
3647    #[test]
3648    fn test_node_blocks_for_range() {
3649        let conn = &get_connection(None).unwrap();
3650        Collection::create(conn, "test collection");
3651        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3652        let sequence1 = Sequence::new()
3653            .sequence_type("DNA")
3654            .sequence("ATCGATCG")
3655            .save(conn);
3656        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3657        let edge1 = Edge::create(
3658            conn,
3659            PATH_START_NODE_ID,
3660            -123,
3661            Strand::Forward,
3662            node1_id,
3663            0,
3664            Strand::Forward,
3665        );
3666        let sequence2 = Sequence::new()
3667            .sequence_type("DNA")
3668            .sequence("AAAAAAAA")
3669            .save(conn);
3670        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3671        let edge2 = Edge::create(
3672            conn,
3673            node1_id,
3674            8,
3675            Strand::Forward,
3676            node2_id,
3677            0,
3678            Strand::Forward,
3679        );
3680        let edge3 = Edge::create(
3681            conn,
3682            node2_id,
3683            8,
3684            Strand::Forward,
3685            PATH_END_NODE_ID,
3686            -1,
3687            Strand::Forward,
3688        );
3689
3690        let edge_ids = vec![edge1.id, edge2.id, edge3.id];
3691        let block_group_edges = edge_ids
3692            .iter()
3693            .map(|edge_id| BlockGroupEdgeData {
3694                block_group_id: block_group.id,
3695                edge_id: *edge_id,
3696                chromosome_index: 0,
3697                phased: 0,
3698            })
3699            .collect::<Vec<BlockGroupEdgeData>>();
3700        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3701
3702        let path = Path::create(conn, "chr1", &block_group.id, &edge_ids);
3703
3704        let intervaltree = path.intervaltree(conn);
3705
3706        let node_blocks1 = path.node_blocks_for_range(&intervaltree, 0, 8);
3707        let expected_node_blocks1 = vec![NodeIntervalBlock {
3708            block_id: 0,
3709            node_id: node1_id,
3710            start: 0,
3711            end: 8,
3712            sequence_start: 0,
3713            sequence_end: 8,
3714            strand: Strand::Forward,
3715        }];
3716        assert_eq!(node_blocks1, expected_node_blocks1);
3717
3718        let node_blocks2 = path.node_blocks_for_range(&intervaltree, 0, 4);
3719        let expected_node_blocks2 = vec![NodeIntervalBlock {
3720            block_id: 0,
3721            node_id: node1_id,
3722            start: 0,
3723            end: 4,
3724            sequence_start: 0,
3725            sequence_end: 4,
3726            strand: Strand::Forward,
3727        }];
3728        assert_eq!(node_blocks2, expected_node_blocks2);
3729
3730        let node_blocks3 = path.node_blocks_for_range(&intervaltree, 2, 6);
3731        let expected_node_blocks3 = vec![NodeIntervalBlock {
3732            block_id: 0,
3733            node_id: node1_id,
3734            start: 2,
3735            end: 6,
3736            sequence_start: 2,
3737            sequence_end: 6,
3738            strand: Strand::Forward,
3739        }];
3740        assert_eq!(node_blocks3, expected_node_blocks3);
3741
3742        let node_blocks4 = path.node_blocks_for_range(&intervaltree, 3, 8);
3743        let expected_node_blocks4 = vec![NodeIntervalBlock {
3744            block_id: 0,
3745            node_id: node1_id,
3746            start: 3,
3747            end: 8,
3748            sequence_start: 3,
3749            sequence_end: 8,
3750            strand: Strand::Forward,
3751        }];
3752        assert_eq!(node_blocks4, expected_node_blocks4);
3753
3754        let node_blocks5 = path.node_blocks_for_range(&intervaltree, 6, 10);
3755        let expected_node_blocks5 = vec![
3756            NodeIntervalBlock {
3757                block_id: 0,
3758                node_id: node1_id,
3759                start: 6,
3760                end: 8,
3761                sequence_start: 6,
3762                sequence_end: 8,
3763                strand: Strand::Forward,
3764            },
3765            NodeIntervalBlock {
3766                block_id: 1,
3767                node_id: node2_id,
3768                start: 8,
3769                end: 10,
3770                sequence_start: 0,
3771                sequence_end: 2,
3772                strand: Strand::Forward,
3773            },
3774        ];
3775        assert_eq!(node_blocks5, expected_node_blocks5);
3776
3777        let node_blocks6 = path.node_blocks_for_range(&intervaltree, 12, 16);
3778        let expected_node_blocks6 = vec![NodeIntervalBlock {
3779            block_id: 0,
3780            node_id: node2_id,
3781            start: 12,
3782            end: 16,
3783            sequence_start: 4,
3784            sequence_end: 8,
3785            strand: Strand::Forward,
3786        }];
3787        assert_eq!(node_blocks6, expected_node_blocks6);
3788    }
3789
3790    #[test]
3791    fn test_node_blocks_for_range_with_node_parts() {
3792        let conn = &get_connection(None).unwrap();
3793        Collection::create(conn, "test collection");
3794        let block_group = BlockGroup::create(conn, "test collection", None, "test block group");
3795        let sequence1 = Sequence::new()
3796            .sequence_type("DNA")
3797            .sequence("AAAAAAAA")
3798            .save(conn);
3799        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
3800        let edge1 = Edge::create(
3801            conn,
3802            PATH_START_NODE_ID,
3803            -123,
3804            Strand::Forward,
3805            node1_id,
3806            0,
3807            Strand::Forward,
3808        );
3809        let sequence2 = Sequence::new()
3810            .sequence_type("DNA")
3811            .sequence("TTTTTTTT")
3812            .save(conn);
3813        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
3814        let edge2 = Edge::create(
3815            conn,
3816            node1_id,
3817            5,
3818            Strand::Forward,
3819            node2_id,
3820            0,
3821            Strand::Forward,
3822        );
3823        let edge3 = Edge::create(
3824            conn,
3825            node2_id,
3826            8,
3827            Strand::Forward,
3828            node1_id,
3829            6,
3830            Strand::Forward,
3831        );
3832        let edge4 = Edge::create(
3833            conn,
3834            node1_id,
3835            8,
3836            Strand::Forward,
3837            PATH_END_NODE_ID,
3838            -1,
3839            Strand::Forward,
3840        );
3841
3842        let edge_ids = &[edge1.id, edge2.id, edge3.id, edge4.id];
3843        let block_group_edges = edge_ids
3844            .iter()
3845            .map(|edge_id| BlockGroupEdgeData {
3846                block_group_id: block_group.id,
3847                edge_id: *edge_id,
3848                chromosome_index: 0,
3849                phased: 0,
3850            })
3851            .collect::<Vec<BlockGroupEdgeData>>();
3852        BlockGroupEdge::bulk_create(conn, &block_group_edges);
3853
3854        let path1 = Path::create(conn, "chr1.1", &block_group.id, &[edge1.id, edge4.id]);
3855        let path2 = Path::create(conn, "chr1.2", &block_group.id, edge_ids);
3856
3857        let intervaltree1 = path1.intervaltree(conn);
3858
3859        let node_blocks1 = path1.node_blocks_for_range(&intervaltree1, 0, 8);
3860        let expected_node_blocks1 = vec![NodeIntervalBlock {
3861            block_id: 0,
3862            node_id: node1_id,
3863            start: 0,
3864            end: 8,
3865            sequence_start: 0,
3866            sequence_end: 8,
3867            strand: Strand::Forward,
3868        }];
3869        assert_eq!(node_blocks1, expected_node_blocks1);
3870
3871        let intervaltree2 = path2.intervaltree(conn);
3872
3873        let node_blocks2 = path2.node_blocks_for_range(&intervaltree2, 0, 8);
3874        let expected_node_blocks2 = vec![
3875            NodeIntervalBlock {
3876                block_id: 0,
3877                node_id: node1_id,
3878                start: 0,
3879                end: 5,
3880                sequence_start: 0,
3881                sequence_end: 5,
3882                strand: Strand::Forward,
3883            },
3884            NodeIntervalBlock {
3885                block_id: 1,
3886                node_id: node2_id,
3887                start: 5,
3888                end: 8,
3889                sequence_start: 0,
3890                sequence_end: 3,
3891                strand: Strand::Forward,
3892            },
3893        ];
3894        assert_eq!(node_blocks2, expected_node_blocks2);
3895
3896        let node_blocks3 = path2.node_blocks_for_range(&intervaltree2, 4, 14);
3897        let expected_node_blocks3 = vec![
3898            NodeIntervalBlock {
3899                block_id: 0,
3900                node_id: node1_id,
3901                start: 4,
3902                end: 5,
3903                sequence_start: 4,
3904                sequence_end: 5,
3905                strand: Strand::Forward,
3906            },
3907            NodeIntervalBlock {
3908                block_id: 1,
3909                node_id: node2_id,
3910                start: 5,
3911                end: 13,
3912                sequence_start: 0,
3913                sequence_end: 8,
3914                strand: Strand::Forward,
3915            },
3916            NodeIntervalBlock {
3917                block_id: 2,
3918                node_id: node1_id,
3919                start: 13,
3920                end: 14,
3921                sequence_start: 6,
3922                sequence_end: 7,
3923                strand: Strand::Forward,
3924            },
3925        ];
3926        assert_eq!(node_blocks3, expected_node_blocks3);
3927    }
3928}