Skip to main content

gen_models/
edge.rs

1use std::{
2    collections::{HashMap, HashSet},
3    hash::Hash,
4};
5
6use gen_core::{
7    HashId, PATH_END_NODE_ID, PATH_START_NODE_ID, Strand, calculate_hash, is_end_node,
8    is_start_node, traits::Capnp,
9};
10use gen_graph::{GenGraph, GraphEdge, GraphNode};
11use indexmap::IndexSet;
12use itertools::Itertools;
13use rusqlite::{Row, params};
14use serde::{Deserialize, Serialize};
15
16use crate::{
17    block_group_edge::AugmentedEdge,
18    db::GraphConnection,
19    gen_models_capnp::edge,
20    node::Node,
21    sequence::{Sequence, cached_sequence},
22    traits::*,
23};
24
25#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize, Ord, PartialOrd)]
26pub struct Edge {
27    pub id: HashId,
28    pub source_node_id: HashId,
29    pub source_coordinate: i64,
30    pub source_strand: Strand,
31    pub target_node_id: HashId,
32    pub target_coordinate: i64,
33    pub target_strand: Strand,
34}
35
36impl<'a> Capnp<'a> for Edge {
37    type Builder = edge::Builder<'a>;
38    type Reader = edge::Reader<'a>;
39
40    fn write_capnp(&self, builder: &mut Self::Builder) {
41        builder.set_id(&self.id.0).unwrap();
42        builder.set_source_node_id(&self.source_node_id.0).unwrap();
43        builder.set_source_coordinate(self.source_coordinate);
44        builder.set_source_strand(self.source_strand.into());
45        builder.set_target_node_id(&self.target_node_id.0).unwrap();
46        builder.set_target_coordinate(self.target_coordinate);
47        builder.set_target_strand(self.target_strand.into());
48    }
49
50    fn read_capnp(reader: Self::Reader) -> Self {
51        let id: HashId = reader
52            .get_id()
53            .unwrap()
54            .as_slice()
55            .unwrap()
56            .try_into()
57            .unwrap();
58        let source_node_id = reader
59            .get_source_node_id()
60            .unwrap()
61            .as_slice()
62            .unwrap()
63            .try_into()
64            .unwrap();
65        let source_coordinate = reader.get_source_coordinate();
66        let source_strand = reader.get_source_strand().unwrap().into();
67        let target_node_id = reader
68            .get_target_node_id()
69            .unwrap()
70            .as_slice()
71            .unwrap()
72            .try_into()
73            .unwrap();
74        let target_coordinate = reader.get_target_coordinate();
75        let target_strand = reader.get_target_strand().unwrap().into();
76
77        Edge {
78            id,
79            source_node_id,
80            source_coordinate,
81            source_strand,
82            target_node_id,
83            target_coordinate,
84            target_strand,
85        }
86    }
87}
88
89#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Ord, PartialOrd)]
90pub struct EdgeData {
91    pub source_node_id: HashId,
92    pub source_coordinate: i64,
93    pub source_strand: Strand,
94    pub target_node_id: HashId,
95    pub target_coordinate: i64,
96    pub target_strand: Strand,
97}
98
99impl EdgeData {
100    pub fn id_hash(&self) -> HashId {
101        HashId(calculate_hash(&format!(
102            "{}:{}:{}:{}:{}:{}",
103            self.source_node_id,
104            self.source_coordinate,
105            self.source_strand,
106            self.target_node_id,
107            self.target_coordinate,
108            self.target_strand,
109        )))
110    }
111}
112
113impl From<&Edge> for EdgeData {
114    fn from(item: &Edge) -> Self {
115        EdgeData {
116            source_node_id: item.source_node_id,
117            source_coordinate: item.source_coordinate,
118            source_strand: item.source_strand,
119            target_node_id: item.target_node_id,
120            target_coordinate: item.target_coordinate,
121            target_strand: item.target_strand,
122        }
123    }
124}
125
126#[derive(Eq, Hash, PartialEq)]
127pub struct BlockKey {
128    pub node_id: HashId,
129    pub coordinate: i64,
130}
131
132#[derive(Clone, Debug)]
133pub struct GroupBlock {
134    pub id: i64,
135    pub node_id: HashId,
136    sequence: Option<String>,
137    external_sequence: Option<(String, String)>,
138    pub start: i64,
139    pub end: i64,
140}
141
142impl GroupBlock {
143    pub fn new(id: i64, node_id: HashId, sequence: &Sequence, start: i64, end: i64) -> Self {
144        if sequence.external_sequence {
145            GroupBlock {
146                id,
147                node_id,
148                sequence: None,
149                external_sequence: Some((sequence.file_path.clone(), sequence.name.clone())),
150                start,
151                end,
152            }
153        } else {
154            GroupBlock {
155                id,
156                node_id,
157                sequence: Some(sequence.get_sequence(start, end)),
158                external_sequence: None,
159                start,
160                end,
161            }
162        }
163    }
164
165    pub fn sequence(&self) -> String {
166        if let Some(sequence) = &self.sequence {
167            sequence.to_string()
168        } else if let Some((path, name)) = &self.external_sequence {
169            cached_sequence(path, name, self.start as usize, self.end as usize).unwrap()
170        } else {
171            panic!("Sequence or external sequence is not set.")
172        }
173    }
174}
175
176impl Query for Edge {
177    type Model = Edge;
178
179    const TABLE_NAME: &'static str = "edges";
180
181    fn process_row(row: &Row) -> Self::Model {
182        Edge {
183            id: row.get(0).unwrap(),
184            source_node_id: row.get(1).unwrap(),
185            source_coordinate: row.get(2).unwrap(),
186            source_strand: row.get(3).unwrap(),
187            target_node_id: row.get(4).unwrap(),
188            target_coordinate: row.get(5).unwrap(),
189            target_strand: row.get(6).unwrap(),
190        }
191    }
192}
193
194impl Edge {
195    #[allow(clippy::too_many_arguments)]
196    pub fn create(
197        conn: &GraphConnection,
198        source_node_id: HashId,
199        source_coordinate: i64,
200        source_strand: Strand,
201        target_node_id: HashId,
202        target_coordinate: i64,
203        target_strand: Strand,
204    ) -> Edge {
205        let hash = HashId(calculate_hash(&format!(
206            "{source_node_id}:{source_coordinate}:{source_strand}:{target_node_id}:{target_coordinate}:{target_strand}"
207        )));
208        let query = "INSERT INTO edges (id, source_node_id, source_coordinate, source_strand, target_node_id, target_coordinate, target_strand) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);";
209        let mut stmt = conn.prepare(query).unwrap();
210        match stmt.execute(params![
211            hash,
212            source_node_id,
213            source_coordinate,
214            source_strand,
215            target_node_id,
216            target_coordinate,
217            target_strand
218        ]) {
219            Ok(_) => {}
220            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
221                if err.code != rusqlite::ErrorCode::ConstraintViolation {
222                    panic!("something bad happened querying the database")
223                }
224            }
225            Err(_) => {
226                panic!("something bad happened querying the database")
227            }
228        }
229        Edge {
230            id: hash,
231            source_node_id,
232            source_coordinate,
233            source_strand,
234            target_node_id,
235            target_coordinate,
236            target_strand,
237        }
238    }
239
240    pub fn bulk_create(conn: &GraphConnection, edges: &[EdgeData]) -> Vec<HashId> {
241        let edge_ids = edges.iter().map(|edge| edge.id_hash()).collect::<Vec<_>>();
242        let query = Edge::query_by_ids(conn, &edge_ids);
243        let existing_edges = query.iter().map(|edge| &edge.id).collect::<HashSet<_>>();
244
245        let mut edges_to_insert = IndexSet::new();
246        for (index, edge) in edge_ids.iter().enumerate() {
247            if !existing_edges.contains(edge) {
248                edges_to_insert.insert(&edges[index]);
249            }
250        }
251
252        let batch_size = max_rows_per_batch(conn, 7);
253
254        for chunk in &edges_to_insert.iter().chunks(batch_size) {
255            let mut rows = vec![];
256            let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
257            for edge in chunk {
258                params.push(Box::new(edge.id_hash()));
259                params.push(Box::new(edge.source_node_id));
260                params.push(Box::new(edge.source_coordinate));
261                params.push(Box::new(edge.source_strand));
262                params.push(Box::new(edge.target_node_id));
263                params.push(Box::new(edge.target_coordinate));
264                params.push(Box::new(edge.target_strand));
265                rows.push("(?, ?, ?, ?, ?, ?, ?)");
266            }
267            let sql = format!(
268                "INSERT INTO edges (id, source_node_id, source_coordinate, source_strand, target_node_id, target_coordinate, target_strand) VALUES {};",
269                rows.join(",")
270            );
271            conn.execute(&sql, rusqlite::params_from_iter(params))
272                .unwrap();
273        }
274        edge_ids
275    }
276
277    pub fn to_data(edge: Edge) -> EdgeData {
278        EdgeData {
279            source_node_id: edge.source_node_id,
280            source_coordinate: edge.source_coordinate,
281            source_strand: edge.source_strand,
282            target_node_id: edge.target_node_id,
283            target_coordinate: edge.target_coordinate,
284            target_strand: edge.target_strand,
285        }
286    }
287
288    fn get_block_boundaries(
289        source_edges: Option<&Vec<&Edge>>,
290        target_edges: Option<&Vec<&Edge>>,
291    ) -> Vec<i64> {
292        let mut block_boundary_coordinates = HashSet::new();
293        if let Some(actual_source_edges) = source_edges {
294            for source_edge in actual_source_edges {
295                block_boundary_coordinates.insert(source_edge.source_coordinate);
296            }
297        }
298        if let Some(actual_target_edges) = target_edges {
299            for target_edge in actual_target_edges {
300                block_boundary_coordinates.insert(target_edge.target_coordinate);
301            }
302        }
303
304        block_boundary_coordinates
305            .into_iter()
306            .sorted_by(|c1, c2| Ord::cmp(&c1, &c2))
307            .collect::<Vec<i64>>()
308    }
309
310    pub fn blocks_from_edges(conn: &GraphConnection, edges: &[AugmentedEdge]) -> Vec<GroupBlock> {
311        let mut node_ids = IndexSet::new();
312        let mut edges_by_source_node_id: HashMap<HashId, Vec<&Edge>> = HashMap::new();
313        let mut edges_by_target_node_id: HashMap<HashId, Vec<&Edge>> = HashMap::new();
314        for edge in edges.iter().map(|edge| &edge.edge) {
315            if !is_start_node(edge.source_node_id) {
316                node_ids.insert(edge.source_node_id);
317            }
318            edges_by_source_node_id
319                .entry(edge.source_node_id)
320                .and_modify(|edges| edges.push(edge))
321                .or_insert(vec![edge]);
322
323            if !is_end_node(edge.target_node_id) {
324                node_ids.insert(edge.target_node_id);
325            }
326            edges_by_target_node_id
327                .entry(edge.target_node_id)
328                .and_modify(|edges| edges.push(edge))
329                .or_insert(vec![edge]);
330        }
331
332        let sequences_by_node_id = Node::get_sequences_by_node_ids(
333            conn,
334            &node_ids.iter().copied().collect::<Vec<HashId>>(),
335        );
336
337        let mut blocks = vec![];
338        let mut block_index = 0;
339        // we sort by keys to exploit the external sequence cache which keeps the most recently used
340        // external sequence in memory.
341        for (node_id, sequence) in sequences_by_node_id
342            .iter()
343            .sorted_by_key(|(_node_id, seq)| seq.hash)
344        {
345            let block_boundaries = Edge::get_block_boundaries(
346                edges_by_source_node_id.get(node_id),
347                edges_by_target_node_id.get(node_id),
348            );
349
350            if !block_boundaries.is_empty() {
351                for (start, end) in block_boundaries.clone().into_iter().tuple_windows() {
352                    let block = GroupBlock::new(block_index, *node_id, sequence, start, end);
353                    blocks.push(block);
354                    block_index += 1;
355                }
356            } else {
357                blocks.push(GroupBlock::new(
358                    block_index,
359                    *node_id,
360                    sequence,
361                    0,
362                    sequence.length,
363                ));
364                block_index += 1;
365            }
366        }
367
368        // NOTE: We need a dedicated start node and a dedicated end node for the graph formed by the
369        // block group, since different paths in the block group may start or end at different
370        // places on sequences.  These two "start sequence" and "end sequence" blocks will serve
371        // that role.
372        let start_block = GroupBlock::new(
373            block_index + 1,
374            PATH_START_NODE_ID,
375            &Sequence::new().sequence_type("DNA").sequence("").build(),
376            0,
377            0,
378        );
379        blocks.push(start_block);
380        let end_block = GroupBlock::new(
381            block_index + 2,
382            PATH_END_NODE_ID,
383            &Sequence::new().sequence_type("DNA").sequence("").build(),
384            0,
385            0,
386        );
387        blocks.push(end_block);
388        blocks
389    }
390
391    pub fn build_graph(
392        edges: &Vec<AugmentedEdge>,
393        blocks: &Vec<GroupBlock>,
394    ) -> (GenGraph, HashMap<(i64, i64), Edge>) {
395        let blocks_by_start = blocks
396            .clone()
397            .into_iter()
398            .map(|block| {
399                (
400                    BlockKey {
401                        node_id: block.node_id,
402                        coordinate: block.start,
403                    },
404                    block.id,
405                )
406            })
407            .collect::<HashMap<BlockKey, i64>>();
408        let blocks_by_end = blocks
409            .clone()
410            .into_iter()
411            .map(|block| {
412                (
413                    BlockKey {
414                        node_id: block.node_id,
415                        coordinate: block.end,
416                    },
417                    block.id,
418                )
419            })
420            .collect::<HashMap<BlockKey, i64>>();
421        let block_coordinates = blocks
422            .clone()
423            .into_iter()
424            .map(|block| (block.id, (block.start, block.end)))
425            .collect::<HashMap<i64, (i64, i64)>>();
426
427        let mut graph = GenGraph::new();
428        let mut edges_by_node_pair = HashMap::new();
429        for block in blocks {
430            graph.add_node(GraphNode {
431                block_id: block.id,
432                node_id: block.node_id,
433                sequence_start: block.start,
434                sequence_end: block.end,
435            });
436        }
437        for augmented_edge in edges {
438            let edge = &augmented_edge.edge;
439            let source_key = BlockKey {
440                node_id: edge.source_node_id,
441                coordinate: edge.source_coordinate,
442            };
443            let source_id = blocks_by_end.get(&source_key);
444            let target_key = BlockKey {
445                node_id: edge.target_node_id,
446                coordinate: edge.target_coordinate,
447            };
448            let target_id = blocks_by_start.get(&target_key);
449
450            if let Some(source_id_value) = source_id
451                && let Some(target_id_value) = target_id
452            {
453                let source_node = GraphNode {
454                    block_id: *source_id_value,
455                    node_id: edge.source_node_id,
456                    sequence_start: block_coordinates[source_id_value].0,
457                    sequence_end: block_coordinates[source_id_value].1,
458                };
459                let target_node = GraphNode {
460                    block_id: *target_id_value,
461                    node_id: edge.target_node_id,
462                    sequence_start: block_coordinates[target_id_value].0,
463                    sequence_end: block_coordinates[target_id_value].1,
464                };
465                let graph_edge = GraphEdge {
466                    edge_id: edge.id,
467                    source_strand: edge.source_strand,
468                    target_strand: edge.target_strand,
469                    chromosome_index: augmented_edge.chromosome_index,
470                    phased: augmented_edge.phased,
471                    created_on: augmented_edge.created_on,
472                };
473                if let Some(existing_edges) = graph.edge_weight_mut(source_node, target_node) {
474                    existing_edges.push(graph_edge);
475                } else {
476                    graph.add_edge(source_node, target_node, vec![graph_edge]);
477                }
478                edges_by_node_pair.insert((*source_id_value, *target_id_value), edge.clone());
479            }
480        }
481
482        (graph, edges_by_node_pair)
483    }
484
485    pub fn is_start_edge(&self) -> bool {
486        self.source_node_id == PATH_START_NODE_ID
487    }
488
489    pub fn is_end_edge(&self) -> bool {
490        self.target_node_id == PATH_END_NODE_ID
491    }
492}
493
#[cfg(test)]
mod tests {
    use gen_core::PathBlock;

    // `super::*` brings the items of the module under test into scope.
    use super::*;
    use crate::{
        block_group::{BlockGroup, PathChange},
        block_group_edge::BlockGroupEdge,
        collection::Collection,
        sequence::Sequence,
        test_helpers::{get_connection, setup_block_group},
    };

    /// Describes an unsaved edge whose two sides are both on the forward strand.
    fn forward_edge(
        source_node_id: HashId,
        source_coordinate: i64,
        target_node_id: HashId,
        target_coordinate: i64,
    ) -> EdgeData {
        EdgeData {
            source_node_id,
            source_coordinate,
            source_strand: Strand::Forward,
            target_node_id,
            target_coordinate,
            target_strand: Strand::Forward,
        }
    }

    /// The three edges of the chain `start -> node1 -> node2 -> end` shared by the bulk tests.
    fn chain_edges(node1_id: HashId, node2_id: HashId) -> [EdgeData; 3] {
        [
            forward_edge(PATH_START_NODE_ID, -1, node1_id, 1),
            forward_edge(node1_id, 2, node2_id, 3),
            forward_edge(node2_id, 4, PATH_END_NODE_ID, -1),
        ]
    }

    /// Saves a DNA sequence with the given bases and creates a node for it.
    fn dna_node(conn: &GraphConnection, bases: &str, label: &str) -> HashId {
        let sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence(bases)
            .save(conn);
        Node::create(conn, &sequence.hash, &HashId::convert_str(label))
    }

    /// Stores a forward/forward edge through `Edge::create`.
    fn create_forward_edge(
        conn: &GraphConnection,
        source_node_id: HashId,
        source_coordinate: i64,
        target_node_id: HashId,
        target_coordinate: i64,
    ) -> Edge {
        Edge::create(
            conn,
            source_node_id,
            source_coordinate,
            Strand::Forward,
            target_node_id,
            target_coordinate,
            Strand::Forward,
        )
    }

    /// Checks the three non-key fields the bulk tests care about on a stored edge.
    fn assert_edge(
        edge: &Edge,
        source_coordinate: i64,
        target_node_id: HashId,
        target_coordinate: i64,
    ) {
        assert_eq!(edge.source_coordinate, source_coordinate);
        assert_eq!(edge.target_node_id, target_node_id);
        assert_eq!(edge.target_coordinate, target_coordinate);
    }

    /// Checks that the stored edges, keyed by source node, match `chain_edges`.
    fn assert_chain_stored(
        edges_by_source_node_id: &HashMap<HashId, Edge>,
        node1_id: HashId,
        node2_id: HashId,
    ) {
        let from_start = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap();
        assert_edge(from_start, -1, node1_id, 1);
        let from_node1 = edges_by_source_node_id.get(&node1_id).unwrap();
        assert_edge(from_node1, 2, node2_id, 3);
        let from_node2 = edges_by_source_node_id.get(&node2_id).unwrap();
        assert_edge(from_node2, 4, PATH_END_NODE_ID, -1);
    }

    #[test]
    fn test_bulk_create() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let node1_id = dna_node(conn, "ATCGATCG", "1");
        let node2_id = dna_node(conn, "AAAAAAAA", "2");

        let edge_ids = Edge::bulk_create(conn, &chain_edges(node1_id, node2_id));
        assert_eq!(edge_ids.len(), 3);
        let edges = Edge::query_by_ids(conn, &edge_ids);
        assert_eq!(edges.len(), 3);

        let edges_by_source_node_id = edges
            .into_iter()
            .map(|edge| (edge.source_node_id, edge))
            .collect::<HashMap<_, Edge>>();
        assert_chain_stored(&edges_by_source_node_id, node1_id, node2_id);
    }

    #[test]
    fn test_bulk_create_returns_edges_in_order() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let node1_id = dna_node(conn, "ATCGATCG", "1");
        let node2_id = dna_node(conn, "AAAAAAAA", "2");
        let [edge1, edge2, edge3] = chain_edges(node1_id, node2_id);

        // First insert only the last two edges of the chain.
        let edges = vec![edge2, edge3];
        let edge_ids1 = Edge::bulk_create(conn, &edges);
        assert_eq!(edge_ids1.len(), 2);
        for (id, expected) in edge_ids1.iter().zip(&edges) {
            let edge = Edge::get_by_id(conn, id).unwrap();
            assert_eq!(EdgeData::from(&edge), *expected);
        }

        // Then insert all three: the two stored edges must keep their ids and positions.
        let edges = vec![edge1, edge2, edge3];
        let edge_ids2 = Edge::bulk_create(conn, &edges);
        assert_eq!(edge_ids2[1], edge_ids1[0]);
        assert_eq!(edge_ids2[2], edge_ids1[1]);
        assert_eq!(edge_ids2.len(), 3);
        for (id, expected) in edge_ids2.iter().zip(&edges) {
            let edge = Edge::get_by_id(conn, id).unwrap();
            assert_eq!(EdgeData::from(&edge), *expected);
        }
    }

    #[test]
    fn test_bulk_create_with_existing_edge() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let node1_id = dna_node(conn, "ATCGATCG", "1");
        // NOTE: Create one edge ahead of time to confirm an existing row ID gets returned in the
        // bulk create
        let existing_edge = create_forward_edge(conn, PATH_START_NODE_ID, -1, node1_id, 1);
        assert_eq!(existing_edge.source_node_id, PATH_START_NODE_ID);
        assert_edge(&existing_edge, -1, node1_id, 1);

        let node2_id = dna_node(conn, "AAAAAAAA", "2");

        let edge_ids = Edge::bulk_create(conn, &chain_edges(node1_id, node2_id));
        assert_eq!(edge_ids.len(), 3);
        let edges = Edge::query_by_ids(conn, &edge_ids);
        assert_eq!(edges.len(), 3);

        let edges_by_source_node_id = edges
            .into_iter()
            .map(|edge| (edge.source_node_id, edge))
            .collect::<HashMap<_, Edge>>();

        let from_start = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap();
        assert_eq!(from_start.id, existing_edge.id);
        assert_chain_stored(&edges_by_source_node_id, node1_id, node2_id);
    }

    #[test]
    fn test_blocks_from_edges() {
        let conn = get_connection(None).unwrap();
        let (block_group_id, path) = setup_block_group(&conn);

        let initial_edges = BlockGroupEdge::edges_for_block_group(&conn, &block_group_id);
        let initial_blocks = Edge::blocks_from_edges(&conn, &initial_edges);

        // 4 actual sequences: 10-length ones of all A, all T, all C, all G
        // 2 terminal node blocks (start/end)
        // 6 total
        assert_eq!(initial_blocks.len(), 6);

        let insert_sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence("NNNN")
            .save(&conn);
        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
        let insert = PathBlock {
            id: 0,
            node_id: insert_node_id,
            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
            sequence_start: 0,
            sequence_end: 4,
            path_start: 7,
            path_end: 15,
            strand: Strand::Forward,
        };
        let change = PathChange {
            block_group_id,
            path: path.clone(),
            path_accession: None,
            start: 7,
            end: 15,
            block: insert,
            chromosome_index: 0,
            phased: 0,
            preserve_edge: true,
        };
        let tree = path.intervaltree(&conn);
        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
        let mut edges = BlockGroupEdge::edges_for_block_group(&conn, &block_group_id);

        // 2 10-length sequences of all C, all G
        // 1 inserted NNNN sequence
        // 4 split blocks (A and T sequences were split) resulting from the inserted sequence
        // 2 terminal node blocks (start/end)
        // 9 total
        let blocks_after_insert = Edge::blocks_from_edges(&conn, &edges);
        assert_eq!(blocks_after_insert.len(), 9);

        // Confirm that ordering doesn't matter: the same 9 blocks are expected when the edges
        // are supplied in reverse.
        edges.reverse();
        let blocks_from_reversed = Edge::blocks_from_edges(&conn, &edges);
        assert_eq!(blocks_from_reversed.len(), 9);
    }

    #[test]
    fn test_get_block_boundaries() {
        let conn = get_connection(None).unwrap();
        let template_node_id = dna_node(&conn, "AAAAAAAAAA", "1");
        let insert_node_id = dna_node(&conn, "NNNN", "2");

        // The template is left at coordinate 2 and re-entered at coordinate 3.
        let edge1 = create_forward_edge(&conn, template_node_id, 2, insert_node_id, 0);
        let edge2 = create_forward_edge(&conn, insert_node_id, 4, template_node_id, 3);

        let boundaries = Edge::get_block_boundaries(Some(&vec![&edge1]), Some(&vec![&edge2]));
        assert_eq!(boundaries, vec![2, 3]);
    }

    #[test]
    fn test_get_block_boundaries_with_two_original_sequences() {
        let conn = get_connection(None).unwrap();
        let template1_node_id = dna_node(&conn, "AAAAAAAAAA", "1");
        let template2_node_id = dna_node(&conn, "TTTTTTTTTT", "2");
        let insert_node_id = dna_node(&conn, "NNNN", "3");

        let edge1 = create_forward_edge(&conn, template1_node_id, 2, insert_node_id, 0);
        let edge2 = create_forward_edge(&conn, insert_node_id, 4, template2_node_id, 3);

        let outgoing_boundaries = Edge::get_block_boundaries(Some(&vec![&edge1]), None);
        assert_eq!(outgoing_boundaries, vec![2]);
        let incoming_boundaries = Edge::get_block_boundaries(None, Some(&vec![&edge2]));
        assert_eq!(incoming_boundaries, vec![3]);
    }

    #[test]
    fn test_edge_capnp_serialization() {
        use capnp::message::TypedBuilder;

        let original = Edge {
            id: HashId::pad_str(789),
            source_node_id: HashId::convert_str("1"),
            source_coordinate: 10,
            source_strand: Strand::Forward,
            target_node_id: HashId::convert_str("2"),
            target_coordinate: 20,
            target_strand: Strand::Reverse,
        };

        let mut message = TypedBuilder::<edge::Owned>::new_default();
        let mut root = message.init_root();
        original.write_capnp(&mut root);

        // Reading back what was just written must reproduce the edge exactly.
        let round_tripped = Edge::read_capnp(root.into_reader());
        assert_eq!(original, round_tripped);
    }
}