Skip to main content

gen_models/
edge.rs

1use std::{
2    collections::{HashMap, HashSet},
3    hash::Hash,
4};
5
6use gen_core::{
7    HashId, PATH_END_NODE_ID, PATH_START_NODE_ID, Strand, calculate_hash, is_end_node,
8    is_start_node, traits::Capnp,
9};
10use gen_graph::{GenGraph, GraphEdge, GraphNode};
11use indexmap::IndexSet;
12use itertools::Itertools;
13use rusqlite::{Row, params};
14use serde::{Deserialize, Serialize};
15
16use crate::{
17    block_group_edge::AugmentedEdge,
18    db::GraphConnection,
19    gen_models_capnp::edge,
20    node::Node,
21    sequence::{Sequence, cached_sequence},
22    traits::*,
23};
24
/// A stored edge joining a coordinate on a source node to a coordinate on a target node.
///
/// Rows of the `edges` table are mapped onto this struct by its `Query` implementation.
#[derive(Clone, Debug, Eq, Hash, PartialEq, Deserialize, Serialize, Ord, PartialOrd)]
pub struct Edge {
    /// Edge identifier; `Edge::create` computes it as a hash of the six fields below.
    pub id: HashId,
    /// Node the edge leaves from.
    pub source_node_id: HashId,
    /// Coordinate on the source node; `Edge::build_graph` matches it against block end
    /// coordinates.
    pub source_coordinate: i64,
    /// Strand on the source side of the edge.
    pub source_strand: Strand,
    /// Node the edge arrives at.
    pub target_node_id: HashId,
    /// Coordinate on the target node; `Edge::build_graph` matches it against block start
    /// coordinates.
    pub target_coordinate: i64,
    /// Strand on the target side of the edge.
    pub target_strand: Strand,
}
35
36impl<'a> Capnp<'a> for Edge {
37    type Builder = edge::Builder<'a>;
38    type Reader = edge::Reader<'a>;
39
40    fn write_capnp(&self, builder: &mut Self::Builder) {
41        builder.set_id(&self.id.0).unwrap();
42        builder.set_source_node_id(&self.source_node_id.0).unwrap();
43        builder.set_source_coordinate(self.source_coordinate);
44        builder.set_source_strand(self.source_strand.into());
45        builder.set_target_node_id(&self.target_node_id.0).unwrap();
46        builder.set_target_coordinate(self.target_coordinate);
47        builder.set_target_strand(self.target_strand.into());
48    }
49
50    fn read_capnp(reader: Self::Reader) -> Self {
51        let id: HashId = reader
52            .get_id()
53            .unwrap()
54            .as_slice()
55            .unwrap()
56            .try_into()
57            .unwrap();
58        let source_node_id = reader
59            .get_source_node_id()
60            .unwrap()
61            .as_slice()
62            .unwrap()
63            .try_into()
64            .unwrap();
65        let source_coordinate = reader.get_source_coordinate();
66        let source_strand = reader.get_source_strand().unwrap().into();
67        let target_node_id = reader
68            .get_target_node_id()
69            .unwrap()
70            .as_slice()
71            .unwrap()
72            .try_into()
73            .unwrap();
74        let target_coordinate = reader.get_target_coordinate();
75        let target_strand = reader.get_target_strand().unwrap().into();
76
77        Edge {
78            id,
79            source_node_id,
80            source_coordinate,
81            source_strand,
82            target_node_id,
83            target_coordinate,
84            target_strand,
85        }
86    }
87}
88
/// The fields of an edge without its id.
///
/// `id_hash` derives the id from these fields, and `Edge::bulk_create` accepts a slice of
/// these as its input.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Ord, PartialOrd)]
pub struct EdgeData {
    /// Node the edge leaves from.
    pub source_node_id: HashId,
    /// Coordinate on the source node.
    pub source_coordinate: i64,
    /// Strand on the source side of the edge.
    pub source_strand: Strand,
    /// Node the edge arrives at.
    pub target_node_id: HashId,
    /// Coordinate on the target node.
    pub target_coordinate: i64,
    /// Strand on the target side of the edge.
    pub target_strand: Strand,
}
98
99impl EdgeData {
100    pub fn id_hash(&self) -> HashId {
101        HashId(calculate_hash(&format!(
102            "{}:{}:{}:{}:{}:{}",
103            self.source_node_id,
104            self.source_coordinate,
105            self.source_strand,
106            self.target_node_id,
107            self.target_coordinate,
108            self.target_strand,
109        )))
110    }
111}
112
113impl From<&Edge> for EdgeData {
114    fn from(item: &Edge) -> Self {
115        EdgeData {
116            source_node_id: item.source_node_id,
117            source_coordinate: item.source_coordinate,
118            source_strand: item.source_strand,
119            target_node_id: item.target_node_id,
120            target_coordinate: item.target_coordinate,
121            target_strand: item.target_strand,
122        }
123    }
124}
125
/// Hashable (node, coordinate) pair.
///
/// `Edge::build_graph` uses it to index blocks once by their start coordinate and once by
/// their end coordinate.
#[derive(Eq, Hash, PartialEq)]
pub struct BlockKey {
    /// Node the coordinate belongs to.
    pub node_id: HashId,
    /// Coordinate on that node.
    pub coordinate: i64,
}
131
/// A `start..end` span of one node's sequence, as produced by `Edge::blocks_from_edges`.
///
/// Exactly one of `sequence` / `external_sequence` is populated by `GroupBlock::new`.
#[derive(Clone, Debug)]
pub struct GroupBlock {
    /// Index assigned to the block by its creator.
    pub id: i64,
    /// Node whose sequence this block covers.
    pub node_id: HashId,
    // Sequence text held inline; set when the backing `Sequence` is not external.
    sequence: Option<String>,
    // (file path, name) of the backing sequence when it is external; the text is then
    // loaded on demand through `cached_sequence`.
    external_sequence: Option<(String, String)>,
    /// Start coordinate of the span on the node's sequence.
    pub start: i64,
    /// End coordinate of the span on the node's sequence.
    pub end: i64,
}
141
142impl GroupBlock {
143    pub fn new(id: i64, node_id: HashId, sequence: &Sequence, start: i64, end: i64) -> Self {
144        if sequence.external_sequence {
145            GroupBlock {
146                id,
147                node_id,
148                sequence: None,
149                external_sequence: Some((sequence.file_path.clone(), sequence.name.clone())),
150                start,
151                end,
152            }
153        } else {
154            GroupBlock {
155                id,
156                node_id,
157                sequence: Some(sequence.get_sequence(start, end)),
158                external_sequence: None,
159                start,
160                end,
161            }
162        }
163    }
164
165    pub fn sequence(&self) -> String {
166        if let Some(sequence) = &self.sequence {
167            sequence.to_string()
168        } else if let Some((path, name)) = &self.external_sequence {
169            cached_sequence(path, name, self.start as usize, self.end as usize).unwrap()
170        } else {
171            panic!("Sequence or external sequence is not set.")
172        }
173    }
174}
175
176impl Query for Edge {
177    type Model = Edge;
178
179    const TABLE_NAME: &'static str = "edges";
180
181    fn process_row(row: &Row) -> Self::Model {
182        Edge {
183            id: row.get(0).unwrap(),
184            source_node_id: row.get(1).unwrap(),
185            source_coordinate: row.get(2).unwrap(),
186            source_strand: row.get(3).unwrap(),
187            target_node_id: row.get(4).unwrap(),
188            target_coordinate: row.get(5).unwrap(),
189            target_strand: row.get(6).unwrap(),
190        }
191    }
192}
193
194impl Edge {
195    #[allow(clippy::too_many_arguments)]
196    pub fn create(
197        conn: &GraphConnection,
198        source_node_id: HashId,
199        source_coordinate: i64,
200        source_strand: Strand,
201        target_node_id: HashId,
202        target_coordinate: i64,
203        target_strand: Strand,
204    ) -> Edge {
205        let hash = HashId(calculate_hash(&format!(
206            "{source_node_id}:{source_coordinate}:{source_strand}:{target_node_id}:{target_coordinate}:{target_strand}"
207        )));
208        let query = "INSERT INTO edges (id, source_node_id, source_coordinate, source_strand, target_node_id, target_coordinate, target_strand) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);";
209        let mut stmt = conn.prepare(query).unwrap();
210        match stmt.execute(params![
211            hash,
212            source_node_id,
213            source_coordinate,
214            source_strand,
215            target_node_id,
216            target_coordinate,
217            target_strand
218        ]) {
219            Ok(_) => {}
220            Err(rusqlite::Error::SqliteFailure(err, _details)) => {
221                if err.code != rusqlite::ErrorCode::ConstraintViolation {
222                    panic!("something bad happened querying the database")
223                }
224            }
225            Err(_) => {
226                panic!("something bad happened querying the database")
227            }
228        }
229        Edge {
230            id: hash,
231            source_node_id,
232            source_coordinate,
233            source_strand,
234            target_node_id,
235            target_coordinate,
236            target_strand,
237        }
238    }
239
240    pub fn bulk_create(conn: &GraphConnection, edges: &[EdgeData]) -> Vec<HashId> {
241        let edge_ids = edges.iter().map(|edge| edge.id_hash()).collect::<Vec<_>>();
242        let query = Edge::query_by_ids(conn, &edge_ids);
243        let existing_edges = query.iter().map(|edge| &edge.id).collect::<HashSet<_>>();
244
245        let mut edges_to_insert = IndexSet::new();
246        for (index, edge) in edge_ids.iter().enumerate() {
247            if !existing_edges.contains(edge) {
248                edges_to_insert.insert(&edges[index]);
249            }
250        }
251
252        let batch_size = max_rows_per_batch(conn, 7);
253
254        for chunk in &edges_to_insert.iter().chunks(batch_size) {
255            let mut rows = vec![];
256            let mut params: Vec<Box<dyn rusqlite::ToSql>> = Vec::new();
257            for edge in chunk {
258                params.push(Box::new(edge.id_hash()));
259                params.push(Box::new(edge.source_node_id));
260                params.push(Box::new(edge.source_coordinate));
261                params.push(Box::new(edge.source_strand));
262                params.push(Box::new(edge.target_node_id));
263                params.push(Box::new(edge.target_coordinate));
264                params.push(Box::new(edge.target_strand));
265                rows.push("(?, ?, ?, ?, ?, ?, ?)");
266            }
267            let sql = format!(
268                "INSERT INTO edges (id, source_node_id, source_coordinate, source_strand, target_node_id, target_coordinate, target_strand) VALUES {};",
269                rows.join(",")
270            );
271            conn.execute(&sql, rusqlite::params_from_iter(params))
272                .unwrap();
273        }
274        edge_ids
275    }
276
277    pub fn to_data(edge: Edge) -> EdgeData {
278        EdgeData {
279            source_node_id: edge.source_node_id,
280            source_coordinate: edge.source_coordinate,
281            source_strand: edge.source_strand,
282            target_node_id: edge.target_node_id,
283            target_coordinate: edge.target_coordinate,
284            target_strand: edge.target_strand,
285        }
286    }
287
288    fn get_block_boundaries(
289        source_edges: Option<&Vec<&Edge>>,
290        target_edges: Option<&Vec<&Edge>>,
291    ) -> Vec<i64> {
292        let mut block_boundary_coordinates = HashSet::new();
293        if let Some(actual_source_edges) = source_edges {
294            for source_edge in actual_source_edges {
295                block_boundary_coordinates.insert(source_edge.source_coordinate);
296            }
297        }
298        if let Some(actual_target_edges) = target_edges {
299            for target_edge in actual_target_edges {
300                block_boundary_coordinates.insert(target_edge.target_coordinate);
301            }
302        }
303
304        block_boundary_coordinates
305            .into_iter()
306            .sorted_by(|c1, c2| Ord::cmp(&c1, &c2))
307            .collect::<Vec<i64>>()
308    }
309
310    pub fn blocks_from_edges(conn: &GraphConnection, edges: &[AugmentedEdge]) -> Vec<GroupBlock> {
311        let mut node_ids = IndexSet::new();
312        let mut edges_by_source_node_id: HashMap<HashId, Vec<&Edge>> = HashMap::new();
313        let mut edges_by_target_node_id: HashMap<HashId, Vec<&Edge>> = HashMap::new();
314        for edge in edges.iter().map(|edge| &edge.edge) {
315            if !is_start_node(edge.source_node_id) {
316                node_ids.insert(edge.source_node_id);
317            }
318            edges_by_source_node_id
319                .entry(edge.source_node_id)
320                .and_modify(|edges| edges.push(edge))
321                .or_insert(vec![edge]);
322
323            if !is_end_node(edge.target_node_id) {
324                node_ids.insert(edge.target_node_id);
325            }
326            edges_by_target_node_id
327                .entry(edge.target_node_id)
328                .and_modify(|edges| edges.push(edge))
329                .or_insert(vec![edge]);
330        }
331
332        let sequences_by_node_id = Node::get_sequences_by_node_ids(
333            conn,
334            &node_ids.iter().copied().collect::<Vec<HashId>>(),
335        );
336
337        let mut blocks = vec![];
338        let mut block_index = 0;
339        // we sort by keys to exploit the external sequence cache which keeps the most recently used
340        // external sequence in memory.
341        for (node_id, sequence) in sequences_by_node_id
342            .iter()
343            .sorted_by_key(|(_node_id, seq)| seq.hash)
344        {
345            let block_boundaries = Edge::get_block_boundaries(
346                edges_by_source_node_id.get(node_id),
347                edges_by_target_node_id.get(node_id),
348            );
349
350            if !block_boundaries.is_empty() {
351                for (start, end) in block_boundaries.clone().into_iter().tuple_windows() {
352                    let block = GroupBlock::new(block_index, *node_id, sequence, start, end);
353                    blocks.push(block);
354                    block_index += 1;
355                }
356            } else {
357                blocks.push(GroupBlock::new(
358                    block_index,
359                    *node_id,
360                    sequence,
361                    0,
362                    sequence.length,
363                ));
364                block_index += 1;
365            }
366        }
367
368        // NOTE: We need a dedicated start node and a dedicated end node for the graph formed by the
369        // block group, since different paths in the block group may start or end at different
370        // places on sequences.  These two "start sequence" and "end sequence" blocks will serve
371        // that role.
372        let start_block = GroupBlock::new(
373            block_index + 1,
374            PATH_START_NODE_ID,
375            &Sequence::new().sequence_type("DNA").sequence("").build(),
376            0,
377            0,
378        );
379        blocks.push(start_block);
380        let end_block = GroupBlock::new(
381            block_index + 2,
382            PATH_END_NODE_ID,
383            &Sequence::new().sequence_type("DNA").sequence("").build(),
384            0,
385            0,
386        );
387        blocks.push(end_block);
388        blocks
389    }
390
391    pub fn build_graph(
392        edges: &Vec<AugmentedEdge>,
393        blocks: &Vec<GroupBlock>,
394    ) -> (GenGraph, HashMap<(i64, i64), Edge>) {
395        let graph_node_for_block = |block: &GroupBlock| GraphNode {
396            node_id: block.node_id,
397            sequence_start: block.start,
398            sequence_end: block.end,
399        };
400        let blocks_by_start = blocks
401            .iter()
402            .map(|block| {
403                (
404                    BlockKey {
405                        node_id: block.node_id,
406                        coordinate: block.start,
407                    },
408                    block,
409                )
410            })
411            .collect::<HashMap<BlockKey, &GroupBlock>>();
412        let blocks_by_end = blocks
413            .iter()
414            .map(|block| {
415                (
416                    BlockKey {
417                        node_id: block.node_id,
418                        coordinate: block.end,
419                    },
420                    block,
421                )
422            })
423            .collect::<HashMap<BlockKey, &GroupBlock>>();
424
425        let mut graph = GenGraph::new();
426        let mut edges_by_node_pair = HashMap::new();
427        for block in blocks {
428            graph.add_node(graph_node_for_block(block));
429        }
430        for augmented_edge in edges {
431            let edge = &augmented_edge.edge;
432            let source_key = BlockKey {
433                node_id: edge.source_node_id,
434                coordinate: edge.source_coordinate,
435            };
436            let source_id = blocks_by_end.get(&source_key);
437            let target_key = BlockKey {
438                node_id: edge.target_node_id,
439                coordinate: edge.target_coordinate,
440            };
441            let target_id = blocks_by_start.get(&target_key);
442
443            if let Some(source_block) = source_id
444                && let Some(target_block) = target_id
445            {
446                let source_node = graph_node_for_block(source_block);
447                let target_node = graph_node_for_block(target_block);
448                let graph_edge = GraphEdge {
449                    edge_id: edge.id,
450                    source_strand: edge.source_strand,
451                    target_strand: edge.target_strand,
452                    chromosome_index: augmented_edge.chromosome_index,
453                    phased: augmented_edge.phased,
454                    created_on: augmented_edge.created_on,
455                };
456                if let Some(existing_edges) = graph.edge_weight_mut(source_node, target_node) {
457                    existing_edges.push(graph_edge);
458                } else {
459                    graph.add_edge(source_node, target_node, vec![graph_edge]);
460                }
461                edges_by_node_pair.insert((source_block.id, target_block.id), edge.clone());
462            }
463        }
464
465        (graph, edges_by_node_pair)
466    }
467
    /// Returns `true` when this edge's source node is `PATH_START_NODE_ID`.
    pub fn is_start_edge(&self) -> bool {
        self.source_node_id == PATH_START_NODE_ID
    }
471
    /// Returns `true` when this edge's target node is `PATH_END_NODE_ID`.
    pub fn is_end_edge(&self) -> bool {
        self.target_node_id == PATH_END_NODE_ID
    }
475}
476
477#[cfg(test)]
478mod tests {
    // Imports for the tests; `use super::*` below pulls in every name from the parent module.
480    use gen_core::PathBlock;
481
482    use super::*;
483    use crate::{
484        block_group::{BlockGroup, PathChange},
485        block_group_edge::BlockGroupEdge,
486        collection::Collection,
487        sequence::Sequence,
488        test_helpers::{get_connection, setup_block_group},
489    };
490
    /// Bulk-creates a start -> node1 -> node2 -> end chain of three edges and checks that
    /// all three are stored with the expected endpoints.
    #[test]
    fn test_bulk_create() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let sequence1 = Sequence::new()
            .sequence_type("DNA")
            .sequence("ATCGATCG")
            .save(conn);
        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
        let edge1 = EdgeData {
            source_node_id: PATH_START_NODE_ID,
            source_coordinate: -1,
            source_strand: Strand::Forward,
            target_node_id: node1_id,
            target_coordinate: 1,
            target_strand: Strand::Forward,
        };
        let sequence2 = Sequence::new()
            .sequence_type("DNA")
            .sequence("AAAAAAAA")
            .save(conn);
        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
        let edge2 = EdgeData {
            source_node_id: node1_id,
            source_coordinate: 2,
            source_strand: Strand::Forward,
            target_node_id: node2_id,
            target_coordinate: 3,
            target_strand: Strand::Forward,
        };
        let edge3 = EdgeData {
            source_node_id: node2_id,
            source_coordinate: 4,
            source_strand: Strand::Forward,
            target_node_id: PATH_END_NODE_ID,
            target_coordinate: -1,
            target_strand: Strand::Forward,
        };

        let edge_ids = Edge::bulk_create(conn, &[edge1, edge2, edge3]);
        assert_eq!(edge_ids.len(), 3);
        let edges = Edge::query_by_ids(conn, &edge_ids);
        assert_eq!(edges.len(), 3);

        // Each edge has a distinct source node, so the source node id works as a lookup key.
        let edges_by_source_node_id = edges
            .into_iter()
            .map(|edge| (edge.source_node_id, edge))
            .collect::<HashMap<_, Edge>>();

        let edge_result1 = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap();
        assert_eq!(edge_result1.source_coordinate, -1);
        assert_eq!(edge_result1.target_node_id, node1_id);
        assert_eq!(edge_result1.target_coordinate, 1);
        let edge_result2 = edges_by_source_node_id.get(&node1_id).unwrap();
        assert_eq!(edge_result2.source_coordinate, 2);
        assert_eq!(edge_result2.target_node_id, node2_id);
        assert_eq!(edge_result2.target_coordinate, 3);
        let edge_result3 = edges_by_source_node_id.get(&node2_id).unwrap();
        assert_eq!(edge_result3.source_coordinate, 4);
        assert_eq!(edge_result3.target_node_id, PATH_END_NODE_ID);
        assert_eq!(edge_result3.target_coordinate, -1);
    }
553
    /// Checks that `bulk_create` returns ids in the same order as its input, both when all
    /// edges are new and when some of them already exist.
    #[test]
    fn test_bulk_create_returns_edges_in_order() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let sequence1 = Sequence::new()
            .sequence_type("DNA")
            .sequence("ATCGATCG")
            .save(conn);
        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
        let edge1 = EdgeData {
            source_node_id: PATH_START_NODE_ID,
            source_coordinate: -1,
            source_strand: Strand::Forward,
            target_node_id: node1_id,
            target_coordinate: 1,
            target_strand: Strand::Forward,
        };
        let sequence2 = Sequence::new()
            .sequence_type("DNA")
            .sequence("AAAAAAAA")
            .save(conn);
        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
        let edge2 = EdgeData {
            source_node_id: node1_id,
            source_coordinate: 2,
            source_strand: Strand::Forward,
            target_node_id: node2_id,
            target_coordinate: 3,
            target_strand: Strand::Forward,
        };
        let edge3 = EdgeData {
            source_node_id: node2_id,
            source_coordinate: 4,
            source_strand: Strand::Forward,
            target_node_id: PATH_END_NODE_ID,
            target_coordinate: -1,
            target_strand: Strand::Forward,
        };

        // First call: only edge2 and edge3, both new.
        let edges = vec![edge2, edge3];
        let edge_ids1 = Edge::bulk_create(conn, &edges);
        assert_eq!(edge_ids1.len(), 2);
        for (index, id) in edge_ids1.iter().enumerate() {
            let edge = Edge::get_by_id(conn, id).unwrap();
            assert_eq!(EdgeData::from(&edge), edges[index]);
        }

        // Second call: edge1 is new, edge2 and edge3 already exist and must keep their ids.
        let edges = vec![edge1, edge2, edge3];
        let edge_ids2 = Edge::bulk_create(conn, &edges);
        assert_eq!(edge_ids2[1], edge_ids1[0]);
        assert_eq!(edge_ids2[2], edge_ids1[1]);
        assert_eq!(edge_ids2.len(), 3);
        for (index, id) in edge_ids2.iter().enumerate() {
            let edge = Edge::get_by_id(conn, id).unwrap();
            assert_eq!(EdgeData::from(&edge), edges[index]);
        }
    }
611
    /// Checks that `bulk_create` reuses the id of an edge that `Edge::create` stored
    /// beforehand, while still inserting the remaining edges.
    #[test]
    fn test_bulk_create_with_existing_edge() {
        let conn = &mut get_connection(None).unwrap();
        Collection::create(conn, "test collection");
        let sequence1 = Sequence::new()
            .sequence_type("DNA")
            .sequence("ATCGATCG")
            .save(conn);
        let node1_id = Node::create(conn, &sequence1.hash, &HashId::convert_str("1"));
        // NOTE: Create one edge ahead of time to confirm an existing row ID gets returned in the bulk create
        let existing_edge = Edge::create(
            conn,
            PATH_START_NODE_ID,
            -1,
            Strand::Forward,
            node1_id,
            1,
            Strand::Forward,
        );
        assert_eq!(existing_edge.source_node_id, PATH_START_NODE_ID);
        assert_eq!(existing_edge.source_coordinate, -1);
        assert_eq!(existing_edge.target_node_id, node1_id);
        assert_eq!(existing_edge.target_coordinate, 1);

        // Same field values as `existing_edge`, so it hashes to the same id.
        let edge1 = EdgeData {
            source_coordinate: -1,
            source_node_id: PATH_START_NODE_ID,
            source_strand: Strand::Forward,
            target_node_id: node1_id,
            target_coordinate: 1,
            target_strand: Strand::Forward,
        };
        let sequence2 = Sequence::new()
            .sequence_type("DNA")
            .sequence("AAAAAAAA")
            .save(conn);
        let node2_id = Node::create(conn, &sequence2.hash, &HashId::convert_str("2"));
        let edge2 = EdgeData {
            source_node_id: node1_id,
            source_coordinate: 2,
            source_strand: Strand::Forward,
            target_node_id: node2_id,
            target_coordinate: 3,
            target_strand: Strand::Forward,
        };
        let edge3 = EdgeData {
            source_node_id: node2_id,
            source_coordinate: 4,
            source_strand: Strand::Forward,
            target_node_id: PATH_END_NODE_ID,
            target_coordinate: -1,
            target_strand: Strand::Forward,
        };

        let edge_ids = Edge::bulk_create(conn, &[edge1, edge2, edge3]);
        assert_eq!(edge_ids.len(), 3);
        let edges = Edge::query_by_ids(conn, &edge_ids);
        assert_eq!(edges.len(), 3);

        let edges_by_source_node_id = edges
            .into_iter()
            .map(|edge| (edge.source_node_id, edge))
            .collect::<HashMap<_, Edge>>();

        let edge_result1 = edges_by_source_node_id.get(&PATH_START_NODE_ID).unwrap();

        assert_eq!(edge_result1.id, existing_edge.id);

        assert_eq!(edge_result1.source_coordinate, -1);
        assert_eq!(edge_result1.target_node_id, node1_id);
        assert_eq!(edge_result1.target_coordinate, 1);
        let edge_result2 = edges_by_source_node_id.get(&node1_id).unwrap();
        assert_eq!(edge_result2.source_coordinate, 2);
        assert_eq!(edge_result2.target_node_id, node2_id);
        assert_eq!(edge_result2.target_coordinate, 3);
        let edge_result3 = edges_by_source_node_id.get(&node2_id).unwrap();
        assert_eq!(edge_result3.source_coordinate, 4);
        assert_eq!(edge_result3.target_node_id, PATH_END_NODE_ID);
        assert_eq!(edge_result3.target_coordinate, -1);
    }
692
    /// Checks the number of blocks produced by `blocks_from_edges` before and after an
    /// insertion into the block group, and that the order of the input edges is irrelevant.
    #[test]
    fn test_blocks_from_edges() {
        let conn = get_connection(None).unwrap();
        let (block_group_id, path) = setup_block_group(&conn);

        let edges = BlockGroupEdge::edges_for_block_group(&conn, &block_group_id);
        let blocks = Edge::blocks_from_edges(&conn, &edges);

        // 4 actual sequences: 10-length ones of all A, all T, all C, all G
        // 2 terminal node blocks (start/end)
        // 6 total
        assert_eq!(blocks.len(), 6);

        let insert_sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence("NNNN")
            .save(&conn);
        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("1"));
        let insert = PathBlock {
            node_id: insert_node_id,
            block_sequence: insert_sequence.get_sequence(0, 4).to_string(),
            sequence_start: 0,
            sequence_end: 4,
            path_start: 7,
            path_end: 15,
            strand: Strand::Forward,
        };
        let change = PathChange {
            block_group_id,
            path: path.clone(),
            path_accession: None,
            start: 7,
            end: 15,
            block: insert,
            chromosome_index: 0,
            phased: 0,
            preserve_edge: true,
        };
        let tree = path.intervaltree(&conn);
        BlockGroup::insert_change(&conn, &change, &tree).unwrap();
        let mut edges = BlockGroupEdge::edges_for_block_group(&conn, &block_group_id);

        let blocks = Edge::blocks_from_edges(&conn, &edges);

        // 2 10-length sequences of all C, all G
        // 1 inserted NNNN sequence
        // 4 split blocks (A and T sequences were split) resulting from the inserted sequence
        // 2 terminal node blocks (start/end)
        // 9 total
        assert_eq!(blocks.len(), 9);

        // Confirm that ordering doesn't matter
        edges.reverse();
        let blocks = Edge::blocks_from_edges(&conn, &edges);

        // 2 10-length sequences of all C, all G
        // 1 inserted NNNN sequence
        // 4 split blocks (A and T sequences were split) resulting from the inserted sequence
        // 2 terminal node blocks (start/end)
        // 9 total
        assert_eq!(blocks.len(), 9);
    }
755
    /// Checks that one outgoing edge (source coordinate 2) and one incoming edge (target
    /// coordinate 3) on the same template node yield the sorted boundaries `[2, 3]`.
    #[test]
    fn test_get_block_boundaries() {
        let conn = get_connection(None).unwrap();
        let template_sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence("AAAAAAAAAA")
            .save(&conn);
        let template_node_id =
            Node::create(&conn, &template_sequence.hash, &HashId::convert_str("1"));

        let insert_sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence("NNNN")
            .save(&conn);
        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("2"));

        // template[2] -> insert[0]
        let edge1 = Edge::create(
            &conn,
            template_node_id,
            2,
            Strand::Forward,
            insert_node_id,
            0,
            Strand::Forward,
        );
        // insert[4] -> template[3]
        let edge2 = Edge::create(
            &conn,
            insert_node_id,
            4,
            Strand::Forward,
            template_node_id,
            3,
            Strand::Forward,
        );

        let boundaries = Edge::get_block_boundaries(Some(&vec![&edge1]), Some(&vec![&edge2]));
        assert_eq!(boundaries, vec![2, 3]);
    }
794
    /// Checks `get_block_boundaries` when only one side is supplied: outgoing edges alone
    /// yield their source coordinates, incoming edges alone yield their target coordinates.
    #[test]
    fn test_get_block_boundaries_with_two_original_sequences() {
        let conn = get_connection(None).unwrap();
        let template_sequence1 = Sequence::new()
            .sequence_type("DNA")
            .sequence("AAAAAAAAAA")
            .save(&conn);
        let template1_node_id =
            Node::create(&conn, &template_sequence1.hash, &HashId::convert_str("1"));

        let template_sequence2 = Sequence::new()
            .sequence_type("DNA")
            .sequence("TTTTTTTTTT")
            .save(&conn);
        let template2_node_id =
            Node::create(&conn, &template_sequence2.hash, &HashId::convert_str("2"));

        let insert_sequence = Sequence::new()
            .sequence_type("DNA")
            .sequence("NNNN")
            .save(&conn);
        let insert_node_id = Node::create(&conn, &insert_sequence.hash, &HashId::convert_str("3"));

        // template1[2] -> insert[0]
        let edge1 = Edge::create(
            &conn,
            template1_node_id,
            2,
            Strand::Forward,
            insert_node_id,
            0,
            Strand::Forward,
        );
        // insert[4] -> template2[3]
        let edge2 = Edge::create(
            &conn,
            insert_node_id,
            4,
            Strand::Forward,
            template2_node_id,
            3,
            Strand::Forward,
        );

        let outgoing_boundaries = Edge::get_block_boundaries(Some(&vec![&edge1]), None);
        assert_eq!(outgoing_boundaries, vec![2]);
        let incoming_boundaries = Edge::get_block_boundaries(None, Some(&vec![&edge2]));
        assert_eq!(incoming_boundaries, vec![3]);
    }
842
    /// Round-trips an `Edge` through `write_capnp` / `read_capnp` and checks that the result
    /// equals the original.
    #[test]
    fn test_edge_capnp_serialization() {
        use capnp::message::TypedBuilder;

        let edge = Edge {
            id: HashId::pad_str(789),
            source_node_id: HashId::convert_str("1"),
            source_coordinate: 10,
            source_strand: Strand::Forward,
            target_node_id: HashId::convert_str("2"),
            target_coordinate: 20,
            target_strand: Strand::Reverse,
        };

        let mut message = TypedBuilder::<edge::Owned>::new_default();
        let mut root = message.init_root();
        edge.write_capnp(&mut root);

        // Read back from the same message that was just written.
        let deserialized = Edge::read_capnp(root.into_reader());
        assert_eq!(edge, deserialized);
    }
864}